Review actions.

nvaccess · Jul 11, 2017 · d3db08f · d3db08f
1 parent 620b283
commit d3db08f
Show file tree

Hide file tree

Showing 7 changed files with 111 additions and 70 deletions.
diff --git a/nvdaHelper/localWin10/utils.h b/nvdaHelper/localWin10/utils.h
@@ -15,4 +15,9 @@ This license can be found at:
 
 #include <robuffer.h>
 
+/** Get access to the raw byte array backing an IBuffer object.
+ * This is necessary when interoperating with non-WinRT components;
+ * e.g. returning bytes from an IBuffer to a C caller.
+ * This byte array is mutable; it is *not* a copy.
+ */
 byte* getBytes(Windows::Storage::Streams::IBuffer^ buffer);
diff --git a/nvdaHelper/localWin10/uwpOcr.cpp b/nvdaHelper/localWin10/uwpOcr.cpp
@@ -37,7 +37,7 @@ using namespace Windows::Data::Json;
 UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback) {
 	auto engine = OcrEngine::TryCreateFromLanguage(ref new Language(ref new String(language)));
 	if (!engine)
-		return NULL;
+		return nullptr;
 	auto instance = new UwpOcr;
 	instance->engine = engine;
 	instance->callback = callback;

diff --git a/nvdaHelper/localWin10/uwpOcr.h b/nvdaHelper/localWin10/uwpOcr.h
@@ -26,5 +26,7 @@ extern "C" {
 export UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback);
 export void __stdcall uwpOcr_terminate(UwpOcr* instance);
 export void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height);
+// Returns a BSTR of language codes terminated by semi-colons;
+// e.g. "de-de;en-us;".
 export BSTR __stdcall uwpOcr_getLanguages();
 }
diff --git a/source/contentRecog/__init__.py b/source/contentRecog/__init__.py
@@ -32,7 +32,7 @@ def getResizeFactor(self, width, height):
 		"""
 		return 1
 
-	def recognize(self, pixels, width, height, coordConverter, onResult):
+	def recognize(self, pixels, width, height, imageInfo, onResult):
 		"""Asynchronously recognize content from an image.
 		This method should not block.
 		Only one recognition can be performed at a time.
@@ -43,14 +43,8 @@ def recognize(self, pixels, width, height, coordConverter, onResult):
 			i.e. four bytes per pixel in the order blue, green, red, alpha.
 			However, the alpha channel should be ignored.
 		@type pixels: Two dimensional array (y then x) of L{winGDI.RGBQUAD}
-		@param width: The width of the image in pixels.
-		@type width: int
-		@param height: The height of the image in pixels.
-		@type height: int
-		@param coordConverter: The converter to convert coordinates
-			in the supplied image to screen coordinates.
-			This should be used when returning coordinates to NVDA.
-		@type coordConverter: L{ResultCoordConverter}
+		@param imageInfo: Information about the image for recognition.
+		@type imageInfo: L{RecogImageInfo}
 		@param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure) as its only argument.
 		@type onResult: callable
 		"""
@@ -61,38 +55,64 @@ def cancel(self):
 		"""
 		raise NotImplementedError
 
-class ResultCoordConverter(object):
-	"""Converts coordinates in a recognition result to screen coordinates.
+class RecogImageInfo(object):
+	"""Encapsulates information about a recognized image and
+	provides functionality to convert coordinates.
 	An image captured for recognition can begin at any point on the screen.
-	However, the image is cropped when passed to the recognizer.
+	However, the image must be cropped when passed to the recognizer.
 	Also, some recognizers need the image to be resized prior to recognition.
-	This converter converts coordinates in the recognized image
+	This class calculates the width and height of the image for recognition;
+	see the L{recogWidth} and L{recogHeight} attributes.
+	It can also convert coordinates in the recognized image
 	to screen coordinates suitable to be returned to NVDA;
 	e.g. in order to route the mouse.
+	This is done using the L{convertXToScreen} and L{convertYToScreen} methods.
 	"""
 
-	def __init__(self, left, top, resizeFactor):
+	def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFactor):
 		"""
-		@param left: The x screen coordinate of the upper-left corner of the image.
-		@type left: int
-		@param top: The y screen coordinate of the upper-left corner of the image.
-		@type top: int
-		@param resizeFactor: The factor by which the image was resized for recognition.
+		@param screenLeft: The x screen coordinate of the upper-left corner of the image.
+		@type screenLeft: int
+		@param screenTop: The y screen coordinate of the upper-left corner of the image.
+		@type screenTop: int
+		@param screenWidth: The width of the image on the screen.
+		@type screenWidth: int
+		@param screenHeight: The height of the image on the screen.
+		@type screenHeight: int
+		@param resizeFactor: The factor by which the image must be resized for recognition.
 		@type resizeFactor: int or float
+		@raise ValueError: If the supplied screen coordinates indicate that
+			the image is not visible; e.g. width or height of 0.
 		"""
-		self.left = left
-		self.top = top
+		if screenLeft < 0 or screenTop < 0 or screenWidth <= 0 or screenHeight <= 0:
+			raise ValueError("Image not visible (invalid screen coordinates)")
+		self.screenLeft = screenLeft
+		self.screenTop = screenTop
+		self.screenWidth = screenWidth
+		self.screenHeight = screenHeight
 		self.resizeFactor = resizeFactor
+		#: The width of the recognized image.
+		self.recogWidth = int(screenWidth * resizeFactor)
+		#: The height of the recognized image.
+		self.recogHeight = int(screenHeight * resizeFactor)
+
+	@classmethod
+	def createFromRecognizer(cls, screenLeft, screenTop, screenWidth, screenHeight, recognizer):
+		"""Convenience method to construct an instance using a L{ContentRecognizer}.
+		The resize factor is obtained by calling L{ContentRecognizer.getResizeFactor}.
+		"""
+		resize = recognizer.getResizeFactor(screenWidth, screenHeight)
+		return cls(screenLeft, screenTop, screenWidth, screenHeight, resize)
 
-	def convertX(self, x):
-		"""Convert an x coordinate in the result to an x coordinate on the screen.
+	def convertXToScreen(self, x):
+		"""Convert an x coordinate in the recognized image to an x coordinate on the screen.
 		"""
-		return self.left + int(x / self.resizeFactor)
+		return self.screenLeft + int(x / self.resizeFactor)
 
-	def convertY(self, y):
-		"""Convert an x coordinate in the result to an x coordinate on the screen.
+	def convertYToScreen(self, y):
+		"""Convert a y coordinate in the recognized image to a y coordinate on the screen.
 		"""
-		return self.top + int(y / self.resizeFactor)
+		return self.screenTop + int(y / self.resizeFactor)
 
 class RecognitionResult(object):
 	"""Provides access to the result of recognition by a recognizer.
@@ -113,7 +133,8 @@ def makeTextInfo(self, obj, position):
 		"""
 		raise NotImplementedError
 
-# Used by LinesWordsResult.
+# Used internally by LinesWordsResult.
+# (Lwr is short for LinesWordsResult.)
 LwrWord = namedtuple("LwrWord", ("offset", "left", "top"))
 
 class LinesWordsResult(RecognitionResult):
@@ -123,7 +144,7 @@ class LinesWordsResult(RecognitionResult):
 	Several OCR engines produce output in a format which can be easily converted to this.
 	"""
 
-	def __init__(self, data, coordConverter):
+	def __init__(self, data, imageInfo):
 		"""Constructor.
 		@param data: The lines/words data structure. For example:
 			[
@@ -137,13 +158,13 @@ def __init__(self, data, coordConverter):
 				]
 			]
 		@type data: list of lists of dicts
-		@param coordConverter: The converter to convert coordinates
-			in the supplied image to screen coordinates.
-			This should be used when returning coordinates to NVDA.
-		@type coordConverter: L{ResultCoordConverter}
+		@param imageInfo: Information about the recognized image.
+			This is used to convert coordinates in the recognized image
+			to screen coordinates.
+		@type imageInfo: L{RecogImageInfo}
 		"""
 		self.data = data
-		self.coordConverter = coordConverter
+		self.imageInfo = imageInfo
 		self._textList = []
 		self.textLen = 0
 		#: End offsets for each line.
@@ -164,8 +185,8 @@ def _parseData(self):
 					self._textList.append(" ")
 					self.textLen += 1
 				self.words.append(LwrWord(self.textLen,
-					self.coordConverter.convertX(word["x"]),
-					self.coordConverter.convertY(word["y"])))
+					self.imageInfo.convertXToScreen(word["x"]),
+					self.imageInfo.convertYToScreen(word["y"])))
 				text = word["text"]
 				self._textList.append(text)
 				self.textLen += len(text)
@@ -224,11 +245,13 @@ def _getPointFromOffset(self, offset):
 
 class SimpleTextResult(RecognitionResult):
 	"""A L{RecognitionResult} which presents a simple text string.
-	NVDA calculates words and lines itself based on the text;
-	e.g. a new line character breaks a line.
-	Routing the mouse, etc. cannot be supported.
 	This should only be used if the recognizer only returns text
 	and no coordinate information.
+	In this case, NVDA calculates words and lines itself based on the text;
+	e.g. a new line character breaks a line.
+	Routing the mouse, etc. cannot be supported because even though NVDA
+	has the coordinates for the entire block of content, it doesn't have
+	the coordinates for individual words or characters.
 	"""
 
 	def __init__(self, text):

diff --git a/source/contentRecog/recogUi.py b/source/contentRecog/recogUi.py
@@ -22,7 +22,7 @@
 import textInfos
 from logHandler import log
 import queueHandler
-from . import ResultCoordConverter
+from . import RecogImageInfo
 
 class RecogResultNVDAObject(cursorManager.CursorManager, NVDAObjects.window.Window):
 	"""Fake NVDAObject used to present a recognition result in a cursor manager.
@@ -99,24 +99,27 @@ def script_exit(self, gesture):
 def recognizeNavigatorObject(recognizer):
 	"""User interface function to recognize content in the navigator object.
 	This should be called from a script or in response to a GUI action.
-	@param recognizer: The content recognizer ot use.
+	@param recognizer: The content recognizer to use.
 	@type recognizer: L{contentRecog.ContentRecognizer}
 	"""
 	global _activeRecog
+	nav = api.getNavigatorObject()
+	left, top, width, height = nav.location
+	try:
+		imgInfo = RecogImageInfo.createFromRecognizer(left, top, width, height, recognizer)
+	except ValueError:
+		# Translators: Reported when content recognition (e.g. OCR) is attempted,
+		# but the content is not visible.
+		ui.message(_("Content is not visible"))
+		return
 	if _activeRecog:
 		_activeRecog.cancel()
 	# Translators: Reporting when content recognition (e.g. OCR) begins.
 	ui.message(_("Recognizing"))
-	nav = api.getNavigatorObject()
-	left, top, width, height = nav.location
-	resize = recognizer.getResizeFactor(width, height)
-	coordConv = ResultCoordConverter(left, top, resize)
-	destWidth = int(width * resize)
-	destHeight = int(height * resize)
-	sb = screenBitmap.ScreenBitmap(destWidth, destHeight)
+	sb = screenBitmap.ScreenBitmap(imgInfo.recogWidth, imgInfo.recogHeight)
 	pixels = sb.captureImage(left, top, width, height)
 	_activeRecog = recognizer
-	recognizer.recognize(pixels, destWidth, destHeight, coordConv, _recogOnResult)
+	recognizer.recognize(pixels, imgInfo, _recogOnResult)
 
 def _recogOnResult(result):
 	global _activeRecog

diff --git a/source/contentRecog/uwpOcr.py b/source/contentRecog/uwpOcr.py
@@ -86,15 +86,15 @@ def __init__(self, language=None):
 			self.language = getConfigLanguage()
 		self._dll = NVDAHelper.getHelperLocalWin10Dll()
 
-	def recognize(self, pixels, width, height, coordConv, onResult):
+	def recognize(self, pixels, imgInfo, onResult):
 		self._onResult = onResult
 		@uwpOcr_Callback
 		def callback(result):
 			# If self._onResult is None, recognition was cancelled.
 			if self._onResult:
 				if result:
 					data = json.loads(result)
-					self._onResult(LinesWordsResult(data, coordConv))
+					self._onResult(LinesWordsResult(data, imgInfo))
 				else:
 					self._onResult(RuntimeError("UWP OCR failed"))
 			self._dll.uwpOcr_terminate(self._handle)
@@ -105,7 +105,7 @@ def callback(result):
 		if not self._handle:
 			onResult(RuntimeError("UWP OCR initialization failed"))
 			return
-		self._dll.uwpOcr_recognize(self._handle, pixels, width, height)
+		self._dll.uwpOcr_recognize(self._handle, pixels, imgInfo.recogWidth, imgInfo.recogHeight)
 
 	def cancel(self):
 		self._onResult = None
diff --git a/tests/unit/contentRecog/test_contentRecog.py b/tests/unit/contentRecog/test_contentRecog.py
@@ -11,33 +11,42 @@
 import contentRecog
 import textInfos
 
-class TestResultCoordConverter(unittest.TestCase):
+class TestRecogImageInfo(unittest.TestCase):
 
 	def test_noOffsetNoResize(self):
-		conv = contentRecog.ResultCoordConverter(0, 0, 1)
-		actual = conv.convertX(100), conv.convertY(200)
-		self.assertEqual(actual, (100, 200))
+		info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 1)
+		self.assertEqual(info.recogWidth, 1000)
+		self.assertEqual(info.recogHeight, 2000)
+		self.assertEqual(info.convertXToScreen(100), 100)
+		self.assertEqual(info.convertYToScreen(200), 200)
 
 	def test_withOffsetNoResize(self):
-		conv = contentRecog.ResultCoordConverter(10, 20, 1)
-		actual = conv.convertX(100), conv.convertY(200)
-		self.assertEqual(actual, (110, 220))
+		info = contentRecog.RecogImageInfo(10, 20, 1000, 2000, 1)
+		self.assertEqual(info.recogWidth, 1000)
+		self.assertEqual(info.recogHeight, 2000)
+		self.assertEqual(info.convertXToScreen(100), 110)
+		self.assertEqual(info.convertYToScreen(200), 220)
 
 	def test_noOffsetWithResize(self):
-		conv = contentRecog.ResultCoordConverter(0, 0, 2)
-		actual = conv.convertX(200), conv.convertY(400)
-		self.assertEqual(actual, (100, 200))
+		info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 2)
+		self.assertEqual(info.recogWidth, 2000)
+		self.assertEqual(info.recogHeight, 4000)
+		self.assertEqual(info.convertXToScreen(200), 100)
+		self.assertEqual(info.convertYToScreen(400), 200)
 
 	def test_withOffsetWithResize(self):
-		conv = contentRecog.ResultCoordConverter(10, 20, 2)
-		actual = conv.convertX(200), conv.convertY(400)
-		self.assertEqual(actual, (110, 220))
+		info = contentRecog.RecogImageInfo(10, 20, 1000, 2000, 2)
+		self.assertEqual(info.recogWidth, 2000)
+		self.assertEqual(info.recogHeight, 4000)
+		self.assertEqual(info.convertXToScreen(200), 110)
+		self.assertEqual(info.convertYToScreen(400), 220)
 
 class FakeNVDAObject(object):
 	pass
 
 class TestLinesWordsResult(unittest.TestCase):
-	"""Tests contentRecog.LinesWordsResult and contentRecog.LwrTextInfo.
+	"""Tests that contentRecog.LinesWordsResult and contentRecog.LwrTextInfo
+	correctly parse and process the JSON from a recognizer.
 	"""
 	DATA = [
 		[
@@ -70,9 +79,8 @@ class TestLinesWordsResult(unittest.TestCase):
 	LINE2_START = 12
 
 	def setUp(self):
-		# Use a no-op coordinate converter.
-		conv = contentRecog.ResultCoordConverter(0, 0, 1)
-		self.result = contentRecog.LinesWordsResult(self.DATA, conv)
+		info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 1)
+		self.result = contentRecog.LinesWordsResult(self.DATA, info)
 		self.fakeObj = FakeNVDAObject()
 		self.textInfo = self.result.makeTextInfo(self.fakeObj, textInfos.POSITION_FIRST)