Skip to content

Commit

Permalink
Review actions.
Browse files Browse the repository at this point in the history
  • Loading branch information
jcsteh committed Jul 11, 2017
1 parent 620b283 commit d3db08f
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 70 deletions.
5 changes: 5 additions & 0 deletions nvdaHelper/localWin10/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,9 @@ This license can be found at:

#include <robuffer.h>

/** Get access to the raw byte array backing an IBuffer object.
* This is necessary when interoperating with non-WinRT components;
* e.g. returning bytes from an IBuffer to a C caller.
* This byte array is mutable; it is *not* a copy.
*/
byte* getBytes(Windows::Storage::Streams::IBuffer^ buffer);
2 changes: 1 addition & 1 deletion nvdaHelper/localWin10/uwpOcr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ using namespace Windows::Data::Json;
UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback) {
auto engine = OcrEngine::TryCreateFromLanguage(ref new Language(ref new String(language)));
if (!engine)
return NULL;
return nullptr;
auto instance = new UwpOcr;
instance->engine = engine;
instance->callback = callback;
Expand Down
2 changes: 2 additions & 0 deletions nvdaHelper/localWin10/uwpOcr.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@ extern "C" {
export UwpOcr* __stdcall uwpOcr_initialize(const char16* language, uwpOcr_Callback callback);
export void __stdcall uwpOcr_terminate(UwpOcr* instance);
export void __stdcall uwpOcr_recognize(UwpOcr* instance, const RGBQUAD* image, unsigned int width, unsigned int height);
// Returns a BSTR of language codes terminated by semi-colons;
// e.g. "de-de;en-us;".
export BSTR __stdcall uwpOcr_getLanguages();
}
101 changes: 62 additions & 39 deletions source/contentRecog/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def getResizeFactor(self, width, height):
"""
return 1

def recognize(self, pixels, width, height, coordConverter, onResult):
def recognize(self, pixels, width, height, imageInfo, onResult):
"""Asynchronously recognize content from an image.
This method should not block.
Only one recognition can be performed at a time.
Expand All @@ -43,14 +43,8 @@ def recognize(self, pixels, width, height, coordConverter, onResult):
i.e. four bytes per pixel in the order blue, green, red, alpha.
However, the alpha channel should be ignored.
@type pixels: Two dimensional array (y then x) of L{winGDI.RGBQUAD}
@param width: The width of the image in pixels.
@type width: int
@param height: The height of the image in pixels.
@type height: int
@param coordConverter: The converter to convert coordinates
in the supplied image to screen coordinates.
This should be used when returning coordinates to NVDA.
@type coordConverter: L{ResultCoordConverter}
@param imageInfo: Information about the image for recognition.
@type imageInfo: L{RecogImageInfo}
@param onResult: A callable which takes a L{RecognitionResult} (or an exception on failure) as its only argument.
@type onResult: callable
"""
Expand All @@ -61,38 +55,64 @@ def cancel(self):
"""
raise NotImplementedError

class ResultCoordConverter(object):
"""Converts coordinates in a recognition result to screen coordinates.
class RecogImageInfo(object):
"""Encapsulates information about a recognized image and
provides functionality to convert coordinates.
An image captured for recognition can begin at any point on the screen.
However, the image is cropped when passed to the recognizer.
However, the image must be cropped when passed to the recognizer.
Also, some recognizers need the image to be resized prior to recognition.
This converter converts coordinates in the recognized image
This class calculates the width and height of the image for recognition;
see the L{recogWidth} and L{recogHeight} attributes.
It can also convert coordinates in the recognized image
to screen coordinates suitable to be returned to NVDA;
e.g. in order to route the mouse.
This is done using the L{convertXToScreen} and L{convertYToScreen} methods.
"""

def __init__(self, left, top, resizeFactor):
def __init__(self, screenLeft, screenTop, screenWidth, screenHeight, resizeFactor):
"""
@param left: The x screen coordinate of the upper-left corner of the image.
@type left: int
@param top: The y screen coordinate of the upper-left corner of the image.
@type top: int
@param resizeFactor: The factor by which the image was resized for recognition.
@param screenLeft: The x screen coordinate of the upper-left corner of the image.
@type screenLeft: int
@param screenTop: The y screen coordinate of the upper-left corner of the image.
@type screenTop: int
@param screenWidth: The width of the image on the screen.
@type screenWidth: int
@param screenHeight: The height of the image on the screen.
@type screenHeight: int
@param resizeFactor: The factor by which the image must be resized for recognition.
@type resizeFactor: int or float
@raise ValueError: If the supplied screen coordinates indicate that
the image is not visible; e.g. width or height of 0.
"""
self.left = left
self.top = top
if screenLeft < 0 or screenTop < 0 or screenWidth <= 0 or screenHeight <= 0:
raise ValueError("Image not visible (invalid screen coordinates)")
self.screenLeft = screenLeft
self.screenTop = screenTop
self.screenWidth = screenWidth
self.screenHeight = screenHeight
self.resizeFactor = resizeFactor
#: The width of the recognized image.
self.recogWidth = int(screenWidth * resizeFactor)
#: The height of the recognized image.
self.recogHeight = int(screenHeight * resizeFactor)

@classmethod
def createFromRecognizer(cls, screenLeft, screenTop, screenWidth, screenHeight, recognizer):
"""Convenience method to construct an instance using a L{ContentRecognizer}.
The resize factor is obtained by calling L{ContentRecognizer.getResizeFactor}.
"""
resize = recognizer.getResizeFactor(screenWidth, screenHeight)
return cls(screenLeft, screenTop, screenWidth, screenHeight, resize)

def convertX(self, x):
"""Convert an x coordinate in the result to an x coordinate on the screen.
def convertXToScreen(self, x):
"""Convert an x coordinate in the recognized image to an x coordinate on the screen.
"""
return self.left + int(x / self.resizeFactor)
return self.screenLeft + int(x / self.resizeFactor)

def convertY(self, y):
"""Convert an x coordinate in the result to an x coordinate on the screen.
def convertYToScreen(self, y):
"""Convert a y coordinate in the recognized image to a y coordinate on the screen.
"""
return self.top + int(y / self.resizeFactor)
return self.screenTop + int(y / self.resizeFactor)

class RecognitionResult(object):
"""Provides access to the result of recognition by a recognizer.
Expand All @@ -113,7 +133,8 @@ def makeTextInfo(self, obj, position):
"""
raise NotImplementedError

# Used by LinesWordsResult.
# Used internally by LinesWordsResult.
# (Lwr is short for LinesWordsResult.)
LwrWord = namedtuple("LwrWord", ("offset", "left", "top"))

class LinesWordsResult(RecognitionResult):
Expand All @@ -123,7 +144,7 @@ class LinesWordsResult(RecognitionResult):
Several OCR engines produce output in a format which can be easily converted to this.
"""

def __init__(self, data, coordConverter):
def __init__(self, data, imageInfo):
"""Constructor.
@param data: The lines/words data structure. For example:
[
Expand All @@ -137,13 +158,13 @@ def __init__(self, data, coordConverter):
]
]
@type data: list of lists of dicts
@param coordConverter: The converter to convert coordinates
in the supplied image to screen coordinates.
This should be used when returning coordinates to NVDA.
@type coordConverter: L{ResultCoordConverter}
@param imageInfo: Information about the recognized image.
This is used to convert coordinates in the recognized image
to screen coordinates.
@type imageInfo: L{RecogImageInfo}
"""
self.data = data
self.coordConverter = coordConverter
self.imageInfo = imageInfo
self._textList = []
self.textLen = 0
#: End offsets for each line.
Expand All @@ -164,8 +185,8 @@ def _parseData(self):
self._textList.append(" ")
self.textLen += 1
self.words.append(LwrWord(self.textLen,
self.coordConverter.convertX(word["x"]),
self.coordConverter.convertY(word["y"])))
self.imageInfo.convertXToScreen(word["x"]),
self.imageInfo.convertYToScreen(word["y"])))
text = word["text"]
self._textList.append(text)
self.textLen += len(text)
Expand Down Expand Up @@ -224,11 +245,13 @@ def _getPointFromOffset(self, offset):

class SimpleTextResult(RecognitionResult):
"""A L{RecognitionResult} which presents a simple text string.
NVDA calculates words and lines itself based on the text;
e.g. a new line character breaks a line.
Routing the mouse, etc. cannot be supported.
This should only be used if the recognizer only returns text
and no coordinate information.
In this case, NVDA calculates words and lines itself based on the text;
e.g. a new line character breaks a line.
Routing the mouse, etc. cannot be supported because even though NVDA
has the coordinates for the entire block of content, it doesn't have
the coordinates for individual words or characters.
"""

def __init__(self, text):
Expand Down
23 changes: 13 additions & 10 deletions source/contentRecog/recogUi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import textInfos
from logHandler import log
import queueHandler
from . import ResultCoordConverter
from . import RecogImageInfo

class RecogResultNVDAObject(cursorManager.CursorManager, NVDAObjects.window.Window):
"""Fake NVDAObject used to present a recognition result in a cursor manager.
Expand Down Expand Up @@ -99,24 +99,27 @@ def script_exit(self, gesture):
def recognizeNavigatorObject(recognizer):
"""User interface function to recognize content in the navigator object.
This should be called from a script or in response to a GUI action.
@param recognizer: The content recognizer ot use.
@param recognizer: The content recognizer to use.
@type recognizer: L{contentRecog.ContentRecognizer}
"""
global _activeRecog
nav = api.getNavigatorObject()
left, top, width, height = nav.location
try:
imgInfo = RecogImageInfo.createFromRecognizer(left, top, width, height, recognizer)
except ValueError:
# Translators: Reported when content recognition (e.g. OCR) is attempted,
# but the content is not visible.
ui.message(_("Content is not visible"))
return
if _activeRecog:
_activeRecog.cancel()
# Translators: Reporting when content recognition (e.g. OCR) begins.
ui.message(_("Recognizing"))
nav = api.getNavigatorObject()
left, top, width, height = nav.location
resize = recognizer.getResizeFactor(width, height)
coordConv = ResultCoordConverter(left, top, resize)
destWidth = int(width * resize)
destHeight = int(height * resize)
sb = screenBitmap.ScreenBitmap(destWidth, destHeight)
sb = screenBitmap.ScreenBitmap(imgInfo.recogWidth, imgInfo.recogHeight)
pixels = sb.captureImage(left, top, width, height)
_activeRecog = recognizer
recognizer.recognize(pixels, destWidth, destHeight, coordConv, _recogOnResult)
recognizer.recognize(pixels, imgInfo, _recogOnResult)

def _recogOnResult(result):
global _activeRecog
Expand Down
6 changes: 3 additions & 3 deletions source/contentRecog/uwpOcr.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ def __init__(self, language=None):
self.language = getConfigLanguage()
self._dll = NVDAHelper.getHelperLocalWin10Dll()

def recognize(self, pixels, width, height, coordConv, onResult):
def recognize(self, pixels, imgInfo, onResult):
self._onResult = onResult
@uwpOcr_Callback
def callback(result):
# If self._onResult is None, recognition was cancelled.
if self._onResult:
if result:
data = json.loads(result)
self._onResult(LinesWordsResult(data, coordConv))
self._onResult(LinesWordsResult(data, imgInfo))
else:
self._onResult(RuntimeError("UWP OCR failed"))
self._dll.uwpOcr_terminate(self._handle)
Expand All @@ -105,7 +105,7 @@ def callback(result):
if not self._handle:
onResult(RuntimeError("UWP OCR initialization failed"))
return
self._dll.uwpOcr_recognize(self._handle, pixels, width, height)
self._dll.uwpOcr_recognize(self._handle, pixels, imgInfo.recogWidth, imgInfo.recogHeight)

def cancel(self):
self._onResult = None
42 changes: 25 additions & 17 deletions tests/unit/contentRecog/test_contentRecog.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,42 @@
import contentRecog
import textInfos

class TestResultCoordConverter(unittest.TestCase):
class TestRecogImageInfo(unittest.TestCase):

def test_noOffsetNoResize(self):
conv = contentRecog.ResultCoordConverter(0, 0, 1)
actual = conv.convertX(100), conv.convertY(200)
self.assertEqual(actual, (100, 200))
info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 1)
self.assertEqual(info.recogWidth, 1000)
self.assertEqual(info.recogHeight, 2000)
self.assertEqual(info.convertXToScreen(100), 100)
self.assertEqual(info.convertYToScreen(200), 200)

def test_withOffsetNoResize(self):
conv = contentRecog.ResultCoordConverter(10, 20, 1)
actual = conv.convertX(100), conv.convertY(200)
self.assertEqual(actual, (110, 220))
info = contentRecog.RecogImageInfo(10, 20, 1000, 2000, 1)
self.assertEqual(info.recogWidth, 1000)
self.assertEqual(info.recogHeight, 2000)
self.assertEqual(info.convertXToScreen(100), 110)
self.assertEqual(info.convertYToScreen(200), 220)

def test_noOffsetWithResize(self):
conv = contentRecog.ResultCoordConverter(0, 0, 2)
actual = conv.convertX(200), conv.convertY(400)
self.assertEqual(actual, (100, 200))
info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 2)
self.assertEqual(info.recogWidth, 2000)
self.assertEqual(info.recogHeight, 4000)
self.assertEqual(info.convertXToScreen(200), 100)
self.assertEqual(info.convertYToScreen(400), 200)

def test_withOffsetWithResize(self):
conv = contentRecog.ResultCoordConverter(10, 20, 2)
actual = conv.convertX(200), conv.convertY(400)
self.assertEqual(actual, (110, 220))
info = contentRecog.RecogImageInfo(10, 20, 1000, 2000, 2)
self.assertEqual(info.recogWidth, 2000)
self.assertEqual(info.recogHeight, 4000)
self.assertEqual(info.convertXToScreen(200), 110)
self.assertEqual(info.convertYToScreen(400), 220)

class FakeNVDAObject(object):
pass

class TestLinesWordsResult(unittest.TestCase):
"""Tests contentRecog.LinesWordsResult and contentRecog.LwrTextInfo.
"""Tests that contentRecog.LinesWordsResult and contentRecog.LwrTextInfo
correctly parse and process the JSON from a recognizer.
"""
DATA = [
[
Expand Down Expand Up @@ -70,9 +79,8 @@ class TestLinesWordsResult(unittest.TestCase):
LINE2_START = 12

def setUp(self):
# Use a no-op coordinate converter.
conv = contentRecog.ResultCoordConverter(0, 0, 1)
self.result = contentRecog.LinesWordsResult(self.DATA, conv)
info = contentRecog.RecogImageInfo(0, 0, 1000, 2000, 1)
self.result = contentRecog.LinesWordsResult(self.DATA, info)
self.fakeObj = FakeNVDAObject()
self.textInfo = self.result.makeTextInfo(self.fakeObj, textInfos.POSITION_FIRST)

Expand Down

0 comments on commit d3db08f

Please sign in to comment.