Code clean-up

* Add readme * Some comments for translators * start using script decorator * Bump version to 2.0 * remove some debug statements
lukaszgo1 · Apr 18, 2020 · 4eb5b0a · 4eb5b0a
1 parent d4b6d1b
commit 4eb5b0a
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 17 deletions.
diff --git a/addon/globalPlugins/ocr/__init__.py b/addon/globalPlugins/ocr/__init__.py
@@ -1,9 +1,10 @@
+# -*- coding: UTF-8 -*-
 """NVDA OCR plugin
-This plugin uses Tesseract for OCR: http://code.google.com/p/tesseract-ocr/
-It also uses the Python Imaging Library (PIL): http://www.pythonware.com/products/pil/
+This plugin uses Tesseract for OCR: https://github.com/tesseract-ocr
+It also uses Pillow: https://python-pillow.org/
 @author: James Teh <jamie@nvaccess.org>
 @author: Rui Batista <ruiandrebatista@gmail.com>
-@copyright: 2011-2013 NV Access Limited, Rui Batista
+@copyright: 2011-2020 NV Access Limited, Rui Batista, Łukasz Golonka
 @license: GNU General Public License version 2.0
 """
 
@@ -25,6 +26,7 @@
 import textInfos.offsets
 import ui
 import locationHelper
+import scriptHandler
 
 PLUGIN_DIR = os.path.dirname(__file__)
 TESSERACT_EXE = os.path.join(PLUGIN_DIR, "tesseract", "tesseract.exe")
@@ -59,7 +61,6 @@ def __init__(self, xml, leftCoordOffset, topCoordOffset):
 		del self._textList
 
 	def _startElement(self, tag, attrs):
-		print(f"ElementStart: {tag} {attrs}")
 		if tag in ("p", "div"):
 			self._hasBlockHadContent = False
 		elif tag == "span":
@@ -69,14 +70,12 @@ def _startElement(self, tag, attrs):
 			elif cls == "ocr_word":
 				# Get the coordinates from the bbox info specified in the title attribute.
 				title = attrs.get("title")
-				print(f"title: {title}")
 				prefix, l, t, r, b = title.split(" ")
 				self.words.append(OcrWord(self.textLen,
 					self.leftCoordOffset + int(l) / IMAGE_RESIZE_FACTOR,
 					self.topCoordOffset + int(t) / IMAGE_RESIZE_FACTOR))
 
 	def _endElement(self, tag):
-		print(f"elementEnd: {tag}")
 		pass
 
 	def _charData(self, data):
@@ -170,6 +169,11 @@ def __init__(self):
 	def terminate(self):
 		gui.NVDASettingsDialog.categoryClasses.remove(OCRSettingsPanel)
 
+	@scriptHandler.script(
+		# Translators: Input help mode message for the script used to recognize current navigator object.
+		description = _("Recognizes current navigator object using Tesseract OCR. After recognition is done thext can be reviewed with review cursor commands."),
+		gesture="kb:NVDA+r"
+		)
 	def script_ocrNavigatorObject(self, gesture):
 		nav = api.getNavigatorObject()
 		left, top, width, height = nav.location
@@ -182,7 +186,7 @@ def script_ocrNavigatorObject(self, gesture):
 		try:
 			imgFile = baseFile + ".bmp"
 			img.save(imgFile)
-
+			# Translators: Announced when recognition starts.
 			ui.message(_("Running OCR"))
 			lang = config.conf["ocr"]["language"]
 			# Hide the Tesseract window.
@@ -198,24 +202,19 @@ def script_ocrNavigatorObject(self, gesture):
 				pass
 		try:
 			hocrFile = baseFile + ".html"
-
 			parser = HocrParser(open(hocrFile,encoding='utf8').read(),
 				left, top)
 		finally:
 			try:
 				os.remove(hocrFile)
 			except OSError:
 				pass
-
 		# Let the user review the OCR output.
 		nav.makeTextInfo = lambda position: OcrTextInfo(nav, position, parser)
 		api.setReviewPosition(nav.makeTextInfo(textInfos.POSITION_FIRST))
+		# Translators: Announced when recognition is finished, note that it is not guaranteed that some text has been found.
 		ui.message(_("Done"))
 
-	__gestures = {
-		"kb:NVDA+r": "ocrNavigatorObject",
-	}
-
 localesToTesseractLangs = {
 "bg" : "bul",
 "ca" : "cat",

diff --git a/buildVars.py b/buildVars.py
@@ -20,19 +20,19 @@
 	"addon_description" : _("""Performs optical character recognition (OCR) to extract text from an object which is inaccessible.
 The Tesseract OCR engine is used.
 To perform OCR, move to the object in question using object navigation and press NVDA+r.
-You can set the OCR recognition language by going to the NVDA preferences menu and selecting OCR settings."""),
+You can set the OCR recognition language by going to the NVDA settings dialog  and selecting OCR settings."""),
 	# version
-	"addon_version" : "2020.1",
+	"addon_version" : "2.0",
 	# Author(s)
-	"addon_author" : u"NV Access Limited <info@nvaccess.org>",
+	"addon_author" : u"NV Access Limited <info@nvaccess.org>, Łukasz Golonka <lukasz.golonka@mailbox.org>",
 	# URL for the add-on documentation support
 	"addon_url" : None,
 	# Documentation file name
 	"addon_docFileName" : None,
 	# Minimum NVDA version supported (e.g. "2018.3.0", minor version is optional)
 	"addon_minimumNVDAVersion" : "2019.3.0",
 	# Last NVDA version supported/tested (e.g. "2018.4.0", ideally more recent than minimum version)
-	"addon_lastTestedNVDAVersion" : "2019.3.1",
+	"addon_lastTestedNVDAVersion" : "2020.1",
 	# Add-on update channel (default is None, denoting stable releases, and for development releases, use "dev"; do not change unless you know what you are doing)
 	"addon_updateChannel" : None,
 }

diff --git a/readme.md b/readme.md
@@ -0,0 +1,15 @@
+# OCR
+
+* Authors: NV Access Limited & other contributors. Currently maintained by Łukasz Golonka <lukasz.golonka@mailbox.org>
+* NVDA compatibility: 2019.3 and beyond
+* Download [Stable version][1]
+* Download [version compatible with NVDA 2019.2 and older][2]
+
+Important: if you are using NVDA 2017.3 or later on Windows 10, please consider using the built-in Windows 10 OCR.
+Performs optical character recognition (OCR) to extract text from an object which is inaccessible. The Tesseract OCR engine is used. To perform OCR, move to the object in question using object navigation and press NVDA+r. You can set the OCR recognition language by going to the NVDA settings panel and selecting OCR settings. The keyboard shortcut can be reassigned from the NVDA input gestures dialog in the "Miscellaneous" category.
+
+
+
+[1]: https://addons.nvda-project.org/files/get.php?file=ocr
+[2]: https://www.nvaccess.org/files/nvda-addons/ocr_0.20120529.01.nvda-addon
+