-
-
Notifications
You must be signed in to change notification settings - Fork 449
/
pyocr.py
79 lines (60 loc) · 2.22 KB
/
pyocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from __future__ import absolute_import, unicode_literals
from contextlib import contextmanager
import locale
import logging
from PIL import Image
import pyocr
import pyocr.builders
from ..classes import OCRBackendBase
from ..exceptions import OCRError
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
@contextmanager
def c_locale():
    """
    Context manager that runs its body with every locale category set to
    'C' and restores the previous locale settings on exit.

    The previous settings are restored even when the body raises, so a
    failure inside the ``with`` block does not leave the process in the
    'C' locale.
    """
    # Calling setlocale() without a new value only queries the current
    # setting. Querying LC_ALL captures every category, whereas
    # locale.getlocale() reports LC_CTYPE only; restoring that single value
    # into LC_ALL would overwrite the other categories.
    locale_current = locale.setlocale(locale.LC_ALL)
    locale.setlocale(locale.LC_ALL, 'C')
    try:
        yield
    finally:
        locale.setlocale(locale.LC_ALL, locale_current)
class PyOCR(OCRBackendBase):
    """
    OCR backend that delegates the recognition work to a tool discovered
    through the pyocr library.
    """

    def __init__(self, *args, **kwargs):
        """
        Select the pyocr tool to use and record the languages it reports.

        Raises OCRError when pyocr reports no available tool.
        """
        super(PyOCR, self).__init__(*args, **kwargs)

        self.languages = ()

        available_tools = pyocr.get_available_tools()
        if not available_tools:
            raise OCRError('No OCR tool found')

        # The tools are returned in the recommended order of usage, so the
        # first entry is the default. The libtesseract tool, when present,
        # takes precedence over that default.
        libtesseract_tools = [
            candidate for candidate in available_tools
            if candidate.__name__ == 'pyocr.libtesseract'
        ]
        if libtesseract_tools:
            self.tool = libtesseract_tools[-1]
        else:
            self.tool = available_tools[0]

        logger.debug('Will use tool \'%s\'', self.tool.get_name())

        # The tool is queried while the 'C' locale is active.
        with c_locale():
            self.languages = self.tool.get_available_languages()
            logger.debug('Available languages: %s', ', '.join(self.languages))

    def execute(self, *args, **kwargs):
        """
        Run the selected pyocr tool on the image obtained from
        self.converter.get_page() and return the tool's output.

        Any exception raised by the tool is logged and re-raised as
        OCRError. The message includes an extra hint when the requested
        language is not among the languages reported by the tool.
        """
        super(PyOCR, self).execute(*args, **kwargs)

        page_image = Image.open(self.converter.get_page())

        try:
            # The recognition call is made while the 'C' locale is active.
            with c_locale():
                return self.tool.image_to_string(
                    page_image,
                    lang=self.language,
                    builder=pyocr.builders.TextBuilder()
                )
        except Exception as error:
            message = (
                'Exception calling pyocr with language option: {}; {}'
            ).format(self.language, error)

            language_is_known = self.language in self.languages
            if not language_is_known:
                message = (
                    '{}\nThe requested OCR language "{}" is not '
                    'available and needs to be installed.\n'
                ).format(message, self.language)

            logger.error(message)
            raise OCRError(message)