Skip to content
This repository has been archived by the owner on Jun 14, 2018. It is now read-only.

Commit

Permalink
Cleanup the exception mess:
Browse files Browse the repository at this point in the history
- All tools raise exceptions inheriting from pyocr.PyocrException
- There is now one and only one TesseractError

Signed-off-by: Jerome Flesch <jflesch@openpaper.work>
  • Loading branch information
jflesch committed Jan 10, 2017
1 parent eae14a3 commit 2fde897
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 31 deletions.
20 changes: 15 additions & 5 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,28 @@ the tool used.
Argument 'builder' is optional. Default value is
builders.TextBuilder().

If the OCR fails, an exception ```pyocr.PyocrException```
will be raised.

An exception MAY also be raised if the input image contains no
text at all (this depends on the behavior of the OCR tool used).


### Orientation detection

Currently only available with Tesseract or Libtesseract.

```Python
if tool.can_detect_orientation():
orientation = tool.detect_orientation(
Image.open('test.png'),
lang='fra'
)
pprint("Orientation: {}".format(orientation))
try:
orientation = tool.detect_orientation(
Image.open('test.png'),
lang='fra'
)
except pyocr.PyocrException as exc:
print("Orientation detection failed: {}".format(exc))
return
print("Orientation: {}".format(orientation))
# Ex: Orientation: {
# 'angle': 90,
# 'confidence': 123.4,
Expand Down
8 changes: 8 additions & 0 deletions src/pyocr/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
from .pyocr import *
from .error import PyocrException

__all__ = [
'get_available_tools',
'PyocrException',
'TOOLS',
'VERSION',
]
5 changes: 3 additions & 2 deletions src/pyocr/cuneiform.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import tempfile

from . import builders
from . import error
from . import util


Expand Down Expand Up @@ -65,9 +66,9 @@ def get_available_builders():
]


class CuneiformError(Exception):
class CuneiformError(error.PyocrException):
def __init__(self, status, message):
Exception.__init__(self, message)
error.PyocrException.__init__(self, message)
self.status = status
self.message = message
self.args = (status, message)
Expand Down
13 changes: 13 additions & 0 deletions src/pyocr/error.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class PyocrException(Exception):
    """
    Base class of the exceptions raised by pyocr.

    Every tool-specific error inherits from it, so catching
    PyocrException is enough to handle an OCR failure without knowing
    which tool was used.
    """


class TesseractError(PyocrException):
    """
    Exception raised when Tesseract fails.

    Obsolete as a catch target: it is kept so that existing code
    catching TesseractError keeps working, but new code should catch
    PyocrException instead.

    Attributes:
        status: status value given by the caller, stored unchanged.
        message: human-readable description of the failure.
        args: always the tuple (status, message).
    """
    def __init__(self, status, message):
        PyocrException.__init__(self, message)
        self.status = status
        self.message = message
        # The base constructor only stored (message,); expose both values.
        self.args = (status, message)
6 changes: 4 additions & 2 deletions src/pyocr/libtesseract/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
'''
from .. import builders

from ..error import TesseractError

from . import tesseract_raw


Expand Down Expand Up @@ -45,7 +47,7 @@ def detect_orientation(image, lang=None):
tesseract_raw.set_image(handle, image)
os = tesseract_raw.detect_os(handle)
if os['confidence'] <= 0:
raise tesseract_raw.TesseractError(
raise TesseractError(
"no script", "no script detected"
)
orientation = {
Expand Down Expand Up @@ -171,7 +173,7 @@ def get_available_languages():
def get_version():
version = tesseract_raw.get_version()
version = version.split(" ", 1)[0]

# cut off "dev" string if exists for proper int conversion
index = version.find("dev")
if index != -1:
Expand Down
13 changes: 2 additions & 11 deletions src/pyocr/libtesseract/tesseract_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import os
import sys

from ..error import TesseractError


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -46,17 +48,6 @@
pass


class TesseractError(Exception):
    """
    Exception raised when Tesseract fails.

    Attributes:
        status: status value given by the caller, stored unchanged.
        message: human-readable description of the failure.
        args: always the tuple (status, message).
    """
    def __init__(self, status, message):
        Exception.__init__(self, message)
        self.status = status
        self.message = message
        # The base constructor only stored (message,); expose both values.
        self.args = (status, message)


class PageSegMode(object):
OSD_ONLY = 0
AUTO_OSD = 1
Expand Down
13 changes: 2 additions & 11 deletions src/pyocr/tesseract.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
import tempfile

from . import builders
from . import error
from . import util
from .builders import DigitBuilder # backward compatibility
from .error import TesseractError # backward compatibility

# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
Expand Down Expand Up @@ -306,17 +308,6 @@ def temp_file(suffix):
return tempfile.NamedTemporaryFile(prefix='tess_', suffix=suffix)


class TesseractError(Exception):
    """
    Exception raised when Tesseract fails.

    Attributes:
        status: status value given by the caller, stored unchanged.
        message: human-readable description of the failure.
        args: always the tuple (status, message).
    """
    def __init__(self, status, message):
        Exception.__init__(self, message)
        self.status = status
        self.message = message
        # The base constructor only stored (message,); expose both values.
        self.args = (status, message)


def image_to_string(image, lang=None, builder=None):
'''
Runs tesseract on the specified image. First, the image is written to disk,
Expand Down

0 comments on commit 2fde897

Please sign in to comment.