Permalink
Browse files

Merge branch 'master' into pr

  • Loading branch information...
zuphilip committed Jan 23, 2017
2 parents 2bfce1f + 7020a76 commit dc698cf24b854f9e5c391ba88d7ab7d1d7176551
Showing with 132 additions and 212 deletions.
  1. +14 −198 ocrolib/common.py
  2. +2 −0 ocrolib/edist.py
  3. +73 −0 ocrolib/exceptions.py
  4. +9 −5 ocrolib/lstm.py
  5. +17 −7 ocropus-econf
  6. +2 −1 ocropus-gpageseg
  7. +2 −1 ocropus-rpred
  8. +13 −0 tests/run-unit
View
@@ -11,100 +11,33 @@
import sys
import sysconfig
import unicodedata
-import warnings
import inspect
import glob
import cPickle
+from ocrolib.exceptions import (BadClassLabel, BadInput, FileNotFound,
+ OcropusException)
import numpy
-from numpy import *
+from numpy import (amax, amin, array, bitwise_and, clip, dtype, mean, minimum,
+ nan, sin, sqrt, zeros)
import pylab
-from pylab import imshow
-from scipy.ndimage import morphology,measurements
+from pylab import (clf, cm, ginput, gray, imshow, ion, subplot, where)
+from scipy.ndimage import morphology, measurements
import PIL
from default import getlocal
-from toplevel import *
+from toplevel import (checks, ABINARY2, AINT2, AINT3, BOOL, DARKSEG, GRAYSCALE,
+ LIGHTSEG, LINESEG, PAGESEG)
import chars
import codecs
import ligatures
import lstm
import morph
import multiprocessing
-
-################################################################
-### exceptions
-################################################################
-
-class OcropusException(Exception):
- trace = 1
- def __init__(self,*args,**kw):
- Exception.__init__(self,*args,**kw)
-
-class Unimplemented(OcropusException):
- trace = 1
- "Exception raised when a feature is unimplemented."
- def __init__(self,s):
- Exception.__init__(self,inspect.stack()[1][3])
-
-class Internal(OcropusException):
- trace = 1
- "Exception raised when a feature is unimplemented."
- def __init__(self,s):
- Exception.__init__(self,inspect.stack()[1][3])
-
-class RecognitionError(OcropusException):
- trace = 1
- "Some kind of error during recognition."
- def __init__(self,explanation,**kw):
- self.context = kw
- s = [explanation]
- s += ["%s=%s"%(k,summary(kw[k])) for k in kw]
- message = " ".join(s)
- Exception.__init__(self,message)
-
-class Warning(OcropusException):
- trace = 0
- def __init__(self,*args,**kw):
- OcropusException.__init__(self,*args,**kw)
-
-class BadClassLabel(OcropusException):
- trace = 0
- "Exception for bad class labels in a dataset or input."
- def __init__(self,s):
- Exception.__init__(self,s)
-
-class BadImage(OcropusException):
- trace = 0
- def __init__(self,*args,**kw):
- OcropusException.__init__(self,*args)
-
-class BadInput(OcropusException):
- trace = 0
- def __init__(self,*args,**kw):
- OcropusException.__init__(self,*args,**kw)
-
-class FileNotFound(OcropusException):
- trace = 0
- """Some file-not-found error during OCRopus processing."""
- def __init__(self,fname):
- self.fname = fname
- def __str__(self):
- return "file not found %s"%(self.fname,)
+import sl
pickle_mode = 2
-def deprecated(f):
- def _wrap(f):
- warned = 0
- def _wrapper(*args,**kw):
- if not warned:
- print(f, "has been DEPRECATED")
- warned = 1
- return f(*args,**kw)
- return _wrap
-
-
################################################################
# text normalization
@@ -466,10 +399,10 @@ def extractMasked(self,image,index,grow=0,bg=None,margin=0,dtype=None):
mh,mw = mask.shape
box = self.bbox(index)
r0,c0,r1,c1 = box
- subimage = improc.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg)
+ subimage = sl.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg)
return where(mask,subimage,bg)
-
+
################################################################
### Object reading and writing
@@ -517,7 +450,7 @@ class names that have changed."""
unpickler.find_global = unpickle_find_global
return unpickler.load()
-
+
################################################################
### Simple record object.
@@ -581,16 +514,6 @@ def check_valid_class_label(s):
else:
raise BadClassLabel(s)
-def summary(x):
- """Summarize a datatype as a string (for display and debugging)."""
- if type(x)==numpy.ndarray:
- return "<ndarray %s %s>"%(x.shape,x.dtype)
- if type(x)==str and len(x)>10:
- return '"%s..."'%x
- if type(x)==list and len(x)>10:
- return '%s...'%x
- return str(x)
-
################################################################
### file name manipulation
################################################################
@@ -824,19 +747,6 @@ def quick_check_line_components(line_bin,dpi):
there is probably something wrong."""
return 1.0
-def deprecated(func):
- """This is a decorator which can be used to mark functions
- as deprecated. It will result in a warning being emitted
- when the function is used."""
- def newFunc(*args, **kwargs):
- warnings.warn("Call to deprecated function %s." % func.__name__,
- category=DeprecationWarning,stacklevel=2)
- return func(*args, **kwargs)
- newFunc.__name__ = func.__name__
- newFunc.__doc__ = func.__doc__
- newFunc.__dict__.update(func.__dict__)
- return newFunc
-
################################################################
### conversion functions
################################################################
@@ -904,67 +814,6 @@ def obinfo(ob):
result += str(ob.shape)
return result
-def save_component(file,object,verbose=0,verify=0):
- """Save an object to disk in an appropriate format. If the object
- is a wrapper for a native component (=inherits from
- CommonComponent and has a comp attribute, or is in package
- ocropus), write it using ocropus.save_component in native format.
- Otherwise, write it using Python's pickle. We could use pickle
- for everything (since the native components pickle), but that
- would be slower and more confusing."""
- if hasattr(object,"save_component"):
- object.save_component(file)
- return
- if object.__class__.__name__=="CommonComponent" and hasattr(object,"comp"):
- # FIXME -- get rid of this eventually
- import ocropus
- ocropus.save_component(file,object.comp)
- return
- if type(object).__module__=="ocropus":
- import ocropus
- ocropus.save_component(file,object)
- return
- if verbose:
- print("[save_component]")
- if verbose:
- for k,v in object.__dict__.items():
- print(":", k, obinfo(v))
- with open(file,"wb") as stream:
- pickle.dump(object,stream,pickle_mode)
- if verify:
- if verbose:
- print("[trying to read it again]")
- with open(file,"rb") as stream:
- pickle.load(stream)
-
-def load_component(file):
- """Load a component. This handles various special cases,
- including old-style C++ recognizers (soon to be gotten rid of),
- python expressions ("=package.ObjectName(arg1,arg2)"),
- and simple pickled Python objects (default)."""
- if file[0]=="=":
- return pyconstruct(file[1:])
- elif file[0]=="@":
- file = file[1:]
- with open(file,"r") as stream:
- # FIXME -- get rid of this eventually
- start = stream.read(128)
- if start.startswith("<object>\nlinerec\n"):
- # FIXME -- get rid of this eventually
- warnings.warn("loading old-style linerec: %s"%file)
- result = RecognizeLine()
- import ocropus
- result.comp = ocropus.load_IRecognizeLine(file)
- return result
- if start.startswith("<object>"):
- # FIXME -- get rid of this eventually
- warnings.warn("loading old-style cmodel: %s"%file)
- import ocroold
- result = ocroold.Model()
- import ocropus
- result.comp = ocropus.load_IModel(file)
- return result
- return load_object(file)
def binarize_range(image,dtype='B',threshold=0.5):
"""Binarize an image by its range."""
@@ -973,39 +822,6 @@ def binarize_range(image,dtype='B',threshold=0.5):
if dtype=='B': scale = 255
return array(scale*(image>threshold),dtype=dtype)
-def draw_pseg(pseg,axis=None):
- if axis is None:
- axis = subplot(111)
- h = pseg.dim(1)
- regions = ocropy.RegionExtractor()
- regions.setPageLines(pseg)
- for i in range(1,regions.length()):
- x0,y0,x1,y1 = (regions.x0(i),regions.y0(i),regions.x1(i),regions.y1(i))
- p = patches.Rectangle((x0,h-y1-1),x1-x0,y1-y0,edgecolor="red",fill=0)
- axis.add_patch(p)
-
-def draw_aligned(result,axis=None):
- raise Unimplemented("FIXME draw_aligned")
- if axis is None:
- axis = subplot(111)
- axis.imshow(NI(result.image),cmap=cm.gray)
- cseg = result.cseg
- if type(cseg)==numpy.ndarray: cseg = common.lseg2narray(cseg)
- ocropy.make_line_segmentation_black(cseg)
- ocropy.renumber_labels(cseg,1)
- bboxes = ocropy.rectarray()
- ocropy.bounding_boxes(bboxes,cseg)
- s = re.sub(r'\s+','',result.output)
- h = cseg.dim(1)
- for i in range(1,bboxes.length()):
- r = bboxes.at(i)
- x0,y0,x1,y1 = (r.x0,r.y0,r.x1,r.y1)
- p = patches.Rectangle((x0,h-y1-1),x1-x0,y1-y0,edgecolor=(0.0,0.0,1.0,0.5),fill=0)
- axis.add_patch(p)
- if i>0 and i-1<len(s):
- axis.text(x0,h-y0-1,s[i-1],color="red",weight="bold",fontsize=14)
- draw()
-
def plotgrid(data,d=10,shape=(30,30)):
"""Plot a list of images on a grid."""
ion()
@@ -1024,13 +840,13 @@ def showrgb(r,g=None,b=None):
imshow(array([r,g,b]).transpose([1,2,0]))
def showgrid(l,cols=None,n=400,titles=None,xlabels=None,ylabels=None,**kw):
- if "cmap" not in kw: kw["cmap"] = pylab.cm.gray
+ if "cmap" not in kw: kw["cmap"] = cm.gray
if "interpolation" not in kw: kw["interpolation"] = "nearest"
n = minimum(n,len(l))
if cols is None: cols = int(sqrt(n))
rows = (n+cols-1)//cols
for i in range(n):
- pylab.xticks([]); pylab.yticks([])
+ pylab.xticks([]) ;pylab.yticks([])
pylab.subplot(rows,cols,i+1)
pylab.imshow(l[i],**kw)
if titles is not None: pylab.title(str(titles[i]))
View
@@ -21,6 +21,8 @@ def xlevenshtein(a,b,context=1):
"""Calculates the Levensthein distance between a and b
and generates a list of differences by context."""
n, m = len(a), len(b)
+ assert m>0 # xlevenshtein should only be called with non-empty b string (ground truth)
+ if a == b: return 0,[] # speed up for the easy case
sources = empty((m+1,n+1),object)
sources[:,:] = None
dists = 99999*ones((m+1,n+1))
View
@@ -0,0 +1,73 @@
+import inspect
+import numpy
+
def summary(x):
    """Summarize a value as a short string (for display and debugging).

    numpy arrays are reduced to their shape and dtype.  Strings and lists
    with more than 10 items are cut down to their first 10 items and
    marked with a trailing "...".  Anything else is passed through str().
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        # actually truncate; previously the full string was echoed back
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # wrap in a 1-tuple so the list is formatted as a single value
        return '%s...' % (x[:10],)
    return str(x)
+
+
+################################################################
+### Ocropy exceptions
+################################################################
+
class OcropusException(Exception):
    """Base class for the OCRopus-specific exceptions in this module.

    The class attribute `trace` is a 0/1 flag that subclasses override.
    NOTE(review): presumably callers use it to decide whether to print a
    traceback -- confirm against the command line tools.

    Positional arguments become the exception's `args`.  Keyword
    arguments are kept in `self.context`.
    """
    trace = 1

    def __init__(self, *args, **kw):
        # Exception.__init__ rejects keyword arguments with a TypeError,
        # so they are stored here instead of being forwarded.
        self.context = kw
        Exception.__init__(self, *args)
+
class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented.

    The message consists of the name of the function that created the
    exception (taken from the call stack) followed by the explanation `s`.
    """
    trace = 1

    def __init__(self, s):
        # stack()[0] is this __init__; stack()[1] is the frame that
        # constructed the exception, and field 3 is its function name.
        caller = inspect.stack()[1][3]
        # include the explanation, which used to be silently dropped
        Exception.__init__(self, "%s: %s" % (caller, s))
+
class Internal(OcropusException):
    """Exception raised for an internal error.

    NOTE(review): the previous description was copied verbatim from
    Unimplemented; the intended meaning is inferred from the class name.

    The message consists of the name of the function that created the
    exception (taken from the call stack) followed by the explanation `s`.
    """
    trace = 1

    def __init__(self, s):
        # stack()[0] is this __init__; stack()[1] is the frame that
        # constructed the exception, and field 3 is its function name.
        caller = inspect.stack()[1][3]
        # include the explanation, which used to be silently dropped
        Exception.__init__(self, "%s: %s" % (caller, s))
+
class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    Keyword arguments are kept in `self.context` and appended to the
    explanation as space-separated ``key=value`` pairs, with each value
    rendered by summary().
    """
    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts.extend("%s=%s" % (key, summary(value))
                     for key, value in kw.items())
        Exception.__init__(self, " ".join(parts))
+
class Warning(OcropusException):
    """OCRopus warning condition, raised like any other exception.

    NOTE: inside this module the name shadows the builtin `Warning`.
    """
    trace = 0

    def __init__(self, *args, **kw):
        # plain delegation to the OcropusException initializer
        super(Warning, self).__init__(*args, **kw)
+
class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input.

    The single argument `s` becomes the exception message.
    """
    trace = 0

    def __init__(self, s):
        Exception.__init__(self, s)
+
class BadImage(OcropusException):
    """Exception for a bad image.

    NOTE(review): meaning inferred from the class name -- confirm with
    the call sites.
    """
    trace = 0

    def __init__(self, *args, **kw):
        # keyword arguments are accepted but not passed on to the base class
        super(BadImage, self).__init__(*args)
+
class BadInput(OcropusException):
    """Exception for bad input.

    NOTE(review): meaning inferred from the class name -- confirm with
    the call sites.
    """
    trace = 0

    def __init__(self, *args, **kw):
        # plain delegation to the OcropusException initializer
        super(BadInput, self).__init__(*args, **kw)
+
class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing.

    The offending file name is available as `self.fname` and is included
    in the string form of the exception.
    """
    trace = 0

    def __init__(self, fname):
        # run the base initializer so the exception is set up like its
        # siblings; `args` stays (fname,)
        OcropusException.__init__(self, fname)
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)
Oops, something went wrong.

0 comments on commit dc698cf

Please sign in to comment.