Skip to content

Commit

Permalink
Merge branch 'master' into pr
Browse files Browse the repository at this point in the history
  • Loading branch information
zuphilip committed Jan 23, 2017
2 parents 2bfce1f + 7020a76 commit dc698cf
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 212 deletions.
212 changes: 14 additions & 198 deletions ocrolib/common.py
Expand Up @@ -11,100 +11,33 @@
import sys
import sysconfig
import unicodedata
import warnings
import inspect
import glob
import cPickle
from ocrolib.exceptions import (BadClassLabel, BadInput, FileNotFound,
OcropusException)

import numpy
from numpy import *
from numpy import (amax, amin, array, bitwise_and, clip, dtype, mean, minimum,
nan, sin, sqrt, zeros)
import pylab
from pylab import imshow
from scipy.ndimage import morphology,measurements
from pylab import (clf, cm, ginput, gray, imshow, ion, subplot, where)
from scipy.ndimage import morphology, measurements
import PIL

from default import getlocal
from toplevel import *
from toplevel import (checks, ABINARY2, AINT2, AINT3, BOOL, DARKSEG, GRAYSCALE,
LIGHTSEG, LINESEG, PAGESEG)
import chars
import codecs
import ligatures
import lstm
import morph
import multiprocessing

################################################################
### exceptions
################################################################

class OcropusException(Exception):
    """Common base class for the exceptions defined by OCRopus."""

    # NOTE(review): `trace` presumably tells callers whether to show a
    # traceback for this exception type -- confirm against users of `.trace`.
    trace = 1

    def __init__(self, *args, **kw):
        super(OcropusException, self).__init__(*args, **kw)

class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented.

    The exception message is the name of the function that raised it
    (taken from the call stack); the argument `s` is accepted but not
    used in the message.
    """

    trace = 1

    def __init__(self, s):
        # inspect.stack()[1] is the frame that called this constructor;
        # field 3 of that record is the function name.
        Exception.__init__(self, inspect.stack()[1][3])

class Internal(OcropusException):
    """Exception presumably signalling an internal error.

    The exception message is the name of the function that raised it
    (taken from the call stack); the argument `s` is accepted but not
    used in the message.
    """

    trace = 1

    def __init__(self, s):
        # inspect.stack()[1] is the frame that called this constructor;
        # field 3 of that record is the function name.
        Exception.__init__(self, inspect.stack()[1][3])

class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    `explanation` starts the message; every keyword argument is appended
    as ``name=<summary of value>``. The raw keyword arguments are kept in
    ``self.context``.
    """

    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts.extend("%s=%s" % (k, summary(kw[k])) for k in kw)
        Exception.__init__(self, " ".join(parts))

class Warning(OcropusException):
    """OCRopus exception class named `Warning`.

    Note that it shadows the builtin `Warning` wherever it is in scope
    under this name.
    """

    trace = 0

    def __init__(self, *args, **kw):
        super(Warning, self).__init__(*args, **kw)

class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input."""

    trace = 0

    def __init__(self, s):
        # `s` becomes the sole exception argument / message.
        Exception.__init__(self, s)

class BadImage(OcropusException):
    """OCRopus exception type for bad images."""

    trace = 0

    def __init__(self, *args, **kw):
        # Keyword arguments are accepted but not forwarded to the base class.
        super(BadImage, self).__init__(*args)

class BadInput(OcropusException):
    """OCRopus exception type for bad input."""

    trace = 0

    def __init__(self, *args, **kw):
        # Positional and keyword arguments both go to the base class unchanged.
        super(BadInput, self).__init__(*args, **kw)

class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing.

    The offending name is kept in ``fname`` and rendered by ``__str__``.
    """

    trace = 0

    def __init__(self, fname):
        # The base-class initializer is not called; only fname is recorded.
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)
import sl

pickle_mode = 2

def deprecated(f):
    """Decorator that marks `f` as deprecated.

    The returned wrapper prints a notice the first time it is called and
    then always delegates to `f`, returning its result.
    """
    import functools

    # One-element list so the closure can update the flag without `nonlocal`
    # (which is unavailable in Python 2).
    warned = [False]

    @functools.wraps(f)
    def _wrapper(*args, **kw):
        if not warned[0]:
            print(f, "has been DEPRECATED")
            warned[0] = True
        return f(*args, **kw)

    return _wrapper



################################################################
# text normalization
Expand Down Expand Up @@ -466,10 +399,10 @@ def extractMasked(self,image,index,grow=0,bg=None,margin=0,dtype=None):
mh,mw = mask.shape
box = self.bbox(index)
r0,c0,r1,c1 = box
subimage = improc.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg)
subimage = sl.cut(image,(r0,c0,r0+mh-2*margin,c0+mw-2*margin),margin,bg=bg)
return where(mask,subimage,bg)




################################################################
### Object reading and writing
Expand Down Expand Up @@ -517,7 +450,7 @@ class names that have changed."""
unpickler.find_global = unpickle_find_global
return unpickler.load()




################################################################
### Simple record object.
Expand Down Expand Up @@ -581,16 +514,6 @@ def check_valid_class_label(s):
else:
raise BadClassLabel(s)

def summary(x):
    """Summarize a datatype as a string (for display and debugging).

    - numpy arrays are shown as ``<ndarray SHAPE DTYPE>``;
    - strings longer than 10 characters are cut to their first 10
      characters, quoted, and followed by ``...``;
    - lists longer than 10 items are cut to their first 10 items and
      followed by ``...``;
    - anything else is returned as ``str(x)``.
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        # Truncate: the "..." marker promises an abbreviated value.
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # Wrap in a tuple so the list is formatted as one value.
        return '%s...' % (x[:10],)
    return str(x)

################################################################
### file name manipulation
################################################################
Expand Down Expand Up @@ -824,19 +747,6 @@ def quick_check_line_components(line_bin,dpi):
there is probably something wrong."""
return 1.0

def deprecated(func):
    """Decorator marking a function as deprecated.

    Every call through the returned wrapper emits a DeprecationWarning
    (attributed to the caller via stacklevel=2) and then runs `func`.
    The wrapper copies the name, docstring and attribute dict of `func`.
    """
    def wrapper(*args, **kwargs):
        message = "Call to deprecated function %s." % func.__name__
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    for attr in ("__name__", "__doc__"):
        setattr(wrapper, attr, getattr(func, attr))
    wrapper.__dict__.update(func.__dict__)
    return wrapper

################################################################
### conversion functions
################################################################
Expand Down Expand Up @@ -904,67 +814,6 @@ def obinfo(ob):
result += str(ob.shape)
return result

def save_component(file,object,verbose=0,verify=0):
    """Save an object to disk in an appropriate format.

    Dispatch order:
    1. objects with their own `save_component` method save themselves;
    2. `CommonComponent` wrappers with a `comp` attribute, and objects
       whose type lives in module `ocropus`, are written with
       `ocropus.save_component` in native format;
    3. everything else is pickled with protocol `pickle_mode`.

    We could use pickle for everything (since the native components
    pickle), but that would be slower and more confusing.

    With `verbose`, the object's attributes are listed before writing.
    With `verify`, the pickle is read back once after writing.
    """
    # The module-level imports visible here only provide `cPickle`, so bind
    # the name `pickle` locally (falling back to the pure-Python module).
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    if hasattr(object,"save_component"):
        object.save_component(file)
        return
    if object.__class__.__name__=="CommonComponent" and hasattr(object,"comp"):
        # FIXME -- get rid of this eventually
        import ocropus
        ocropus.save_component(file,object.comp)
        return
    if type(object).__module__=="ocropus":
        import ocropus
        ocropus.save_component(file,object)
        return
    if verbose:
        print("[save_component]")
        for k,v in object.__dict__.items():
            print(":", k, obinfo(v))
    with open(file,"wb") as stream:
        pickle.dump(object,stream,pickle_mode)
    if verify:
        if verbose:
            print("[trying to read it again]")
        # Re-open only after the write handle is closed so the data is flushed.
        with open(file,"rb") as stream:
            pickle.load(stream)

def load_component(file):
"""Load a component. This handles various special cases,
including old-style C++ recognizers (soon to be gotten rid of),
python expressions ("=package.ObjectName(arg1,arg2)"),
and simple pickled Python objects (default)."""
# A leading "=" marks a Python expression: the rest goes to pyconstruct.
if file[0]=="=":
return pyconstruct(file[1:])
# A leading "@" is stripped; the remainder is used as the file name.
elif file[0]=="@":
file = file[1:]
# Peek at the first 128 characters to detect the legacy "<object>" formats.
with open(file,"r") as stream:
# FIXME -- get rid of this eventually
start = stream.read(128)
# Legacy line recognizer: wrap the natively loaded object in RecognizeLine.
if start.startswith("<object>\nlinerec\n"):
# FIXME -- get rid of this eventually
warnings.warn("loading old-style linerec: %s"%file)
result = RecognizeLine()
import ocropus
result.comp = ocropus.load_IRecognizeLine(file)
return result
# Any other "<object>" header: wrap the natively loaded model in ocroold.Model.
if start.startswith("<object>"):
# FIXME -- get rid of this eventually
warnings.warn("loading old-style cmodel: %s"%file)
import ocroold
result = ocroold.Model()
import ocropus
result.comp = ocropus.load_IModel(file)
return result
# Default: no legacy header found, delegate to load_object.
return load_object(file)

def binarize_range(image,dtype='B',threshold=0.5):
"""Binarize an image by its range."""
Expand All @@ -973,39 +822,6 @@ def binarize_range(image,dtype='B',threshold=0.5):
if dtype=='B': scale = 255
return array(scale*(image>threshold),dtype=dtype)

def draw_pseg(pseg,axis=None):
    """Outline the line regions of a page segmentation in red.

    Regions are taken from an `ocropy.RegionExtractor` fed with `pseg`;
    indexes start at 1. Each rectangle is added to `axis`, or to a fresh
    `subplot(111)` when no axis is given. The vertical coordinate is
    flipped using `pseg.dim(1)`.
    """
    if axis is None:
        axis = subplot(111)
    height = pseg.dim(1)
    extractor = ocropy.RegionExtractor()
    extractor.setPageLines(pseg)
    for index in range(1, extractor.length()):
        left = extractor.x0(index)
        low = extractor.y0(index)
        right = extractor.x1(index)
        high = extractor.y1(index)
        outline = patches.Rectangle(
            (left, height - high - 1),
            right - left,
            high - low,
            edgecolor="red",
            fill=0,
        )
        axis.add_patch(outline)

def draw_aligned(result,axis=None):
    """Placeholder for drawing an aligned recognition result.

    Not implemented: every call raises `Unimplemented`.

    The removed body sat after the unconditional `raise` and so could
    never run. It sketched the intent: show `result.image`, compute
    bounding boxes from `result.cseg`, outline each box and label it with
    the matching character of `result.output` (whitespace removed).
    """
    raise Unimplemented("FIXME draw_aligned")

def plotgrid(data,d=10,shape=(30,30)):
"""Plot a list of images on a grid."""
ion()
Expand All @@ -1024,13 +840,13 @@ def showrgb(r,g=None,b=None):
imshow(array([r,g,b]).transpose([1,2,0]))

def showgrid(l,cols=None,n=400,titles=None,xlabels=None,ylabels=None,**kw):
if "cmap" not in kw: kw["cmap"] = pylab.cm.gray
if "cmap" not in kw: kw["cmap"] = cm.gray
if "interpolation" not in kw: kw["interpolation"] = "nearest"
n = minimum(n,len(l))
if cols is None: cols = int(sqrt(n))
rows = (n+cols-1)//cols
for i in range(n):
pylab.xticks([]); pylab.yticks([])
pylab.xticks([]) ;pylab.yticks([])
pylab.subplot(rows,cols,i+1)
pylab.imshow(l[i],**kw)
if titles is not None: pylab.title(str(titles[i]))
Expand Down
2 changes: 2 additions & 0 deletions ocrolib/edist.py
Expand Up @@ -21,6 +21,8 @@ def xlevenshtein(a,b,context=1):
"""Calculates the Levensthein distance between a and b
and generates a list of differences by context."""
n, m = len(a), len(b)
assert m>0 # xlevenshtein should only be called with non-empty b string (ground truth)
if a == b: return 0,[] # speed up for the easy case
sources = empty((m+1,n+1),object)
sources[:,:] = None
dists = 99999*ones((m+1,n+1))
Expand Down
73 changes: 73 additions & 0 deletions ocrolib/exceptions.py
@@ -0,0 +1,73 @@
import inspect
import numpy

def summary(x):
    """Summarize a datatype as a string (for display and debugging).

    - numpy arrays are shown as ``<ndarray SHAPE DTYPE>``;
    - strings longer than 10 characters are cut to their first 10
      characters, quoted, and followed by ``...``;
    - lists longer than 10 items are cut to their first 10 items and
      followed by ``...``;
    - anything else is returned as ``str(x)``.
    """
    if isinstance(x, numpy.ndarray):
        return "<ndarray %s %s>" % (x.shape, x.dtype)
    if isinstance(x, str) and len(x) > 10:
        # Truncate: the "..." marker promises an abbreviated value.
        return '"%s..."' % x[:10]
    if isinstance(x, list) and len(x) > 10:
        # Wrap in a tuple so the list is formatted as one value.
        return '%s...' % (x[:10],)
    return str(x)


################################################################
### Ocropy exceptions
################################################################

class OcropusException(Exception):
    """Common base class for the exceptions defined by OCRopus."""

    # NOTE(review): `trace` presumably tells callers whether to show a
    # traceback for this exception type -- confirm against users of `.trace`.
    trace = 1

    def __init__(self, *args, **kw):
        super(OcropusException, self).__init__(*args, **kw)

class Unimplemented(OcropusException):
    """Exception raised when a feature is unimplemented.

    The exception message is the name of the function that raised it
    (taken from the call stack); the argument `s` is accepted but not
    used in the message.
    """

    trace = 1

    def __init__(self, s):
        # inspect.stack()[1] is the frame that called this constructor;
        # field 3 of that record is the function name.
        Exception.__init__(self, inspect.stack()[1][3])

class Internal(OcropusException):
    """Exception presumably signalling an internal error.

    The exception message is the name of the function that raised it
    (taken from the call stack); the argument `s` is accepted but not
    used in the message.
    """

    trace = 1

    def __init__(self, s):
        # inspect.stack()[1] is the frame that called this constructor;
        # field 3 of that record is the function name.
        Exception.__init__(self, inspect.stack()[1][3])

class RecognitionError(OcropusException):
    """Some kind of error during recognition.

    `explanation` starts the message; every keyword argument is appended
    as ``name=<summary of value>``. The raw keyword arguments are kept in
    ``self.context``.
    """

    trace = 1

    def __init__(self, explanation, **kw):
        self.context = kw
        parts = [explanation]
        parts.extend("%s=%s" % (k, summary(kw[k])) for k in kw)
        Exception.__init__(self, " ".join(parts))

class Warning(OcropusException):
    """OCRopus exception class named `Warning`.

    Note that it shadows the builtin `Warning` wherever it is in scope
    under this name.
    """

    trace = 0

    def __init__(self, *args, **kw):
        super(Warning, self).__init__(*args, **kw)

class BadClassLabel(OcropusException):
    """Exception for bad class labels in a dataset or input."""

    trace = 0

    def __init__(self, s):
        # `s` becomes the sole exception argument / message.
        Exception.__init__(self, s)

class BadImage(OcropusException):
    """OCRopus exception type for bad images."""

    trace = 0

    def __init__(self, *args, **kw):
        # Keyword arguments are accepted but not forwarded to the base class.
        super(BadImage, self).__init__(*args)

class BadInput(OcropusException):
    """OCRopus exception type for bad input."""

    trace = 0

    def __init__(self, *args, **kw):
        # Positional and keyword arguments both go to the base class unchanged.
        super(BadInput, self).__init__(*args, **kw)

class FileNotFound(OcropusException):
    """Some file-not-found error during OCRopus processing.

    The offending name is kept in ``fname`` and rendered by ``__str__``.
    """

    trace = 0

    def __init__(self, fname):
        # The base-class initializer is not called; only fname is recorded.
        self.fname = fname

    def __str__(self):
        return "file not found %s" % (self.fname,)

0 comments on commit dc698cf

Please sign in to comment.