# tmbdev/ocropy

Merge pull request #282 from zuphilip/edist-py-imports

`Standardize imports II, #176`
zuphilip committed Feb 19, 2018
2 parents e2428a1 + 4e8fdf7 commit d3e5cc60b64d070b60d606a16baeda6b436cc23b
Showing with 226 additions and 215 deletions.
1. +7 −6 ocrolib/edist.py
2. +2 −2 ocrolib/exceptions.py
3. +19 −21 ocrolib/lineest.py
4. +112 −113 ocrolib/lstm.py
5. +54 −41 ocrolib/psegutils.py
6. +10 −11 ocrolib/sl.py
7. +22 −21 ocrolib/toplevel.py
 @@ -1,7 +1,8 @@ from scipy.ndimage import filters from pylab import * import re import numpy as np from scipy.ndimage import filters def levenshtein(a,b): """Calculates the Levenshtein distance between a and b. (Clever compact Pythonic implementation from hetland.org)""" @@ -23,10 +24,10 @@ def xlevenshtein(a,b,context=1): n, m = len(a), len(b) assert m>0 # xlevenshtein should only be called with non-empty b string (ground truth) if a == b: return 0,[] # speed up for the easy case sources = empty((m+1,n+1),object) sources = np.empty((m+1,n+1),object) sources[:,:] = None dists = 99999*ones((m+1,n+1)) dists[0,:] = arange(n+1) dists = np.full((m+1,n+1),99999) dists[0,:] = np.arange(n+1) for i in range(1,m+1): previous = dists[i-1,:] current = dists[i,:] @@ -70,7 +71,7 @@ def xlevenshtein(a,b,context=1): al = " "*context+al+" "*context bl = " "*context+bl+" "*context assert "~" not in al and "~" not in bl same = array([al[i]==bl[i] for i in range(len(al))],'i') same = np.array([al[i]==bl[i] for i in range(len(al))],'i') same = filters.minimum_filter(same,1+2*context) als = "".join([al[i] if not same[i] else "~" for i in range(len(al))]) bls = "".join([bl[i] if not same[i] else "~" for i in range(len(bl))])
 @@ -1,9 +1,9 @@ import inspect import numpy import numpy as np def summary(x): """Summarize a datatype as a string (for display and debugging).""" if type(x)==numpy.ndarray: if type(x)==np.ndarray: return "<ndarray %s %s>"%(x.shape,x.dtype) if type(x)==str and len(x)>10: return '"%s..."'%x