Merge pull request #282 from zuphilip/edist-py-imports

Standardize imports II, #176
zuphilip committed Feb 19, 2018
2 parents e2428a1 + 4e8fdf7 commit d3e5cc60b64d070b60d606a16baeda6b436cc23b
Showing with 226 additions and 215 deletions.
  1. +7 −6 ocrolib/edist.py
  2. +2 −2 ocrolib/exceptions.py
  3. +19 −21 ocrolib/lineest.py
  4. +112 −113 ocrolib/lstm.py
  5. +54 −41 ocrolib/psegutils.py
  6. +10 −11 ocrolib/sl.py
  7. +22 −21 ocrolib/toplevel.py
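
The pattern applied across these files is the same: the wildcard "from pylab import *" is dropped and NumPy/matplotlib are used through explicit, aliased imports. A minimal before/after sketch of the style change, not taken verbatim from any of the files (the stand-in arrays exist only so the snippet runs):

# Before (style removed in this PR): names come implicitly from the pylab star import.
# from pylab import *
# a = argmax(smoothed,axis=0)
# figure("center"); imshow(line,cmap=cm.gray)

# After (style introduced in this PR): explicit, aliased imports with prefixed calls.
import numpy as np
import matplotlib.pyplot as plt

smoothed = np.random.rand(8,8)   # stand-in data, only to make this runnable
line = np.random.rand(8,8)

a = np.argmax(smoothed,axis=0)   # NumPy calls get the np. prefix
plt.figure("center")             # plotting calls get the plt. prefix
plt.imshow(line,cmap=plt.cm.gray)
plt.plot(a)

The explicit prefixes make it obvious which names come from NumPy and which from matplotlib, which is the point of the standardization.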
ocrolib/edist.py
@@ -1,7 +1,8 @@
-from scipy.ndimage import filters
-from pylab import *
import re
+import numpy as np
+from scipy.ndimage import filters
def levenshtein(a,b):
    """Calculates the Levenshtein distance between a and b.
    (Clever compact Pythonic implementation from hetland.org)"""
@@ -23,10 +24,10 @@ def xlevenshtein(a,b,context=1):
    n, m = len(a), len(b)
    assert m>0 # xlevenshtein should only be called with non-empty b string (ground truth)
    if a == b: return 0,[] # speed up for the easy case
-    sources = empty((m+1,n+1),object)
+    sources = np.empty((m+1,n+1),object)
    sources[:,:] = None
-    dists = 99999*ones((m+1,n+1))
-    dists[0,:] = arange(n+1)
+    dists = np.full((m+1,n+1),99999)
+    dists[0,:] = np.arange(n+1)
    for i in range(1,m+1):
        previous = dists[i-1,:]
        current = dists[i,:]
@@ -70,7 +71,7 @@ def xlevenshtein(a,b,context=1):
al = " "*context+al+" "*context
bl = " "*context+bl+" "*context
assert "~" not in al and "~" not in bl
same = array([al[i]==bl[i] for i in range(len(al))],'i')
same = np.array([al[i]==bl[i] for i in range(len(al))],'i')
same = filters.minimum_filter(same,1+2*context)
als = "".join([al[i] if not same[i] else "~" for i in range(len(al))])
bls = "".join([bl[i] if not same[i] else "~" for i in range(len(bl))])
ocrolib/exceptions.py
@@ -1,9 +1,9 @@
import inspect
-import numpy
+import numpy as np
def summary(x):
    """Summarize a datatype as a string (for display and debugging)."""
-    if type(x)==numpy.ndarray:
+    if type(x)==np.ndarray:
        return "<ndarray %s %s>"%(x.shape,x.dtype)
    if type(x)==str and len(x)>10:
        return '"%s..."'%x
ocrolib/lineest.py
@@ -1,21 +1,19 @@
from __future__ import print_function
-import sys
-import os
-import re
-from scipy import stats
-from scipy.ndimage import measurements,interpolation,filters
-from pylab import *
-from toplevel import *
-def scale_to_h(img,target_height,order=1,dtype=dtype('f'),cval=0):
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.ndimage import interpolation,filters
+def scale_to_h(img,target_height,order=1,dtype=np.dtype('f'),cval=0):
    h,w = img.shape
    scale = target_height*1.0/h
    target_width = int(scale*w)
-    output = interpolation.affine_transform(1.0*img,eye(2)/scale,order=order,
+    output = interpolation.affine_transform(1.0*img,np.eye(2)/scale,order=order,
            output_shape=(target_height,target_width),
            mode='constant',cval=cval)
-    output = array(output,dtype=dtype)
+    output = np.array(output,dtype=dtype)
    return output
class CenterNormalizer:
@@ -31,29 +29,29 @@ def measure(self,line):
        smoothed = filters.gaussian_filter(line,(h*0.5,h*self.smoothness),mode='constant')
        smoothed += 0.001*filters.uniform_filter(smoothed,(h*0.5,w),mode='constant')
        self.shape = (h,w)
-        a = argmax(smoothed,axis=0)
+        a = np.argmax(smoothed,axis=0)
        a = filters.gaussian_filter(a,h*self.extra)
-        self.center = array(a,'i')
-        deltas = abs(arange(h)[:,newaxis]-self.center[newaxis,:])
-        self.mad = mean(deltas[line!=0])
+        self.center = np.array(a,'i')
+        deltas = np.abs(np.arange(h)[:,np.newaxis]-self.center[np.newaxis,:])
+        self.mad = np.mean(deltas[line!=0])
        self.r = int(1+self.range*self.mad)
        if self.debug:
-            figure("center")
-            imshow(line,cmap=cm.gray)
-            plot(self.center)
-            ginput(1,1000)
-    def dewarp(self,img,cval=0,dtype=dtype('f')):
+            plt.figure("center")
+            plt.imshow(line,cmap=plt.cm.gray)
+            plt.plot(self.center)
+            plt.ginput(1,1000)
+    def dewarp(self,img,cval=0,dtype=np.dtype('f')):
        assert img.shape==self.shape
        h,w = img.shape
        # The actual image img is embedded into a larger image by
        # adding vertical space on top and at the bottom (padding)
        hpadding = self.r # this is large enough
-        padded = vstack([cval*ones((hpadding,w)),img,cval*ones((hpadding,w))])
+        padded = np.vstack([cval*np.ones((hpadding,w)),img,cval*np.ones((hpadding,w))])
        center = self.center + hpadding
        dewarped = [padded[center[i]-self.r:center[i]+self.r,i] for i in range(w)]
-        dewarped = array(dewarped,dtype=dtype).T
+        dewarped = np.array(dewarped,dtype=dtype).T
        return dewarped
-    def normalize(self,img,order=1,dtype=dtype('f'),cval=0):
+    def normalize(self,img,order=1,dtype=np.dtype('f'),cval=0):
        dewarped = self.dewarp(img,cval=cval,dtype=dtype)
        h,w = dewarped.shape
        # output = zeros(dewarped.shape,dtype)

2 comments on commit d3e5cc6


kaushikacharya replied Feb 20, 2018

In the function xlevenshtein(), instead of hard-coding 99999, wouldn't it be better to pass it as an input parameter with a default value?


zuphilip (Collaborator) replied Feb 20, 2018

Note that this logic didn't change in this commit, and 99999 is just a large-enough starting value. I don't see any use in changing that value.
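
For readers following the thread, a small illustrative helper (the name init_dist_table and the big parameter are hypothetical, not part of ocrolib) showing both what the suggested default argument would look like and why the exact value does not matter: the sentinel only has to exceed the largest possible edit distance, which is at most max(len(a), len(b)).

import numpy as np

def init_dist_table(m, n, big=99999):
    # Hypothetical helper mirroring the initialization in xlevenshtein():
    # every cell starts at a "not computed yet" sentinel, and row 0 seeds
    # the base case (aligning j characters of a against an empty prefix
    # of b costs j deletions).  Any `big` larger than max(m, n) behaves
    # identically, since it can never win a min() against a real path cost.
    dists = np.full((m + 1, n + 1), big)
    dists[0, :] = np.arange(n + 1)
    return dists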
