Permalink
Browse files

Merge branch 'master' into cleanup-common

  • Loading branch information...
kba committed Jan 23, 2017
2 parents 51ffbb6 + eb4d53c commit d23ee42005206ab5e648a0b84f5dd8ea908076a4
View
@@ -1,3 +1,5 @@
+from __future__ import print_function
+
from numpy import dtype
import tables
import re
@@ -10,7 +12,7 @@ def table_copy(source,dest,names=None,omit=[],verbose=1):
names = names.split()
for name in names:
a = source.getNode("/"+name)
- if verbose: print "[copying",name,a.shape,a.atom,"]"
+ if verbose: print("[copying", name, a.shape, a.atom, "]")
if "VLArray" in str(a):
b = dest.createVLArray(dest.root,name,a.atom,filters=tables.Filters(9))
else:
@@ -28,7 +30,7 @@ def assign_array(db,name,a,verbose=1):
atom = tables.Float32Atom()
else:
raise Exception('unknown array type: %s'%a.dtype)
- if verbose: print "[writing",name,a.shape,atom,"]"
+ if verbose: print("[writing", name, a.shape, atom, "]")
node = db.createEArray(db.root,name,atom,shape=[0]+list(a.shape[1:]),filters=tables.Filters(9))
node.append(a)
View
@@ -1,6 +1,8 @@
# TODO:
# - handle UTF-8 inputs correctly
+from __future__ import print_function
+
from pylab import *
from collections import Counter,defaultdict
import re
@@ -47,7 +49,7 @@ def safe_readlines(stream,nonl=0):
try:
line = stream.readline()
except UnicodeDecodeError as e:
- if not once: print lineno,":",e
+ if not once: print(lineno, ":", e)
once = 1
return
if line is None: return
@@ -75,14 +77,14 @@ def computeNGraphs(self,fnames,n):
lineskip = 0
linelimit = 2000
for fnum,fname in enumerate(fnames):
- print fnum,"of",len(fnames),":",fname
+ print(fnum, "of", len(fnames), ":", fname)
if fname.startswith("lineskip="):
lineskip = int(fname.split("=")[1])
- print "changing lineskip to",lineskip
+ print("changing lineskip to", lineskip)
continue
if fname.startswith("linelimit="):
linelimit = int(fname.split("=")[1])
- print "changing linelimit to",linelimit
+ print("changing linelimit to", linelimit)
continue
with codecs.open(fname,"r","utf-8") as stream:
for lineno,line in enumerate(safe_readlines(stream)):
@@ -106,11 +108,11 @@ def __init__(self,*args,**kw):
NGraphsCounts.__init__(self,*args,**kw)
def buildFromFiles(self,fnames,n):
"""Given a set of files, build the log posteriors."""
- print "reading",len(fnames),"files"
+ print("reading", len(fnames), "files")
counter = self.computeNGraphs(fnames,n)
- print "got",sum(counter.values()),"%d-graphs"%(n,)
+ print("got", sum(counter.values()), "%d-graphs" % (n, ))
self.computePosteriors(counter)
- print "done building lposteriors"
+ print("done building lposteriors")
def computePosteriors(self,counter):
"""Given a `counter` of all n-graphs, compute
(log) conditional probabilities."""
View
@@ -118,7 +118,7 @@ formats are different and because the text line normalization is slightly
different. Error rates are about the same.
In addition, the C++ command line tool (`clstmctc`) has different command line
-options and currently requiresloading training data into HDF5 files, instead
+options and currently requires loading training data into HDF5 files, instead
of being trained off a list of image files directly (image file-based training
will be added to `clstmctc` soon).
@@ -21,6 +21,8 @@
"cell_type": "code",
"collapsed": false,
"input": [
+ "from __future__ import print_function\n",
+ "\n",
"from pylab import *\n",
"import os.path\n",
"import glob\n",
@@ -60,7 +62,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "print \"c_lstm\"\n",
+ "print(\"c_lstm\")\n",
"clstm = lstm_c.C_LSTM(Ni,Ns,No)\n",
"llayer = rnnscript.as_lstm2(clstm.net.hiddenLayers[0])\n",
"clstm.setLearningRate(1e-4,0.9)\n",
@@ -72,7 +74,7 @@
" err = sum((a-ys)**2)**.5\n",
" errs.append(err)\n",
" if i%10000==0:\n",
- " print i,mean(errs[-1000:])\n",
+ " print(i, mean(errs[-1000:]))\n",
" s = clstm.states()\n",
" clf()\n",
" subplot(121);\n",
@@ -83,7 +85,7 @@
" draw()\n",
"\n",
"test_xs,test_ys = genseq()\n",
- "print \"done\""
+ "print(\"done\")"
],
"language": "python",
"metadata": {},
@@ -221,7 +223,7 @@
"plot(deltas3,color='green',linestyle='dashed')\n",
"deltas4 = -array(before.nets[1].dzspre)\n",
"plot(deltas4,color='black',linestyle='dashed')\n",
- "print deltas4.shape"
+ "print(deltas4.shape)"
],
"language": "python",
"metadata": {},
@@ -289,12 +291,12 @@
"clstm.ctrain(xs,ys,update=0)\n",
"h2o.debug = False\n",
"b2o.debug = False\n",
- "print\n",
- "print array(h2o.dinputs).reshape(3,2)\n",
- "print array(h2o.dweights).reshape(3,2)\n",
- "print\n",
- "print array(b2o.dinputs).reshape(3)\n",
- "print array(b2o.dweights).reshape(3)"
+ "print()\n",
+ "print(array(h2o.dinputs).reshape(3, 2))\n",
+ "print(array(h2o.dweights).reshape(3, 2))\n",
+ "print()\n",
+ "print(array(b2o.dinputs).reshape(3))\n",
+ "print(array(b2o.dweights).reshape(3))"
],
"language": "python",
"metadata": {},
@@ -364,4 +366,4 @@
"metadata": {}
}
]
-}
+}
View
@@ -3,10 +3,13 @@
### common functions for data structures, file name manipulation, etc.
################################################################
+from __future__ import print_function
+
import os
import os.path
import re
import sys
+import sysconfig
import unicodedata
import inspect
import glob
@@ -181,7 +184,7 @@ def write_image_gray(fname,image,normalize=0,verbose=0):
type, its values are clipped to the range [0,1],
multiplied by 255 and converted to unsigned bytes. Otherwise,
the image must be of type unsigned byte."""
- if verbose: print "# writing",fname
+ if verbose: print("# writing", fname)
if isfloatarray(image):
image = array(255*clip(image,0.0,1.0),'B')
assert image.dtype==dtype('B'),"array has wrong dtype: %s"%image.dtype
@@ -204,7 +207,7 @@ def write_image_binary(fname,image,verbose=0):
"""Write a binary image to disk. This verifies first that the given image
is, in fact, binary. The image may be of any type, but must consist of only
two values."""
- if verbose: print "# writing",fname
+ if verbose: print("# writing", fname)
assert image.ndim==2
image = array(255*(image>midrange(image)),'B')
im = array2pil(image)
@@ -361,7 +364,7 @@ def bbox(self,i):
"""Return the bounding box in raster coordinates
(row0,col0,row1,col1)."""
r = self.objects[i]
- # print "@@@bbox",i,r
+ # print("@@@bbox", i, r)
return (r[0].start,r[1].start,r[0].stop,r[1].stop)
def bboxMath(self,i):
"""Return the bounding box in math coordinates
@@ -375,7 +378,7 @@ def length(self):
def mask(self,index,margin=0):
"""Return the mask for component index."""
b = self.objects[index]
- #print "@@@mask",index,b
+ # print("@@@mask", index, b)
m = self.labels[b]
m[m!=index] = 0
if margin>0: m = pad_by(m,margin)
@@ -432,7 +435,7 @@ class names that have changed."""
if not nofind:
fname = ocropus_find_file(fname)
if verbose:
- print "# loading object",fname
+ print("# loading object", fname)
if zip==0 and fname.endswith(".gz"):
zip = 1
if zip>0:
@@ -582,34 +585,66 @@ def expand_args(args):
else:
return args
-data_paths = [
- ".",
- "./models",
- "./data",
- "./gui",
- "/usr/local/share/ocropus/models",
- "/usr/local/share/ocropus/data",
- "/usr/local/share/ocropus/gui",
- "/usr/local/share/ocropus",
-]
-
-def ocropus_find_file(fname,gz=1):
- """Search for OCRopus-related files in common OCRopus install
- directories (as well as the current directory)."""
- if os.path.exists(fname):
- return fname
- if gz:
- if os.path.exists(fname+".gz"):
- return fname+".gz"
- for path in data_paths:
- full = path+"/"+fname
- if os.path.exists(full): return full
- if gz:
- for path in data_paths:
- full = path+"/"+fname+".gz"
- if os.path.exists(full): return full
+
+def ocropus_find_file(fname, gz=True):
+ """Search for `fname` in one of the OCRopus data directories (as well as
+ the current directory). If `gz` is True, search also for gzipped files.
+
+ Result of searching $fname is the first existing in:
+
+ * $base/$fname
+ * $base/$fname.gz # if gz
+ * $base/models/$fname
+ * $base/models/$fname.gz # if gz
+ * $base/data/$fname
+ * $base/data/$fname.gz # if gz
+ * $base/gui/$fname
+ * $base/gui/$fname.gz # if gz
+
+ $base can be one of five base paths:
+ * `$OCROPUS_DATA` environment variable
+ * current working directory
+ * ../../../../share/ocropus from this file's install location
+ * `/usr/local/share/ocropus`
+ * `$PREFIX/share/ocropus` ($PREFIX being the Python installation
+ prefix, usually `/usr`)
+ """
+ possible_prefixes = []
+
+ if os.getenv("OCROPUS_DATA"):
+ possible_prefixes.append(os.getenv("OCROPUS_DATA"))
+
+ possible_prefixes.append(os.curdir)
+
+ possible_prefixes.append(os.path.normpath(os.path.join(
+ os.path.dirname(inspect.getfile(inspect.currentframe())),
+ os.pardir, os.pardir, os.pardir, os.pardir, "share", "ocropus")))
+
+ possible_prefixes.append("/usr/local/share/ocropus")
+
+ possible_prefixes.append(os.path.join(
+ sysconfig.get_config_var("datarootdir"), "ocropus"))
+
+
+ # Unique entries with preserved order in possible_prefixes
+ # http://stackoverflow.com/a/15637398/201318
+ possible_prefixes = [possible_prefixes[i] for i in
+ sorted(numpy.unique(possible_prefixes, return_index=True)[1])]
+ for prefix in possible_prefixes:
+ if not os.path.isdir(prefix):
+ continue
+ for basename in [".", "models", "data", "gui"]:
+ if not os.path.isdir(os.path.join(prefix, basename)):
+ continue
+ full = os.path.join(prefix, basename, fname)
+ if os.path.exists(full):
+ return full
+ if gz and os.path.exists(full + ".gz"):
+ return full + ".gz"
+
raise FileNotFound(fname)
+
def fvariant(fname,kind,gt=""):
"""Find the file variant corresponding to the given file name.
Possible file variants are line (or png), rseg, cseg, fst, costs, and txt.
@@ -743,7 +778,7 @@ def pyconstruct(s):
path = s[:s.find("(")]
if "." in path:
module = path[:path.rfind(".")]
- print "import",module
+ print("import", module)
exec "import "+module in env
return eval(s,env)
@@ -779,6 +814,7 @@ def obinfo(ob):
result += str(ob.shape)
return result
+
def binarize_range(image,dtype='B',threshold=0.5):
"""Binarize an image by its range."""
threshold = (amax(image)+amin(image))*threshold
View
@@ -72,8 +72,8 @@ def xlevenshtein(a,b,context=1):
same = filters.minimum_filter(same,1+2*context)
als = "".join([al[i] if not same[i] else "~" for i in range(len(al))])
bls = "".join([bl[i] if not same[i] else "~" for i in range(len(bl))])
- # print als
- # print bls
+ # print(als)
+ # print(bls)
ags = re.split(r'~+',als)
bgs = re.split(r'~+',bls)
confusions = [(a,b) for a,b in zip(ags,bgs) if a!="" or b!=""]
@@ -25,7 +25,7 @@ def create_cairo_font_face_for_file(filename, faceindex=0, loadoptions=0):
# initialize freetype
_ft_lib = ctypes.c_void_p()
if FT_Err_Ok != _freetype_so.FT_Init_FreeType(ctypes.byref(_ft_lib)):
- raise "Error initialising FreeType library."
+ raise OSError("Error initialising FreeType library.")
_surface = cairo.ImageSurface(cairo.FORMAT_A8, 0, 0)
_initialized = True
# create freetype face
@@ -34,14 +34,14 @@ def create_cairo_font_face_for_file(filename, faceindex=0, loadoptions=0):
cairo_t = PycairoContext.from_address(id(cairo_ctx)).ctx
_cairo_so.cairo_ft_font_face_create_for_ft_face.restype = ctypes.c_void_p
if FT_Err_Ok != _freetype_so.FT_New_Face(_ft_lib, filename, faceindex, ctypes.byref(ft_face)):
- raise "Error creating FreeType font face for " + filename
+ raise Exception("Error creating FreeType font face for " + filename)
# create cairo font face for freetype face
cr_face = _cairo_so.cairo_ft_font_face_create_for_ft_face(ft_face, loadoptions)
if CAIRO_STATUS_SUCCESS != _cairo_so.cairo_font_face_status(cr_face):
- raise "Error creating cairo font face for " + filename
+ raise Exception("Error creating cairo font face for " + filename)
_cairo_so.cairo_set_font_face(cairo_t, cr_face)
if CAIRO_STATUS_SUCCESS != _cairo_so.cairo_status(cairo_t):
- raise "Error creating cairo font face for " + filename
+ raise Exception("Error creating cairo font face for " + filename)
face = cairo_ctx.get_font_face()
return face
View
@@ -2,6 +2,8 @@
### text image generation with Cairo
################################################################
+from __future__ import print_function
+
import ctypes
import cairo
from cairoextras import *
@@ -104,7 +106,7 @@ def pango_render_string(s,spec=None,fontfile=None,size=None,bg=(0.0,0.0,0.0),fg=
else:
layout.set_markup(s)
((xbear,ybear,tw,th),_) = layout.get_pixel_extents()
- # print xbear,ybear,tw,th
+ # print(xbear, ybear, tw, th)
tw = tw+2*pad
th = th+2*pad
if tw<=w and th<=h: break
@@ -160,12 +162,12 @@ def gauss_distort(images,maxdelta=2.0,sigma=10.0):
deltas /= max(amax(deltas),-amin(deltas))
deltas *= maxdelta
xy = transpose(array(meshgrid(range(n),range(m))),axes=[0,2,1])
- # print xy.shape,deltas.shape
+ # print(xy.shape, deltas.shape)
deltas += xy
return [map_coordinates(image,deltas,order=1) for image in images]
if __name__=="__main__":
- # print sorted(pango_families())
+ # print(sorted(pango_families()))
ion()
show()
while 1:
Oops, something went wrong.

0 comments on commit d23ee42

Please sign in to comment.