#!/usr/bin/python

# Perform line recognition and optional alignment using an isolated
# character classifier and language models (see the usage text below).

import sys,os,re,glob,math,signal
from scipy.ndimage import interpolation
from pylab import *
from optparse import OptionParser
import ocropy
from ocropy import fstutils
from ocropy import N,NI,F,FI

# exit quietly on Ctrl-C instead of dumping a traceback
signal.signal(signal.SIGINT,lambda *args:sys.exit(1))

# ocropy.dinit(512,512,1)

parser = OptionParser("""
usage: %prog [options] [text.txt langmod.fst image.png ...]

Performs recognition and optional alignment using the given classifier
and language models. The classifier should be an isolated character classifier.

Arguments can be a mix of text files, language models, and images.

If a language model is given, it is used for aligning/recognizing
subsequent images.

If a text file is given, it is compiled into a language model and
then used for recognizing subsequent images.

When alignment is performed, rseg.gt.png, cseg.gt.png,
and gt.txt files are written.
""")

parser.add_option("-m","--model",help="model file",default="unlv.model")
parser.add_option("-s","--segmenter",help="segmenter",default="DpSegmenter")
parser.add_option("-l","--langmod",help="language model",default=None)

parser.add_option("-p","--precomp",help="precompose extra transducer",default=None)
parser.add_option("-v","--verbose",help="verbose",action="store_true")
parser.add_option("-b","--best",help="# best chars to add to FST",type="int",default=10)
parser.add_option("-t","--cthreshold",help="avg per character threshold",type="float",default=7.0)
parser.add_option("-T","--gthreshold",help="max total threshold",type="float",default=100.0)
parser.add_option("-d","--display",help="display results during processing",action="store_true")
parser.add_option("-M","--maxcost",help="maxcost for transition",type="float",default=5.0)
parser.add_option("-D","--maxdist",help="maxdist for grouper",type="int",default=5)
parser.add_option("-A","--noambigs",help="don't use ambiguous classes",action="store_true")
parser.add_option("-x","--gtextension",help="extension used for ground truth (ex: .txt, .gt.txt, .fst, ...)",default=None)
(options,args) = parser.parse_args()
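
# Example invocations (illustrative only; "ocropus-align.py" and the file
# names below are placeholders, not names taken from this repository):
#
#   ocropus-align.py -m unlv.model line0001.png
#       recognize line0001.png; without a language model only the raw
#       best path is printed
#
#   ocropus-align.py lines.txt line0001.png line0002.png
#       compile lines.txt into a language model, then align both images
#
#   ocropus-align.py -x .gt.txt line*.png
#       align each image against its own <base>.gt.txt ground truth file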

if len(args)==0:
    parser.print_help()
    sys.exit(0)

assert options.precomp is None,"precomp not implemented yet"

segmenter = ocropy.make_ISegmentLine(options.segmenter)
grouper = ocropy.make_IGrouper("SimpleGrouper")
grouper.pset("maxdist",options.maxdist) # use 5 to handle "/''

ion()

print "loading",options.model

cmodel = ocropy.load_IModel(options.model)
linerec = ocropy.CmodelLineRecognizer(cmodel=cmodel)

default_lmodel = None

def read_lmodel(fname):
    """Load a language model: either a precompiled FST (*.fst) or a plain
    text file that is compiled into an FST."""
    if fname[-4:]==".fst":
        result = ocropy.make_OcroFST()
        result.load(fname)
        return result
    else:
        return fstutils.load_text_file_as_fst(fname)
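
# Note: read_lmodel serves both the language model / text file arguments given
# on the command line and the per-image ground truth files selected via
# -x/--gtextension, so the ground truth extension may be .fst or a .txt
# variant such as .gt.txt.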

for imagefile in args:
    prefix = re.sub(r'\.[^/]*$','',imagefile)

    ## if the file is a language model, load it
    if imagefile[-4:]==".fst" or imagefile[-4:]==".txt":
        assert options.gtextension is None,"either specify gtextension or language model"
        default_lmodel = read_lmodel(imagefile)
        continue

    ## load the line image
    print "load\t",imagefile
    image = ocropy.bytearray()
    ocropy.read_image_gray(image,imagefile)

    ## perform line recognition, yielding a recognition lattice
    lattice = ocropy.make_OcroFST()
    rseg = ocropy.intarray()
    linerec.recognizeLineSeg(lattice,rseg,image)
    lattice.save("_lattice.fst")
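    # _lattice.fst is a fixed file name, so it is overwritten for every input
    # line; presumably it is kept only for inspection/debugging.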

    ## compute and output the raw best path
    s = ocropy.ustrg()
    lattice.bestpath(s)
    cost = 0.0   # placeholder; the best-path cost is not computed here
    print "lraw %6.2f\t%3d\t%s"%(cost,s.length(),ocropy.ustrg_as_string(s))

    ## if we can find a language model for it then perform alignment
    lmodel = default_lmodel
    if options.gtextension is not None:
        base = re.sub(r'\.[^/]*$','',imagefile)
        lmodel = read_lmodel(base+options.gtextension)

    if lmodel is not None:
        # result is the aligned transcription, cseg the character segmentation,
        # and costs the per-character alignment costs
        result,cseg,costs = ocropy.compute_alignment(lattice,rseg,lmodel)
        print "costs",sum(costs),mean(costs),amax(costs),median(costs)
        print "result",result
        ## output the aligned segmentations and transcription
        ocropy.write_segmentation(prefix+".rseg.gt.png",rseg)
        ocropy.write_segmentation(prefix+".cseg.gt.png",cseg)
        ocropy.write_text(prefix+".gt.txt",result)

    print