Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 103 lines (83 sloc) 3.14 KB
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys
import numpy as np
import matplotlib.pyplot as plt
import ocrolib
from ocrolib import lineest
import ocrolib.lstm as lstm
from ocrolib import edist
import clstm
# Global matplotlib setup: interactive mode and a 7pt size for tick labels
# and for the default font.
plt.ion()
for tick_group in ('xtick', 'ytick'):
    plt.rc(tick_group, labelsize=7)
plt.rcParams.update({"font.size": 7})

# Make NumPy raise on division by zero, overflow and invalid operations
# instead of only warning; underflow is ignored.
np.seterr(divide='raise', over='raise', invalid='raise', under='ignore')
# Command-line interface.  Flags, destinations and defaults are unchanged;
# only the generated help output is corrected.
#
# The summary is passed as description= because the first positional
# parameter of ArgumentParser is prog, which would otherwise replace the
# script name on the usage line.
parser = argparse.ArgumentParser(description="run an RNN recognizer")

# character set
# NOTE(review): args.codec is not read anywhere in the visible script; the
# character set is built from fixed tables instead -- confirm before relying
# on this option.
parser.add_argument("-c", "--codec", default=[], nargs='*',
                    help="construct a codec from the input text")

# evaluation
parser.add_argument("-e", "--eval", action="store_true",
                    help="compare each result with the matching .gt.txt file "
                         "and print the accumulated edit-distance error rate")
parser.add_argument("-K", "--kind", default="exact",
                    help="kind of comparison (exact, nospace, letdig, letters, digits, lnc), default: %(default)s")

# line preprocessing
parser.add_argument("--lineheight", type=int, default=48,
                    help="target height used to normalize input lines, default: %(default)s")
parser.add_argument("-p", "--pad", type=int, default=16,
                    help="amount of zero padding added to both ends of each line, default: %(default)s")

# network
parser.add_argument("-S", "--hiddensize", type=int, default=100,
                    help="# LSTM state units, default: %(default)s")
parser.add_argument('-m', '--load', default=None,
                    help="previously trained model to load for recognition")

parser.add_argument("files", nargs="*")
args = parser.parse_args()
# Expand the file arguments; with nothing to process, show the help text and
# exit successfully.
inputs = ocrolib.glob_all(args.files)
if len(inputs) == 0:
    parser.print_help()
    sys.exit(0)

# --load defaults to None, which would otherwise be handed straight to
# network.load() below.  Report the missing model as a usage error instead.
if args.load is None:
    parser.error("no model given; use -m/--load to specify a trained model")

# Character set: the union of the two label tables, sorted, with the empty
# string, space and "~" pinned to the first three positions.
# NOTE(review): presumably position 0 is the blank/epsilon class expected by
# lstm.translate_back -- confirm against lstm.Codec.
charset = sorted(set(list(lstm.ascii_labels) + list(ocrolib.chars.default)))
charset = ["", " ", "~"] + [c for c in charset if c not in (" ", "~")]
codec = lstm.Codec().init(charset)

# Line normalizer; its target_height is also passed to network.init() below.
lnorm = lineest.CenterNormalizer(args.lineheight)

# Build the network with dimensions matching the codec and the normalizer,
# wrap it, then load the trained model into it.
network = clstm.make_BIDILSTM()
print("# network", (codec.size(), args.hiddensize, lnorm.target_height))
network.init(codec.size(), args.hiddensize, lnorm.target_height)
network = clstm.CNetwork(network)
network.load(args.load)
def preprocess(line):
    """Prepare a grayscale line image for the recognizer.

    The image is normalized with the module-level ``lnorm`` (measured on the
    image subtracted from its maximum, with the maximum as fill value), then
    divided by its maximum, subtracted from the resulting maximum and
    transposed.  When ``args.pad`` is positive, that many all-zero rows are
    stacked before and after the transposed array.

    Returns the resulting array, or None when the normalized image has fewer
    than 10 elements or contains a single constant value.
    """
    peak = np.amax(line)
    lnorm.measure(peak - line)
    normalized = lnorm.normalize(line, cval=peak)

    # Reject degenerate lines.  The size test comes first so that amax/amin
    # are never evaluated when it already fails.
    if normalized.size < 10:
        return None
    top = np.amax(normalized)
    if top == np.amin(normalized):
        return None

    scaled = normalized * 1.0 / top
    result = (np.amax(scaled) - scaled).T

    if args.pad > 0:
        border = np.zeros((args.pad, result.shape[1]))
        result = np.vstack([border, result, border])
    return result
# Running totals for the evaluation summary: accumulated edit distance and
# accumulated ground-truth length.  They are only updated and reported when
# --eval is given.
errs = 0
total = 0

for fname in inputs:
    # Best effort per file: any failure is printed and the loop moves on to
    # the next input.
    try:
        base, _ = ocrolib.allsplitext(fname)
        line = ocrolib.read_image_gray(fname)
        line = preprocess(line)
        if line is None:
            # preprocess() rejected the image; nothing is written for it.
            continue

        # Recognize the line and write the text next to the image.
        outputs = np.array(network.forward(line))
        result = lstm.translate_back(outputs)
        pred = "".join(codec.decode(result))
        print("%s\t%s" % (fname, pred))
        ocrolib.write_text(base + ".txt", pred)

        if args.eval:
            # Project both texts according to --kind before comparing them.
            transcript = ocrolib.read_text(base + ".gt.txt")
            gt = ocrolib.project_text(transcript, kind=args.kind)
            txt = ocrolib.project_text(pred, kind=args.kind)
            errs += edist.levenshtein(txt, gt)
            total += len(gt)
    except Exception as e:
        print(e)

if args.eval:
    # total stays 0 when every file failed or was skipped, or when all ground
    # truth was empty; the rate is undefined then, so report NaN rather than
    # dividing by zero.
    rate = errs * 1.0 / total if total > 0 else float("nan")
    print(errs, total, rate)