Permalink
a154e63 Apr 28, 2017
@zuphilip @QuLogic @tmbdev @tajmorton
executable file 136 lines (111 sloc) 4.58 KB
#!/usr/bin/env python
from __future__ import print_function
import re
import argparse
import traceback
import sys
import numpy as np
import matplotlib.pyplot as plt
import ocrolib
from ocrolib import lineest
import ocrolib.lstm as lstm
import clstm
# Turn on matplotlib's interactive mode for the progress figure.
plt.ion()

# Use font size 7 for the tick labels on both axes and for figure text.
for _axis in ("xtick", "ytick"):
    plt.matplotlib.rc(_axis, labelsize=7)
plt.matplotlib.rcParams["font.size"] = 7

# Make NumPy raise on division, overflow and invalid floating-point
# operations; underflow is ignored.
np.seterr(divide="raise", over="raise", invalid="raise", under="ignore")
# Command-line interface of the trainer.
# The summary is passed as `description`: the first positional parameter of
# ArgumentParser is `prog`, which would put the text in the usage line in
# place of the program name.
parser = argparse.ArgumentParser(description="train an RNN recognizer")

# character set
# NOTE(review): --codec is accepted, but nothing in this script reads
# args.codec; the character set is always built from the default tables.
parser.add_argument("-c", "--codec", default=[], nargs='*',
                    help="construct a codec from the input text")
# Passed to the line normalizer as its target height.
parser.add_argument("--lineheight", type=int, default=48,
                    help="# normalized text line height, default: %(default)s")
# Number of zero rows stacked on both ends of each transposed line image.
parser.add_argument("-p", "--pad", type=int, default=16,
                    help="# zero padding added at both ends of each line, "
                         "default: %(default)s")

# learning
parser.add_argument("-r", "--lrate", type=float, default=1e-4,
                    help="LSTM learning rate, default: %(default)s")
parser.add_argument("-S", "--hiddensize", type=int, default=100,
                    help="# LSTM state units, default: %(default)s")
parser.add_argument("-o", "--output", default="temp",
                    help="LSTM model file")
parser.add_argument("-F", "--savefreq", type=int, default=1000,
                    help="LSTM save frequency, default: %(default)s")
parser.add_argument('--load', default=None,
                    help="start training with a previously trained model")
parser.add_argument("--start", type=int, default=0,
                    help="# start training line, default: %(default)s")
parser.add_argument("--ntrain", type=int, default=1000000,
                    help="# lines to train before stopping, default: %(default)s")

# Each training image is paired with a transcript named <base>.gt.txt.
parser.add_argument("files", nargs="*",
                    help="line images to train on (each with a matching "
                         ".gt.txt transcript)")
# Read the command line and expand the file arguments into the training set.
args = parser.parse_args()
inputs = ocrolib.glob_all(args.files)
if not inputs:
    # No training files: print the usage text and exit.
    parser.print_help()
    sys.exit(0)

# The output name is later formatted with the trial number; when it has no
# "%" placeholder, append the default numbered suffix.
if "%" not in args.output:
    args.output += "-%08d-lstm.h5"

# Character set: the union of the ASCII labels and ocrolib's default
# characters, sorted, with "", " " and "~" moved to the first three slots.
charset = sorted(set(lstm.ascii_labels) | set(ocrolib.chars.default))
charset = ["", " ", "~"] + [c for c in charset if c not in (" ", "~")]
codec = lstm.Codec().init(charset)

# The line normalizer's target height is also the network's input size.
lnorm = lineest.CenterNormalizer(args.lineheight)

# Build the bidirectional LSTM: (codec size, hidden units, input height).
network = clstm.make_BIDILSTM()
_network_dims = (codec.size(), args.hiddensize, lnorm.target_height)
print("# network", _network_dims)
network.init(*_network_dims)
network = clstm.CNetwork(network)

# Optionally continue from a previously saved model.
if args.load:
    network.load(args.load)

# NOTE(review): the second argument (0.9) is presumably the momentum;
# confirm against the clstm API.
network.setLearningRate(args.lrate, 0.9)
def cleandisp(s):
    """Return *s* with every ``$`` replaced by ``#``.

    Used on the strings that become plot titles.
    """
    return s.replace("$", "#")
def preprocess(line):
    """Turn a gray-scale line image into the array fed to the network.

    The image is size-normalized with the module-level ``lnorm``, rescaled
    so that its maximum is 1, inverted, transposed and finally padded with
    ``args.pad`` rows of zeros at both ends.

    Returns ``None`` when the normalized image has fewer than 10 elements
    or is constant, so the caller can skip the sample.
    """
    brightest = np.amax(line)
    # The normalizer is fitted on the inverted image; the image itself is
    # then normalized, with ``cval`` set to its maximum value.
    lnorm.measure(brightest - line)
    normalized = lnorm.normalize(line, cval=brightest)

    # Check the size first so the min/max reductions below are never
    # applied to an empty array.
    if normalized.size < 10:
        return None
    peak = np.amax(normalized)
    if peak == np.amin(normalized):
        return None

    # Scale to a maximum of 1, then invert.
    scaled = normalized * 1.0 / peak
    inverted = np.amax(scaled) - scaled
    transposed = inverted.T

    if args.pad <= 0:
        return transposed
    blank = np.zeros((args.pad, transposed.shape[1]))
    return np.vstack([blank, transposed, blank])
# Main training loop: each trial trains on one randomly chosen line image.
for trial in range(args.start, args.ntrain):
    # Save a checkpoint every --savefreq trials, but not on the first trial
    # of this run. A --savefreq of 0 disables checkpoints instead of raising
    # ZeroDivisionError in the modulo.
    if args.savefreq != 0 and trial > args.start and trial % args.savefreq == 0:
        network.save(args.output % trial)
    try:
        # Pick a sample at random (with replacement) rather than cycling
        # through the inputs in order.
        fname = inputs[np.random.randint(0, len(inputs))]
        base, _ = ocrolib.allsplitext(fname)
        line = ocrolib.read_image_gray(fname)
        # The ground-truth transcript sits next to the image as <base>.gt.txt.
        transcript = ocrolib.read_text(base + ".gt.txt")
        print("#", trial, fname, line.shape)

        line = preprocess(line)
        if line is None:
            # preprocess() rejected the image (too small or constant).
            continue

        # Forward pass, alignment of the targets to the outputs, and a
        # backward pass on the difference between the two.
        cs = np.array(codec.encode(transcript), 'i')
        outputs = np.array(network.forward(line))
        targets = np.array(lstm.make_target(cs, network.noutput()))
        aligned = np.array(lstm.ctc_align_targets(outputs, targets))
        deltas = aligned - outputs
        network.backward(deltas)

        # Decode the network output and the aligned targets back to text
        # for the progress report.
        result = lstm.translate_back(outputs)
        pred = "".join(codec.decode(result))
        acs = lstm.translate_back(aligned)
        gta = "".join(codec.decode(acs))
        print(" TRU:", repr(transcript))
        print(" ALN:", repr(gta[:len(transcript)+5]))
        print(" OUT:", repr(pred[:len(transcript)+5]))

        # Every 20th trial, redraw the figure: input line, aligned targets
        # and raw network outputs.
        if trial % 20 == 0:
            plt.clf()
            plt.subplot(311)
            plt.title(cleandisp(transcript))
            plt.imshow(line.T, cmap=plt.cm.gray, interpolation='bilinear')
            plt.subplot(312)
            plt.title(cleandisp(gta))
            plt.imshow(aligned.T, cmap=plt.cm.hot, interpolation='bilinear', aspect='auto')
            plt.subplot(313)
            plt.title(cleandisp(pred))
            plt.imshow(outputs.T, cmap=plt.cm.hot, interpolation='bilinear', aspect='auto')
            # Must be called through pyplot: a bare tight_layout() is an
            # undefined name here, and the NameError was swallowed by the
            # handler below, so the lines after it never ran.
            plt.tight_layout()
            # Wait at most 0.01 s for one click; presumably here to let
            # the GUI process events and redraw.
            plt.ginput(1, 0.01)
    except Exception as e:
        # Best effort: one bad sample must not stop training. Show the
        # traceback so the failing step can be identified, then move on.
        traceback.print_exc()
        print(e)