Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 241 lines (215 sloc) 7.23 KB
#!/usr/bin/env python
# vim: set fileencoding=utf8 :
from __future__ import print_function
import random as pyrandom
import glob
import sys
import os
import re
import codecs
import traceback
import argparse
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageFont,ImageDraw
from scipy.ndimage import filters,measurements,interpolation
from scipy.misc import imsave
import ocrolib
parser = argparse.ArgumentParser(description = "Generate text line training data")
parser.add_argument('-o','--base',default='linegen',help='output directory, default: %(default)s')
parser.add_argument('-r','--distort',type=float,default=1.0)
parser.add_argument('-R','--dsigma',type=float,default=20.0)
parser.add_argument('-f','--fonts',default=None)
parser.add_argument('-F','--fontlist',default=None)
parser.add_argument('-t','--texts',default=None)
parser.add_argument('-T','--textlist',default=None)
parser.add_argument('-m','--maxlines',default=200,type=int,
help='max # lines for each directory, default: %(default)s')
parser.add_argument('-e','--degradations',default="lo",
help="lo, med, or hi; or give a file, default: %(default)s")
parser.add_argument('-j','--jitter',default=0.5)
parser.add_argument('-s','--sizes',default="40-70")
parser.add_argument('-d','--display',action="store_true")
parser.add_argument('--numdir',action="store_true")
parser.add_argument('-C','--cleanup',default='[_~#]')
parser.add_argument('-D','--debug_show',default=None,
help="select a class for stepping through")
args = parser.parse_args()
if "-" in args.sizes:
lo,hi = args.sizes.split("-")
sizes = range(int(lo),int(hi)+1)
else:
sizes = [int(x) for x in args.sizes.split(",")]
if args.degradations=="lo":
# sigma +/- threshold +/-
deglist = """
0.5 0.0 0.5 0.0
"""
elif args.degradations=="med":
deglist = """
0.5 0.0 0.5 0.05
1.0 0.3 0.4 0.05
1.0 0.3 0.5 0.05
1.0 0.3 0.6 0.05
"""
elif args.degradations=="hi":
deglist = """
0.5 0.0 0.5 0.0
1.0 0.3 0.4 0.1
1.0 0.3 0.5 0.1
1.0 0.3 0.6 0.1
1.3 0.3 0.4 0.1
1.3 0.3 0.5 0.1
1.3 0.3 0.6 0.1
"""
elif args.degradations is not None:
with open(args.degradations) as stream:
deglist = stream.read()
degradations = []
for deg in deglist.split("\n"):
deg = deg.strip()
if deg=="": continue
deg = [float(x) for x in deg.split()]
degradations.append(deg)
if args.fonts is not None:
fonts = []
for pat in args.fonts.split(':'):
if pat=="": continue
fonts += sorted(glob.glob(pat))
elif args.fontlist is not None:
with open(args.fontlist) as fh:
lines = (line.strip() for line in fh)
fonts = [line for line in lines if line]
else:
print("use -f or -F arguments to specify fonts")
sys.exit(1)
assert len(fonts)>0,"no fonts?"
print("fonts", fonts)
if args.texts is not None:
texts = []
for pat in args.texts.split(':'):
print(pat)
if pat=="": continue
texts += sorted(glob.glob(pat))
elif args.textlist is not None:
texts = re.split(r'\s*\n\s*',open(args.textlist).read())
else:
print("use -t or -T arguments to specify texts")
sys.exit(1)
assert len(texts)>0,"no texts?"
lines = []
for text in texts:
print("# reading", text)
with codecs.open(text,'r','utf-8') as stream:
for line in stream.readlines():
line = line.strip()
line = re.sub(args.cleanup,'',line)
if len(line)<1: continue
lines.append(line)
print("got", len(lines), "lines")
assert len(lines)>0
lines = list(set(lines))
print("got", len(lines), "unique lines")
def rgeometry(image,eps=0.03,delta=0.3):
m = np.array([[1+eps*np.random.randn(),0.0],[eps*np.random.randn(),1.0+eps*np.random.randn()]])
w,h = image.shape
c = np.array([w/2.0,h/2])
d = c-np.dot(m,c)+np.array([np.random.randn()*delta,np.random.randn()*delta])
return interpolation.affine_transform(image,m,offset=d,order=1,mode='constant',cval=image[0,0])
def rdistort(image,distort=3.0,dsigma=10.0,cval=0):
h,w = image.shape
hs = np.random.randn(h,w)
ws = np.random.randn(h,w)
hs = filters.gaussian_filter(hs,dsigma)
ws = filters.gaussian_filter(ws,dsigma)
hs *= distort/np.amax(hs)
ws *= distort/np.amax(ws)
def f(p):
return (p[0]+hs[p[0],p[1]],p[1]+ws[p[0],p[1]])
return interpolation.geometric_transform(image,f,output_shape=(h,w),
order=1,mode='constant',cval=cval)
if args.debug_show:
plt.ion()
plt.gray()
base = args.base
print("base", base)
os.system("rm -rf "+base)
os.mkdir(base)
def crop(image,pad=1):
[[r,c]] = measurements.find_objects(np.array(image==0,'i'))
r0 = r.start
r1 = r.stop
c0 = c.start
c1 = c.stop
image = image[r0-pad:r1+pad,c0-pad:c1+pad]
return image
last_font = None
last_size = None
last_fontfile = None
def genline(text,fontfile=None,size=36,sigma=0.5,threshold=0.5):
global image,draw,last_font,last_fontfile
if last_fontfile!=fontfile or last_size!=size:
last_font = ImageFont.truetype(fontfile,size)
last_fontfile = fontfile
font = last_font
image = Image.new("L",(6000,200))
draw = ImageDraw.Draw(image)
draw.rectangle((0,0,6000,6000),fill="white")
# print("\t", size, font)
draw.text((250,20),text,fill="black",font=font)
a = np.asarray(image,'f')
a = a*1.0/np.amax(a)
if sigma>0.0:
a = filters.gaussian_filter(a,sigma)
a += np.clip(np.random.randn(*a.shape)*0.2,-0.25,0.25)
a = rgeometry(a)
a = np.array(a>threshold,'f')
a = crop(a,pad=3)
# FIXME add grid warping here
# clf(); ion(); gray(); imshow(a); ginput(1,0.1)
del draw
del image
return a
lines_per_size = args.maxlines//len(sizes)
for pageno,font in enumerate(fonts):
if args.numdir:
pagedir = "%s/%04d"%(base,pageno+1)
else:
fbase = re.sub(r'^[./]*','',font)
fbase = re.sub(r'[.][^/]*$','',fbase)
fbase = re.sub(r'[/]','_',fbase)
pagedir = "%s/%s"%(base,fbase)
os.mkdir(pagedir)
print("===", pagedir, font)
lineno = 0
while lineno<args.maxlines:
(sigma,ssigma,threshold,sthreshold) = pyrandom.choice(degradations)
sigma += (2*np.random.rand()-1)*ssigma
threshold += (2*np.random.rand()-1)*sthreshold
line = pyrandom.choice(lines)
size = pyrandom.choice(sizes)
with open(pagedir+".info","w") as stream:
stream.write("%s\n"%font)
try:
image = genline(text=line,fontfile=font,
size=size,sigma=sigma,threshold=threshold)
except:
traceback.print_exc()
continue
if np.amin(image.shape)<10: continue
if np.amax(image)<0.5: continue
if args.distort>0:
image = rdistort(image,args.distort,args.dsigma,cval=np.amax(image))
if args.display:
plt.gray()
plt.clf()
plt.imshow(image)
plt.ginput(1,0.1)
fname = pagedir+"/01%04d"%lineno
imsave(fname+".bin.png",image)
gt = ocrolib.normalize_text(line)
with codecs.open(fname+".gt.txt","w",'utf-8') as stream:
stream.write(gt+"\n")
print("%5.2f %5.2f %3d\t%s" % (sigma, threshold, size, line))
lineno += 1