-
Notifications
You must be signed in to change notification settings - Fork 588
/
ocroold-pseg
executable file
·214 lines (186 loc) · 8.14 KB
/
ocroold-pseg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/python
import sys,os,re,optparse,shutil,glob
import matplotlib
# Select the matplotlib backend before pylab is imported below: the
# non-interactive AGG backend when no DISPLAY is set in the environment,
# the GTK backend otherwise.
if "DISPLAY" not in os.environ: matplotlib.use("AGG")
else: matplotlib.use("GTK")
import signal
# On SIGINT (Ctrl-C), exit with status 1 via sys.exit, i.e. by raising
# SystemExit in whatever code is running when the signal is handled.
signal.signal(signal.SIGINT,lambda *args:sys.exit(1))
from matplotlib import patches
from pylab import *
from scipy.stats.stats import trim1
from multiprocessing import Pool
from scipy.ndimage import measurements,interpolation
from scipy.misc import imsave
import traceback
from ocrolib import number_of_processors
import ocrolib
# Command line parser; the usage text below is what `--help` prints and
# documents the files the tool reads and writes for each input image.
parser = optparse.OptionParser(usage="""
%prog [options] image1.png image2.png ...
Usually, you would use an argument pattern like: book/????/??????.png
Computes page segmentations and extracts text lines.
For each input image image.png, it generates:
* image.pseg.png -- page segmentation
* image/010001.png -- gray scale text line image column 1, line 1
* image/010001.bin.png -- binary text line image column 1, line 1
Use the -d or -D argument to verify that the layout analysis is working
correctly.
If image.bin.png exists, it uses it. If not, it uses built-in
preprocessing and generates and writes its own binary version.
(The original gray scale image is, however, not altered, so it
may be rotated relative to the binary image.)
If an image.tiseg.png file exists, it uses it to constrain the
layout analysis (although some layout analysis methods may be
ignoring the map).
""")
# --- output format and line size normalization ---
parser.add_option("-g","--gray",action="store_true",
                  help="output grayscale images + binary masks")
parser.add_option("-u","--upscale",type=int,default=32,
                  help="upscale lines shorter than this to the given target height")
parser.add_option("-t","--target",type=int,default=64,
                  help="downscale lines taller than this to the given target height")

# --- limits used to reject implausible text lines ---
parser.add_option("-L","--low",type=int,default=16,
                  help="lower limit for text line height")
parser.add_option("-H","--high",type=int,default=200,
                  help="upper limit for text line height")
parser.add_option("-W","--width",type=int,default=20,
                  help="lower limit for text line width")

# --- diagnostics ---
parser.add_option("-v","--verbose",action="store_true",
                  help="output additional information")
parser.add_option("-p","--pad",default=1,type=int,
                  help="pad lines by this amount")
parser.add_option("-d","--display",action="store_true",
                  help="display result")
parser.add_option("-D","--Display",action="store_true",
                  help="display continuously")

# --- processing components ---
parser.add_option("-S","--segmenter",default="ocrorast.SegmentPageByRAST",
                  help="which segmentation component to use")
parser.add_option("-P","--preproc",default="ocropreproc.CommonPreprocessing",
                  help="which preprocessing component to use")
parser.add_option("-r","--dpi",default=300,type=int,
                  help="resolution of input image in DPI")
# Without -q, lines scoring below 0.5 in the line component check are
# skipped, so the option changes the output and not only the messages.
parser.add_option("-q","--silent",action="store_true",
                  help="disable warnings and the line component check that would otherwise skip suspicious lines")
# store_false with default=True: blackout is ON unless -b is given, so the
# help text has to describe the flag as turning blackout off.
parser.add_option("-b","--blackout",action="store_false",default=True,
                  help="disable blackout of image regions (pass rectangles to the segmenter instead)")
parser.add_option("-R","--descender",type=float,default=-1,
                  help="maximum descender")
parser.add_option("-Q","--parallel",type=int,default=number_of_processors(),
                  help="number of parallel processes to use")

options,args = parser.parse_args()
# A single directory argument is shorthand for every image in it whose
# name is exactly four characters followed by ".png", in sorted order.
if len(args)==1:
    bookdir = args[0]
    if os.path.isdir(bookdir):
        args = sorted(glob.glob(bookdir+"/????.png"))

# Instantiate the preprocessing and segmentation components selected on
# the command line (-P / -S).
preproc = ocrolib.make_IBinarize(options.preproc)
segmenter = ocrolib.make_ISegmentPage(options.segmenter)

# -R is only forwarded to the segmenter when given a positive value;
# its default of -1 leaves the segmenter's own setting untouched.
if options.descender>0:
    segmenter.pset("max_descender",options.descender)

# -D (continuous display) implies -d; any display mode switches pylab
# to interactive plotting.
if options.Display:
    options.display = 1
if options.display:
    ion()
def process_arg(arg):
print "===",arg
base,_ = ocrolib.allsplitext(arg)
if os.path.exists(base):
print "# output directory",base,"already exists"
return
image = ocrolib.read_image_gray(arg,'B')
h,w = image.shape
if options.display:
clf(); imshow(image,cmap=cm.gray); draw(); ginput(1,1)
# get a binary image, either saved on disk or through
# preprocessing
if os.path.exists(base+".bin.png"):
print "# loading",base+".bin.png"
page_gray = image
page_bin = ocrolib.read_image_gray(base+".bin.png")
else:
print "# binarizing"
(page_bin,page_gray) = preproc.binarize(image)
ocrolib.write_image_gray(base+".bin.png",page_bin)
# get a text/image segmentation if available
page_ti = None
tirects = None
if os.path.exists(base+".tiseg.png"):
# FIXME these aren't implemented
# print "# loading",base+".tiseg.png"
# rgb = ocrolib.read_image_packed(page_ti,base+".tiseg.png")
# ocrolib.unpack_rgb(r,g,b,page_ti)
# n = ocrolib.label_components(components)
# print "# number of image regions",n
# ocrolib.bounding_boxes(tirects,components)
raise Exception("tiseg handling unimplemented right now")
if options.blackout and tirects is not None:
for (r0,c0,r1,c1) in tirects:
page_bin[r0:r1,c0:c1] = 0
if options.display:
clf(); imshow(page_bin,cmap=cm.gray); draw(); ginput(1,1)
print "# segmenting"
if options.blackout:
page_seg = segmenter.segment(page_bin)
else:
page_seg = segmenter.segment(page_bin,tirects)
regions = ocrolib.RegionExtractor()
regions.setPageLines(page_seg)
if os.path.exists(base):
print "# removing",base
shutil.rmtree(base)
os.mkdir(base)
nregions = regions.length()
print "# writing",nregions,"lines"
if options.display:
clf()
axis = subplot(111)
axis.imshow(page_bin,cmap=cm.gray)
ocrolib.draw_pseg(page_seg,axis)
for i in range(1,regions.length()):
line = regions.extractMasked(page_bin,i,0,255,options.pad)
if line.shape[0]<options.low:
if options.verbose: print "# skipping %06x"%regions.id(i),", not tall enough:",line.shape[0]
continue
if line.shape[0]>options.high:
if options.verbose: print "# skipping %06x"%regions.id(i),", too tall:",line.shape[0]
continue
if line.shape[1]<options.width:
if options.verbose: print "# skipping %06x"%regions.id(i),", not wide enough:",line.shape[1]
continue
if options.upscale>0 and line.shape[0]<options.upscale:
scale = options.upscale*1.0/line.shape[0]
if options.verbose:
print "# upscaling %06x"%regions.id(i),"by",scale
line = interpolation.zoom(line,(scale,scale),order=1)
assert abs(line.shape[0]-options.upscale)<2,line.shape
elif options.target>0 and line.shape[0]>options.target:
scale = options.target*1.0/line.shape[0]
if options.verbose:
print "# downscaling %06x"%regions.id(i),"by",scale
line = interpolation.zoom(line,(scale,scale),order=1)
assert abs(line.shape[0]-options.target)<2,line.shape
if not options.silent:
if ocrolib.quick_check_line_components(line,dpi=options.dpi)<0.5:
continue
assert (regions.id(i)&0xff0000)>0
if options.gray:
ocrolib.write_image_gray("%s/%06x.bin.png"%(base,regions.id(i)),line)
line = regions.extract(page_gray,i,options.pad)
ocrolib.write_image_gray("%s/%06x.png"%(base,regions.id(i)),line)
else:
ocrolib.write_image_gray("%s/%06x.bin.png"%(base,regions.id(i)),line)
ocrolib.write_image_gray("%s/%06x.png"%(base,regions.id(i)),line)
ocrolib.write_page_segmentation(base+".pseg.png",page_seg)
if options.display:
draw()
if not options.Display:
raw_input("hit ENTER to continue")
else:
ginput(1,timeout=1)
def process_arg_safe(arg):
try:
process_arg(arg)
except:
traceback.print_exc()
print "# OOPS",arg,"failed"
# Process all pages.  Display modes (-d/-D) are always run serially in the
# main process: process_arg then plots interactively and may wait for
# ENTER via raw_input, and multiprocessing worker processes have their
# stdin replaced by os.devnull, so the prompt cannot work inside a pool.
if options.parallel<2 or options.display:
    for arg in args:
        process_arg_safe(arg)
else:
    pool = Pool(processes=options.parallel)
    # process_arg_safe reports failures itself and returns None, so the
    # mapped results carry no information and are discarded.
    pool.map(process_arg_safe,args)
    # no more work will be submitted; wait for the workers to exit
    pool.close()
    pool.join()