-
Notifications
You must be signed in to change notification settings - Fork 588
/
ocropus-cluster-csegs
executable file
·65 lines (55 loc) · 1.9 KB
/
ocropus-cluster-csegs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python
import code,pickle,sys,os
import ocropy
from pylab import *
from optparse import OptionParser
# Build a cluster database from ground-truth character segmentations.
#
# Every (raw, mask, cls) triple produced by ocropy.cseg_chars() for the input
# line images is normalized and added to an ocropy.BinnedNN instance, which is
# saved to the output file at the end (and checkpointed to "<output>.temp"
# every 10000 characters).


def build_parser():
    """Return the OptionParser describing this script's command line."""
    parser = OptionParser("""
usage: %prog [options] .../.../010001.png ...
Extract character images from OCR output. This assume that for each
line.png, there is a line.cseg.gt.png and line.gt.txt file.
""")
    parser.add_option("-o", "--output", help="output file",
                      default="clusters.db")
    parser.add_option("-u", "--unmerged", help="unmerged output file",
                      default=None)
    parser.add_option("-m", "--missegmented",
                      help="output missegmented characters",
                      action="store_true")
    parser.add_option("-r", "--raw", help="output unlabeled characters",
                      action="store_true")
    # The original help text here was a copy of the --missegmented one; the
    # value is really the period (in characters) of the collect() call and
    # the argument passed to it.
    parser.add_option("-a", "--maxage",
                      help="run binned.collect(MAXAGE) every MAXAGE "
                           "characters; 0 disables (default: %default)",
                      default=10000000, type="int")
    return parser


def resolve_label(cls, keep_raw, keep_missegmented):
    """Map a ground-truth class to the label stored with the character.

    Args:
        cls: class value yielded by ocropy.cseg_chars(); None means there is
            no ground truth, "" or a value <= 0 means missegmented.
        keep_raw: if true, unlabeled characters are kept with label "_".
        keep_missegmented: if true, missegmented characters are kept with
            label "~".

    Returns:
        The label to store, or None if the character must be skipped.
    """
    if cls is None:
        # no ground truth
        return "_" if keep_raw else None
    if cls != "":
        try:
            nonpositive = bool(cls <= 0)
        except TypeError:
            # Python 3 refuses to order str against int; Python 2 silently
            # evaluates that comparison to False, which is what we mimic.
            nonpositive = False
        if not nonpositive:
            return cls
    # missegmented
    return "~" if keep_missegmented else None


def main(argv=None):
    """Run the clustering; argv defaults to sys.argv[1:]."""
    parser = build_parser()
    (options, args) = parser.parse_args(argv)
    if len(args) < 1:
        parser.print_help()
        sys.exit(0)

    # Kept from the original script: switch pylab to interactive mode.
    # NOTE(review): nothing below plots anything - confirm whether this is
    # still needed.
    ion()
    show()

    if os.path.exists(options.output):
        # Refuse to overwrite an existing database.
        print("%s exists; please remove" % (options.output,))
        sys.exit(1)
    checkpoint = options.output + ".temp"
    if os.path.exists(checkpoint):
        os.unlink(checkpoint)

    binned = ocropy.BinnedNN()
    total = 0
    for raw, mask, cls in ocropy.cseg_chars(args):
        cls = resolve_label(cls, options.raw, options.missegmented)
        if cls is None:
            continue
        raw = ocropy.NI(raw)
        if raw.shape[0] > 255 or raw.shape[1] > 255:
            continue
        peak = float(amax(raw))
        if peak == 0:
            # An all-zero image cannot be scaled to a maximum of 1; dividing
            # would only feed NaNs into the clusters, so skip it.
            continue
        raw = raw / peak
        binned.add(raw, cls)
        total += 1
        if total % 100 == 0:
            print("%d chars" % total)
            print(binned.stats())
        # Guard against --maxage 0, which used to raise ZeroDivisionError.
        if options.maxage > 0 and total % options.maxage == 0:
            print("removed %s clusters" % (binned.collect(options.maxage),))
        if total % 10000 == 0:
            # this is just for "looking in" on the progress
            print("saving after %d chars" % total)
            binned.save(checkpoint)

    if options.unmerged is not None:
        binned.save(options.unmerged)
    # NOTE: the original script has a disabled binned.remerge() call at this
    # point, between the two saves.
    binned.save(options.output)


if __name__ == "__main__":
    main()