Skip to content

Commit

Permalink
added better help messages
Browse files Browse the repository at this point in the history
added additional output options to ocropus-binarize
added sanity checks to output of binarization
  • Loading branch information
tmb committed Mar 19, 2010
1 parent b1c04d8 commit b38390b
Show file tree
Hide file tree
Showing 12 changed files with 269 additions and 32 deletions.
File renamed without changes.
File renamed without changes.
60 changes: 47 additions & 13 deletions ocropus-binarize
Expand Up @@ -4,22 +4,41 @@

import signal
signal.signal(signal.SIGINT,lambda *args:sys.exit(1))
import ocropy.gtkyield
from pylab import *
import sys,os,re,ocropy,optparse
from ocropy import N,NI
from scipy.ndimage import measurements
from scipy.misc import imsave
from PIL import Image

parser = optparse.OptionParser()
parser = optparse.OptionParser(usage="""
%prog -o dir [options] image1 image2 ...
Performs preprocessing on each of the images on the command line and stores
the resulting output in a directory with "book structure". That is, the
input pages will be stored in dir/0001.png dir/0001.bin.png dir/0002.png
dir/0002.bin.png etc. dir/0001.png contains the deskewed grayscale image,
while dir/0001.bin.png contains the binarized version.
Preprocessing uses hysteresis thresholding; you control it mainly through the
-L and -H arguments, which take values between 0 and 1:
* large parts of characters are missing: decrease -H
* there is too much noise in the image: increase -H
* characters are too thin or broken up: increase -L
* characters are too thick or touching: decrease -L
""")
parser.add_option("-o","--output",help="output directory",default="book")
parser.add_option("-O","--Output",help="output image.png to image.bin.png (in place)",action="store_true")
parser.add_option("-d","--display",help="display result",action="store_true")
parser.add_option("-D","--Display",help="display continuously",action="store_true")
parser.add_option("-T","--threshold",help="threshold (simple Sauvola if set)",default=-1,type=float)
parser.add_option("-L","--low",help="low threshold",default=0.1,type=float)
parser.add_option("-H","--high",help="high threshold",default=0.4,type=float)
parser.add_option("-W","--width",help="width parameter",default=40,type=float)
parser.add_option("-r","--dpi",help="resolution (DPI)",default=300,type=float)
parser.add_option("-S","--silent",action="store_true",
help="disable warnings")
options,args = parser.parse_args()

if options.threshold>0:
Expand All @@ -35,19 +54,27 @@ binarizer.command("binarizer_pset","width",str(options.width))

if options.Display: options.display = 1
if options.display: ion(); show()

if os.path.exists(options.output):
print options.output,"already exists; please remove"
os.exit(0)

os.mkdir(options.output)
if options.Output:
options.output = None
if not options.Output and os.path.exists(options.output):
print "%s: already exists; please remove"%options.output
sys.exit(0)
if not options.Output:
os.mkdir(options.output)

count = 1

for arg in args:
print "# loading",arg
print
print "===",arg,"==="
print
image = ocropy.bytearray()
ocropy.read_image_gray(image,arg)
if options.display:
clf()
imshow(NI(image),cmap=cm.gray)
draw()
ginput(1,timeout=1)
bin = ocropy.bytearray()
gray = ocropy.bytearray()
try:
Expand All @@ -63,8 +90,15 @@ for arg in args:
raw_input("hit ENTER to continue")
else:
ginput(1,timeout=1)
dest = "%s/%04d" % (options.output,count)
print "# writing",dest
ocropy.write_image_gray(dest+".png",gray)
ocropy.write_image_gray(dest+".bin.png",bin)
if not options.silent:
ocropy.quick_check_page_components(bin,dpi=options.dpi)
if options.Output:
dest,_ = ocropy.allsplitext(arg)
print "# writing",dest
ocropy.write_image_gray(dest+".bin.png",bin)
else:
dest = "%s/%04d" % (options.output,count)
print "# writing",dest
ocropy.write_image_gray(dest+".png",gray)
ocropy.write_image_gray(dest+".bin.png",bin)
count += 1
4 changes: 2 additions & 2 deletions ocropus-cedit
Expand Up @@ -195,8 +195,8 @@ def cmd_similar(*args):
selected = [grid[i][2].image for i in cluster_viewer.get_selected_items()]
dists = [set_dist(x[2].image,selected) for x in grid]
else:
selected = [ocromisc.stdsize(grid[i][2].image) for i in cluster_viewer.get_selected_items()]
dists = [set_dist(ocromisc.stdsize(x[2].image),selected) for x in grid]
selected = [ocropy.stdsize(grid[i][2].image) for i in cluster_viewer.get_selected_items()]
dists = [set_dist(ocropy.stdsize(x[2].image),selected) for x in grid]
index = array(argsort(dists))
index = [int(i) for i in index]
grid.reorder(index)
Expand Down
47 changes: 47 additions & 0 deletions ocropus-cluster-db
@@ -0,0 +1,47 @@
#!/usr/bin/python
import code,pickle,sys,os,re,ocropy
from ocropy import dbtables,binnednn
from pylab import *
from optparse import OptionParser

# Command-line handling for the clustering script.
#
# Two positional arguments are required (see the len(args) check below):
# the input character database and the output file, so the usage text
# names both of them.  The text is passed as usage= with a leading %prog
# line so optparse can format its own "Usage:" prefix.
parser = OptionParser(usage="""
%prog [options] input.db output

Reads character images from the "chars" table of input.db, adds them to a
BinnedNN clusterer one class at a time, and saves the result to output.
""")

parser.add_option("-D","--display",help="display chars",action="store_true")
parser.add_option("-v","--verbose",help="verbose output",action="store_true")

(options,args) = parser.parse_args()

# Anything other than exactly two positional arguments is a usage error:
# show the help text and stop.
# NOTE(review): the exit status is kept at 0 to preserve existing behavior;
# a non-zero status may be more appropriate for a usage error.
if len(args)!=2:
    parser.print_help()
    sys.exit(0)

# `input` shadows the builtin, but the name is kept because later
# statements in the script refer to it.
input = args[0]
output = args[1]

# Turn on interactive plotting and open the figure window.
ion()
show()

# Open the "chars" table of the input database; the "image" column goes
# through the SmallImage converter.
table = dbtables.Table(input, "chars")
table.converter("image", dbtables.SmallImage())
table.create(
    image="blob",
    count="integer",
    cls="text",
    classes="text",
    key="text",
)

# Collect the distinct class labels, in sorted order, from the first
# column of each result row.
classes = []
for row in table.query("select distinct(cls) from chars order by cls"):
    classes.append(row[0])

# Main loop: for every class label in `classes`, fetch the matching rows
# from `table`, add each row's image to a BinnedNN instance, then print
# its stats and save it to `output`.
# NOTE(review): this listing has lost its indentation, so the nesting of
# the statements below cannot be confirmed from here -- in particular
# whether the stats()/save() lines run once per class or once after the
# outer loop.  TODO confirm against the real file.
total = 0
for cls in classes:
print "# clustering",cls
# presumably a fresh BinnedNN and a fresh counter for each class, which
# would make the `total = 0` before the loop redundant -- TODO confirm
binned = binnednn.BinnedNN()
total = 0
for row in table.get(cls=cls):
raw = row.image
# skip images whose first or second dimension exceeds 255
if raw.shape[0]>255 or raw.shape[1]>255: continue
# rescale by the maximum value so the largest element becomes 1.0
# NOTE(review): an all-zero image makes amax(raw) zero here -- confirm
# whether that can occur
raw = raw/float(amax(raw))
binned.add(raw,cls)
total+=1
# progress report every 1000 images added
if total%1000==0:
print total,"chars"
print binned.stats()
binned.save(output)
13 changes: 6 additions & 7 deletions ocropus-extract-rsegs
Expand Up @@ -40,13 +40,15 @@ def chars(files):
for file in files:
print "# loading",file
image = ocropy.bytearray()
ocropy.read_image_gray(image,file)
binfile = re.sub(r'.png','.bin.png',file)
ocropy.read_image_gray(image,binfile)
segmentation = ocropy.intarray()
segmenter.charseg(segmentation,image)
ocropy.make_line_segmentation_black(segmentation)
ocropy.renumber_labels(segmentation,1)
grouper.setSegmentation(segmentation)
ocropy.sub(255,image)
print grouper.length()
for i in range(grouper.length()):
cls = None
raw = ocropy.bytearray()
Expand All @@ -61,12 +63,9 @@ for raw,mask,cls in chars(args):
if options.display:
clf(); gray(); imshow(raw); draw()
if raw.shape[0]>255 or raw.shape[1]>255: continue
raw = raw/float(amax(raw))
# key = re.sub(r'^.*/(\d\d\d\d/)','\\1',file)
# key = re.sub(r'\.png$','',key)
key = None
if options.verbose:
print key,cls,raw.shape
raw = array(255*raw/float(amax(raw)),'B')
table.set(image=raw,cls="_")
total += 1
if total%10000==0: table.commit()

table.commit()
15 changes: 13 additions & 2 deletions ocropus-linerec
Expand Up @@ -10,15 +10,26 @@ prefix = "/usr/local/share/ocropus/models/"
parser = OptionParser(usage="""
%prog [options] image.png ...
Recognize models using the OCRopus C++ line recognizer (IRecognizeLine)
Recognize models using OCRopus line recognizers.
For each input image, it reads image.png and image.bin.png, if it exists.
It produces the following output files:
* image.fst -- recognition lattice
* image.txt -- recognized output, combining the language model and recognition lattice
You can get a quick idea of how recognition is working by using the -d or -D arguments.
""")
parser.add_option("-m","--linerec",help="linerec model",default=prefix+"default.model")
parser.add_option("-l","--langmod",help="langmod",default=prefix+"default.fst")
parser.add_option("-l","--langmod",help="language model",default=prefix+"default.fst")
parser.add_option("-w","--lweight",help="language model weight",default=1.0,type=float)
parser.add_option("-v","--verbose",help="verbose",action="store_true")
parser.add_option("-d","--display",help="display result",action="store_true")
parser.add_option("-D","--Display",help="display continuously",action="store_true")
(options,args) = parser.parse_args()

assert options.lweight==1.0,"other language model weights not implemented yet"

if options.Display: options.display = 1
if options.display: ion()

Expand Down
43 changes: 37 additions & 6 deletions ocropus-pseg
Expand Up @@ -5,14 +5,34 @@ signal.signal(signal.SIGINT,lambda *args:sys.exit(1))
import sys,os,re,optparse,shutil
from matplotlib import patches
from pylab import *
from scipy.stats.stats import trim1

from scipy.ndimage import measurements
from scipy.misc import imsave
from PIL import Image
import ocropy
from ocropy import N,NI

parser = optparse.OptionParser()
parser = optparse.OptionParser(usage="""
%prog [options] image1.png image2.png ...
Usually, you would use an argument pattern like: book/????/??????.png
Computes page segmentations and extracts text lines.
For each input image image.png, it generates:
* image.pseg.png -- page segmentation
* image/010001.png -- gray scale text line image column 1, line 1
* image/010001.bin.png -- binary text line image column 1, line 1
Use the -d or -D argument to verify that the layout analysis is working
correctly.
If image.bin.png exists, it uses it. If not, it uses built-in
preprocessing and generates and writes its own binary version.
(The original gray scale image is, however, not altered, so it
may be rotated relative to the binary image.)
""")
parser.add_option("-g","--gray",action="store_true",
help="output grayscale images + binary masks")
parser.add_option("-p","--pad",default=1,type=int,
Expand All @@ -25,30 +45,37 @@ parser.add_option("-S","--segmenter",default="SegmentPageByRAST",
help="which segmentation component to use")
parser.add_option("-P","--preproc",default="StandardPreprocessing",
help="which preprocessing component to use")
parser.add_option("-r","--dpi",default=300,type=int,
help="resolution of input image in DPI")
options,args = parser.parse_args()


preproc = ocropy.make_IBinarize(options.preproc)
segmenter = ocropy.make_ISegmentPage(options.segmenter)

if options.Display: options.display = 1
if options.display: ion()

for arg in args:
print "# loading",arg
print
print "===",arg,"==="
print
base,_ = ocropy.allsplitext(arg)
page_gray = ocropy.bytearray()
ocropy.read_image_gray(page_gray,arg)
w,h = [page_gray.dim(i) for i in [0,1]]
image = ocropy.bytearray()
ocropy.read_image_gray(image,arg)
w,h = [image.dim(i) for i in [0,1]]

# get a binary image, either saved on disk or through
# preprocessing
page_bin = ocropy.bytearray()
if os.path.exists(base+".bin.png"):
print "# loading",base+".bin.png"
page_gray = image
ocropy.read_image_gray(page_bin,base+".bin.png")
else:
print "# binarizing"
preproc.binarize(page_bin,page_gray)
page_gray = ocropy.bytearray()
preproc.binarize(page_bin,page_gray,image)
ocropy.write_image_gray(base+".bin.png",page_bin)

# get a text/image segmentation if available
Expand All @@ -60,6 +87,8 @@ for arg in args:
page_ti.resize(w,h)
page_ti.fill(0)

# FIXME remove images here

print "# segmenting"
page_seg = ocropy.intarray()
segmenter.segment(page_seg,page_bin)
Expand All @@ -69,11 +98,13 @@ for arg in args:
print "# removing",base
shutil.rmtree(base)
os.mkdir(base)

print "# writing",regions.length(),"lines"
if options.display:
clf()
axis = subplot(111)
axis.imshow(NI(page_bin),cmap=cm.gray)

line = ocropy.bytearray()
for i in range(1,regions.length()):
if options.display:
Expand Down
2 changes: 1 addition & 1 deletion ocropus-showlrecs
Expand Up @@ -37,7 +37,7 @@ from matplotlib.backends.backend_gtk import NavigationToolbar2GTK as NavigationT
parser = OptionParser(usage="""
%prog [options] [input.db]
Trains models based on a cluster database.
Interactively explore line recognition and line recognition errors.
""")
parser.add_option("-s","--spaces",help="count spaces",action="store_true")
parser.add_option("-v","--verbose",help="verbose",action="store_true")
Expand Down
2 changes: 1 addition & 1 deletion ocropy/__init__.py
Expand Up @@ -6,4 +6,4 @@
from simplerec import CmodelLineRecognizer
from ocropus import *
from iulib import *

from quickcheck import *
7 changes: 7 additions & 0 deletions ocropy/gtkyield.py
@@ -0,0 +1,7 @@
import matplotlib
matplotlib.use('GTK')

import gtk
def gtk_yield():
    """Run GTK main-loop iterations until no events are pending.

    Each pass asks gtk.events_pending() whether anything is queued and, if
    so, runs one gtk.main_iteration(False); it returns as soon as the
    queue is reported empty.
    """
    while True:
        if not gtk.events_pending():
            return
        gtk.main_iteration(False)

0 comments on commit b38390b

Please sign in to comment.