Permalink
Browse files

Merge branch 'master' into imports

  • Loading branch information...
zuphilip committed Jul 16, 2017
2 parents a154e63 + 358df8d commit 8d378ab0e9dd832638c0205149fd2cf6165215b7
Showing with 184 additions and 124 deletions.
  1. +1 −1 .gitignore
  2. +0 −39 .hgignore
  3. +2 −1 .travis.yml
  4. +2 −1 README.md
  5. +17 −0 circle.yml
  6. +7 −7 ocropus-errs
  7. +53 −35 ocropus-gpageseg
  8. +0 −8 ocropus-linegen
  9. +0 −12 ocropus-nlbin
  10. +15 −15 run-coverage
  11. +48 −0 run-test-ci
  12. +39 −5 tests/run-unit
View
@@ -1,6 +1,5 @@
OLD
JUNK
.hg
book/
temp/
models/
@@ -14,3 +13,4 @@ build/
*.a
*.so
.~*.vue
doc/.ipynb_checkpoints/
View

This file was deleted.

Oops, something went wrong.
View
@@ -45,4 +45,5 @@ install:
script:
- mkdir ../test_folder
- cd ../test_folder
- ../ocropy/run-test
- ../ocropy/tests/run-unit
- ../ocropy/run-test-ci
View
@@ -2,8 +2,9 @@ ocropy
======
[![Build Status](https://travis-ci.org/tmbdev/ocropy.svg)](https://travis-ci.org/tmbdev/ocropy)
[![CircleCI](https://circleci.com/gh/UB-Mannheim/ocropy/tree/pull%2F4.svg?style=svg)](https://circleci.com/gh/UB-Mannheim/ocropy/tree/pull%2F4)
[![license](https://img.shields.io/github/license/tmbdev/ocropy.svg)](https://github.com/tmbdev/ocropy/blob/master/LICENSE)
[![Wiki](https://img.shields.io/badge/wiki-9%20pages-orange.svg)](https://github.com/tmbdev/ocropy/wiki)
[![Wiki](https://img.shields.io/badge/wiki-11%20pages-orange.svg)](https://github.com/tmbdev/ocropy/wiki)
[![Join the chat at https://gitter.im/tmbdev/ocropy](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/tmbdev/ocropy?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
OCRopus is a collection of document analysis programs, not a turn-key OCR system.
View
@@ -0,0 +1,17 @@
machine:
python:
version: 2.7.12
environment:
# Set matplotlib backend to the non-interactive antigrain image lib
MPLBACKEND: Agg
dependencies:
pre:
# 'models' folder is cached, don't download twice
- cd models && wget -nc http://www.tmbdev.net/en-default.pyrnn.gz
# Pipe to cat to hide the progress bars
- pip install -r requirements.txt|cat
cache_directories:
- models
test:
override:
- PATH=$PWD:$PATH ./run-test-ci
View
@@ -58,11 +58,11 @@ for fname,e,t,m in sorted(outputs):
total += t
missing += m
print("errors %8d"%errs)
print("missing %8d"%missing)
print("total %8d"%total)
print("err %8.3f %%"%(errs*100.0/total))
print("errnomiss %8.3f %%"%((errs-missing)*100.0/total))
if not args.erroronly:
print("errors %8d"%errs)
print("missing %8d"%missing)
print("total %8d"%total)
print("err %8.3f %%"%(errs*100.0/total))
print("errnomiss %8.3f %%"%((errs-missing)*100.0/total))
if args.erroronly:
print(errs * 1.0 / total)
print(errs * 1.0 / total)
View
@@ -31,67 +31,78 @@ from ocrolib import psegutils,morph,sl
from ocrolib.exceptions import OcropusException
from ocrolib.toplevel import *
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(add_help=False)
# error checking
parser.add_argument('-n','--nocheck',action="store_true",
group_error_checking = parser.add_argument_group('error checking')
group_error_checking.add_argument('-n','--nocheck',action="store_true",
help="disable error checking on inputs")
parser.add_argument('-z','--zoom',type=float,default=0.5,
help='zoom for page background estimation, smaller=faster, default: %(default)s')
parser.add_argument('--gray',action='store_true',
help='output grayscale lines as well, default: %(default)s')
parser.add_argument('-q','--quiet',action='store_true',
help='be less verbose, default: %(default)s')
# limits
parser.add_argument('--minscale',type=float,default=12.0,
group_error_checking.add_argument('--minscale',type=float,default=12.0,
help='minimum scale permitted, default: %(default)s')
parser.add_argument('--maxlines',type=float,default=300,
group_error_checking.add_argument('--maxlines',type=float,default=300,
help='maximum # lines permitted, default: %(default)s')
# scale parameters
parser.add_argument('--scale',type=float,default=0.0,
group_scale = parser.add_argument_group('scale parameters')
group_scale.add_argument('--scale',type=float,default=0.0,
help='the basic scale of the document (roughly, xheight) 0=automatic, default: %(default)s')
parser.add_argument('--hscale',type=float,default=1.0,
group_scale.add_argument('--hscale',type=float,default=1.0,
help='non-standard scaling of horizontal parameters, default: %(default)s')
parser.add_argument('--vscale',type=float,default=1.0,
group_scale.add_argument('--vscale',type=float,default=1.0,
help='non-standard scaling of vertical parameters, default: %(default)s')
# line parameters
parser.add_argument('--threshold',type=float,default=0.2,
group_line = parser.add_argument_group('line parameters')
group_line.add_argument('--threshold',type=float,default=0.2,
help='baseline threshold, default: %(default)s')
parser.add_argument('--noise',type=int,default=8,
group_line.add_argument('--noise',type=int,default=8,
help="noise threshold for removing small components from lines, default: %(default)s")
parser.add_argument('--usegauss',action='store_true',
group_line.add_argument('--usegauss',action='store_true',
help='use gaussian instead of uniform, default: %(default)s')
# column parameters
parser.add_argument('--maxseps',type=int,default=2,
group_column = parser.add_argument_group('column parameters')
group_column.add_argument('--maxseps',type=int,default=0,
help='maximum black column separators, default: %(default)s')
parser.add_argument('--sepwiden',type=int,default=10,
group_column.add_argument('--sepwiden',type=int,default=10,
help='widen black separators (to account for warping), default: %(default)s')
parser.add_argument('-b','--blackseps',action="store_true",
help="also check for black column separators")
# Obsolete parameter for 'also check for black column separators'
# which can now be triggered simply by a positive maxseps value.
group_column.add_argument('-b','--blackseps',action="store_true",
help=argparse.SUPPRESS)
# whitespace column separators
parser.add_argument('--maxcolseps',type=int,default=3,
group_column.add_argument('--maxcolseps',type=int,default=3,
help='maximum # whitespace column separators, default: %(default)s')
parser.add_argument('--csminaspect',type=float,default=1.1,
help='minimum aspect ratio for column separators')
parser.add_argument('--csminheight',type=float,default=10,
group_column.add_argument('--csminheight',type=float,default=10,
help='minimum column height (units=scale), default: %(default)s')
# wait for input after everything is done
parser.add_argument('-p','--pad',type=int,default=3,
# Obsolete parameter for the 'minimum aspect ratio for column separators'
# used in the obsolete function compute_colseps_morph
group_column.add_argument('--csminaspect',type=float,default=1.1,
help=argparse.SUPPRESS)
# output parameters
group_output = parser.add_argument_group('output parameters')
group_output.add_argument('--gray',action='store_true',
help='output grayscale lines as well, default: %(default)s')
group_output.add_argument('-p','--pad',type=int,default=3,
help='padding for extracted lines, default: %(default)s')
parser.add_argument('-e','--expand',type=int,default=3,
group_output.add_argument('-e','--expand',type=int,default=3,
help='expand mask for grayscale extraction, default: %(default)s')
parser.add_argument('-Q','--parallel',type=int,default=0,
# other parameters
group_others = parser.add_argument_group('others')
group_others.add_argument('-q','--quiet',action='store_true',
help='be less verbose, default: %(default)s')
group_others.add_argument('-Q','--parallel',type=int,default=0,
help="number of CPUs to use")
parser.add_argument('-d','--debug',action="store_true")
group_others.add_argument('-d','--debug',action="store_true")
group_others.add_argument("-h", "--help", action="help", help="show this help message and exit")
# input files
parser.add_argument('files',nargs='+')
args = parser.parse_args()
args.files = ocrolib.glob_all(args.files)
@@ -232,9 +243,16 @@ def compute_colseps_conv(binary,scale=1.0):
def compute_colseps(binary,scale):
"""Computes column separators either from vertical black lines or whitespace."""
print_info("considering at most %g whitespace column separators" % args.maxcolseps)
colseps = compute_colseps_conv(binary,scale)
DSAVE("colwsseps",0.7*colseps+0.3*binary)
if args.blackseps:
if args.blackseps and args.maxseps == 0:
# simulate old behaviour of blackseps when the default value
# for maxseps was 2, but only when the maxseps-value is still zero
# and not set manually to a non-zero value
args.maxseps = 2
if args.maxseps > 0:
print_info("considering at most %g black column separators" % args.maxseps)
seps = compute_separators_morph(binary,scale)
DSAVE("colseps",0.7*seps+0.3*binary)
#colseps = compute_colseps_morph(binary,scale)
View
@@ -149,14 +149,6 @@ if args.debug_show:
plt.gray()
def bounding_box(a):
a = np.array(a>0,'i')
l = measurements.find_objects(a)
if len(l)<1: return (0,0,0,0)
ys,xs = l[0]
# y0,x0,y1,x1
return (ys.start,xs.start,ys.stop,xs.stop)
base = args.base
print("base", base)
os.system("rm -rf "+base)
View
@@ -68,18 +68,6 @@ def check_page(image):
if w>10000: return "line too wide for a page image %s"%(image.shape,)
return None
def estimate_scale(binary):
objects = binary_objects(binary)
bysize = sorted(objects,key=A)
scalemap = np.zeros(binary.shape)
for o in bysize:
if np.amax(scalemap[o])>0: continue
scalemap[o] = A(o)**0.5
scale = np.median(scalemap[(scalemap>3)&(scalemap<100)])
return scale
def estimate_skew_angle(image,angles):
estimates = []
for a in angles:
View
@@ -1,19 +1,19 @@
#!/bin/bash -e
BASE=$(dirname $0)
COVERAGE="python2 -m coverage"
rm -rf .coverage
rm -rf .coverage.*
rm -rf temp
python -m coverage run -p ocropus-nlbin $BASE/tests/testpage.png -o temp
python -m coverage run -p ocropus-gpageseg 'temp/????.bin.png'
python -m coverage run -p ocropus-rpred -n 'temp/????/??????.bin.png'
python -m coverage run -p ocropus-hocr 'temp/????.bin.png' -o temp.html
python -m coverage run -p ocropus-visualize-results temp
python -m coverage run -p ocropus-gtedit html temp/????/??????.bin.png -o temp-correction.html
python -m coverage run -p ocropus-rpred $BASE/tests/0079-01000d.png
python -m coverage run -p ocropus-errs $BASE/tests/0079-01000d.gt.txt
python -m coverage run -p ocropus-econf $BASE/tests/0079-01000d.gt.txt
python -m coverage merge
python2 -c "\
try:
import coverage
print('ok - python module coverage is installed')
except ImportError:
print('\nYou need to install the coverage module first, e.g. \n$ pip2 install coverage\n')
exit(1)"
rm -rf .coverage*
PATH=$PWD:$PATH $COVERAGE run -p --include=$PWD/**/* ./tests/run-unit
PATH=$PWD:$PATH RUNNER="$COVERAGE run -p --include=$PWD/**/*" ./run-test-ci
$COVERAGE combine
rm -rf htmlcov
python -m coverage html
$COVERAGE html
echo "# to see coverage results, browse to file://$PWD/htmlcov/index.html"
View
@@ -0,0 +1,48 @@
#!/bin/bash
# Enable errexit with 'set' rather than in the shebang so that it also
# applies when the script is started as 'bash run-test-ci'.
set -e
# Directory containing this script; the ocropus-* tools and the tests/
# fixtures are addressed relative to it.
BASE=$(dirname "$0")
# 'RUNNER' is the binary that the scripts are executed by. It defaults to
# 'python2' if not set in the environment. It is expanded unquoted on purpose
# so that it may carry extra arguments.
# Other examples (use double quotes so that $PWD is expanded):
# PATH=$PWD:$PATH RUNNER="python2 -m coverage run --include=$PWD/**/*" ./run-test-ci
# PATH=$PWD:$PATH RUNNER='python3' ./run-test-ci
# See ./run-coverage for a real example.
RUNNER="${RUNNER:-python2}"
# Runs the ocropus command line tools nlbin, gpageseg, rpred, dewarp, hocr,
# visualize-results and gtedit in sequence, starting from
# $BASE/tests/testpage.png and writing into ./temp, temp.html and
# temp-correction.html.
# Globals:   RUNNER (read, expanded unquoted so it may carry arguments),
#            BASE (read)
# Outputs:   command trace on stderr while the tools run
test_page() {
  set -x
  $RUNNER "$BASE/ocropus-nlbin" "$BASE/tests/testpage.png" -o temp
  # Quoted glob patterns are passed literally to the tools.
  $RUNNER "$BASE/ocropus-gpageseg" 'temp/????.bin.png'
  $RUNNER "$BASE/ocropus-rpred" --parallel=0 --nocheck 'temp/0001/01000?.bin.png'
  $RUNNER "$BASE/ocropus-dewarp" 'temp/0001/01001?.bin.png'
  $RUNNER "$BASE/ocropus-hocr" 'temp/????.bin.png' -o temp.html 2>/dev/null
  $RUNNER "$BASE/ocropus-visualize-results" temp
  # This pattern is intentionally unquoted: the shell expands it.
  $RUNNER "$BASE/ocropus-gtedit" html temp/????/??????.bin.png -o temp-correction.html
  set +x
}
# Copies the 0079-01000d fixture files from $BASE/tests into ./temp and runs
# ocropus-rpred, ocropus-errs and ocropus-econf on the copies.
# Globals:   RUNNER (read, expanded unquoted so it may carry arguments),
#            BASE (read)
# Outputs:   command trace on stderr while the tools run
test_conf() {
  local TESTIMAGE=0079-01000d
  mkdir -p temp
  # The glob stays outside the quotes so that every fixture file sharing the
  # prefix is copied.
  cp "$BASE/tests/$TESTIMAGE"* temp
  set -x
  $RUNNER "$BASE/ocropus-rpred" "temp/$TESTIMAGE.png"
  $RUNNER "$BASE/ocropus-errs" "temp/$TESTIMAGE.gt.txt"
  $RUNNER "$BASE/ocropus-econf" "temp/$TESTIMAGE.gt.txt"
  # Switch tracing off again so it does not leak into the caller.
  set +x
}
# Runs ocropus-linegen with '-m 3' on the tomsawyer.txt text and the
# DejaVuSans.ttf font from $BASE/tests.
# Globals:   RUNNER (read, expanded unquoted so it may carry arguments),
#            BASE (read)
# Outputs:   command trace on stderr while the tool runs
test_linegen() {
  set -x
  $RUNNER "$BASE/ocropus-linegen" -m 3 -t "$BASE/tests/tomsawyer.txt" -f "$BASE/tests/DejaVuSans.ttf"
  set +x
}
# Unpacks $BASE/tests/uw3-500.tgz into the current directory and runs
# ocropus-rtrain on 'book/*/*.bin.png' with '-N 5', writing to the
# 'ci-test-model' output name.
# Globals:   RUNNER (read, expanded unquoted so it may carry arguments),
#            BASE (read)
# Outputs:   command trace on stderr while the commands run
test_rtrain() {
  set -x
  tar -zxf "$BASE/tests/uw3-500.tgz"
  # The quoted glob pattern is passed literally to ocropus-rtrain.
  $RUNNER "$BASE/ocropus-rtrain" 'book/*/*.bin.png' -N 5 -o ci-test-model
  set +x
}
# Remove the 'temp' output directory left over from any previous run.
rm -rf temp
# Run the test groups one after another. errexit is enabled at the top of
# the script, so the first failing command aborts the whole run.
test_page
test_conf
test_linegen
test_rtrain
Oops, something went wrong.

0 comments on commit 8d378ab

Please sign in to comment.