Permalink
Browse files

Merge pull request #236 from lehzwo/master

ocropus-gpageseg: Enable usage of masks to specify column separators/ ignore areas of scan
  • Loading branch information...
kba committed Feb 19, 2018
2 parents 43381c4 + 289a58f commit e9b6121de2637e54495125c6a97a4ef75d872a2e
Showing with 14 additions and 1 deletion.
  1. +12 −1 ocropus-gpageseg
  2. +2 −0 run-test-ci
  3. BIN tests/table.bin.png
  4. BIN tests/table.mask.png
@@ -171,7 +171,7 @@ def DSAVE(title,image):
### This attempts to find column separators, either as extended
### vertical black lines or extended vertical whitespace.
### It will work fairly well in simple cases, but for unusual
### documents, you need to tune the parameters.
### documents, you need to tune the parameters or use a mask.
################################################################
def compute_separators_morph(binary,scale):
@@ -262,8 +262,19 @@ def compute_colseps(binary,scale):
#colseps = compute_colseps_morph(binary,scale)
colseps = np.maximum(colseps,seps)
binary = np.minimum(binary,1-seps)
binary,colseps = apply_mask(binary,colseps)
return colseps,binary
def apply_mask(binary,colseps):
    """Merge an optional user-supplied mask into the column separators.

    Tries to read the image at ``base + ".mask.png"`` with
    ``ocrolib.read_image_binary``. ``base`` is not a parameter; it is
    looked up from the enclosing scope at call time
    (NOTE(review): presumably the path stem of the page currently being
    processed -- confirm where it is bound).

    If reading the mask raises ``IOError``, both inputs are returned
    unchanged. Otherwise the element-wise maximum of ``colseps`` and the
    mask becomes the new separator image, and ``binary`` is limited to
    ``1 - separators`` by an element-wise minimum. The merged separator
    image is also passed to ``DSAVE`` under the title ``"masked_seps"``.

    Returns a ``(binary, colseps)`` tuple -- note that this is the
    reverse of the ``(colseps, binary)`` order returned by
    ``compute_colseps``.
    """
    try:
        mask_image = ocrolib.read_image_binary(base+".mask.png")
    except IOError:
        # The mask could not be read: hand both images back untouched.
        outcome = (binary,colseps)
    else:
        # Every pixel set in the mask is treated as a separator pixel.
        combined_seps = np.maximum(colseps,mask_image)
        # Cap the page image at the complement of the separators.
        cleaned = np.minimum(binary,1-combined_seps)
        DSAVE("masked_seps",combined_seps)
        outcome = (cleaned,combined_seps)
    return outcome
################################################################
### Text Line Finding.
@@ -91,6 +91,8 @@ test_gpageseg() {
$RUNNER $BASE/ocropus-gpageseg temp/$TESTIMAGE -n --maxseps 3
$RUNNER $BASE/ocropus-gpageseg temp/$TESTIMAGE -n -b
$RUNNER $BASE/ocropus-gpageseg temp/$TESTIMAGE -n --usegauss
cp $BASE/tests/table.bin.png $BASE/tests/table.mask.png temp
$RUNNER $BASE/ocropus-gpageseg temp/table.bin.png --debug -n --minscale 7 --maxcolseps 0
}
test_rpred() {
BIN +72.1 KB tests/table.bin.png
Binary file not shown.
BIN +6.37 KB tests/table.mask.png
Binary file not shown.

0 comments on commit e9b6121

Please sign in to comment.