Permalink
Browse files

Remove Tesseract parameter "include_page_breaks" and use FF by default

Tesseract now adds a page separator (a form feed by default) after each page.

It is still possible to suppress page breaks by setting page_separator
to an empty string.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information...
stweil committed Jun 12, 2017
1 parent 3bb573a commit aa6eb6bd466101a3b89880f87580471a7694359d
Showing with 1 addition and 10 deletions.
  1. +1 −3 api/renderer.cpp
  2. +0 −4 ccmain/tesseractclass.cpp
  3. +0 −3 ccmain/tesseractclass.h
View
@@ -132,10 +132,8 @@ bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
AppendString(utf8.get());
bool pageBreak = false;
api->GetBoolVariable("include_page_breaks", &pageBreak);
const char* pageSeparator = api->GetStringVariable("page_separator");
if (pageBreak) {
if (pageSeparator != nullptr && *pageSeparator != '\0') {
AppendString(pageSeparator);
}
@@ -505,10 +505,6 @@ Tesseract::Tesseract()
this->params()),
BOOL_MEMBER(preserve_interword_spaces, false,
"Preserve multiple interword spaces", this->params()),
BOOL_MEMBER(include_page_breaks, FALSE,
"Include page separator string in output text after each "
"image/page.",
this->params()),
STRING_MEMBER(page_separator, "\f",
"Page separator (default is form feed control character)",
this->params()),
View
@@ -1103,9 +1103,6 @@ class Tesseract : public Wordrec {
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
BOOL_VAR_H(preserve_interword_spaces, false,
"Preserve multiple interword spaces");
BOOL_VAR_H(include_page_breaks, false,
"Include page separator string in output text after each "
"image/page.");
STRING_VAR_H(page_separator, "\f",
"Page separator (default is form feed control character)");

0 comments on commit aa6eb6b

Please sign in to comment.