Permalink
Browse files

Merge branch 'master' into disable-legacy

  • Loading branch information...
amitdo committed Jul 4, 2018
2 parents 15fb491 + b502bbf commit 62c7b796dab8af11df734c7c7e83a8a5d2a85ac2
Showing with 2,316 additions and 3,558 deletions.
  1. +1 −0 .clang-format
  2. +0 −1 INSTALL.GIT.md
  3. +1 −1 VERSION
  4. +0 −7 autogen.sh
  5. +2 −2 doc/tesseract.1.asc
  6. +5 −1 java/com/google/scrollview/ScrollView.java
  7. +5 −1 java/com/google/scrollview/events/SVEventHandler.java
  8. +0 −1 java/com/google/scrollview/ui/SVCheckboxMenuItem.java
  9. +4 −6 java/com/google/scrollview/ui/SVWindow.java
  10. +53 −0 m4/ax_check_compile_flag.m4
  11. +38 −0 m4/ax_split_version.m4
  12. +0 −1 snap/snapcraft.yaml
  13. +37 −33 src/api/baseapi.cpp
  14. +6 −6 src/api/pdfrenderer.cpp
  15. +4 −5 src/ccmain/applybox.cpp
  16. +6 −7 src/ccmain/control.cpp
  17. +2 −2 src/ccmain/fixxht.cpp
  18. +10 −3 src/ccmain/recogtraining.cpp
  19. +0 −3 src/ccmain/tessedit.cpp
  20. +2 −2 src/ccmain/tessvars.cpp
  21. +5 −5 src/ccmain/tessvars.h
  22. +3 −2 src/ccstruct/Makefile.am
  23. +0 −29 src/ccstruct/blckerr.h
  24. +98 −110 src/ccstruct/blobs.cpp
  25. +7 −7 src/ccstruct/crakedge.h
  26. +2 −2 src/ccstruct/detlinefit.cpp
  27. +0 −17 src/ccstruct/hpdsizes.h
  28. +6 −6 src/ccstruct/linlsq.cpp
  29. +2 −1 src/ccstruct/normalis.cpp
  30. +2 −3 src/ccstruct/ocrblock.cpp
  31. +8 −8 src/ccstruct/ocrblock.h
  32. +4 −4 src/ccstruct/ocrrow.cpp
  33. +1 −1 src/ccstruct/otsuthr.cpp
  34. +6 −4 src/ccstruct/pdblock.cpp
  35. +2 −2 src/ccstruct/points.cpp
  36. +6 −6 src/ccstruct/polyaprx.h
  37. +3 −4 src/ccstruct/polyblk.h
  38. +1 −1 src/ccstruct/publictypes.h
  39. +2 −2 src/ccstruct/quadlsq.cpp
  40. +5 −5 src/ccstruct/quadlsq.h
  41. +5 −5 src/ccstruct/quadratc.h
  42. +4 −3 src/ccstruct/ratngs.h
  43. +5 −5 src/ccstruct/rejctmap.cpp
  44. +2 −2 src/ccstruct/statistc.h
  45. +2 −2 src/ccstruct/stepblob.cpp
  46. +6 −6 src/ccstruct/stepblob.h
  47. +1 −1 src/ccstruct/vecfuncs.h
  48. +0 −1 src/ccstruct/werd.cpp
  49. +10 −11 src/ccstruct/werd.h
  50. +1 −1 src/ccutil/Makefile.am
  51. +4 −4 src/ccutil/bits16.h
  52. +0 −7 src/ccutil/host.h
  53. +0 −175 src/ccutil/nwmain.h
  54. +2 −2 src/ccutil/strngs.h
  55. +0 −30 src/classify/adaptive.cpp
  56. +11 −11 src/classify/adaptive.h
  57. +14 −84 src/classify/adaptmatch.cpp
  58. +8 −6 src/classify/blobclass.cpp
  59. +1 −1 src/classify/classify.cpp
  60. +5 −5 src/classify/classify.h
  61. +154 −264 src/classify/cluster.cpp
  62. +37 −40 src/classify/cluster.h
  63. +22 −54 src/classify/clusttool.cpp
  64. +2 −16 src/classify/clusttool.h
  65. +0 −3 src/classify/cutoffs.cpp
  66. +10 −36 src/classify/featdefs.cpp
  67. +0 −3 src/classify/featdefs.h
  68. +6 −17 src/classify/float2int.cpp
  69. +5 −12 src/classify/fpoint.cpp
  70. +20 −20 src/classify/fpoint.h
  71. +2 −2 src/classify/intfeaturespace.cpp
  72. +3 −4 src/classify/intfx.cpp
  73. +6 −25 src/classify/intmatcher.cpp
  74. +1 −1 src/classify/intmatcher.h
  75. +49 −110 src/classify/intproto.cpp
  76. +90 −95 src/classify/intproto.h
  77. +36 −55 src/classify/kdtree.cpp
  78. +20 −22 src/classify/kdtree.h
  79. +1 −2 src/classify/mastertrainer.cpp
  80. +0 −2 src/classify/mf.cpp
  81. +1 −3 src/classify/mfdefs.cpp
  82. +7 −7 src/classify/mfdefs.h
  83. +9 −32 src/classify/mfoutline.cpp
  84. +15 −14 src/classify/mfoutline.h
  85. +5 −18 src/classify/mfx.cpp
  86. +1 −1 src/classify/normfeat.cpp
  87. +10 −9 src/classify/normfeat.h
  88. +9 −17 src/classify/normmatch.cpp
  89. +11 −28 src/classify/ocrfeatures.cpp
  90. +44 −45 src/classify/ocrfeatures.h
  91. +5 −18 src/classify/outfeat.cpp
  92. +4 −17 src/classify/picofeat.cpp
  93. +16 −17 src/classify/picofeat.h
  94. +9 −10 src/classify/protos.cpp
  95. +39 −49 src/classify/protos.h
  96. +2 −2 src/classify/trainingsample.cpp
  97. +2 −2 src/cutil/Makefile.am
  98. +0 −6 src/cutil/bitvec.cpp
  99. +0 −23 src/cutil/const.h
  100. +0 −100 src/cutil/cutil.cpp
  101. +6 −86 src/cutil/cutil.h
  102. +0 −1 src/cutil/cutil_class.h
  103. +0 −45 src/cutil/danerror.cpp
  104. +0 −35 src/cutil/danerror.h
  105. +0 −55 src/cutil/efio.cpp
  106. +0 −32 src/cutil/efio.h
  107. +18 −55 src/cutil/emalloc.cpp
  108. +0 −17 src/cutil/emalloc.h
  109. +2 −4 src/cutil/globals.h
  110. +3 −8 src/cutil/oldlist.cpp
  111. +1 −1 src/cutil/oldlist.h
  112. +7 −21 src/cutil/structures.h
  113. +5 −2 src/dict/dawg.cpp
  114. +5 −1 src/dict/dict.cpp
  115. +2 −1 src/dict/dict.h
  116. +10 −9 src/dict/matchdefs.h
  117. +0 −1 src/dict/permdawg.cpp
  118. +5 −8 src/dict/stopper.cpp
  119. +0 −1 src/dict/trie.h
  120. +4 −3 src/lstm/ctc.cpp
  121. +4 −3 src/lstm/networkio.cpp
  122. +1 −1 src/opencl/oclkernels.h
  123. +6 −6 src/opencl/opencl_device_selection.h
  124. +853 −916 src/opencl/openclwrapper.cpp
  125. +141 −138 src/opencl/openclwrapper.h
  126. +3 −2 src/textord/baselinedetect.cpp
  127. +35 −32 src/textord/cjkpitch.cpp
  128. +14 −14 src/textord/edgblob.cpp
  129. +5 −4 src/textord/pithsync.cpp
  130. +5 −4 src/textord/pitsync1.cpp
  131. +2 −1 src/textord/tordmain.cpp
  132. +10 −12 src/training/cntraining.cpp
  133. +11 −32 src/training/commontraining.cpp
  134. +25 −44 src/training/mergenf.cpp
  135. +40 −59 src/training/mergenf.h
  136. +9 −25 src/training/mftraining.cpp
  137. +3 −4 src/wordrec/chop.cpp
  138. +4 −5 src/wordrec/chopper.cpp
  139. +0 −1 src/wordrec/chopper.h
  140. +1 −1 src/wordrec/lm_state.cpp
  141. +1 −1 src/wordrec/lm_state.h
  142. +2 −3 src/wordrec/tface.cpp
@@ -2,3 +2,4 @@
BasedOnStyle: Google
# Enforce always the same pointer alignment.
DerivePointerAlignment: false
IndentPPDirectives: AfterHash
@@ -10,7 +10,6 @@ You need Leptonica 1.74.2 (minimum) for Tesseract 4.0x.
Known dependencies for training tools (excluding leptonica):
* compiler with c++11 support
* autoconf-archive
* automake
* pkg-config
* pango-devel
@@ -1 +1 @@
4.0.0-beta.1
4.0.0-beta.3
@@ -113,13 +113,6 @@ automake --add-missing --copy --warnings=all || bail_out
echo "Running autoconf"
autoconf || bail_out
if grep -q AX_CHECK_COMPILE_FLAG configure; then
# The generated configure is invalid because autoconf-archive is unavailable.
rm configure
echo "Missing autoconf-archive. Check the build requirements."
bail_out
fi
if grep -q PKG_CHECK_MODULES configure; then
# The generated configure is invalid because pkg-config is unavailable.
rm configure
@@ -43,8 +43,8 @@ OPTIONS
'--user-words /path/to/file'::
Specify the location of user words file
'--user-patterns /path/to/file specify'::
The location of user patterns file
'--user-patterns /path/to/file'::
Specify the location of user patterns file
'-c configvar=value'::
Set value for control parameter. Multiple -c arguments are allowed.
@@ -96,6 +96,10 @@ private static void IOLoop() {
!socket.isOutputShutdown() &&
socket.isConnected() && socket.isBound()) {
inputLine = receiveMessage();
if (inputLine == null) {
// End of stream reached.
break;
}
nrInputLines++;
if (debugViewNetworkTraffic) {
System.out.println("(c->S," + nrInputLines + ")" + inputLine);
@@ -380,7 +384,7 @@ public static void main(String[] args) {
System.out.println("Client connected");
// Setup the streams
out = new PrintStream(socket.getOutputStream(), true);
out = new PrintStream(socket.getOutputStream(), true, "UTF-8");
in =
new BufferedReader(new InputStreamReader(socket.getInputStream(),
"UTF8"));
@@ -28,6 +28,7 @@
import java.awt.event.KeyListener;
import java.awt.event.WindowEvent;
import java.awt.event.WindowListener;
import java.awt.Window;
import javax.swing.Timer;
@@ -262,7 +263,10 @@ public void keyPressed(KeyEvent e) {
public void windowClosing(WindowEvent e) {
processEvent(new SVEvent(SVEventType.SVET_DESTROY, svWindow, lastXMove,
lastYMove, 0, 0, null));
e.getWindow().dispose();
Window w = e.getWindow();
if (w != null) {
w.dispose();
}
SVWindow.nrWindows--;
if (SVWindow.nrWindows == 0) {
processEvent(new SVEvent(SVEventType.SVET_EXIT, svWindow, lastXMove,
@@ -29,7 +29,6 @@
* Constructs a new menulistitem which possesses a flag that can be toggled.
*/
class SVCheckboxMenuItem extends SVAbstractMenuItem {
public String value = null;
public boolean bvalue;
SVCheckboxMenuItem(int id, String name, boolean val) {
@@ -606,20 +606,18 @@ public void showInputDialog(String msg) {
* Shows a dialog presenting "Yes" and "No" as answers and returns either a
* "y" or "n" to the client.
*
* Closing the dialog without answering is handled like "No".
*
* @param msg The text that is displayed in the dialog.
*/
public void showYesNoDialog(String msg) {
// res returns 0 on yes, 1 on no. Seems to be a bit counterintuitive
int res =
JOptionPane.showOptionDialog(this, msg, "", JOptionPane.YES_NO_OPTION,
JOptionPane.QUESTION_MESSAGE, null, null, null);
SVEvent e = null;
if (res == 0) {
e = new SVEvent(SVEventType.SVET_INPUT, this, 0, 0, 0, 0, "y");
} else if (res == 1) {
e = new SVEvent(SVEventType.SVET_INPUT, this, 0, 0, 0, 0, "n");
}
SVEvent e = new SVEvent(SVEventType.SVET_INPUT, this, 0, 0, 0, 0,
res == 0 ? "y" : "n");
ScrollView.addMessage(e);
}
@@ -0,0 +1,53 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
#
# DESCRIPTION
#
# Check whether the given FLAG works with the current language's compiler
# or gives an error. (Warnings, however, are ignored)
#
# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
# success/failure.
#
# If EXTRA-FLAGS is defined, it is added to the current language's default
# flags (e.g. CFLAGS) when the check is done. The check is thus made with
# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
# force the compiler to issue an error when a bad flag is given.
#
# INPUT gives an alternative input source to AC_COMPILE_IFELSE.
#
# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
#
# LICENSE
#
# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 6
# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS], [INPUT])
#
# Implementation summary: the current language's FLAGS variable is saved,
# "EXTRA-FLAGS FLAG" is appended to it, and INPUT (by default an empty
# AC_LANG_PROGRAM) is test-compiled. The yes/no outcome is stored in a cache
# variable whose name is built from the language abbreviation, EXTRA-FLAGS
# and FLAG, and the saved flags are restored afterwards. Finally
# ACTION-SUCCESS or ACTION-FAILURE is run; each defaults to the no-op ":".
AC_DEFUN([AX_CHECK_COMPILE_FLAG],
[AC_PREREQ(2.64)dnl for _AC_LANG_PREFIX and AS_VAR_IF
AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
AC_COMPILE_IFELSE([m4_default([$5],[AC_LANG_PROGRAM()])],
[AS_VAR_SET(CACHEVAR,[yes])],
[AS_VAR_SET(CACHEVAR,[no])])
_AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
AS_VAR_IF(CACHEVAR,yes,
[m4_default([$2], :)],
[m4_default([$3], :)])
AS_VAR_POPDEF([CACHEVAR])dnl
])dnl AX_CHECK_COMPILE_FLAG
@@ -0,0 +1,38 @@
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_split_version.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_SPLIT_VERSION
#
# DESCRIPTION
#
# Splits a version number in the format MAJOR.MINOR.POINT into its
# separate components.
#
# Sets the variables AX_MAJOR_VERSION, AX_MINOR_VERSION and AX_POINT_VERSION.
#
# LICENSE
#
# Copyright (c) 2008 Tom Howard <tomhoward@users.sf.net>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 10
# AX_SPLIT_VERSION
#
# Implementation summary: the shell variable VERSION is piped through $SED
# three times. The leading run of non-dot characters becomes AX_MAJOR_VERSION,
# the next run of non-dot characters becomes AX_MINOR_VERSION, and everything
# after the second separator becomes AX_POINT_VERSION. Each value is then
# reported with an AC_MSG_CHECKING / AC_MSG_RESULT pair.
#
# NOTE(review): the separators are matched with an unescaped ".", which sed
# treats as "any character"; behaviour for VERSION strings that do not have
# the MAJOR.MINOR.POINT shape should be confirmed before relying on it.
AC_DEFUN([AX_SPLIT_VERSION],[
AC_REQUIRE([AC_PROG_SED])
AX_MAJOR_VERSION=`echo "$VERSION" | $SED 's/\([[^.]][[^.]]*\).*/\1/'`
AX_MINOR_VERSION=`echo "$VERSION" | $SED 's/[[^.]][[^.]]*.\([[^.]][[^.]]*\).*/\1/'`
AX_POINT_VERSION=`echo "$VERSION" | $SED 's/[[^.]][[^.]]*.[[^.]][[^.]]*.\(.*\)/\1/'`
AC_MSG_CHECKING([Major version])
AC_MSG_RESULT([$AX_MAJOR_VERSION])
AC_MSG_CHECKING([Minor version])
AC_MSG_RESULT([$AX_MINOR_VERSION])
AC_MSG_CHECKING([Point version])
AC_MSG_RESULT([$AX_POINT_VERSION])
])
@@ -19,7 +19,6 @@ parts:
source: .
plugin: autotools
build-packages:
- autoconf-archive
- pkg-config
- libpng12-dev
- libjpeg8-dev
@@ -2026,39 +2026,39 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
WERD_RES* word_res = it.word();
if (word_res != nullptr) {
word_res->word->set_text(wordstr);
// Check to see if text matches wordstr.
int w = 0;
int t;
for (t = 0; text[t] != '\0'; ++t) {
if (text[t] == '\n' || text[t] == ' ')
continue;
while (wordstr[w] == ' ') ++w;
if (text[t] != wordstr[w])
break;
++w;
}
if (text[t] != '\0' || wordstr[w] != '\0') {
// No match.
delete page_res_;
GenericVector<TBOX> boxes;
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
tesseract_->ReSegmentByClassification(page_res_);
tesseract_->TidyUp(page_res_);
PAGE_RES_IT pr_it(page_res_);
if (pr_it.word() == nullptr)
success = false;
else
word_res = pr_it.word();
} else {
word_res->BestChoiceToCorrectText();
}
if (success) {
tesseract_->EnableLearning = true;
tesseract_->LearnWord(nullptr, word_res);
}
} else {
success = false;
}
// Check to see if text matches wordstr.
int w = 0;
int t = 0;
for (t = 0; text[t] != '\0'; ++t) {
if (text[t] == '\n' || text[t] == ' ')
continue;
while (wordstr[w] == ' ') ++w;
if (text[t] != wordstr[w])
break;
++w;
}
if (text[t] != '\0' || wordstr[w] != '\0') {
// No match.
delete page_res_;
GenericVector<TBOX> boxes;
page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_);
tesseract_->ReSegmentByClassification(page_res_);
tesseract_->TidyUp(page_res_);
PAGE_RES_IT pr_it(page_res_);
if (pr_it.word() == nullptr)
success = false;
else
word_res = pr_it.word();
} else {
word_res->BestChoiceToCorrectText();
}
if (success) {
tesseract_->EnableLearning = true;
tesseract_->LearnWord(nullptr, word_res);
}
} else {
success = false;
}
@@ -2468,7 +2468,7 @@ void TessBaseAPI::GetBlockTextOrientations(int** block_orientation,
float re_theta = re_rotation.angle();
FCOORD classify_rotation = block_it.data()->classify_rotation();
float classify_theta = classify_rotation.angle();
double rot_theta = - (re_theta - classify_theta) * 2.0 / PI;
double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
if (rot_theta < 0) rot_theta += 4;
int num_rotations = static_cast<int>(rot_theta + 0.5);
(*block_orientation)[i] = num_rotations;
@@ -2537,7 +2537,7 @@ STRING HOcrEscape(const char* text) {
/** Find lines from the image making the BLOCK_LIST. */
BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
FindLines();
ASSERT_HOST(FindLines() == 0);
BLOCK_LIST* result = block_list_;
block_list_ = nullptr;
return result;
@@ -2694,7 +2694,11 @@ struct TESS_CHAR : ELIST_LINK {
strncpy(unicode_repr, repr, length);
}
TESS_CHAR() { // Satisfies ELISTIZE.
TESS_CHAR()
: unicode_repr(nullptr),
length(0),
cost(0.0f)
{ // Satisfies ELISTIZE.
}
~TESS_CHAR() {
delete [] unicode_repr;
@@ -293,16 +293,16 @@ void AffineMatrix(int writing_direction,
// these viewers. I chose this threshold large enough to absorb noise,
// but small enough that lines probably won't cross each other if the
// whole page is tilted at almost exactly the clipping threshold.
void ClipBaseline(int ppi, int x1, int y1, int x2, int y2,
int *line_x1, int *line_y1,
int *line_x2, int *line_y2) {
static void ClipBaseline(int ppi, int x1, int y1, int x2, int y2,
int *line_x1, int *line_y1,
int *line_x2, int *line_y2) {
*line_x1 = x1;
*line_y1 = y1;
*line_x2 = x2;
*line_y2 = y2;
double rise = abs(y2 - y1) * 72 / ppi;
double run = abs(x2 - x1) * 72 / ppi;
if (rise < 2.0 && 2.0 < run)
int rise = abs(y2 - y1) * 72;
int run = abs(x2 - x1) * 72;
if (rise < 2 * ppi && 2 * ppi < run)
*line_y1 = *line_y2 = (y1 + y2) / 2;
}
@@ -321,11 +321,10 @@ void Tesseract::MaximallyChopWord(const GenericVector<TBOX>& boxes,
/// miss metric gets the blob.
static double BoxMissMetric(const TBOX& box1, const TBOX& box2) {
const int overlap_area = box1.intersection(box2).area();
double miss_metric = box1.area()- overlap_area;
miss_metric /= box1.area();
miss_metric *= box2.area() - overlap_area;
miss_metric /= box2.area();
return miss_metric;
const int a = box1.area();
const int b = box2.area();
ASSERT_HOST(a != 0 && b != 0);
return 1.0 * (a - overlap_area) * (b - overlap_area) / a / b;
}
#ifndef DISABLED_LEGACY_ENGINE
Oops, something went wrong.

0 comments on commit 62c7b79

Please sign in to comment.