Skip to content
This repository has been archived by the owner on Mar 17, 2022. It is now read-only.

Commit

Permalink
Add monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
rmtheis committed Dec 18, 2014
1 parent 33f6d57 commit 5d2e03b
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 19 deletions.
Expand Up @@ -493,7 +493,7 @@ char* TessBaseAPI::TesseractRect(const unsigned char* imagedata,
bytes_per_pixel, bytes_per_line);
SetRectangle(left, top, width, height);

return GetUTF8Text();
return GetUTF8Text(NULL);
}

/**
Expand Down Expand Up @@ -1257,9 +1257,9 @@ MutableIterator* TessBaseAPI::GetMutableIterator() {
}

/** Make a text string from the internal data structures. */
char* TessBaseAPI::GetUTF8Text() {
char* TessBaseAPI::GetUTF8Text(struct ETEXT_DESC* monitor) {
if (tesseract_ == NULL ||
(!recognition_done_ && Recognize(NULL) < 0))
(!recognition_done_ && Recognize(monitor) < 0))
return NULL;
STRING text("");
ResultIterator *it = GetIterator();
Expand Down Expand Up @@ -1358,9 +1358,9 @@ static void AddBoxTohOCR(const PageIterator *it,
* GetHOCRText
* STL removed from original patch submission and refactored by rays.
*/
char* TessBaseAPI::GetHOCRText(int page_number) {
char* TessBaseAPI::GetHOCRText(int page_number, struct ETEXT_DESC* monitor) {
if (tesseract_ == NULL ||
(page_res_ == NULL && Recognize(NULL) < 0))
(page_res_ == NULL && Recognize(monitor) < 0))
return NULL;

int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
Expand Down Expand Up @@ -1752,7 +1752,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
PageSegMode current_psm = GetPageSegMode();
SetPageSegMode(mode);
SetVariable("classify_enable_learning", "0");
char* text = GetUTF8Text();
char* text = GetUTF8Text(NULL);
if (debug) {
tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr);
}
Expand Down
Expand Up @@ -579,14 +579,14 @@ class TESS_API TessBaseAPI {
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char* GetUTF8Text();
char* GetUTF8Text(ETEXT_DESC* monitor);

/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
*/
char* GetHOCRText(int page_number);
char* GetHOCRText(int page_number, ETEXT_DESC* monitor);

/**
* The recognized text is returned as a char* which is coded in the same
Expand Down
Expand Up @@ -451,12 +451,12 @@ TESS_API TessMutableIterator* TESS_CALL TessBaseAPIGetMutableIterator(TessBaseAP

TESS_API char* TESS_CALL TessBaseAPIGetUTF8Text(TessBaseAPI* handle)
{
return handle->GetUTF8Text();
return handle->GetUTF8Text(NULL);
}

TESS_API char* TESS_CALL TessBaseAPIGetHOCRText(TessBaseAPI* handle, int page_number)
{
return handle->GetHOCRText(page_number);
return handle->GetHOCRText(page_number, NULL);
}

TESS_API char* TESS_CALL TessBaseAPIGetBoxText(TessBaseAPI* handle, int page_number)
Expand Down
Expand Up @@ -106,7 +106,7 @@ TessTextRenderer::TessTextRenderer(const char *outputbase)
}

bool TessTextRenderer::AddImageHandler(TessBaseAPI* api) {
char* utf8 = api->GetUTF8Text();
char* utf8 = api->GetUTF8Text(NULL);
if (utf8 == NULL) {
return false;
}
Expand Down Expand Up @@ -163,7 +163,7 @@ bool TessHOcrRenderer::EndDocumentHandler() {
}

bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) {
char* hocr = api->GetHOCRText(imagenum());
char* hocr = api->GetHOCRText(imagenum(), NULL);
if (hocr == NULL) return false;

AppendString(hocr);
Expand Down
Expand Up @@ -227,7 +227,12 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor,
if (pass_n == 1)
monitor->progress = 30 + 50 * w / words->size();
else
monitor->progress = 80 + 10 * w / words->size();
monitor->progress = 80 + 20 * w / words->size();
if (monitor->progress_callback != NULL) {
TBOX box = word->word->word->bounding_box();
(*monitor->progress_callback)(monitor->progress_this, monitor->progress,
box.left(), box.right(), box.top(), box.bottom());
}
if (monitor->deadline_exceeded() ||
(monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,
words->size()))) {
Expand Down
Expand Up @@ -108,6 +108,8 @@ typedef struct { /*single character */
* user words found. If it returns true then operation is cancelled.
**********************************************************************/
typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
/**********************************************************************
 * PROGRESS_FUNC
 * Progress-report callback type, stored in ETEXT_DESC::progress_callback.
 * RecogAllWordsPassN invokes it (when non-NULL) after updating
 * monitor->progress, passing:
 *   progress_this - the opaque ETEXT_DESC::progress_this pointer
 *   progress      - the current monitor->progress value
 *   left, right, top, bottom - edges of the bounding box of the word
 *                   being processed
 * NOTE(review): that call site discards the bool result, so the meaning
 * of the return value is not defined by the visible code.
 **********************************************************************/
typedef bool (*PROGRESS_FUNC)(void* progress_this, int progress,
int left, int right, int top, int bottom);

class ETEXT_DESC { // output header
public:
Expand All @@ -117,13 +119,16 @@ class ETEXT_DESC { // output header
volatile inT8 ocr_alive; // ocr sets to 1, HP 0
inT8 err_code; // for errcode use
CANCEL_FUNC cancel; // returns true to cancel
PROGRESS_FUNC progress_callback;//called whenever progress increases
void* cancel_this; // this or other data for cancel
void* progress_this; // this or other data for progress
struct timeval end_time; // time to stop. expected to be set only by call
// to set_deadline_msecs()
EANYCODE_CHAR text[1]; // character data

ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
err_code(0), cancel(NULL), cancel_this(NULL) {
err_code(0), cancel(NULL), progress_callback(NULL),
cancel_this(NULL), progress_this(NULL) {
end_time.tv_sec = 0;
end_time.tv_usec = 0;
}
Expand Down
85 changes: 82 additions & 3 deletions tess-two/jni/com_googlecode_tesseract_android/tessbaseapi.cpp
Expand Up @@ -20,23 +20,83 @@
#include "android/bitmap.h"
#include "common.h"
#include "baseapi.h"
#include "ocrclass.h"
#include "allheaders.h"

static jfieldID field_mNativeData;
static jmethodID method_onProgressValues;

struct native_data_t {
tesseract::TessBaseAPI api;
PIX *pix;
void *data;
bool debug;

l_int32 lastProgress;
bool cancel_ocr;

JNIEnv *cachedEnv;
jobject* cachedObject;

bool isStateValid() {
if (cancel_ocr == false && cachedEnv != NULL && cachedObject != NULL) {
return true;
} else {
LOGI("state is cancelled");
return false;
}
}

void initStateVariables(JNIEnv* env, jobject *object) {
cancel_ocr = false;
cachedEnv = env;
cachedObject = object;
lastProgress = 0;
}

void resetStateVariables() {
cancel_ocr = false;
cachedEnv = NULL;
cachedObject = NULL;
lastProgress = 0;
}

native_data_t() {
lastProgress = 0;
pix = NULL;
data = NULL;
debug = false;
cachedEnv = NULL;
cachedObject = NULL;
cancel_ocr = false;
}
};

/**
* Callback for Tesseract's monitor to cancel recognition.
*/
bool cancelFunc(void* cancel_this, int words) {
native_data_t *nat = (native_data_t*)cancel_this;
return nat->cancel_ocr;
}

/**
* Callback for Tesseract's monitor to update progress.
*/
bool progressJavaCallback(void* progress_this, int progress, int left, int right,
int top, int bottom) {
native_data_t *nat = (native_data_t*)progress_this;

if (nat->isStateValid()) {
if (progress > nat->lastProgress || left != 0 || right != 0 || top != 0 || bottom != 0) {
nat->cachedEnv->CallVoidMethod(*(nat->cachedObject), method_onProgressValues, progress,
(jint) left, (jint) right, (jint) top, (jint) bottom);
nat->lastProgress = progress;
}
}
return true;
}

/** Reads the Java object's long field mNativeData and reinterprets it as the native state pointer. */
static inline native_data_t * get_native_data(JNIEnv *env, jobject object) {
  return reinterpret_cast<native_data_t *>(
      env->GetLongField(object, field_mNativeData));
}
Expand All @@ -60,6 +120,7 @@ void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeClassInit(JNIEnv* e
jclass clazz) {

field_mNativeData = env->GetFieldID(clazz, "mNativeData", "J");
method_onProgressValues = env->GetMethodID(clazz, "onProgressValues", "(IIIII)V");
}

void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeConstruct(JNIEnv* env,
Expand Down Expand Up @@ -229,12 +290,20 @@ jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetUTF8Text(JNIE
jobject thiz) {

native_data_t *nat = get_native_data(env, thiz);
nat->initStateVariables(env, &thiz);

char *text = nat->api.GetUTF8Text();
ETEXT_DESC monitor;
monitor.progress_callback = progressJavaCallback;
monitor.cancel = cancelFunc;
monitor.cancel_this = nat;
monitor.progress_this = nat;

char *text = nat->api.GetUTF8Text(&monitor);

jstring result = env->NewStringUTF(text);

free(text);
nat->resetStateVariables();

return result;
}
Expand All @@ -244,7 +313,9 @@ void Java_com_googlecode_tesseract_android_TessBaseAPI_nativeStop(JNIEnv *env,

native_data_t *nat = get_native_data(env, thiz);

// TODO How do we stop without a monitor?!
// Stop by setting a flag that's used by the monitor
nat->resetStateVariables();
nat->cancel_ocr = true;
}

jint Java_com_googlecode_tesseract_android_TessBaseAPI_nativeMeanConfidence(JNIEnv *env,
Expand Down Expand Up @@ -457,12 +528,20 @@ jstring Java_com_googlecode_tesseract_android_TessBaseAPI_nativeGetHOCRText(JNIE
jobject thiz, jint page) {

native_data_t *nat = get_native_data(env, thiz);
nat->initStateVariables(env, &thiz);

ETEXT_DESC monitor;
monitor.progress_callback = progressJavaCallback;
monitor.cancel = cancelFunc;
monitor.cancel_this = nat;
monitor.progress_this = nat;

char *text = nat->api.GetHOCRText(page);
char *text = nat->api.GetHOCRText(page, &monitor);

jstring result = env->NewStringUTF(text);

free(text);
nat->resetStateVariables();

return result;
}
Expand Down

0 comments on commit 5d2e03b

Please sign in to comment.