Skip to content

Commit

Permalink
Merge pull request #125 from amitdo/osd-renderer
Browse files Browse the repository at this point in the history
Added osd renderer for psm 0.
  • Loading branch information
zdenop committed Oct 30, 2015
2 parents b882590 + 6bbcb50 commit 7089c7b
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 47 deletions.
70 changes: 58 additions & 12 deletions api/baseapi.cpp
Expand Up @@ -1198,35 +1198,39 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
SetInputName(filename);
SetImage(pix);
bool failed = false;
if (timeout_millisec > 0) {

if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
// Disabled character recognition
PageIterator* it = AnalyseLayout();

if (it == NULL) {
failed = true;
} else {
delete it;
}
} else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
failed = FindLines() != 0;
} else if (timeout_millisec > 0) {
// Running with a timeout.
ETEXT_DESC monitor;
monitor.cancel = NULL;
monitor.cancel_this = NULL;
monitor.set_deadline_msecs(timeout_millisec);

// Now run the main recognition.
failed = Recognize(&monitor) < 0;
} else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY ||
tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
// Disabled character recognition.
PageIterator* it = AnalyseLayout();
if (it == NULL) {
failed = true;
} else {
delete it;
PERF_COUNT_END
return true;
}
} else {
// Normal layout and character recognition with no timeout.
failed = Recognize(NULL) < 0;
}

if (tesseract_->tessedit_write_images) {
#ifndef ANDROID_BUILD
Pix* page_pix = GetThresholdedImage();
pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
#endif // ANDROID_BUILD
}

if (failed && retry_config != NULL && retry_config[0] != '\0') {
// Save current config variables before switching modes.
FILE* fp = fopen(kOldVarsFile, "wb");
Expand All @@ -1243,6 +1247,7 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
if (renderer && !failed) {
failed = !renderer->AddImage(this);
}

PERF_COUNT_END
return !failed;
}
Expand Down Expand Up @@ -1734,6 +1739,47 @@ char* TessBaseAPI::GetUNLVText() {
return result;
}

/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char* TessBaseAPI::GetOsdText(int page_number) {
OSResults osr;

bool osd = DetectOS(&osr);
if (!osd) {
return NULL;
}

int orient_id = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient_id);
float orient_conf = osr.best_result.oconfidence;
float script_conf = osr.best_result.sconfidence;
const char* script_name =
osr.unicharset->get_script_from_script_id(script_id);

// clockwise orientation of the input image, in degrees
int orient_deg = orient_id * 90;

// clockwise rotation needed to make the page upright
int rotate = OrientationIdToValue(orient_id);

char* osd_buf = new char[255];
snprintf(osd_buf, 255,
"Page number: %d\n"
"Orientation in degrees: %d\n"
"Rotate: %d\n"
"Orientation confidence: %.2f\n"
"Script: %s\n"
"Script confidence: %.2f\n",
page_number,
orient_deg, rotate, orient_conf,
script_name, script_conf);

return osd_buf;
}

/** Returns the average word confidence for Tesseract page result. */
int TessBaseAPI::MeanTextConf() {
int* conf = AllWordConfidences();
Expand Down
9 changes: 9 additions & 0 deletions api/baseapi.h
Expand Up @@ -600,12 +600,21 @@ class TESS_API TessBaseAPI {
* page_number is a 0-based page index that will appear in the box file.
*/
char* GetBoxText(int page_number);

/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes
* and must be freed with the delete [] operator.
*/
char* GetUNLVText();

/**
 * Returns orientation and script detection (OSD) results as text: the page
 * number, orientation in degrees, rotation, orientation confidence, script
 * name and script confidence, one per line.
 * The result is a char* which must be freed with the delete [] operator;
 * NULL is returned when detection fails.
 * page_number is a 0-based page index that will appear in the osd file.
 */
char* GetOsdText(int page_number);

/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
Expand Down
17 changes: 17 additions & 0 deletions api/renderer.cpp
Expand Up @@ -213,4 +213,21 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
return true;
}

/**********************************************************************
 * Osd Text Renderer interface implementation
 **********************************************************************/

/**
 * Constructs an OSD renderer by forwarding outputbase to the
 * TessResultRenderer base class together with the literal "osd"
 * (presumably the output file extension -- confirm against
 * TessResultRenderer's constructor).
 */
TessOsdRenderer::TessOsdRenderer(const char* outputbase)
    : TessResultRenderer(outputbase, "osd") {}

/**
 * Fetches the OSD text for image number imagenum() from the api and
 * appends it to this renderer's output via AppendString().
 * Returns false when GetOsdText() yields NULL, true otherwise.
 */
bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
  char* const osd_text = api->GetOsdText(imagenum());
  const bool have_text = (osd_text != NULL);

  if (have_text) {
    AppendString(osd_text);
    // GetOsdText() hands back a new[]-allocated buffer that we own.
    delete[] osd_text;
  }

  return have_text;
}

} // namespace tesseract
11 changes: 11 additions & 0 deletions api/renderer.h
Expand Up @@ -221,6 +221,17 @@ class TESS_API TessBoxTextRenderer : public TessResultRenderer {
virtual bool AddImageHandler(TessBaseAPI* api);
};

/**
 * Renders tesseract orientation and script detection (OSD) output
 * as text, using the "osd" output type.
 */
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
/** Creates a renderer for the given output base name. */
explicit TessOsdRenderer(const char* outputbase);

protected:
/**
 * Appends the OSD text obtained from api->GetOsdText() for one image.
 * Returns false if no OSD text could be obtained.
 */
virtual bool AddImageHandler(TessBaseAPI* api);
};

} // namespace tesseract.

#endif // TESSERACT_API_RENDERER_H__
66 changes: 31 additions & 35 deletions api/tesseractmain.cpp
Expand Up @@ -242,56 +242,52 @@ int main(int argc, char **argv) {
if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
api.SetPageSegMode(pagesegmode);

if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
pagesegmode == tesseract::PSM_OSD_ONLY) {
if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
int ret_val = 0;

Pix* pixs = pixRead(image);
if (!pixs) {
fprintf(stderr, "Cannot open input file: %s\n", image);
exit(2);
}

api.SetImage(pixs);

if (pagesegmode == tesseract::PSM_OSD_ONLY) {
OSResults osr;
if (api.DetectOS(&osr)) {
int orient = osr.best_result.orientation_id;
int script_id = osr.get_best_script(orient);
const char* script_name =
osr.unicharset->get_script_from_script_id(script_id);
float orient_oco = osr.best_result.oconfidence;
float orient_sco = osr.best_result.sconfidence;
tprintf("Orientation: %d\n"
"Orientation in degrees: %d\n"
"Orientation confidence: %.2f\n"
"Script: %s\n"
"Script confidence: %.2f\n",
orient, OrientationIdToValue(orient), orient_oco,
script_name, orient_sco);
} else {
ret_val = 1;
}
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;

tesseract::PageIterator* it = api.AnalyseLayout();
if (it) {
it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
} else {
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
tesseract::PageIterator* it = api.AnalyseLayout();
if (it) {
it->Orientation(&orientation, &direction, &order, &deskew_angle);
tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
} else {
ret_val = 1;
}
delete it;
ret_val = 1;
}

delete it;

pixDestroy(&pixs);
exit(ret_val);
}

if (pagesegmode == tesseract::PSM_OSD_ONLY) {
tesseract::TessResultRenderer* renderer =
new tesseract::TessOsdRenderer(outputbase);

bool succeed = api.ProcessPages(image, NULL, 0, renderer);
if (succeed) {
PERF_COUNT_END
return 0;
} else {
fprintf(stderr, "Error during processing.\n");
exit(1);
}
}

bool b;
tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
api.GetBoolVariable("tessedit_create_hocr", &b);
Expand Down

0 comments on commit 7089c7b

Please sign in to comment.