diff --git a/api/baseapi.cpp b/api/baseapi.cpp
index fa38d29001..23e32311ef 100644
--- a/api/baseapi.cpp
+++ b/api/baseapi.cpp
@@ -1198,35 +1198,40 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
   SetInputName(filename);
   SetImage(pix);
   bool failed = false;
-  if (timeout_millisec > 0) {
+
+  if (tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
+    // Disabled character recognition
+    PageIterator* it = AnalyseLayout();
+
+    if (it == NULL) {
+      failed = true;
+    } else {
+      delete it;
+    }
+  } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY) {
+    failed = FindLines() != 0;
+  } else if (timeout_millisec > 0) {
     // Running with a timeout.
     ETEXT_DESC monitor;
     monitor.cancel = NULL;
     monitor.cancel_this = NULL;
     monitor.set_deadline_msecs(timeout_millisec);
+
     // Now run the main recognition.
     failed = Recognize(&monitor) < 0;
-  } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY ||
-             tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) {
-    // Disabled character recognition.
-    PageIterator* it = AnalyseLayout();
-    if (it == NULL) {
-      failed = true;
-    } else {
-      delete it;
-      PERF_COUNT_END
-      return true;
-    }
   } else {
     // Normal layout and character recognition with no timeout.
     failed = Recognize(NULL) < 0;
   }
+
   if (tesseract_->tessedit_write_images) {
 #ifndef ANDROID_BUILD
     Pix* page_pix = GetThresholdedImage();
     pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4);
+    pixDestroy(&page_pix);  // Caller owns the Pix from GetThresholdedImage().
 #endif  // ANDROID_BUILD
   }
+
   if (failed && retry_config != NULL && retry_config[0] != '\0') {
     // Save current config variables before switching modes.
     FILE* fp = fopen(kOldVarsFile, "wb");
@@ -1243,6 +1248,7 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
   if (renderer && !failed) {
     failed = !renderer->AddImage(this);
   }
+
   PERF_COUNT_END
   return !failed;
 }
@@ -1734,6 +1740,47 @@ char* TessBaseAPI::GetUNLVText() {
   return result;
 }
 
+/**
+ * Runs orientation and script detection (OSD) and returns the result as
+ * UTF8 text that must be freed with the delete [] operator, or NULL if
+ * detection fails. page_number is a 0-based index echoed in the output.
+ */
+char* TessBaseAPI::GetOsdText(int page_number) {
+  OSResults osr;
+
+  bool osd = DetectOS(&osr);
+  if (!osd) {
+    return NULL;
+  }
+
+  int orient_id = osr.best_result.orientation_id;
+  int script_id = osr.get_best_script(orient_id);
+  float orient_conf = osr.best_result.oconfidence;
+  float script_conf = osr.best_result.sconfidence;
+  const char* script_name =
+      osr.unicharset->get_script_from_script_id(script_id);
+
+  // clockwise orientation of the input image, in degrees
+  int orient_deg = orient_id * 90;
+
+  // clockwise rotation needed to make the page upright
+  int rotate = OrientationIdToValue(orient_id);
+
+  char* osd_buf = new char[255];
+  snprintf(osd_buf, 255,
+          "Page number: %d\n"
+          "Orientation in degrees: %d\n"
+          "Rotate: %d\n"
+          "Orientation confidence: %.2f\n"
+          "Script: %s\n"
+          "Script confidence: %.2f\n",
+          page_number,
+          orient_deg, rotate, orient_conf,
+          script_name, script_conf);
+
+  return osd_buf;
+}
+
 /** Returns the average word confidence for Tesseract page result. */
 int TessBaseAPI::MeanTextConf() {
   int* conf = AllWordConfidences();
diff --git a/api/baseapi.h b/api/baseapi.h
index a8c500cc73..48cd7472f3 100644
--- a/api/baseapi.h
+++ b/api/baseapi.h
@@ -600,12 +600,21 @@ class TESS_API TessBaseAPI {
    * page_number is a 0-based page index that will appear in the box file.
    */
   char* GetBoxText(int page_number);
+
   /**
    * The recognized text is returned as a char* which is coded
    * as UNLV format Latin-1 with specific reject and suspect codes
    * and must be freed with the delete [] operator.
    */
   char* GetUNLVText();
+
+  /**
+   * Runs orientation and script detection (OSD) and returns the result as
+   * UTF8 text that must be freed with the delete [] operator, or NULL if
+   * detection fails. page_number is a 0-based index echoed in the output.
+   */
+  char* GetOsdText(int page_number);
+
   /** Returns the (average) confidence value between 0 and 100. */
   int MeanTextConf();
   /**
diff --git a/api/renderer.cpp b/api/renderer.cpp
index 3ce97a2ab1..5050a232a1 100644
--- a/api/renderer.cpp
+++ b/api/renderer.cpp
@@ -213,4 +213,21 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) {
   return true;
 }
 
+/**********************************************************************
+ * Osd Text Renderer interface implementation
+ **********************************************************************/
+TessOsdRenderer::TessOsdRenderer(const char* outputbase)
+    : TessResultRenderer(outputbase, "osd") {
+}
+
+bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) {
+  char* osd = api->GetOsdText(imagenum());
+  if (osd == NULL) return false;
+
+  AppendString(osd);
+  delete[] osd;
+
+  return true;
+}
+
 }  // namespace tesseract
diff --git a/api/renderer.h b/api/renderer.h
index 6d189daddf..4120f74eb3 100644
--- a/api/renderer.h
+++ b/api/renderer.h
@@ -221,6 +221,17 @@ class TESS_API TessBoxTextRenderer : public TessResultRenderer {
   virtual bool AddImageHandler(TessBaseAPI* api);
 };
 
+/**
+ * Renders tesseract output into an osd text string
+ */
+class TESS_API TessOsdRenderer : public TessResultRenderer {
+ public:
+  explicit TessOsdRenderer(const char* outputbase);
+
+ protected:
+  virtual bool AddImageHandler(TessBaseAPI* api);
+};
+
 }  // namespace tesseract.
 
 #endif  // TESSERACT_API_RENDERER_H__
diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp
index c305b3455f..f267d6c84c 100644
--- a/api/tesseractmain.cpp
+++ b/api/tesseractmain.cpp
@@ -242,8 +242,7 @@ int main(int argc, char **argv) {
   if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
     api.SetPageSegMode(pagesegmode);
 
-  if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
-      pagesegmode == tesseract::PSM_OSD_ONLY) {
+  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
     int ret_val = 0;
 
     Pix* pixs = pixRead(image);
@@ -251,47 +250,44 @@ int main(int argc, char **argv) {
       fprintf(stderr, "Cannot open input file: %s\n", image);
       exit(2);
     }
+
     api.SetImage(pixs);
 
-    if (pagesegmode == tesseract::PSM_OSD_ONLY) {
-      OSResults osr;
-      if (api.DetectOS(&osr)) {
-        int orient = osr.best_result.orientation_id;
-        int script_id = osr.get_best_script(orient);
-        const char* script_name =
-            osr.unicharset->get_script_from_script_id(script_id);
-        float orient_oco = osr.best_result.oconfidence;
-        float orient_sco = osr.best_result.sconfidence;
-        tprintf("Orientation: %d\n"
-                "Orientation in degrees: %d\n"
-                "Orientation confidence: %.2f\n"
-                "Script: %s\n"
-                "Script confidence: %.2f\n",
-                orient, OrientationIdToValue(orient), orient_oco,
-                script_name, orient_sco);
-      } else {
-        ret_val = 1;
-      }
+    tesseract::Orientation orientation;
+    tesseract::WritingDirection direction;
+    tesseract::TextlineOrder order;
+    float deskew_angle;
+
+    tesseract::PageIterator* it = api.AnalyseLayout();
+    if (it) {
+      it->Orientation(&orientation, &direction, &order, &deskew_angle);
+      tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
+              "Deskew angle: %.4f\n",
+              orientation, direction, order, deskew_angle);
     } else {
-      tesseract::Orientation orientation;
-      tesseract::WritingDirection direction;
-      tesseract::TextlineOrder order;
-      float deskew_angle;
-      tesseract::PageIterator* it = api.AnalyseLayout();
-      if (it) {
-        it->Orientation(&orientation, &direction, &order, &deskew_angle);
-        tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
-                "Deskew angle: %.4f\n",
-                orientation, direction, order, deskew_angle);
-      } else {
-        ret_val = 1;
-      }
-      delete it;
+      ret_val = 1;
     }
+
+    delete it;
+
     pixDestroy(&pixs);
     exit(ret_val);
   }
 
+  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
+    // Stack-allocated: destroyed automatically when main returns.
+    tesseract::TessOsdRenderer renderer(outputbase);
+
+    bool succeed = api.ProcessPages(image, NULL, 0, &renderer);
+    if (succeed) {
+      PERF_COUNT_END
+      return 0;
+    } else {
+      fprintf(stderr, "Error during processing.\n");
+      exit(1);
+    }
+  }
+
   bool b;
   tesseract::PointerVector<tesseract::TessResultRenderer> renderers;
   api.GetBoolVariable("tessedit_create_hocr", &b);