Skip to content
Permalink
Browse files

Improve the way Tesseract is initialized in hardsubx. Fix segfault during the cleaning of the frame data in hardsubx. (CCExtractor#1083)
  • Loading branch information...
thelastpolaris authored and cfsmp3 committed Mar 25, 2019
1 parent 414a57d commit 116f308a0ba1ae8b3d28bb850af815cab69f0968
Showing with 62 additions and 21 deletions.
  1. +42 −21 src/lib_ccx/hardsubx.c
  2. +19 −0 src/lib_ccx/ocr.c
  3. +1 −0 src/lib_ccx/ocr.h
@@ -110,10 +110,10 @@ int hardsubx_process_data(struct lib_hardsubx_ctx *ctx)
dinit_encoder(&enc_ctx, 0); //TODO: Replace 0 with end timestamp

// Free the allocated memory for frame processing
av_free(ctx->rgb_buffer);
av_frame_free(ctx->rgb_frame);
av_frame_free(ctx->frame);
avcodec_close(ctx->codec_ctx);
av_free(ctx->rgb_buffer);
if(ctx->frame) av_frame_free(&ctx->frame);
if(ctx->rgb_frame) av_frame_free(&ctx->rgb_frame);
avcodec_close(ctx->codec_ctx);
avformat_close_input(&ctx->format_ctx);
}

@@ -221,23 +221,44 @@ struct lib_hardsubx_ctx* _init_hardsubx(struct ccx_s_options *options)
ctx->tess_handle = TessBaseAPICreate();
char* pars_vec = strdup("debug_file");
char* pars_values = strdup("/dev/null");
char *tessdata_dir_path=".";

int ret = -1;
if(options->ocrlang)
{
ret = TessBaseAPIInit4(ctx->tess_handle, NULL, options->ocrlang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
&pars_values, 1, false);
if(ret != 0)
{
mprint("Failed loading language: %s, trying to load eng\n", options->ocrlang);
}
}
if(ret != 0)
{
ret = TessBaseAPIInit4(ctx->tess_handle, NULL, "eng", ccx_options.ocr_oem, NULL, 0, &pars_vec,
&pars_values, 1, false);
}
char* tessdata_path = NULL;

char* lang = options->ocrlang;
if(!lang) lang = "eng"; // English is default language

tessdata_path = probe_tessdata_location_string(lang);
if(!tessdata_path)
{
if (strcmp(lang, "eng") == 0)
{
mprint("eng.traineddata not found! No Switching Possible\n");
return NULL;
}
mprint("%s.traineddata not found! Switching to English\n", lang);
lang = "eng";
tessdata_path = probe_tessdata_location_string("eng");
if(!tessdata_path)
{
mprint("eng.traineddata not found! No Switching Possible\n");
return NULL;
}
}

int ret = -1;

if (!strncmp("4.", TessVersion(), 2))
{
char tess_path [1024];
snprintf(tess_path, 1024, "%s%s%s", tessdata_path, "/", "tessdata");
//ccx_options.ocr_oem are deprecated and only supported mode is OEM_LSTM_ONLY
ret = TessBaseAPIInit4(ctx->tess_handle, tess_path, lang, 1, NULL, 0, &pars_vec,
&pars_values, 1, false);
}
else
{
ret = TessBaseAPIInit4(ctx->tess_handle, tessdata_path, lang, ccx_options.ocr_oem, NULL, 0, &pars_vec,
&pars_values, 1, false);
}

free(pars_vec);
free(pars_values);
@@ -129,6 +129,25 @@ char* probe_tessdata_location(int lang_index)
return NULL;
}

/**
 * probe_tessdata_location_string
 *
 * Looks up a language string in the language[] table and, when it is found,
 * forwards the matching index to probe_tessdata_location().
 *
 * @param lang  Language name to search for; compared with strcmp() against
 *              every non-NULL entry of language[]. May be NULL.
 * @return      Whatever probe_tessdata_location() returns for the matching
 *              index, or NULL when lang is NULL or is not present in
 *              language[].
 */
char* probe_tessdata_location_string(char* lang)
{
	// strcmp() on a NULL pointer is undefined behavior; treat it as "not found"
	if (lang == NULL)
		return NULL;

	int lang_index = -1;
	for (int i = 0; i < NB_LANGUAGE; i++)
	{
		// Skip NULL slots in the table before comparing.
		// No break on purpose: if the name matched more than once, the last
		// matching index is the one that is used.
		if (language[i] != NULL && strcmp(lang, language[i]) == 0)
			lang_index = i;
	}

	if (lang_index == -1)
		return NULL; // No such language found

	return probe_tessdata_location(lang_index);
}

void* init_ocr(int lang_index)
{
int ret = -1;
@@ -12,6 +12,7 @@ struct image_copy //A copy of the original OCR image, used for color detection
};

void delete_ocr (void** arg);
char* probe_tessdata_location_string(char* lang);
void* init_ocr(int lang_index);
char* ocr_bitmap(void* arg, png_color *palette,png_byte *alpha, unsigned char* indata,int w, int h, struct image_copy *copy);
int ocr_rect(void* arg, struct cc_bitmap *rect, char **str, int bgcolor, int ocr_quantmode);

0 comments on commit 116f308

Please sign in to comment.
You can’t perform that action at this time.