Permalink
Browse files

Various fixes for Tesseract 4 beta.3

  • Loading branch information...
jeroen committed Jul 10, 2018
1 parent 6514c58 commit 2784542f7cd21ed99a9b5b43961179a4922e7383
Showing with 17 additions and 32 deletions.
  1. +5 −0 NEWS
  2. +0 −4 R/RcppExports.R
  3. +1 −2 src/Makevars.win
  4. +0 −11 src/RcppExports.cpp
  5. +9 −13 src/tesseract.cpp
  6. +1 −1 src/tesseract_types.h
  7. +1 −1 src/test.h
5 NEWS
@@ -1,3 +1,8 @@
2.2
- Fixes for breaking changes in Tesseract 4.0.0 beta.3
- Set LC_ALL = C when initializing tesseract
- Include <tesseract/*> to support Tesseract 4

2.1
- Fixes for 4.0.0-beta.1: they switched to semver + other data branch
- Set LC_CTYPE to "C" when loading training data (required for some Asian languages)
@@ -17,10 +17,6 @@ validate_params <- function(params) {
.Call('_tesseract_validate_params', PACKAGE = 'tesseract', params)
}

# Forwards `path` to the registered native routine
# '_tesseract_validate_paramfile' in the 'tesseract' package and returns
# whatever that routine yields, invisibly.
validate_paramfile <- function(path) {
  native_result <- .Call('_tesseract_validate_paramfile', PACKAGE = 'tesseract', path)
  invisible(native_result)
}

# Forwards `ptr` to the registered native routine
# '_tesseract_engine_info_internal' in the 'tesseract' package and returns
# the routine's result unchanged (visibly).
engine_info_internal <- function(ptr) {
  engine_info <- .Call('_tesseract_engine_info_internal', PACKAGE = 'tesseract', ptr)
  engine_info
}
@@ -11,8 +11,7 @@ endif
CXX_STD=CXX11

PKG_CPPFLAGS = \
-I${RWINLIB}/include/tesseract \
-I${RWINLIB}/include/leptonica
-I${RWINLIB}/include -I${RWINLIB}/include/leptonica

PKG_LIBS = \
-L${RWINLIB}/lib${subst gcc,,${COMPILED_BY}}${R_ARCH} \
@@ -55,16 +55,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// validate_paramfile
// .Call entry point that unwraps one R argument and forwards it to the
// C++ function validate_paramfile().
// Forward declaration of the implementation being wrapped.
void validate_paramfile(const char * path);
// pathSEXP: the R-level argument; it is converted to `const char *` below.
// Always returns R_NilValue because the wrapped function returns void.
RcppExport SEXP _tesseract_validate_paramfile(SEXP pathSEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros that bracket the wrapper body.
// NOTE(review): presumably they translate C++ exceptions into R errors; confirm in the Rcpp docs.
BEGIN_RCPP
// NOTE(review): presumably scopes R's RNG state for the duration of the call; confirm in the Rcpp docs.
Rcpp::RNGScope rcpp_rngScope_gen;
// Build the `const char *` argument `path` from the incoming SEXP.
Rcpp::traits::input_parameter< const char * >::type path(pathSEXP);
validate_paramfile(path);
// Nothing to hand back to R: the callee has no return value.
return R_NilValue;
END_RCPP
}
// engine_info_internal
Rcpp::List engine_info_internal(TessPtr ptr);
RcppExport SEXP _tesseract_engine_info_internal(SEXP ptrSEXP) {
@@ -155,7 +145,6 @@ static const R_CallMethodDef CallEntries[] = {
{"_tesseract_tesseract_engine_internal", (DL_FUNC) &_tesseract_tesseract_engine_internal, 5},
{"_tesseract_tesseract_engine_set_variable", (DL_FUNC) &_tesseract_tesseract_engine_set_variable, 3},
{"_tesseract_validate_params", (DL_FUNC) &_tesseract_validate_params, 1},
{"_tesseract_validate_paramfile", (DL_FUNC) &_tesseract_validate_paramfile, 1},
{"_tesseract_engine_info_internal", (DL_FUNC) &_tesseract_engine_info_internal, 1},
{"_tesseract_print_params", (DL_FUNC) &_tesseract_print_params, 1},
{"_tesseract_get_param_values", (DL_FUNC) &_tesseract_get_param_values, 2},
@@ -1,11 +1,14 @@
#include "tesseract_types.h"
#include <genericvector.h>
#include <params.h>
#include <tesseract/genericvector.h>

// [[Rcpp::export]]
Rcpp::List tesseract_config(){
char old_ctype[100];
strncpy(old_ctype, setlocale(LC_ALL, NULL), 99);
setlocale(LC_ALL,"C");
tesseract::TessBaseAPI api;
api.InitForAnalysePage();
setlocale(LC_CTYPE, old_ctype);
return Rcpp::List::create(
Rcpp::_["version"] = tesseract::TessBaseAPI::Version(),
Rcpp::_["path"] = api.GetDatapath()
@@ -29,13 +32,12 @@ TessPtr tesseract_engine_internal(Rcpp::CharacterVector datapath, Rcpp::Characte
params.push_back(std::string(opt_names.at(i)).c_str());
values.push_back(std::string(opt_values.at(i)).c_str());
}
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// workaround for https://github.com/ropensci/tesseract/issues/14
char old_ctype[100];
strncpy(old_ctype, setlocale(LC_CTYPE, NULL), 99);
setlocale(LC_CTYPE,"C");
strncpy(old_ctype, setlocale(LC_ALL, NULL), 99);
setlocale(LC_ALL,"C");
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
int err = api->Init(path, lang, tesseract::OEM_DEFAULT, configs, confpaths.length(), &params, &values, false);
setlocale(LC_CTYPE, old_ctype);
setlocale(LC_ALL, old_ctype);
if(err){
delete api;
throw std::runtime_error(std::string("Unable to find training data for: ") + (lang ? lang : "eng") + ". Please consult manual for: ?tesseract_download");
@@ -71,12 +73,6 @@ Rcpp::LogicalVector validate_params(Rcpp::CharacterVector params){
return out;
}

// [[Rcpp::export]]
void validate_paramfile(const char * path){
tesseract::ParamsVectors p;
tesseract::ParamUtils::ReadParamsFile(path, tesseract::SET_PARAM_CONSTRAINT_NONE, &p);
}

// [[Rcpp::export]]
Rcpp::List engine_info_internal(TessPtr ptr){
tesseract::TessBaseAPI * api = get_engine(ptr);
@@ -1,4 +1,4 @@
#include <baseapi.h>
#include <tesseract/baseapi.h>
#include <allheaders.h>

#define R_NO_REMAP
@@ -1,2 +1,2 @@
#include <baseapi.h>
#include <tesseract/baseapi.h>
#include <allheaders.h>

0 comments on commit 2784542

Please sign in to comment.