Skip to content

Commit

Permalink
Replace strcpy and strncpy by new inline helper function
Browse files Browse the repository at this point in the history
Signed-off-by: Stefan Weil <sw@weilnetz.de>
  • Loading branch information
stweil committed May 24, 2024
1 parent ea82f91 commit c5b0c2f
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 55 deletions.
6 changes: 2 additions & 4 deletions src/api/altorenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
Expand Down Expand Up @@ -270,12 +271,9 @@ char *TessBaseAPI::GetAltoText(ETEXT_DESC *monitor, int page_number) {

alto_str << "\t\t\t</PrintSpace>\n"
<< "\t\t</Page>\n";
const std::string &text = alto_str.str();

char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
delete res_it;
return result;
return copy_string(alto_str.str());
}

} // namespace tesseract
15 changes: 4 additions & 11 deletions src/api/baseapi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "equationdetect.h" // for EquationDetect, destructor of equ_detect_
#endif // ndef DISABLED_LEGACY_ENGINE
#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for IntCastRounded, chomp_string
#include "helpers.h" // for IntCastRounded, chomp_string, copy_string
#include "host.h" // for MAX_PATH
#include "imageio.h" // for IFF_TIFF_G4, IFF_TIFF, IFF_TIFF_G3, ...
#ifndef DISABLED_LEGACY_ENGINE
Expand Down Expand Up @@ -1378,9 +1378,7 @@ char *TessBaseAPI::GetUTF8Text() {
const std::unique_ptr<const char[]> para_text(it->GetUTF8Text(RIL_PARA));
text += para_text.get();
} while (it->Next(RIL_PARA));
char *result = new char[text.length() + 1];
strncpy(result, text.c_str(), text.length() + 1);
return result;
return copy_string(text);
}

static void AddBoxToTSV(const PageIterator *it, PageIteratorLevel level, std::string &text) {
Expand Down Expand Up @@ -1509,9 +1507,7 @@ char *TessBaseAPI::GetTSVText(int page_number) {
#endif
}

char *ret = new char[tsv_str.length() + 1];
strcpy(ret, tsv_str.c_str());
return ret;
return copy_string(tsv_str);
}

/** The 5 numbers output for each box (the usual 4 and a page number.) */
Expand Down Expand Up @@ -1759,10 +1755,7 @@ char *TessBaseAPI::GetOsdText(int page_number) {
<< "Orientation confidence: " << orient_conf << "\n"
<< "Script: " << script_name << "\n"
<< "Script confidence: " << script_conf << "\n";
const std::string &text = stream.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(stream.str());
}

#endif // ndef DISABLED_LEGACY_ENGINE
Expand Down
6 changes: 2 additions & 4 deletions src/api/hocrrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract

namespace tesseract {
Expand Down Expand Up @@ -480,10 +481,7 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
hocr_str << " </div>\n";

const std::string &text = hocr_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(hocr_str.str());
}

/**********************************************************************
Expand Down
5 changes: 2 additions & 3 deletions src/api/lstmboxrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract

namespace tesseract {
Expand Down Expand Up @@ -81,10 +82,8 @@ char *TessBaseAPI::GetLSTMBoxText(int page_number = 0) {
AddBoxToLSTM(right, bottom, top, image_height_, page_number, lstm_box_str);
lstm_box_str += "\n"; // end of PAGE
}
char *ret = new char[lstm_box_str.length() + 1];
strcpy(ret, lstm_box_str.c_str());
delete res_it;
return ret;
return copy_string(lstm_box_str);
}

/**********************************************************************
Expand Down
10 changes: 2 additions & 8 deletions src/api/pagerenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "errcode.h" // for ASSERT_HOST
#include "helpers.h" // for copy_string
#ifdef _WIN32
# include "host.h" // windows.h for MultiByteToWideChar, ...
#endif
Expand Down Expand Up @@ -1143,15 +1144,8 @@ char *TessBaseAPI::GetPAGEText(ETEXT_DESC *monitor, int page_number) {
const std::string &text = reading_order_str.str();
reading_order_str.str("");

// Allocate memory for result to hold text.length() characters plus a null
// terminator Safely copy the string into result, ensuring no overflow strncpy
// does not necessarily null-terminate the destination, so do it manually
char *result = new char[text.length() + 1];
strncpy(result, text.c_str(), text.length());
result[text.length()] = '\0';

delete res_it;
return result;
return copy_string(text);
}

} // namespace tesseract
7 changes: 2 additions & 5 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

#include "pdf_ttf.h"
#include "tprintf.h"
#include "helpers.h" // for Swap
#include "helpers.h" // for Swap, copy_string

#include <allheaders.h>
#include <tesseract/baseapi.h>
Expand Down Expand Up @@ -497,10 +497,7 @@ char *TessPDFRenderer::GetPDFTextObjects(TessBaseAPI *api, double width, double
pdf_str << "ET\n"; // end the text object
}
}
const std::string &text = pdf_str.str();
char *result = new char[text.length() + 1];
strcpy(result, text.c_str());
return result;
return copy_string(pdf_str.str());
}

bool TessPDFRenderer::BeginDocumentHandler() {
Expand Down
5 changes: 2 additions & 3 deletions src/api/wordstrboxrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <tesseract/baseapi.h> // for TessBaseAPI
#include <tesseract/renderer.h>
#include "helpers.h" // for copy_string
#include "tesseractclass.h" // for Tesseract

namespace tesseract {
Expand Down Expand Up @@ -80,10 +81,8 @@ char *TessBaseAPI::GetWordStrBoxText(int page_number = 0) {
wordstr_box_str += " " + std::to_string(page_number); // row for tab for EOL
wordstr_box_str += "\n";
}
char *ret = new char[wordstr_box_str.length() + 1];
strcpy(ret, wordstr_box_str.c_str());
delete res_it;
return ret;
return copy_string(wordstr_box_str);
}

/**********************************************************************
Expand Down
17 changes: 4 additions & 13 deletions src/ccmain/ltrresultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <tesseract/ltrresultiterator.h>

#include "helpers.h" // for copy_string
#include "pageres.h"
#include "tesseractclass.h"

Expand Down Expand Up @@ -76,10 +77,7 @@ char *LTRResultIterator::GetUTF8Text(PageIteratorLevel level) const {
}
} while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
}
int length = text.length() + 1;
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
return copy_string(text);
}

// Set the string inserted at the end of each text line. "\n" by default.
Expand Down Expand Up @@ -310,11 +308,7 @@ char *LTRResultIterator::WordTruthUTF8Text() const {
if (!HasTruthString()) {
return nullptr;
}
std::string truth_text = it_->word()->blamer_bundle->TruthString();
int length = truth_text.length() + 1;
char *result = new char[length];
strncpy(result, truth_text.c_str(), length);
return result;
return copy_string(it_->word()->blamer_bundle->TruthString());
}

// Returns the null terminated UTF-8 encoded normalized OCR string for the
Expand All @@ -330,10 +324,7 @@ char *LTRResultIterator::WordNormedUTF8Text() const {
for (unsigned i = 0; i < best_choice->length(); ++i) {
ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
}
auto length = ocr_text.length() + 1;
char *result = new char[length];
strncpy(result, ocr_text.c_str(), length);
return result;
return copy_string(ocr_text);
}

// Returns a pointer to serialized choice lattice.
Expand Down
6 changes: 2 additions & 4 deletions src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <tesseract/resultiterator.h>

#include "helpers.h" // for copy_string
#include "pageres.h"
#include "tesseractclass.h"
#include "unicharset.h"
Expand Down Expand Up @@ -681,10 +682,7 @@ char *ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
}
} break;
}
int length = text.length() + 1;
char *result = new char[length];
strncpy(result, text.c_str(), length);
return result;
return copy_string(text);
}
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*ResultIterator::GetRawLSTMTimesteps() const {
Expand Down
11 changes: 11 additions & 0 deletions src/ccutil/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@

namespace tesseract {

// Duplicate the contents of a std::string into a freshly allocated,
// null-terminated char array.
// The array is created with new[] and holds from.length() + 1 bytes; every
// byte of the string (including any embedded '\0') is copied, followed by a
// terminating '\0'. The returned pointer is handed to the caller.
// TODO: Remove this function once the related code has been converted
// to use std::string.
inline char *copy_string(const std::string &from) {
  const auto size = from.size();
  // One extra byte for the terminator.
  auto *buffer = new char[size + 1];
  std::char_traits<char>::copy(buffer, from.data(), size);
  buffer[size] = '\0';
  return buffer;
}

template <class T>
inline bool contains(const std::vector<T> &data, const T &value) {
return std::find(data.begin(), data.end(), value) != data.end();
Expand Down

0 comments on commit c5b0c2f

Please sign in to comment.