Fix vector resize with init for all elements (issue tesseract-ocr#3473)
Fixes: c8b8d26
Fixes: 9710bc0
Signed-off-by: Stefan Weil <sw@weilnetz.de>
stweil committed Jun 29, 2021
1 parent ff1062d commit b836f30
Showing 21 changed files with 44 additions and 23 deletions.
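
The pattern behind every change in this commit: std::vector::resize(n, value) only value-initializes elements that are newly appended; elements that already exist keep their previous contents, so reusing a vector this way can leave stale data behind. Below is a minimal standalone sketch (illustrative only, not part of the commit) of the pitfall and of the two fixes used here: clear() before resize(), or constructing a fresh vector with its size and fill value.

#include <cassert>
#include <vector>

int main() {
  std::vector<bool> flags;

  flags.resize(3, false);    // first use: all three elements are false
  flags[1] = true;

  flags.resize(3, false);    // reuse without clearing: size is already 3,
  assert(flags[1] == true);  // so nothing is re-initialized -- stale value remains

  flags.clear();             // the fix used throughout this commit:
  flags.resize(3);           // clear(), then resize() value-initializes every element
  assert(flags[1] == false);

  // For freshly declared local vectors the commit simply constructs
  // with size and fill value in one step:
  std::vector<int> page_numbers(3, -1);
  assert(page_numbers[2] == -1);
  return 0;
}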
21 changes: 14 additions & 7 deletions src/ccmain/control.cpp
@@ -982,9 +982,12 @@ void Tesseract::AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *
std::vector<bool> *overlapped_any_blob,
std::vector<C_BLOB *> *target_blobs) {
std::vector<bool> blob_wanted;
- word_wanted->resize(outlines.size(), false);
- overlapped_any_blob->resize(outlines.size(), false);
- target_blobs->resize(outlines.size(), nullptr);
+ word_wanted->clear();
+ word_wanted->resize(outlines.size());
+ overlapped_any_blob->clear();
+ overlapped_any_blob->resize(outlines.size());
+ target_blobs->clear();
+ target_blobs->resize(outlines.size());
// For each real blob, find the outlines that seriously overlap it.
// A single blob could be several merged characters, so there can be quite
// a few outlines overlapping, and the full engine needs to be used to chop
@@ -993,7 +996,8 @@
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB *blob = blob_it.data();
const TBOX blob_box = blob->bounding_box();
- blob_wanted.resize(outlines.size(), false);
+ blob_wanted.clear();
+ blob_wanted.resize(outlines.size());
int num_blob_outlines = 0;
for (unsigned i = 0; i < outlines.size(); ++i) {
if (blob_box.major_x_overlap(outlines[i]->bounding_box()) && !(*word_wanted)[i]) {
@@ -1032,15 +1036,18 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
std::vector<bool> *word_wanted,
std::vector<C_BLOB *> *target_blobs) {
std::vector<bool> blob_wanted;
- word_wanted->resize(outlines.size(), false);
- target_blobs->resize(outlines.size(), nullptr);
+ word_wanted->clear();
+ word_wanted->resize(outlines.size());
+ target_blobs->clear();
+ target_blobs->resize(outlines.size());
// Check for outlines that need to be turned into stand-alone blobs.
for (unsigned i = 0; i < outlines.size(); ++i) {
if (outlines[i] == nullptr) {
continue;
}
// Get a set of adjacent outlines that don't overlap any existing blob.
- blob_wanted.resize(outlines.size(), false);
+ blob_wanted.clear();
+ blob_wanted.resize(outlines.size());
int num_blob_outlines = 0;
TBOX total_ol_box(outlines[i]->bounding_box());
while (i < outlines.size() && outlines[i] != nullptr) {
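
A note on the dropped fill arguments above (my reading, not stated in the commit): after clear(), resize(n) value-initializes every element, which yields false for bool and nullptr for pointers, so the explicit false/nullptr arguments are no longer needed. A small hypothetical check:

#include <vector>

struct C_BLOB;  // stand-in declaration; only pointers to it are used here

int main() {
  std::vector<C_BLOB *> target_blobs;
  target_blobs.clear();
  target_blobs.resize(4);  // value-initialized: all four pointers are nullptr
  return target_blobs[0] == nullptr ? 0 : 1;
}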
3 changes: 1 addition & 2 deletions src/ccmain/linerec.cpp
@@ -151,8 +151,7 @@ ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX>
line_boxes.push_back(box);
line_texts.push_back(texts[b]);
}
- std::vector<int> page_numbers;
- page_numbers.resize(line_boxes.size(), applybox_page);
+ std::vector<int> page_numbers(line_boxes.size(), applybox_page);
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
return image_data;
}
4 changes: 2 additions & 2 deletions src/ccmain/paragraphs.cpp
@@ -2314,14 +2314,14 @@ void CanonicalizeDetectionResults(std::vector<PARA *> *row_owners, PARA_LIST *pa
void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
std::vector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<ParagraphModel *> *models) {
- std::vector<RowScratchRegisters> rows;
ParagraphTheory theory(models);

// Initialize row_owners to be a bunch of nullptr pointers.
+ row_owners->clear();
row_owners->resize(row_infos->size());

// Set up row scratch registers for the main algorithm.
- rows.resize(row_infos->size(), RowScratchRegisters());
+ std::vector<RowScratchRegisters> rows(row_infos->size());
for (unsigned i = 0; i < row_infos->size(); i++) {
rows[i].Init((*row_infos)[i]);
}
2 changes: 2 additions & 0 deletions src/ccstruct/blobs.cpp
@@ -558,7 +558,9 @@ void TBLOB::GetPreciseBoundingBox(TBOX *precise_box) const {
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
std::vector<std::vector<int>> &y_coords) const {
+ x_coords.clear();
x_coords.resize(box.height());
+ y_coords.clear();
y_coords.resize(box.width());
CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
// Sort the output vectors.
3 changes: 1 addition & 2 deletions src/ccstruct/fontinfo.h
@@ -77,8 +77,7 @@ struct FontInfo {

// Reserves unicharset_size spots in spacing_vec.
void init_spacing(int unicharset_size) {
- spacing_vec = new std::vector<FontSpacingInfo *>();
- spacing_vec->resize(unicharset_size);
+ spacing_vec = new std::vector<FontSpacingInfo *>(unicharset_size);
}
// Adds the given pointer to FontSpacingInfo to spacing_vec member
// (FontInfo class takes ownership of the pointer).
2 changes: 2 additions & 0 deletions src/ccstruct/normalis.cpp
@@ -226,7 +226,9 @@ static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<i
std::vector<float> &hx, std::vector<float> &hy) {
int width = box.width();
int height = box.height();
+ hx.clear();
hx.resize(width + 1);
+ hy.clear();
hy.resize(height + 1);
double total = 0.0;
for (int iy = 0; iy < height; ++iy) {
1 change: 1 addition & 0 deletions src/ccstruct/pageres.cpp
@@ -901,6 +901,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE **choices) {
}
FakeWordFromRatings(TOP_CHOICE_PERM);
reject_map.initialise(blob_count);
+ best_state.clear();
best_state.resize(blob_count, 1);
done = true;
}
1 change: 1 addition & 0 deletions src/ccstruct/pageres.h
@@ -96,6 +96,7 @@ class PAGE_RES { // page result
rej_count = 0;
rejected = false;
prev_word_best_choice = nullptr;
+ blame_reasons.clear();
blame_reasons.resize(IRR_NUM_REASONS);
}

1 change: 1 addition & 0 deletions src/ccstruct/ratngs.cpp
@@ -738,6 +738,7 @@ void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
static std::vector<int> prev_drawn_state;
bool already_done = prev_drawn_state.size() == length_;
if (!already_done) {
+ prev_drawn_state.clear();
prev_drawn_state.resize(length_);
}
for (int i = 0; i < length_; ++i) {
3 changes: 1 addition & 2 deletions src/ccstruct/stepblob.cpp
@@ -431,8 +431,7 @@ int16_t C_BLOB::EstimateBaselinePosition() {
return bottom; // This is only for non-CJK blobs.
}
// Get the minimum y coordinate at each x-coordinate.
- std::vector<int> y_mins;
- y_mins.resize(width + 1, box.top());
+ std::vector<int> y_mins(width + 1, box.top());
C_OUTLINE_IT it(&outlines);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
3 changes: 2 additions & 1 deletion src/ccutil/unicharcompress.cpp
@@ -395,7 +395,8 @@ void UnicharCompress::ComputeCodeRange() {
// Initializes the decoding hash_map from the encoding array.
void UnicharCompress::SetupDecoder() {
Cleanup();
- is_valid_start_.resize(code_range_, false);
+ is_valid_start_.clear();
+ is_valid_start_.resize(code_range_);
for (int c = 0; c < encoder_.size(); ++c) {
const RecodedCharID &code = encoder_[c];
decoder_[code] = c;
1 change: 1 addition & 0 deletions src/classify/intmatcher.cpp
@@ -372,6 +372,7 @@ for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
/// Copies the pruned, sorted classes into the output results and returns
/// the number of classes.
int SetupResults(std::vector<CP_RESULT_STRUCT> *results) const {
+ results->clear();
results->resize(num_classes_);
for (int c = 0; c < num_classes_; ++c) {
(*results)[c].Class = sort_index_[num_classes_ - c];
3 changes: 1 addition & 2 deletions src/classify/shapeclassifier.cpp
@@ -45,8 +45,7 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Image p
std::vector<ShapeRating> shape_results;
int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, &shape_results);
const ShapeTable *shapes = GetShapeTable();
- std::vector<int> unichar_map;
- unichar_map.resize(shapes->unicharset().size(), -1);
+ std::vector<int> unichar_map(shapes->unicharset().size(), -1);
for (int r = 0; r < num_shape_results; ++r) {
shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
}
1 change: 1 addition & 0 deletions src/classify/shapetable.cpp
@@ -682,6 +682,7 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
void ShapeTable::AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map) {
if (shape_map != nullptr) {
+ shape_map->clear();
shape_map->resize(other.NumShapes(), -1);
}
for (int s = 0; s < other.shape_table_.size(); ++s) {
2 changes: 2 additions & 0 deletions src/lstm/recodebeam.cpp
@@ -644,6 +644,7 @@ WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_
// Fills top_n_flags_ with bools that are true iff the corresponding output
// is one of the top_n.
void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) {
+ top_n_flags_.clear();
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
top_code_ = -1;
second_code_ = -1;
@@ -676,6 +677,7 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to

void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs,
int num_outputs, int top_n) {
+ top_n_flags_.clear();
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
top_code_ = -1;
second_code_ = -1;
2 changes: 2 additions & 0 deletions src/training/common/ctc.cpp
@@ -87,7 +87,9 @@ CTC::CTC(const std::vector<int> &labels, int null_char, const GENERIC_2D_ARRAY<f
// Computes vectors of min and max label index for each timestep, based on
// whether skippability of nulls makes it possible to complete a valid path.
bool CTC::ComputeLabelLimits() {
+ min_labels_.clear();
min_labels_.resize(num_timesteps_, 0);
+ max_labels_.clear();
max_labels_.resize(num_timesteps_, 0);
int min_u = num_labels_ - 1;
if (labels_[min_u] == null_char_) {
2 changes: 2 additions & 0 deletions src/training/common/errorcounter.cpp
@@ -160,7 +160,9 @@ ErrorCounter::ErrorCounter(const UNICHARSET &unicharset, int fontsize)
, bad_score_hist_(0, 101)
, unicharset_(unicharset) {
Counts empty_counts;
+ font_counts_.clear();
font_counts_.resize(fontsize, empty_counts);
+ multi_unichar_counts_.clear();
multi_unichar_counts_.resize(unicharset.size(), 0);
}

1 change: 1 addition & 0 deletions src/training/common/mastertrainer.cpp
@@ -404,6 +404,7 @@ bool MasterTrainer::LoadFontInfo(const char *filename) {
// Returns false on failure.
bool MasterTrainer::LoadXHeights(const char *filename) {
tprintf("fontinfo table is of size %d\n", fontinfo_table_.size());
+ xheights_.clear();
xheights_.resize(fontinfo_table_.size(), -1);
if (filename == nullptr) {
return true;
9 changes: 4 additions & 5 deletions src/training/unicharset/lstmtrainer.cpp
@@ -208,7 +208,8 @@ void LSTMTrainer::InitIterations() {
for (int i = 0; i < ET_COUNT; ++i) {
best_error_rates_[i] = 100.0;
worst_error_rates_[i] = 0.0;
- error_buffers_[i].resize(kRollingBufferSize_, 0.0);
+ error_buffers_[i].clear();
+ error_buffers_[i].resize(kRollingBufferSize_);
error_rates_[i] = 100.0;
}
error_rate_of_last_saved_best_ = kMinStartedErrorRate;
@@ -669,8 +670,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
};
std::vector<std::string> layers = EnumerateLayers();
int num_layers = layers.size();
- std::vector<int> num_weights;
- num_weights.resize(num_layers, 0);
+ std::vector<int> num_weights(num_layers);
std::vector<double> bad_sums[LR_COUNT];
std::vector<double> ok_sums[LR_COUNT];
for (int i = 0; i < LR_COUNT; ++i) {
@@ -1263,8 +1263,7 @@ double LSTMTrainer::ComputeWinnerError(const NetworkIO &deltas) {
// Computes a very simple bag of chars char error rate.
double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
const std::vector<int> &ocr_str) {
- std::vector<int> label_counts;
- label_counts.resize(NumOutputs(), 0);
+ std::vector<int> label_counts(NumOutputs());
int truth_size = 0;
for (auto ch : truth_str) {
if (ch != null_char_) {
1 change: 1 addition & 0 deletions src/wordrec/params_model.cpp
@@ -110,6 +110,7 @@ bool ParamsModel::LoadFromFp(const char *lang, TFile *fp) {
lang_ = lang;
// Load weights for passes with adaption on.
std::vector<float> &weights = weights_vec_[pass_];
+ weights.clear();
weights.resize(PTRAIN_NUM_FEATURE_TYPES, 0.0f);

while (fp->FGets(line, kMaxLineSize) != nullptr) {
1 change: 1 addition & 0 deletions src/wordrec/segsearch.cpp
@@ -156,6 +156,7 @@ void Wordrec::InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points,
// children are considered in the non-decreasing order of their column, since
// this guarantees that all the parents would be up to date before an update
// of a child is done.
+ pending->clear();
pending->resize(word_res->ratings->dimension(), SegSearchPending());

// Search the ratings matrix for the initial best path.
