Fix vector resize with init for all elements (issue tesseract-ocr#3473)
Fixes: c8b8d26
Fixes: 9710bc0
Signed-off-by: Stefan Weil <sw@weilnetz.de>
stweil committed Jun 29, 2021
1 parent ff1062d commit b836f30
Showing 21 changed files with 44 additions and 23 deletions.
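
The pattern behind every change in this commit: std::vector::resize(n, value) only value-initializes elements that are newly appended; elements that already exist keep their previous contents, so reusing a vector this way can leave stale data behind. Below is a minimal standalone sketch (illustrative only, not part of the commit) of the pitfall and of the two fixes used here: clear() before resize(), or constructing a fresh vector with its size and fill value.

#include <cassert>
#include <vector>

int main() {
  std::vector<bool> flags;

  flags.resize(3, false);    // first use: all three elements are false
  flags[1] = true;

  flags.resize(3, false);    // reuse without clearing: size is already 3,
  assert(flags[1] == true);  // so nothing is re-initialized -- stale value remains

  flags.clear();             // the fix used throughout this commit:
  flags.resize(3);           // clear(), then resize() value-initializes every element
  assert(flags[1] == false);

  // For freshly declared local vectors the commit simply constructs
  // with size and fill value in one step:
  std::vector<int> page_numbers(3, -1);
  assert(page_numbers[2] == -1);
  return 0;
}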
21 changes: 14 additions & 7 deletions src/ccmain/control.cpp
@@ -982,9 +982,12 @@ void Tesseract::AssignDiacriticsToOverlappingBlobs(const std::vector<C_OUTLINE *
std::vector<bool> *overlapped_any_blob,
std::vector<C_BLOB *> *target_blobs) {
std::vector<bool> blob_wanted;
- word_wanted->resize(outlines.size(), false);
- overlapped_any_blob->resize(outlines.size(), false);
- target_blobs->resize(outlines.size(), nullptr);
+ word_wanted->clear();
+ word_wanted->resize(outlines.size());
+ overlapped_any_blob->clear();
+ overlapped_any_blob->resize(outlines.size());
+ target_blobs->clear();
+ target_blobs->resize(outlines.size());
// For each real blob, find the outlines that seriously overlap it.
// A single blob could be several merged characters, so there can be quite
// a few outlines overlapping, and the full engine needs to be used to chop
@@ -993,7 +996,8 @@
for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
C_BLOB *blob = blob_it.data();
const TBOX blob_box = blob->bounding_box();
- blob_wanted.resize(outlines.size(), false);
+ blob_wanted.clear();
+ blob_wanted.resize(outlines.size());
int num_blob_outlines = 0;
for (unsigned i = 0; i < outlines.size(); ++i) {
if (blob_box.major_x_overlap(outlines[i]->bounding_box()) && !(*word_wanted)[i]) {
@@ -1032,15 +1036,18 @@ void Tesseract::AssignDiacriticsToNewBlobs(const std::vector<C_OUTLINE *> &outli
std::vector<bool> *word_wanted,
std::vector<C_BLOB *> *target_blobs) {
std::vector<bool> blob_wanted;
- word_wanted->resize(outlines.size(), false);
- target_blobs->resize(outlines.size(), nullptr);
+ word_wanted->clear();
+ word_wanted->resize(outlines.size());
+ target_blobs->clear();
+ target_blobs->resize(outlines.size());
// Check for outlines that need to be turned into stand-alone blobs.
for (unsigned i = 0; i < outlines.size(); ++i) {
if (outlines[i] == nullptr) {
continue;
}
// Get a set of adjacent outlines that don't overlap any existing blob.
- blob_wanted.resize(outlines.size(), false);
+ blob_wanted.clear();
+ blob_wanted.resize(outlines.size());
int num_blob_outlines = 0;
TBOX total_ol_box(outlines[i]->bounding_box());
while (i < outlines.size() && outlines[i] != nullptr) {
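
A note on the dropped fill arguments above (my reading, not stated in the commit): after clear(), resize(n) value-initializes every element, which yields false for bool and nullptr for pointers, so the explicit false/nullptr arguments are no longer needed. A small hypothetical check:

#include <vector>

struct C_BLOB;  // stand-in declaration; only pointers to it are used here

int main() {
  std::vector<C_BLOB *> target_blobs;
  target_blobs.clear();
  target_blobs.resize(4);  // value-initialized: all four pointers are nullptr
  return target_blobs[0] == nullptr ? 0 : 1;
}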
3 changes: 1 addition & 2 deletions src/ccmain/linerec.cpp
@@ -151,8 +151,7 @@ ImageData *Tesseract::GetLineData(const TBOX &line_box, const std::vector<TBOX>
line_boxes.push_back(box);
line_texts.push_back(texts[b]);
}
- std::vector<int> page_numbers;
- page_numbers.resize(line_boxes.size(), applybox_page);
+ std::vector<int> page_numbers(line_boxes.size(), applybox_page);
image_data->AddBoxes(line_boxes, line_texts, page_numbers);
return image_data;
}
4 changes: 2 additions & 2 deletions src/ccmain/paragraphs.cpp
@@ -2314,14 +2314,14 @@ void CanonicalizeDetectionResults(std::vector<PARA *> *row_owners, PARA_LIST *pa
void DetectParagraphs(int debug_level, std::vector<RowInfo> *row_infos,
std::vector<PARA *> *row_owners, PARA_LIST *paragraphs,
std::vector<ParagraphModel *> *models) {
- std::vector<RowScratchRegisters> rows;
ParagraphTheory theory(models);

// Initialize row_owners to be a bunch of nullptr pointers.
+ row_owners->clear();
row_owners->resize(row_infos->size());

// Set up row scratch registers for the main algorithm.
- rows.resize(row_infos->size(), RowScratchRegisters());
+ std::vector<RowScratchRegisters> rows(row_infos->size());
for (unsigned i = 0; i < row_infos->size(); i++) {
rows[i].Init((*row_infos)[i]);
}
2 changes: 2 additions & 0 deletions src/ccstruct/blobs.cpp
@@ -558,7 +558,9 @@ void TBLOB::GetPreciseBoundingBox(TBOX *precise_box) const {
// Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
void TBLOB::GetEdgeCoords(const TBOX &box, std::vector<std::vector<int>> &x_coords,
std::vector<std::vector<int>> &y_coords) const {
+ x_coords.clear();
x_coords.resize(box.height());
+ y_coords.clear();
y_coords.resize(box.width());
CollectEdges(box, nullptr, nullptr, &x_coords, &y_coords);
// Sort the output vectors.
3 changes: 1 addition & 2 deletions src/ccstruct/fontinfo.h
@@ -77,8 +77,7 @@ struct FontInfo {

// Reserves unicharset_size spots in spacing_vec.
void init_spacing(int unicharset_size) {
- spacing_vec = new std::vector<FontSpacingInfo *>();
- spacing_vec->resize(unicharset_size);
+ spacing_vec = new std::vector<FontSpacingInfo *>(unicharset_size);
}
// Adds the given pointer to FontSpacingInfo to spacing_vec member
// (FontInfo class takes ownership of the pointer).
2 changes: 2 additions & 0 deletions src/ccstruct/normalis.cpp
@@ -226,7 +226,9 @@ static void ComputeEdgeDensityProfiles(const TBOX &box, const GENERIC_2D_ARRAY<i
std::vector<float> &hx, std::vector<float> &hy) {
int width = box.width();
int height = box.height();
+ hx.clear();
hx.resize(width + 1);
+ hy.clear();
hy.resize(height + 1);
double total = 0.0;
for (int iy = 0; iy < height; ++iy) {
1 change: 1 addition & 0 deletions src/ccstruct/pageres.cpp
@@ -901,6 +901,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE **choices) {
}
FakeWordFromRatings(TOP_CHOICE_PERM);
reject_map.initialise(blob_count);
+ best_state.clear();
best_state.resize(blob_count, 1);
done = true;
}
1 change: 1 addition & 0 deletions src/ccstruct/pageres.h
@@ -96,6 +96,7 @@ class PAGE_RES { // page result
rej_count = 0;
rejected = false;
prev_word_best_choice = nullptr;
+ blame_reasons.clear();
blame_reasons.resize(IRR_NUM_REASONS);
}

1 change: 1 addition & 0 deletions src/ccstruct/ratngs.cpp
@@ -738,6 +738,7 @@ void WERD_CHOICE::DisplaySegmentation(TWERD *word) {
static std::vector<int> prev_drawn_state;
bool already_done = prev_drawn_state.size() == length_;
if (!already_done) {
+ prev_drawn_state.clear();
prev_drawn_state.resize(length_);
}
for (int i = 0; i < length_; ++i) {
3 changes: 1 addition & 2 deletions src/ccstruct/stepblob.cpp
@@ -431,8 +431,7 @@ int16_t C_BLOB::EstimateBaselinePosition() {
return bottom; // This is only for non-CJK blobs.
}
// Get the minimum y coordinate at each x-coordinate.
- std::vector<int> y_mins;
- y_mins.resize(width + 1, box.top());
+ std::vector<int> y_mins(width + 1, box.top());
C_OUTLINE_IT it(&outlines);
for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
C_OUTLINE *outline = it.data();
3 changes: 2 additions & 1 deletion src/ccutil/unicharcompress.cpp
@@ -395,7 +395,8 @@ void UnicharCompress::ComputeCodeRange() {
// Initializes the decoding hash_map from the encoding array.
void UnicharCompress::SetupDecoder() {
Cleanup();
- is_valid_start_.resize(code_range_, false);
+ is_valid_start_.clear();
+ is_valid_start_.resize(code_range_);
for (int c = 0; c < encoder_.size(); ++c) {
const RecodedCharID &code = encoder_[c];
decoder_[code] = c;
1 change: 1 addition & 0 deletions src/classify/intmatcher.cpp
@@ -372,6 +372,7 @@ for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
/// Copies the pruned, sorted classes into the output results and returns
/// the number of classes.
int SetupResults(std::vector<CP_RESULT_STRUCT> *results) const {
+ results->clear();
results->resize(num_classes_);
for (int c = 0; c < num_classes_; ++c) {
(*results)[c].Class = sort_index_[num_classes_ - c];
3 changes: 1 addition & 2 deletions src/classify/shapeclassifier.cpp
@@ -45,8 +45,7 @@ int ShapeClassifier::UnicharClassifySample(const TrainingSample &sample, Image p
std::vector<ShapeRating> shape_results;
int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this, &shape_results);
const ShapeTable *shapes = GetShapeTable();
- std::vector<int> unichar_map;
- unichar_map.resize(shapes->unicharset().size(), -1);
+ std::vector<int> unichar_map(shapes->unicharset().size(), -1);
for (int r = 0; r < num_shape_results; ++r) {
shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
}
1 change: 1 addition & 0 deletions src/classify/shapetable.cpp
@@ -682,6 +682,7 @@ bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
void ShapeTable::AppendMasterShapes(const ShapeTable &other, std::vector<int> *shape_map) {
if (shape_map != nullptr) {
+ shape_map->clear();
shape_map->resize(other.NumShapes(), -1);
}
for (int s = 0; s < other.shape_table_.size(); ++s) {
2 changes: 2 additions & 0 deletions src/lstm/recodebeam.cpp
@@ -644,6 +644,7 @@ WERD_RES *RecodeBeamSearch::InitializeWord(bool leading_space, const TBOX &line_
// Fills top_n_flags_ with bools that are true iff the corresponding output
// is one of the top_n.
void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int top_n) {
+ top_n_flags_.clear();
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
top_code_ = -1;
second_code_ = -1;
@@ -676,6 +677,7 @@ void RecodeBeamSearch::ComputeTopN(const float *outputs, int num_outputs, int to

void RecodeBeamSearch::ComputeSecTopN(std::unordered_set<int> *exList, const float *outputs,
int num_outputs, int top_n) {
+ top_n_flags_.clear();
top_n_flags_.resize(num_outputs, TN_ALSO_RAN);
top_code_ = -1;
second_code_ = -1;
2 changes: 2 additions & 0 deletions src/training/common/ctc.cpp
@@ -87,7 +87,9 @@ CTC::CTC(const std::vector<int> &labels, int null_char, const GENERIC_2D_ARRAY<f
// Computes vectors of min and max label index for each timestep, based on
// whether skippability of nulls makes it possible to complete a valid path.
bool CTC::ComputeLabelLimits() {
+ min_labels_.clear();
min_labels_.resize(num_timesteps_, 0);
+ max_labels_.clear();
max_labels_.resize(num_timesteps_, 0);
int min_u = num_labels_ - 1;
if (labels_[min_u] == null_char_) {
2 changes: 2 additions & 0 deletions src/training/common/errorcounter.cpp
@@ -160,7 +160,9 @@ ErrorCounter::ErrorCounter(const UNICHARSET &unicharset, int fontsize)
, bad_score_hist_(0, 101)
, unicharset_(unicharset) {
Counts empty_counts;
+ font_counts_.clear();
font_counts_.resize(fontsize, empty_counts);
+ multi_unichar_counts_.clear();
multi_unichar_counts_.resize(unicharset.size(), 0);
}

1 change: 1 addition & 0 deletions src/training/common/mastertrainer.cpp
@@ -404,6 +404,7 @@ bool MasterTrainer::LoadFontInfo(const char *filename) {
// Returns false on failure.
bool MasterTrainer::LoadXHeights(const char *filename) {
tprintf("fontinfo table is of size %d\n", fontinfo_table_.size());
+ xheights_.clear();
xheights_.resize(fontinfo_table_.size(), -1);
if (filename == nullptr) {
return true;
9 changes: 4 additions & 5 deletions src/training/unicharset/lstmtrainer.cpp
@@ -208,7 +208,8 @@ void LSTMTrainer::InitIterations() {
for (int i = 0; i < ET_COUNT; ++i) {
best_error_rates_[i] = 100.0;
worst_error_rates_[i] = 0.0;
- error_buffers_[i].resize(kRollingBufferSize_, 0.0);
+ error_buffers_[i].clear();
+ error_buffers_[i].resize(kRollingBufferSize_);
error_rates_[i] = 100.0;
}
error_rate_of_last_saved_best_ = kMinStartedErrorRate;
@@ -669,8 +670,7 @@ int LSTMTrainer::ReduceLayerLearningRates(double factor, int num_samples,
};
std::vector<std::string> layers = EnumerateLayers();
int num_layers = layers.size();
- std::vector<int> num_weights;
- num_weights.resize(num_layers, 0);
+ std::vector<int> num_weights(num_layers);
std::vector<double> bad_sums[LR_COUNT];
std::vector<double> ok_sums[LR_COUNT];
for (int i = 0; i < LR_COUNT; ++i) {
@@ -1263,8 +1263,7 @@ double LSTMTrainer::ComputeWinnerError(const NetworkIO &deltas) {
// Computes a very simple bag of chars char error rate.
double LSTMTrainer::ComputeCharError(const std::vector<int> &truth_str,
const std::vector<int> &ocr_str) {
- std::vector<int> label_counts;
- label_counts.resize(NumOutputs(), 0);
+ std::vector<int> label_counts(NumOutputs());
int truth_size = 0;
for (auto ch : truth_str) {
if (ch != null_char_) {
1 change: 1 addition & 0 deletions src/wordrec/params_model.cpp
@@ -110,6 +110,7 @@ bool ParamsModel::LoadFromFp(const char *lang, TFile *fp) {
lang_ = lang;
// Load weights for passes with adaption on.
std::vector<float> &weights = weights_vec_[pass_];
+ weights.clear();
weights.resize(PTRAIN_NUM_FEATURE_TYPES, 0.0f);

while (fp->FGets(line, kMaxLineSize) != nullptr) {
1 change: 1 addition & 0 deletions src/wordrec/segsearch.cpp
@@ -156,6 +156,7 @@ void Wordrec::InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points,
// children are considered in the non-decreasing order of their column, since
// this guarantees that all the parents would be up to date before an update
// of a child is done.
+ pending->clear();
pending->resize(word_res->ratings->dimension(), SegSearchPending());

// Search the ratings matrix for the initial best path.
