Reduce scope of some local variables (reported by Codacy)

Also apply some smaller optimizations and add TODO comments for local variables which require further examination.

Signed-off-by: Stefan Weil <sw@weilnetz.de>
tesseract-ocr · Jun 11, 2024 · 96772c5
1 parent 5605293
commit 96772c5
Show file tree

Hide file tree

Showing 9 changed files with 38 additions and 54 deletions.
diff --git a/src/api/pdfrenderer.cpp b/src/api/pdfrenderer.cpp
@@ -242,13 +242,13 @@ static void GetWordBaseline(int writing_direction, int ppi, int height, int word
   double word_length;
   double x, y;
   {
-    int px = word_x1;
-    int py = word_y1;
     double l2 = dist2(line_x1, line_y1, line_x2, line_y2);
     if (l2 == 0) {
       x = line_x1;
       y = line_y1;
     } else {
+      int px = word_x1;
+      int py = word_y1;
       double t = ((px - line_x2) * (line_x2 - line_x1) + (py - line_y2) * (line_y2 - line_y1)) / l2;
       x = line_x2 + t * (line_x2 - line_x1);
       y = line_y2 + t * (line_y2 - line_y1);

diff --git a/src/ccmain/applybox.cpp b/src/ccmain/applybox.cpp
@@ -258,10 +258,10 @@ void Tesseract::MaximallyChopWord(const std::vector<TBOX> &boxes, BLOCK *block,
   }
   const double e = exp(1.0); // The base of natural logs.
   unsigned blob_number;
-  int right_chop_index = 0;
   if (!assume_fixed_pitch_char_segment) {
     // We only chop if the language is not fixed pitch like CJK.
     SEAM *seam = nullptr;
+    int right_chop_index = 0;
     while ((seam = chop_one_blob(boxes, blob_choices, word_res, &blob_number)) != nullptr) {
       word_res->InsertSeam(blob_number, seam);
       BLOB_CHOICE *left_choice = blob_choices[blob_number];
@@ -685,6 +685,7 @@ void Tesseract::SearchForText(const std::vector<BLOB_CHOICE_LIST *> *choices, in
 void Tesseract::TidyUp(PAGE_RES *page_res) {
   int ok_blob_count = 0;
   int bad_blob_count = 0;
+  // TODO: check usage of ok_word_count.
   int ok_word_count = 0;
   int unlabelled_words = 0;
   PAGE_RES_IT pr_it(page_res);

diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp
@@ -949,6 +949,7 @@ bool Tesseract::ReassignDiacritics(int pass, PAGE_RES_IT *pr_it, bool *make_next
   }
   real_word->AddSelectedOutlines(wanted, wanted_blobs, wanted_outlines, nullptr);
   AssignDiacriticsToNewBlobs(outlines, pass, real_word, pr_it, &word_wanted, &target_blobs);
+  // TODO: check code.
   int non_overlapped = 0;
   int non_overlapped_used = 0;
   for (unsigned i = 0; i < word_wanted.size(); ++i) {
@@ -1121,9 +1122,9 @@ bool Tesseract::SelectGoodDiacriticOutlines(int pass, float certainty_threshold,
                                             C_BLOB *blob,
                                             const std::vector<C_OUTLINE *> &outlines,
                                             int num_outlines, std::vector<bool> *ok_outlines) {
-  std::string best_str;
   float target_cert = certainty_threshold;
   if (blob != nullptr) {
+    std::string best_str;
     float target_c2;
     target_cert = ClassifyBlobAsWord(pass, pr_it, blob, best_str, &target_c2);
     if (debug_noise_removal) {
@@ -1797,9 +1798,6 @@ Allow a single hyphen in a lower case word
 }
 
 bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
-  bool show_map_detail = false;
-  int16_t i;
-
   if (!test_pt) {
     return false;
   }
@@ -1811,6 +1809,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
     if (location < 0) {
       return true; // For breakpoint use
     }
+    bool show_map_detail = false;
     tessedit_rejection_debug.set_value(true);
     debug_x_ht_level.set_value(2);
     tprintf("\n\nTESTWD::");
@@ -1864,7 +1863,7 @@ bool Tesseract::check_debug_pt(WERD_RES *word, int location) {
       tprintf("\n");
       if (show_map_detail) {
         tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
-        for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
+        for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
           tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
           word->reject_map[i].full_print(debug_fp);
         }
@@ -1891,13 +1890,12 @@ static void find_modal_font( // good chars in word
     int16_t *font_out,       // output font
     int8_t *font_count       // output count
 ) {
-  int16_t font;  // font index
-  int32_t count; // pile count
-
   if (fonts->get_total() > 0) {
-    font = static_cast<int16_t>(fonts->mode());
+    // font index
+    int16_t font = static_cast<int16_t>(fonts->mode());
     *font_out = font;
-    count = fonts->pile_count(font);
+    // pile count
+    int32_t count = fonts->pile_count(font);
     *font_count = count < INT8_MAX ? count : INT8_MAX;
     fonts->add(font, -*font_count);
   } else {

diff --git a/src/ccmain/docqual.cpp b/src/ccmain/docqual.cpp
@@ -60,10 +60,10 @@ int16_t Tesseract::word_blob_quality(WERD_RES *word) {
 }
 
 int16_t Tesseract::word_outline_errs(WERD_RES *word) {
-  int16_t i = 0;
   int16_t err_count = 0;
 
   if (word->rebuild_word != nullptr) {
+    int16_t i = 0;
     for (unsigned b = 0; b < word->rebuild_word->NumBlobs(); ++b) {
       TBLOB *blob = word->rebuild_word->blobs[b];
       err_count += count_outline_errs(word->best_choice->unichar_string()[i], blob->NumOutlines());
@@ -209,13 +209,8 @@ void Tesseract::unrej_good_quality_words( // unreject potential
 
 void Tesseract::doc_and_block_rejection( // reject big chunks
     PAGE_RES_IT &page_res_it, bool good_quality_doc) {
-  int16_t block_no = 0;
-  int16_t row_no = 0;
   BLOCK_RES *current_block;
-  ROW_RES *current_row;
 
-  bool rej_word;
-  bool prev_word_rejected;
   int16_t char_quality = 0;
   int16_t accepted_char_quality;
 
@@ -238,16 +233,17 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
     WERD_RES *word;
     while ((word = page_res_it.word()) != nullptr) {
       current_block = page_res_it.block();
-      block_no = current_block->block->pdblk.index();
+      int16_t block_no = current_block->block->pdblk.index();
       if (current_block->char_count > 0 &&
           (current_block->rej_count * 100.0 / current_block->char_count) >
               tessedit_reject_block_percent) {
         if (tessedit_debug_block_rejection) {
           tprintf("REJECTING BLOCK %d  #chars: %d;  #Rejects: %d\n", block_no,
                   current_block->char_count, current_block->rej_count);
         }
-        prev_word_rejected = false;
+        bool prev_word_rejected = false;
         while ((word = page_res_it.word()) != nullptr && (page_res_it.block() == current_block)) {
+          bool rej_word;
           if (tessedit_preserve_blk_rej_perfect_wds) {
             rej_word = word->reject_map.reject_count() > 0 ||
                        word->reject_map.length() < tessedit_preserve_min_wd_len;
@@ -284,9 +280,9 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
         }
 
         /* Walk rows in block testing for row rejection */
-        row_no = 0;
+        int16_t row_no = 0;
         while (page_res_it.word() != nullptr && page_res_it.block() == current_block) {
-          current_row = page_res_it.row();
+          ROW_RES *current_row = page_res_it.row();
           row_no++;
           /* Reject whole row if:
   fraction of chars on row which are rejected exceed a limit AND
@@ -302,9 +298,10 @@ void Tesseract::doc_and_block_rejection( // reject big chunks
               tprintf("REJECTING ROW %d  #chars: %d;  #Rejects: %d\n", row_no,
                       current_row->char_count, current_row->rej_count);
             }
-            prev_word_rejected = false;
+            bool prev_word_rejected = false;
             while ((word = page_res_it.word()) != nullptr && page_res_it.row() == current_row) {
               /* Preserve words on good docs unless they are mostly rejected*/
+              bool rej_word;
               if (!tessedit_row_rej_good_docs && good_quality_doc) {
                 rej_word = word->reject_map.reject_count() /
                                static_cast<float>(word->reject_map.length()) >
@@ -448,20 +445,18 @@ void Tesseract::tilde_crunch(PAGE_RES_IT &page_res_it) {
 }
 
 bool Tesseract::terrible_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_level) {
-  float rating_per_ch;
-  int adjusted_len;
   int crunch_mode = 0;
 
   if (word->best_choice->unichar_string().empty() ||
       (strspn(word->best_choice->unichar_string().c_str(), " ") ==
        word->best_choice->unichar_string().size())) {
     crunch_mode = 1;
   } else {
-    adjusted_len = word->reject_map.length();
+    int adjusted_len = word->reject_map.length();
     if (adjusted_len > crunch_rating_max) {
       adjusted_len = crunch_rating_max;
     }
-    rating_per_ch = word->best_choice->rating() / adjusted_len;
+    float rating_per_ch = word->best_choice->rating() / adjusted_len;
 
     if (rating_per_ch > crunch_terrible_rating) {
       crunch_mode = 2;
@@ -528,7 +523,6 @@ bool Tesseract::potential_word_crunch(WERD_RES *word, GARBAGE_LEVEL garbage_leve
 }
 
 void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
-  WERD_RES *word;
   PAGE_RES_IT copy_it;
   bool deleting_from_bol = false;
   bool marked_delete_point = false;
@@ -539,7 +533,7 @@ void Tesseract::tilde_delete(PAGE_RES_IT &page_res_it) {
 
   page_res_it.restart_page();
   while (page_res_it.word() != nullptr) {
-    word = page_res_it.word();
+    WERD_RES *word = page_res_it.word();
 
     delete_mode = word_deletable(word, debug_delete_mode);
     if (delete_mode != CR_NONE) {

diff --git a/src/ccmain/fixspace.cpp b/src/ccmain/fixspace.cpp
@@ -171,7 +171,6 @@ void Tesseract::fix_fuzzy_spaces(ETEXT_DESC *monitor, int32_t word_count, PAGE_R
 void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *block) {
   int16_t best_score;
   WERD_RES_LIST current_perm;
-  int16_t current_score;
   bool improved = false;
 
   best_score = eval_word_spacing(best_perm); // default score
@@ -183,7 +182,7 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
 
   while ((best_score != PERFECT_WERDS) && !current_perm.empty()) {
     match_current_words(current_perm, row, block);
-    current_score = eval_word_spacing(current_perm);
+    int16_t current_score = eval_word_spacing(current_perm);
     dump_words(current_perm, current_score, 2, improved);
     if (current_score > best_score) {
       best_perm.clear();
@@ -201,11 +200,10 @@ void Tesseract::fix_fuzzy_space_list(WERD_RES_LIST &best_perm, ROW *row, BLOCK *
 void initialise_search(WERD_RES_LIST &src_list, WERD_RES_LIST &new_list) {
   WERD_RES_IT src_it(&src_list);
   WERD_RES_IT new_it(&new_list);
-  WERD_RES *src_wd;
   WERD_RES *new_wd;
 
   for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
-    src_wd = src_it.data();
+    WERD_RES *src_wd = src_it.data();
     if (!src_wd->combination) {
       new_wd = WERD_RES::deep_copy(src_wd);
       new_wd->combination = false;
@@ -393,8 +391,6 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
   WERD_RES_IT prev_word_it(&words);
   WERD_RES *word;
   WERD_RES *prev_word;
-  WERD_RES *combo;
-  WERD *copy_word;
   int16_t prev_right = -INT16_MAX;
   TBOX box;
   int16_t gap;
@@ -425,12 +421,13 @@ void transform_to_next_perm(WERD_RES_LIST &words) {
           gap = box.left() - prev_right;
           if (gap <= min_gap) {
             prev_word = prev_word_it.data();
+            WERD_RES *combo;
             if (prev_word->combination) {
               combo = prev_word;
             } else {
               /* Make a new combination and insert before
                * the first word being joined. */
-              copy_word = new WERD;
+              auto *copy_word = new WERD;
               *copy_word = *(prev_word->word);
               // deep copy
               combo = new WERD_RES(copy_word);
@@ -546,7 +543,6 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
   WERD_RES *word_res;
   WERD_RES_LIST sub_word_list;
   WERD_RES_IT sub_word_list_it(&sub_word_list);
-  int16_t blob_index;
   int16_t new_length;
   float junk;
 
@@ -556,7 +552,7 @@ void Tesseract::fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
     return;
   }
 
-  blob_index = worst_noise_blob(word_res, &junk);
+  auto blob_index = worst_noise_blob(word_res, &junk);
   if (blob_index < 0) {
     return;
   }
@@ -623,7 +619,6 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
   WERD_RES_IT worst_word_it;
   float worst_noise_score = 9999;
   int worst_blob_index = -1; // Noisiest blob of noisiest wd
-  int blob_index;            // of wds noisiest blob
   float noise_score;         // of wds noisiest blob
   WERD_RES *word_res;
   C_BLOB_IT blob_it;
@@ -636,7 +631,7 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
   int16_t i;
 
   for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
-    blob_index = worst_noise_blob(word_it.data(), &noise_score);
+    auto blob_index = worst_noise_blob(word_it.data(), &noise_score);
     if (blob_index > -1 && worst_noise_score > noise_score) {
       worst_noise_score = noise_score;
       worst_blob_index = blob_index;
@@ -806,7 +801,6 @@ float Tesseract::blob_noise_score(TBLOB *blob) {
 void fixspace_dbg(WERD_RES *word) {
   TBOX box = word->word->bounding_box();
   const bool show_map_detail = false;
-  int16_t i;
 
   box.print();
   tprintf(" \"%s\" ", word->best_choice->unichar_string().c_str());
@@ -816,7 +810,7 @@ void fixspace_dbg(WERD_RES *word) {
   tprintf("\n");
   if (show_map_detail) {
     tprintf("\"%s\"\n", word->best_choice->unichar_string().c_str());
-    for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
+    for (unsigned i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
       tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
       word->reject_map[i].full_print(debug_fp);
     }

diff --git a/src/ccmain/output.cpp b/src/ccmain/output.cpp
@@ -101,11 +101,11 @@ void Tesseract::write_results(PAGE_RES_IT &page_res_it,
                               bool force_eol) {  // override tilde crunch?
   WERD_RES *word = page_res_it.word();
   const UNICHARSET &uchset = *word->uch_set;
-  bool need_reject = false;
   UNICHAR_ID space = uchset.unichar_to_id(" ");
 
   if ((word->unlv_crunch_mode != CR_NONE || word->best_choice->empty()) &&
       !tessedit_zero_kelvin_rejection && !tessedit_word_for_word) {
+    bool need_reject = false;
     if ((word->unlv_crunch_mode != CR_DELETE) &&
         (!stats_.tilde_crunch_written ||
          ((word->unlv_crunch_mode == CR_KEEP_SPACE) && (word->word->space() > 0) &&

diff --git a/src/ccmain/paragraphs.cpp b/src/ccmain/paragraphs.cpp
@@ -2407,8 +2407,8 @@ static void InitializeTextAndBoxesPreRecognition(const MutableIterator &it, RowI
   // Set up text, lword_text, and rword_text (mostly for debug printing).
   std::string fake_text;
   PageIterator pit(static_cast<const PageIterator &>(it));
-  bool first_word = true;
   if (!pit.Empty(RIL_WORD)) {
+    bool first_word = true;
     do {
       fake_text += "x";
       if (first_word) {

diff --git a/src/ccmain/pgedit.cpp b/src/ccmain/pgedit.cpp
@@ -703,9 +703,7 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
   WERD_RES *word_res = pr_it->word();
   WERD *word = word_res->word;
   TBOX word_bb;    // word bounding box
-  int word_height; // ht of word BB
   bool displayed_something = false;
-  float shift; // from bot left
 
   if (color_mode != CM_RAINBOW && word_res->box_word != nullptr) {
 #  ifndef DISABLED_LEGACY_ENGINE
@@ -842,13 +840,14 @@ bool Tesseract::word_display(PAGE_RES_IT *pr_it) {
   if (text.length() > 0) {
     word_bb = word->bounding_box();
     image_win->Pen(ScrollView::RED);
-    word_height = word_bb.height();
-    int text_height = 0.50 * word_height;
+    auto word_height = word_bb.height();
+    int text_height = word_height / 2;
     if (text_height > 20) {
       text_height = 20;
     }
     image_win->TextAttributes("Arial", text_height, false, false, false);
-    shift = (word_height < word_bb.width()) ? 0.25 * word_height : 0.0f;
+    // from bot left
+    float shift = (word_height < word_bb.width()) ? 0.25f * word_height : 0.0f;
     image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height, text.c_str());
     if (blame.length() > 0) {
       image_win->Text(word_bb.left() + shift, word_bb.bottom() + 0.25 * word_height - text_height,

diff --git a/src/ccmain/reject.cpp b/src/ccmain/reject.cpp
@@ -293,8 +293,6 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
   int16_t i;
   int16_t offset;
   bool non_conflict_set_char; // non conf set a/n?
-  bool conflict = false;
-  bool allow_1s;
   ACCEPTABLE_WERD_TYPE word_type;
   bool dict_perm_type;
   bool dict_word_ok;
@@ -411,11 +409,11 @@ bool Tesseract::one_ell_conflict(WERD_RES *word_res, bool update_map) {
     Else reject all conflict chs
 */
   if (word_contains_non_1_digit(word, lengths)) {
-    allow_1s =
+    bool allow_1s =
         (alpha_count(word, lengths) == 0) || (word_res->best_choice->permuter() == NUMBER_PERM);
 
     int16_t offset;
-    conflict = false;
+    bool conflict = false;
     for (i = 0, offset = 0; word[offset] != '\0';
          offset += word_res->best_choice->unichar_lengths()[i++]) {
       if ((!allow_1s || (word[offset] != '1')) &&