Skip to content

Commit

Permalink
🎨 removing unnecessary underscores
Browse files: browse the repository at this point in the history
  • Loading branch information
rbturnbull committed Mar 4, 2024
1 parent 444838e commit b47064d
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 20 deletions.
8 changes: 4 additions & 4 deletions hespi/hespi.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ def institutional_label_detect(self, component, stub, output_dir) -> Dict:
counter = Counter(best_engine_results)
preferred_engine = counter.most_common(1)[0][0]
elif detection_results.get('label_classification', None) in ["printed", "typewriter"]:
preferred_engine = '_Tesseract'
preferred_engine = 'Tesseract'
else:
preferred_engine = '_TrOCR'
preferred_engine = 'TrOCR'

# Determining Recognised Text for fields not in the reference database
for key, detection_result in detection_results.items():
Expand Down Expand Up @@ -362,7 +362,7 @@ def read_field_file(

detection_results[f"{field}_ocr_results"].append(
{
'ocr': '_TrOCR',
'ocr': 'TrOCR',
'original_text_detected': htr_text,
'adjusted_text': adjusted_text,
'match_score': match_score,
Expand All @@ -386,7 +386,7 @@ def read_field_file(

detection_results[f"{field}_ocr_results"].append(
{
'ocr': '_Tesseract',
'ocr': 'Tesseract',
'original_text_detected': tesseract_text,
'adjusted_text': adjusted_text,
'match_score': match_score,
Expand Down
4 changes: 2 additions & 2 deletions hespi/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ def process_row_ocr_results(row, field_name):
tesseract_match_score = []

for d in row:
if d['ocr'] == '_TrOCR':
if d['ocr'] == 'TrOCR':
trocr_original.append(d['original_text_detected'])
trocr_adjusted.append(d['adjusted_text'])
trocr_match_score.append(d['match_score'])
elif d['ocr'] == '_Tesseract':
elif d['ocr'] == 'Tesseract':
tesseract_original.append(d['original_text_detected'])
tesseract_adjusted.append(d['adjusted_text'])
tesseract_match_score.append(d['match_score'])
Expand Down
22 changes: 11 additions & 11 deletions tests/test_hespi.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_read_field_file_tesseract_only():
assert len(result["species_image"]) == 1
assert result["species_image"][0] == image
assert len(result['species_ocr_results']) == 1
assert result["species_ocr_results"][0]['ocr'] == '_Tesseract'
assert result["species_ocr_results"][0]['ocr'] == 'Tesseract'
assert result["species_ocr_results"][0]['original_text_detected'] == 'zOstericolaXX'
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericola'
assert result["species_ocr_results"][0]['match_score'] == 0.917
Expand All @@ -150,12 +150,12 @@ def test_read_field_file_htr():
assert result["species_image"][0] == image
assert len(result["species_ocr_results"]) == 2

assert result["species_ocr_results"][0]['ocr'] == '_TrOCR'
assert result["species_ocr_results"][0]['ocr'] == 'TrOCR'
assert result["species_ocr_results"][0]['original_text_detected'] == 'zostericolaX'
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericolax'
assert result["species_ocr_results"][0]['match_score'] == ""

assert result["species_ocr_results"][1]['ocr'] == '_Tesseract'
assert result["species_ocr_results"][1]['ocr'] == 'Tesseract'
assert result["species_ocr_results"][1]['original_text_detected'] == 'zOstericolaXX'
assert result["species_ocr_results"][1]['adjusted_text'] == 'zostericolaxx'
assert result["species_ocr_results"][1]['match_score'] == ""
Expand All @@ -174,12 +174,12 @@ def test_read_field_file_fuzzy():
assert result["species_image"][0] == image
assert len(result["species_ocr_results"]) == 2

assert result["species_ocr_results"][0]['ocr'] == '_TrOCR'
assert result["species_ocr_results"][0]['ocr'] == 'TrOCR'
assert result["species_ocr_results"][0]['original_text_detected'] == 'zostericolaX'
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericola'
assert result["species_ocr_results"][0]['match_score'] == 0.957

assert result["species_ocr_results"][1]['ocr'] == '_Tesseract'
assert result["species_ocr_results"][1]['ocr'] == 'Tesseract'
assert result["species_ocr_results"][1]['original_text_detected'] == 'zOstericolaXX'
assert result["species_ocr_results"][1]['adjusted_text'] == 'zostericola'
assert result["species_ocr_results"][1]['match_score'] == 0.917
Expand Down Expand Up @@ -216,12 +216,12 @@ def test_institutional_label_detect(mock_yolo_output):
assert len(result["species_image"]) == 1
assert len(result["species_ocr_results"]) == 2

assert result["species_ocr_results"][0]['ocr'] == '_TrOCR'
assert result["species_ocr_results"][0]['ocr'] == 'TrOCR'
assert result["species_ocr_results"][0]['original_text_detected'] == 'zostericolaX'
assert result["species_ocr_results"][0]['adjusted_text'] == 'zostericola'
assert result["species_ocr_results"][0]['match_score'] == 0.957

assert result["species_ocr_results"][1]['ocr'] == '_Tesseract'
assert result["species_ocr_results"][1]['ocr'] == 'Tesseract'
assert result["species_ocr_results"][1]['original_text_detected'] == 'zOstericolaXX'
assert result["species_ocr_results"][1]['adjusted_text'] == 'zostericola'
assert result["species_ocr_results"][1]['match_score'] == 0.917
Expand Down Expand Up @@ -329,7 +329,7 @@ def test_determine_best_ocr_result_non_reference():
assert best_match_score == ""
assert best_engine == ""

best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['location_ocr_results'], preferred_engine="_TrOCR")
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['location_ocr_results'], preferred_engine="TrOCR")
assert best_text == "Queenscliff"
assert best_match_score == ""
assert best_engine == ""
Expand All @@ -345,12 +345,12 @@ def test_determine_best_ocr_result_reference():
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['species_ocr_results'])
assert best_text == "zostericola"
assert best_match_score == 0.957
assert best_engine == "_TrOCR"
assert best_engine == "TrOCR"

best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['species_ocr_results'], preferred_engine="_Tesseract")
best_text, best_match_score, best_engine = hespi.determine_best_ocr_result(result['species_ocr_results'], preferred_engine="Tesseract")
assert best_text == "zostericola"
assert best_match_score == 0.957
assert best_engine == "_TrOCR"
assert best_engine == "TrOCR"


def test_determine_best_ocr_result_single():
Expand Down
6 changes: 3 additions & 3 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,9 @@ def test_ocr_data_df_ocr_results():
"family":"family",
"id":"id",
"species_ocr_results": [
dict(ocr="_TrOCR", original_text_detected="zostericolumXX", adjusted_text="zostericolum", match_score=0.9),
dict(ocr="_TrOCR", original_text_detected="z", adjusted_text="z", match_score=0),
dict(ocr="_Tesseract", original_text_detected="zasdfoppasf", adjusted_text="zasdfoppasf", match_score=''),
dict(ocr="TrOCR", original_text_detected="zostericolumXX", adjusted_text="zostericolum", match_score=0.9),
dict(ocr="TrOCR", original_text_detected="z", adjusted_text="z", match_score=0),
dict(ocr="Tesseract", original_text_detected="zasdfoppasf", adjusted_text="zasdfoppasf", match_score=''),
],
"extra": [],
}
Expand Down

0 comments on commit b47064d

Please sign in to comment.