Skip to content

Commit

Permalink
Merge branch 'csv_output' of https://github.com/rbturnbull/hespi into…
Browse files Browse the repository at this point in the history
… csv_output
  • Loading branch information
rbturnbull committed Mar 22, 2024
2 parents 689f99a + 25ef583 commit 04c5342
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 1 deletion.
2 changes: 1 addition & 1 deletion hespi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def detect(
help="Whether or not to do handwritten text recognition using Microsoft's TrOCR.",
),
trocr_size: TrOCRSize = typer.Option(
TrOCRSize.BASE.value,
TrOCRSize.LARGE.value,
help="The size of the TrOCR model to use for handwritten text recognition.",
case_sensitive=False,
),
Expand Down
1 change: 1 addition & 0 deletions hespi/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def ocr_data_df(data: dict, output_path: Path=None) -> pd.DataFrame:
pd.DataFrame: The text recognition data as a Pandas dataframe
"""
df = pd.DataFrame.from_dict(data, orient="index")
df = df.fillna(value="")
df = df.reset_index().rename(columns={"index": "institutional label"})

# Splitting the ocr_results columns into separate original text, adjusted, and score
Expand Down

0 comments on commit 04c5342

Please sign in to comment.