In [1]:
import os
if not 'initial_cwd' in globals():
    initial_cwd = os.getcwd()
while 'ipython' in os.getcwd():
    os.chdir("../")

from typing import Dict

import numpy as np
import pandas as pd


from PIL import Image
from pathlib import Path
from matplotlib import pyplot as plt
from IPython.core.display import HTML
import cv2

from mim_ocr.image.transformations import no_transform, reorient, deskew
from mim_ocr.backends.tesseract import TesseractBackend
from mim_ocr.visualization import visualize_ocr_result
from mim_ocr.image import open_image, write_image
from mim_ocr.utils.notebook_utils import get_directory_for_notebook_images

%load_ext autoreload
%autoreload 2

In [2]:
samples_dir = Path('sample_data')
images_dir = get_directory_for_notebook_images(initial_cwd, makedirs=True)

In [3]:
tesseract_backend = TesseractBackend()

In [4]:
meta = []
for path in samples_dir.iterdir():
    if not path.is_file():
        continue
    img = open_image(path)

    metadata = {'path' : path}

    transformations = [no_transform, reorient, deskew]

    for t in transformations:

        try:
            img = t(img, path, metadata)
            box = tesseract_backend.run_ocr_to_box(img)

            for stat, value in box.calc_confidence().items():
                metadata[f'{stat}_{t.__name__}'] = value

            vis = visualize_ocr_result(original_image=img, box=box, confidence_threshold=30)
            img_path = images_dir / f'{path.name}_tranformation_{t.__name__}.jpeg'
            write_image(vis, img_path)
            # 
            #plt.imshow(vis)
            #plt.show()

            
        except Exception as e:
             print(f"Got error for {path} during tranformation {t.__name__}", e)

    meta.append(metadata)
display(pd.DataFrame.from_records(meta))

Unnamed: 0,path,avg_confidence_no_transform,total_letters_no_transform,avg_letters_per_box_no_transform,orientation,avg_confidence_reorient,total_letters_reorient,avg_letters_per_box_reorient,angle,avg_confidence_deskew,total_letters_deskew,avg_letters_per_box_deskew
0,sample_data/wyniki_badan_7_tydzien_enan_deca_f...,72.282502,1023,4.506608,0,72.282502,1023,4.506608,0.0,72.282502,1023,4.506608
1,sample_data/badania.jpg,86.602131,563,4.330769,0,86.602131,563,4.330769,0.5,86.696649,567,4.295455
2,sample_data/jak_odczytac_wyniki_badan_moczu.png,86.649254,804,5.826087,0,86.649254,804,5.826087,0.0,86.649254,804,5.826087
3,sample_data/canvas.png,88.679496,2936,5.700971,0,88.679496,2936,5.700971,0.0,88.679496,2936,5.700971
4,sample_data/luxmed.png,59.889153,1263,4.543165,0,59.889153,1263,4.543165,0.0,59.889153,1263,4.543165
5,sample_data/canvas_2.png,93.075877,1911,6.544521,0,93.075877,1911,6.544521,0.0,93.075877,1911,6.544521
