In [1]:
import numpy as np
import pandas as pd
from PIL import Image
import datasets
import lightning.pytorch as pl
import torch
import itertools
import timeit
import time
import onnx
import onnxruntime
from tqdm import tqdm

import my_utils
import my_training
import my_datasets

  from .autonotebook import tqdm as notebook_tqdm


# Predict on test dataset

In [2]:
# load dataset from hugging face
cropped_test_dataset_from_hf = datasets.load_dataset("shortery/cropped-dm-codes")["test"]

Resolving data files: 100%|██████████| 201/201 [00:00<00:00, 281.30it/s]


In [3]:
# create pandas dataframe with new collumn decoded_text
pd_dataset = pd.DataFrame(cropped_test_dataset_from_hf)
resized_np_image = pd_dataset["image"].map(lambda x: np.asarray(x.resize((128, 128)).convert("L")))
pd_dataset["decoded_text_before_nn"] = resized_np_image.map(lambda x: my_utils.decode_dm_code(x))
pd_dataset

Unnamed: 0,image,tl,tr,br,bl,clean,text,image_name,decoded_text_before_nn
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1632, 884]","[2646, 873]","[2652, 1896]","[1632, 1908]",True,11505771-01,000.jpg,11505771-01
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1869, 1737]","[2430, 993]","[3183, 1542]","[2604, 2262]",False,11505771-01,001.jpg,
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1380, 1536]","[1923, 1110]","[2295, 1668]","[1737, 2064]",False,11505771-01,002.jpg,
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1476, 932]","[2628, 984]","[2580, 2144]","[1412, 2096]",False,11505771-01,003.jpg,11505771-01
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1640, 600]","[2732, 600]","[2724, 1692]","[1648, 1692]",False,11505771-01,004.jpg,11505771-01
...,...,...,...,...,...,...,...,...,...
195,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1824, 2790]","[1731, 2136]","[2379, 1923]","[2328, 2418]",False,92123605213202401080000053846434,195.jpg,
196,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[441, 2571]","[1593, 2376]","[1707, 3144]","[444, 3225]",False,92123605213202401080000053846434,196.jpg,
197,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1344, 2280]","[2235, 1995]","[2646, 2685]","[1710, 3000]",False,92123605213202401080000053846434,197.jpg,
198,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1308, 2535]","[2043, 2370]","[2367, 3102]","[1614, 3333]",False,92123605213202401080000053846434,198.jpg,


In [4]:
def print_decoding_results(pd_dataset: pd.DataFrame, decoded_text_column: str):
    print("Number of correctly decoded clean codes:", end=" ")
    print(len(pd_dataset[(pd_dataset["clean"]) & (pd_dataset[decoded_text_column] == pd_dataset["text"])]), end=" ")
    print("out of", pd_dataset["clean"].sum())

    print("Number of undecodable clean codes:", end=" ")
    print(len(pd_dataset[(pd_dataset["clean"]) & pd_dataset[decoded_text_column].isna()]), end=" ")
    print("out of", pd_dataset["clean"].sum())

    print()

    print("Number of correctly decoded damaged codes:", end=" ")
    print(len(pd_dataset[~(pd_dataset["clean"]) & (pd_dataset[decoded_text_column] == pd_dataset["text"])]), end=" ")
    print("out of", (~pd_dataset["clean"]).sum())

    print("Number of undecodable damaged codes:", end=" ")
    print(len(pd_dataset[~(pd_dataset["clean"]) & pd_dataset[decoded_text_column].isna()]), end=" ")
    print("out of", (~pd_dataset["clean"]).sum())


    print("Number of decoded, but not correctly decoded damaged codes:", end=" ")
    print(len(pd_dataset[~(pd_dataset["clean"]) & (~pd_dataset[decoded_text_column].isna()) &
                        (pd_dataset[decoded_text_column] != pd_dataset["text"])]), end=" ")
    print("out of", (~pd_dataset["clean"]).sum())

In [5]:
print("Before network:")
print_decoding_results(pd_dataset, "decoded_text_before_nn")

Before network:
Number of correctly decoded clean codes: 35 out of 38
Number of undecodable clean codes: 3 out of 38

Number of correctly decoded damaged codes: 30 out of 162
Number of undecodable damaged codes: 130 out of 162
Number of decoded, but not correctly decoded damaged codes: 2 out of 162


In [6]:
# decoded, but not correctly decoded damaged codes
pd_dataset[~(pd_dataset["clean"]) & (~pd_dataset["decoded_text_before_nn"].isna()) &
           (pd_dataset["decoded_text_before_nn"] != pd_dataset["text"])]

Unnamed: 0,image,tl,tr,br,bl,clean,text,image_name,decoded_text_before_nn
62,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[2175, 1341]","[2427, 1371]","[2424, 1617]","[2178, 1575]",False,10192673,062.jpg,19S
98,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[876, 1290]","[1800, 774]","[2514, 1656]","[1599, 2067]",False,LB7210B,098.jpg,KFCB


In [7]:
# prepare dataset to be an input to the network
def resize_and_preprocess_image(batch):
    resized_image = batch["image"].resize((128, 128), resample=Image.Resampling.NEAREST)
    return {"image": my_datasets._preprocess(np.asarray(resized_image))}

preprocessed_dataset = cropped_test_dataset_from_hf.map(resize_and_preprocess_image)

In [8]:
# create dataloader
dataloader_test = torch.utils.data.DataLoader(
    dataset=preprocessed_dataset.with_format("torch"),
    batch_size=64
)

In [9]:
# load model from checkpoint and compute predictions
trainer = pl.Trainer(precision=16)
loaded_model = my_training.LitAutoEncoder.load_from_checkpoint("../checkpoints/glowing-snowball-134/step=7500--corr_dec=0.99.ckpt")
loaded_model.eval()
predictions = trainer.predict(loaded_model, dataloader_test)

  rank_zero_warn(
Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 4/4 [00:01<00:00,  2.82it/s]


In [10]:
# chain all predictions together to get one list
# (otherwise I could iterate it as "for batch in predictions: for pred in batch: ...")
pd_dataset["nn_prediction"] = list(itertools.chain(*predictions))
pd_dataset.head()

Unnamed: 0,image,tl,tr,br,bl,clean,text,image_name,decoded_text_before_nn,nn_prediction
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1632, 884]","[2646, 873]","[2652, 1896]","[1632, 1908]",True,11505771-01,000.jpg,11505771-01,"[[[tensor(0.9883, dtype=torch.float16), tensor..."
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1869, 1737]","[2430, 993]","[3183, 1542]","[2604, 2262]",False,11505771-01,001.jpg,,"[[[tensor(0.9526, dtype=torch.float16), tensor..."
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1380, 1536]","[1923, 1110]","[2295, 1668]","[1737, 2064]",False,11505771-01,002.jpg,,"[[[tensor(1.0176, dtype=torch.float16), tensor..."
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1476, 932]","[2628, 984]","[2580, 2144]","[1412, 2096]",False,11505771-01,003.jpg,11505771-01,"[[[tensor(0.9736, dtype=torch.float16), tensor..."
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1640, 600]","[2732, 600]","[2724, 1692]","[1648, 1692]",False,11505771-01,004.jpg,11505771-01,"[[[tensor(0.9824, dtype=torch.float16), tensor..."


In [11]:
np_prediction_for_image = pd_dataset["nn_prediction"].map(lambda x: np.squeeze(my_utils.tensor_to_numpy_for_image(x.unsqueeze(dim=0))))
pd_dataset["nn_output_image"] = np_prediction_for_image.map(lambda x: Image.fromarray(x, mode="L"))
pd_dataset["decoded_text_after_nn"] = np_prediction_for_image.map(my_utils.decode_dm_code)

In [12]:
pd_dataset.head()

Unnamed: 0,image,tl,tr,br,bl,clean,text,image_name,decoded_text_before_nn,nn_prediction,nn_output_image,decoded_text_after_nn
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1632, 884]","[2646, 873]","[2652, 1896]","[1632, 1908]",True,11505771-01,000.jpg,11505771-01,"[[[tensor(0.9883, dtype=torch.float16), tensor...",<PIL.Image.Image image mode=L size=128x128 at ...,11505771-01
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1869, 1737]","[2430, 993]","[3183, 1542]","[2604, 2262]",False,11505771-01,001.jpg,,"[[[tensor(0.9526, dtype=torch.float16), tensor...",<PIL.Image.Image image mode=L size=128x128 at ...,11505771-01
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1380, 1536]","[1923, 1110]","[2295, 1668]","[1737, 2064]",False,11505771-01,002.jpg,,"[[[tensor(1.0176, dtype=torch.float16), tensor...",<PIL.Image.Image image mode=L size=128x128 at ...,
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1476, 932]","[2628, 984]","[2580, 2144]","[1412, 2096]",False,11505771-01,003.jpg,11505771-01,"[[[tensor(0.9736, dtype=torch.float16), tensor...",<PIL.Image.Image image mode=L size=128x128 at ...,11505771-01
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,"[1640, 600]","[2732, 600]","[2724, 1692]","[1648, 1692]",False,11505771-01,004.jpg,11505771-01,"[[[tensor(0.9824, dtype=torch.float16), tensor...",<PIL.Image.Image image mode=L size=128x128 at ...,


In [13]:
print("After network:")
print_decoding_results(pd_dataset, "decoded_text_after_nn")

After network:
Number of correctly decoded clean codes: 21 out of 38
Number of undecodable clean codes: 15 out of 38

Number of correctly decoded damaged codes: 20 out of 162
Number of undecodable damaged codes: 141 out of 162
Number of decoded, but not correctly decoded damaged codes: 1 out of 162


# Measure execution time

In [2]:
# seed the RNG for all devices (both CPU and CUDA)
torch.manual_seed(0)

# create random tensors as the inputs to the network
random_tensors = torch.rand(size=(64, 3, 128, 128))

In [3]:
# create dataloader
dataloader_random_tensors = torch.utils.data.DataLoader(
    dataset=[{"image": x} for x in random_tensors],
    batch_size=32
)

In [4]:
checkpoint_path = "../checkpoints/glowing-snowball-134/step=7500--corr_dec=0.99.ckpt"

trainer = pl.Trainer()
loaded_model = my_training.LitAutoEncoder.load_from_checkpoint(checkpoint_path, map_location=torch.device("cpu"))
loaded_model.eval()

start_wall_time = timeit.default_timer()
start_cpu_time = time.process_time()

predictions = trainer.predict(loaded_model, dataloader_random_tensors)

end_wall_time = timeit.default_timer()
end_cpu_time = time.process_time()

print()
print("wall time:", end_wall_time - start_wall_time, "seconds")
print("cpu time:", end_cpu_time - start_cpu_time, "seconds")

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 2/2 [00:03<00:00,  1.96s/it]

wall time: 4.236556711024605 seconds
cpu time: 7.305168144 seconds


In [5]:
# export torch model to onnx
onnx_model_path = "../checkpoints/exported_model.onnx"
torch.onnx.export(
    loaded_model,
    torch.rand(size=(64, 3, 128, 128)),
    onnx_model_path,
    input_names=["image"],
    output_names = ['output'],
    dynamic_axes={'image' : {0 : 'batch_size'}, 'output' : {0 : 'batch_size'}}
)

  if h % output_stride != 0 or w % output_stride != 0:


verbose: False, log level: Level.ERROR



In [6]:
# load onnx model and create inference session
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)
onnx_inference_session = onnxruntime.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"])

In [7]:
random_arrays = pd.DataFrame(dataloader_random_tensors).map(np.asarray).to_dict('records')
onnx_predictions = []

start_wall_time = timeit.default_timer()
start_cpu_time = time.process_time()

for random_array in tqdm(random_arrays):
    pred, = onnx_inference_session.run(None, random_array)
    onnx_predictions.append(pred)

end_wall_time = timeit.default_timer()
end_cpu_time = time.process_time()

print()
print("wall time:", end_wall_time - start_wall_time, "seconds")
print("cpu time:", end_cpu_time - start_cpu_time, "seconds")

  0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:02<00:00,  1.29s/it]


wall time: 2.5878376649925485 seconds
cpu time: 4.860588821 seconds





In [8]:
# check if the output predictions are the same
np.abs(np.asarray(list(itertools.chain(*predictions))) -
       np.asarray(list(itertools.chain(*onnx_predictions)))).max()

6.735325e-06