In [2]:
import pathlib
from time import sleep
from typing import Generator

import cv2
import numpy as np
import onnxruntime
import pandas as pd
from sklearn.metrics import f1_score

In [3]:
# Directory of the training run being evaluated (presumably an MLflow run,
# given the `mlruns/` layout). Every artifact below is read from this one run,
# so the experiment/run ids only need to be changed here.
RUN_DIR: pathlib.Path = pathlib.Path(
    "../data/logs/mlruns/669180362677009476/28c74b00b3c24059a887a64895e6dedf"
)

# Exported ONNX model used for inference in this notebook.
MODEL_PATH_ONNX: pathlib.Path = RUN_DIR.joinpath("artifacts/model/model.onnx").resolve()

# Training checkpoint that belongs to the same run.
MODEL_PATH: pathlib.Path = RUN_DIR.joinpath(
    "artifacts/model/checkpoints/model_checkpoint/model_checkpoint.ckpt"
).resolve()

# Decision threshold logged with the run: the second space-separated field of
# the first line of the metric file, rounded to two decimals.
with RUN_DIR.joinpath("metrics/threshold").resolve().open("r") as metric_file:
    THRESHOLD = round(float(metric_file.readline().split(" ")[1]), 2)

# Root against which the image paths stored in the validation set are resolved.
# Derived relatively (like every other path in this cell) instead of pointing at
# one user's home directory, so the notebook runs from any checkout.
# Assumes the stored `data/...` image paths live under the same `../data`
# directory used above — TODO confirm for non-standard layouts.
BASE_DATA_PATH = pathlib.Path("..").resolve()

# Validation samples to score.
dataframe: pd.DataFrame = pd.read_parquet(pathlib.Path("../data/valid.parquet").resolve())

# ONNX Runtime session and the name of the model's first input, which is the
# key used to feed images to `ort_session.run`.
ort_session: onnxruntime.InferenceSession = onnxruntime.InferenceSession(MODEL_PATH_ONNX)
input_name: str = ort_session.get_inputs()[0].name

In [7]:
# Display the validation DataFrame loaded from `valid.parquet` as the cell output.
dataframe

Unnamed: 0,path,target,weight
0,data/bronze/images/wild_boar/contadino_mette_u...,1,1.299435
1,data/bronze/images/wild_boar/contadino_mette_u...,1,1.299435
2,data/bronze/images/wild_boar/contadino_mette_u...,1,1.299435
3,data/bronze/images/wild_boar/image_41.jpg,1,1.299435
4,data/bronze/images/wild_boar/image_14.jpg,1,1.299435
...,...,...,...
949,data/bronze/images/other_animals/cosa_ci_fate_...,0,0.812721
950,data/bronze/images/other_animals/il_sonnellino...,0,0.812721
951,data/bronze/images/other_animals/cosa_ci_fate_...,0,0.812721
952,data/bronze/images/other_animals/cosa_ci_fate_...,0,0.812721


In [5]:
# Width and height (pixels) every image is resized to before inference.
INPUT_SIZE: tuple[int, int] = (256, 256)

# One record per validation sample; the DataFrame is built once at the end so
# `predictions` is only ever bound to a DataFrame.
records: list[dict[str, int | float]] = []

for row in dataframe.itertuples(index=False):
    image_path: pathlib.Path = BASE_DATA_PATH.joinpath(row.path)

    # cv2.imread signals an unreadable/missing file by returning None instead of
    # raising; fail here with the offending path rather than inside cv2.resize.
    image = cv2.imread(str(image_path))
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # NOTE(review): cv2.imread yields channels in BGR order and no colour
    # conversion is applied — confirm this matches the training preprocessing.
    inputs: np.ndarray = cv2.resize(image, INPUT_SIZE)
    inputs = inputs.transpose(2, 0, 1)       # channels-last -> channels-first
    inputs = np.expand_dims(inputs, axis=0)  # add a leading batch axis of size 1
    inputs = inputs.astype(np.float32)
    inputs /= 255.0                          # scale pixel values by 1/255

    outputs: list[np.ndarray] = ort_session.run(None, {input_name: inputs})

    # The first model output is treated as a single logit: a sigmoid turns it
    # into a probability, which is then cut at the run's threshold.
    y_prob: float = 1 / (1 + np.exp(-outputs[0].item()))
    y_class: int = int(y_prob >= THRESHOLD)

    records.append({"y_true": row.target, "y_pred": y_class, "y_prob": y_prob})

predictions: pd.DataFrame = pd.DataFrame(records, columns=["y_true", "y_pred", "y_prob"])
predictions

Unnamed: 0,y_true,y_pred,y_prob
0,1,True,0.991586
1,1,True,0.998911
2,1,True,0.997739
3,1,True,0.992407
4,1,True,0.997874
...,...,...,...
949,0,False,0.171134
950,0,False,0.040340
951,0,False,0.027583
952,0,False,0.247993


In [9]:
# `predictions` was built by iterating `dataframe` in order, so its rows line up
# positionally with the validation set; check that before attaching weights.
assert len(predictions) == len(dataframe), "predictions and dataframe must have the same number of rows"

# Take the per-sample weights from the validation set's own `weight` column
# rather than re-deriving them from two hard-coded literals, which only matched
# the rounded values shown in the DataFrame display. `.to_numpy()` attaches them
# by position, independent of either frame's index. `.assign` returns a new
# frame, so re-running this cell is idempotent.
predictions = predictions.assign(
    y_pred=predictions["y_pred"].astype(int),
    weight=dataframe["weight"].to_numpy(),
)
predictions

Unnamed: 0,y_true,y_pred,y_prob,weight
0,1,1,0.991586,1.299435
1,1,1,0.998911,1.299435
2,1,1,0.997739,1.299435
3,1,1,0.992407,1.299435
4,1,1,0.997874,1.299435
...,...,...,...,...
949,0,0,0.171134,0.812721
950,0,0,0.040340,0.812721
951,0,0,0.027583,0.812721
952,0,0,0.247993,0.812721


In [11]:
# F1 score of the thresholded predictions against the ground truth, computed
# twice: once with every sample counted equally, and once with each sample
# scaled by its entry in the `weight` column.
unweighted_f1: float = f1_score(predictions["y_true"], predictions["y_pred"])
weighted_f1: float = f1_score(
    predictions["y_true"],
    predictions["y_pred"],
    sample_weight=predictions["weight"],
)
unweighted_f1, weighted_f1

(0.9394812680115274, 0.94869764295663)