# Evaluating Model Performance
- Accuracy
- Per-class precision and recall
- Confusion matrix
- F1-score per class 
- Macro-averaged precision, recall and F1
- ROC curves (one versus all) for every single class
- Precision-recall curve for every single class

In [None]:
import pandas as pd
import numpy as np
import torch
import os
from torchvision import transforms
from scripts.train import model_create
from tqdm import tqdm
from PIL import Image


# --- Run configuration -------------------------------------------------------
# Checkpoint that is handed to model_create() to build the model.
model_checkpoint_path = "/media/hdd1/neo/MODELS/2025-05-31 Remodelled ResNeXt DeepHemeRetrain/1/version_0/checkpoints/epoch=499-step=27500.ckpt"
# Root folder of the evaluation images; one sub-folder per entry of `cellnames`.
data_path = "/media/hdd1/neo/pooled_deepheme_data/val"
# Folder that receives the generated predictions CSV.
save_dir = "/media/hdd1/neo/pooled_deepheme_data"

# Class labels. The position of a label in this list is used as the index into
# the model's output vector, so the order matters.
# NOTE(review): presumably this must match the class order used at training
# time -- confirm against the training dataloader.
cellnames = [
    "B1", "B2",
    "E1", "E4",
    "ER1", "ER2", "ER3", "ER4", "ER5", "ER6",
    "L2", "L4",
    "M1", "M2", "M3", "M4", "M5", "M6",
    "MO2",
    "PL2", "PL3",
    "U1", "U4",
]

## Compiling Data
We start by compiling the data. The expected inputs are a model checkpoint path and a path to the testing or validation data folder, structured in the ImageNet format (one sub-folder per class) or in whatever format the intended dataloader can read. We compile the model's outputs, using a GPU if one is available, and save them as a pandas DataFrame.

In [None]:
# Build the model from the checkpoint file at `model_checkpoint_path`.
# NOTE(review): `model_create` comes from scripts.train; whether it already
# places the model on the GPU is not visible from this notebook -- confirm.
model = model_create(path=model_checkpoint_path)


def _model_device(model):
    """Return the device on which ``model``'s parameters live.

    Falls back to "cuda if available, else cpu" when the model does not
    expose any parameters (e.g. a plain callable).
    """
    try:
        return next(model.parameters()).device
    except (AttributeError, StopIteration):
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")


def predict_batch(pil_images, model):
    """Run ``model`` on a batch of PIL images and return class probabilities.

    Args:
        pil_images: Iterable of PIL images. Every image is converted to RGB,
            resized with ``transforms.Resize(96)`` and turned into a tensor.
        model: The network to evaluate. It is called on the stacked batch and
            its output is soft-maxed over dimension 1. As a side effect the
            model is switched to evaluation mode.

    Returns:
        A list with one tuple of softmax probabilities per input image, in the
        same order as ``pil_images``. An empty input yields an empty list.
    """
    pil_images = list(pil_images)
    if not pil_images:
        # torch.stack() raises on an empty list; there is nothing to predict.
        return []

    # NOTE: Resize(96) scales the *shorter* side to 96 and keeps the aspect
    # ratio, so all images of one batch must share the same aspect ratio to be
    # stackable.
    image_transforms = transforms.Compose(
        [
            transforms.Resize(96),
            transforms.ToTensor(),
        ]
    )

    # Force three channels: PNGs may be RGBA, palette or greyscale, which would
    # otherwise produce tensors with a different channel count.
    batch = torch.stack(
        [image_transforms(image.convert("RGB")).float() for image in pil_images]
    )

    # Put the batch on the device the model actually lives on, so a CPU model
    # on a CUDA-capable machine does not hit a device mismatch.
    batch = batch.to(_model_device(model))

    # Evaluation mode (fixed batch-norm statistics, no dropout), no gradients.
    model.eval()
    with torch.no_grad():
        outputs = model(batch)
        # Turn the raw scores into per-class probabilities.
        outputs = torch.nn.functional.softmax(outputs, dim=1)

    # One device-to-host copy for the whole batch, then one tuple per image.
    return [tuple(row) for row in outputs.detach().cpu().numpy()]


def predict_image(pil_image, model):
    """Predict the class probabilities of a single PIL image.

    Convenience wrapper around ``predict_batch``: the image is wrapped in a
    one-element batch and the only resulting prediction is returned.
    """
    return predict_batch([pil_image], model)[0]


# Column-wise accumulator for the predictions table: the image path, the
# ground-truth label (name of the folder the image was found in) and one
# probability column per class.
result_df_dct = {
    "image_path": [],
    "label": [],
}
for class_name in cellnames:
    result_df_dct[class_name] = []

for cellname in tqdm(cellnames, desc="Processing cell types"):
    image_dir_path = os.path.join(data_path, cellname)

    # All .jpg / .jpeg / .png files of this class folder. os.listdir() returns
    # entries in arbitrary order, so sort to make the output reproducible.
    image_paths = sorted(
        os.path.join(image_dir_path, f)
        for f in os.listdir(image_dir_path)
        if f.endswith((".jpg", ".jpeg", ".png"))
    )

    for image_path in tqdm(image_paths, desc="Processing images"):
        # Context manager closes the file handle once the prediction is done.
        with Image.open(image_path) as image:
            prediction = predict_image(image, model)

        if len(prediction) != len(cellnames):
            raise ValueError(
                f"model returned {len(prediction)} scores for {image_path}, "
                f"expected {len(cellnames)}"
            )

        result_df_dct["image_path"].append(image_path)
        result_df_dct["label"].append(cellname)
        # Use a distinct loop variable here: reusing `cellname` would overwrite
        # the ground-truth label of the outer loop, so every image after the
        # first one of a folder would be labelled with the last class.
        for class_name, probability in zip(cellnames, prediction):
            result_df_dct[class_name].append(probability)

result_df = pd.DataFrame(result_df_dct)

# save the result as a csv file in the save_dir
result_df.to_csv(os.path.join(save_dir, "predictions.csv"), index=False)

In [None]:
# Location of the predictions CSV to evaluate.
# NOTE(review): this is not the file written to `save_dir` by the inference
# cell (different directory and file name) -- presumably a copy that was moved
# to another machine; confirm it holds the predictions you intend to evaluate.
prediction_path = "/Users/neo/Documents/DATA/deephemev3_val_predictions.csv"

# Load the predictions table into a pandas DataFrame.
df = pd.read_csv(prediction_path)