# `test.ipynb` - Model Evaluation and Visualization

In [1]:
import os, time
import numpy as np
import cv2
import torch
import matplotlib.pyplot as plt
from glob import glob
from tqdm import tqdm
from operator import add
from sklearn.metrics import accuracy_score, f1_score, jaccard_score, precision_score, recall_score

# Custom imports
from model import build_unet
from utils import create_dir, seeding

In [3]:
def save_image_comparison(image, mask, pred, dice, iou, save_path):
    """Save a three-panel comparison figure: Original | Ground Truth | U-Net Prediction.

    Args:
        image: Image array in OpenCV's BGR channel order; it is converted to
            RGB with ``cv2.cvtColor`` before being displayed.
        mask: Ground-truth mask, rendered with a gray colormap.
        pred: Predicted mask, rendered with a gray colormap.
        dice: Dice score shown (2 decimals) in the prediction panel title.
        iou: IoU score shown (2 decimals) in the prediction panel title.
        save_path: Destination path passed to ``Figure.savefig``.
    """
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    try:
        # (array to draw, panel title, extra imshow keyword arguments)
        panels = [
            (cv2.cvtColor(image, cv2.COLOR_BGR2RGB), "Original Image", {}),
            (mask, "Ground Truth (Doctor)", {"cmap": "gray"}),
            (pred, f"U-Net Result\nDice: {dice:.2f} | IoU: {iou:.2f}", {"cmap": "gray"}),
        ]
        for axis, (data, title, imshow_kwargs) in zip(axes, panels):
            axis.imshow(data, **imshow_kwargs)
            axis.set_title(title)
            axis.axis("off")

        fig.savefig(save_path, bbox_inches='tight', dpi=150)
    finally:
        # Close this specific figure even if drawing/saving fails: the function
        # is called once per test image, so leaked figures would accumulate.
        plt.close(fig)



def calculate_metrics(y_true, y_pred):
    """Compute the segmentation scores for one image.

    Both tensors are detached, moved to the CPU, binarized at 0.5 and
    flattened before being handed to the scikit-learn metric functions.

    Args:
        y_true: Ground-truth tensor; values > 0.5 count as foreground.
        y_pred: Prediction tensor; values > 0.5 count as foreground.

    Returns:
        list: ``[jaccard, f1, recall, precision, accuracy]`` in that order.
        Callers index into this list positionally, so the order must not change.
    """
    def _to_binary_vector(tensor):
        # detach() lets this work on tensors that still require grad
        # (Tensor.numpy() raises for those); it is a no-op under no_grad.
        return (tensor.detach().cpu().numpy() > 0.5).astype(np.uint8).flatten()

    y_true = _to_binary_vector(y_true)
    y_pred = _to_binary_vector(y_pred)

    return [
        jaccard_score(y_true, y_pred),
        f1_score(y_true, y_pred),
        recall_score(y_true, y_pred),
        precision_score(y_true, y_pred),
        accuracy_score(y_true, y_pred)
    ]




if __name__ == "__main__":
    # Fix the RNG state and make sure the output folder for the figures exists.
    seeding(42)
    create_dir("results")

    # Sorting both listings is what pairs the i-th image with the i-th mask.
    # NOTE(review): this presumes image and mask files share names — confirm.
    test_x = sorted(glob("new_data/test/image/*"))
    test_y = sorted(glob("new_data/test/mask/*"))

    # Fail early with a clear message instead of a ZeroDivisionError at the end
    # (empty set) or silently truncated/misaligned pairs (count mismatch).
    if not test_x:
        raise FileNotFoundError("No test images found in new_data/test/image/")
    if len(test_x) != len(test_y):
        raise ValueError(
            f"Image/mask count mismatch: {len(test_x)} images vs {len(test_y)} masks"
        )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = device.type == "cuda"
    model = build_unet().to(device)
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints
    # (consider weights_only=True on torch versions that support it).
    model.load_state_dict(torch.load("files/checkpoint.pth", map_location=device))
    model.eval()

    # Running sums in calculate_metrics order: [jaccard, f1, recall, precision, accuracy].
    metrics_score = [0.0, 0.0, 0.0, 0.0, 0.0]
    time_taken = []

    print("\nGenerating Images...")

    for x_path, y_path in tqdm(zip(test_x, test_y), total=len(test_x)):
        # splitext keeps multi-dot names intact ("a.b.png" -> "a.b"), so two
        # inputs cannot collapse onto the same output file name.
        name = os.path.splitext(os.path.basename(x_path))[0]

        # Image Processing: HWC uint8 (BGR) -> 1xCxHxW float tensor in [0, 1].
        image = cv2.imread(x_path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {x_path}")
        x = np.transpose(image, (2, 0, 1)) / 255.0
        x = torch.from_numpy(np.expand_dims(x, axis=0)).float().to(device)

        # Mask Processing: HxW uint8 -> 1x1xHxW float tensor in [0, 1].
        mask = cv2.imread(y_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {y_path}")
        y = (mask / 255.0)
        y_tensor = torch.from_numpy(np.expand_dims(np.expand_dims(y, 0), 0)).float().to(device)

        with torch.no_grad():
            # CUDA kernels run asynchronously: synchronize before and after the
            # forward pass so the timer measures compute, not just kernel launch.
            if use_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            y_pred = torch.sigmoid(model(x))
            if use_cuda:
                torch.cuda.synchronize()
            time_taken.append(time.perf_counter() - start_time)

            # Calculate scores for this specific image
            score = calculate_metrics(y_tensor, y_pred)
            metrics_score = list(map(add, metrics_score, score))

            # Post-process mask for visualization (binary 0/1 map at threshold 0.5).
            pred_y_final = (y_pred[0].cpu().numpy().squeeze() > 0.5).astype(np.uint8)

        # Finally We Save Every Comparison Image (score[1] = f1/Dice, score[0] = jaccard/IoU).
        save_image_comparison(image, mask, pred_y_final * 255, score[1], score[0], f"results/comp_{name}.png")

    # Final Print: per-image scores averaged over the whole test set.
    avg = [s / len(test_x) for s in metrics_score]
    print(f"\nFinal Results:\nDice: {avg[1]:.4f} | IoU: {avg[0]:.4f} | Acc: {avg[4]:.4f}")
    print(f"Speed: {1/np.mean(time_taken):.2f} FPS")

100%|██████████| 20/20 [00:13<00:00,  1.49it/s]


Generating Images...

Final Results:
Dice: 0.7725 | IoU: 0.6330 | Acc: 0.9622
Speed: 185.89 FPS



