In [None]:
import os
import cv2
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import jaccard_score, f1_score, precision_score, recall_score
from unet_model import UNet

In [None]:
from google.colab import drive

# Mount Google Drive; the dataset and model paths configured below are all
# rooted under this mount point.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# === Path Config ===
# Every location used by the notebook hangs off a single dataset root.
base_dir = "/content/drive/MyDrive/Info_Project/Defect_Detection/DataSets/Data.Splitting/After_Melting_Defect_Detection"
_test_root = os.path.join(base_dir, "test")

# Test images, plus one ground-truth mask folder per defect class id.
test_img_dir = os.path.join(_test_root, "Img.After.Melting")
test_mask_dirs = []
for _class_id in (0, 5, 8, 9, 10, 11):
    test_mask_dirs.append(os.path.join(_test_root, f"Defect_Class{_class_id}"))

# Trained weights to load, and the root folder for predicted masks.
model_path = os.path.join(base_dir, "(unet)trained_model", "after_melting_unet_model_2.pth")
predictions_base = os.path.join(base_dir, "(unet)predicted_binary_masks")

In [None]:
# Create subfolders for each class
# The names are derived from the ground-truth mask directories rather than a
# second hard-coded id list, so class_names[c] always refers to the same class
# as mask channel c (the dataset stacks masks in test_mask_dirs order).
class_names = [os.path.basename(os.path.normpath(mask_dir)) for mask_dir in test_mask_dirs]
for class_name in class_names:
    # exist_ok=True keeps this cell safe to re-run.
    os.makedirs(os.path.join(predictions_base, class_name), exist_ok=True)

In [None]:
# === Device ===
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# === Evaluation Dataset (images + masks) ===
class EvalDataset(Dataset):
    """Dataset pairing each test image with its per-class ground-truth masks.

    Every regular file in ``image_dir`` is one sample. For each sample, a mask
    with the same filename is read from every directory in ``mask_dirs``; the
    masks are stacked in ``mask_dirs`` order, so channel ``c`` of the returned
    mask comes from ``mask_dirs[c]``.

    Args:
        image_dir: Directory containing the input images.
        mask_dirs: Sequence of directories, one per class, each holding a mask
            file named exactly like the corresponding image.
        img_size: Side length (pixels) images and masks are resized to.
    """

    def __init__(self, image_dir, mask_dirs, img_size=128):
        self.image_dir = image_dir
        self.mask_dirs = mask_dirs
        # Keep regular files only. os.listdir also returns sub-directories
        # (e.g. a `.ipynb_checkpoints` folder), which cv2.imread cannot decode
        # and which would otherwise be counted as samples.
        self.image_filenames = sorted(
            entry
            for entry in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, entry))
        )
        self.img_size = img_size

    def __len__(self):
        """Return the number of image files found in ``image_dir``."""
        return len(self.image_filenames)

    def _read_gray(self, path):
        """Read ``path`` as grayscale and resize it to (img_size, img_size).

        Raises:
            FileNotFoundError: If OpenCV cannot read the file. cv2.imread
                signals failure by returning None instead of raising.
        """
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            raise FileNotFoundError(f"Could not read image file: {path}")
        # NOTE(review): cv2.resize's default interpolation is kept for masks
        # too, to leave the evaluation numerically unchanged — confirm it
        # matches the preprocessing used at training time.
        return cv2.resize(pixels, (self.img_size, self.img_size))

    def __getitem__(self, idx):
        """Return ``(image, mask, img_name)`` for sample ``idx``.

        ``image`` is a float32 tensor of shape (1, img_size, img_size) and
        ``mask`` a float32 tensor of shape (len(mask_dirs), img_size, img_size);
        both are the raw pixel values divided by 255. ``img_name`` is the
        image's filename.

        Raises:
            FileNotFoundError: If the image or any of its masks cannot be read.
        """
        img_name = self.image_filenames[idx]

        image = self._read_gray(os.path.join(self.image_dir, img_name))
        image = image.astype(np.float32) / 255.0
        image = np.expand_dims(image, axis=0)  # add the channel axis

        masks = [self._read_gray(os.path.join(mask_dir, img_name)) for mask_dir in self.mask_dirs]
        mask = np.stack(masks, axis=0).astype(np.float32) / 255.0

        return torch.tensor(image, dtype=torch.float32), torch.tensor(mask, dtype=torch.float32), img_name


In [None]:
# === Load Dataset ===
# img_size is left at the dataset's default; samples are served one at a time
# in sorted-filename order (no shuffling).
eval_dataset = EvalDataset(image_dir=test_img_dir, mask_dirs=test_mask_dirs)
eval_loader = DataLoader(dataset=eval_dataset, batch_size=1, shuffle=False)


In [None]:
# === Load Model ===
# Build a 1-channel-in / 6-channel-out U-Net on the target device, then restore
# the trained weights from the checkpoint file.
model = UNet(in_channels=1, out_channels=6)
model = model.to(device)
# NOTE(review): torch.load unpickles the checkpoint; only load files from a
# trusted source (consider weights_only=True if the installed torch supports it).
model.load_state_dict(
    torch.load(model_path, map_location=device)
)
# Switch to inference mode; as the last expression this also echoes the
# module structure as the cell output.
model.eval()

UNet(
  (encoder): ModuleList(
    (0): DoubleConv(
      (conv): Sequential(
        (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace=True)
      )
    )
    (1): DoubleConv(
      (conv): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU(inplace

In [None]:
# === Metric Accumulators ===
# Four independent, initially empty lists — one per reported metric.
iou_scores, dice_scores, precisions, recalls = [], [], [], []


In [None]:
# === Evaluation Loop ===
# For every test sample and every class channel:
#   1. threshold the sigmoid of the model output at 0.5 to get a binary mask,
#   2. save that mask as a 0/255 PNG under predictions_base/<class_name>/,
#   3. score it against the thresholded ground truth (IoU, Dice, precision, recall).
# Scores are pooled into flat lists over all (sample, class) pairs; pairs where
# both ground truth and prediction are empty are not scored at all.
model.eval()
iou_scores, dice_scores, precisions, recalls = [], [], [], []
num_classes = len(class_names)

with torch.no_grad():
    for image, true_mask, filename in eval_loader:
        image = image.to(device)
        output = model(image)

        # Output channel c is saved under class_names[c]; fail loudly if the
        # model and the class list disagree instead of mislabelling files.
        if output.shape[1] != num_classes:
            raise ValueError(
                f"Model produced {output.shape[1]} channels but {num_classes} class names are configured"
            )

        # Threshold predictions
        pred_mask = torch.sigmoid(output).cpu().numpy() > 0.5
        true_mask = true_mask.cpu().numpy() > 0.5

        # Walk the whole batch (not just item 0) so that raising batch_size on
        # the DataLoader does not silently drop samples.
        for b, sample_name in enumerate(filename):
            base_name = os.path.splitext(sample_name)[0]  # Remove extension

            for c in range(num_classes):
                pred = (pred_mask[b, c] * 255).astype(np.uint8)
                save_path = os.path.join(predictions_base, class_names[c], f"{base_name}.png")
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(save_path, pred):
                    raise OSError(f"Failed to write predicted mask: {save_path}")

                # Compute metrics
                y_true = true_mask[b, c].flatten().astype(np.uint8)
                y_pred = pred_mask[b, c].flatten().astype(np.uint8)

                # Nothing to detect and nothing detected: skip, so these
                # trivially-correct pairs do not enter the averages.
                if y_true.sum() == 0 and y_pred.sum() == 0:
                    continue

                iou_scores.append(jaccard_score(y_true, y_pred, zero_division=0))
                dice_scores.append(f1_score(y_true, y_pred, zero_division=0))
                precisions.append(precision_score(y_true, y_pred, zero_division=0))
                recalls.append(recall_score(y_true, y_pred, zero_division=0))


In [None]:
# === Report Metrics ===
def print_metric(name, values):
    """Print the mean and standard deviation of ``values`` under a heading.

    Args:
        name: Metric label used in the heading line.
        values: Sequence of numeric scores. May be empty.

    NOTE(review): despite the "per class" heading, a single mean/std is
    printed over everything in ``values``; no per-class breakdown happens here.
    """
    print(f"{name} per class:")
    if len(values) == 0:
        # np.mean/np.std of an empty sequence emit RuntimeWarnings and yield
        # nan; say explicitly that there was nothing to aggregate instead.
        print("  Mean: n/a | Std: n/a (no samples)")
    else:
        print(f"  Mean: {np.mean(values):.4f} | Std: {np.std(values):.4f}")
    print("")

print("\n=== Model Evaluation on Test Set ===")
# Report each accumulated metric in a fixed order: label -> score list.
for metric_label, metric_values in (
    ("IoU", iou_scores),
    ("Dice (F1)", dice_scores),
    ("Precision", precisions),
    ("Recall", recalls),
):
    print_metric(metric_label, metric_values)
print("✅ Evaluation complete!")


=== Model Evaluation on Test Set ===
IoU per class:
  Mean: 0.4894 | Std: 0.3807

Dice (F1) per class:
  Mean: 0.5558 | Std: 0.4023

Precision per class:
  Mean: 0.6038 | Std: 0.4244

Recall per class:
  Mean: 0.5225 | Std: 0.3921

✅ Evaluation complete!
