In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold
import numpy as np
import pandas as pd
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
label_path = r"C:\Users\Charlotte\Desktop\dissertation\US_new\annotation_scaled_then_corrected.xlsx"
metric_path = r"C:\Users\Charlotte\Desktop\dissertation\US_new\enhancement_summary.xlsx"
image_dir = r"C:\Users\Charlotte\Desktop\dissertation\US_new\High_Quality_Images"

df_label = pd.read_excel(label_path)
df_label["weight"] = 1.0

all_images = {f: os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.lower().endswith(".jpg")}

def find_image_path(patient):
    for fname in all_images.keys():
        if patient.replace(" ", "_") in fname:
            return all_images[fname]
    return None

def load_image(patient):
    path = find_image_path(patient)
    if path is None:
        raise FileNotFoundError(f"No image found for patient {patient}")
    img = Image.open(path).convert("L")
    img = np.array(img) / 255.0
    img = torch.tensor(img, dtype=torch.float32).unsqueeze(0)
    return img
if "Filename" in df_metric.columns and "SSIM" in df_metric.columns:
    df_label = df_label.merge(df_metric[["Filename", "SSIM"]], left_on="Patient", right_on="Filename", how="left")
    df_label["weight"] = df_label["SSIM"] / df_label["SSIM"].mean()

In [12]:
class USDataset(Dataset):
    def __init__(self, df):
        self.df = df
        self.patients = df["Patient"].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = load_image(row["Patient"])
        label = torch.tensor(row[1:-1].values, dtype=torch.float32).mean().unsqueeze(0)
        weight = torch.tensor(row["weight"], dtype=torch.float32)
        return img, label, weight


In [13]:
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True)
        )
    def forward(self, x):
        return self.conv(x)

class UNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.down1 = DoubleConv(1, 64)
        self.pool1 = nn.MaxPool2d(2)
        self.down2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2)
        self.down3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2)
        self.bottom = DoubleConv(256, 512)
        self.up3 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv3 = DoubleConv(512, 256)
        self.up2 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv2 = DoubleConv(256, 128)
        self.up1 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv1 = DoubleConv(128, 64)
        self.final = nn.Conv2d(64, 1, 1)

    def forward(self, x):
        c1 = self.down1(x)
        p1 = self.pool1(c1)
        c2 = self.down2(p1)
        p2 = self.pool2(c2)
        c3 = self.down3(p2)
        p3 = self.pool3(c3)
        b = self.bottom(p3)
        u3 = self.up3(b)
        u3 = torch.cat([u3, c3], dim=1)
        c3 = self.conv3(u3)
        u2 = self.up2(c3)
        u2 = torch.cat([u2, c2], dim=1)
        c2 = self.conv2(u2)
        u1 = self.up1(c2)
        u1 = torch.cat([u1, c1], dim=1)
        c1 = self.conv1(u1)
        out = self.final(c1)
        return out.mean(dim=[2,3], keepdim=True)


In [14]:
class WeightedMSELoss(nn.Module):
    def forward(self, pred, target, weight):
        return torch.mean(weight * (pred - target) ** 2)

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
results = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(df_label)):
    train_df = df_label.iloc[train_idx]
    val_df = df_label.iloc[val_idx]
    train_loader = DataLoader(USDataset(train_df), batch_size=2, shuffle=True)
    val_loader = DataLoader(USDataset(val_df), batch_size=2, shuffle=False)

    model = UNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    criterion = WeightedMSELoss()

    for epoch in range(10):
        model.train()
        train_loss = []
        for img, label, w in train_loader:
            img, label, w = img.to(device), label.to(device), w.to(device)
            pred = model(img)
            loss = criterion(pred, label, w)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        model.eval()
        val_loss = []
        with torch.no_grad():
            for img, label, w in val_loader:
                img, label, w = img.to(device), label.to(device), w.to(device)
                pred = model(img)
                loss = criterion(pred, label, w)
                val_loss.append(loss.item())
        print(f"Fold {fold+1} | Epoch {epoch+1} | Train {np.mean(train_loss):.4f} | Val {np.mean(val_loss):.4f}")
    results.append(np.mean(val_loss))

FileNotFoundError: No image found for patient Abbey 005

In [None]:
print(f"Average 5-Fold Validation Loss: {np.mean(results):.6f}")
best_fold = np.argmin(results) + 1
print(f"Best model: Fold {best_fold} with ValLoss={np.min(results):.6f}")
