In [None]:
from google.colab import drive
# Mount Google Drive so the /content/drive/MyDrive paths used later in this
# notebook (dataset folder, CSV files, checkpoints) are accessible.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch.nn.functional as F

# Configuration
DATA_PATH = "/content/drive/MyDrive/filtered_image"
CSV_PATH = "/content/drive/MyDrive/landmark_detection/train.csv"
# Target size for the cv2.resize call in FetalLandmarkDataset.__getitem__.
# That call is currently commented out, so this value is not applied to the
# training images.
IMAGE_SIZE = (800, 540)
BATCH_SIZE = 4
NUM_EPOCHS = 5
NUM_LANDMARKS = 8  # Number of regressed values: 4 points × (x, y)
# Distance threshold used by compute_accuracy.
# NOTE(review): the dataset standardises its targets, so this threshold is
# compared against distances in standardised units rather than pixels.
THRESHOLD = 5.0
# Fall back to the CPU when no GPU is available instead of failing on the
# first .to(DEVICE) call.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Custom Dataset
class FetalLandmarkDataset(Dataset):
    """Dataset yielding (image tensor, standardised coordinate tensor) pairs.

    Column 0 of ``dataframe`` holds the image file name, resolved against
    ``DATA_PATH``; every remaining column is treated as a regression target.

    Args:
        dataframe: Table with the image name in the first column and the
            target coordinates in the remaining columns.
        scaler: Optional already-fitted scaler exposing ``transform``. Pass the
            training dataset's ``scaler`` when building a validation/test
            dataset so all splits share the same normalisation. When ``None``
            (the default) a new ``StandardScaler`` is fitted on ``dataframe``.
    """

    def __init__(self, dataframe, scaler=None):
        self.dataframe = dataframe
        raw_coordinates = dataframe.iloc[:, 1:].values
        if scaler is None:
            self.scaler = StandardScaler()
            self.coordinates = self.scaler.fit_transform(raw_coordinates)
        else:
            # Reuse existing statistics instead of re-fitting on this split.
            self.scaler = scaler
            self.coordinates = scaler.transform(raw_coordinates)

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return the ``idx``-th sample as ``(image, coords)`` float32 tensors.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_name = self.dataframe.iloc[idx, 0]
        img_path = f"{DATA_PATH}/{img_name}"
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # Images are used at their native resolution (no resize), scaled to [0, 1].
        image = image.astype(np.float32) / 255.0

        # Convert grayscale to 3-channel by repeating the grayscale channel
        image = np.stack([image] * 3, axis=-1)  # Shape: (H, W, 3)
        image = np.transpose(image, (2, 0, 1))  # Shape: (3, H, W)

        coords = self.coordinates[idx]

        return torch.tensor(image, dtype=torch.float32), torch.tensor(coords, dtype=torch.float32)




class UNet(nn.Module):
    """U-Net style encoder/decoder used as a coordinate regressor.

    Four encoder stages and a bottleneck are followed by four decoder stages
    with skip connections. A 1x1 convolution maps the last feature map to
    ``out_channels`` channels, each of which is globally average-pooled, so
    the network returns a tensor of shape ``(batch, out_channels)``.
    """

    def __init__(self, in_channels=3, out_channels=NUM_LANDMARKS):
        super().__init__()

        # Encoder
        self.encoder1 = self._conv_block(in_channels, 64)
        self.encoder2 = self._conv_block(64, 128)
        self.encoder3 = self._conv_block(128, 256)
        self.encoder4 = self._conv_block(256, 512)

        # ceil_mode keeps the last row/column when a spatial size is odd
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)

        # Bottleneck
        self.bottleneck = self._conv_block(512, 1024)

        # Decoder: each stage upsamples, then fuses the concatenated skip features
        self.upconv4 = self._up_conv(1024, 512)
        self.decoder4 = self._conv_block(1024, 512)
        self.upconv3 = self._up_conv(512, 256)
        self.decoder3 = self._conv_block(512, 256)
        self.upconv2 = self._up_conv(256, 128)
        self.decoder2 = self._conv_block(256, 128)
        self.upconv1 = self._up_conv(128, 64)
        self.decoder1 = self._conv_block(128, 64)

        # Regression head
        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)
        self.global_pool = nn.AdaptiveAvgPool2d(1)

    @staticmethod
    def _conv_block(n_in, n_out):
        """Build two (3x3 conv -> batch norm -> ReLU) layers in sequence."""
        layers = [
            nn.Conv2d(n_in, n_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(n_out),
            nn.ReLU(inplace=True),
            nn.Conv2d(n_out, n_out, kernel_size=3, padding=1),
            nn.BatchNorm2d(n_out),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    @staticmethod
    def _up_conv(n_in, n_out):
        """Build a stride-2 transposed convolution that doubles height and width."""
        return nn.ConvTranspose2d(n_in, n_out, kernel_size=2, stride=2)

    def forward(self, x):
        """Map a ``(batch, in_channels, H, W)`` input to ``(batch, out_channels)``."""
        # Contracting path; every stage output is kept for its skip connection
        e1 = self.encoder1(x)
        e2 = self.encoder2(self.pool(e1))
        e3 = self.encoder3(self.pool(e2))
        e4 = self.encoder4(self.pool(e3))

        features = self.bottleneck(self.pool(e4))

        # Expanding path, deepest stage first
        decoder_stages = (
            (self.upconv4, self.decoder4, e4),
            (self.upconv3, self.decoder3, e3),
            (self.upconv2, self.decoder2, e2),
            (self.upconv1, self.decoder1, e1),
        )
        for upsample, fuse, skip in decoder_stages:
            # Upsampling can overshoot by one pixel after ceil-mode pooling,
            # so trim to the skip tensor's size before concatenating.
            upsampled = self.crop(upsample(features), skip)
            features = fuse(torch.cat([upsampled, skip], dim=1))

        # One map per output value, averaged over all spatial positions
        pooled = self.global_pool(self.final_conv(features))
        return pooled.view(pooled.size(0), -1)

    def crop(self, source, target):
        """Return ``source`` trimmed to the height and width of ``target``."""
        _batch, _channels, height, width = target.size()
        return source[:, :, :height, :width]

# Load and prepare data
full_df = pd.read_csv(CSV_PATH)

# Fail early with a clear message when the CSV does not provide exactly one
# target column per model output; otherwise the mismatch only surfaces as a
# tensor-size error inside the loss.
num_target_columns = full_df.shape[1] - 1
if num_target_columns != NUM_LANDMARKS:
    raise ValueError(
        f"{CSV_PATH} has {num_target_columns} target columns after the image name, "
        f"but the model predicts NUM_LANDMARKS={NUM_LANDMARKS} values."
    )

train_df, val_df = train_test_split(full_df, test_size=0.2, random_state=42)
train_dataset = FetalLandmarkDataset(train_df)
val_dataset = FetalLandmarkDataset(val_df)
# Normalise the validation targets with the statistics fitted on the training
# split, so training and validation losses are measured in the same space and
# no validation statistics leak into the normalisation.
val_dataset.scaler = train_dataset.scaler
val_dataset.coordinates = train_dataset.scaler.transform(val_df.iloc[:, 1:].values)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=4)

# Initialize model
model = UNet().to(DEVICE)
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=0.01)
# NOTE(review): with patience=5 and NUM_EPOCHS=5 the scheduler will not reduce
# the learning rate during this run — confirm this is intended.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5)
criterion = nn.MSELoss()

# Training loop
best_val_loss = float('inf')
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

def compute_accuracy(preds, targets, threshold=THRESHOLD):
    """Return the percentage of samples whose prediction error is below ``threshold``.

    The error of a sample is the Euclidean norm of ``preds - targets`` taken
    over axis 1, i.e. over all values of that sample at once. It is measured
    in whatever units ``preds`` and ``targets`` are expressed in.
    """
    def as_numpy(tensor):
        return tensor.cpu().detach().numpy()

    distances = np.linalg.norm(as_numpy(preds) - as_numpy(targets), axis=1)
    within_threshold = distances < threshold
    return np.mean(within_threshold) * 100

for epoch in range(NUM_EPOCHS):
    # ---- Training pass ----
    model.train()
    running_loss, running_acc = 0.0, 0.0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Train]")

    for images, targets in progress_bar:
        images, targets = images.to(DEVICE), targets.to(DEVICE)
        batch_size = images.size(0)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        # Weight both metrics by the batch size so a smaller final batch does
        # not count as much as a full one in the epoch average.
        running_loss += batch_loss * batch_size
        running_acc += compute_accuracy(outputs, targets) * batch_size
        progress_bar.set_postfix({'loss': batch_loss})

    num_train_samples = len(train_loader.dataset)
    train_loss = running_loss / num_train_samples
    train_acc = running_acc / num_train_samples
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    # ---- Validation pass (no gradient tracking) ----
    model.eval()
    val_loss, val_acc_sum = 0.0, 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(DEVICE), targets.to(DEVICE)
            batch_size = images.size(0)
            outputs = model(images)
            val_loss += criterion(outputs, targets).item() * batch_size
            val_acc_sum += compute_accuracy(outputs, targets) * batch_size

    num_val_samples = len(val_loader.dataset)
    val_loss /= num_val_samples
    val_acc = val_acc_sum / num_val_samples
    val_losses.append(val_loss)
    val_accuracies.append(val_acc)
    # ReduceLROnPlateau is driven by the monitored metric, here the validation loss
    scheduler.step(val_loss)

    print(f"\nEpoch {epoch+1} Summary:")
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

    # Keep a checkpoint of the weights with the lowest validation loss so far
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_unet_landmark_model.pth")
        print("Saved new best model!")

print("Training completed!")
# Report the metrics recorded for the last epoch
print("\nFinal Training Statistics:")
final_metrics = (
    ("Total Training Accuracy", train_accuracies, "{:.2f}%"),
    ("Total Training Loss", train_losses, "{:.4f}"),
    ("Total Validation Accuracy", val_accuracies, "{:.2f}%"),
    ("Total Validation Loss", val_losses, "{:.4f}"),
)
for caption, history, template in final_metrics:
    print(f"{caption}: " + template.format(history[-1]))


# Draw the loss and accuracy histories side by side
panels = (
    ("Loss Progress", "Loss",
     (train_losses, 'Training Loss'), (val_losses, 'Validation Loss')),
    ("Accuracy Progress", "Accuracy (%)",
     (train_accuracies, 'Training Accuracy'), (val_accuracies, 'Validation Accuracy')),
)
plt.figure(figsize=(10, 5))
for position, (title, y_label, *curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for series, legend_label in curves:
        plt.plot(series, label=legend_label)
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel(y_label)
    plt.legend()

plt.show()

  return F.mse_loss(input, target, reduction=self.reduction)
Epoch 1/5 [Train]:   0%|          | 0/142 [00:01<?, ?it/s]


RuntimeError: The size of tensor a (4) must match the size of tensor b (8) at non-singleton dimension 1

In [None]:
# Persist the weights `model` currently holds to Drive. This file is separate
# from "best_unet_landmark_model.pth", which the training loop writes whenever
# the validation loss improves.
torch.save(model.state_dict(), '/content/drive/MyDrive/landmark_detection/unet_epoch_iteration_1.pth')

In [None]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

# Define constants
# NOTE(review): the training dataset in this notebook feeds images at their
# native resolution (its cv2.resize call is commented out), so resizing to
# 224x224 here does not obviously match training — confirm the intended size.
IMAGE_SIZE = (224, 224)  # Passed to cv2.resize as (width, height)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def predict_and_save_to_csv(image_folder, output_csv, model, scaler=None):
    """Predict landmarks for every readable image in a folder and write them to CSV.

    Args:
        image_folder: Directory whose entries are read with ``cv2.imread``.
            Entries that cannot be read produce a warning and are skipped.
        output_csv: Path of the CSV file to write.
        model: Network that returns 8 values per image; it is moved to
            ``DEVICE`` and switched to evaluation mode.
        scaler: Optional fitted scaler exposing ``inverse_transform``, e.g. the
            ``StandardScaler`` the training targets were normalised with. When
            given, each prediction is mapped back to the original coordinate
            units before it is saved. When ``None`` (the default) the raw model
            outputs are written unchanged; for a model trained on normalised
            targets these are NOT pixel coordinates.
    """
    model.to(DEVICE)
    model.eval()  # Set model to evaluation mode

    # One row per image: [image name, 8 predicted values]
    predictions = []

    # os.listdir returns entries in arbitrary order; sort for a reproducible CSV
    for img_name in sorted(os.listdir(image_folder)):
        img_path = os.path.join(image_folder, img_name)

        # Load image in grayscale; cv2.imread returns None on failure
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Warning: Failed to read image {img_name}")
            continue

        # Convert grayscale to 3-channel RGB
        image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Resize, then scale intensities to [0, 1]
        image_resized = cv2.resize(image_rgb, IMAGE_SIZE)
        image_resized = image_resized.astype(np.float32) / 255.0

        # HWC -> CHW, then add the batch dimension
        image_tensor = torch.from_numpy(image_resized).permute(2, 0, 1).unsqueeze(0).to(DEVICE)

        # Get the predictions from the model
        with torch.no_grad():
            landmarks = model(image_tensor)  # Output shape: (1, 8)

        landmarks_np = landmarks.cpu().numpy().reshape(1, -1)
        if scaler is not None:
            # Undo the target normalisation applied at training time
            landmarks_np = scaler.inverse_transform(landmarks_np)

        # Append predictions with image name
        predictions.append([img_name] + landmarks_np.flatten().tolist())

    # Create a dataframe and save to CSV
    df_predictions = pd.DataFrame(predictions, columns=[
        "image_name", "ofd_1_x", "ofd_1_y", "ofd_2_x", "ofd_2_y",
        "bpd_1_x", "bpd_1_y", "bpd_2_x", "bpd_2_y"
    ])
    df_predictions.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Inputs for this inference run: the folder of test images and the CSV to produce
image_folder = "/content/drive/MyDrive/landmark_detection/test"
output_csv = "predictions_unet.csv"

# `model` must already exist in the notebook session before this call
predict_and_save_to_csv(
    image_folder=image_folder,
    output_csv=output_csv,
    model=model,
)


Predictions saved to predictions_unet.csv


In [None]:
import pandas as pd

def compare_predictions(pred_csv, ground_truth_csv, image_size=299):
    """Print per-landmark mean absolute errors between predictions and ground truth.

    Both CSV files are joined on their ``image_name`` column. For each of the
    landmarks ofd_1, ofd_2, bpd_1 and bpd_2 the mean absolute x and y errors
    are averaged and printed, followed by the same value expressed as a
    percentage of ``image_size``.

    Args:
        pred_csv: Path of the CSV file with predicted coordinates.
        ground_truth_csv: Path of the CSV file with reference coordinates.
        image_size: Length used as the denominator of the relative error.

    Returns:
        None. All results are printed.
    """
    df_pred = pd.read_csv(pred_csv)
    df_gt = pd.read_csv(ground_truth_csv)

    # Columns present in both files receive the _pred / _gt suffixes
    df_merged = pd.merge(df_pred, df_gt, on="image_name", suffixes=('_pred', '_gt'))

    if df_merged.empty:
        # Without this guard every mean below would silently be reported as nan.
        print("Warning: no rows share an image_name between the two files; nothing to compare.")
        return

    landmark_names = ("ofd_1", "ofd_2", "bpd_1", "bpd_2")

    # (landmark number, mean absolute error, relative error in percent)
    results = []
    for number, name in enumerate(landmark_names, start=1):
        x_pred_col, y_pred_col = f"{name}_x_pred", f"{name}_y_pred"
        x_gt_col, y_gt_col = f"{name}_x_gt", f"{name}_y_gt"

        required = (x_pred_col, y_pred_col, x_gt_col, y_gt_col)
        if not all(col in df_merged.columns for col in required):
            print(f"Warning: One or more columns for landmark {number} are missing.")
            continue

        mean_x_error = (df_merged[x_pred_col] - df_merged[x_gt_col]).abs().mean()
        mean_y_error = (df_merged[y_pred_col] - df_merged[y_gt_col]).abs().mean()
        mean_error = (mean_x_error + mean_y_error) / 2  # Average of x and y errors

        results.append((number, mean_error, (mean_error / image_size) * 100))

    # Each result keeps its own landmark number, so labels stay correct even
    # when an earlier landmark was skipped.
    print("Average Absolute Errors (in pixels):")
    for number, error, _ in results:
        print(f"Landmark {number}: {error:.4f} pixels")

    print("\nRelative Errors (% of image size):")
    for number, _, rel_error in results:
        print(f"Landmark {number}: {rel_error:.2f}%")

# Files to compare: the predictions written above and the reference annotations
pred_csv = "predictions_unet.csv"
ground_truth_csv = "/content/drive/MyDrive/landmark_detection/test.csv"

# Print the per-landmark error report
compare_predictions(pred_csv=pred_csv, ground_truth_csv=ground_truth_csv)


Average Absolute Errors (in pixels):
Landmark 1: nan pixels
Landmark 2: nan pixels
Landmark 3: nan pixels
Landmark 4: nan pixels

Relative Errors (% of image size):
Landmark 1: nan%
Landmark 2: nan%
Landmark 3: nan%
Landmark 4: nan%


In [None]:
import os
import cv2
import torch
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

# Define constants
# NOTE(review): the training dataset in this notebook feeds images at their
# native resolution (its cv2.resize call is commented out), so resizing to
# 224x224 here does not obviously match training — confirm the intended size.
IMAGE_SIZE = (224, 224)  # Passed to cv2.resize as (width, height)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def predict_and_save_to_csv(image_folder, output_csv, model, scaler=None):
    """Predict landmarks for every readable image in a folder and write them to CSV.

    Args:
        image_folder: Directory whose entries are read with ``cv2.imread``.
            Entries that cannot be read produce a warning and are skipped.
        output_csv: Path of the CSV file to write.
        model: Network that returns 8 values per image; it is moved to
            ``DEVICE`` and switched to evaluation mode.
        scaler: Optional fitted scaler exposing ``inverse_transform``, e.g. the
            ``StandardScaler`` the training targets were normalised with. When
            given, each prediction is mapped back to the original coordinate
            units before it is saved. When ``None`` (the default) the raw model
            outputs are written unchanged; for a model trained on normalised
            targets these are NOT pixel coordinates.
    """
    model.to(DEVICE)
    model.eval()  # Set model to evaluation mode

    # One row per image: [image name, 8 predicted values]
    predictions = []

    # os.listdir returns entries in arbitrary order; sort for a reproducible CSV
    for img_name in sorted(os.listdir(image_folder)):
        img_path = os.path.join(image_folder, img_name)

        # Load image in grayscale; cv2.imread returns None on failure
        image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print(f"Warning: Failed to read image {img_name}")
            continue

        # Convert grayscale to 3-channel RGB
        image_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Resize, then scale intensities to [0, 1]
        image_resized = cv2.resize(image_rgb, IMAGE_SIZE)
        image_resized = image_resized.astype(np.float32) / 255.0

        # HWC -> CHW, then add the batch dimension
        image_tensor = torch.from_numpy(image_resized).permute(2, 0, 1).unsqueeze(0).to(DEVICE)

        # Get the predictions from the model
        with torch.no_grad():
            landmarks = model(image_tensor)  # Output shape: (1, 8)

        landmarks_np = landmarks.cpu().numpy().reshape(1, -1)
        if scaler is not None:
            # Undo the target normalisation applied at training time
            landmarks_np = scaler.inverse_transform(landmarks_np)

        # Append predictions with image name
        predictions.append([img_name] + landmarks_np.flatten().tolist())

    # Create a dataframe and save to CSV
    df_predictions = pd.DataFrame(predictions, columns=[
        "image_name", "ofd_1_x", "ofd_1_y", "ofd_2_x", "ofd_2_y",
        "bpd_1_x", "bpd_1_y", "bpd_2_x", "bpd_2_y"
    ])
    df_predictions.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

# Inputs for this inference run: the folder of training images and the CSV to produce
image_folder = "image_train"
output_csv = "predictions_unet_train.csv"

# `model` must already exist in the notebook session before this call
predict_and_save_to_csv(
    image_folder=image_folder,
    output_csv=output_csv,
    model=model,
)


Predictions saved to predictions_unet_train.csv


In [None]:
import pandas as pd

def compare_predictions(pred_csv, ground_truth_csv, image_size=299):
    """Print per-landmark mean absolute errors between predictions and ground truth.

    Both CSV files are joined on their ``image_name`` column. For each of the
    landmarks ofd_1, ofd_2, bpd_1 and bpd_2 the mean absolute x and y errors
    are averaged and printed, followed by the same value expressed as a
    percentage of ``image_size``.

    Args:
        pred_csv: Path of the CSV file with predicted coordinates.
        ground_truth_csv: Path of the CSV file with reference coordinates.
        image_size: Length used as the denominator of the relative error.

    Returns:
        None. All results are printed.
    """
    df_pred = pd.read_csv(pred_csv)
    df_gt = pd.read_csv(ground_truth_csv)

    # Columns present in both files receive the _pred / _gt suffixes
    df_merged = pd.merge(df_pred, df_gt, on="image_name", suffixes=('_pred', '_gt'))

    if df_merged.empty:
        # Without this guard every mean below would silently be reported as nan.
        print("Warning: no rows share an image_name between the two files; nothing to compare.")
        return

    landmark_names = ("ofd_1", "ofd_2", "bpd_1", "bpd_2")

    # (landmark number, mean absolute error, relative error in percent)
    results = []
    for number, name in enumerate(landmark_names, start=1):
        x_pred_col, y_pred_col = f"{name}_x_pred", f"{name}_y_pred"
        x_gt_col, y_gt_col = f"{name}_x_gt", f"{name}_y_gt"

        required = (x_pred_col, y_pred_col, x_gt_col, y_gt_col)
        if not all(col in df_merged.columns for col in required):
            print(f"Warning: One or more columns for landmark {number} are missing.")
            continue

        mean_x_error = (df_merged[x_pred_col] - df_merged[x_gt_col]).abs().mean()
        mean_y_error = (df_merged[y_pred_col] - df_merged[y_gt_col]).abs().mean()
        mean_error = (mean_x_error + mean_y_error) / 2  # Average of x and y errors

        results.append((number, mean_error, (mean_error / image_size) * 100))

    # Each result keeps its own landmark number, so labels stay correct even
    # when an earlier landmark was skipped.
    print("Average Absolute Errors (in pixels):")
    for number, error, _ in results:
        print(f"Landmark {number}: {error:.4f} pixels")

    print("\nRelative Errors (% of image size):")
    for number, _, rel_error in results:
        print(f"Landmark {number}: {rel_error:.2f}%")

# Files to compare: the training-set predictions written above and the training annotations
pred_csv = "predictions_unet_train.csv"
ground_truth_csv = "train.csv"

# Print the per-landmark error report
compare_predictions(pred_csv=pred_csv, ground_truth_csv=ground_truth_csv)


Average Absolute Errors (in pixels):
Landmark 1: 265.7234 pixels
Landmark 2: 408.0808 pixels
Landmark 3: 239.0683 pixels
Landmark 4: 428.7966 pixels

Relative Errors (% of image size):
Landmark 1: 88.87%
Landmark 2: 136.48%
Landmark 3: 79.96%
Landmark 4: 143.41%
