In [1]:
import torch
import copy
import os
import random
import sys
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.metrics import cohen_kappa_score, precision_score, recall_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm

#### Transforms

In [2]:
class SLORandomPad:
    def __init__(self, size):
        self.size = size

    def __call__(self, img):
        pad_width = max(0, self.size[0] - img.width)
        pad_height = max(0, self.size[1] - img.height)
        pad_left = random.randint(0, pad_width)
        pad_top = random.randint(0, pad_height)
        pad_right = pad_width - pad_left
        pad_bottom = pad_height - pad_top
        return transforms.functional.pad(img, (pad_left, pad_top, pad_right, pad_bottom))


class FundRandomRotate:
    def __init__(self, prob, degree):
        self.prob = prob
        self.degree = degree

    def __call__(self, img):
        if random.random() < self.prob:
            angle = random.uniform(-self.degree, self.degree)
            return transforms.functional.rotate(img, angle)
        return img

transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    SLORandomPad((224, 224)),
    FundRandomRotate(prob=0.5, degree=30),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

#### Dataset (Default)

In [3]:
class RetinopathyDataset(Dataset):
    def __init__(self, ann_file, image_dir, transform=None, mode='single', test=False):
        self.ann_file = ann_file
        self.image_dir = image_dir
        self.transform = transform

        self.test = test
        self.mode = mode

        if self.mode == 'single':
            self.data = self.load_data()
        else:
            self.data = self.load_data_dual()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        if self.mode == 'single':
            return self.get_item(index)
        else:
            return self.get_item_dual(index)

    # 1. single image
    def load_data(self):
        df = pd.read_csv(self.ann_file)

        data = []
        for _, row in df.iterrows():
            file_info = dict()
            file_info['id_code'] = os.path.join(self.image_dir, row['id_code'])
            if not self.test:
                file_info['diagnosis'] = int(row['diagnosis'])
            data.append(file_info)
        return data

    def get_item(self, index):
        data = self.data[index]
        img = Image.open(data['id_code']).convert('RGB')
        if self.transform:
            img = self.transform(img)

        if not self.test:
            label = torch.tensor(data['diagnosis'], dtype=torch.int64)
            return img, label
        else:
            return img

#### Dataset Custom

In [None]:
class DRDatasetPatient(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform

        # Group by patient_id
        self.patient_groups = self.dataframe.groupby('patient_id')

    def __len__(self):
        return len(self.patient_groups)

    def __getitem__(self, idx):
        # Get patient group
        patient_id = list(self.patient_groups.groups.keys())[idx]
        group = self.patient_groups.get_group(patient_id)

        # Load all images for this patient
        images = []
        labels = group['level'].iloc[0]  # Assume all images for a patient have the same label

        for img_name in group['image']:
            img_path = os.path.join(self.img_dir, img_name)
            img = Image.open(img_path).convert('RGB')

            if self.transform:
                img = self.transform(img)

            images.append(img)

        # Stack all images into a single tensor
        images = torch.stack(images)  # Shape: (num_images, C, H, W)
        return images, labels


In [None]:
# Group images by patient ID
df['patient_id'] = df['image'].str.split('_').str[0]  # Extract patient ID from image name

# Oversample patients in each class
balanced_df = df.groupby('level', group_keys=False).apply(
    lambda x: x.sample(n=df['level'].value_counts().max(), replace=True, random_state=42)
)


In [None]:
# Use the balanced DataFrame
train_dataset = DRDatasetPatient(balanced_df, img_dir, transform=transform_train)
val_dataset = DRDatasetPatient(val_df, img_dir, transform=transform_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Check the shapes
for images, labels in train_loader:
    print("Batch Image Shape:", images.shape)  # (batch_size, num_images, C, H, W)
    print("Batch Label Shape:", labels.shape)
    break


#### Model

In [12]:

class MyModel(nn.Module):
    def __init__(self, num_classes=5, dropout_rate=0.5):
        super().__init__()

        self.backbone = models.resnet18(pretrained=True, )
        self.backbone.fc = nn.Identity()  # Remove the original classification layer

        self.fc = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(256, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout_rate),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.fc(x)
        return x

In [13]:
def evaluate_model(model, test_loader, device, test_only=False, prediction_path='./test_predictions.csv'):
    model.eval()

    all_preds = []
    all_labels = []
    all_image_ids = []

    with tqdm(total=len(test_loader), desc=f'Evaluating', unit=' batch', file=sys.stdout) as pbar:
        for i, data in enumerate(test_loader):

            if test_only:
                images = data
            else:
                images, labels = data

            if not isinstance(images, list):
                images = images.to(device)  # single image case
            else:
                images = [x.to(device) for x in images]  # dual images case

            with torch.no_grad():
                outputs = model(images)
                preds = torch.argmax(outputs, 1)

            if not isinstance(images, list):
                # single image case
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.numpy())
                """
                image_ids = [
                    os.path.basename(test_loader.dataset.data[idx]['img_path']) for idx in
                    range(i * test_loader.batch_size, i * test_loader.batch_size + len(images))
                ]
                all_image_ids.extend(image_ids)
                if not test_only:
                    all_labels.extend(labels.numpy())
            else:
                # dual images case
                for k in range(2):
                    all_preds.extend(preds.cpu().numpy())
                    image_ids = [
                        os.path.basename(test_loader.dataset.data[idx][f'img_path{k + 1}']) for idx in
                        range(i * test_loader.batch_size, i * test_loader.batch_size + len(images[k]))
                    ]
                    all_image_ids.extend(image_ids)
                    if not test_only:
                        all_labels.extend(labels.numpy())
                        """

            pbar.update(1)

    # Save predictions to csv file for Kaggle online evaluation
    if test_only:
        df = pd.DataFrame({
            'ID': all_image_ids,
            'TARGET': all_preds
        })
        df.to_csv(prediction_path, index=False)
        print(f'[Test] Save predictions to {os.path.abspath(prediction_path)}')
    else:
        metrics = compute_metrics(all_preds, all_labels)
        return metrics


def compute_metrics(preds, labels, per_class=False):
    kappa = cohen_kappa_score(labels, preds, weights='quadratic')
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, average='weighted', zero_division=0)
    recall = recall_score(labels, preds, average='weighted', zero_division=0)

    # Calculate and print precision and recall for each class
    if per_class:
        precision_per_class = precision_score(labels, preds, average=None, zero_division=0)
        recall_per_class = recall_score(labels, preds, average=None, zero_division=0)
        return kappa, accuracy, precision, recall, precision_per_class, recall_per_class

    return kappa, accuracy, precision, recall

In [14]:
def train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=25,
                checkpoint_path='model.pth'):
    best_model = model.state_dict()
    best_epoch = None
    best_val_kappa = -1.0  # Initialize the best kappa score

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []

        model.train()

        with tqdm(total=len(train_loader), desc=f'Training', unit=' batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                if not isinstance(images, list):
                    images = images.to(device)  # single image case
                else:
                    images = [x.to(device) for x in images]  # dual images case

                labels = labels.to(device)

                optimizer.zero_grad()

                outputs = model(images)
                loss = criterion(outputs, labels.long())

                loss.backward()
                optimizer.step()

                preds = torch.argmax(outputs, 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

                running_loss.append(loss.item())

                pbar.set_postfix({'lr': f'{optimizer.param_groups[0]["lr"]:.1e}', 'Loss': f'{loss.item():.4f}'})
                pbar.update(1)

        lr_scheduler.step()

        epoch_loss = sum(running_loss) / len(running_loss)

        train_metrics = compute_metrics(all_preds, all_labels, per_class=True)

        kappa, accuracy, precision, recall = train_metrics[:4]
        print(f'[Train] Kappa: {kappa:.4f} Accuracy: {accuracy:.4f} '
              f'Precision: {precision:.4f} Recall: {recall:.4f} Loss: {epoch_loss:.4f}')



        if len(train_metrics) > 4:
            precision_per_class, recall_per_class = train_metrics[4:]
            for i, (precision, recall) in enumerate(zip(precision_per_class, recall_per_class)):
                print(f'[Train] Class {i}: Precision: {precision:.4f}, Recall: {recall:.4f}')

        # Evaluation on the validation set at the end of each epoch
        val_metrics = evaluate_model(model, val_loader, device)
        val_kappa, val_accuracy, val_precision, val_recall = val_metrics[:4]
        print(f'[Val] Kappa: {val_kappa:.4f} Accuracy: {val_accuracy:.4f} '
              f'Precision: {val_precision:.4f} Recall: {val_recall:.4f}')

        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            best_model = model.state_dict()
            torch.save(best_model, checkpoint_path)

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')

    return model

In [15]:
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

train_dir = "/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/final_project/data/APTOS/train_images/train_images/"
train_ann_file = "/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/final_project/data/APTOS/train_1.csv"
mode = 'single'

train_dataset = RetinopathyDataset(ann_file=train_ann_file, image_dir=train_dir, mode=mode, transform=transform_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

val_dir = "/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/final_project/data/APTOS/val_images/val_images/"
val_ann_file = "/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/final_project/data/APTOS/valid.csv"
mode = 'single'

val_dataset = RetinopathyDataset(ann_file=val_ann_file, image_dir=val_dir, mode=mode, transform=transform_test)
val_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)

In [16]:
batch_size = 24
num_classes = 5  # 5 DR levels
learning_rate = 0.0001
num_epochs = 20

In [17]:
model = MyModel()
model.to(device)

#for param in model.backbone.parameters():
#    param.requires_grad = True

for param in model.parameters():
    param.requires_grad = True

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

model = train_model(
        model, train_loader, val_loader, device, criterion, optimizer,
        lr_scheduler=lr_scheduler, num_epochs=num_epochs,
        checkpoint_path='/Users/shakibibnashameem/Documents/Practice/deep_learning_fall_2024/final_project/models/train_with_APTOS.pth'
    )





Epoch 1/20
Training: 100%|██████████| 92/92 [05:05<00:00,  3.32s/ batch, lr=1.0e-04, Loss=0.4952]
[Train] Kappa: 0.5058 Accuracy: 0.6212 Precision: 0.5505 Recall: 0.6212 Loss: 1.0548
[Train] Class 0: Precision: 0.7878, Recall: 0.8466
[Train] Class 1: Precision: 0.1462, Recall: 0.0633
[Train] Class 2: Precision: 0.5169, Recall: 0.7178
[Train] Class 3: Precision: 0.0481, Recall: 0.0325
[Train] Class 4: Precision: 0.0606, Recall: 0.0085
Evaluating: 100%|██████████| 92/92 [04:50<00:00,  3.15s/ batch]
[Val] Kappa: 0.6462 Accuracy: 0.7003 Precision: 0.5365 Recall: 0.7003

Epoch 2/20
Training: 100%|██████████| 92/92 [05:06<00:00,  3.33s/ batch, lr=1.0e-04, Loss=0.7283]
[Train] Kappa: 0.7608 Accuracy: 0.7321 Precision: 0.6429 Recall: 0.7321 Loss: 0.7406
[Train] Class 0: Precision: 0.9241, Recall: 0.9679
[Train] Class 1: Precision: 0.4167, Recall: 0.1000
[Train] Class 2: Precision: 0.5365, Recall: 0.8998
[Train] Class 3: Precision: 0.0000, Recall: 0.0000
[Train] Class 4: Precision: 0.0000, Rec