In [None]:
# Mount Google Drive at /content/drive; every dataset, checkpoint and
# submission path used later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

import os
# Check if the directory exists
dataset_dir = "/content/drive/MyDrive/Colab Notebooks/dataset"
if not os.path.exists(dataset_dir):
  os.makedirs(dataset_dir)
  print(f"Directory '{dataset_dir}' created successfully.")

!unzip /content/drive/MyDrive/Colab\ Notebooks/521153S-3005-final-project.zip -d /content/drive/MyDrive/Colab\ Notebooks/dataset

Directory '/content/drive/MyDrive/Colab Notebooks/dataset' created successfully.
Archive:  /content/drive/MyDrive/Colab Notebooks/521153S-3005-final-project.zip
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/sample_submission.csv  
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/test.csv  
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/test/347/347_l1.jpg  
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/test/347/347_l2.jpg  
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/test/347/347_r1.jpg  
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/test/347/347_r2.jpg  
  inflating: /content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD/test/353/353_l1.jpg  
  inflating: /content/

In [None]:
import copy
import os
import random
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.metrics import cohen_kappa_score, precision_score, recall_score, accuracy_score
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from torchvision.transforms.functional import to_pil_image
from tqdm import tqdm

# Hyper Parameters
batch_size = 64  # batch size given to the train/val/test DataLoaders
num_classes = 5  # 5 DR levels
learning_rate = 5e-5  # learning rate given to the Adam optimizer
num_epochs = 20  # Reduced epochs for faster execution

class RetinopathyDataset(Dataset):
    """Image dataset driven by a CSV annotation file.

    Two sample layouts are supported:

    * ``mode='single'`` -- one sample per CSV row (one image).
    * any other ``mode`` value -- one sample per (patient, eye) group, built
      from the first two CSV rows of that group.

    Args:
        ann_file: Path of the CSV file. ``img_path`` is always read,
            ``patient_DR_Level`` is read unless ``test`` is true, and
            ``image_id`` is read in the paired layout.
        image_dir: Directory that every ``img_path`` value is joined onto.
        transform: Optional callable applied to each loaded PIL image.
        mode: ``'single'`` for per-image samples, anything else for paired samples.
        test: When true, no label is read from the CSV or returned with a sample.

    Raises:
        ValueError: In the paired layout, when a (patient, eye) group contains
            fewer than two images.
    """

    def __init__(self, ann_file, image_dir, transform=None, mode='single', test=False):
        self.ann_file = ann_file
        self.image_dir = image_dir
        self.transform = transform
        self.test = test
        self.mode = mode

        # The sample index is built once, up front; images are only opened on access.
        if self.mode == 'single':
            self.data = self.load_data()
        else:
            self.data = self.load_data_dual()

    def __len__(self):
        """Return the number of samples (rows in single mode, eye groups otherwise)."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the sample at ``index`` in the layout selected by ``mode``."""
        if self.mode == 'single':
            return self.get_item(index)
        return self.get_item_dual(index)

    @staticmethod
    def _open_rgb(path):
        """Load ``path`` as an RGB PIL image and close the underlying file."""
        # convert() returns an independent in-memory image, so it stays valid
        # after the context manager has closed the file.
        with Image.open(path) as img:
            return img.convert('RGB')

    def load_data(self):
        """Build one record per CSV row.

        Returns:
            A list of dicts with key ``img_path`` and, unless ``test`` is true,
            ``dr_level``.
        """
        df = pd.read_csv(self.ann_file)
        data = []
        for _, row in df.iterrows():
            file_info = {'img_path': os.path.join(self.image_dir, row['img_path'])}
            if not self.test:
                file_info['dr_level'] = int(row['patient_DR_Level'])
            data.append(file_info)
        return data

    def get_item(self, index):
        """Return ``(image, label)`` for one image, or just ``image`` in test mode."""
        data = self.data[index]
        img = self._open_rgb(data['img_path'])
        if self.transform:
            img = self.transform(img)

        if self.test:
            return img
        label = torch.tensor(data['dr_level'], dtype=torch.int64)
        return img, label

    def load_data_dual(self):
        """Build one record per (patient, eye) group, pairing its first two images.

        Groups are formed from ``image_id``: the text before the first ``_`` is
        the patient id and the first character after it identifies the eye.

        Returns:
            A list of dicts with keys ``img_path1``, ``img_path2`` and, unless
            ``test`` is true, ``dr_level`` (taken from the group's first row).

        Raises:
            ValueError: If a group has fewer than two rows.
        """
        df = pd.read_csv(self.ann_file)
        df['prefix'] = df['image_id'].str.split('_').str[0]  # The patient id of each image
        df['suffix'] = df['image_id'].str.split('_').str[1].str[0]  # The left or right eye
        grouped = df.groupby(['prefix', 'suffix'])
        data = []
        for (prefix, suffix), group in grouped:
            # Fail with a message that names the offending group instead of the
            # opaque positional-indexer error pandas would raise on iloc[1].
            if len(group) < 2:
                raise ValueError(
                    f"Expected at least two images for patient '{prefix}' eye '{suffix}' "
                    f"in {self.ann_file}, found {len(group)}."
                )
            file_info = {
                'img_path1': os.path.join(self.image_dir, group.iloc[0]['img_path']),
                'img_path2': os.path.join(self.image_dir, group.iloc[1]['img_path']),
            }
            if not self.test:
                file_info['dr_level'] = int(group.iloc[0]['patient_DR_Level'])
            data.append(file_info)
        return data

    def get_item_dual(self, index):
        """Return ``([image1, image2], label)``, or just the image list in test mode."""
        data = self.data[index]
        img1 = self._open_rgb(data['img_path1'])
        img2 = self._open_rgb(data['img_path2'])

        # The transform is applied to each view independently, so random
        # augmentations are drawn separately for the two images.
        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        if self.test:
            return [img1, img2]
        label = torch.tensor(data['dr_level'], dtype=torch.int64)
        return [img1, img2], label

# Per-channel normalization statistics shared by the training and evaluation pipelines.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, then random crop / flips / brightness jitter as
# augmentation, then tensor conversion and normalization.
_train_steps = [
    transforms.Resize((256, 256)),
    transforms.RandomCrop((210, 210)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ColorJitter(brightness=(0.1, 0.9)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_train = transforms.Compose(_train_steps)

# Evaluation pipeline: deterministic resize, tensor conversion and normalization only.
_test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform_test = transforms.Compose(_test_steps)

def train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler, num_epochs=10,
                checkpoint_path='/content/drive/MyDrive/Colab Notebooks/dataset/model1.pth'):
    """Train ``model`` and return it carrying the best-validation weights.

    After every epoch the model is scored on ``val_loader`` with the
    quadratic-weighted kappa from ``evaluate_model``. Whenever that score
    improves, a snapshot of the weights is kept in memory and written to
    ``checkpoint_path``.

    Args:
        model: Network to optimize; called as ``model(images)``.
        train_loader: Iterable of ``(images, labels)`` batches. ``images`` may be
            a tensor or a list/tuple of tensors (paired-image batches).
        val_loader: Loader handed to ``evaluate_model`` after each epoch.
        device: Device (or device string) the batches are moved to.
        criterion: Loss, called as ``criterion(outputs, labels.long())``.
        optimizer: Optimizer updating ``model``'s parameters.
        lr_scheduler: Scheduler stepped once at the end of every epoch.
        num_epochs: Number of passes over ``train_loader``.
        checkpoint_path: File the best state dict is saved to with ``torch.save``.

    Returns:
        The same ``model`` object. If at least one epoch improved the validation
        kappa, its weights are reset to those of the best epoch before returning.
    """
    # Mixed precision is only used on CUDA; with enabled=False both the scaler
    # and the autocast context are no-ops, so the same loop also runs on CPU.
    use_amp = torch.device(device).type == 'cuda'
    scaler = torch.amp.GradScaler('cuda', enabled=use_amp)  # replaces deprecated torch.cuda.amp.GradScaler()

    best_state = None
    best_epoch = None
    best_val_kappa = -1.0

    for epoch in range(1, num_epochs + 1):
        print(f'\nEpoch {epoch}/{num_epochs}')
        running_loss = []
        all_preds = []
        all_labels = []

        model.train()  # evaluate_model() switches to eval mode, so re-enable training each epoch
        with tqdm(total=len(train_loader), desc='Training', unit='batch', file=sys.stdout) as pbar:
            for images, labels in train_loader:
                # Paired-image batches arrive as a list of tensors.
                if isinstance(images, (list, tuple)):
                    images = [view.to(device) for view in images]
                else:
                    images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                with torch.autocast(device_type='cuda', enabled=use_amp):
                    outputs = model(images)
                    loss = criterion(outputs, labels.long())

                # Scale the loss so reduced-precision gradients do not underflow;
                # step() unscales before applying the optimizer update.
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                preds = torch.argmax(outputs.detach(), 1)
                all_preds.extend(preds.cpu().tolist())
                all_labels.extend(labels.cpu().tolist())

                batch_loss = loss.item()
                running_loss.append(batch_loss)
                pbar.set_postfix({'Loss': f'{batch_loss:.4f}'})
                pbar.update(1)

        lr_scheduler.step()
        epoch_loss = sum(running_loss) / len(running_loss)
        train_kappa = cohen_kappa_score(all_labels, all_preds, weights='quadratic')
        print(f'[Train] Kappa: {train_kappa:.4f}, Loss: {epoch_loss:.4f}')

        val_metrics = evaluate_model(model, val_loader, device)
        val_kappa = val_metrics['kappa']
        print(f'[Val] Kappa: {val_kappa:.4f}')

        if val_kappa > best_val_kappa:
            best_val_kappa = val_kappa
            best_epoch = epoch
            # state_dict() hands back references to the live parameters; deep-copy
            # them so later epochs cannot overwrite the snapshot.
            best_state = copy.deepcopy(model.state_dict())
            torch.save(best_state, checkpoint_path)

    print(f'[Val] Best kappa: {best_val_kappa:.4f}, Epoch {best_epoch}')

    # Return the best-scoring weights rather than whatever the last epoch produced.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

def evaluate_model(model, test_loader, device):
    """Score ``model`` on a labelled loader with quadratic-weighted Cohen's kappa.

    Args:
        model: Network to evaluate; it is switched to eval mode and left there.
        test_loader: Iterable of ``(images, labels)`` batches. ``images`` may be
            a tensor or a list/tuple of tensors (paired-image batches).
        device: Device the image tensors are moved to before the forward pass.

    Returns:
        ``{'kappa': <float>}`` -- agreement between the argmax predictions and
        the labels.
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            # Paired-image batches arrive as a list of tensors.
            if isinstance(images, (list, tuple)):
                images = [view.to(device) for view in images]
            else:
                images = images.to(device)
            outputs = model(images)
            preds = torch.argmax(outputs, 1)
            all_preds.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so they never need to visit the device.
            all_labels.extend(labels.cpu().tolist())

    kappa = cohen_kappa_score(all_labels, all_preds, weights='quadratic')
    return {'kappa': kappa}

def create_submission(model, test_loader, device, output_file='/content/drive/MyDrive/Colab Notebooks/dataset/submission1.csv'):
    """Predict a class for every test image and write an ``ID,TARGET`` CSV.

    Args:
        model: Trained network; it is switched to eval mode.
        test_loader: Loader yielding image batches only (no labels). It must
            iterate ``test_loader.dataset.data`` in order (no shuffling), because
            predictions are matched to records by position.
        device: Device the image batches are moved to.
        output_file: Destination path of the CSV.

    The ``ID`` column holds each image's file name without directory or
    extension (``.../347/347_l1.jpg`` -> ``347_l1``); ``TARGET`` holds the
    argmax class index.
    """
    model.eval()
    results = []

    with torch.no_grad():
        for images in test_loader:
            outputs = model(images.to(device))
            results.extend(torch.argmax(outputs, 1).cpu().tolist())

    # Predictions come back in dataset order, so pair them positionally with the
    # records instead of recomputing indices from the loader's batch size.
    records = test_loader.dataset.data[:len(results)]
    # NOTE(review): assumes the competition ID is the image file stem, matching
    # the `image_id` naming used by the annotation CSVs -- confirm against
    # sample_submission.csv.
    image_ids = [os.path.splitext(os.path.basename(record['img_path']))[0] for record in records]

    submission = pd.DataFrame({"ID": image_ids, "TARGET": results})
    submission.to_csv(output_file, index=False)
    print(f'Submission saved to {output_file}')


class MyModel(nn.Module):
    """ResNet-18 classifier whose final layer is resized to ``num_classes`` outputs.

    Args:
        num_classes: Size of the output layer.
    """

    def __init__(self, num_classes=5):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision; the weights enum selects
        # the same ImageNet checkpoint explicitly.
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        # Swap the stock classification head for one sized to this task.
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for the image batch ``x``."""
        return self.backbone(x)

if __name__ == '__main__':
    # Seed the Python, NumPy and PyTorch RNGs so shuffling and augmentation are
    # repeatable from run to run.
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    # All inputs and outputs live under one folder on the mounted Drive.
    dataset_root = '/content/drive/MyDrive/Colab Notebooks/dataset'
    data_root = os.path.join(dataset_root, '521153S-3005-final-project', 'DeepDRiD')
    checkpoint_path = os.path.join(dataset_root, 'model1.pth')

    train_dataset = RetinopathyDataset(os.path.join(data_root, 'train.csv'), os.path.join(data_root, 'train'), transform_train)
    val_dataset = RetinopathyDataset(os.path.join(data_root, 'val.csv'), os.path.join(data_root, 'val'), transform_test)
    test_dataset = RetinopathyDataset(os.path.join(data_root, 'test.csv'), os.path.join(data_root, 'test'), transform_test, test=True)

    # Only the training data is shuffled; create_submission relies on the test
    # loader preserving dataset order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = MyModel(num_classes=num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    model = train_model(model, train_loader, val_loader, device, criterion, optimizer, lr_scheduler,
                        num_epochs=num_epochs, checkpoint_path=checkpoint_path)

    # Build the submission from the checkpoint saved at the best validation kappa.
    # weights_only=True restricts unpickling to tensors and plain containers.
    model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))
    create_submission(model, test_loader, device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 159MB/s]
  scaler = torch.cuda.amp.GradScaler()  # Mixed precision training



Epoch 1/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [18:59<00:00, 59.95s/batch, Loss=0.8998]
[Train] Kappa: 0.5104, Loss: 1.3236
[Val] Kappa: 0.6919

Epoch 2/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:30<00:00, 17.38s/batch, Loss=0.9296]
[Train] Kappa: 0.7251, Loss: 0.9764
[Val] Kappa: 0.7204

Epoch 3/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:31<00:00, 17.44s/batch, Loss=0.7430]
[Train] Kappa: 0.7893, Loss: 0.8577
[Val] Kappa: 0.7432

Epoch 4/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.32s/batch, Loss=0.7347]
[Train] Kappa: 0.8111, Loss: 0.7716
[Val] Kappa: 0.7052

Epoch 5/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.32s/batch, Loss=0.7015]
[Train] Kappa: 0.8309, Loss: 0.6961
[Val] Kappa: 0.7199

Epoch 6/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.33s/batch, Loss=0.6149]
[Train] Kappa: 0.8360, Loss: 0.6610
[Val] Kappa: 0.7224

Epoch 7/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.34s/batch, Loss=0.9483]
[Train] Kappa: 0.8458, Loss: 0.6470
[Val] Kappa: 0.7262

Epoch 8/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.34s/batch, Loss=0.5078]
[Train] Kappa: 0.8502, Loss: 0.6133
[Val] Kappa: 0.7091

Epoch 9/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:27<00:00, 17.23s/batch, Loss=0.8145]
[Train] Kappa: 0.8514, Loss: 0.6230
[Val] Kappa: 0.7184

Epoch 10/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:27<00:00, 17.25s/batch, Loss=0.5726]
[Train] Kappa: 0.8555, Loss: 0.6037
[Val] Kappa: 0.7277

Epoch 11/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:27<00:00, 17.21s/batch, Loss=0.4912]
[Train] Kappa: 0.8701, Loss: 0.6111
[Val] Kappa: 0.7313

Epoch 12/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.32s/batch, Loss=0.6337]
[Train] Kappa: 0.8564, Loss: 0.6163
[Val] Kappa: 0.7295

Epoch 13/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:27<00:00, 17.22s/batch, Loss=0.6639]
[Train] Kappa: 0.8671, Loss: 0.6078
[Val] Kappa: 0.7291

Epoch 14/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.37s/batch, Loss=0.5672]
[Train] Kappa: 0.8545, Loss: 0.5914
[Val] Kappa: 0.7271

Epoch 15/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:29<00:00, 17.32s/batch, Loss=0.6841]
[Train] Kappa: 0.8777, Loss: 0.6019
[Val] Kappa: 0.7288

Epoch 16/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:26<00:00, 17.18s/batch, Loss=0.6957]
[Train] Kappa: 0.8641, Loss: 0.5883
[Val] Kappa: 0.7408

Epoch 17/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:30<00:00, 17.39s/batch, Loss=0.7845]
[Train] Kappa: 0.8590, Loss: 0.6132
[Val] Kappa: 0.7274

Epoch 18/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:26<00:00, 17.17s/batch, Loss=0.5808]
[Train] Kappa: 0.8740, Loss: 0.6063
[Val] Kappa: 0.7323

Epoch 19/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:35<00:00, 17.66s/batch, Loss=0.5106]
[Train] Kappa: 0.8580, Loss: 0.5961
[Val] Kappa: 0.7267

Epoch 20/20
Training:   0%|          | 0/19 [00:00<?, ?batch/s]

  with torch.cuda.amp.autocast():


Training: 100%|██████████| 19/19 [05:33<00:00, 17.54s/batch, Loss=0.5834]
[Train] Kappa: 0.8629, Loss: 0.6035
[Val] Kappa: 0.7313
[Val] Best kappa: 0.7432, Epoch 3
Submission saved to /content/drive/MyDrive/Colab Notebooks/dataset/submission1.csv


In [None]:
# List the contents of the extracted DeepDRiD folder.
!ls "/content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD"


sample_submission.csv  test  test.csv  train  train.csv  val  val.csv
