In [2]:
import copy
import os
import random
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision import models
from tqdm import tqdm
from PIL import Image
from sklearn.metrics import cohen_kappa_score
from sklearn.ensemble import GradientBoostingClassifier

# Modified Models
class BaseResNet(nn.Module):
    """Pretrained torchvision backbone whose final layer is swapped for a new linear head.

    Args:
        model_name: One of ``"resnet18"``, ``"resnet34"`` or ``"efficientnet"``
            (the latter selects ``models.efficientnet_b0``).
        num_classes: Number of outputs of the replacement linear layer.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """

    def __init__(self, model_name, num_classes=5):
        super().__init__()

        # Pick the constructor first so the unsupported case fails before
        # any weights are fetched.
        if model_name == "resnet18":
            build = models.resnet18
        elif model_name == "resnet34":
            build = models.resnet34
        elif model_name == "efficientnet":
            build = models.efficientnet_b0
        else:
            raise ValueError("Unsupported model_name")

        net = build(pretrained=True)

        # Backbones exposing an `fc` attribute get that layer replaced.
        # Otherwise the whole `classifier` attribute is replaced by a single
        # linear layer whose input width is read from `classifier[1]`.
        if hasattr(net, 'fc'):
            in_width = net.fc.in_features
            net.fc = nn.Linear(in_width, num_classes)
        else:
            in_width = net.classifier[1].in_features
            net.classifier = nn.Linear(in_width, num_classes)

        self.backbone = net

    def forward(self, x):
        """Run ``x`` through the backbone and return its output."""
        return self.backbone(x)

# Dataset Class
class RetinopathyDataset(Dataset):
    """Image dataset described by a CSV annotation file.

    The CSV must contain an ``img_path`` column holding paths relative to
    ``image_dir``. Unless ``test`` is true it must also contain a
    ``patient_DR_Level`` column, which is read as the integer label.

    Args:
        ann_file: Path of the CSV annotation file.
        image_dir: Directory that the ``img_path`` entries are joined onto.
        transform: Optional callable applied to each loaded RGB image.
        test: When true, items are ``(image, file basename)`` instead of
            ``(image, label)``.
    """

    def __init__(self, ann_file, image_dir, transform=None, test=False):
        self.ann_file = ann_file
        self.image_dir = image_dir
        self.transform = transform
        self.test = test
        self.data = self.load_data()

    def load_data(self):
        """Read the annotation CSV and return one record dict per row.

        Returns:
            A list of dicts. Each has ``'img_path'`` (joined with
            ``image_dir``) plus ``'label'`` (int) in labelled mode or
            ``'id'`` (the raw ``img_path`` value) in test mode.
        """
        df = pd.read_csv(self.ann_file)
        rel_paths = df['img_path']

        if self.test:
            # The label column is not touched in test mode, so it may be absent.
            return [
                {'img_path': os.path.join(self.image_dir, rel), 'id': rel}
                for rel in rel_paths
            ]

        return [
            {'img_path': os.path.join(self.image_dir, rel), 'label': int(level)}
            for rel, level in zip(rel_paths, df['patient_DR_Level'])
        ]

    def __len__(self):
        """Return the number of annotated rows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load item ``idx``.

        Returns:
            ``(img, label)`` in labelled mode, or ``(img, basename)`` in test
            mode, where ``img`` is the RGB image after ``transform`` (if any).
        """
        record = self.data[idx]
        img_path = record['img_path']

        # Image.open keeps the underlying file open; convert() produces an
        # independent in-memory image, so the handle is closed right away
        # instead of lingering until garbage collection.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        if self.transform:
            img = self.transform(img)

        if self.test:
            return img, os.path.basename(img_path)
        return img, record['label']

# Transforms
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: resize to 256x256, random 224x224 crop, random
# horizontal flip, then tensor conversion and per-channel normalisation.
transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((224, 224)),
    transforms.RandomHorizontalFlip(),
    *_tensor_and_normalize(),
])

# Evaluation pipeline: deterministic resize straight to 224x224, followed by
# the same tensor conversion and normalisation.
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    *_tensor_and_normalize(),
])

# Data Loaders
def get_dataloaders(batch_size, train_ann, val_ann, test_ann, train_dir, val_dir, test_dir, num_workers=4):
    """Build the train, validation and test ``DataLoader`` objects.

    The training set uses ``transform_train`` and is shuffled; the validation
    and test sets use ``transform_test`` and keep file order. The test
    dataset is created with ``test=True``.

    Args:
        batch_size: Batch size used by all three loaders.
        train_ann, val_ann, test_ann: Paths of the CSV annotation files.
        train_dir, val_dir, test_dir: Image directories matching each CSV.
        num_workers: Worker process count passed to every loader. Defaults
            to 4, the value that was previously hard-coded; pass 0 to load
            in the main process.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    train_dataset = RetinopathyDataset(train_ann, train_dir, transform_train)
    val_dataset = RetinopathyDataset(val_ann, val_dir, transform_test)
    test_dataset = RetinopathyDataset(test_ann, test_dir, transform_test, test=True)

    # Options common to all three loaders; only `shuffle` differs.
    shared = {"batch_size": batch_size, "num_workers": num_workers}

    train_loader = DataLoader(train_dataset, shuffle=True, **shared)
    val_loader = DataLoader(val_dataset, shuffle=False, **shared)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared)

    return train_loader, val_loader, test_loader

# Save Test Predictions
def save_test_predictions(predictions, ids, output_file="/content/drive/MyDrive/Colab Notebooks/dataset/boosting.csv"):
    """Write test predictions to a CSV with ``ID`` and ``Target`` columns.

    Args:
        predictions: Predicted values, one per entry in ``ids``.
        ids: Identifiers written to the ``ID`` column.
        output_file: Destination CSV path. Missing parent directories are
            created so the write does not fail after predictions have
            already been computed.
    """
    parent = os.path.dirname(output_file)
    if parent:
        # An empty dirname means "current directory"; nothing to create then.
        os.makedirs(parent, exist_ok=True)

    df = pd.DataFrame({
        "ID": ids,
        "Target": predictions
    })
    df.to_csv(output_file, index=False)
    print(f"Test predictions saved to {output_file}")

# Visualization Function
def plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies):
    """Show two figures: loss per epoch, then accuracy per epoch.

    Each figure overlays the training and validation curves. The x axis runs
    from 1 to ``len(train_losses)``.

    Args:
        train_losses: Per-epoch training loss values.
        val_losses: Per-epoch validation loss values.
        train_accuracies: Per-epoch training accuracy values.
        val_accuracies: Per-epoch validation accuracy values.
    """
    epochs = range(1, len(train_losses) + 1)

    # (metric name, training series, validation series), drawn in this order.
    panels = (
        ('Loss', train_losses, val_losses),
        ('Accuracy', train_accuracies, val_accuracies),
    )

    for metric, train_series, val_series in panels:
        plt.figure(figsize=(10, 5))
        plt.plot(epochs, train_series, label=f'Training {metric}')
        plt.plot(epochs, val_series, label=f'Validation {metric}')
        plt.xlabel('Epochs')
        plt.ylabel(metric)
        plt.title(f'{metric} Over Epochs')
        plt.legend()
        plt.show()

# Main Function
if __name__ == "__main__":
    # Use the GPU when torch can see one, otherwise fall back to the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Models, built in the order resnet18 -> resnet34 -> efficientnet.
    # NOTE(review): none of these three models is used by the code visible
    # in this cell; they may be consumed by later cells.
    resnet18, resnet34, efficientnet = (
        BaseResNet(name, num_classes=5).to(device)
        for name in ("resnet18", "resnet34", "efficientnet")
    )

    # Hyperparameters and dataset locations.
    batch_size = 32
    dataset_root = "/content/drive/MyDrive/Colab Notebooks/dataset/521153S-3005-final-project/DeepDRiD"

    train_ann = f"{dataset_root}/train.csv"
    val_ann = f"{dataset_root}/val.csv"
    test_ann = f"{dataset_root}/test.csv"

    train_dir = f"{dataset_root}/train/"
    val_dir = f"{dataset_root}/val/"
    test_dir = f"{dataset_root}/test/"

    train_loader, val_loader, test_loader = get_dataloaders(
        batch_size,
        train_ann=train_ann,
        val_ann=val_ann,
        test_ann=test_ann,
        train_dir=train_dir,
        val_dir=val_dir,
        test_dir=test_dir,
    )

    # Pull the whole training set into memory, one batch at a time, then
    # stack the batches along the sample axis.
    image_batches = []
    label_batches = []
    for imgs, labels in train_loader:
        image_batches.append(imgs.numpy())
        label_batches.append(labels.numpy())

    train_data = np.concatenate(image_batches)
    train_labels = np.concatenate(label_batches)

    # Flatten data for Gradient Boosting Classifier: one row per sample,
    # every remaining dimension collapsed into a single feature axis.
    train_data_flat = train_data.reshape(len(train_data), -1)

    # Boosting: fit a default-configured classifier on the flattened pixels.
    gbc = GradientBoostingClassifier()
    print("Training Gradient Boosting Classifier...")
    gbc.fit(train_data_flat, train_labels)

    print("Boosting Completed.")




Training Gradient Boosting Classifier...
Boosting Completed.
