In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("sovitrath/diabetic-retinopathy-224x224-2019-data")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/sovitrath/diabetic-retinopathy-224x224-2019-data?dataset_version_number=4...


100%|██████████| 238M/238M [00:02<00:00, 83.4MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4


In [None]:
import os

def walk_through(dir_path):
  for dirpath, dirnames, filenames in os.walk(dir_path):
    print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

In [None]:
walk_through(path)

There are 1 directories and 1 images in '/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4'.
There are 5 directories and 0 images in '/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4/colored_images'.
There are 0 directories and 295 images in '/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4/colored_images/Proliferate_DR'.
There are 0 directories and 1805 images in '/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4/colored_images/No_DR'.
There are 0 directories and 370 images in '/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4/colored_images/Mild'.
There are 0 directories and 999 images in '/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4/colored_images/Moderate'.
There are 0 directories and 193 images in '/root/.cache/kagglehub/datas

In [None]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader, random_split
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm

In [None]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
DATA_DIR = "/root/.cache/kagglehub/datasets/sovitrath/diabetic-retinopathy-224x224-2019-data/versions/4/colored_images"
full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=transform)
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

In [None]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

device

'cuda'

In [None]:
import torchvision

weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 151MB/s]


In [None]:
for param in model.features.parameters():
    param.requires_grad = False

In [None]:
full_dataset.classes

['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']

In [None]:

torch.manual_seed(42)
torch.cuda.manual_seed(42)


output_shape = len(full_dataset.classes)

model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=1280,
                    out_features=output_shape,
                    bias=True)).to(device)


In [None]:
import torch.nn as nn
import torch.optim as optim

criterion=nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

In [None]:
import torch

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               device: torch.device) -> Tuple[float, float]:



    model.train()


    train_loss, train_acc = 0, 0

    for batch, (X, y) in enumerate(dataloader):

        X, y = X.to(device), y.to(device)

        y_pred = model(X)


        loss = loss_fn(y_pred, y)
        train_loss += loss.item()


        optimizer.zero_grad()


        loss.backward()


        optimizer.step()


        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)


    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

In [None]:
def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              device: torch.device) -> Tuple[float, float]:


    model.eval()


    test_loss, test_acc = 0, 0


    with torch.inference_mode():

        for batch, (X, y) in enumerate(dataloader):

            X, y = X.to(device), y.to(device)



            test_pred_logits = model(X)


            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()

            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))


    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

In [None]:
def train_and_validate(model,
                       train_loader,
                       val_loader,
                       criterion,
                       optimizer,
                       device,
                       num_epochs=50,
                       patience=5,
                       checkpoint_dir='./checkpoints'):
    """
    Train and validate the model with early stopping and checkpointing

    Args:
    - model: PyTorch model
    - train_loader: DataLoader for training data
    - val_loader: DataLoader for validation data
    - criterion: Loss function
    - optimizer: Optimizer
    - device: Computing device (cuda/cpu)
    - num_epochs: Maximum number of training epochs
    - patience: Number of epochs with no improvement after which training will be stopped
    - checkpoint_dir: Directory to save model checkpoints

    Returns:
    - Dictionary containing training history
    """
    # Create checkpoint directory if it doesn't exist
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Training history tracking
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }

    # Early stopping variables
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # Training loop
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss, train_acc = 0, 0

        train_progress_bar = tqdm(train_loader,
                                  desc=f'Epoch {epoch+1}/{num_epochs}',
                                  unit='batch')

        for batch, (X, y) in enumerate(train_progress_bar):
            X, y = X.to(device), y.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(X)
            loss = criterion(outputs, y)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Compute metrics
            train_loss += loss.item()
            train_pred = torch.argmax(torch.softmax(outputs, dim=1), dim=1)
            train_acc += (train_pred == y).float().mean().item()

            # Update progress bar
            train_progress_bar.set_postfix({
                'Train Loss': loss.item(),
                'Train Acc': train_acc / (batch + 1)
            })

        # Average epoch metrics
        train_loss /= len(train_loader)
        train_acc /= len(train_loader)

        # Validation phase
        model.eval()
        val_loss, val_acc = 0, 0

        with torch.inference_mode():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)

                outputs = model(X)
                loss = criterion(outputs, y)

                val_loss += loss.item()
                val_pred = torch.argmax(torch.softmax(outputs, dim=1), dim=1)
                val_acc += (val_pred == y).float().mean().item()

        # Average validation metrics
        val_loss /= len(val_loader)
        val_acc /= len(val_loader)

        # Store history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        # Print epoch summary
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Early stopping and model checkpointing
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0

            # Save best model
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'val_loss': val_loss
            }, os.path.join(checkpoint_dir, 'best_model.pth'))
        else:
            epochs_no_improve += 1

        # Early stopping
        if epochs_no_improve >= patience:
            print(f'Early stopping triggered after {epoch+1} epochs')
            break

    return history

In [None]:
history = train_and_validate(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        num_epochs=10,
        patience=3
    )


Epoch 1/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 1/10:
Train Loss: 0.9713, Train Acc: 0.6563
Val Loss: 0.7934, Val Acc: 0.7383


Epoch 2/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 2/10:
Train Loss: 0.7545, Train Acc: 0.7277
Val Loss: 0.7423, Val Acc: 0.7326


Epoch 3/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 3/10:
Train Loss: 0.7091, Train Acc: 0.7502
Val Loss: 0.7128, Val Acc: 0.7517


Epoch 4/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 4/10:
Train Loss: 0.6772, Train Acc: 0.7570
Val Loss: 0.6999, Val Acc: 0.7490


Epoch 5/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 5/10:
Train Loss: 0.6627, Train Acc: 0.7599
Val Loss: 0.7174, Val Acc: 0.7477


Epoch 6/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 6/10:
Train Loss: 0.6482, Train Acc: 0.7594
Val Loss: 0.6840, Val Acc: 0.7561


Epoch 7/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 7/10:
Train Loss: 0.6306, Train Acc: 0.7650
Val Loss: 0.6648, Val Acc: 0.7479


Epoch 8/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 8/10:
Train Loss: 0.6387, Train Acc: 0.7739
Val Loss: 0.6773, Val Acc: 0.7641


Epoch 9/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 9/10:
Train Loss: 0.6324, Train Acc: 0.7611
Val Loss: 0.6624, Val Acc: 0.7558


Epoch 10/10:   0%|          | 0/92 [00:00<?, ?batch/s]

Epoch 10/10:
Train Loss: 0.6094, Train Acc: 0.7789
Val Loss: 0.6745, Val Acc: 0.7434


In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import os

def freeze_all_layers(model):
    for param in model.features.parameters():
        param.requires_grad = False

def unfreeze_layers(model, num_layers):
    layers = list(model.features.children())
    for layer in layers[-num_layers:]:
        for param in layer.parameters():
            param.requires_grad = True

def train_and_validate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10, patience=3, unfreeze_step=2):
    os.makedirs("checkpoints", exist_ok=True)
    best_val_loss = float('inf')
    epochs_no_improve = 0
    unfreeze_count = 0
    freeze_all_layers(model)  # Start with frozen features

    for epoch in range(num_epochs):
        model.train()
        train_loss, train_acc = 0, 0

        for X, y in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_acc += (outputs.argmax(dim=1) == y).float().mean().item()

        train_loss /= len(train_loader)
        train_acc /= len(train_loader)

        model.eval()
        val_loss, val_acc = 0, 0
        with torch.no_grad():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                outputs = model(X)
                loss = criterion(outputs, y)
                val_loss += loss.item()
                val_acc += (outputs.argmax(dim=1) == y).float().mean().item()

        val_loss /= len(val_loader)
        val_acc /= len(val_loader)

        print(f'Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Val Acc={val_acc:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), 'checkpoints/best_model.pth')
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        if epoch % 5 == 0 and unfreeze_count < len(list(model.features.children())):
            unfreeze_count += unfreeze_step
            print(f"Unfreezing {unfreeze_step} more layers")
            unfreeze_layers(model, unfreeze_count)

    return model

# Load model
device = "cuda" if torch.cuda.is_available() else "cpu"
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
model = torchvision.models.efficientnet_b0(weights=weights).to(device)
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=5, bias=True)  # Adjust output classes
).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.classifier.parameters(), lr=0.001, weight_decay=0.01)

# Train model
model = train_and_validate(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10, patience=3, unfreeze_step=2)

# Save final model
torch.save(model.state_dict(), 'model.pth')


Epoch 1/10: 100%|██████████| 92/92 [00:18<00:00,  4.92it/s]


Epoch 1: Train Loss=0.9686, Val Loss=0.8001, Val Acc=0.7246
Unfreezing 2 more layers


Epoch 2/10: 100%|██████████| 92/92 [00:19<00:00,  4.73it/s]


Epoch 2: Train Loss=0.7770, Val Loss=0.7358, Val Acc=0.7301


Epoch 3/10: 100%|██████████| 92/92 [00:19<00:00,  4.78it/s]


Epoch 3: Train Loss=0.7055, Val Loss=0.6949, Val Acc=0.7573


Epoch 4/10: 100%|██████████| 92/92 [00:19<00:00,  4.77it/s]


Epoch 4: Train Loss=0.6842, Val Loss=0.7074, Val Acc=0.7314


Epoch 5/10: 100%|██████████| 92/92 [00:19<00:00,  4.73it/s]


Epoch 5: Train Loss=0.6695, Val Loss=0.6832, Val Acc=0.7576


Epoch 6/10: 100%|██████████| 92/92 [00:19<00:00,  4.74it/s]


Epoch 6: Train Loss=0.6674, Val Loss=0.6843, Val Acc=0.7572
Unfreezing 2 more layers


Epoch 7/10: 100%|██████████| 92/92 [00:21<00:00,  4.31it/s]


Epoch 7: Train Loss=0.6390, Val Loss=0.6777, Val Acc=0.7573


Epoch 8/10: 100%|██████████| 92/92 [00:21<00:00,  4.34it/s]


Epoch 8: Train Loss=0.6336, Val Loss=0.6539, Val Acc=0.7640


Epoch 9/10: 100%|██████████| 92/92 [00:21<00:00,  4.34it/s]


Epoch 9: Train Loss=0.6326, Val Loss=0.6760, Val Acc=0.7670


Epoch 10/10: 100%|██████████| 92/92 [00:20<00:00,  4.40it/s]


Epoch 10: Train Loss=0.6173, Val Loss=0.6782, Val Acc=0.7466


In [None]:
torch.save(model.state_dict(), 'model.pth')
torch.save(model, 'model_full.pth')