This example requires the following dependencies to be installed:
`pip install lightly torchvision timm`

In [1]:
from google.colab import drive

# Google Drive is attached at this path; the dataset archive is read from it below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import zipfile

# Extract the APTOS 2019 archive from the mounted Drive into ./dataset.
# Pure-Python extraction replaces `!unzip` so that re-running this cell simply
# overwrites existing files instead of stopping at unzip's interactive
# "replace?" prompt, and so the cell does not depend on a shell tool.
with zipfile.ZipFile('/content/drive/MyDrive/aptos2019-blindness-detection.zip') as archive:
    archive.extractall('./dataset')



In [3]:
!pip install lightly

Collecting lightly
  Downloading lightly-1.5.19-py3-none-any.whl.metadata (36 kB)
Collecting hydra-core>=1.0.0 (from lightly)
  Downloading hydra_core-1.3.2-py3-none-any.whl.metadata (5.5 kB)
Collecting lightly_utils~=0.0.0 (from lightly)
  Downloading lightly_utils-0.0.2-py3-none-any.whl.metadata (1.4 kB)
Collecting pytorch_lightning>=1.0.4 (from lightly)
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Collecting aenum>=3.1.11 (from lightly)
  Downloading aenum-3.1.15-py3-none-any.whl.metadata (3.7 kB)
Collecting omegaconf<2.4,>=2.2 (from hydra-core>=1.0.0->lightly)
  Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)
Collecting antlr4-python3-runtime==4.9.* (from hydra-core>=1.0.0->lightly)
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch

In [4]:
!pip install torchvision timm




In [1]:
import torch
import torchvision
import timm
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from PIL import Image
import os
from lightly.loss import NegativeCosineSimilarity
from lightly.models.modules import SimSiamProjectionHead, SimSiamPredictionHead
from lightly.transforms import SimSiamTransform




In [2]:
class APTOSDataset(Dataset):
    """Unlabeled image dataset over the .png/.jpg files found directly in a folder.

    Args:
        root: Directory containing the image files (not searched recursively).
        transform: Optional callable applied to each PIL image after it has
            been loaded and converted to RGB.
    """

    # File extensions (lower-case) that are treated as images.
    IMAGE_EXTENSIONS = ('.png', '.jpg')

    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        # Extensions are matched case-insensitively so e.g. "IMG.PNG" is kept.
        self.images = sorted(
            name for name in os.listdir(root)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of image files discovered in ``root``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it, transformed if a transform is set."""
        img_path = os.path.join(self.root, self.images[idx])
        # The context manager closes the file handle deterministically;
        # convert() returns a new, fully loaded image that outlives it.
        with Image.open(img_path) as raw:
            image = raw.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image

# SimSiam augmentation pipeline producing 224x224 crops (the training loop
# unpacks two views per batch).
transform = SimSiamTransform(input_size=224)

# Unlabeled training images used for self-supervised pretraining
dataset = APTOSDataset("./dataset/train_images", transform)
dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)




In [4]:
class SimSiam(nn.Module):
    """Backbone followed by a SimSiam projection head and prediction head.

    Args:
        backbone: Feature extractor; its output is flattened from dim 1 onward.
        feature_dim: Width of the flattened backbone output, used as the
            projection head's input size.
    """

    def __init__(self, backbone, feature_dim):
        super().__init__()
        self.backbone = backbone
        # Head sizes: projection feature_dim -> 512 -> 128, prediction 128 -> 64 -> 128
        self.projection_head = SimSiamProjectionHead(feature_dim, 512, 128)
        self.prediction_head = SimSiamPredictionHead(128, 64, 128)

    def forward(self, x):
        """Return ``(z, p)``: the detached projection and the prediction for ``x``."""
        raw_features = self.backbone(x)
        features = raw_features.flatten(start_dim=1)
        projection = self.projection_head(features)
        prediction = self.prediction_head(projection)
        # Stop-gradient: the projection handed back to the caller is detached,
        # while the prediction keeps its graph through the projection.
        return projection.detach(), prediction

# EfficientNet-B0 from timm, created with pretrained weights
efficientnet = timm.create_model('efficientnet_b0', pretrained=True)
feature_dim = efficientnet.num_features  # width of the features fed to the projection head

# Keep every child module except the last one (the classification head)
backbone_layers = list(efficientnet.children())[:-1]
backbone = nn.Sequential(*backbone_layers)

# Wrap the backbone in SimSiam and move the whole model to the GPU
model = SimSiam(backbone=backbone, feature_dim=feature_dim).cuda()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

In [5]:
# Pretraining objective: negative cosine similarity between projections and predictions
criterion = NegativeCosineSimilarity()

# SGD with momentum over all SimSiam parameters (backbone and both heads)
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.06,
    momentum=0.9,
    weight_decay=5e-4,
)


In [6]:
print("Starting SimSiam Pretraining...")
epochs = 10
for epoch in range(epochs):
    total_loss = 0
    # Every batch unpacks into two augmented views of the same images.
    for view_a, view_b in dataloader:
        view_a = view_a.cuda()
        view_b = view_b.cuda()

        z_a, p_a = model(view_a)
        z_b, p_b = model(view_b)

        # Symmetrized SimSiam loss: each view's prediction is compared with
        # the other view's (detached) projection.
        loss = 0.5 * (criterion(z_a, p_b) + criterion(z_b, p_a))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate the scalar loss for the per-epoch average
        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

# Persist the full SimSiam state (backbone plus both heads)
torch.save(model.state_dict(), "simsiam_effnetv2_aptos.pth")
print("Pretraining Completed!")


Starting SimSiam Pretraining...
Epoch [1/10], Loss: -0.3533
Epoch [2/10], Loss: -0.7356
Epoch [3/10], Loss: -0.6606
Epoch [4/10], Loss: -0.7916
Epoch [5/10], Loss: -0.6302
Epoch [6/10], Loss: -0.7494
Epoch [7/10], Loss: -0.7874
Epoch [8/10], Loss: -0.7926
Epoch [9/10], Loss: -0.8988
Epoch [10/10], Loss: -0.9214
Pretraining Completed!


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models, datasets
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import pandas as pd
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from PIL import Image


# Prefer the GPU, but fall back to the CPU so this cell does not hard-fail on a
# runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Load CSV file (APTOS 2019 dataset)
class DRDataset(Dataset):
    """Labeled image dataset described by a CSV file.

    The first CSV column is used as the image file stem (``<root_dir>/<id>.png``)
    and the second column as the label.

    Args:
        csv_file: Path of the CSV file listing image ids and labels.
        root_dir: Directory that contains the ``.png`` images.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        """Return the number of rows in the CSV."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``."""
        image_id = self.data.iloc[idx, 0]
        label = self.data.iloc[idx, 1]  # DR grade (0-4)

        image = Image.open(f"{self.root_dir}/{image_id}.png").convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, label

# Deterministic preprocessing for supervised training: fixed 224x224 resize,
# tensor conversion and per-channel mean/std normalization. This is a single-view
# pipeline, not the two-view SimSiam transform used for pretraining.
# NOTE: `transform`, `dataset` and `dataloader` rebind the names used by the
# pretraining cells, so those cells must be re-run before pretraining again.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Labeled training set (image ids and grades come from train.csv)
dataset = DRDataset(
    csv_file='/content/dataset/train.csv',
    root_dir='/content/dataset/train_images',
    transform=transform,
)
dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

# Build the classifier on top of the SimSiam-pretrained EfficientNet-B0 backbone.
# `backbone` is the module trained inside the SimSiam model, so it already
# carries the pretrained weights. The SimSiam heads are not used here, and the
# SimSiam `model` itself cannot be trained with cross-entropy because its
# forward returns a (z, p) tuple rather than class logits.
NUM_CLASSES = 5  # DR grades 0-4
feature_dim = efficientnet.num_features  # Get feature dimension

classifier = nn.Sequential(
    backbone,
    nn.Flatten(start_dim=1),  # same flattening SimSiam.forward applies to the backbone output
    nn.Linear(feature_dim, NUM_CLASSES),  # new, randomly initialised classification head
).to(device)

# Define loss and optimizer
# The optimizer must see exactly the parameters that are trained below
# (backbone + new head).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

# Training loop
num_epochs = 10
print("Starting Supervised Fine-Tuning...")
for epoch in range(num_epochs):
    classifier.train()
    total_loss = 0
    correct = 0
    total = 0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = classifier(images)  # logits of shape (batch, NUM_CLASSES)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        # Predicted grade = index of the largest logit
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")

print("Fine-Tuning Completed!")


Starting Supervised Fine-Tuning...


