# SimCLR Implementation and Evaluation on CIFAR-10

This notebook implements the SimCLR algorithm, trains it on the CIFAR-10 dataset, and evaluates the learned representations using Linear Probing and K-Nearest Neighbors (KNN) classification.


In [2]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import torch.nn.functional as F
import torchvision.models as models

# Importing necessary libraries and modules for the implementation.

### Execution Timers

In [3]:

# Flag to enable or disable timers
enable_timers = True

import time

class Timer:
    def __enter__(self):
        if enable_timers:
            self.start = time.time()
        return self

    def __exit__(self, *args):
        if enable_timers:
            self.end = time.time()
            self.interval = self.end - self.start
            print(f"Elapsed time: {self.interval:.2f} seconds")
    

# Importing necessary libraries and modules for the implementation.

## Load CIFAR-10 Dataset

Load the CIFAR-10 training and test datasets.


In [4]:
from data_aug.contrastive_learning_dataset import ContrastiveLearningDataset

dataset = ContrastiveLearningDataset(root_folder='data')
train_dataset = dataset.get_dataset('cifar10', 2)
train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=512, shuffle=True,
        num_workers=8, pin_memory=True, drop_last=True)


Files already downloaded and verified


## Define SimCLR Encoder and Projection Head

Create the encoder model and projection head using ResNet18 as the base architecture.


In [5]:
pretrained = False
class ResNetSimCLR(nn.Module):

    def __init__(self, base_model, out_dim):
        super(ResNetSimCLR, self).__init__()
        if pretrained:
            print("Using pretrained model.")
            self.resnet_dict = {"resnet18": models.resnet18(pretrained=True),
                                "resnet50": models.resnet50(pretrained=True)}
        else:
            print("Using model from scratch.")
            self.resnet_dict = {"resnet18": models.resnet18(pretrained=False, num_classes=out_dim),
                                "resnet50": models.resnet50(pretrained=False, num_classes=out_dim)}

        self.backbone = self._get_basemodel(base_model)
        if pretrained:
            self.backbone.fc = nn.Linear(512, out_dim).to(device)

        dim_mlp = self.backbone.fc.in_features

        # add mlp projection head
        self.backbone.fc = nn.Sequential(nn.Linear(dim_mlp, dim_mlp), nn.ReLU(), self.backbone.fc)

    def _get_basemodel(self, model_name):
        try:
            model = self.resnet_dict[model_name]
        except:
            raise ("Invalid model name. Check the config file and pass one of: resnet18 or resnet50")

        return model

    def forward(self, x):
        return self.backbone(x)

## Define Contrastive Loss

Implement the contrastive loss function used by SimCLR.


In [6]:
def info_nce_loss(features, temperature=0.5):
        batch_size = features.shape[0] // 2 # 2 views per batch
        
        labels = torch.cat([torch.arange(batch_size) for i in range(2)], dim=0)
        labels = (labels.unsqueeze(0) == labels.unsqueeze(1)).float()
        device = (torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
        labels = labels.to(device)

        features = F.normalize(features, dim=1)

        similarity_matrix = torch.matmul(features, features.T)
        # assert similarity_matrix.shape == (
        #     self.args.n_views * self.args.batch_size, self.args.n_views * self.args.batch_size)
        # assert similarity_matrix.shape == labels.shape

        # discard the main diagonal from both: labels and similarities matrix
        mask = torch.eye(labels.shape[0], dtype=torch.bool).to(device)
        labels = labels[~mask].view(labels.shape[0], -1)
        similarity_matrix = similarity_matrix[~mask].view(similarity_matrix.shape[0], -1)
        # assert similarity_matrix.shape == labels.shape

        # select and combine multiple positives
        positives = similarity_matrix[labels.bool()].view(labels.shape[0], -1)

        # select only the negatives
        negatives = similarity_matrix[~labels.bool()].view(similarity_matrix.shape[0], -1)

        logits = torch.cat([positives, negatives], dim=1)
        labels = torch.zeros(logits.shape[0], dtype=torch.long).to(device)

        logits = logits / temperature
        return logits, labels

## Training SimCLR

Train the SimCLR model using the contrastive loss and augmented image pairs from CIFAR-10.


In [83]:
from torch.utils.tensorboard import SummaryWriter
import os
from tqdm import tqdm
import logging
from utils import accuracy, save_checkpoint


with Timer():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training with gpu: {device}.")
    # Initialize optimizer and loss criterion
    model = ResNetSimCLR(base_model='resnet18', out_dim=128)
    model = model.to(device)
    lr = 3e-4
    weight_decay = 1e-4
    optimizer = torch.optim.Adam(model.parameters(), lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(train_loader), eta_min=0,
                                                           last_epoch=-1)
    writer = SummaryWriter()
    logging.basicConfig(filename=os.path.join(writer.log_dir, 'training.log'), level=logging.DEBUG)
    criterion = torch.nn.CrossEntropyLoss().to(device)
    # Set number of training epochs
    epochs = 800
    log_every_n_epochs = 1
    logging.info(f"Start SimCLR training for {epochs} epochs.")
    logging.info(f"Training with gpu: {device}.")
    best_acc = 0
    for epoch_counter in range(epochs):
        loss_epoch = 0
        for images, _ in tqdm(train_loader):
            images = torch.cat(images, dim=0)

            images = images.to(device)

            # with autocast(enabled=fp16_precision):
            features = model(images)
            logits, labels = info_nce_loss(features)
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()
            # scaler.scale(loss).backward()
            # scaler.step(self.optimizer)
            # scaler.update()
        avg_loss = loss_epoch / len(train_loader)
        # print(f"Epoch {epoch_counter}:\tLoss: {avg_loss}")
        # every log_every_n_epochs log epoch loss and accuracy
        if epoch_counter % log_every_n_epochs == 0:
            top1, top5 = accuracy(logits, labels, topk=(1, 5))
            writer.add_scalar('loss', avg_loss, global_step=epoch_counter)
            writer.add_scalar('acc/top1', top1[0], global_step=epoch_counter)
            writer.add_scalar('acc/top5', top5[0], global_step=epoch_counter)
            writer.add_scalar('learning_rate', scheduler.get_last_lr()[0], global_step=epoch_counter)
            if top1[0] > best_acc:
                best_acc = top1[0]
                save_checkpoint({
                    'epoch': epoch_counter,
                    'arch': 'resnet18',
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, is_best=True, filename=os.path.join(writer.log_dir, f'checkpoint_best.pth.tar'))


        # warmup for the first 10 epochs
        if epoch_counter >= 10:
            scheduler.step()
        logging.debug(f"Epoch: {epoch_counter}\tLoss: {loss}\tTop1 accuracy: {top1[0]}")

    logging.info("Training has finished.")
    # save model checkpoints
    checkpoint_name = 'checkpoint_{:04d}.pth.tar'.format(epochs)
    save_checkpoint({
        'epoch': epochs,
        'arch': 'resnet18',
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, is_best=False, filename=os.path.join(writer.log_dir, checkpoint_name))
    logging.info(f"Model checkpoint and metadata has been saved at {writer.log_dir}.")

Training with gpu: cuda.
Using pretrained model.


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/fotis/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 69.9MB/s]
100%|██████████| 97/97 [00:06<00:00, 14.17it/s]
100%|██████████| 97/97 [00:06<00:00, 14.64it/s]
100%|██████████| 97/97 [00:07<00:00, 13.81it/s]
100%|██████████| 97/97 [00:06<00:00, 14.80it/s]
100%|██████████| 97/97 [00:06<00:00, 14.68it/s]
100%|██████████| 97/97 [00:06<00:00, 14.28it/s]
100%|██████████| 97/97 [00:06<00:00, 14.27it/s]
100%|██████████| 97/97 [00:06<00:00, 15.02it/s]
100%|██████████| 97/97 [00:06<00:00, 14.24it/s]
100%|██████████| 97/97 [00:06<00:00, 14.42it/s]
100%|██████████| 97/97 [00:06<00:00, 14.38it/s]
100%|██████████| 97/97 [00:06<00:00, 15.04it/s]
100%|██████████| 97/97 [00:06<00:00, 14.65it/s]
100%|██████████| 97/97 [00:06<00:00, 14.76it/s]
100%|██████████| 97/97 [00:06<00:00, 14.53it/s]
100%|██████████| 97/97 [00:06<00:00, 14.26it/s]
100%|██████████| 97/97 [00:06<00:00, 14.

Elapsed time: 5474.76 seconds


Load the model checkpoint and evaluate the learned representations using Linear Probing and KNN classification.

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.resnet18(pretrained=False, num_classes=10).to(device)
# Load the checkpoint
checkpoint_path = 'runs/Sep27_15-10-48_cpsadmin-Z790-AORUS-ELITE-AX/checkpoint_best.pth.tar'
checkpoint = torch.load(checkpoint_path)
state_dict = checkpoint['state_dict']
# model.load_state_dict(state_dict)

for k in list(state_dict.keys()):
  if k.startswith('backbone.'):
    if k.startswith('backbone') and not k.startswith('backbone.fc'):
      # remove prefix
      state_dict[k[len("backbone."):]] = state_dict[k]
  del state_dict[k]
log = model.load_state_dict(state_dict, strict=False)
assert log.missing_keys == ['fc.weight', 'fc.bias']



In [8]:
# freeze all layers but the last fc
for name, param in model.named_parameters():
    if name not in ['fc.weight', 'fc.bias']:
        param.requires_grad = False

parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
assert len(parameters) == 2  # fc.weight, fc.bias

In [9]:
from torchvision import datasets
def get_cifar10_data_loaders(download, shuffle=False, batch_size=256):
  train_dataset = datasets.CIFAR10('./data', train=True, download=download,
                                  transform=transforms.ToTensor())

  train_loader = DataLoader(train_dataset, batch_size=batch_size,
                            num_workers=0, drop_last=False, shuffle=shuffle)
  
  test_dataset = datasets.CIFAR10('./data', train=False, download=download,
                                  transform=transforms.ToTensor())

  test_loader = DataLoader(test_dataset, batch_size=2*batch_size,
                            num_workers=10, drop_last=False, shuffle=shuffle)
  return train_loader, test_loader


In [10]:
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=0.0008)
criterion = torch.nn.CrossEntropyLoss().to(device)
train_loader, test_loader = get_cifar10_data_loaders(download=True)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
from utils import accuracy
epochs = 20
with Timer():
    for epoch in range(epochs):
        top1_train_accuracy = 0
        model.train()
        for counter, (x_batch, y_batch) in enumerate(train_loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            logits = model(x_batch)
            loss = criterion(logits, y_batch)
            top1 = accuracy(logits, y_batch, topk=(1,))
            top1_train_accuracy += top1[0]
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        top1_train_accuracy /= (counter + 1)
        top1_accuracy = 0
        top5_accuracy = 0
        model.eval()
        with torch.no_grad():
            for counter, (x_batch, y_batch) in enumerate(test_loader):
                x_batch = x_batch.to(device)
                y_batch = y_batch.to(device)
                
                logits = model(x_batch)
                
                top1, top5 = accuracy(logits, y_batch, topk=(1,5))
                top1_accuracy += top1[0]
                top5_accuracy += top5[0]
        
        top1_accuracy /= (counter + 1)
        top5_accuracy /= (counter + 1)
        print(f"Epoch {epoch}:\tTrain Accuracy: {top1_train_accuracy.item():.2f}\tTest Accuracy: {top1_accuracy.item():.2f}\tTest Top-5 Accuracy: {top5_accuracy.item():.2f}")
  
  

Epoch 0:	Train Accuracy: 67.41	Test Accuracy: 74.85	Test Top-5 Accuracy: 97.88
Epoch 1:	Train Accuracy: 77.11	Test Accuracy: 75.60	Test Top-5 Accuracy: 98.15
Epoch 2:	Train Accuracy: 77.65	Test Accuracy: 76.08	Test Top-5 Accuracy: 98.31
Epoch 3:	Train Accuracy: 78.16	Test Accuracy: 76.50	Test Top-5 Accuracy: 98.45
Epoch 4:	Train Accuracy: 78.51	Test Accuracy: 76.80	Test Top-5 Accuracy: 98.53
Epoch 5:	Train Accuracy: 78.79	Test Accuracy: 77.09	Test Top-5 Accuracy: 98.63
Epoch 6:	Train Accuracy: 79.04	Test Accuracy: 77.23	Test Top-5 Accuracy: 98.68
Epoch 7:	Train Accuracy: 79.19	Test Accuracy: 77.30	Test Top-5 Accuracy: 98.72
Epoch 8:	Train Accuracy: 79.35	Test Accuracy: 77.40	Test Top-5 Accuracy: 98.70
Epoch 9:	Train Accuracy: 79.48	Test Accuracy: 77.54	Test Top-5 Accuracy: 98.72
Epoch 10:	Train Accuracy: 79.60	Test Accuracy: 77.71	Test Top-5 Accuracy: 98.72
Epoch 11:	Train Accuracy: 79.68	Test Accuracy: 77.92	Test Top-5 Accuracy: 98.74
Epoch 12:	Train Accuracy: 79.77	Test Accuracy: 78.

### Train a ResNet18 model from scratch on CIFAR-10 using the sane augmentation strategy as SimCLR  

In [12]:
from torchvision import datasets
def get_cifar10_data_loaders(download, shuffle=False, batch_size=256):
  train_dataset = datasets.CIFAR10('./data', train=True, download=download,
                                  transform=transforms.ToTensor())

  train_loader = DataLoader(train_dataset, batch_size=batch_size,
                            num_workers=0, drop_last=False, shuffle=shuffle)
  
  test_dataset = datasets.CIFAR10('./data', train=False, download=download,
                                  transform=transforms.ToTensor())

  test_loader = DataLoader(test_dataset, batch_size=2*batch_size,
                            num_workers=10, drop_last=False, shuffle=shuffle)
  return train_loader, test_loader


In [13]:
from torchvision.models import resnet18
model = resnet18(pretrained=False, num_classes=10).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=0.0008)
criterion = torch.nn.CrossEntropyLoss().to(device)
train_loader, test_loader = get_cifar10_data_loaders(download=True)

Files already downloaded and verified
Files already downloaded and verified


In [14]:
from utils import accuracy
epochs = 10
with Timer():
    for epoch in range(epochs):
        top1_train_accuracy_sup = 0
        for counter, (x_batch, y_batch) in enumerate(train_loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            logits = model(x_batch)
            loss = criterion(logits, y_batch)
            top1 = accuracy(logits, y_batch, topk=(1,))
            top1_train_accuracy_sup += top1[0]
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        top1_train_accuracy_sup /= (counter + 1)
        top1_accuracy_sup = 0
        top5_accuracy_sup = 0
        for counter, (x_batch, y_batch) in enumerate(test_loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            logits = model(x_batch)
            
            top1, top5 = accuracy(logits, y_batch, topk=(1,5))
            top1_accuracy_sup += top1[0]
            top5_accuracy_sup += top5[0]
        
        top1_accuracy_sup /= (counter + 1)
        top5_accuracy_sup /= (counter + 1)
        print(f"Epoch {epoch}:\tTrain Accuracy: {top1_train_accuracy_sup.item():.2f}\tTest Accuracy: {top1_accuracy_sup.item():.2f}\tTest Top-5 Accuracy: {top5_accuracy_sup.item():.2f}")
            

Epoch 0:	Train Accuracy: 47.11	Test Accuracy: 55.18	Test Top-5 Accuracy: 94.91
Epoch 1:	Train Accuracy: 61.76	Test Accuracy: 61.49	Test Top-5 Accuracy: 95.96
Epoch 2:	Train Accuracy: 70.05	Test Accuracy: 63.21	Test Top-5 Accuracy: 96.46
Epoch 3:	Train Accuracy: 76.26	Test Accuracy: 62.70	Test Top-5 Accuracy: 96.15
Epoch 4:	Train Accuracy: 79.66	Test Accuracy: 62.49	Test Top-5 Accuracy: 95.93
Epoch 5:	Train Accuracy: 82.28	Test Accuracy: 63.15	Test Top-5 Accuracy: 95.90
Epoch 6:	Train Accuracy: 85.21	Test Accuracy: 64.27	Test Top-5 Accuracy: 96.29
Epoch 7:	Train Accuracy: 87.49	Test Accuracy: 64.64	Test Top-5 Accuracy: 96.47
Epoch 8:	Train Accuracy: 89.84	Test Accuracy: 64.67	Test Top-5 Accuracy: 96.07
Epoch 9:	Train Accuracy: 91.33	Test Accuracy: 65.05	Test Top-5 Accuracy: 95.90
Elapsed time: 27.09 seconds


In [15]:
model = resnet18(pretrained=True).to(device)
# overwrite the last fc layer
model.fc = nn.Linear(512, 10).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=0.0008)
criterion = torch.nn.CrossEntropyLoss().to(device)
train_loader, test_loader = get_cifar10_data_loaders(download=True)



Files already downloaded and verified
Files already downloaded and verified


In [16]:
from utils import accuracy
epochs = 10
with Timer():
    for epoch in range(epochs):
        top1_train_accuracy_sup_pre = 0
        for counter, (x_batch, y_batch) in enumerate(train_loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            logits = model(x_batch)
            loss = criterion(logits, y_batch)
            top1 = accuracy(logits, y_batch, topk=(1,))
            top1_train_accuracy_sup_pre += top1[0]
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        top1_train_accuracy_sup_pre /= (counter + 1)
        top1_accuracy_sup_pre = 0
        top5_accuracy_sup_pre = 0
        for counter, (x_batch, y_batch) in enumerate(test_loader):
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            
            logits = model(x_batch)
            
            top1, top5 = accuracy(logits, y_batch, topk=(1,5))
            top1_accuracy_sup_pre += top1[0]
            top5_accuracy_sup_pre += top5[0]
        
        top1_accuracy_sup_pre /= (counter + 1)
        top5_accuracy_sup_pre /= (counter + 1)
        # print every 10 epochs
        print(f"Epoch {epoch}:\tTrain Accuracy: {top1_train_accuracy_sup_pre.item():.2f}\tTest Accuracy: {top1_accuracy_sup_pre.item():.2f}\tTest Top-5 Accuracy: {top5_accuracy_sup_pre.item():.2f}")
            

Epoch 0:	Train Accuracy: 68.68	Test Accuracy: 78.01	Test Top-5 Accuracy: 98.69
Epoch 1:	Train Accuracy: 84.09	Test Accuracy: 79.46	Test Top-5 Accuracy: 98.93
Epoch 2:	Train Accuracy: 90.54	Test Accuracy: 79.10	Test Top-5 Accuracy: 98.74
Epoch 3:	Train Accuracy: 92.81	Test Accuracy: 79.47	Test Top-5 Accuracy: 98.88
Epoch 4:	Train Accuracy: 94.27	Test Accuracy: 79.68	Test Top-5 Accuracy: 98.70
Epoch 5:	Train Accuracy: 95.68	Test Accuracy: 80.35	Test Top-5 Accuracy: 98.74
Epoch 6:	Train Accuracy: 96.61	Test Accuracy: 80.27	Test Top-5 Accuracy: 98.64
Epoch 7:	Train Accuracy: 97.09	Test Accuracy: 80.81	Test Top-5 Accuracy: 98.71
Epoch 8:	Train Accuracy: 97.29	Test Accuracy: 80.81	Test Top-5 Accuracy: 98.63
Epoch 9:	Train Accuracy: 97.57	Test Accuracy: 81.61	Test Top-5 Accuracy: 98.63
Elapsed time: 24.66 seconds


In [17]:
# print the results of the SimCLR model and the supervised model in a table format
print(f"{'Model':<25}{'Train Accuracy':<20}{'Test Accuracy':<20}{'Test Top-5 Accuracy':<20}")
print(f"{'SimCLR':<25}{top1_train_accuracy.item():<20.2f}{top1_accuracy.item():<20.2f}{top5_accuracy.item():<20.2f}")
print(f"{'Supervised':<25}{top1_train_accuracy_sup.item():<20.2f}{top1_accuracy_sup.item():<20.2f}{top5_accuracy_sup.item():<20.2f}")
print(f"{'Supervised Pretrained':<25}{top1_train_accuracy_sup_pre.item():<20.2f}{top1_accuracy_sup_pre.item():<20.2f}{top5_accuracy_sup_pre.item():<20.2f}")

Model                    Train Accuracy      Test Accuracy       Test Top-5 Accuracy 
SimCLR                   80.27               78.42               98.78               
Supervised               91.33               65.05               95.90               
Supervised Pretrained    97.57               81.61               98.63               
