## Performance

#### Model 1

learning_rate = 0.03

momentum = 0.9

weight_decay = 0.0005 

Test
Loss - 1.4894

Acc - 0.6062

Train
loss -  5.6051


#### Model 2

learning_rate = 0.06

momentum = 0.9 

weight_decay = 0.0005

Test Loss - 1.5381 

Acc - 0.6512 

Train loss - 5.4742

#### Model 3

learning_rate = 0.1 

momentum = 0.9 

weight_decay = 0.0005

Test Loss - 1.5381

Acc - 0.6391

Train loss - 5.3890

In [16]:
import numpy as np
from PIL import Image
import torch.nn as nn
import torch
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
from torchvision.datasets import CIFAR100
from torchvision.models import resnet18, resnet34
from torchvision import transforms
from tqdm import tqdm
from torch.optim import Adam
from torch.utils.data import DataLoader, SubsetRandomSampler
from tqdm import tqdm

In [17]:
torch.cuda.empty_cache()

In [19]:
#Directory to store the model files
data_dir = './data'
backbone = 'resnet18' 
projection_dim = 128 
# Hyperparameters
seed =  42 
batch_size = 512
workers = 16
epochs = 800
log_interval = 200
optimizer =  'sgd' 
'''Optimal is 0.06 as in paper lr = 0.03 * batch_size / 256 '''
learning_rate = 0.1 
momentum = 0.9
weight_decay = 0.0005 
temperature =  0.5 
# Linear Classifier hyperparameters 
finetune_epochs = 100
load_epoch = 800

# Model Architecture 

In [18]:
class SimCLR(nn.Module):
    def __init__(self, base_encoder, projection_dim=128):
        super().__init__()
        self.enc = base_encoder(pretrained=False) 
        self.feature_dim = self.enc.fc.in_features
        self.enc.conv1 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
        self.enc.maxpool = nn.Identity()
        self.enc.fc = nn.Identity()
        self.projection_dim = projection_dim
        self.projector = nn.Sequential(nn.Linear(self.feature_dim, 2048),nn.ReLU(),nn.Linear(2048, projection_dim))
    def forward(self, x):
        feature = self.enc(x)
        projection = self.projector(feature)
        return feature, projection

In [None]:
class LinModel(nn.Module):
    def __init__(self, encoder: nn.Module, feature_dim: int, n_classes: int):
        super().__init__()
        self.enc = encoder
        self.feature_dim = feature_dim
        self.n_classes = n_classes
        self.lin = nn.Linear(self.feature_dim, self.n_classes)

    def forward(self, x):
        return self.lin(self.enc(x))



## Loss Function

In [None]:
class SimCLRLoss(nn.Module):
    def __init__(self):
        super().__init__()
        
    def simclr_loss_def(self,x,t=0.5):
        x = F.normalize(x, dim=1)
        x_scores =  (x @ x.t()).clamp(min=1e-7) 
        x_scale = x_scores / t 
        x_scale = x_scale - torch.eye(x_scale.size(0)).to(x_scale.device) * 1e5
        targets = torch.arange(x.size()[0])
        targets[::2] += 1
        targets[1::2] -= 1 
        return F.cross_entropy(x_scale, targets.long().to(x_scale.device))

    def forward(self,x,t=0.5):
        return simclr_loss_def(x,t)

### Utility functions 

In [20]:
class AverageMeter(object):
    def __init__(self, name):
        self.name = name
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


class PairImage(CIFAR100):
    def __getitem__(self, idx):
        img, target = self.data[idx], self.targets[idx]
        img = Image.fromarray(img)
        imgs = [self.transform(img), self.transform(img)]
        return torch.stack(imgs), target


def simclr_loss_def(x, t=0.5):
    x = F.normalize(x, dim=1)
    x_scores =  (x @ x.t()).clamp(min=1e-7)
    x_scale = x_scores / t
    x_scale = x_scale - torch.eye(x_scale.size(0)).to(x_scale.device) * 1e5
    targets = torch.arange(x.size()[0])
    targets[::2] += 1 
    targets[1::2] -= 1 
    return F.cross_entropy(x_scale, targets.long().to(x_scale.device))


def find_next_learningrate(step, total_steps, lr_max, lr_min):
    return lr_min + (lr_max - lr_min) * 0.5 * (1 + np.cos(step / total_steps * np.pi))

def img_distortion(s=0.5):  
    color_jitter = transforms.ColorJitter(0.8*s, 0.8*s, 0.8*s, 0.2*s)
    rnd_color_jitter = transforms.RandomApply([color_jitter], p=0.8)
    rnd_gray = transforms.RandomGrayscale(p=0.2)
    color_distort = transforms.Compose([rnd_color_jitter, rnd_gray])
    return color_distort

def train_iter(model, dataloader, epoch, optimizer=None, scheduler=None):
    if optimizer:
        model.train()
    else:
        model.eval()
    loss_meter = AverageMeter('loss')
    acc_meter = AverageMeter('acc')
    loader_bar = tqdm(dataloader)
    for x, y in loader_bar:
        x, y = x.cuda(), y.cuda()
        logits = model(x)
        loss = F.cross_entropy(logits, y)
        if optimizer:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if scheduler:
                scheduler.step()
        acc = (logits.argmax(dim=1) == y).float().mean()
        loss_meter.update(loss.item(), x.size(0))
        acc_meter.update(acc.item(), x.size(0))
        if optimizer:
            loader_bar.set_description("Train epoch {}, loss: {:.4f}, acc: {:.4f}".format(epoch, loss_meter.avg, acc_meter.avg))
        else:
            loader_bar.set_description("Test epoch {}, loss: {:.4f}, acc: {:.4f}".format(epoch, loss_meter.avg, acc_meter.avg))
    return loss_meter.avg, acc_meter.avg


### Train Functions

In [None]:
def train() -> None:
    assert torch.cuda.is_available()
    cudnn.benchmark = True
    train_loss = []

    train_transform = transforms.Compose([transforms.RandomResizedCrop(32,scale=(0.2, 1.)),
                                          transforms.RandomHorizontalFlip(),
                                          img_distortion(s=0.5),
                                          transforms.ToTensor(),
                                          transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])                                   
    train_set = PairImage(root=data_dir,
                            train=True,
                            transform=train_transform,
                            download=True)
    train_loader = DataLoader(train_set,
                              batch_size=512,
                              shuffle=True,
                              num_workers=0,
                              drop_last=True)
    base_encoder = eval('resnet18')
    model = SimCLR(base_encoder, projection_dim=128).cuda()
    optimizer = torch.optim.SGD(model.parameters(),learning_rate,momentum,weight_decay)
    scheduler = LambdaLR(optimizer,lr_lambda=lambda step: find_next_learningrate(step,epochs * len(train_loader),learning_rate, 1e-3))
    model.train()
    optimal_loss = 1e5
    for epoch in range(1, epochs + 1):
        loss_meter = AverageMeter("SimCLR_loss")
        train_bar = tqdm(train_loader)
        for x, y in train_bar:
            sizes = x.size()
            x = x.view(sizes[0] * 2, sizes[2], sizes[3], sizes[4]).cuda(non_blocking=True)
            optimizer.zero_grad()
            feature, rep = model(x)
            loss = simclr_loss_def(rep, 0.5)
            loss.backward()
            optimizer.step()
            scheduler.step()
            loss_meter.update(loss.item(), x.size(0))
            train_bar.set_description("Train epoch {}, SimCLR loss: {:.4f}".format(epoch, loss_meter.avg))
        train_loss.append(loss_meter.avg)    
        if loss_meter.avg < optimal_loss:
            optimal_loss = loss_meter.avg
            torch.save(model.state_dict(), 'simclr_best_{}.pt'.format(backbone))
        if epoch >= log_interval and epoch % log_interval == 0:
            torch.save(model.state_dict(), 'simclr_{}_epoch{}.pt'.format('resnet18', epoch))
    np.savetxt("train_losses.txt", train_loss)


In [None]:
def train_lr() -> None:
    test_accu = []
    test_losses = []
    train_transform = transforms.Compose([transforms.RandomResizedCrop(32,scale=(0.2, 1.)),
                                          transforms.RandomHorizontalFlip(),
                                          img_distortion(s=0.5),
                                          transforms.ToTensor(),
                                          transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
    test_transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))])
    train_set = CIFAR100(root=data_dir, train=True, transform=train_transform, download=False)
    test_set = CIFAR100(root=data_dir, train=False, transform=test_transform, download=False)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,num_workers=workers, drop_last=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    base_encoder = eval(backbone)
    pre_model = SimCLR(base_encoder, projection_dim=projection_dim).cuda()
    pre_model.load_state_dict(torch.load('simclr_best_{}.pt'.format(backbone)))
    model = LinModel(pre_model.enc, feature_dim=pre_model.feature_dim, n_classes=len(train_set.targets))
    model = model.cuda()
    model.enc.requires_grad = False
    parameters = [param for param in model.parameters() if param.requires_grad is True]
    optimizer = torch.optim.SGD(parameters,0.2,momentum=momentum,weight_decay=0. )
    scheduler = LambdaLR(optimizer,lr_lambda=lambda step: find_next_learningrate(step,epochs * len(train_loader),learning_rate, 1e-3))
    optimal_loss, optimal_acc = 1e5, 0.
    for epoch in range(1, finetune_epochs + 1):
        train_loss, train_acc = train_iter(model, train_loader, epoch, optimizer, None)
        test_loss, test_acc = train_iter(model, test_loader, epoch)
        test_accu.append(test_acc)
        test_losses.append(test_loss)
        if train_loss < optimal_loss:
            optimal_loss = train_loss
            optimal_acc = test_acc
            torch.save(model.state_dict(), 'simclr_lin_{}_best.pth'.format(backbone))
    np.savetxt("test_accuracies_linear.txt", test_accu)
    np.savetxt("test_losses_linear.txt", test_losses)

In [21]:
train()

Files already downloaded and verified


Train epoch 1, SimCLR loss: 6.3359: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 2, SimCLR loss: 6.0938: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.08s/it]
Train epoch 3, SimCLR loss: 6.0073: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.08s/it]
Train epoch 4, SimCLR loss: 5.9594: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.08s/it]
Train epoch 5, SimCLR loss: 5.9240: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.08s/it]
Train epoch 6, SimCLR loss: 5.8983: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.08s/it]
Train epoch 7, SimCLR loss: 5.8777: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.08s/it]
Train epoch 8, SimCLR loss: 5.8571: 100%|██████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 9, SimCLR loss: 5.8446: 100%

Train epoch 137, SimCLR loss: 5.4918: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 138, SimCLR loss: 5.4939: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 139, SimCLR loss: 5.4926: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 140, SimCLR loss: 5.4941: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 141, SimCLR loss: 5.4915: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 142, SimCLR loss: 5.4905: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 143, SimCLR loss: 5.4885: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 144, SimCLR loss: 5.4917: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 145, SimCLR loss: 5.4859: 10

Train epoch 273, SimCLR loss: 5.4375: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 274, SimCLR loss: 5.4407: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 275, SimCLR loss: 5.4361: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 276, SimCLR loss: 5.4421: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 277, SimCLR loss: 5.4393: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 278, SimCLR loss: 5.4350: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.09s/it]
Train epoch 279, SimCLR loss: 5.4374: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 280, SimCLR loss: 5.4381: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 281, SimCLR loss: 5.4388: 10

Train epoch 409, SimCLR loss: 5.4121: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 410, SimCLR loss: 5.4118: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 411, SimCLR loss: 5.4148: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 412, SimCLR loss: 5.4111: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 413, SimCLR loss: 5.4127: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 414, SimCLR loss: 5.4094: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 415, SimCLR loss: 5.4087: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 416, SimCLR loss: 5.4118: 100%|████████████████████████████████████████████| 97/97 [01:45<00:00,  1.09s/it]
Train epoch 417, SimCLR loss: 5.4105: 10

Train epoch 545, SimCLR loss: 5.3980: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 546, SimCLR loss: 5.3995: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 547, SimCLR loss: 5.3975: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 548, SimCLR loss: 5.3994: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 549, SimCLR loss: 5.3984: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 550, SimCLR loss: 5.3977: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 551, SimCLR loss: 5.3967: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 552, SimCLR loss: 5.3966: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 553, SimCLR loss: 5.3984: 10

Train epoch 681, SimCLR loss: 5.3921: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 682, SimCLR loss: 5.3909: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 683, SimCLR loss: 5.3884: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 684, SimCLR loss: 5.3904: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 685, SimCLR loss: 5.3904: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.11s/it]
Train epoch 686, SimCLR loss: 5.3904: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.10s/it]
Train epoch 687, SimCLR loss: 5.3922: 100%|████████████████████████████████████████████| 97/97 [01:47<00:00,  1.11s/it]
Train epoch 688, SimCLR loss: 5.3903: 100%|████████████████████████████████████████████| 97/97 [01:46<00:00,  1.10s/it]
Train epoch 689, SimCLR loss: 5.3910: 10

In [None]:
train_lr()