In [1]:
# Re-import edited modules automatically before each cell runs, so changes to
# the project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Conversion to SimCLRv2 and Converting TF Pretrained Weights
Pretrained weights can be found in Google's [repo](https://github.com/google-research/simclr), which also links to conversion scripts. Most of the initial work can be found in spijkervet_prototypes.ipynb. This notebook cleans up that spaghetti code and turns it into modules.

In [4]:
import argparse
import datetime
import os
import sys
from pathlib import Path
from pprint import pprint

import torch
import torch.nn as nn
import torchvision
import numpy as np
from torch.utils.tensorboard import SummaryWriter

sys.path.insert(0, '../../../SimCLRv2-PyTorch/')

from utils.model import save_model, load_optimizer
from simclr.modules import LogisticRegression
from simclr import SimCLRv2, SimCLRv2_ft
from simclr.modules import get_resnet_pt, get_resnet_v2, NT_Xent
from simclr.modules.transformations import TransformsSimCLR
from utils import yaml_config_hook

In [5]:
# Machine-local locations: the SimCLRv2-PyTorch checkout and the TensorBoard root.
simclr_repo = Path('/home/kaipak/dev/SimCLRv2-PyTorch/')
tb_out = Path('/home/kaipak/models/tensorboard_logs')

# Every entry of the YAML config becomes a command-line option whose default
# (and type) is taken from the YAML value.
config = yaml_config_hook(simclr_repo / 'config/config.yaml')
parser = argparse.ArgumentParser(description="SimCLR")
for option, default in config.items():
    parser.add_argument(f"--{option}", default=default, type=type(default))

# An empty argv means only the YAML defaults are used; nothing is read from
# the notebook's own command line.
args = parser.parse_args([])
args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
# Notebook-specific overrides applied on top of the YAML defaults.
vars(args).update(
    batch_size=64,
    logistic_batch_size=96,
    resnet="resnet50",
    epochs=400,
    gpus=4,
    optimizer='LARS',
    workers=64,
    dataset='CIFAR100',
)
# Show the effective configuration (pprint sorts the keys).
pprint(vars(args))

{'batch_size': 64,
 'dataparallel': 0,
 'dataset': 'CIFAR100',
 'dataset_dir': '/home/kaipak/datasets',
 'device': device(type='cuda'),
 'epoch_num': 100,
 'epochs': 400,
 'gpus': 4,
 'image_size': 224,
 'logistic_batch_size': 96,
 'logistic_epochs': 500,
 'model_path': '/home/kaipak/models/SimCLRv2/save',
 'nodes': 1,
 'nr': 0,
 'optimizer': 'LARS',
 'pretrain': True,
 'projection_dim': 64,
 'reload': False,
 'resnet': 'resnet50',
 'seed': 42,
 'start_epoch': 0,
 'temperature': 0.5,
 'weight_decay': 1e-06,
 'workers': 64}


In [None]:
# Seed the torch and numpy RNGs so runs are repeatable.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Dataset constructor plus the extra keyword arguments it needs for the
# self-supervised (label-free) pre-training split.
pretrain_dataset_specs = {
    "STL10": (torchvision.datasets.STL10, {"split": "unlabeled"}),
    "CIFAR10": (torchvision.datasets.CIFAR10, {}),
    "CIFAR100": (torchvision.datasets.CIFAR100, {}),
}
if args.dataset not in pretrain_dataset_specs:
    raise NotImplementedError(f"Unsupported dataset: {args.dataset!r}")

dataset_cls, dataset_kwargs = pretrain_dataset_specs[args.dataset]
# TransformsSimCLR is passed as the transform itself (not its .test_transform);
# the pre-training loop unpacks each batch as ((x_i, x_j), _).
train_dataset = dataset_cls(
    args.dataset_dir,
    download=True,
    transform=TransformsSimCLR(size=args.image_size),
    **dataset_kwargs,
)

if args.nodes > 1:
    # The original cell read an undefined `rank` and `args.world_size` here.
    # NOTE(review): the fallbacks assume one process per GPU with this
    # notebook acting as the first local process of node `args.nr` — confirm
    # before relying on multi-node runs.
    world_size = getattr(args, "world_size", args.nodes * args.gpus)
    rank = getattr(args, "rank", args.nr * args.gpus)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=world_size, rank=rank, shuffle=True
    )
else:
    train_sampler = None


# Data Transforms happen here.
# DataLoader rejects shuffle=True together with a sampler, so shuffling is
# only enabled when no sampler is supplied.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=(train_sampler is None),
    drop_last=True,
    num_workers=args.workers,
    sampler=train_sampler,
)

In [7]:
# Labelled train/test splits for supervised fine-tuning and evaluation.
# STL10 selects its split by name, the CIFAR datasets by a boolean flag.
if args.dataset == "STL10":
    dataset_cls = torchvision.datasets.STL10
    train_split, test_split = {"split": "train"}, {"split": "test"}
elif args.dataset == "CIFAR10":
    dataset_cls = torchvision.datasets.CIFAR10
    train_split, test_split = {"train": True}, {"train": False}
elif args.dataset == "CIFAR100":
    dataset_cls = torchvision.datasets.CIFAR100
    train_split, test_split = {"train": True}, {"train": False}
else:
    raise NotImplementedError

# Both splits use the `.test_transform` attribute of TransformsSimCLR.
# NOTE(review): this re-binds `train_dataset` / `train_loader`, the same names
# the pre-training data cell uses for its own dataset and loader.
train_dataset = dataset_cls(
    args.dataset_dir,
    download=True,
    transform=TransformsSimCLR(size=args.image_size).test_transform,
    **train_split,
)
test_dataset = dataset_cls(
    args.dataset_dir,
    download=True,
    transform=TransformsSimCLR(size=args.image_size).test_transform,
    **test_split,
)

shared_loader_opts = dict(drop_last=True, num_workers=args.workers)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.logistic_batch_size,
    shuffle=True,
    **shared_loader_opts,
)

# Evaluation uses a fixed batch size of 32 instead of args.logistic_batch_size.
# NOTE(review): drop_last=True also applies here, so a final partial test
# batch is never scored.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    **shared_loader_opts,
)

Files already downloaded and verified
Files already downloaded and verified


## SimCLRv2: Self Supervised Learning
Modified the SimCLR PyTorch code to v2, using the ResNet code from the converter, which includes the contrastive head.


In [None]:
# Model for self-supervised pre-training (depth 50, width multiplier 2).
model = SimCLRv2(resnet_depth=50, resnet_width_multiplier=2)

n_visible_gpus = torch.cuda.device_count()
if n_visible_gpus > 1:
    print("Let's use", n_visible_gpus, "GPUs!")
    # DataParallel splits each batch along dim 0 across the GPUs,
    # e.g. [30, ...] -> three chunks of [10, ...] on 3 GPUs.
    model = nn.DataParallel(model)

# Optionally resume from the checkpoint saved for epoch `args.epoch_num`.
if args.reload:
    model_fp = os.path.join(args.model_path, f"checkpoint_{args.epoch_num}.tar")
    saved_state = torch.load(model_fp, map_location=args.device.type)
    model.load_state_dict(saved_state)

model = model.to(args.device)
optimizer, scheduler = load_optimizer(args, model)
criterion = NT_Xent(args.batch_size, args.temperature, world_size=1)

In [None]:
def train(args, train_loader, model, criterion, optimizer, writer, display_every=50):
    """Run one epoch of contrastive pre-training.

    NOTE: a later cell defines another ``train`` with a different signature;
    once that cell runs it shadows this function.

    Args:
        args: Namespace providing ``device`` and a mutable ``global_step``
            counter, which is incremented once per batch.
        train_loader: Iterable yielding ``((x_i, x_j), _)`` batches; the
            second element of each batch is ignored.
        model: Called as ``model(x_i, x_j)``; must return a 4-tuple whose last
            two entries are passed to ``criterion``.
        criterion: Called as ``criterion(z_i, z_j)``; must return a scalar
            tensor supporting ``backward()``.
        optimizer: Optimizer stepping ``model``'s parameters.
        writer: Object exposing ``add_scalar(tag, value, step)``.
        display_every: Print the running loss every this many steps.

    Returns:
        float: Sum of the per-step losses. Divide by ``len(train_loader)`` for
        the epoch mean.
    """
    model.train()
    epoch_loss = 0.0

    for step, ((x_i, x_j), _) in enumerate(train_loader):
        optimizer.zero_grad()
        # Use the configured device rather than unconditional .cuda(), so the
        # loop also runs on CPU-only hosts. The stray sys.exit() that aborted
        # the first step has been removed.
        x_i = x_i.to(args.device, non_blocking=True)
        x_j = x_j.to(args.device, non_blocking=True)

        # Positive pair with encoding; only the last two outputs feed the loss.
        _, _, z_i, z_j = model(x_i, x_j)

        loss = criterion(z_i, z_j)
        loss.backward()
        optimizer.step()

        step_loss = loss.item()
        if step % display_every == 0:
            print(f"Step [{step}/{len(train_loader)}]\t Loss: {step_loss}")

        # Despite the tag name, this scalar is logged once per step.
        writer.add_scalar("Loss/train_epoch", step_loss, args.global_step)
        epoch_loss += step_loss
        args.global_step += 1

    return epoch_loss

In [None]:
# Counters that train() and the loop below advance.
args.global_step = 0
args.current_epoch = 0
# One timestamped TensorBoard run directory per launch, under `tb_out`.
tb_writer = SummaryWriter(
    log_dir=str(tb_out / datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
)

# NOTE(review): `train_loader` must be the two-view loader built by the
# pre-training data cell; the supervised data cell re-binds the same name, so
# the cell execution order matters here.
for epoch in range(args.start_epoch, args.epochs):
    # Read the learning rate before the scheduler advances it.
    lr = optimizer.param_groups[0]["lr"]
    epoch_loss = train(args, train_loader, model, criterion, optimizer, tb_writer)
    mean_loss = epoch_loss / len(train_loader)

    if scheduler:
        scheduler.step()

    # Save a checkpoint every 10th epoch (including epoch 0).
    if epoch % 10 == 0:
        save_model(args, model, optimizer)

    # Log through the writer created above. The original used `writer`, which
    # is only defined by a later cell.
    tb_writer.add_scalar("Loss/train", mean_loss, epoch)
    tb_writer.add_scalar("Misc/learning_rate", lr, epoch)

    print(
        f"Epoch [{epoch}/{args.epochs}]\t Loss: {mean_loss}\t lr: {round(lr, 5)}"
    )
    args.current_epoch += 1

# Save once more after the last epoch and push pending events to disk.
save_model(args, model, optimizer)
tb_writer.flush()

In [None]:
# Ask PyTorch's caching allocator to release unused cached GPU memory.
torch.cuda.empty_cache()

In [None]:
# Peek at the top-level keys stored in the pretrained checkpoint file.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
torch.load('/home/kaipak/models/SimCLRv2/r50_2x_sk1.pth').keys()

In [None]:
# Display the current argument namespace.
args

## SimCLRv2: Fine Tuning From Projection Head
The SimCLRv2 paper says we should fine-tune from a middle layer of the projection head. The original SimCLR implementation basically throws the projection head away and, additionally, has no fine-tuning step from the ResNet. Here we build code that takes the middle layer of the projection head and then runs supervised fine-tuning with cross-entropy as the loss function.

In [19]:
# Number of output classes for the supervised head; 100 matches the CIFAR100
# setting selected in the configuration cell. Update it if args.dataset changes.
n_classes = 100
simclr_model = SimCLRv2(resnet_depth=50, resnet_width_multiplier=2, sk_ratio=0.0625,
                        pretrained_weights='/home/kaipak/models/SimCLRv2/r50_2x_sk1.pth')
simclr_model_ft = SimCLRv2_ft(simclr_model, n_classes)

# Default to the un-wrapped model so that `simclr_model_ngpu` is always bound.
# Previously it was only assigned when several GPUs were visible, and the
# `.to(...)` call below raised NameError on single-GPU / CPU hosts.
simclr_model_ngpu = simclr_model_ft
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    simclr_model_ngpu = nn.DataParallel(simclr_model_ft)

# From here on `simclr_model` is the fine-tuning model, not the bare SimCLRv2 model.
simclr_model = simclr_model_ngpu.to(args.device)

Let's use 4 GPUs!


In [20]:
def train(args, loader, model, criterion, optimizer, writer):
    """Run one supervised fine-tuning epoch.

    NOTE: this definition shadows the earlier contrastive ``train`` defined in
    a previous cell of this notebook.

    Args:
        args: Namespace providing ``device``, ``current_epoch`` and a mutable
            ``global_step`` counter, which is incremented once per batch.
        loader: Iterable of ``(X, y)`` batches.
        model: Called as ``model(X)``; ``argmax(1)`` of its output is compared
            with ``y``.
        criterion: Called as ``criterion(output, y)``; must return a scalar
            tensor supporting ``backward()``.
        optimizer: Optimizer stepping ``model``'s parameters.
        writer: Object exposing ``add_scalar(tag, value, step)``.

    Returns:
        tuple: ``(epoch_loss, epoch_accuracy)`` — the sums of the per-batch
        loss (as a float) and per-batch accuracy. Divide by the number of
        batches for epoch means.
    """
    epoch_loss = 0.0
    epoch_accuracy = 0.0
    n_steps = 0
    model.train()

    for step, (X, y) in enumerate(loader):
        optimizer.zero_grad()
        # Use the configured device, as test() does, instead of forcing CUDA.
        X = X.to(args.device, non_blocking=True)
        y = y.to(args.device, non_blocking=True)

        output = model(X)
        step_loss = criterion(output, y)

        predicted = output.argmax(1)
        step_accuracy = (predicted == y).sum().item() / y.size(0)
        epoch_accuracy += step_accuracy

        step_loss.backward()
        optimizer.step()

        # .item() detaches the value, so the running total is a plain float
        # rather than a tensor still attached to autograd history.
        epoch_loss += step_loss.item()
        writer.add_scalar("Accuracy/train_step", step_accuracy, args.global_step)
        args.global_step += 1
        n_steps += 1

        if step % 100 == 0:
            print(f"Step [{step}/{len(loader)}]\t Accuracy {step_accuracy}...")

    # Log epoch means. Previously the last step's accuracy and the summed loss
    # were logged. Nothing is logged for an empty loader.
    if n_steps:
        writer.add_scalar("Accuracy/train_epoch", epoch_accuracy / n_steps, args.current_epoch)
        writer.add_scalar("Loss/train_epoch", epoch_loss / n_steps, args.current_epoch)

    return epoch_loss, epoch_accuracy

def test(args, loader, model, criterion, optimizer):
    epoch_loss = 0
    epoch_accuracy = 0
    model.eval()
    
    for step, (x, y) in enumerate(loader):
        model.zero_grad()
        
        x = x.to(args.device)
        y = y.to(args.device)
        
        output = model(x)
        step_loss = criterion(output, y)
        
        predicted = output.argmax(1)
        step_accuracy = (predicted == y).sum().item() / y.size(0)
        epoch_accuracy += step_accuracy
        
        epoch_loss += step_loss.item()
    
    return epoch_loss, epoch_accuracy

In [21]:
# Loss, optimizer and scheduler for supervised fine-tuning of `simclr_model`.
# NOTE(review): the fine-tuning loop in this notebook never calls
# scheduler.step(), and it logs through its own `tb_writer` rather than the
# `writer` created here — confirm both are intended.
criterion = torch.nn.CrossEntropyLoss()
optimizer, scheduler = load_optimizer(args, simclr_model)
writer = SummaryWriter(log_dir='/home/kaipak/models/runs')

In [22]:
%time
args.global_step = 0
args.current_epoch = 0
args.logistic_epochs = 5
tb_writer =  SummaryWriter(log_dir=f'/home/kaipak/models/tensorboard_logs/' +
                           f'{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}')

#for epoch in range(args.logistic_epochs):
for epoch in range(args.logistic_epochs):
    loss_epoch, accuracy_epoch = train(args, train_loader, simclr_model, criterion, optimizer, tb_writer)
    
    print(f"Epoch [{epoch}/{args.logistic_epochs}]\t Loss: {loss_epoch / len(train_loader)}\t Accuracy: {accuracy_epoch / len(train_loader)}")
    
    args.current_epoch += 1

loss_epoch, accuracy_epoch = test(
    args, test_loader, simclr_model, criterion, optimizer
)

print(
    f"[FINAL]\t Loss: {loss_epoch / len(test_loader)}\t Accuracy: {accuracy_epoch / len(test_loader)}"
)

CPU times: user 6 µs, sys: 1e+03 ns, total: 7 µs
Wall time: 14.1 µs
Step [0/520]	 Accuracy 0.0...
Step [100/520]	 Accuracy 0.23958333333333334...
Step [200/520]	 Accuracy 0.5833333333333334...
Step [300/520]	 Accuracy 0.6354166666666666...
Step [400/520]	 Accuracy 0.6979166666666666...
Step [500/520]	 Accuracy 0.7604166666666666...
Epoch [0/5]	 Loss: 2.096226215362549	 Accuracy: 0.5176883012820513
Step [0/520]	 Accuracy 0.7916666666666666...
Step [100/520]	 Accuracy 0.7708333333333334...
Step [200/520]	 Accuracy 0.8541666666666666...
Step [300/520]	 Accuracy 0.8020833333333334...
Step [400/520]	 Accuracy 0.8333333333333334...
Step [500/520]	 Accuracy 0.8229166666666666...
Epoch [1/5]	 Loss: 0.6005164980888367	 Accuracy: 0.8258413461538463
Step [0/520]	 Accuracy 0.8854166666666666...
Step [100/520]	 Accuracy 0.8645833333333334...
Step [200/520]	 Accuracy 0.7708333333333334...
Step [300/520]	 Accuracy 0.9166666666666666...
Step [400/520]	 Accuracy 0.9166666666666666...
Step [500/520]	 Ac

In [None]:
# Ask PyTorch's caching allocator to release unused cached GPU memory.
torch.cuda.empty_cache()

In [15]:
# Build a fresh SimCLRv2 model from the pretrained checkpoint and display its
# projection head.
# NOTE(review): this re-binds `simclr_model`; if the fine-tuning cells have
# already run, the fine-tuned model is no longer reachable under that name —
# confirm this is intended.
simclr_model = SimCLRv2(resnet_depth=50, resnet_width_multiplier=2, sk_ratio=0.0625, 
                        pretrained_weights='/home/kaipak/models/SimCLRv2/r50_2x_sk1.pth')
simclr_model.projector

ContrastiveHead(
  (layers): ModuleList(
    (0): Linear(in_features=4096, out_features=4096, bias=False)
    (1): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=4096, out_features=4096, bias=False)
    (4): BatchNorm1d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=4096, out_features=128, bias=False)
    (7): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [17]:
# Display the optimizer currently bound to `optimizer` (its repr lists the parameter groups).
optimizer

LARS (
Parameter Group 0
    classic_momentum: True
    eeta: 0.001
    exclude_from_layer_adaptation: None
    exclude_from_weight_decay: ['batch_normalization', 'bias']
    initial_lr: 0.075
    lr: 0.075
    momentum: 0.9
    use_nesterov: False
    weight_decay: 1e-06
)

In [18]:
# Display the dataset currently bound to `train_dataset`.
train_dataset

Dataset CIFAR100
    Number of datapoints: 50000
    Root location: /home/kaipak/datasets
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=224, interpolation=PIL.Image.BILINEAR)
               ToTensor()
           )