In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

## Conversion to SimCLRv2 and Converting TF Pretrained Weights
Pretrained weights can be found on Google's [repo](https://github.com/google-research/simclr). With conversion scripts linked. Most of the inital work can be found in spijkervet_prototypes.ipynb. This work is to clean up the spaghetti code and turn into modules.

In [2]:
import os
import sys
import argparse
from pprint import pprint

import torch
import torch.nn as nn
import torchvision
import numpy as np
from torch.utils.tensorboard import SummaryWriter

sys.path.insert(0, '../')

from model import save_model, load_optimizer
from simclr.modules import LogisticRegression
from simclr import SimCLR, SimCLRv2
from simclr.modules import get_resnet_pt, get_resnet_v2, NT_Xent
from simclr.modules.transformations import TransformsSimCLR
from utils import yaml_config_hook

In [5]:
parser = argparse.ArgumentParser(description="SimCLR")
config = yaml_config_hook("../config/config.yaml")
tensorboard_

for k, v in config.items():
    parser.add_argument(f"--{k}", default=v, type=type(v))
    
args = parser.parse_args([])
args.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
args.batch_size = 32
args.resnet = "resnet50"
args.epochs = 400
args.gpus = 4
args.optimizer = 'LARS'
args.workers = 64
args.dataset = 'CIFAR100'
pprint(vars(args))

{'batch_size': 32,
 'dataparallel': 0,
 'dataset': 'CIFAR100',
 'dataset_dir': './datasets',
 'device': device(type='cuda'),
 'epoch_num': 100,
 'epochs': 400,
 'gpus': 4,
 'image_size': 224,
 'logistic_batch_size': 256,
 'logistic_epochs': 500,
 'model_path': 'save',
 'nodes': 1,
 'nr': 0,
 'optimizer': 'LARS',
 'pretrain': True,
 'projection_dim': 64,
 'reload': False,
 'resnet': 'resnet50',
 'seed': 42,
 'start_epoch': 0,
 'temperature': 0.5,
 'weight_decay': 1e-06,
 'workers': 64}


In [11]:
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if args.dataset == "STL10":
    train_dataset = torchvision.datasets.STL10(
        args.dataset_dir,
        split="unlabeled",
        download=True,
        transform=TransformsSimCLR(size=args.image_size),
    )
elif args.dataset == "CIFAR10":
    train_dataset = torchvision.datasets.CIFAR10(
        args.dataset_dir,
        download=True,
        transform=TransformsSimCLR(size=args.image_size),
    )
elif args.dataset == "CIFAR100":
    train_dataset = torchvision.datasets.CIFAR100(
        args.dataset_dir,
        download=True,
        transform=TransformsSimCLR(size=args.image_size),
    )
else:
    raise NotImplementedError

if args.nodes > 1:
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=args.world_size, rank=rank, shuffle=True
    )
else:
    train_sampler = None


# Data Transforms happen here.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=(train_sampler is None),
    drop_last=True,
    num_workers=args.workers,
    sampler=train_sampler,
)

Files already downloaded and verified


## SimCLRv2: Self Supervised Learning
Modified SimCLR Pytorch code to v2 with Resnet code from converter which includes contrastive head.


In [None]:
model = SimCLRv2(resnet_depth=50, resnet_width_multiplier=2)

if torch.cuda.device_count() > 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
  model = nn.DataParallel(model)

if args.reload:
    model_fp = os.path.join(
        args.model_path, f"checkpoint_{args.epoch_num}.tar"
    )
    model.load_state_dict(torch.load(model_fp, map_location=args.device.type))

model = model.to(args.device)
optimizer, scheduler = load_optimizer(args, model)
criterion = NT_Xent(args.batch_size, args.temperature, world_size=1)
writer = SummaryWriter()

In [None]:
def train(args, train_loader, model, criterion, optimizer, writer, display_every=50):
    """Train function"""
    epoch_loss = 0
    
    for step, ((x_i, x_j), _) in enumerate(train_loader):
    #for step, x_i, x_j in enumerate(train_loader):
        optimizer.zero_grad()
        x_i = x_i.cuda(non_blocking=True)
        x_j = x_j.cuda(non_blocking=True)
        
        # Positive pair with encoding
        h_i, h_j, z_i, z_j = model(x_i, x_j)
        
        loss = criterion(z_i, z_j)
        loss.backward()
        optimizer.step()
        
        if step % display_every == 0:
            print(f"Step [{step}/{len(train_loader)}]\t Loss: {loss.item()}")
        
        writer.add_scalar("Loss/train_epoch", loss.item(), args.global_step)
        epoch_loss += loss.item()
        args.global_step += 1
    
    return epoch_loss

In [None]:
args.global_step = 0
args.current_epoch = 0

for epoch in range(args.start_epoch, args.epochs):
    lr = optimizer.param_groups[0]["lr"]
    epoch_loss = train(args, train_loader, model, criterion, optimizer, writer)
    
    if scheduler:
        scheduler.step()
    
    if epoch % 10 == 0:
        save_model(args, model, optimizer)
    
    writer.add_scalar("Loss/train", epoch_loss / len(train_loader), epoch)
    writer.add_scalar("Misc/learning_rate", lr, epoch)

    print(
        f"Epoch [{epoch}/{args.epochs}]\t Loss: {epoch_loss / len(train_loader)}\t lr: {round(lr, 5)}"
    )
    args.current_epoch += 1

save_model(args, model, optimizer)

In [None]:
torch.cuda.empty_cache()

## SimCLRv2: Fine Tuning From Projection Head
v2 paper states that fine tuning should happen from 2nd linear projection layer. Original SimCLR implementation basically throws this away and additionally does not have fine-tuning step from Resnet. Build code to take middle layer of projection then run supervised fine-tuning using cross-entropy as loss function

In [7]:
simclr_model = SimCLRv2(resnet_depth=50, resnet_width_multiplier=2, sk_ratio=0.0625, 
                        pretrained_weights='/home/kaipak/models/SimCLRv2/r50_2x_sk1.pth')

if torch.cuda.device_count() > 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
  simclr_model_ngpu = nn.DataParallel(simclr_model)
  # simclr_model_ngpu.n_features = n_features

simclr_model = simclr_model_ngpu.to(args.device)

Let's use 4 GPUs!


In [9]:
# Helper functions to map all input data X to their latent representations 
# h that are used in linear evaluation (they only have to be computed once)
# Should be part of the processing step before FT.
def inference(loader, simclr_model, device):
    feature_vector = []
    labels_vector = []
    for step, (x, y) in enumerate(loader):
        x = x.to(device)

        # get encoding
        with torch.no_grad():
            h, _, z, _ = simclr_model(x, x)

        h = h.detach()

        feature_vector.extend(h.cpu().detach().numpy())
        labels_vector.extend(y.numpy())

        if step % 20 == 0:
            print(f"Step [{step}/{len(loader)}]\t Computing features...")

    feature_vector = np.array(feature_vector)
    labels_vector = np.array(labels_vector)
    print("Features shape {}".format(feature_vector.shape))
    return feature_vector, labels_vector


def get_features(context_model, train_loader, test_loader, device):
    train_X, train_y = inference(train_loader, context_model, device)
    test_X, test_y = inference(test_loader, context_model, device)
    return train_X, train_y, test_X, test_y


def create_data_loaders_from_arrays(X_train, y_train, X_test, y_test, batch_size):
    train = torch.utils.data.TensorDataset(
        torch.from_numpy(X_train), torch.from_numpy(y_train)
    )
    train_loader = torch.utils.data.DataLoader(
        train, batch_size=batch_size, shuffle=False
    )

    test = torch.utils.data.TensorDataset(
        torch.from_numpy(X_test), torch.from_numpy(y_test)
    )
    test_loader = torch.utils.data.DataLoader(
        test, batch_size=batch_size, shuffle=False
    )
    return train_loader, test_loader

## Train/Test Split
For linear evaluation or fine tuning, since we are now interested in labeling instead of self supervised contrastive learning, we will need to split the data

In [13]:
if args.dataset == "STL10":
    train_dataset = torchvision.datasets.STL10(
        args.dataset_dir,
        split="train",
        download=True,
        transform=TransformsSimCLR(size=args.image_size).test_transform,
    )
    test_dataset = torchvision.datasets.STL10(
        args.dataset_dir,
        split="test",
        download=True,
        transform=TransformsSimCLR(size=args.image_size).test_transform,
    )
elif args.dataset == "CIFAR10":
    train_dataset = torchvision.datasets.CIFAR10(
        args.dataset_dir,
        train=True,
        download=True,
        transform=TransformsSimCLR(size=args.image_size).test_transform,
    )
    test_dataset = torchvision.datasets.CIFAR10(
        args.dataset_dir,
        train=False,
        download=True,
        transform=TransformsSimCLR(size=args.image_size).test_transform,
    )
elif args.dataset == "CIFAR100":
    train_dataset = torchvision.datasets.CIFAR100(
        args.dataset_dir,
        train=True,
        download=True,
        transform=TransformsSimCLR(size=args.image_size).test_transform,
    )
    test_dataset = torchvision.datasets.CIFAR100(
        args.dataset_dir,
        train=False,
        download=True,
        transform=TransformsSimCLR(size=args.image_size).test_transform,
    )
else:
    raise NotImplementedError

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.logistic_batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=args.workers,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=args.logistic_batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=args.workers,
)

Files already downloaded and verified
Files already downloaded and verified


In [14]:
(train_X, train_y, test_X, test_y) = get_features(
    simclr_model, train_loader, test_loader, args.device
)

arr_train_loader, arr_test_loader = create_data_loaders_from_arrays(
    train_X, train_y, test_X, test_y, args.logistic_batch_size
)

Step [0/195]	 Computing features...
Step [20/195]	 Computing features...
Step [40/195]	 Computing features...
Step [60/195]	 Computing features...
Step [80/195]	 Computing features...
Step [100/195]	 Computing features...
Step [120/195]	 Computing features...
Step [140/195]	 Computing features...
Step [160/195]	 Computing features...
Step [180/195]	 Computing features...
Features shape (49920, 4096)
Step [0/39]	 Computing features...
Step [20/39]	 Computing features...
Features shape (9984, 4096)


In [None]:
# Fine tuning
def train(args, train_loader, model, criterion, optimizer, writer, display_every=50):
    """Train function"""
    epoch_loss = 0
    
    for step, ((x_i, x_j), _) in enumerate(train_loader):
    #for step, x_i, x_j in enumerate(train_loader):
        optimizer.zero_grad()
        x_i = x_i.cuda(non_blocking=True)
        x_j = x_j.cuda(non_blocking=True)
        
        # Positive pair with encoding
        h_i, h_j, z_i, z_j = model(x_i, x_j)
        
        loss = criterion(z_i, z_j)
        loss.backward()
        optimizer.step()
        
        if step % display_every == 0:
            print(f"Step [{step}/{len(train_loader)}]\t Loss: {loss.item()}")
        
        writer.add_scalar("Loss/train_epoch", loss.item(), args.global_step)
        epoch_loss += loss.item()
        args.global_step += 1
    
    return epoch_loss

In [None]:
# Fine Tune training
def train(args, loader, simclr_model, model, criterion, optimizer, writer):
    """Train evaluation model"""
    epoch_loss = 0
    epoch_accuracy = 0
    model.eval()
    
    for step, (x, y) in enumerate(loader):
        optimizer.zero_grad()
        
        x = x.to(args.device)
        y = y.to(args.device)
        
        output = model(x)
        step_loss = criterion(output, y)
        
        predicted = output.argmax(1)
        step_accuracy = (predicted == y).sum().item() / y.size(0)
        epoch_accuracy += step_accuracy
        
        step_loss.backward()
        optimizer.step()
        
        epoch_loss += step_loss
        writer.add_scalar("Accuracy/train_step", step_accuracy, args.global_step)
        args.global_step += 1
        
    writer.add_scalar("Accuracy/train_epoch", step_accuracy, args.current_epoch)
    writer.add_scalar("Loss/train_epoch", epoch_loss, args.current_epoch)

    return epoch_loss, epoch_accuracy

def test(args, loader, simclr_model, model, criterion, optimizer):
    epoch_loss = 0
    epoch_accuracy = 0
    model.eval()
    
    for step, (x, y) in enumerate(loader):
        model.zero_grad()
        
        x = x.to(args.device)
        y = y.to(args.device)
        
        output = model(x)
        step_loss = criterion(output, y)
        
        predicted = output.argmax(1)
        step_accuracy = (predicted == y).sum().item() / y.size(0)
        epoch_accuracy += step_accuracy
        
        epoch_loss += step_loss.item()
    
    return epoch_loss, epoch_accuracy

In [None]:
n_classes = 100
model = LogisticRegression(simclr_model.n_features, n_classes)
if torch.cuda.device_count() > 1:
  print("Let's use", torch.cuda.device_count(), "GPUs!")
  # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
  model = nn.DataParallel(model)
model = model.to(args.device)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
#optimizer = LARS(model.parameters(), lr=3e-3)
criterion = torch.nn.CrossEntropyLoss()
writer = SummaryWriter(log_dir='/home/kaipak/models/runs')

In [16]:
simclr_model

DataParallel(
  (module): SimCLRv2(
    (encoder): ResNet(
      (net): Sequential(
        (0): Stem(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNormRelu(
            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (1): ReLU()
          )
          (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (3): BatchNormRelu(
            (0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (1): ReLU()
          )
          (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (5): BatchNormRelu(
            (0): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (1): ReLU()
          )
          (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        )
        (1): Blocks(
       