## SimCLR Implementation


### Import Libraries

In [1]:
# !pip3 freeze > requirements.txt
# !pip install torchlars
# ! pip install lightly

Referenced from: https://github.com/leftthomas/SimCLR

In [1]:
# import necessary dependencies
import argparse
import os, sys
import time
import datetime
import pickle
import random
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10
from PIL import Image

import numpy as np
import pandas as pd

# import shutil, time, os, requests, random, copy

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

from sklearn.manifold import TSNE
# from torchlars import LARS
from lightly.loss import NTXentLoss

# move one directory up to import from src
if os.getcwd().split("/")[-1] == "notebooks":
    os.chdir("..")

# from src.setdevice import set_device

  from .autonotebook import tqdm as notebook_tqdm


### Set Seeds

In [2]:
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed handed to Python's ``random`` module, NumPy's
            global generator, and PyTorch (CPU generator and the current
            CUDA device).
    """
    random.seed(seed)
    # The remaining seeders share the same one-argument signature.
    for seed_fn in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)


set_seed()


In [3]:
def set_device():
    """Select the best available torch device and print which one was chosen.

    Preference order is CUDA, then Apple's Metal (MPS) backend, then CPU.

    Returns:
        torch.device: ``cuda``, ``mps`` or ``cpu``.
    """
    if torch.cuda.is_available():  # NVIDIA GPU
        device = torch.device("cuda")
        print(f"Running on {device} GPU: {torch.cuda.get_device_name(0)}")
        return device

    # Not every PyTorch build ships `torch.backends.mps`; look it up defensively
    # so CPU-only machines with such a build fall through to the CPU branch
    # instead of raising AttributeError.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():  # Apple Metal
        device = torch.device("mps")
        print(
            f"Checking pytorch is built with mps activated: {mps_backend.is_built()}"
        )
        print(f"Running on {device} GPU...")
        return device

    device = torch.device("cpu")
    print("Running on CPU...")
    return device


device = set_device()


Running on cuda GPU: Tesla V100-PCIE-16GB


## MOVE TO SCRIPT

## Define f(·) encoder - ResNet-20


In [5]:
"""
It is the neural network model base encoder f(·) that extracts representation vectors 
from augmented data examples. We choose to use the ResNet20 architecture.
"""

# Residual Block
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + BatchNorm layers and a shortcut.

    Args:
        inplanes: Number of channels of the input tensor.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).

    The shortcut is the identity when the block preserves the tensor shape;
    otherwise it is a 1x1 conv + BatchNorm projection ("Option B").
    """

    def __init__(self, inplanes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=inplanes,
            out_channels=planes,
            kernel_size=3,
            stride=stride,
            padding=1,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, out_channels=planes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)
        # A projection is required whenever the residual branch changes the
        # shape, i.e. for ANY channel change (widening as well as narrowing)
        # or any spatial down-sampling. Testing only `inplanes > planes` left
        # the widening/stride-1 case with an identity shortcut, which then
        # failed at the addition in forward().
        if inplanes != planes or stride != 1:
            # Option B
            self.downsample = nn.Sequential(
                nn.Conv2d(
                    in_channels=inplanes,
                    out_channels=planes,
                    kernel_size=1,
                    stride=stride,
                    padding=0,
                    bias=False,
                ),
                nn.BatchNorm2d(planes),
            )
        else:
            self.downsample = None
        self.stride = stride

    def forward(self, x):
        """Return ``relu(residual(x) + shortcut(x))``."""
        identity = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity if self.downsample is None else self.downsample(identity)
        out = F.relu(out)
        return out


# ResNet
class ResNet(nn.Module):
    """Three-stage ResNet encoder that outputs a flat 64-dimensional feature.

    Args:
        block: Residual block class; it is called as
            ``block(inplanes=..., planes=..., stride=...)``.
        layers: Sequence with the number of blocks in each of the three stages.
        num_classes: Accepted for interface compatibility; not used, because
            this network has no classification layer.
    """

    def __init__(self, block, layers, num_classes=10):
        super().__init__()
        stem_width = 16
        # `inplanes` tracks the channel count entering the next stage.
        self.inplanes = stem_width
        self.conv0 = nn.Conv2d(
            3, stem_width, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn0 = nn.BatchNorm2d(stem_width)
        self.layer1 = self._make_layer(block, planes=16, blocks=layers[0], stride=1)
        self.layer2 = self._make_layer(block, planes=32, blocks=layers[1], stride=2)
        self.layer3 = self._make_layer(block, planes=64, blocks=layers[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

    def _make_layer(self, block, planes, blocks, stride):
        """Build one stage; only its first block changes width and stride."""
        entry_width = self.inplanes
        stage = [
            block(
                inplanes=entry_width if idx == 0 else planes,
                planes=planes,
                stride=stride if idx == 0 else 1,
            )
            for idx in range(blocks)
        ]
        self.inplanes = planes
        return nn.Sequential(*stage)

    def forward(self, x):
        """Encode a batch of images into one feature vector per image."""
        stem = F.relu(self.bn0(self.conv0(x)))
        pooled = self.avgpool(self.layer3(self.layer2(self.layer1(stem))))
        # Collapse the trailing 1x1 spatial dimensions left by the pooling.
        return pooled.view(pooled.size(0), -1)


### Define g(·) projection head - MLP with linear layers

In [6]:
"""
It is the neural network projection head g(·) that maps representations to 
the space where contrastive loss is applied. There is the option to use a linear
projection head or a nonlinear projection head (2 layer MLP).
"""


class LinearLayer(nn.Module):
    """Fully connected layer whose output is optionally batch-normalised.

    Args:
        in_features: Size of each input vector.
        out_features: Size of each output vector.
        use_bias: Request a bias term on the linear map. The bias is dropped
            whenever ``use_bn`` is true.
        use_bn: Apply ``BatchNorm1d`` to the linear output.

    Note:
        The ``BatchNorm1d`` module is constructed regardless of ``use_bn``
        (so it always appears in ``state_dict``); it is only *applied* when
        ``use_bn`` is true.
    """

    def __init__(self, in_features, out_features, use_bias=False, use_bn=False):
        super().__init__()

        self.use_bn = use_bn
        wants_bias = use_bias and not use_bn
        self.linear = nn.Linear(in_features, out_features, bias=wants_bias)
        self.bn = nn.BatchNorm1d(out_features)

    def forward(self, x):
        """Apply the linear map, then batch norm if it was enabled."""
        projected = self.linear(x)
        if self.use_bn:
            return self.bn(projected)
        return projected


class ProjectionHead(nn.Module):
    """Projection head g(.) mapping encoder features to the contrastive space.

    Args:
        in_features: Size of the incoming feature vectors.
        hidden_features: Width of the hidden layer (``"nonlinear"`` head only).
        out_features: Size of the projected vectors.
        head_type: ``"linear"`` for a single ``LinearLayer`` or ``"nonlinear"``
            for ``LinearLayer -> ReLU -> LinearLayer``.
        use_bn: Whether the ``"nonlinear"`` head's layers use batch norm. The
            ``"linear"`` head always uses batch norm.

    Raises:
        ValueError: If ``head_type`` is neither ``"linear"`` nor ``"nonlinear"``.
    """

    def __init__(
        self,
        in_features,
        hidden_features,
        out_features,
        head_type="nonlinear",
        use_bn=True,
    ):
        super(ProjectionHead, self).__init__()

        if head_type == "linear":
            self.layers = LinearLayer(
                in_features=in_features,
                out_features=out_features,
                use_bias=False,
                use_bn=True,
            )
        elif head_type == "nonlinear":
            self.layers = nn.Sequential(
                LinearLayer(
                    in_features=in_features,
                    out_features=hidden_features,
                    use_bias=True,
                    use_bn=use_bn,
                ),
                nn.ReLU(),
                LinearLayer(
                    in_features=hidden_features,
                    out_features=out_features,
                    use_bias=False,
                    use_bn=use_bn,
                ),
            )
        else:
            # Fail at construction time; otherwise `self.layers` is never set
            # and the mistake only surfaces as an AttributeError in forward().
            raise ValueError(
                "head_type must be 'linear' or 'nonlinear', got {!r}".format(head_type)
            )

    def forward(self, x):
        """Project a batch of feature vectors."""
        x = self.layers(x)
        return x


### Model

In [7]:
class SimCLR(nn.Module):
    """
    SimCLR model: an encoder followed by a non-linear projection head.

    Args:
      encoder (nn.Module, optional): Encoder mapping images to feature vectors
        of size ``projection_n_in``. When omitted, a fresh
        ``ResNet(BasicBlock, [3, 3, 3])`` is built for this instance.
      projection_n_in (int): Number of input features of the projection head
      projection_n_hidden (int): Number of hidden features of the projection head
      projection_n_out (int): Number of output features of the projection head
      projection_use_bn (bool): Whether to use batch norm in the projection head
    """

    def __init__(
        self,
        encoder: "nn.Module | None" = None,
        projection_n_in: int = 64,  # TODO CHECK ON THIS
        projection_n_hidden: int = 64,  # TODO CHECK ON THIS
        projection_n_out: int = 128,
        projection_use_bn: bool = True,
    ):
        super().__init__()

        # Honour a caller-supplied encoder (it used to be silently replaced),
        # and build the default lazily so that no module instance is created
        # once at definition time and shared through the default argument.
        if encoder is None:
            encoder = ResNet(BasicBlock, [3, 3, 3])
        self.encoder = encoder
        self.projection = ProjectionHead(
            in_features=projection_n_in,
            hidden_features=projection_n_hidden,
            out_features=projection_n_out,
            head_type="nonlinear",
            use_bn=projection_use_bn,
        )

    def forward(self, x):
        """Return ``(L2-normalised encoder feature, projection head output)``.

        The projection is computed from the un-normalised feature and is
        returned as produced by the head (it is not normalised here).
        """
        feature = self.encoder(x)
        x = self.projection(feature)
        return F.normalize(feature, dim=-1), x


### Instantiate Model on Device

In [8]:
# Build SimCLR with its default arguments and move all of its parameters and
# buffers to the device selected by set_device().
model = SimCLR().to(device)

### Setting up hyperparameters

In [9]:
# Tunable hyperparameters: candidate values for a sweep. They are not
# referenced anywhere else in the visible notebook yet.
LRS = [0.5, 1, 1.5]
TEMPS = [0.1, 0.5, 1.0]
BATCHES = [256, 512, 1024, 2048, 4096]

# for now
# LR = 0.5
TEMP = 0.5  # temperature of the NT-Xent loss
BATCH = 512
LR = 0.003 * BATCH / 256  # base rate scaled linearly with the batch size
EPOCHS = 100
DECAY = 1e-4  # weight decay passed to Adam

#############################################
# loss function
criterion = NTXentLoss(temperature=TEMP)

# Add optimizer
optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=DECAY)

# "decay the learning rate with the cosine decay schedule without restarts"
# SCHEDULER OR LINEAR WARMUP
# warmupscheduler = torch.optim.lr_scheduler.LambdaLR(
#     optimizer, lambda epoch: (epoch + 1) / 10.0, verbose=True
# )

# # SCHEDULER FOR COSINE DECAY
# mainscheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
#     optimizer, len(data_load), eta_min=0.05, last_epoch=-1, verbose=True
# )

# SCHEDULER FOR COSINE DECAY
# The training cell calls mainscheduler.step() once per epoch and only from
# epoch 10 onwards, so the schedule receives EPOCHS - 10 steps in total.
# T_max must equal that count for the learning rate to actually anneal; with
# the previous T_max of 50_000 the rate stayed practically constant.
mainscheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=max(1, EPOCHS - 10), verbose=True
)
#############################################
#############################################


Adjusting learning rate of group 0 to 6.0000e-03.


### Pre-processing functions for augmenting images

In [10]:
# Colour-distortion strength shared by the jitter parameters below.
s = 0.5

# Augmentation pipeline that produces the views for contrastive training:
# random crop resized to 32x32, flip, colour jitter, random grayscale, then
# tensor conversion and per-channel normalisation.
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(
            size=(32, 32), scale=(0.08, 1.0), ratio=(3 / 4, 4 / 3)
        ),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply(
            [
                transforms.ColorJitter(
                    brightness=0.8 * s,
                    contrast=0.8 * s,
                    saturation=0.8 * s,
                    hue=0.2 * s,
                )
            ],
            p=0.8,
        ),
        transforms.RandomGrayscale(p=0.2),
        # transforms.RandomApply([transforms.GaussianBlur(kernel_size = 3, sigma=(0.1, 2.0))], p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]
        ),
    ]
)

# Deterministic pipeline: tensor conversion and the same normalisation only.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]
        ),
    ]
)


### Setting up dataset and dataloader

In [11]:
# Alternative class for dataloader
class getC10Pair(CIFAR10):
    """CIFAR-10 dataset that yields two transformed views of every image."""

    def __getitem__(self, idx):
        """Return ``(view_1, view_2, label)`` for the sample at ``idx``.

        ``self.transform`` is applied twice to the same PIL image, so a random
        transform gives two different views. Without a transform both views
        are the untransformed PIL image.
        """
        img, label = self.data[idx], self.targets[idx]
        img = Image.fromarray(img)

        if self.transform is not None:
            img1 = self.transform(img)
            img2 = self.transform(img)
        else:
            # Previously img1/img2 were left unbound here, which raised
            # UnboundLocalError at the return statement.
            img1 = img2 = img

        if self.target_transform is not None:
            label = self.target_transform(label)

        return img1, img2, label


# Contrastive training set: augmented pairs, shuffled; drop_last keeps every
# batch at exactly BATCH samples.
train_data = getC10Pair(
    root="data", train=True, download=True, transform=train_transform
)
train_loader = DataLoader(
    dataset=train_data,
    batch_size=BATCH,
    shuffle=True,
    drop_last=True,
    num_workers=4,
)

# "Memory" set: the training split again, but with the deterministic transform
# and in fixed order.
memory_data = getC10Pair(
    root="data", train=True, download=True, transform=test_transform
)
memory_loader = DataLoader(
    dataset=memory_data, batch_size=BATCH, shuffle=False, num_workers=4
)

# Held-out test split with the deterministic transform.
test_data = getC10Pair(
    root="data", train=False, download=True, transform=test_transform
)
test_loader = DataLoader(
    dataset=test_data, batch_size=BATCH, shuffle=False, num_workers=4
)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 83046107.10it/s]


Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified
Files already downloaded and verified


### Training

In [None]:
# def save_model(model, optimizer, scheduler, current_epoch, name):
#   out = os.path.join('/workspaces/RepLearning/saved_models/',name.format(current_epoch))

#   torch.save({'model_state_dict': model.state_dict(),
#               'optimizer_state_dict': optimizer.state_dict(),
#               'scheduler_state_dict':scheduler.state_dict()}, out)


In [12]:
# alternative
# SimCLR pre-training. After every epoch the encoder is monitored with a
# similarity-weighted k-NN classifier: features of the (un-augmented) training
# set form a bank, and each test image is labelled by its nearest neighbours.
current_epoch = 0

DECAY_EPOCHS = 10  # epochs at the initial LR before the cosine schedule steps
KNN_K = 200  # neighbours consulted by the k-NN monitor
NUM_CLASSES = 10  # number of label classes scored by the k-NN monitor

results = {"train_loss": [], "test_acc@1": [], "test_acc@5": []}
save_name_pre = "B{}_E{}".format(BATCH, EPOCHS)

# Creates both results/ and results/backup/; a no-op when they already exist.
os.makedirs("results/backup", exist_ok=True)

best_acc = 0.0
for epoch in range(EPOCHS):

    print("")

    # ---- contrastive training -------------------------------------------
    model.train()
    total_loss, total_num, train_bar = 0.0, 0, tqdm(train_loader)

    for x_1, x_2, target in train_bar:
        # Use the device chosen by set_device() rather than assuming CUDA.
        x_1 = x_1.to(device, non_blocking=True)
        x_2 = x_2.to(device, non_blocking=True)

        feature_1, out_1 = model(x_1)
        feature_2, out_2 = model(x_2)

        loss = criterion(out_1, out_2)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = x_1.size(0)
        total_num += batch_size
        total_loss += loss.item() * batch_size
        train_bar.set_description(
            "Train Epoch: [{}/{}] Loss: {:.4f}".format(
                epoch + 1, EPOCHS, total_loss / total_num
            )
        )

    train_loss = total_loss / total_num
    results["train_loss"].append(train_loss)

    # if epoch < 10:
    #     warmupscheduler.step()
    if epoch >= DECAY_EPOCHS:
        mainscheduler.step()

    # ---- weighted k-NN monitor ------------------------------------------
    model.eval()
    total_top1, total_top5, total_num, feature_bank = 0.0, 0.0, 0, []

    with torch.no_grad():
        # generate feature bank
        for data, _, target in tqdm(memory_loader, desc="Feature extracting"):
            feature, out = model(data.to(device, non_blocking=True))
            feature_bank.append(feature)
        # [D, N]
        feature_bank = torch.cat(feature_bank, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(
            memory_loader.dataset.targets, device=feature_bank.device
        )

        # loop test data to predict the label by weighted knn search
        test_bar = tqdm(test_loader)
        for data, _, target in test_bar:
            data = data.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            feature, out = model(data)

            total_num += data.size(0)
            # Features are L2-normalised by the model, so this product is the
            # cosine similarity to every bank entry ---> [B, N]
            sim_matrix = torch.mm(feature, feature_bank)
            # [B, K]
            sim_weight, sim_indices = sim_matrix.topk(k=KNN_K, dim=-1)
            # [B, K]
            sim_labels = torch.gather(
                feature_labels.expand(data.size(0), -1), dim=-1, index=sim_indices
            )
            sim_weight = (sim_weight / TEMP).exp()

            # counts for each class
            one_hot_label = torch.zeros(
                data.size(0) * KNN_K, NUM_CLASSES, device=sim_labels.device
            )
            # [B*K, C]
            one_hot_label = one_hot_label.scatter(
                dim=-1, index=sim_labels.view(-1, 1), value=1.0
            )
            # weighted score ---> [B, C]
            pred_scores = torch.sum(
                one_hot_label.view(data.size(0), -1, NUM_CLASSES)
                * sim_weight.unsqueeze(dim=-1),
                dim=1,
            )

            pred_labels = pred_scores.argsort(dim=-1, descending=True)

            total_top1 += torch.sum(
                (pred_labels[:, :1] == target.unsqueeze(dim=-1)).any(dim=-1).float()
            ).item()
            total_top5 += torch.sum(
                (pred_labels[:, :5] == target.unsqueeze(dim=-1)).any(dim=-1).float()
            ).item()
            test_bar.set_description(
                "Test Epoch: [{}/{}] Acc@1:{:.2f}% Acc@5:{:.2f}%".format(
                    epoch + 1,
                    EPOCHS,
                    total_top1 / total_num * 100,
                    total_top5 / total_num * 100,
                )
            )

    test_acc_1 = total_top1 / total_num * 100
    test_acc_5 = total_top5 / total_num * 100

    results["test_acc@1"].append(test_acc_1)
    results["test_acc@5"].append(test_acc_5)

    # save statistics
    # `epoch` is 0-based, so after this epoch every list in `results` holds
    # epoch + 1 entries; the 1-based index therefore has to run to epoch + 1
    # inclusive. range(1, epoch + 1) was one element short and made the
    # DataFrame constructor raise on the very first save. The CSV is written
    # every epoch so that it never lags behind the recorded history.
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 2))
    data_frame.to_csv(
        "results/{}_statistics.csv".format(save_name_pre), index_label="epoch"
    )

    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        # torch.save(model.state_dict(), '/workspaces/RepLearning/saved_models/{}_best_model.pth'.format(save_name_pre))
        torch.save(
            model.state_dict(), "./results/{}_best_model.pth".format(save_name_pre)
        )

    # save_model(model, optimizer, mainscheduler, current_epoch, "SimCLR_CIFAR10_RN20_bestacc.pt")
    # save every epoch as backup
    torch.save(model.state_dict(), f"./results/backup/{save_name_pre}_e{epoch}.pth")

    # current_epoch+=1

    # current_epoch+=1





  0%|          | 0/97 [00:00<?, ?it/s]

: 

: 

In [None]:
# def plot_features(model, num_classes, num_feats, batch_size):
#     preds = np.array([]).reshape((0,1))
#     gt = np.array([]).reshape((0,1))
#     feats = np.array([]).reshape((0,num_feats))
#     model.eval()
#     with torch.no_grad():
#         for x1,_, target in test_loader:
#             x1 = x1.squeeze().to(device = 'cuda:0', dtype = torch.float)
#             target = target.squeeze().to(device = 'cuda:0')
#             out = model(x1)
#             out = out.detach().to("cpu").data.numpy()#.reshape((1,-1))
#             feats = np.append(feats,out,axis = 0)

#             # feature_labels = torch.tensor(memory_loader.dataset.targets, device=feature_bank.device)

#     tsne = TSNE(n_components = 2, perplexity = 50)
#     x_feats = tsne.fit_transform(feats)
#     num_samples = int(batch_size*(test_data.shape[0]//batch_size))#(len(val_df)

#     for i in range(num_classes):
#         plt.scatter(x_feats[target[:num_samples]==i,1],x_feats[target[:num_samples]==i,0])

#     plt.legend([str(i) for i in range(num_classes)])
#     plt.show()

# plot_features(model, 10, 64, 512)

# # torch.save(model.state_dict(), './results/{}_best_model.pth'.format(save_name_pre))


### Linear Evaluations

### Load a model
Be careful to use only the encoder part of the pretrained model for evaluation.

In [9]:
class EvalNet(nn.Module):
    """Encoder plus a single linear classifier, used for linear evaluation.

    Args:
        num_class: Number of output classes of the linear classifier.
        pretrained_path: Path to a saved ``state_dict``. It is loaded with
            ``strict=False``, so entries that do not match this module are
            skipped and parameters missing from the file keep their fresh
            initialisation.
    """

    def __init__(self, num_class, pretrained_path):
        super(EvalNet, self).__init__()

        # encoder
        self.encoder = ResNet(BasicBlock, [3, 3, 3])
        # classifier: sized from `num_class` (it used to be hard-coded to 10,
        # silently ignoring the argument)
        self.fc = nn.Linear(64, num_class, bias=True)
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        self.load_state_dict(
            torch.load(pretrained_path, map_location="cpu"), strict=False
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        x = self.encoder(x)
        out = self.fc(x)
        return out


In [10]:
# load best model
# model_path = '/workspaces/RepLearning/saved_models/{}_best_model.pth'.format(save_name_pre)
# Hard-coded checkpoint; the name follows the "B{batch}_E{epochs}" pattern
# used when the pre-training cell saves its best model.
model_path = "results/B256_E1_best_model.pth"

model = EvalNet(num_class=10, pretrained_path=model_path).to(device)

# Freeze the encoder so that only the linear classifier receives gradients.
model.encoder.requires_grad_(False)

eval_LRATE = 0.001 * BATCH / 256
# Only the classifier's parameters are handed to the optimizer.
optimizer = optim.Adam(
    params=model.fc.parameters(), lr=eval_LRATE, weight_decay=1e-4
)
# optimizer = optim.SGD(model.fc.parameters(), lr=eval_LRATE, momentum=0.9, nesterov=True)
criterion = nn.CrossEntropyLoss()


In [11]:
# Augmentation for linear evaluation: geometric transforms only (the colour
# distortions of the contrastive pipeline stay disabled below).
eval_train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(
            size=(32, 32), scale=(0.08, 1.0), ratio=(3 / 4, 4 / 3)
        ),
        transforms.RandomHorizontalFlip(),
        # transforms.RandomApply(
        #     [transforms.ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)], p=0.8
        # ),
        # transforms.RandomGrayscale(p=0.2),
        # transforms.RandomApply([transforms.GaussianBlur(kernel_size = 3, sigma=(0.1, 2.0))], p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]
        ),
    ]
)

# Deterministic pipeline: tensor conversion and normalisation only.
eval_test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]
        ),
    ]
)


In [12]:
# Display the working directory: the relative paths used in this notebook
# ("data", "results/...") are resolved against it.
os.getcwd()

'/workspaces/DRLrecommenders/dnn'

In [1]:
# NOTE: this import rebinds the name `DataLoader`, shadowing the
# torch.utils.data.DataLoader imported at the top of the notebook.
from dataloader_eval import DataLoader, GenericDataset

# Labelled subset used to fit the linear classifier.
train_set = GenericDataset(
    dataset_name="cifar10",
    split="train",  # crop and horizontal flip
    num_imgs_per_cat=500,  # 10% of the training set
)
# Full test split used for validation.
val_set = GenericDataset(dataset_name="cifar10", split="test")

train_loader = DataLoader(
    dataset=train_set,
    batch_size=128,
    unsupervised=False,
    epoch_size=10 * 5000,
    num_workers=4,
    shuffle=True,
)

val_loader = DataLoader(
    dataset=val_set,
    batch_size=128,
    unsupervised=False,
    epoch_size=None,
    num_workers=4,
    shuffle=False,
)

############################################

  from .autonotebook import tqdm as notebook_tqdm


Files already downloaded and verified
num_imgs_per_category 500
Files already downloaded and verified


In [14]:
# train_data = CIFAR10(
#     root="data", train=True, transform=eval_train_transform, download=True
# )
# train_loader = DataLoader(train_data, batch_size=BATCH, shuffle=True, num_workers=4)

# test_data = CIFAR10(
#     root="data", train=False, transform=eval_test_transform, download=True
# )
# test_loader = DataLoader(test_data, batch_size=BATCH, shuffle=False, num_workers=4)


In [15]:
# # load best model
# # model_path = '/workspaces/RepLearning/saved_models/{}_best_model.pth'.format(save_name_pre)
# model_path = "./results/{}_best_model.pth".format(save_name_pre)

# model = EvalNet(num_class=10, pretrained_path=model_path).to(device)

# for param in model.encoder.parameters():
#     param.requires_grad = False

# eval_LRATE = 0.1 * BATCH / 256
# optimizer = optim.Adam(model.fc.parameters(), lr=1e-3, weight_decay=1e-6) #TODO verify this
# # optimizer = optim.SGD(model.fc.parameters(), lr=eval_LRATE, momentum=0.9, nesterov=True)
# criterion = nn.CrossEntropyLoss()  # TODO verify this


In [None]:
# cifar_dataset = torchvision.datasets.CIFAR10(root='./data', transform=transform)
# train_indices = # select train indices according to your rule
# test_indices = # select test indices according to your rule
# train_loader = torch.utils.data.DataLoader(cifar_dataset, batch_size=32, shuffle=True, sampler=SubsetRandomSampler(train_indices))
# test_loader = torch.utils.data.DataLoader(cifar_dataset, batch_size=32, shuffle=True, sampler=SubsetRandomSampler(test_indices))

In [18]:
results = {
    "train_loss": [],
    "train_acc@1": [],
    "train_acc@5": [],
    "test_loss": [],
    "test_acc@1": [],
    "test_acc@5": [],
}

best_acc = 0.0

EVAL_EPOCHS = 30


def _run_linear_epoch(batches, epoch, train_optimizer=None):
    """Run one pass over ``batches`` and return ``(loss, acc@1, acc@5)``.

    Uses the notebook-level ``model``, ``criterion`` and ``device``.

    Args:
        batches: Iterable yielding ``(data, target)`` batches.
        epoch: 1-based epoch number, used only for the progress-bar text.
        train_optimizer: When given, the pass trains (backward + optimizer
            step per batch) and is labelled "Train"; when ``None`` the pass
            runs under ``torch.no_grad()`` and is labelled "Test".

    Returns:
        Tuple of the sample-weighted mean loss and the top-1 / top-5
        accuracies in percent.
    """
    is_train = train_optimizer is not None
    phase = "Train" if is_train else "Test"
    total_loss, total_correct_1, total_correct_5, total_num = 0.0, 0.0, 0.0, 0
    data_bar = tqdm(batches)

    with torch.enable_grad() if is_train else torch.no_grad():
        for data, target in data_bar:
            # Use the device chosen by set_device() rather than assuming CUDA.
            data = data.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            out = model(data)
            loss = criterion(out, target)

            if is_train:
                train_optimizer.zero_grad()
                loss.backward()
                train_optimizer.step()

            total_num += data.size(0)
            total_loss += loss.item() * data.size(0)

            # Class indices ordered from highest to lowest logit.
            prediction = torch.argsort(out, dim=-1, descending=True)
            total_correct_1 += torch.sum(
                (prediction[:, 0:1] == target.unsqueeze(dim=-1)).any(dim=-1).float()
            ).item()
            total_correct_5 += torch.sum(
                (prediction[:, 0:5] == target.unsqueeze(dim=-1)).any(dim=-1).float()
            ).item()

            data_bar.set_description(
                "{} Epoch: [{}/{}] Loss: {:.4f} ACC@1: {:.2f}% ACC@5: {:.2f}%".format(
                    phase,
                    epoch,
                    EVAL_EPOCHS,
                    total_loss / total_num,
                    total_correct_1 / total_num * 100,
                    total_correct_5 / total_num * 100,
                )
            )

    return (
        total_loss / total_num,
        total_correct_1 / total_num * 100,
        total_correct_5 / total_num * 100,
    )


# The checkpoint below is written into results/; make sure it exists.
os.makedirs("results", exist_ok=True)

for epoch in range(1, EVAL_EPOCHS + 1):

    print("")

    # NOTE(review): model.train() also puts the frozen encoder's BatchNorm
    # layers in training mode, so their running statistics keep updating
    # during linear evaluation - confirm this is intended.
    model.train()
    # NOTE(review): the custom loaders are called with a constant 0; if that
    # argument is an epoch index used for shuffling, pass `epoch` - confirm
    # against dataloader_eval.
    train_loss, train_acc_1, train_acc_5 = _run_linear_epoch(
        train_loader(0), epoch, train_optimizer=optimizer
    )

    results["train_loss"].append(train_loss)
    results["train_acc@1"].append(train_acc_1)
    results["train_acc@5"].append(train_acc_5)

    model.eval()
    # Evaluate on `val_loader`, the callable (data, target) loader built next
    # to `train_loader`. The previous `test_loader(0)` referred to the
    # pre-training loader, which is not callable and yields 3-tuples.
    test_loss, test_acc_1, test_acc_5 = _run_linear_epoch(val_loader(0), epoch)

    results["test_loss"].append(test_loss)
    results["test_acc@1"].append(test_acc_1)
    results["test_acc@5"].append(test_acc_5)

    # # save statistics
    # data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    # data_frame.to_csv('results/linear_statistics.csv', index_label='epoch')

    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        # torch.save(model.state_dict(), '/workspaces/RepLearning/saved_models/linear_model.pth')
        torch.save(model.state_dict(), "./results/linear_model.pth")





  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
  img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()


: 

: 

In [1]:
---------------------------

SyntaxError: invalid syntax (3780048248.py, line 1)

dg = C10DataGen('train',trimages)
dl = DataLoader(dg,batch_size = 128,drop_last=True)

vdg = C10DataGen('valid',valimages)
vdl = DataLoader(vdg,batch_size = 128,drop_last=True)

In [None]:
# Legacy contrastive training loop with a validation-loss pass.
# NOTE(review): this cell does not match the rest of the visible notebook and
# appears to come from an earlier version:
#   - `warmupscheduler` and `save_model` only exist as commented-out code;
#   - `train_dataset` and `plot_features` are not defined in the visible cells;
#   - the loaders are expected to yield (x_i, x_j) pairs and `model(...)` to
#     return a single tensor, whereas SimCLR.forward above returns a tuple;
#   - `model.pretrained` does not exist (see the AttributeError in the output).
# Confirm whether it should be deleted or ported before running it.
nr = 0  # presumably a process/rank id: logging and scheduling only run when it is 0 - TODO confirm
current_epoch = 0
epochs = 5
tr_loss = []
val_loss = []

for epoch in range(epochs):

    print(f"Epoch [{epoch}/{epochs}]\t")
    stime = time.time()

    model.train()
    tr_loss_epoch = 0

    for step, (x_i, x_j) in enumerate(train_loader):
        optimizer.zero_grad()
        x_i = x_i.squeeze().to("cuda:0").float()
        x_j = x_j.squeeze().to("cuda:0").float()

        # positive pair, with encoding
        z_i = model(x_i)
        z_j = model(x_j)

        loss = criterion(z_i, z_j)
        loss.backward()

        optimizer.step()

        if nr == 0 and step % 50 == 0:
            print(f"Step [{step}/{len(train_loader)}]\t Loss: {round(loss.item(), 5)}")

        tr_loss_epoch += loss.item()

    # Warm-up scheduler for the first 10 epochs, main scheduler afterwards.
    if nr == 0 and epoch < 10:
        warmupscheduler.step()
    if nr == 0 and epoch >= 10:
        mainscheduler.step()

    lr = optimizer.param_groups[0]["lr"]

    # Checkpoint every 50 epochs.
    if nr == 0 and (epoch + 1) % 50 == 0:
        save_model(
            model,
            optimizer,
            mainscheduler,
            current_epoch,
            "SimCLR_CIFAR10_RN20_checkpoint_{}.pt",
        )

    # Validation pass: the same loss, computed without gradients.
    model.eval()
    with torch.no_grad():
        val_loss_epoch = 0
        for step, (x_i, x_j) in enumerate(val_loader):

            x_i = x_i.squeeze().to("cuda:0").float()
            x_j = x_j.squeeze().to("cuda:0").float()

            # positive pair, with encoding
            z_i = model(x_i)
            z_j = model(x_j)

            loss = criterion(z_i, z_j)

            if nr == 0 and step % 50 == 0:
                print(f"Step [{step}/{len(val_loader)}]\t Loss: {round(loss.item(),5)}")

            val_loss_epoch += loss.item()

    # Record and report the per-batch mean losses for this epoch.
    if nr == 0:
        tr_loss.append(tr_loss_epoch / len(train_loader))
        val_loss.append(val_loss_epoch / len(val_loader))
        print(
            f"Epoch [{epoch}/{epochs}]\t Training Loss: {tr_loss_epoch / len(train_loader)}\t lr: {round(lr, 5)}"
        )
        print(
            f"Epoch [{epoch}/{epochs}]\t Validation Loss: {val_loss_epoch / len(val_loader)}\t lr: {round(lr, 5)}"
        )
        current_epoch += 1

    # NOTE(review): `train_dataset` is not defined in the visible notebook.
    train_dataset.on_epoch_end()

    time_taken = (time.time() - stime) / 60
    print(f"Epoch [{epoch}/{epochs}]\t Time Taken: {time_taken} minutes")

    # NOTE(review): with epochs = 5 this condition is never true inside the loop.
    if (epoch + 1) % 10 == 0:
        plot_features(model.pretrained, 10, 2048, 128)

# Final checkpoint after the last epoch.
save_model(
    model,
    optimizer,
    mainscheduler,
    current_epoch,
    "SimCLR_CIFAR10_RN20_checkpoint_{}_test?.pt",
)


Epoch [0/5]	
Step [0/156]	 Loss: 8.21142
Step [50/156]	 Loss: 6.96044
Step [100/156]	 Loss: 6.46314
Step [150/156]	 Loss: 5.86387
Adjusting learning rate of group 0 to 1.0000e-01.
Step [0/39]	 Loss: 0.38851
Epoch [0/5]	 Training Loss: 6.600502879191668	 lr: 0.1
Epoch [0/5]	 Validation Loss: 0.39332603491269624	 lr: 0.1
Epoch [0/5]	 Time Taken: 2.455996863047282 minutes
Epoch [1/5]	
Step [0/156]	 Loss: 6.24187
Step [50/156]	 Loss: 6.18044
Step [100/156]	 Loss: 5.93262
Step [150/156]	 Loss: 6.08959
Adjusting learning rate of group 0 to 1.5000e-01.
Step [0/39]	 Loss: 0.40206
Epoch [1/5]	 Training Loss: 6.109975503041194	 lr: 0.15
Epoch [1/5]	 Validation Loss: 0.41858119001755345	 lr: 0.15
Epoch [1/5]	 Time Taken: 2.735532263914744 minutes
Epoch [2/5]	
Step [0/156]	 Loss: 6.10084
Step [50/156]	 Loss: 6.20254
Step [100/156]	 Loss: 6.03884
Step [150/156]	 Loss: 6.13484
Adjusting learning rate of group 0 to 2.0000e-01.
Step [0/39]	 Loss: 0.42708
Epoch [2/5]	 Training Loss: 6.014321721517122	 

AttributeError: 'SimCLR' object has no attribute 'pretrained'

In [None]:
# print("==> Training starts!")
# print("=" * 50)


# NOTE: this whole cell is disabled (every statement below is commented out), so
# running it has no effect. It is kept as a reference supervised training loop:
# for each epoch it optionally decays the learning rate, trains on `train_loader`,
# evaluates on `val_loader`, and checkpoints the model whenever the validation
# accuracy improves.
# It relies on names that are not defined in this cell (EPOCHS, DECAY_EPOCHS, DECAY,
# current_learning_rate, model, criterion, optimizer, train_loader, val_loader,
# best_val_acc, CHECKPOINT_FOLDER and the avg_* history containers); they must
# exist before the cell is re-enabled.

# for i in range(0, EPOCHS):
#     # step-wise learning rate schedule: at every epoch listed in DECAY_EPOCHS
#     # (epoch 0 excluded) the rate is multiplied by DECAY and pushed into every
#     # optimizer parameter group.
#     # BONUS
#     if i in DECAY_EPOCHS and i != 0:
#         current_learning_rate = current_learning_rate * DECAY
#         for param_group in optimizer.param_groups:
#             param_group["lr"] = current_learning_rate
#         print("Current learning rate has decayed to %f" % current_learning_rate)

#     #######################
#     # your code here
#     # switch to train mode
#     model.train()

#     #######################

#     print("Epoch %d:" % i)
#     # these counters are used to compute the training accuracy
#     total_examples = 0
#     correct_examples = 0

#     train_loss = 0  # running sum of the per-batch training losses

#     # Train the model for 1 epoch.
#     for batch_idx, (inputs, targets) in enumerate(train_loader):
#         ####################################
#         # your code here
#         # copy inputs to device
#         inputs = inputs.to(device)

#         # compute the output and loss
#         outputs = model(inputs)
#         targets = targets.to(device)  # don't forget to also copy targets to device
#         loss = criterion(outputs, targets)

#         # zero the gradient
#         optimizer.zero_grad()

#         # backpropagation
#         loss.backward()

#         # apply gradient and update the weights
#         optimizer.step()

#         # count the number of correctly predicted samples in the current batch
#         # (prediction = index of the largest output along dim 1)
#         _, preds = outputs.max(1)
#         total_examples += targets.shape[0]
#         correct_examples += preds.eq(targets).sum().item()
#         # track loss
#         train_loss += loss.item()

#         ####################################

#     # mean of the per-batch losses (every batch weighs the same, whatever its size)
#     avg_loss = train_loss / len(train_loader)
#     avg_acc = correct_examples / total_examples
#     print("Training loss: %.4f, Training accuracy: %.4f" % (avg_loss, avg_acc))

#     # store train accuracy and loss
#     # (save_train_acc keeps the training accuracy for the checkpoint, because
#     # avg_acc is overwritten by the validation pass below)
#     save_train_acc = avg_acc
#     avg_train_loss.append(avg_loss)
#     avg_train_acc.append(avg_acc)

#     # Validate on the validation dataset
#     #######################
#     # your code here
#     # switch to eval mode
#     model.eval()

#     #######################

#     # these counters are used to compute the validation accuracy
#     total_examples = 0
#     correct_examples = 0

#     val_loss = 0  # running sum of the per-batch validation losses

#     # disable gradient during validation, which can save GPU memory
#     with torch.no_grad():
#         for batch_idx, (inputs, targets) in enumerate(val_loader):
#             ####################################
#             # your code here
#             # copy inputs to device
#             inputs = inputs.to(device)

#             # compute the output and loss
#             outputs = model(inputs)
#             targets = targets.to(device)  # don't forget to also copy targets to device
#             loss = criterion(outputs, targets)

#             # count the number of correctly predicted samples in the current batch
#             _, preds = outputs.max(1)
#             total_examples += targets.shape[0]
#             correct_examples += preds.eq(targets).sum().item()
#             # track loss
#             val_loss += loss.item()

#             ####################################

#     avg_loss = val_loss / len(val_loader)
#     avg_acc = correct_examples / total_examples
#     print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

#     # store val accuracy and loss
#     avg_val_loss.append(avg_loss)
#     avg_val_acc.append(avg_acc)

#     # save the model checkpoint, but only when the validation accuracy beats the
#     # best one seen so far; the same file (resnet.pth) is overwritten each time
#     if avg_acc > best_val_acc:
#         best_val_acc = avg_acc
#         if not os.path.exists(CHECKPOINT_FOLDER):
#             os.makedirs(CHECKPOINT_FOLDER)
#         print("Saving ...")
#         state = {
#             "state_dict": model.state_dict(),
#             "epoch": i,
#             "lr": current_learning_rate,
#             "val_acc": avg_acc,
#             "train_acc": save_train_acc,
#         }
#         torch.save(state, os.path.join(CHECKPOINT_FOLDER, "resnet.pth"))

# print("=" * 50)
# print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")


In [None]:
# NOTE: this cell is disabled (fully commented out). When re-enabled it either
# writes avg_train_acc, avg_train_loss, avg_val_acc and avg_val_loss to pickle
# files inside CHECKPOINT_FOLDER (SAVE_NEW_DATA = True) or restores them from
# those same files (SAVE_NEW_DATA = False). CHECKPOINT_FOLDER and, in save mode,
# the four avg_* objects must already be defined.
# NOTE(review): pickle.load can execute arbitrary code while deserializing; only
# load files that this notebook wrote itself.

# # save/load the training and validation accuracy and loss
# SAVE_NEW_DATA = True
# if SAVE_NEW_DATA:
#     with open(os.path.join(CHECKPOINT_FOLDER, "train_acc.pkl"), "wb") as f:
#         pickle.dump(avg_train_acc, f)
#     with open(os.path.join(CHECKPOINT_FOLDER, "train_loss.pkl"), "wb") as f:
#         pickle.dump(avg_train_loss, f)
#     with open(os.path.join(CHECKPOINT_FOLDER, "val_acc.pkl"), "wb") as f:
#         pickle.dump(avg_val_acc, f)
#     with open(os.path.join(CHECKPOINT_FOLDER, "val_loss.pkl"), "wb") as f:
#         pickle.dump(avg_val_loss, f)
# else:
#     with open(os.path.join(CHECKPOINT_FOLDER, "train_acc.pkl"), "rb") as f:
#         avg_train_acc = pickle.load(f)
#     with open(os.path.join(CHECKPOINT_FOLDER, "train_loss.pkl"), "rb") as f:
#         avg_train_loss = pickle.load(f)
#     with open(os.path.join(CHECKPOINT_FOLDER, "val_acc.pkl"), "rb") as f:
#         avg_val_acc = pickle.load(f)
#     with open(os.path.join(CHECKPOINT_FOLDER, "val_loss.pkl"), "rb") as f:
#         avg_val_loss = pickle.load(f)


### Save predictions

In [None]:
# NOTE: this cell is disabled (fully commented out). When re-enabled it loads the
# checkpoint saved as CHECKPOINT_FOLDER/resnet.pth into a fresh ResNet, predicts a
# class index for every test sample, and writes them to
# CHECKPOINT_FOLDER/predictions.csv with an "Id,Label" header.
# It depends on names not defined in this cell: ResNet, BasicBlock,
# CHECKPOINT_FOLDER and device.

# DATA_ROOT = "./data"
# BATCH_SIZE = 100

# # test-time preprocessing: tensor conversion plus per-channel normalization only
# transform_test = transforms.Compose(
#     [
#         transforms.ToTensor(),
#         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
#     ]
# )

# # NOTE(review): the import cell at the top of this notebook takes CIFAR10 from
# # torchvision.datasets, which is configured with `train=` rather than `mode=`;
# # this call presumably targets a different, project-provided CIFAR10 class.
# # Confirm which class is in scope before re-enabling.
# test_set = CIFAR10(root=DATA_ROOT, mode="test", download=True, transform=transform_test)

# # do NOT shuffle the test data loader:
# # otherwise the order of samples will be messed up
# # and the test accuracy is likely to drop to random-guessing level
# test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=1)

# #########################################################
# # use your model to generate predictions on test data
# # and save the results into variable "results"
# # "results" should be either a numpy array or a torch tensor with length of 10000

# # initialize a resnet and load trained weights
# net = ResNet(BasicBlock, [3, 3, 3])
# state_dict = torch.load(
#     os.path.join(CHECKPOINT_FOLDER, "resnet.pth")
# )  # change the path to your own checkpoint file
# # the checkpoint is a dict; the model weights live under its "state_dict" key
# net.load_state_dict(state_dict["state_dict"])
# net.to(device)

# # remember to switch to eval mode whenever you are making inference
# net.eval()

# # collect the predicted class index (argmax over dim 1) for every batch
# # NOTE(review): assumes each batch `x` is a bare image tensor with no labels;
# # verify against the dataset class actually used.
# results = []
# with torch.no_grad():
#     for x in test_loader:
#         results.append(net(x.to(device)).argmax(1))

# # convert results to numpy array
# results = torch.cat(results).cpu().numpy()
# assert len(results) == 10000

# #########################################################
# # one CSV row per test sample: its position in the loader order and its label
# with open(os.path.join(CHECKPOINT_FOLDER, "predictions.csv"), "w") as fp:
#     fp.write("Id,Label\n")
#     for i in range(len(results)):
#         fp.write("%d,%d\n" % (i, results[i]))


Using downloaded and verified file: ./data/cifar10_test_F22.zip
Extracting ./data/cifar10_test_F22.zip to ./data
Files already downloaded and verified
