In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
%cd /content/gdrive/MyDrive/Summer 2021/PURM/simclr-converter-master

/content/gdrive/.shortcut-targets-by-id/1nU-uKKJE6zrzHeoUPljWcBWr1_Hcvs4D/Summer 2021/PURM/simclr-converter-master


In [3]:
import argparse
import os
import random
import shutil
import time
import warnings
import numpy as np

import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from resnet_wider import resnet50x1, resnet50x2, resnet50x4

In [None]:
model_names = sorted(name for name in models.__dict__
                     if name.islower() and not name.startswith("__")
                     and callable(models.__dict__[name]))

In [None]:
parser = argparse.ArgumentParser(description='PyTorch SimCLR Linear Evaluation')

parser.add_argument('data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('-a', '--arch', default='resnet50-1x')
parser.add_argument('-j', '--workers', default=8, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('-b', '--batch-size', default=256, type=int)
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')

best_acc1 = 0

args

In [4]:
args_arch = 'resnet50-4x'
args_workers = 2
args_batch_size = 256
args_learning_rate = 0.1
args_print_freq = 10
args_device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
#args_device = torch.device("cpu")

args_dataset_dir = '/content/gdrive/MyDrive/Summer 2021/PURM/Datasets/CIFAR10'
args_image_size = 224
args_logistic_batch_size = 128
args_logistic_epochs = 100

main()

In [5]:
#args = parser.parse_args()

# create model
if args_arch == 'resnet50-1x':
    model = resnet50x1()
    sd = 'resnet50-1x.pth'
elif args_arch == 'resnet50-2x':
    model = resnet50x2()
    sd = 'resnet50-2x.pth'
elif args_arch == 'resnet50-4x':
    model = resnet50x4()
    sd = 'resnet50-4x.pth'
else:
    raise NotImplementedError

sd = torch.load(sd, map_location='cpu')
model.load_state_dict(sd['state_dict'])

#model = torch.nn.DataParallel(model).to('cuda')

# define loss function (criterion) and optimizer
#criterion = nn.CrossEntropyLoss()

#cudnn.benchmark = True

<All keys matched successfully>

loading data

In [6]:
CIFAR10_transform_ex = transforms.Compose([
  transforms.Resize((224, 224)),
  #transforms.CenterCrop(224),
  transforms.ToTensor(),
])

In [7]:
train_dataset = datasets.CIFAR10(
    args_dataset_dir,
    train=True,
    download=True,
    transform=CIFAR10_transform_ex,
)
test_dataset = datasets.CIFAR10(
    args_dataset_dir,
    train=False,
    download=True,
    transform=CIFAR10_transform_ex,
)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args_logistic_batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=args_workers,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=args_logistic_batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=args_workers,
)

continuation of main()

In [9]:
print(model)

ResNet(
  (conv1): Conv2d(3, 256, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (downsample): Sequential(
        (0): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=Fal

In [10]:
n_classes = 10  #for CIFAR10
n_features = model.fc.in_features
#old_fc = model.fc

In [None]:
import torch.nn as nn
from torchvision.models.resnet import Bottleneck, ResNet

# Modifications for CIFAR10 dataset
def modify_resnet_model(model, *, cifar_stem=True, v1=True):
  if cifar_stem:
    conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
    nn.init.kaiming_normal_(conv1.weight, mode="fan_out", nonlinearity="relu")
    model.conv1 = conv1
    model.maxpool = nn.Identity()
  if v1:
    for l in range(2, 5):
      layer = getattr(model, "layer{}".format(l))
      block = list(layer.children())[0]
      if isinstance(block, Bottleneck):
        assert block.conv1.kernel_size == (1, 1) and block.conv1.stride == (1,1,)
        assert block.conv2.kernel_size == (3, 3) and block.conv2.stride == (2,2,)
        assert block.conv2.dilation == (1,1,), "Currently, only models with dilation=1 are supported"
        block.conv1.stride = (2, 2)
        block.conv2.stride = (1, 1)
  return model
  

In [None]:
model1 = modify_resnet_model(model)
print(model1)

In [11]:
for param in model.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default
model.fc = nn.Identity()
print(n_features)
print(model.fc)
#print(old_fc)

8192
Identity()


In [12]:
#model = torch.nn.DataParallel(model).to('cuda')
model = model.to('cuda')
cudnn.benchmark = True

In [None]:
#class LogisticRegression(nn.Module):
#    def __init__(self, input_size, num_classes):
#        super(LogisticRegression, self).__init__()
#        self.linear = nn.Linear(input_size, num_classes)
#    
#    def forward(self, x):
#        out = self.linear(x)
#        return out

In [13]:
# Logistic Regression defined in PyTorch
class LogisticRegression(nn.Module):
    def __init__(self, n_features, n_classes):
        super(LogisticRegression, self).__init__()
        self.model = nn.Linear(n_features, n_classes)

    def forward(self, x):
        return self.model(x)

In [None]:
#from sklearn.linear_model import LogisticRegression

#lr_model = LogisticRegression(penalty='l2', C=1e5, solver='lbfgs')

all-in-one training

Using the pretrained SimCLR model to generate its representations of the input image data (which will be used for the logistic regression)

In [16]:
def inference(loader, simclr_model, device):
  feature_vector = []
  labels_vector = []
  for step, (x, y) in enumerate(loader):
      x = x.to(device)

      # get encoding
      with torch.no_grad():
          #h, _, z, _ = simclr_model(x)
          h = simclr_model(x)
          #output = simclr_model(images)

      h = h.detach()

      feature_vector.extend(h.cpu().detach().numpy())
      labels_vector.extend(y.numpy())

      if step % 20 == 0:
          print(f"Step [{step}/{len(loader)}]\t Computing features...")

  feature_vector = np.array(feature_vector)
  labels_vector = np.array(labels_vector)
  print("Features shape {}".format(feature_vector.shape))
  return feature_vector, labels_vector


def get_features(context_model, train_loader, test_loader, device):
    train_X, train_y = inference(train_loader, context_model, device)
    test_X, test_y = inference(test_loader, context_model, device)
    return train_X, train_y, test_X, test_y


def create_data_loaders_from_arrays(X_train, y_train, X_test, y_test, batch_size):
    train = torch.utils.data.TensorDataset(
        torch.from_numpy(X_train), torch.from_numpy(y_train)
    )
    train_loader = torch.utils.data.DataLoader(
        train, batch_size=batch_size, shuffle=False
    )

    test = torch.utils.data.TensorDataset(
        torch.from_numpy(X_test), torch.from_numpy(y_test)
    )
    test_loader = torch.utils.data.DataLoader(
        test, batch_size=batch_size, shuffle=False
    )
    return train_loader, test_loader

In [17]:
print("### Creating features from pre-trained context model ###")
(train_X, train_y, test_X, test_y) = get_features(
    model, train_loader, test_loader, args_device
)

arr_train_loader, arr_test_loader = create_data_loaders_from_arrays(
    train_X, train_y, test_X, test_y, args_logistic_batch_size
)

### Creating features from pre-trained context model ###


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Step [0/390]	 Computing features...
Step [20/390]	 Computing features...
Step [40/390]	 Computing features...
Step [60/390]	 Computing features...
Step [80/390]	 Computing features...
Step [100/390]	 Computing features...
Step [120/390]	 Computing features...
Step [140/390]	 Computing features...
Step [160/390]	 Computing features...
Step [180/390]	 Computing features...
Step [200/390]	 Computing features...
Step [220/390]	 Computing features...
Step [240/390]	 Computing features...
Step [260/390]	 Computing features...
Step [280/390]	 Computing features...
Step [300/390]	 Computing features...
Step [320/390]	 Computing features...
Step [340/390]	 Computing features...
Step [360/390]	 Computing features...
Step [380/390]	 Computing features...
Features shape (49920, 8192)
Step [0/78]	 Computing features...
Step [20/78]	 Computing features...
Step [40/78]	 Computing features...
Step [60/78]	 Computing features...
Features shape (9984, 8192)


training loop for logistic classifier model

In [19]:
def train_CIFAR10(loader, simclr_model, logistic_model, criterion, optimizer):
  loss_epoch = 0
  accuracy_epoch = 0
  simclr_model.eval()

  for step, (x, y) in enumerate(loader):
    optimizer.zero_grad()

    x = x.to(args_device)
    y = y.to(args_device)

    output = logistic_model(x)
    loss = criterion(output, y)

    predicted = output.argmax(1)
    acc = (predicted == y).sum().item() / y.size(0)
    accuracy_epoch += acc

    loss.backward()
    optimizer.step()

    loss_epoch += loss.item()
    # if step % 100 == 0:
    #     print(
    #         f"Step [{step}/{len(loader)}]\t Loss: {loss.item()}\t Accuracy: {acc}"
    #     )
  return loss_epoch, accuracy_epoch

In [21]:
def test_CIFAR10(loader, simclr_model, model, criterion, optimizer):
    loss_epoch = 0
    accuracy_epoch = 0
    model.eval()
    for step, (x, y) in enumerate(loader):
        model.zero_grad()

        x = x.to(args_device)
        y = y.to(args_device)

        output = model(x)
        loss = criterion(output, y)

        predicted = output.argmax(1)
        acc = (predicted == y).sum().item() / y.size(0)
        accuracy_epoch += acc

        loss_epoch += loss.item()

    return loss_epoch, accuracy_epoch

In [98]:
lr_model = LogisticRegression(n_features, n_classes)
lr_model = lr_model.to(args_device)

# learning rate = 0.1 * BatchSize/256
# lr = 0.075 * \sqrt{BatchSize}

optimizer = torch.optim.Adam(lr_model.parameters(), lr=1e-3, weight_decay=0)
#optimizer_l2 = torch.optim.Adam(lr_model.parameters(), lr=3e-4, weight_decay=l2_weight_decay)  # <--- use this for L2 regularization

criterion = torch.nn.CrossEntropyLoss()

In [99]:
args_logistic_batch_size = 128
args_logistic_epochs = 200

In [100]:
for epoch in range(args_logistic_epochs):
    loss_epoch, accuracy_epoch = train_CIFAR10(arr_train_loader, model, lr_model, criterion, optimizer)
    
    if epoch % 10 == 0:
      print(f"Epoch [{epoch}/{args_logistic_epochs}]\t Loss: {loss_epoch / len(train_loader)}\t Accuracy: {accuracy_epoch / len(train_loader)}")

Epoch [0/200]	 Loss: 0.9483144970276417	 Accuracy: 0.7735576923076923
Epoch [10/200]	 Loss: 0.18015954931959127	 Accuracy: 0.9540064102564103
Epoch [20/200]	 Loss: 0.10151325542575274	 Accuracy: 0.9818309294871795
Epoch [30/200]	 Loss: 0.06196954305737447	 Accuracy: 0.9931690705128206
Epoch [40/200]	 Loss: 0.03914252476384624	 Accuracy: 0.9977564102564103
Epoch [50/200]	 Loss: 0.025243059235314527	 Accuracy: 0.9994991987179487
Epoch [60/200]	 Loss: 0.01652276179848764	 Accuracy: 0.9999399038461538
Epoch [70/200]	 Loss: 0.010935359415956414	 Accuracy: 0.9999799679487179
Epoch [80/200]	 Loss: 0.007301386781275654	 Accuracy: 1.0
Epoch [90/200]	 Loss: 0.004909141629170149	 Accuracy: 1.0
Epoch [100/200]	 Loss: 0.0033195339498492196	 Accuracy: 1.0
Epoch [110/200]	 Loss: 0.0022553979055000804	 Accuracy: 1.0
Epoch [120/200]	 Loss: 0.0015387129154987634	 Accuracy: 1.0
Epoch [130/200]	 Loss: 0.0010536255800010015	 Accuracy: 1.0
Epoch [140/200]	 Loss: 0.0007239266591242109	 Accuracy: 1.0
Epoch [1

testing

In [101]:
# final testing
loss_epoch, accuracy_epoch = test_CIFAR10(arr_test_loader, model, lr_model, criterion, optimizer)
print(
    f"[FINAL]\t Loss: {loss_epoch / len(test_loader)}\t Accuracy: {accuracy_epoch / len(test_loader)}"
)

[FINAL]	 Loss: 0.6065696779734049	 Accuracy: 0.905448717948718
