# Lab1 - Self-Supervised Learning (SSL)


In this Lab, we will focus on the main steps of implementing [SimCLR](https://proceedings.mlr.press/v119/chen20j) in PyTorch.

Number of checkpoint: 2

1. Image Preprocessing and Augmentation  <- Checkpoint "1"
2. NT-Xent Loss               <- Checkpoint "2"
3. Leave-one-out KNN in Pytorch

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. Image Preprocessing and Augmentation

In [4]:
import torchvision.transforms as transforms
from IPython.display import display
from PIL import Image

import numpy as np
import pandas as pd
import shutil, time, os, requests, random, copy

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
from torchvision.datasets import ImageFolder

import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.manifold import TSNE

In [5]:
import pickle
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

def save_model(model, optimizer, scheduler, current_epoch, name):
    out = os.path.join('/content/saved_models/',name.format(current_epoch))

    torch.save({'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict':scheduler.state_dict()}, out)

def plot_features(model, num_classes, num_feats, batch_size):
    preds = np.array([]).reshape((0,1))
    gt = np.array([]).reshape((0,1))
    feats = np.array([]).reshape((0,num_feats))
    model.eval()
    with torch.no_grad():
        for x1,x2 in testloader:
            x1 = x1.squeeze().to(device = 'cuda:0', dtype = torch.float)
            out = model(x1)
            out = out.cpu().data.numpy()#.reshape((1,-1))
            feats = np.append(feats,out,axis = 0)

In [6]:
import os

jpg = [str(i).rjust(4,"0")+".jpg" for i in range(0,7294)] #0001.jpg
file_train = [os.path.join("/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled",i) for i in jpg]

print(file_train)
np.save( "file_train.npy" ,file_train)


['/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0000.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0001.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0002.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0003.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0004.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0005.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0006.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0007.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0008.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0009.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0010.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0011.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0012.jpg', '/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled/0013.jpg', '/con

In [7]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)
preprocess = transforms.Compose([
    transforms.Resize(150),
    transforms.RandomCrop(96),
    transforms.RandomRotation(10),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(0.5),
    # transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    normalize
])

def default_loader(path):
    img_pil =  Image.open(path)
    img_pil = img_pil.resize((96,96))
    img_tensor = preprocess(img_pil)
    return img_tensor

class evaluatedata():
    def __init__(self, root_path="/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/test"):
        self.root_path = root_path

    def get_data(self):
        img_data = ImageFolder(self.root_path,transform = transforms.ToTensor())
        return img_data


class trainset(Dataset):
    def __init__(self, loader=default_loader):
        #定义好 image 的路径
        self.images = file_train
        # self.target = number_train
        self.loader = loader

    def __getitem__(self, index):
        fn = self.images[index]
        img1 = self.loader(fn)
        img2 = self.loader(fn)

        # target = self.target[index]
        return img1,img2

    def __len__(self):
        return len(self.images)



In [8]:
train_data  = trainset()
trainloader = DataLoader(train_data, batch_size=64,shuffle=True)
test_data = evaluatedata()
test_data = test_data.get_data()
testloader = torch.utils.data.DataLoader(test_data, batch_size=100, shuffle=True, drop_last=True, pin_memory=True)
dataiter = iter(trainloader)
images, aug_images = dataiter.next()
print(type(trainloader),type(train_data),type(images))
print(images.size(),aug_images.size()) #length = 67

<class 'torch.utils.data.dataloader.DataLoader'> <class '__main__.trainset'> <class 'torch.Tensor'>
torch.Size([64, 3, 96, 96]) torch.Size([64, 3, 96, 96])


In [9]:
# Model

class Identity(nn.Module):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, x):
        return x

class LinearLayer(nn.Module):
    def __init__(self,
                 in_features,
                 out_features,
                 use_bias = True,
                 use_bn = False,
                 **kwargs):
        super(LinearLayer, self).__init__(**kwargs)

        self.in_features = in_features
        self.out_features = out_features
        self.use_bias = use_bias
        self.use_bn = use_bn
        
        self.linear = nn.Linear(self.in_features, 
                                self.out_features, 
                                bias = self.use_bias and not self.use_bn)
        if self.use_bn:
             self.bn = nn.BatchNorm1d(self.out_features)

    def forward(self,x):
        x = self.linear(x)
        if self.use_bn:
            x = self.bn(x)
        return x

class ProjectionHead(nn.Module):
    def __init__(self,
                 in_features,
                 hidden_features,
                 out_features,
                 head_type = 'nonlinear',
                 **kwargs):
        super(ProjectionHead,self).__init__(**kwargs)
        self.in_features = in_features
        self.out_features = out_features
        self.hidden_features = hidden_features
        self.head_type = head_type

        if self.head_type == 'linear':
            self.layers = LinearLayer(self.in_features,self.out_features,False, True)
        elif self.head_type == 'nonlinear':
            self.layers = nn.Sequential(
                LinearLayer(self.in_features,self.hidden_features,True, True),
                nn.ReLU(),
                LinearLayer(self.hidden_features,self.out_features,False,True))
        
    def forward(self,x):
        x = self.layers(x)
        return x

class PreModel(nn.Module):
    def __init__(self,base_model):
        super().__init__()
        self.base_model = base_model
        #PRETRAINED MODEL
        self.pretrained = models.resnet18(pretrained=False)
        
        self.pretrained.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
        self.pretrained.maxpool = Identity()
        self.classfy = True
        self.pretrained.fc = Identity()
        
        for p in self.pretrained.parameters():
            p.requires_grad = True
        
        self.projector = ProjectionHead(512, 512, 4)

    def classfy(self,classfy):
        self.classfy = classfy

    def forward(self,x):
        out = self.pretrained(x)
        # if self.classfy == True:
        xp = self.projector(torch.squeeze(out))
        # return out
        return xp

In [10]:
# loss Function

def xt_xent(u, v, temperature=0.5):
    N = u.shape[0]

    z = torch.cat([u, v], dim=0)
    z = F.normalize(z, p=2, dim=1)
    s = torch.matmul(z, z.t()) / temperature
    mask = torch.eye(2 * N).bool().to(z.device)
    s = torch.masked_fill(s, mask, -float('inf'))
    label = torch.cat([
        torch.arange(N, 2 * N),
        torch.arange(N)]).to(z.device)

    loss = F.cross_entropy(s, label)
    return loss

In [11]:
# Caculate KNN accuracy

def KNN(emb, cls, batch_size, Ks=[1, 10, 50, 100]):
    """Apply KNN for different K and return the maximum acc"""
    preds = []
    mask = torch.eye(batch_size).bool().to(emb.device)
    mask = F.pad(mask, (0, len(emb) - batch_size))
    for batch_x in torch.split(emb, batch_size):
        dist = torch.norm(
            batch_x.unsqueeze(1) - emb.unsqueeze(0), dim=2, p="fro")
        now_batch_size = len(batch_x)
        mask = mask[:now_batch_size]
        dist = torch.masked_fill(dist, mask, float('inf'))
        # update mask
        mask = F.pad(mask[:, :-now_batch_size], (now_batch_size, 0))
        pred = []
        for K in Ks:
            knn = dist.topk(K, dim=1, largest=False).indices
            knn = cls[knn].cpu()
            pred.append(torch.mode(knn).values)
        pred = torch.stack(pred, dim=0)
        preds.append(pred)
    preds = torch.cat(preds, dim=1)
    accs = [(pred == cls.cpu()).float().mean().item() for pred in preds]
    return max(accs)

In [12]:
model = PreModel('resnet18').to('cuda')

In [13]:
# OPtimazer

from torch.optim.optimizer import Optimizer, required
import re

EETA_DEFAULT = 0.001


class LARS(Optimizer):
    """
    Layer-wise Adaptive Rate Scaling for large batch training.
    Introduced by "Large Batch Training of Convolutional Networks" by Y. You,
    I. Gitman, and B. Ginsburg. (https://arxiv.org/abs/1708.03888)
    """

    def __init__(
        self,
        params,
        lr=required,
        momentum=0.9,
        use_nesterov=False,
        weight_decay=0.0,
        exclude_from_weight_decay=None,
        exclude_from_layer_adaptation=None,
        classic_momentum=True,
        eeta=EETA_DEFAULT,
    ):
        """Constructs a LARSOptimizer.
        Args:
        lr: A `float` for learning rate.
        momentum: A `float` for momentum.
        use_nesterov: A 'Boolean' for whether to use nesterov momentum.
        weight_decay: A `float` for weight decay.
        exclude_from_weight_decay: A list of `string` for variable screening, if
            any of the string appears in a variable's name, the variable will be
            excluded for computing weight decay. For example, one could specify
            the list like ['batch_normalization', 'bias'] to exclude BN and bias
            from weight decay.
        exclude_from_layer_adaptation: Similar to exclude_from_weight_decay, but
            for layer adaptation. If it is None, it will be defaulted the same as
            exclude_from_weight_decay.
        classic_momentum: A `boolean` for whether to use classic (or popular)
            momentum. The learning rate is applied during momeuntum update in
            classic momentum, but after momentum for popular momentum.
        eeta: A `float` for scaling of learning rate when computing trust ratio.
        name: The name for the scope.
        """

        self.epoch = 0
        defaults = dict(
            lr=lr,
            momentum=momentum,
            use_nesterov=use_nesterov,
            weight_decay=weight_decay,
            exclude_from_weight_decay=exclude_from_weight_decay,
            exclude_from_layer_adaptation=exclude_from_layer_adaptation,
            classic_momentum=classic_momentum,
            eeta=eeta,
        )

        super(LARS, self).__init__(params, defaults)
        self.lr = lr
        self.momentum = momentum
        self.weight_decay = weight_decay
        self.use_nesterov = use_nesterov
        self.classic_momentum = classic_momentum
        self.eeta = eeta
        self.exclude_from_weight_decay = exclude_from_weight_decay
        # exclude_from_layer_adaptation is set to exclude_from_weight_decay if the
        # arg is None.
        if exclude_from_layer_adaptation:
            self.exclude_from_layer_adaptation = exclude_from_layer_adaptation
        else:
            self.exclude_from_layer_adaptation = exclude_from_weight_decay

    def step(self, epoch=None, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        if epoch is None:
            epoch = self.epoch
            self.epoch += 1

        for group in self.param_groups:
            weight_decay = group["weight_decay"]
            momentum = group["momentum"]
            eeta = group["eeta"]
            lr = group["lr"]

            for p in group["params"]:
                if p.grad is None:
                    continue

                param = p.data
                grad = p.grad.data

                param_state = self.state[p]

                # TODO: get param names
                # if self._use_weight_decay(param_name):
                grad += self.weight_decay * param

                if self.classic_momentum:
                    trust_ratio = 1.0

                    # TODO: get param names
                    # if self._do_layer_adaptation(param_name):
                    w_norm = torch.norm(param)
                    g_norm = torch.norm(grad)

                    device = g_norm.get_device()
                    trust_ratio = torch.where(
                        w_norm.gt(0),
                        torch.where(
                            g_norm.gt(0),
                            (self.eeta * w_norm / g_norm),
                            torch.Tensor([1.0]).to(device),
                        ),
                        torch.Tensor([1.0]).to(device),
                    ).item()

                    scaled_lr = lr * trust_ratio
                    if "momentum_buffer" not in param_state:
                        next_v = param_state["momentum_buffer"] = torch.zeros_like(
                            p.data
                        )
                    else:
                        next_v = param_state["momentum_buffer"]

                    next_v.mul_(momentum).add_(scaled_lr, grad)
                    if self.use_nesterov:
                        update = (self.momentum * next_v) + (scaled_lr * grad)
                    else:
                        update = next_v

                    p.data.add_(-update)
                else:
                    raise NotImplementedError

        return loss

    def _use_weight_decay(self, param_name):
        """Whether to use L2 weight decay for `param_name`."""
        if not self.weight_decay:
            return False
        if self.exclude_from_weight_decay:
            for r in self.exclude_from_weight_decay:
                if re.search(r, param_name) is not None:
                    return False
        return True

    def _do_layer_adaptation(self, param_name):
        """Whether to do layer-wise learning rate adaptation for `param_name`."""
        if self.exclude_from_layer_adaptation:
            for r in self.exclude_from_layer_adaptation:
                if re.search(r, param_name) is not None:
                    return False
        return True


In [14]:


optimizer = LARS(
    [params for params in model.parameters() if params.requires_grad],
    lr=0.2,
    weight_decay=5e-7,
    exclude_from_weight_decay=["batch_normalization", "bias"],
)
optimizer = torch.optim.Adam(model.parameters(),lr=0.001)


# "decay the learning rate with the cosine decay schedule without restarts"
#SCHEDULER OR LINEAR EWARMUP
warmupscheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch : (epoch+1)/10.0, verbose = True)

#SCHEDULER FOR COSINE DECAY
mainscheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 500, eta_min=0.05, last_epoch=-1, verbose = True)
#LOSS FUNCTION
# criterion = SimCLR_Loss(batch_size = 128, temperature = 0.5)
# criterion = xt_xent()

Adjusting learning rate of group 0 to 1.0000e-04.
Epoch 00000: adjusting learning rate of group 0 to 1.0000e-03.


In [2]:
!nvidia-smi

Wed Apr 27 11:04:09 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   65C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [15]:
# training

nr = 0
current_epoch = 0
epochs = 10
tr_loss = []
val_loss = []

for epoch in range(epochs):
        
    print(f"Epoch [{epoch}/{epochs}]\t")
    stime = time.time()

    model.train()
    tr_loss_epoch = 0
    
    for step, (x_i ,x_j) in enumerate(trainloader):
        optimizer.zero_grad()
        x_i = x_i.squeeze().to('cuda:0').float()
        x_j = x_j.squeeze().to('cuda:0').float()

        # positive pair, with encoding
        z_i = model(x_i)
        z_j = model(x_j)

        loss = xt_xent(z_i,z_j)
        # loss = criterion(z_i, z_j)
        loss.backward()

        optimizer.step()
        
        if nr == 0 and step % 50 == 0:
            print(f"Step [{step}/{len(trainloader)}]\t Loss: {round(loss.item(), 5)}")

        tr_loss_epoch += loss.item()

    # if nr == 0 and epoch < 10:
    #     warmupscheduler.step()
    if nr == 0:
        mainscheduler.step()
    
    lr = optimizer.param_groups[0]["lr"]

    if nr == 0 and (epoch+1) % 50 == 0:
        save_model(model, optimizer, mainscheduler, current_epoch,"SimCLR_CIFAR10_RN50_P128_LR0P2_LWup10_Cos500_T0p5_B128_checkpoint_{}_260621.pt")

    model.eval()

    for i, (image, label) in enumerate(testloader):
        image = image.squeeze().to('cuda:0').float()
        label = label.squeeze().to('cuda:0').float()
        # print(label.shape)
        test_embedding  = model(image)
        # print("test shape", test_embedding.shape)
        acc = KNN(emb=test_embedding, cls=label, batch_size=100)
        print("Testing KNN: %.5f" % acc)
    # with torch.no_grad():
    #     val_loss_epoch = 0
    #     for step, (x_i, x_j) in enumerate(vdl):
        
    #       x_i = x_i.squeeze().to('cuda:0').float()
    #       x_j = x_j.squeeze().to('cuda:0').float()

    #       # positive pair, with encoding
    #       z_i = model(x_i)
    #       z_j = model(x_j)

    #       loss = criterion(z_i, z_j)

    #       if nr == 0 and step % 50 == 0:
    #           print(f"Step [{step}/{len(vdl)}]\t Loss: {round(loss.item(),5)}")

    #       val_loss_epoch += loss.item()

    # if nr == 0:
    #     tr_loss.append(tr_loss_epoch / len(dl))
    #     val_loss.append(val_loss_epoch / len(vdl))
    #     print(f"Epoch [{epoch}/{epochs}]\t Training Loss: {tr_loss_epoch / len(dl)}\t lr: {round(lr, 5)}")
    #     print(f"Epoch [{epoch}/{epochs}]\t Validation Loss: {val_loss_epoch / len(vdl)}\t lr: {round(lr, 5)}")
    #     current_epoch += 1

    # dg.on_epoch_end()

    # time_taken = (time.time()-stime)/60
    # print(f"Epoch [{epoch}/{epochs}]\t Time Taken: {time_taken} minutes")

    if (epoch+1)%10==0:
        plot_features(model.pretrained, 10, 512, 4)

# save_model(model, optimizer, mainscheduler, current_epoch, "SimCLR_CIFAR10_RN50_P128_LR0P2_LWup10_Cos500_T0p5_B128_checkpoint_{}_260621.pt")


Epoch [0/10]	
Step [0/114]	 Loss: 4.49714
Step [50/114]	 Loss: 3.46086
Step [100/114]	 Loss: 3.43754
Epoch 00001: adjusting learning rate of group 0 to 1.0005e-03.
Testing KNN: 0.69000
Testing KNN: 0.72000
Testing KNN: 0.84000
Testing KNN: 0.79000
Testing KNN: 0.81000
Epoch [1/10]	
Step [0/114]	 Loss: 3.39127
Step [50/114]	 Loss: 3.45018
Step [100/114]	 Loss: 3.44777
Epoch 00002: adjusting learning rate of group 0 to 1.0019e-03.
Testing KNN: 0.76000
Testing KNN: 0.81000
Testing KNN: 0.73000
Testing KNN: 0.88000
Testing KNN: 0.82000
Epoch [2/10]	
Step [0/114]	 Loss: 3.39684
Step [50/114]	 Loss: 3.37885
Step [100/114]	 Loss: 3.36419
Epoch 00003: adjusting learning rate of group 0 to 1.0044e-03.
Testing KNN: 0.79000
Testing KNN: 0.80000
Testing KNN: 0.86000
Testing KNN: 0.88000
Testing KNN: 0.74000
Epoch [3/10]	
Step [0/114]	 Loss: 3.35378
Step [50/114]	 Loss: 3.35277
Step [100/114]	 Loss: 3.35889
Epoch 00004: adjusting learning rate of group 0 to 1.0077e-03.
Testing KNN: 0.71000
Testing 

KeyboardInterrupt: ignored

In [15]:
# save model
save_model(model, optimizer, mainscheduler, current_epoch, "/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/SimCLR.pth")

mkdir: cannot create directory ‘saved_models’: File exists


In [30]:
# Save npy file

class ImageNPY(Dataset):
    def __init__(self, path="./content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled"):
        self.root_path = path
        # self.file_name = glob.glob(self.root_path + '/*')
        # self.file_name.sort()
        self.transformation = transforms.Compose([transforms.ToTensor()])
        self.jpg = [str(i).rjust(4,"0")+".jpg" for i in range(0,7294)] #0001.jpg
        self.file_train = [os.path.join("/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled",i) for i in jpg]
    def __getitem__(self, index):
        # img_name = self.file_name[index]

      #  imageA = Image.open(file_train[i])
        img = Image.open(self.file_train[index])
        # img = Image.open(img_name)
        # print(img_name)
        img = self.transformation(img)
        return img

    def __len__(self):
        return len(self.file_name)
        # return len(self.file_train)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

npy_dataset = ImageNPY()
numpy_embedding = np.empty((0,512),dtype = np.float32)
count = 0
print(npy_dataset)
for image in npy_dataset:
  with torch.no_grad():
    count += 1
    print(count)
    image = image.unsqueeze(0)
    image = image.to(device)
    new_model = PreModel(model)
    embedding  = new_model(image)
    embedding = embedding.cpu().numpy()
    numpy_embedding = np.append(numpy_embedding , embedding , axis = 0)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481


In [31]:
np.save("/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/310605023.npy",numpy_embedding)

In [33]:
embedding_test = np.load("/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/310605023.npy")
print(embedding_test.dtype)
print(embedding_test.shape)

float32
(7294, 512)


---

### `torchvision` is part of PyTorch framework, which natively supports various image preprocessing functions.
- The [document](https://pytorch.org/vision/stable/transforms.html#scriptable-transforms) of available transformations.
- The documents of used transformations in this example:
    - [transforms.RandomHorizontalFlip](https://pytorch.org/vision/stable/generated/torchvision.transforms.RandomHorizontalFlip.html#torchvision.transforms.RandomHorizontalFlip).
    - [transforms.Pad](https://pytorch.org/vision/stable/generated/torchvision.transforms.Pad.html#torchvision.transforms.Pad).

In [None]:
image = Image.open('./0000.jpg')
print("Original:")
display(image)
resize = transforms.RandomHorizontalFlip(p=0.5)
print("RandomHorizontalFlip(p=0.9):")
display(resize(image))
random_resize_crop = transforms.Pad(padding=(0, 0, 8, 16), fill=128)
print("Pad(padding=(0, 0, 8, 16)):")
display(random_resize_crop(image))

---

### The transforms can be composed sequentially to form a stronger augmentation.
- The document of [transforms.Compose](https://pytorch.org/vision/stable/generated/torchvision.transforms.Compose.html#torchvision.transforms.Compose)

In [None]:
image = Image.open('./0000.jpg')
print("Original:")
display(image)
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Pad(padding=(0, 0, 8, 16), fill=128),
])
print("RandomHorizontalFlip(p=0.9) + Pad(padding=(0, 0, 8, 16), fill=128):")
display(transform(image))

---

### Checkpoint 1.
Create a `transform` which applies `transforms.RandomHorizontalFlip(p=0.5)` and `transforms.Pad(padding=(0, 0, 8, 16), fill=128)` in a random order.
- The document of [transforms.RandomOrder](https://pytorch.org/vision/stable/generated/torchvision.transforms.RandomOrder.html#torchvision.transforms.RandomOrder).

#### Hint
1. load image
2. print text and display imge
3. use RandomOrder
4. display transformed image

In [None]:
image = Image.open('./0001.jpg')
print("Original:")
display(image)
transform = transforms.RandomOrder(  [transforms.RandomHorizontalFlip(p=0.5), transforms.Pad(padding=(0, 0, 8, 16), fill=128)])
print("RandomOrder(p=0.9) + Pad(padding=(0, 0, 8, 16), fill=128):")
display(transform(image))

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

---

## 2. The Normalized Temperature-scaled Cross Entropy Loss (NT-Xent) for SSL

Let $u$ and $v$ be the encoded features of an image in different views (different augmentation).

The similarity of $u$ and $v$ is defined as $sim(u,v)=\frac{u^Tv}{\vert u\vert\vert v\vert}$.

For $N$ images, there are $2N$ encoded features:

$$
\{z_i\}_{i=1}^{2N}=\{u_1,u_2,\cdots,u_N,v_1,v_2,\cdots,v_N\}
$$

**SimCLR** designs a loss function to learn that the feature $u_i$ can figure out $v_i$ from 2N-1 features and vise versa.

The classification loss:

$$
\mathcal{L}_{i,j}=-\log\frac{\exp(sim(z_i,z_j)/\tau)}{\sum_{k=1}^{2N}\mathbb 1[i\neq k]\exp(sim(z_i,z_k)/\tau)}
$$

where $\tau$ is a constant.

Take the average:

$$
\mathcal{L}=\frac{1}{2N}\sum_{i=1}^N \mathcal{L}_{i,i+N} + \mathcal{L}_{i+N,i}
$$

In [None]:
def xt_xent(u, v, temperature=0.5):
    N = u.shape[0]

    z = torch.cat([u, v], dim=0)
    z = F.normalize(z, p=2, dim=1)
    s = torch.matmul(z, z.t()) / temperature
    mask = torch.eye(2 * N).bool().to(z.device)
    s = torch.masked_fill(s, mask, -float('inf'))
    label = torch.cat([
        torch.arange(N, 2 * N),
        torch.arange(N)]).to(z.device)

    loss = F.cross_entropy(s, label)
    return loss

We can load two images ($N=2$) to form a batch data, and define a stochastic transforms to get different views.

In [None]:
import os

jpg = [str(i).rjust(4,"0")+".jpg" for i in range(0,7294)] #0001.jpg
file_train = [os.path.join("/content/drive/MyDrive/Colab_Notebooks/lab01/hw2/unlabeled",i) for i in jpg]

print(file_train)

In [None]:
transform = transforms.Compose([
    transforms.ToTensor()
])
model = PreModel('resnet18').to('cuda')
# model.classfy(False)

output = torch.tensor([]).to(torch.device('cuda'))

for i in range(0,7924):
  # print(file_train[i])
  imageA = Image.open(file_train[i])
  imageB = Image.open(file_train[i+1])
  # print(image)
  # display(image)
  x = transform(imageA) 
  # x = torch.stack([transform(imageA),transform(imageB)])
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  x = x.to(device)
  o = model(x)
  torch.cat((output,o),0)
  print(o)
  print(output)
  # np.save( "file_train.npy" ,file_train)
# image0 = Image.open('./0000.jpg')
# image1 = Image.open('./0001.jpg')

# transform = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.RandomResizedCrop((32, 32), scale=(0.6, 1.0))
# ])

# x1 = torch.stack([
#     transform(image0),
#     transform(image1),
# ])

# x2 = torch.stack([
#     transform(image0),
#     transform(image1),
# ])

Define a single layer CNN to test our loss function.

In [None]:
model = nn.Sequential(
    nn.Conv2d(3, 1, kernel_size=5),
    nn.AvgPool2d(1),
    nn.Flatten(start_dim=1),
)

## Checkpoint 2.

Get the encoded features of `x1` and `x2` then calculate the SimCLR loss by function `xt_xent`.

### Hint
1. use model to output encoded features of x1
2. use model to output encoded features of x2
3. calcuate xt_xent loss
4. print loss

In [None]:
o1 = model(x1)
o2 = model(x2)
loss = xt_xent(o1,o2)
print(loss)

---
# Checkpoint all finished! Congratulation!
## The following parts are useful function for HW2.

---

### 3. Leave-one-out KNN in Pytorch

In KNN, for $N$ data points with $d$ dimensional feature, the space complexity of distance matrix costs $O(N^2\times d)$. Which might take up too much capacity to allocate in memory, so we need to calculate the distance matrix batch-wisely.

In [None]:
def KNN(emb, cls, batch_size, Ks=[1, 10, 50, 100]):
    """Apply KNN for different K and return the maximum acc"""
    preds = []
    mask = torch.eye(batch_size).bool().to(emb.device)
    mask = F.pad(mask, (0, len(emb) - batch_size))
    for batch_x in torch.split(emb, batch_size):
        dist = torch.norm(
            batch_x.unsqueeze(1) - emb.unsqueeze(0), dim=2, p="fro")
        now_batch_size = len(batch_x)
        mask = mask[:now_batch_size]
        dist = torch.masked_fill(dist, mask, float('inf'))
        # update mask
        mask = F.pad(mask[:, :-now_batch_size], (now_batch_size, 0))
        pred = []
        for K in Ks:
            knn = dist.topk(K, dim=1, largest=False).indices
            knn = cls[knn].cpu()
            pred.append(torch.mode(knn).values)
        pred = torch.stack(pred, dim=0)
        preds.append(pred)
    preds = torch.cat(preds, dim=1)
    accs = [(pred == cls.cpu()).float().mean().item() for pred in preds]
    return max(accs)

### Test this function by $2$ $2$-dimensional Gaussians.

In [None]:
N = 500

embedding = torch.cat([
    # mean = (0, 0), std = 1
    torch.randn((N, 2)),
    # mean = (3, 3), std = 1
    torch.randn((N, 2)) + 3,
], dim=0)
classes = torch.cat([
    torch.zeros(N),
    torch.ones(N),
], dim=0)
acc = KNN(embedding, classes, batch_size=16)
print("Accuracy: %.5f" % acc)