# Contrastive Learning using Resnet50 Backbone, AID Dataset

Please submit this file to Luminus by **23:59 on 20 Mar**. 

---


1. Finish 2 tasks according to the instructions. Only change the code in the required area and DO NOT change others or add new code/text snippets.
2. Rename this file as "Student_number.ipynb". e.g., 'A0000000J.ipynb'. 

3. Submit the file to /Files/assignments/submission/assignment5. 

Please follow the instructions strictly, otherwise you might be penalized.

If you have any questions, please post them on Slack, or contact Ziheng Qin (e0823059@u.nus.edu) and Yong Liu (e0672130@u.nus.edu).

First, we import the dataset and define transformation operations on it. We apply random transformation on images (crop + flip + colorjitter + grayscale).

#Data Loading Code fragments and Data for AID courtesy of vladan-stojnic, CMC-RSSR
@InProceedings{Stojnic_2021_CVPR_Workshops,
    author = {Stojnic, Vladan and Risojevic, Vladimir},
    title = {Self-Supervised Learning of Remote Sensing Scene Representations Using Contrastive Multiview Coding},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    month = {June},
    year = {2021}
}

In [1]:
from PIL import Image
from torchvision import transforms
from torchvision.datasets import CIFAR10
import torch.utils.data as datautils
import lmdb


def pil_loader(path):
    """Read the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL as a file object, so the
    handle is closed as soon as the ``with`` block exits (this avoids a
    ResourceWarning).
    """
    with open(path, 'rb') as handle:
        return Image.open(handle).convert('RGB')

class ClassificationImageDatasetPair(datautils.Dataset):
    """Image dataset that yields two transformed views of every sample.

    Each item is ``(pos_1, pos_2, target)``; both views are produced by
    applying ``transform`` independently to the same loaded image.

    Args:
        root_path: Directory every listed image path is joined onto. When
            ``multilabel_targets`` is not given, its sub-directories define
            the class names.
        images_to_use: Text file with one image path per line, relative to
            ``root_path``.
        transform: Optional callable applied twice to each loaded image.
        target_transform: Optional callable applied to each target.
        multilabel_targets: Optional mapping from image file name to its
            target. When supplied, ``classes`` is not set on the instance.
    """

    def __init__(self, root_path, images_to_use, transform=None, target_transform=None, multilabel_targets=None):
        super().__init__()

        with open(images_to_use, 'r') as f:
            listed = f.readlines()

        # Sort the raw lines first (as before), then strip them. Blank lines
        # are dropped so they cannot turn into a bogus sample equal to
        # ``root_path`` itself.
        listed.sort()
        self.samples = [os.path.join(root_path, line.strip()) for line in listed if line.strip()]

        self.loader = pil_loader
        self.transform = transform
        self.target_transform = target_transform

        if multilabel_targets:
            self.targets = self._make_targets(multilabel_targets=multilabel_targets)
        else:
            classes, class_to_idx = self._find_classes(root_path)
            self.targets = self._make_targets(class_to_idx=class_to_idx)
            self.classes = classes

    def __len__(self):
        """Return the number of listed samples."""
        return len(self.samples)

    def __getitem__(self, index):
        """Return ``(pos_1, pos_2, target)`` for the sample at ``index``.

        Without a ``transform`` both views are the untransformed image.
        """
        path, target = self.samples[index], self.targets[index]

        img = self.loader(path)

        if self.transform is not None:
            pos_1 = self.transform(img)
            pos_2 = self.transform(img)
        else:
            # Previously these names were left unbound and the return below
            # raised UnboundLocalError.
            pos_1 = pos_2 = img

        if self.target_transform is not None:
            target = self.target_transform(target)

        return pos_1, pos_2, target

    def _find_classes(self, dir):
        """Return the sorted sub-directory names of ``dir`` and a name -> index map."""
        # The context manager closes the scandir iterator deterministically.
        with os.scandir(dir) as entries:
            classes = sorted(entry.name for entry in entries if entry.is_dir())
        class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
        return classes, class_to_idx

    def _make_targets(self, class_to_idx=None, multilabel_targets=None):
        """Build the per-sample targets and set ``self.num_classes``.

        With ``class_to_idx`` the label of a sample is looked up by the name
        of the directory that directly contains it. With
        ``multilabel_targets`` the target is looked up by the file name.

        Raises:
            ValueError: If neither argument is supplied (or both are empty).
        """
        if class_to_idx:
            self.num_classes = len(class_to_idx)
            return np.array([class_to_idx[os.path.split(os.path.split(sample)[0])[1]] for sample in self.samples])

        if multilabel_targets:
            self.num_classes = len(multilabel_targets[os.path.split(self.samples[0])[1]])
            return [multilabel_targets[os.path.split(sample)[1]] for sample in self.samples]

        raise ValueError("Either class_to_idx or multilabel_targets must be supplied!!!")

# Side length (in pixels) used by every resize/crop step below.
# Other sizes that were tried: 32, 48, 64, 84.
cropsize = 56

# Per-channel (low, high) bounds from which the normalisation constants are
# derived: the mean is the midpoint of each range, the std half of its width.
# NOTE(review): these bounds look like CIE Lab channel ranges, while the
# pipelines below normalise the tensors produced by ToTensor from RGB images
# - confirm that these statistics are the intended ones.
_channel_bounds = [(0, 100), (-86.183, 98.233), (-107.857, 94.478)]
_norm_mean = [(low + high) / 2 for low, high in _channel_bounds]
_norm_std = [(high - low) / 2 for low, high in _channel_bounds]

# Random augmentation used for the two training views:
# resize, random resized crop, horizontal flip, colour jitter, grayscale.
train_transform = transforms.Compose([
    transforms.Resize(cropsize),
    transforms.RandomResizedCrop(cropsize),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

# Deterministic pipeline (resize + centre crop) used for evaluation data.
test_transform = transforms.Compose([
    transforms.Resize(cropsize),
    transforms.CenterCrop(cropsize),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

For simplicity, we use the commonly used ResNet-50 as the ConvNet encoder, as in the original paper. Task 1 is to set up the encoder and the projection head. The parameters are adapted from the original paper.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.resnet import resnet50


class Model(nn.Module):
    """ResNet-50 based encoder ``f`` followed by a small projection head ``g``.

    The encoder keeps the children of ``resnet50()`` except the max-pooling
    and fully connected layers, and swaps the first convolution for a 3x3,
    stride-1 one. The head maps the 2048-dimensional representation to
    ``feature_dim`` dimensions.
    """

    def __init__(self, feature_dim=128):
        super().__init__()

        encoder_layers = []
        for child_name, child in resnet50().named_children():
            if child_name == 'conv1':
                child = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
            if isinstance(child, (nn.Linear, nn.MaxPool2d)):
                continue
            encoder_layers.append(child)
        # ----------------------------------------------------------------------
        # START OF YOUR CODE
        # ----------------------------------------------------------------------
        # Task 1
        # Base encoder self.f.
        # Reference : https://github.com/leftthomas/SimCLR/blob/master/model.py
        self.f = nn.Sequential(*encoder_layers)

        # Projection head self.g: Dense -> BatchNorm -> ReLU -> Dense, taking the
        # 2048-dimensional representation to a feature_dim-dimensional latent space.
        # Reference : https://github.com/leftthomas/SimCLR/blob/master/model.py
        self.g = nn.Sequential(
            nn.Linear(2048, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, feature_dim, bias=True),
        )
        # ----------------------------------------------------------------------
        # END OF YOUR CODE
        # ----------------------------------------------------------------------

    def forward(self, x):
        """Return the L2-normalised encoder feature and projection for ``x``."""
        feature = torch.flatten(self.f(x), start_dim=1)
        projection = self.g(feature)
        return F.normalize(feature, dim=-1), F.normalize(projection, dim=-1)


We train the encoder network and the projection head to maximize agreement using a contrastive loss. The default number of epochs is 1 for time efficiency, since one epoch can take about 10 minutes to run in Google Colab. Task 2 is to calculate the contrastive loss.
To evaluate the influence of the temperature on the contrastive loss, we run this training process 3 times with different temperature values (0.1, 0.5 and 1.0).

In [3]:
import argparse
import os
import numpy as np

import pandas as pd
import torch
import torch.optim as optim
!pip install thop
from thop import profile, clever_format
from torch.utils.data import DataLoader
from tqdm import tqdm

#from gdrive.MyDrive.CS5260Project.CMC_RSSR_main import dataset


from pytorch_lamb_master.optim.lamb import create_lamb_optimizer
from pytorch_lamb_master.optim import lr_scheduler
import math

def contrastive_loss(out_1, out_2, temperature):
    """Return the mean contrastive loss between two batches of projections.

    Row ``i`` of ``out_1`` and row ``i`` of ``out_2`` form a positive pair;
    every other row of the concatenated batch is a negative for it. The loss
    of an anchor is ``-log(exp(s_pos / t) / sum_{j != anchor} exp(s_j / t))``
    with dot-product similarities ``s``.

    The computation is done in log space (``logsumexp``) rather than with
    explicit ``exp`` calls, so small temperatures no longer overflow to
    ``inf`` and produce ``nan``.

    Args:
        out_1: Tensor of shape ``[B, D]`` holding the first views.
        out_2: Tensor of shape ``[B, D]`` holding the second views.
        temperature: Positive scalar the similarities are divided by.

    Returns:
        Scalar tensor with the loss averaged over all ``2 * B`` anchors.
    """
    # ------------------------------------------------------------------
    # START OF YOUR CODE
    # ------------------------------------------------------------------
    # Task2: implement contrastive loss function and return loss variable
    # hint: loss formula could refer to the slides
    # input: out_1, out_2, temperature
    # output: loss variable

    batch_size = out_1.size(0)

    out = torch.cat([out_1, out_2], dim=0)
    # [2*B, 2*B] similarity logits between every pair of views
    logits = torch.mm(out, out.t().contiguous()) / temperature
    # An anchor is never its own negative: filling the diagonal with -inf
    # removes that term from the log-sum-exp below.
    self_mask = torch.eye(2 * batch_size, dtype=torch.bool, device=logits.device)
    logits = logits.masked_fill(self_mask, float('-inf'))

    # [2*B] logit of the positive partner of every anchor
    pos_logit = torch.sum(out_1 * out_2, dim=-1) / temperature
    pos_logit = torch.cat([pos_logit, pos_logit], dim=0)

    # -log(exp(pos) / sum(exp(others))) == logsumexp(others) - pos
    loss = (torch.logsumexp(logits, dim=-1) - pos_logit).mean()

    # ------------------------------------------------------------------
    # END OF YOUR CODE
    # ------------------------------------------------------------------

    return loss

# train for one epoch to learn unique features
def train(net, data_loader, train_optimizer, train_scheduler, temperature):
    """Train ``net`` for one epoch with the contrastive loss.

    The scheduler is stepped after every optimizer step, i.e. once per
    batch. Batches are moved to the current CUDA device. The module-level
    names ``epoch`` and ``epochs`` are read only for the progress-bar text.

    Args:
        net: Model returning ``(feature, projection)`` for a batch.
        data_loader: Iterable yielding ``(pos_1, pos_2, target)`` batches.
        train_optimizer: Optimizer updating ``net``.
        train_scheduler: Learning-rate scheduler, stepped per batch.
        temperature: Temperature passed to ``contrastive_loss``.

    Returns:
        The sample-weighted mean loss over the epoch.
    """
    net.train()
    total_loss, total_num, train_bar = 0.0, 0, tqdm(data_loader)
    for pos_1, pos_2, _ in train_bar:
        pos_1, pos_2 = pos_1.cuda(non_blocking=True), pos_2.cuda(non_blocking=True)
        _, out_1 = net(pos_1)
        _, out_2 = net(pos_2)

        loss = contrastive_loss(out_1, out_2, temperature)

        train_optimizer.zero_grad()
        loss.backward()
        train_optimizer.step()
        train_scheduler.step()

        # Weight by the real number of samples in this batch: the last batch
        # of an epoch can be shorter than the configured batch size.
        num_samples = pos_1.size(0)
        total_num += num_samples
        total_loss += loss.item() * num_samples
        train_bar.set_description('Train Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, epochs, total_loss / total_num))

    return total_loss / total_num


# test for one epoch, use weighted knn to find the most similar images' label to assign the test image
def test(net, memory_data_loader, test_data_loader, temperature):
    """Evaluate ``net`` with a similarity-weighted k-nearest-neighbour vote.

    Features of the memory set form a bank. Every test feature is compared
    with the bank, its most similar entries are selected, and their labels
    vote with weight ``exp(similarity / temperature)``.

    Reads the module-level names ``k`` (number of neighbours), ``c`` (number
    of classes) and ``epoch`` / ``epochs`` (progress-bar text only). Batches
    are moved to the current CUDA device.

    Args:
        net: Model returning ``(feature, projection)`` for a batch.
        memory_data_loader: Loader whose ``dataset.targets`` holds the labels
            of the bank entries, in loader order.
        test_data_loader: Loader yielding the samples to classify.
        temperature: Temperature applied to the neighbour similarities.

    Returns:
        Tuple ``(top-1 accuracy, top-5 accuracy)``, both in percent.
    """
    net.eval()
    total_top1, total_top5, total_num, feature_bank = 0.0, 0.0, 0, []
    with torch.no_grad():
        # generate feature bank
        for data, _, _ in tqdm(memory_data_loader, desc='Feature extracting'):
            feature, _ = net(data.cuda(non_blocking=True))
            feature_bank.append(feature)
        # [D, N]
        feature_bank = torch.cat(feature_bank, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(memory_data_loader.dataset.targets, device=feature_bank.device)
        # topk raises when asked for more neighbours than the bank holds, so
        # never request more than N of them.
        num_neighbours = min(k, feature_bank.size(1))
        # loop test data to predict the label by weighted knn search
        test_bar = tqdm(test_data_loader)
        for data, _, target in test_bar:
            data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)
            feature, _ = net(data)

            total_num += data.size(0)
            # compute cos similarity between each feature vector and feature bank ---> [B, N]
            sim_matrix = torch.mm(feature, feature_bank)
            # [B, K]
            sim_weight, sim_indices = sim_matrix.topk(k=num_neighbours, dim=-1)
            # [B, K]
            sim_labels = torch.gather(feature_labels.expand(data.size(0), -1), dim=-1, index=sim_indices)
            sim_weight = (sim_weight / temperature).exp()

            # counts for each class
            one_hot_label = torch.zeros(data.size(0) * num_neighbours, c, device=sim_labels.device)
            # [B*K, C]
            one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
            # weighted score ---> [B, C]
            pred_scores = torch.sum(one_hot_label.view(data.size(0), -1, c) * sim_weight.unsqueeze(dim=-1), dim=1)

            pred_labels = pred_scores.argsort(dim=-1, descending=True)
            total_top1 += torch.sum((pred_labels[:, :1] == target.unsqueeze(dim=-1)).any(dim=-1).float()).item()
            total_top5 += torch.sum((pred_labels[:, :5] == target.unsqueeze(dim=-1)).any(dim=-1).float()).item()
            test_bar.set_description('Test Epoch: [{}/{}] Acc@1:{:.2f}% Acc@5:{:.2f}%'
                                     .format(epoch, epochs, total_top1 / total_num * 100, total_top5 / total_num * 100))

    return total_top1 / total_num * 100, total_top5 / total_num * 100


# Locations of the AID images and of the train/validation split lists.
# All paths are relative: the current working directory is assumed to be
# '/home/umaiyal/CS5260Project'.
# TODO: add acknowledgements for the dataset.
data_folder = os.path.join('CMC_RSSR_main', 'data', 'AID')
datalist_folder = os.path.join('CMC_RSSR_main', 'data_splits')
trn_list, test_list = (
    os.path.join(datalist_folder, list_name)
    for list_name in ('AID_train.txt', 'AID_val.txt')
)

# Value later passed as num_workers to the data loaders. A conditional
# switch to 16 workers (depending on cropsize / batch_size) was tried and
# is currently disabled.
numworkersset = 0





In [None]:
# Train SimCLR
import numpy as np

# Feature dim for latent vector, Temperature used in softmax, Top k most similar images used to predict the label
feature_dim, temp, k = 128, [0.1], 200
# Number of images in each mini-batch
batch_size = 128
# Learning-rate range swept by the 'lambdalr' range test
lr_start = 0.00025
lr_end = 0.398
# Peak learning rate for 'onecyclelr'.
# Rule of thumb: peak lr (from the lambdalr test) * 3/8
max_lrvalue = 0.0017895
# Contrastive loss temperature setting
temp0 = 0.1
# 'lambdalr' for testing the range of training or 'onecyclelr' for actual training
schedulertype = 'onecyclelr'
# Number of sweeps over the dataset, chosen per scheduler type
if schedulertype == 'lambdalr':
    # coarse range test: 10 epochs recommended, fine range test: 100 epochs
    epochs = 10
elif schedulertype == 'onecyclelr':
    epochs = 500
else:
    # Fail here with a clear message; merely printing left `epochs` undefined
    # and the next statements died with a NameError instead.
    raise ValueError(
        "choose valid option for scheduler: 'lambdalr' or 'onecyclelr' (got {!r})".format(schedulertype))
# Smooth factor for smoothing the contrastive loss
smoothfactor = 0.95
IterationStr = 'It1'
# loadmodel=0: train from scratch, loadmodel=1: continue from the saved model
loadmodel = 0
pathtosave = '/home/umaiyal/CS5260Project/results/Simclr_resnet50_AID_RAdamv2'+schedulertype+'/'+IterationStr+'/'
save_name_pre = '{}_{}_{}_{}_{}'.format(feature_dim, temp0, k, batch_size, epochs)
csvfilename = pathtosave+'{}_statistics.csv'.format(save_name_pre)
modelfilename = pathtosave+'{}_model.pth'.format(save_name_pre)

# data prepare
# Keyword arguments shared by the three loaders below.
_loader_kwargs = dict(batch_size=batch_size, num_workers=numworkersset, pin_memory=True, sampler=None)

# Training pairs: randomly augmented views, reshuffled every epoch.
train_data = ClassificationImageDatasetPair(data_folder, trn_list, transform=train_transform)
train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, **_loader_kwargs)

# Memory set: the training list again, but with the deterministic test
# transform and in fixed order (its features form the kNN bank in test()).
memory_data = ClassificationImageDatasetPair(data_folder, trn_list, transform=test_transform)
memory_loader = torch.utils.data.DataLoader(memory_data, shuffle=False, **_loader_kwargs)

# Held-out list used for evaluation.
test_data = ClassificationImageDatasetPair(data_folder, test_list, transform=test_transform)
test_loader = torch.utils.data.DataLoader(test_data, shuffle=False, **_loader_kwargs)
import torch
# Index of the CUDA device used for the model and for the profiling input below.
n=3
# NOTE(review): the return value is discarded, so this call does not guard anything.
torch.cuda.is_available()
torch.cuda.set_device(n)

# model setup and optimizer config
model = Model(feature_dim).cuda(n)

# Profile the model with thop on one random 3-channel cropsize x cropsize input,
# then convert the raw counts to formatted strings before printing them.
flops, params = profile(model, inputs=(torch.randn(1, 3, cropsize, cropsize).cuda(n),))
flops, params = clever_format([flops, params])
print('# Model Params: {} FLOPs: {}'.format(params, flops))

# NOTE(review): lr=1.0 is presumably a placeholder base rate that the scheduler
# created afterwards scales or overrides - confirm.
optimizer = optim.RAdam(model.parameters(), lr=1.0)
#optimizer=optim.Adam(model.parameters(),lr=1.0)

def lrs(batch):
    """Return the learning-rate factor for scheduler step ``batch``.

    The factor grows exponentially from ``lr_start`` at step 0 to ``lr_end``
    at step ``len(train_loader) * epochs``: the exponent is interpolated
    linearly between the base-2 logarithms of the two bounds.
    """
    log_start = math.log2(lr_start)
    log_end = math.log2(lr_end)
    exponent = log_start + (log_end - log_start) * batch / len(train_loader) / epochs
    return 2 ** exponent
   
# Build the per-batch learning-rate scheduler selected by `schedulertype`.
if schedulertype == 'lambdalr':
    # Range test: the optimizer's base lr is multiplied by lrs(step).
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lrs)
elif schedulertype == 'onecyclelr':
    # The cycle length is given explicitly through total_steps (one step per
    # training batch); `epochs` is passed on unchanged from the original call.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=max_lrvalue, total_steps=len(train_loader)*epochs, epochs=epochs)
else:
    # Printing a hint left `scheduler` undefined and training failed later
    # with a NameError; fail immediately with a clear message instead.
    raise ValueError(
        "choose valid option for scheduler: 'lambdalr' or 'onecyclelr' (got {!r})".format(schedulertype))

# Either resume model/optimizer/scheduler state from the saved checkpoint or
# start a fresh run at epoch 1.
if loadmodel == 1:
    checkpoint = torch.load(modelfilename)
    model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    # The previous `model.to(device)` referenced an undefined name and raised
    # NameError; keep the model on the CUDA device selected for this run.
    model.cuda(n)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    # Continue with the epoch after the one stored in the checkpoint.
    startepoch = checkpoint['epoch'] + 1
    best_acc = checkpoint['best_acc']

    print(startepoch)
else:
    startepoch = 1
    best_acc = 0
   

# Number of classes, read as a global by test().
c = len(memory_data.classes)

os.makedirs(pathtosave, exist_ok=True)

# Per-epoch histories; entry i belongs to epoch i + 1.
train_loss_epoch = torch.zeros(epochs)
smooth_loss_epoch = torch.zeros(epochs)
test_acc_1_epoch = torch.zeros(epochs)
test_acc_5_epoch = torch.zeros(epochs)
lr_epoch = torch.zeros(epochs)
if loadmodel == 1:
    # The statistics CSV is rewritten every epoch, while the checkpoint is
    # only written when accuracy improves, so the CSV can hold more rows than
    # the checkpoint has epochs. Keep only the rows before `startepoch`;
    # otherwise the histories no longer match the epoch index used when the
    # CSV is written again after the first resumed epoch.
    df = pd.read_csv(csvfilename).iloc[:startepoch - 1]

    train_loss_list = pd.to_numeric(df['train_loss']).tolist()
    train_loss_epoch[:len(train_loss_list)] = torch.tensor(train_loss_list)

    smooth_loss_list = pd.to_numeric(df['smooth_loss']).tolist()
    smooth_loss_epoch[:len(smooth_loss_list)] = torch.tensor(smooth_loss_list)

    test_acc_1_list = pd.to_numeric(df['test_acc@1']).tolist()
    test_acc_1_epoch[:len(test_acc_1_list)] = torch.tensor(test_acc_1_list)

    test_acc_5_list = pd.to_numeric(df['test_acc@5']).tolist()
    test_acc_5_epoch[:len(test_acc_5_list)] = torch.tensor(test_acc_5_list)

    lr_list = pd.to_numeric(df['lr_epoch']).tolist()
    lr_epoch[:len(lr_list)] = torch.tensor(lr_list)

    results = {'train_loss': train_loss_list, 'test_acc@1': test_acc_1_list, 'test_acc@5': test_acc_5_list, 'smooth_loss': smooth_loss_list, 'lr_epoch': lr_list}
else:
    results = {'train_loss': [], 'test_acc@1': [], 'test_acc@5': [], 'smooth_loss': [], 'lr_epoch': []}


# Main loop: one training pass and one kNN evaluation per epoch. `epoch` is
# also read as a global by train() and test() for their progress-bar text.
for epoch in range(startepoch, epochs + 1):
    train_loss = train(model, train_loader, optimizer, scheduler, temp0)
    train_loss_epoch[epoch-1]=train_loss
    # Smoothed loss: current epoch loss weighted by smoothfactor, blended with
    # the previous epoch's smoothed value weighted by (1 - smoothfactor).
    if epoch>1:
       smooth_loss=float(train_loss_epoch[epoch-1]*smoothfactor+smooth_loss_epoch[epoch-2]*(1.0-smoothfactor))
    else:
       smooth_loss=train_loss
    smooth_loss_epoch[epoch-1]=torch.tensor(smooth_loss)


    # Log the learning rate reached at the end of this epoch and the smoothed loss.
    print(optimizer.param_groups[0]['lr'])
    print(smooth_loss_epoch[epoch-1])
    lr_epoch[epoch-1]=float(optimizer.param_groups[0]['lr'])
   
        
    results['train_loss'].append(train_loss)
    test_acc_1, test_acc_5 = test(model, memory_loader, test_loader, temp0)
    results['test_acc@1'].append(test_acc_1)
    results['test_acc@5'].append(test_acc_5)
    results['smooth_loss'].append(smooth_loss)
    results['lr_epoch'].append(optimizer.param_groups[0]['lr'])
    # save statistics
    # The whole history is rewritten to the CSV after every epoch.
    # NOTE(review): the index has `epoch` entries, so every list in `results`
    # must hold exactly `epoch` values here - confirm this also holds when
    # resuming from a checkpoint.
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv(csvfilename, index_label='epoch')
    # A checkpoint is written only when the top-1 accuracy improves.
    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        torch.save({'epoch':epoch,'model_state_dict':model.state_dict(),'optimizer_state_dict':optimizer.state_dict(),'scheduler_state_dict':scheduler.state_dict(),'best_acc':best_acc}, modelfilename)
    test_acc_1_epoch[epoch-1]=test_acc_1
    test_acc_5_epoch[epoch-1]=test_acc_5
    
# Report the learning rate recorded at the epoch with the smallest smoothed
# loss, together with the bounds derived from it (3/8 and 3/80 of that rate).
minloss_loc = smooth_loss_epoch.argmin()
minloss_loclr = lr_epoch[minloss_loc]
print('lr corresponding to minloss={}'.format(minloss_loclr))
print('suggested maxlr={}'.format(minloss_loclr * 3 / 8))
print('suggested minlr={}'.format(minloss_loclr * 3 / 80))

  kernel = torch.DoubleTensor([*(x[0].shape[2:])]) // torch.DoubleTensor(list((m.output_size,))).squeeze()


[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torchvision.models.resnet.Bottleneck'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm1d'>.
[91m[WARN] Cannot find rule for <class '__main__.Model'>. Treat it as zero Macs and zero Params.[00m
# Model Params: 24.62M FLOPs: 4.00G


Train Epoch: [1/500] Loss: 5.1965: 100%|██████████| 40/40 [01:05<00:00,  1.64s/it]


7.176844693055945e-05
tensor(5.1965)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [1/500] Acc@1:23.82% Acc@5:61.54%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [2/500] Loss: 5.0871: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


7.233370503562748e-05
tensor(5.0926)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [2/500] Acc@1:25.54% Acc@5:62.36%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [3/500] Loss: 4.9801: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


7.327552629165222e-05
tensor(4.9857)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [3/500] Acc@1:26.44% Acc@5:64.30%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [4/500] Loss: 4.8913: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


7.459349744696965e-05
tensor(4.8960)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [4/500] Acc@1:27.38% Acc@5:65.40%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [5/500] Loss: 4.8085: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


7.628704020312856e-05
tensor(4.8129)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [5/500] Acc@1:27.64% Acc@5:66.46%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [6/500] Loss: 4.7304: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


7.835541146863608e-05
tensor(4.7345)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [6/500] Acc@1:28.90% Acc@5:68.52%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [7/500] Loss: 4.6053: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


8.079770368501224e-05
tensor(4.6118)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [7/500] Acc@1:30.30% Acc@5:69.60%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [8/500] Loss: 4.6394: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


8.36128452250057e-05
tensor(4.6381)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [8/500] Acc@1:30.72% Acc@5:70.48%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [9/500] Loss: 4.4997: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


8.679960086280334e-05
tensor(4.5066)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [9/500] Acc@1:30.78% Acc@5:70.72%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [10/500] Loss: 4.5050: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


9.035657231602095e-05
tensor(4.5051)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [10/500] Acc@1:31.18% Acc@5:72.10%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [11/500] Loss: 4.4088: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


9.428219885924049e-05
tensor(4.4136)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [11/500] Acc@1:30.86% Acc@5:72.30%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [12/500] Loss: 4.3647: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


9.85747580088249e-05
tensor(4.3672)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [12/500] Acc@1:31.80% Acc@5:73.28%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [13/500] Loss: 4.3261: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00010323236627870721
tensor(4.3282)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [13/500] Acc@1:32.30% Acc@5:74.36%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [14/500] Loss: 4.2747: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00010825298000682556
tensor(4.2774)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [14/500] Acc@1:32.34% Acc@5:74.40%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [15/500] Loss: 4.2444: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00011363439625184085
tensor(4.2460)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [15/500] Acc@1:32.96% Acc@5:74.60%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [16/500] Loss: 4.2047: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00011937425375973935
tensor(4.2068)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [16/500] Acc@1:33.36% Acc@5:74.90%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [17/500] Loss: 4.1956: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00012547003399990524
tensor(4.1962)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [17/500] Acc@1:33.82% Acc@5:74.88%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [18/500] Loss: 4.1591: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0001319190622701969
tensor(4.1610)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [18/500] Acc@1:34.68% Acc@5:75.48%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [19/500] Loss: 4.1362: 100%|██████████| 40/40 [01:05<00:00,  1.64s/it]


0.00013871850887055238
tensor(4.1375)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.06s/it]
Test Epoch: [19/500] Acc@1:34.90% Acc@5:76.00%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [20/500] Loss: 4.0913: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.00014586539034460257
tensor(4.0936)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [20/500] Acc@1:35.76% Acc@5:76.80%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [21/500] Loss: 4.0506: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0001533565707887508
tensor(4.0528)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [21/500] Acc@1:35.28% Acc@5:75.64%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [22/500] Loss: 4.0255: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00016118876322814154
tensor(4.0269)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [22/500] Acc@1:33.86% Acc@5:74.56%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [23/500] Loss: 3.9577: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00016935853105891677
tensor(3.9611)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [23/500] Acc@1:36.04% Acc@5:76.26%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [24/500] Loss: 3.9289: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.00017786228955612754
tensor(3.9305)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [24/500] Acc@1:36.06% Acc@5:77.02%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [25/500] Loss: 3.9229: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00018669630744663703
tensor(3.9233)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [25/500] Acc@1:37.96% Acc@5:77.38%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [26/500] Loss: 3.9466: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.00019585670854632416
tensor(3.9454)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [26/500] Acc@1:37.24% Acc@5:78.06%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [27/500] Loss: 3.8349: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00020533947346087589
tensor(3.8404)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [27/500] Acc@1:37.14% Acc@5:76.00%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [28/500] Loss: 3.7974: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00021514044134941096
tensor(3.7995)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [28/500] Acc@1:34.22% Acc@5:72.86%: 100%|██████████| 40/40 [00:44<00:00,  1.11s/it]
Train Epoch: [29/500] Loss: 3.7152: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.00022525531175017358
tensor(3.7194)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [29/500] Acc@1:38.10% Acc@5:77.02%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [30/500] Loss: 3.6986: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.00023567964646748494
tensor(3.6997)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [30/500] Acc@1:38.52% Acc@5:77.30%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [31/500] Loss: 3.6073: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.00024640887151913273
tensor(3.6119)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [31/500] Acc@1:34.96% Acc@5:74.08%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [32/500] Loss: 3.5340: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0002574382791433391
tensor(3.5379)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [32/500] Acc@1:34.76% Acc@5:72.82%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [33/500] Loss: 3.4631: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0002687630298644297
tensor(3.4668)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [33/500] Acc@1:41.44% Acc@5:78.36%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [34/500] Loss: 3.4242: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0002803781546162948
tensor(3.4263)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [34/500] Acc@1:43.10% Acc@5:79.92%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [35/500] Loss: 3.2782: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0002922785569227132
tensor(3.2856)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [35/500] Acc@1:40.34% Acc@5:78.40%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [36/500] Loss: 3.1701: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.00030445901513358035
tensor(3.1758)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.03s/it]
Test Epoch: [36/500] Acc@1:38.32% Acc@5:76.54%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [37/500] Loss: 3.1333: 100%|██████████| 40/40 [01:05<00:00,  1.64s/it]


0.0003169141847160588
tensor(3.1355)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [37/500] Acc@1:40.54% Acc@5:78.76%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [38/500] Loss: 3.0991: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00032963860059965133
tensor(3.1009)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [38/500] Acc@1:39.52% Acc@5:78.42%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [39/500] Loss: 2.8585: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0003426266795741582
tensor(2.8706)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [39/500] Acc@1:36.44% Acc@5:74.88%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [40/500] Loss: 2.8215: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0003558727227394758
tensor(2.8240)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [40/500] Acc@1:35.12% Acc@5:76.16%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [41/500] Loss: 2.7314: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0003693709180061552
tensor(2.7360)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [41/500] Acc@1:39.48% Acc@5:78.94%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [42/500] Loss: 2.6711: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00038311534264562634
tensor(2.6744)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [42/500] Acc@1:30.48% Acc@5:70.86%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [43/500] Loss: 2.6615: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.00039709996588896987
tensor(2.6622)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [43/500] Acc@1:33.92% Acc@5:73.42%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [44/500] Loss: 2.5012: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0004113186515730927
tensor(2.5093)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [44/500] Acc@1:33.24% Acc@5:73.40%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [45/500] Loss: 2.4572: 100%|██████████| 40/40 [01:05<00:00,  1.64s/it]


0.00042576516083314936
tensor(2.4598)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [45/500] Acc@1:35.20% Acc@5:73.82%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [46/500] Loss: 2.3601: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0004404331548400269
tensor(2.3651)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [46/500] Acc@1:37.30% Acc@5:77.46%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [47/500] Loss: 2.2504: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0004553161975816924
tensor(2.2561)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [47/500] Acc@1:34.80% Acc@5:72.64%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [48/500] Loss: 2.3269: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0004704077586871805
tensor(2.3233)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [48/500] Acc@1:25.48% Acc@5:64.44%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [49/500] Loss: 2.4141: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0004857012162919861
tensor(2.4096)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [49/500] Acc@1:35.80% Acc@5:75.62%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [50/500] Loss: 2.1598: 100%|██████████| 40/40 [01:05<00:00,  1.64s/it]


0.0005011898599436013
tensor(2.1723)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [50/500] Acc@1:29.68% Acc@5:70.08%: 100%|██████████| 40/40 [00:44<00:00,  1.11s/it]
Train Epoch: [51/500] Loss: 2.1570: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0005168668935459217
tensor(2.1578)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [51/500] Acc@1:27.06% Acc@5:65.54%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [52/500] Loss: 2.2853: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0005327254383412344
tensor(2.2789)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [52/500] Acc@1:32.14% Acc@5:72.24%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [53/500] Loss: 2.1394: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0005487585359284734
tensor(2.1464)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [53/500] Acc@1:36.76% Acc@5:77.38%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [54/500] Loss: 2.1589: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0005649591513164224
tensor(2.1582)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [54/500] Acc@1:30.24% Acc@5:68.40%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [55/500] Loss: 1.9677: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0005813201760105223
tensor(1.9772)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [55/500] Acc@1:33.44% Acc@5:74.18%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [56/500] Loss: 1.8866: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0005978344311319331
tensor(1.8911)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [56/500] Acc@1:34.62% Acc@5:74.68%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [57/500] Loss: 1.8538: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0006144946705674754
tensor(1.8556)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [57/500] Acc@1:34.06% Acc@5:73.44%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [58/500] Loss: 1.8121: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0006312935841490763
tensor(1.8143)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [58/500] Acc@1:28.02% Acc@5:66.58%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [59/500] Loss: 1.7575: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0006482238008613192
tensor(1.7603)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [59/500] Acc@1:28.70% Acc@5:66.88%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [60/500] Loss: 1.7919: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0006652778920756923
tensor(1.7903)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [60/500] Acc@1:34.32% Acc@5:74.42%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [61/500] Loss: 1.7031: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.000682448374810118
tensor(1.7075)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [61/500] Acc@1:32.54% Acc@5:72.16%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [62/500] Loss: 1.7805: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0006997277150123291
tensor(1.7768)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [62/500] Acc@1:32.18% Acc@5:72.50%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [63/500] Loss: 1.7226: 100%|██████████| 40/40 [01:06<00:00,  1.66s/it]


0.0007171083308656564
tensor(1.7253)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [63/500] Acc@1:28.48% Acc@5:68.30%: 100%|██████████| 40/40 [00:44<00:00,  1.11s/it]
Train Epoch: [64/500] Loss: 1.6659: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0007345825961157721
tensor(1.6689)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.05s/it]
Test Epoch: [64/500] Acc@1:51.22% Acc@5:87.50%: 100%|██████████| 40/40 [00:44<00:00,  1.11s/it]
Train Epoch: [65/500] Loss: 1.7435: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0007521428434169317
tensor(1.7398)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [65/500] Acc@1:28.66% Acc@5:67.94%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [66/500] Loss: 1.6534: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.000769781367696248
tensor(1.6577)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [66/500] Acc@1:44.04% Acc@5:83.86%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [67/500] Loss: 1.6519: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0007874904295345159
tensor(1.6522)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [67/500] Acc@1:27.06% Acc@5:66.78%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [68/500] Loss: 1.8119: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0008052622585621105
tensor(1.8039)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [68/500] Acc@1:38.84% Acc@5:78.82%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [69/500] Loss: 1.6902: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0008230890568684644
tensor(1.6959)


Feature extracting: 100%|██████████| 40/40 [00:42<00:00,  1.06s/it]
Test Epoch: [69/500] Acc@1:27.54% Acc@5:65.74%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [70/500] Loss: 1.6682: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0008409630024236279
tensor(1.6695)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [70/500] Acc@1:33.68% Acc@5:75.36%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [71/500] Loss: 1.5684: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0008588762525104161
tensor(1.5735)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [71/500] Acc@1:30.10% Acc@5:68.34%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [72/500] Loss: 1.5276: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0008768209471656282
tensor(1.5299)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [72/500] Acc@1:30.14% Acc@5:70.46%: 100%|██████████| 40/40 [00:45<00:00,  1.13s/it]
Train Epoch: [73/500] Loss: 1.5317: 100%|██████████| 40/40 [01:06<00:00,  1.65s/it]


0.0008947892126288387
tensor(1.5316)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.05s/it]
Test Epoch: [73/500] Acc@1:37.30% Acc@5:78.42%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [74/500] Loss: 1.5008: 100%|██████████| 40/40 [01:05<00:00,  1.65s/it]


0.0009127731647972397
tensor(1.5023)


Feature extracting: 100%|██████████| 40/40 [00:41<00:00,  1.04s/it]
Test Epoch: [74/500] Acc@1:32.44% Acc@5:73.38%: 100%|██████████| 40/40 [00:44<00:00,  1.12s/it]
Train Epoch: [75/500] Loss: 1.4963:  92%|█████████▎| 37/40 [01:02<00:05,  1.67s/it]

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the per-epoch training log and draw two diagnostic figures.
# The columns read below are 'lr_epoch', 'smooth_loss' and 'test_acc@1'.
# NOTE(review): presumably one CSV row per epoch, written by the training
# loop -- confirm against the code that creates `csvfilename`.
df = pd.read_csv(csvfilename)

# Figure 1: smoothed loss against learning rate, with a logarithmic x-axis.
f1 = plt.figure()
plt.semilogx(df['lr_epoch'], df['smooth_loss'])
plt.xlabel('learning rate')
plt.ylabel('smoothed trg epoch loss')
plt.show()

# Figure 2: test top-1 accuracy against the DataFrame row index.
f2 = plt.figure()
plt.plot(df['test_acc@1'])
plt.title('Sim CLR with AdamW')
plt.xlabel('epoch number')
# The plotted series is accuracy, not loss, so label the axis accordingly
# (it was previously labelled 'epoch loss').
plt.ylabel('test Acc@1')
plt.show()

In [None]:
# Ask PyTorch's CUDA caching allocator to release cached GPU memory blocks
# that are currently unused. Memory still referenced by live tensors is not
# freed by this call.
torch.cuda.empty_cache()

In [None]:
# Evaluate the current working directory of the process; as the last
# expression in a notebook cell its value is echoed as the cell output.
os.getcwd()