# Contrastive Learning on CIFAR10 using a ResNet-50 Backbone

First, we import the dataset and define the transformations applied to it. Each training image is randomly augmented (crop + flip + color jitter + grayscale).

In [1]:
from PIL import Image
from torchvision import transforms
from torchvision.datasets import CIFAR10
import torch

class CIFAR10Pair(CIFAR10):
    """CIFAR10 dataset that returns two views of every image.

    Each item is a tuple ``(pos_1, pos_2, target)``. ``pos_1`` and ``pos_2``
    are produced by applying ``self.transform`` twice to the same image, so a
    random transform yields two different augmentations of that image.
    """

    def __getitem__(self, index):
        """Return ``(pos_1, pos_2, target)`` for the sample at ``index``.

        When no transform is configured, both views are the untransformed PIL
        image (the same object), so the method never returns unbound names.
        """
        img, target = self.data[index], self.targets[index]
        # The stored array is converted to a PIL image before transforming.
        img = Image.fromarray(img)

        if self.transform is not None:
            # Two independent calls: a random transform gives two distinct views.
            pos_1 = self.transform(img)
            pos_2 = self.transform(img)
        else:
            # Without this fallback pos_1/pos_2 would be undefined at the return.
            pos_1 = pos_2 = img

        if self.target_transform is not None:
            target = self.target_transform(target)

        return pos_1, pos_2, target


# Training pipeline: random crop resized to 32 pixels, random horizontal flip,
# colour jitter applied with probability 0.8, random grayscale with
# probability 0.2, then tensor conversion and per-channel normalisation.
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=32),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomApply(
            [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)],
            p=0.8,
        ),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
    ]
)

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalisation statistics as the training pipeline.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
    ]
)

As in the original paper, we use the commonly used ResNet-50 as the ConvNet encoder for simplicity. Task 1 is to define the encoder and the projection head. The parameters are adapted from the original paper.

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models.resnet import resnet50


class Model(nn.Module):
    """Encoder ``f`` built from ResNet-50 plus an MLP projection head ``g``.

    Args:
        feature_dim: output size of the projection head (default 128).
    """

    def __init__(self, feature_dim=128):
        super().__init__()

        # Task 1 (encoder): reuse the children of a freshly built ResNet-50,
        # swapping its first convolution for a 3x3 / stride-1 one and leaving
        # out the max-pool and the final fully connected layer.
        # Reference : https://github.com/leftthomas/SimCLR/blob/master/model.py
        encoder_layers = []
        for name, module in resnet50().named_children():
            if name == 'conv1':
                module = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
            if isinstance(module, (nn.Linear, nn.MaxPool2d)):
                continue
            encoder_layers.append(module)
        self.f = nn.Sequential(*encoder_layers)

        # Task 1 (projection head): Dense -> BatchNorm -> ReLU -> Dense, mapping
        # the 2048-dimensional representation to a feature_dim-dimensional
        # latent space.
        # Reference : https://github.com/leftthomas/SimCLR/blob/master/model.py
        self.g = nn.Sequential(
            nn.Linear(2048, 512, bias=False),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, feature_dim, bias=True),
        )

    def forward(self, x):
        """Return ``(feature, out)``, both L2-normalised along the last dim.

        ``feature`` is the flattened encoder output; ``out`` is that feature
        passed through the projection head.
        """
        feature = torch.flatten(self.f(x), start_dim=1)
        projection = self.g(feature)
        return F.normalize(feature, dim=-1), F.normalize(projection, dim=-1)


We train the encoder network and the projection head to maximize agreement using a contrastive loss. The default number of epochs is 1 for time efficiency, since one epoch can take about 10 minutes to run in Google Colab. Task 2 is to calculate the contrastive loss.
To evaluate the influence of the temperature on the contrastive loss, we run this training process 3 times with different temperature values (0.1, 0.5 and 1.0).

In [3]:
import argparse
import os

import numpy as np
import pandas as pd
import torch
import torch.optim as optim
!pip install thop
from thop import profile, clever_format
from torch.utils.data import DataLoader
from tqdm import tqdm


import math

def contrastive_loss(out_1, out_2, temperature):
    """SimCLR-style contrastive loss between two batches of projections.

    Task 2: implement the contrastive loss function and return the loss.

    Args:
        out_1: tensor of shape [B, D], projections of the first views.
        out_2: tensor of shape [B, D], projections of the second views;
            row ``i`` is the positive partner of row ``i`` in ``out_1``.
        temperature: scalar that divides the similarities before ``exp``.

    Returns:
        Scalar tensor: mean over all 2*B samples of
        ``-log(exp(sim_pos / T) / sum_{j != i} exp(sim_ij / T))``.

    The batch size is taken from ``out_1`` itself, so the function does not
    depend on any module-level variable and works for any batch size.
    """
    n = out_1.size(0)

    # [2*B, D]
    out = torch.cat([out_1, out_2], dim=0)
    # [2*B, 2*B] exponentiated pairwise similarities
    sim_matrix = torch.exp(torch.mm(out, out.t().contiguous()) / temperature)
    # Drop the diagonal (each sample compared with itself).
    mask = ~torch.eye(2 * n, dtype=torch.bool, device=sim_matrix.device)
    # [2*B, 2*B-1]
    sim_matrix = sim_matrix.masked_select(mask).view(2 * n, -1)

    # Similarity of each sample with its positive partner: [B] -> [2*B]
    pos_sim = torch.exp(torch.sum(out_1 * out_2, dim=-1) / temperature)
    pos_sim = torch.cat([pos_sim, pos_sim], dim=0)

    loss = (-torch.log(pos_sim / sim_matrix.sum(dim=-1))).mean()
    return loss

# train for one epoch to learn unique features
def train(net, data_loader, train_optimizer, train_scheduler, temperature):
    """Run one training epoch and return the mean contrastive loss per sample.

    Args:
        net: model returning ``(feature, out)`` for a batch of images.
        data_loader: yields ``(pos_1, pos_2, target)`` batches.
        train_optimizer: optimizer stepped once per batch.
        train_scheduler: learning-rate scheduler, also stepped once per batch.
        temperature: temperature passed to ``contrastive_loss``.

    Returns:
        Sample-weighted average of the batch losses over the epoch.

    Note:
        Inputs are moved to the GPU with ``.cuda()``. The progress-bar text
        reads the module-level ``epoch`` and ``epochs`` variables, so they
        must be defined before this function is called.
    """
    net.train()
    total_loss, total_num, train_bar = 0.0, 0, tqdm(data_loader)
    for pos_1, pos_2, _ in train_bar:
        pos_1, pos_2 = pos_1.cuda(non_blocking=True), pos_2.cuda(non_blocking=True)
        _, out_1 = net(pos_1)
        _, out_2 = net(pos_2)

        loss = contrastive_loss(out_1, out_2, temperature)

        train_optimizer.zero_grad()
        loss.backward()
        train_optimizer.step()
        train_scheduler.step()

        # Weight by the actual batch size rather than a global constant so the
        # running average stays correct for any loader configuration.
        n = pos_1.size(0)
        total_num += n
        total_loss += loss.item() * n
        train_bar.set_description('Train Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, epochs, total_loss / total_num))

    return total_loss / total_num


# test for one epoch, use weighted knn to find the most similar images' label to assign the test image
def test(net, memory_data_loader, test_data_loader, temperature):
    """Evaluate ``net`` with a weighted k-nearest-neighbour classifier.

    Features of every sample in ``memory_data_loader`` form a feature bank.
    Each test sample is scored against the bank, its ``k`` most similar
    entries vote for their labels with weight ``exp(similarity / temperature)``,
    and classes are ranked by the summed weights.

    Returns:
        ``(top1, top5)`` accuracy in percent over ``test_data_loader``.

    Note:
        Reads the module-level ``k`` (number of neighbours), ``c`` (number of
        classes), and ``epoch`` / ``epochs`` (progress-bar text only).
    """
    net.eval()
    correct_top1 = 0.0
    correct_top5 = 0.0
    seen = 0
    bank_chunks = []
    with torch.no_grad():
        # Build the feature bank from the memory loader.
        for data, _, target in tqdm(memory_data_loader, desc='Feature extracting'):
            feature, out = net(data.cuda(non_blocking=True))
            bank_chunks.append(feature)
        # [D, N]
        feature_bank = torch.cat(bank_chunks, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(memory_data_loader.dataset.targets, device=feature_bank.device)

        # Predict a label for every test sample by weighted kNN search.
        test_bar = tqdm(test_data_loader)
        for data, _, target in test_bar:
            data = data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            feature, out = net(data)

            n = data.size(0)
            seen += n
            # Similarity of each test feature with every bank entry: [B, N],
            # reduced to the k largest values and their indices: [B, K].
            sim_weight, sim_indices = torch.mm(feature, feature_bank).topk(k=k, dim=-1)
            # Labels of the selected neighbours: [B, K]
            sim_labels = torch.gather(feature_labels.expand(n, -1), dim=-1, index=sim_indices)
            sim_weight = (sim_weight / temperature).exp()

            # One-hot encode the neighbour labels: [B*K, C]
            one_hot = torch.zeros(n * k, c, device=sim_labels.device)
            one_hot = one_hot.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
            # Sum the neighbour weights per class: [B, C]
            pred_scores = torch.sum(one_hot.view(n, -1, c) * sim_weight.unsqueeze(dim=-1), dim=1)

            # Classes ordered from highest to lowest score.
            pred_labels = pred_scores.argsort(dim=-1, descending=True)
            hits = pred_labels == target.unsqueeze(dim=-1)
            correct_top1 += torch.sum(hits[:, :1].any(dim=-1).float()).item()
            correct_top5 += torch.sum(hits[:, :5].any(dim=-1).float()).item()
            test_bar.set_description('Test Epoch: [{}/{}] Acc@1:{:.2f}% Acc@5:{:.2f}%'
                                     .format(epoch, epochs, correct_top1 / seen * 100, correct_top5 / seen * 100))

    return correct_top1 / seen * 100, correct_top5 / seen * 100



In [4]:
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Train SimCLR: configuration, data loaders, model and optimizer.
import numpy as np

# Feature dim for latent vector, Temperature used in softmax, Top k most similar images used to predict the label
# (the training and evaluation calls use `temp0` below, not `temp`)
feature_dim, temp, k = 128, [0.1], 200
# Number of images in each mini-batch
batch_size = 128
# Learning-rate range swept by the 'lambdalr' range test
lr_start = 1.0e-6
lr_end = 1000000.0
max_lrvalue = 0.1  # Rule of Thumb is peaklr (from lambdalr test)*3/8
temp0 = 0.1  # contrastive loss temperature setting
schedulertype = 'onecyclelr'  # 'lambdalr' for testing range of training or 'onecyclelr' for actual training
# Number of sweeps over the dataset to train
if schedulertype == 'lambdalr':
    epochs = 10  # coarse range test: 10 epochs recommended; fine range test: 100 epochs
elif schedulertype == 'onecyclelr':
    epochs = 500
else:
    # Fail immediately: continuing would leave `epochs` undefined and crash
    # later with an unrelated NameError.
    raise ValueError(
        "choose valid option for scheduler: expected 'lambdalr' or 'onecyclelr', got {!r}".format(schedulertype))
smoothfactor = 0.95  # Smooth Factor for smoothing contrastive loss
IterationStr = 'It1'
loadmodel = 0  # loadmodel=0 From scratch or loadmodel=1 Continue from presaved model
# NOTE(review): absolute, user-specific output path; adjust before running elsewhere.
pathtosave = '/home/umaiyal/CS5260Project/results/Simclr_resnet50_RAdamv2'+schedulertype+'/'+IterationStr+'/'
save_name_pre = '{}_{}_{}_{}_{}'.format(feature_dim, temp0, k, batch_size, epochs)
csvfilename = pathtosave+'{}_statistics.csv'.format(save_name_pre)
modelfilename = pathtosave+'{}_model.pth'.format(save_name_pre)

# data prepare
# Training pairs use the random augmentation pipeline; the memory (feature
# bank) and test sets use the deterministic evaluation pipeline.
train_data = CIFAR10Pair(root='data', train=True, transform=train_transform, download=True)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True,
                          drop_last=True)
memory_data = CIFAR10Pair(root='data', train=True, transform=test_transform, download=True)
memory_loader = DataLoader(memory_data, batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True)
test_data = CIFAR10Pair(root='data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True)

import torch
# The rest of the notebook calls .cuda() unconditionally, so stop here with a
# clear message when no GPU is visible.
if not torch.cuda.is_available():
    raise RuntimeError('CUDA is required to run this training cell')
torch.cuda.set_device(0)

# model setup and optimizer config
model = Model(feature_dim).cuda()

flops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32).cuda(),))
flops, params = clever_format([flops, params])
print('# Model Params: {} FLOPs: {}'.format(params, flops))

# The optimizer is created with lr=1.0; the scheduler built afterwards
# controls the learning rate that is actually used.
optimizer = optim.RAdam(model.parameters(), lr=1.0)
# Alternative: optimizer = optim.Adam(model.parameters(), lr=1.0)

# exponentially increase learning rate from low to high
def lrs(batch):
    """Learning-rate multiplier for step ``batch`` of the range test.

    Interpolates linearly in log2 space between the module-level ``lr_start``
    and ``lr_end`` over ``len(train_loader) * epochs`` steps, so the returned
    value grows exponentially with ``batch``.
    """
    log_lo = math.log2(lr_start)
    log_hi = math.log2(lr_end)
    steps_per_epoch = len(train_loader)
    exponent = log_lo + (log_hi - log_lo) * batch / steps_per_epoch / epochs
    return 2 ** exponent
   
# Build the learning-rate scheduler selected by `schedulertype`.
if schedulertype == 'lambdalr':
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lrs)
elif schedulertype == 'onecyclelr':
    div_factorvalue = 10  # alternative: round(max_lrvalue/0.0006)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, div_factor=div_factorvalue, max_lr=max_lrvalue,
                                                    total_steps=len(train_loader) * epochs, epochs=epochs)
else:
    # Fail immediately: continuing would leave `scheduler` undefined.
    raise ValueError(
        "choose valid option for scheduler: expected 'lambdalr' or 'onecyclelr', got {!r}".format(schedulertype))

# Either resume from the saved checkpoint or start from scratch.
if loadmodel == 1:
    checkpoint = torch.load(modelfilename)
    model.load_state_dict(checkpoint['model_state_dict'], strict=False)
    model.to(device)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    # The checkpoint is written only when top-1 accuracy improves, so training
    # resumes from the epoch after the best one.
    startepoch = checkpoint['epoch'] + 1
    best_acc = checkpoint['best_acc']

    print(startepoch)
else:
    startepoch = 1
    best_acc = 0

# Number of classes, read by test() as a module-level variable.
c = len(memory_data.classes)

os.makedirs(pathtosave, exist_ok=True)

# Per-epoch history buffers (index = epoch - 1).
train_loss_epoch = torch.zeros(epochs)
smooth_loss_epoch = torch.zeros(epochs)
test_acc_1_epoch = torch.zeros(epochs)
test_acc_5_epoch = torch.zeros(epochs)
lr_epoch = torch.zeros(epochs)

if loadmodel == 1:
    # Restore the first startepoch-1 rows of the statistics CSV. Column 0 is
    # the epoch index; columns 1..5 follow the order of the keys below, which
    # is the order in which the CSV is written.
    history_buffers = {
        'train_loss': train_loss_epoch,
        'test_acc@1': test_acc_1_epoch,
        'test_acc@5': test_acc_5_epoch,
        'smooth_loss': smooth_loss_epoch,
        'lr_epoch': lr_epoch,
    }
    df = pd.read_csv(csvfilename)
    results = {}
    for col_idx, (key, buffer) in enumerate(history_buffers.items(), start=1):
        values = pd.to_numeric(df.iloc[0:startepoch - 1, col_idx]).to_numpy(dtype=float)
        buffer[0:values.size] = torch.tensor(values, dtype=buffer.dtype)
        results[key] = values.tolist()
else:
    results = {'train_loss': [], 'test_acc@1': [], 'test_acc@5': [], 'smooth_loss': [], 'lr_epoch': []}


# `epoch` and `epochs` must keep these names: train() and test() read them
# from module scope for their progress-bar text.
for epoch in range(startepoch, epochs + 1):
    train_loss = train(model, train_loader, optimizer, scheduler, temp0)
    train_loss_epoch[epoch - 1] = train_loss
    # Smoothed loss: smoothfactor weights the current epoch's loss and
    # (1 - smoothfactor) weights the previous smoothed value.
    if epoch > 1:
        smooth_loss = float(train_loss_epoch[epoch - 1] * smoothfactor
                            + smooth_loss_epoch[epoch - 2] * (1.0 - smoothfactor))
    else:
        smooth_loss = train_loss
    smooth_loss_epoch[epoch - 1] = smooth_loss

    current_lr = optimizer.param_groups[0]['lr']
    print(current_lr)
    print(smooth_loss_epoch[epoch - 1])
    lr_epoch[epoch - 1] = float(current_lr)

    test_acc_1, test_acc_5 = test(model, memory_loader, test_loader, temp0)
    test_acc_1_epoch[epoch - 1] = test_acc_1
    test_acc_5_epoch[epoch - 1] = test_acc_5

    results['train_loss'].append(train_loss)
    results['test_acc@1'].append(test_acc_1)
    results['test_acc@5'].append(test_acc_5)
    results['smooth_loss'].append(smooth_loss)
    results['lr_epoch'].append(current_lr)

    # save statistics
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv(csvfilename, index_label='epoch')
    # Checkpoint only when top-1 accuracy improves.
    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        torch.save({'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_acc': best_acc}, modelfilename)

# Report the learning rate at the epoch with the lowest smoothed loss and the
# max/min learning rates derived from it (3/8 and 3/80 of that value).
minloss_loc = torch.argmin(smooth_loss_epoch)
minloss_loclr = lr_epoch[minloss_loc]
print(f'lr corresponding to minloss={minloss_loclr}')
print(f'suggested maxlr={minloss_loclr*3/8}')
print(f'suggested minlr={minloss_loclr*3/80}')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


  kernel = torch.DoubleTensor([*(x[0].shape[2:])]) // torch.DoubleTensor(list((m.output_size,))).squeeze()


[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torchvision.models.resnet.Bottleneck'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_bn() for <class 'torch.nn.modules.batchnorm.BatchNorm1d'>.
[91m[WARN] Cannot find rule for <class '__main__.Model'>. Treat it as zero Macs and zero Params.[00m
# Model Params: 24.62M FLOPs: 1.31G


Train Epoch: [1/500] Loss: 4.6051: 100%|██████████| 390/390 [01:31<00:00,  4.25it/s]


0.010009869581038305
tensor(4.6051)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.36it/s]
Test Epoch: [1/500] Acc@1:42.35% Acc@5:89.96%: 100%|██████████| 79/79 [00:03<00:00, 22.59it/s]
Train Epoch: [2/500] Loss: 3.7506: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.010039473994880796
tensor(3.7934)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [2/500] Acc@1:45.81% Acc@5:91.71%: 100%|██████████| 79/79 [00:03<00:00, 22.58it/s]
Train Epoch: [3/500] Loss: 3.1320: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.010088800255609168
tensor(3.1651)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.21it/s]
Test Epoch: [3/500] Acc@1:50.82% Acc@5:93.38%: 100%|██████████| 79/79 [00:03<00:00, 22.65it/s]
Train Epoch: [4/500] Loss: 2.7270: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.010157826726355554
tensor(2.7489)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [4/500] Acc@1:53.51% Acc@5:94.48%: 100%|██████████| 79/79 [00:03<00:00, 22.47it/s]
Train Epoch: [5/500] Loss: 2.4508: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.010246523128793386
tensor(2.4657)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.26it/s]
Test Epoch: [5/500] Acc@1:55.74% Acc@5:95.14%: 100%|██████████| 79/79 [00:03<00:00, 22.32it/s]
Train Epoch: [6/500] Loss: 2.2158: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.01035485055641909
tensor(2.2283)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.25it/s]
Test Epoch: [6/500] Acc@1:58.96% Acc@5:95.70%: 100%|██████████| 79/79 [00:03<00:00, 22.49it/s]
Train Epoch: [7/500] Loss: 2.0683: 100%|██████████| 390/390 [01:33<00:00,  4.17it/s]


0.010482761491618156
tensor(2.0763)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [7/500] Acc@1:60.22% Acc@5:96.18%: 100%|██████████| 79/79 [00:03<00:00, 22.55it/s]
Train Epoch: [8/500] Loss: 1.9514: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.010630199826508785
tensor(1.9576)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [8/500] Acc@1:61.12% Acc@5:96.35%: 100%|██████████| 79/79 [00:03<00:00, 22.37it/s]
Train Epoch: [9/500] Loss: 1.8409: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.010797100887553437
tensor(1.8468)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [9/500] Acc@1:62.31% Acc@5:96.52%: 100%|██████████| 79/79 [00:03<00:00, 22.35it/s]
Train Epoch: [10/500] Loss: 1.7430: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.010983391463927808
tensor(1.7482)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.29it/s]
Test Epoch: [10/500] Acc@1:62.92% Acc@5:96.63%: 100%|██████████| 79/79 [00:03<00:00, 22.61it/s]
Train Epoch: [11/500] Loss: 1.6648: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.011188989839634567
tensor(1.6690)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.23it/s]
Test Epoch: [11/500] Acc@1:64.28% Acc@5:96.78%: 100%|██████████| 79/79 [00:03<00:00, 22.46it/s]
Train Epoch: [12/500] Loss: 1.6172: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.011413805829347895
tensor(1.6198)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.17it/s]
Test Epoch: [12/500] Acc@1:63.94% Acc@5:97.16%: 100%|██████████| 79/79 [00:03<00:00, 22.39it/s]
Train Epoch: [13/500] Loss: 1.5513: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.011657740817973147
tensor(1.5548)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.21it/s]
Test Epoch: [13/500] Acc@1:65.45% Acc@5:97.40%: 100%|██████████| 79/79 [00:03<00:00, 22.65it/s]
Train Epoch: [14/500] Loss: 1.5148: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.011920687803904192
tensor(1.5168)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [14/500] Acc@1:65.31% Acc@5:97.57%: 100%|██████████| 79/79 [00:03<00:00, 22.54it/s]
Train Epoch: [15/500] Loss: 1.4885: 100%|██████████| 390/390 [01:32<00:00,  4.20it/s]


0.012202531445959439
tensor(1.4899)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [15/500] Acc@1:65.71% Acc@5:97.60%: 100%|██████████| 79/79 [00:03<00:00, 22.56it/s]
Train Epoch: [16/500] Loss: 1.4571: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.012503148113975945
tensor(1.4588)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.22it/s]
Test Epoch: [16/500] Acc@1:66.30% Acc@5:97.37%: 100%|██████████| 79/79 [00:03<00:00, 22.47it/s]
Train Epoch: [17/500] Loss: 1.4149: 100%|██████████| 390/390 [01:32<00:00,  4.20it/s]


0.012822405943039655
tensor(1.4170)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [17/500] Acc@1:66.21% Acc@5:97.40%: 100%|██████████| 79/79 [00:03<00:00, 22.65it/s]
Train Epoch: [18/500] Loss: 1.3830: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.013160164891327639
tensor(1.3847)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [18/500] Acc@1:67.62% Acc@5:97.87%: 100%|██████████| 79/79 [00:03<00:00, 22.41it/s]
Train Epoch: [19/500] Loss: 1.3657: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.01351627680153715
tensor(1.3666)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.35it/s]
Test Epoch: [19/500] Acc@1:67.34% Acc@5:97.34%: 100%|██████████| 79/79 [00:03<00:00, 22.62it/s]
Train Epoch: [20/500] Loss: 1.3422: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.013890585465874586
tensor(1.3434)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [20/500] Acc@1:67.08% Acc@5:97.58%: 100%|██████████| 79/79 [00:03<00:00, 22.39it/s]
Train Epoch: [21/500] Loss: 1.2973: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.014282926694575648
tensor(1.2996)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.29it/s]
Test Epoch: [21/500] Acc@1:67.61% Acc@5:97.62%: 100%|██████████| 79/79 [00:03<00:00, 22.26it/s]
Train Epoch: [22/500] Loss: 1.3054: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.014693128387926968
tensor(1.3051)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.31it/s]
Test Epoch: [22/500] Acc@1:67.96% Acc@5:97.79%: 100%|██████████| 79/79 [00:03<00:00, 22.63it/s]
Train Epoch: [23/500] Loss: 1.3001: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.015121010611757219
tensor(1.3004)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [23/500] Acc@1:68.59% Acc@5:97.83%: 100%|██████████| 79/79 [00:03<00:00, 22.37it/s]
Train Epoch: [24/500] Loss: 1.2536: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.015566385676364966
tensor(1.2559)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.21it/s]
Test Epoch: [24/500] Acc@1:68.26% Acc@5:97.78%: 100%|██████████| 79/79 [00:03<00:00, 22.51it/s]
Train Epoch: [25/500] Loss: 1.2440: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.01602905821884827
tensor(1.2446)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.21it/s]
Test Epoch: [25/500] Acc@1:68.36% Acc@5:97.80%: 100%|██████████| 79/79 [00:03<00:00, 22.39it/s]
Train Epoch: [26/500] Loss: 1.2366: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.016508825288800405
tensor(1.2370)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [26/500] Acc@1:69.08% Acc@5:97.78%: 100%|██████████| 79/79 [00:03<00:00, 22.65it/s]
Train Epoch: [27/500] Loss: 1.2303: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.017005476437333494
tensor(1.2306)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [27/500] Acc@1:69.14% Acc@5:97.91%: 100%|██████████| 79/79 [00:03<00:00, 22.78it/s]
Train Epoch: [28/500] Loss: 1.1793: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.017518793809391642
tensor(1.1819)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [28/500] Acc@1:69.40% Acc@5:97.86%: 100%|██████████| 79/79 [00:03<00:00, 22.47it/s]
Train Epoch: [29/500] Loss: 1.1954: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.018048552239312537
tensor(1.1948)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [29/500] Acc@1:69.23% Acc@5:97.85%: 100%|██████████| 79/79 [00:03<00:00, 22.07it/s]
Train Epoch: [30/500] Loss: 1.1576: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.018594519349595967
tensor(1.1595)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.20it/s]
Test Epoch: [30/500] Acc@1:69.62% Acc@5:97.96%: 100%|██████████| 79/79 [00:03<00:00, 22.35it/s]
Train Epoch: [31/500] Loss: 1.1419: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.019156455652835747
tensor(1.1428)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.35it/s]
Test Epoch: [31/500] Acc@1:69.39% Acc@5:98.12%: 100%|██████████| 79/79 [00:03<00:00, 22.54it/s]
Train Epoch: [32/500] Loss: 1.1414: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.0197341146567704
tensor(1.1414)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.25it/s]
Test Epoch: [32/500] Acc@1:69.62% Acc@5:98.19%: 100%|██████████| 79/79 [00:03<00:00, 22.35it/s]
Train Epoch: [33/500] Loss: 1.1238: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.020327242972406484
tensor(1.1247)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [33/500] Acc@1:70.00% Acc@5:98.10%: 100%|██████████| 79/79 [00:03<00:00, 22.46it/s]
Train Epoch: [34/500] Loss: 1.1181: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.020935580425167258
tensor(1.1185)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [34/500] Acc@1:70.17% Acc@5:98.14%: 100%|██████████| 79/79 [00:03<00:00, 22.82it/s]
Train Epoch: [35/500] Loss: 1.1024: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.021558860169017688
tensor(1.1032)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.29it/s]
Test Epoch: [35/500] Acc@1:70.25% Acc@5:98.09%: 100%|██████████| 79/79 [00:03<00:00, 22.66it/s]
Train Epoch: [36/500] Loss: 1.0924: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.02219680880351589
tensor(1.0929)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.24it/s]
Test Epoch: [36/500] Acc@1:70.90% Acc@5:98.23%: 100%|██████████| 79/79 [00:03<00:00, 22.31it/s]
Train Epoch: [37/500] Loss: 1.0849: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.022849146493739828
tensor(1.0853)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [37/500] Acc@1:71.14% Acc@5:98.24%: 100%|██████████| 79/79 [00:03<00:00, 22.20it/s]
Train Epoch: [38/500] Loss: 1.0601: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.02351558709303625
tensor(1.0614)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [38/500] Acc@1:70.46% Acc@5:98.15%: 100%|██████████| 79/79 [00:03<00:00, 22.61it/s]
Train Epoch: [39/500] Loss: 1.0616: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.024195838268538383
tensor(1.0616)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.27it/s]
Test Epoch: [39/500] Acc@1:70.37% Acc@5:98.24%: 100%|██████████| 79/79 [00:03<00:00, 22.39it/s]
Train Epoch: [40/500] Loss: 1.0584: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.02488960162939724
tensor(1.0585)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [40/500] Acc@1:70.46% Acc@5:98.12%: 100%|██████████| 79/79 [00:03<00:00, 22.54it/s]
Train Epoch: [41/500] Loss: 1.0412: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.025596572857670058
tensor(1.0421)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.21it/s]
Test Epoch: [41/500] Acc@1:72.15% Acc@5:98.18%: 100%|██████████| 79/79 [00:03<00:00, 22.58it/s]
Train Epoch: [42/500] Loss: 1.0391: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.026316441841808896
tensor(1.0393)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [42/500] Acc@1:71.76% Acc@5:98.35%: 100%|██████████| 79/79 [00:03<00:00, 22.32it/s]
Train Epoch: [43/500] Loss: 1.0187: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.02704889281269035
tensor(1.0197)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [43/500] Acc@1:71.58% Acc@5:98.34%: 100%|██████████| 79/79 [00:03<00:00, 22.50it/s]
Train Epoch: [44/500] Loss: 1.0337: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.027793604482127185
tensor(1.0330)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [44/500] Acc@1:71.61% Acc@5:98.35%: 100%|██████████| 79/79 [00:03<00:00, 22.51it/s]
Train Epoch: [45/500] Loss: 1.0227: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.02855025018380064
tensor(1.0232)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [45/500] Acc@1:70.95% Acc@5:98.03%: 100%|██████████| 79/79 [00:03<00:00, 22.60it/s]
Train Epoch: [46/500] Loss: 1.0073: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.029318498016552036
tensor(1.0081)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.35it/s]
Test Epoch: [46/500] Acc@1:71.55% Acc@5:98.42%: 100%|██████████| 79/79 [00:03<00:00, 22.60it/s]
Train Epoch: [47/500] Loss: 1.0028: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.030098010989970614
tensor(1.0031)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [47/500] Acc@1:71.09% Acc@5:98.18%: 100%|██████████| 79/79 [00:03<00:00, 22.56it/s]
Train Epoch: [48/500] Loss: 0.9786: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03088844717221359
tensor(0.9799)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [48/500] Acc@1:72.08% Acc@5:98.35%: 100%|██████████| 79/79 [00:03<00:00, 22.43it/s]
Train Epoch: [49/500] Loss: 0.9795: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.031689459839994064
tensor(0.9795)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [49/500] Acc@1:72.01% Acc@5:98.20%: 100%|██████████| 79/79 [00:03<00:00, 22.47it/s]
Train Epoch: [50/500] Loss: 0.9728: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03250069763067036
tensor(0.9731)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.35it/s]
Test Epoch: [50/500] Acc@1:71.62% Acc@5:98.31%: 100%|██████████| 79/79 [00:03<00:00, 22.29it/s]
Train Epoch: [51/500] Loss: 0.9624: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.033321804696370624
tensor(0.9629)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [51/500] Acc@1:71.87% Acc@5:98.30%: 100%|██████████| 79/79 [00:03<00:00, 22.38it/s]
Train Epoch: [52/500] Loss: 0.9636: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.034152420860084826
tensor(0.9635)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [52/500] Acc@1:72.60% Acc@5:98.32%: 100%|██████████| 79/79 [00:03<00:00, 22.67it/s]
Train Epoch: [53/500] Loss: 0.9569: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03499218177365565
tensor(0.9572)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.20it/s]
Test Epoch: [53/500] Acc@1:71.91% Acc@5:98.35%: 100%|██████████| 79/79 [00:03<00:00, 22.56it/s]
Train Epoch: [54/500] Loss: 0.9460: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03584071907759913
tensor(0.9466)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.36it/s]
Test Epoch: [54/500] Acc@1:72.08% Acc@5:98.36%: 100%|██████████| 79/79 [00:03<00:00, 22.36it/s]
Train Epoch: [55/500] Loss: 0.9471: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03669766056268478
tensor(0.9471)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.31it/s]
Test Epoch: [55/500] Acc@1:71.86% Acc@5:98.43%: 100%|██████████| 79/79 [00:03<00:00, 22.32it/s]
Train Epoch: [56/500] Loss: 0.9294: 100%|██████████| 390/390 [01:32<00:00,  4.20it/s]


0.03756263033320456
tensor(0.9303)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.31it/s]
Test Epoch: [56/500] Acc@1:73.17% Acc@5:98.43%: 100%|██████████| 79/79 [00:03<00:00, 22.63it/s]
Train Epoch: [57/500] Loss: 0.9275: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03843524897185861
tensor(0.9276)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.37it/s]
Test Epoch: [57/500] Acc@1:72.92% Acc@5:98.50%: 100%|██████████| 79/79 [00:03<00:00, 22.57it/s]
Train Epoch: [58/500] Loss: 0.9252: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.03931513370618606
tensor(0.9253)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.37it/s]
Test Epoch: [58/500] Acc@1:72.84% Acc@5:98.43%: 100%|██████████| 79/79 [00:03<00:00, 22.62it/s]
Train Epoch: [59/500] Loss: 0.9227: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.04020189857646738
tensor(0.9228)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.29it/s]
Test Epoch: [59/500] Acc@1:72.52% Acc@5:98.42%: 100%|██████████| 79/79 [00:03<00:00, 22.67it/s]
Train Epoch: [60/500] Loss: 0.9055: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.04109515460502482
tensor(0.9064)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.29it/s]
Test Epoch: [60/500] Acc@1:72.84% Acc@5:98.44%: 100%|██████████| 79/79 [00:03<00:00, 22.56it/s]
Train Epoch: [61/500] Loss: 0.9133: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.041994509966846734
tensor(0.9129)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [61/500] Acc@1:72.26% Acc@5:98.33%: 100%|██████████| 79/79 [00:03<00:00, 22.52it/s]
Train Epoch: [62/500] Loss: 0.8975: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.042899570161460884
tensor(0.8983)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.35it/s]
Test Epoch: [62/500] Acc@1:73.29% Acc@5:98.46%: 100%|██████████| 79/79 [00:03<00:00, 22.38it/s]
Train Epoch: [63/500] Loss: 0.8993: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.04380993818598124
tensor(0.8992)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [63/500] Acc@1:72.79% Acc@5:98.50%: 100%|██████████| 79/79 [00:03<00:00, 22.54it/s]
Train Epoch: [64/500] Loss: 0.9048: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.044725214709252586
tensor(0.9045)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [64/500] Acc@1:72.72% Acc@5:98.43%: 100%|██████████| 79/79 [00:03<00:00, 22.74it/s]
Train Epoch: [65/500] Loss: 0.8929: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.045644998247016295
tensor(0.8935)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.22it/s]
Test Epoch: [65/500] Acc@1:73.63% Acc@5:98.50%: 100%|██████████| 79/79 [00:03<00:00, 22.63it/s]
Train Epoch: [66/500] Loss: 0.8832: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.04656888533802065
tensor(0.8837)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [66/500] Acc@1:73.42% Acc@5:98.60%: 100%|██████████| 79/79 [00:03<00:00, 22.61it/s]
Train Epoch: [67/500] Loss: 0.8856: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.04749647072099833
tensor(0.8855)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [67/500] Acc@1:73.10% Acc@5:98.45%: 100%|██████████| 79/79 [00:03<00:00, 22.19it/s]
Train Epoch: [68/500] Loss: 0.8756: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.04842734751243345
tensor(0.8761)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.31it/s]
Test Epoch: [68/500] Acc@1:72.95% Acc@5:98.54%: 100%|██████████| 79/79 [00:03<00:00, 22.26it/s]
Train Epoch: [69/500] Loss: 0.8733: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.04936110738504025
tensor(0.8735)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.37it/s]
Test Epoch: [69/500] Acc@1:73.67% Acc@5:98.48%: 100%|██████████| 79/79 [00:03<00:00, 22.49it/s]
Train Epoch: [70/500] Loss: 0.8619: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.05029734074687503
tensor(0.8625)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.26it/s]
Test Epoch: [70/500] Acc@1:73.10% Acc@5:98.54%: 100%|██████████| 79/79 [00:03<00:00, 22.55it/s]
Train Epoch: [71/500] Loss: 0.8609: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.05123563692100289
tensor(0.8609)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [71/500] Acc@1:73.57% Acc@5:98.63%: 100%|██████████| 79/79 [00:03<00:00, 22.47it/s]
Train Epoch: [72/500] Loss: 0.8642: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.052175584325640345
tensor(0.8640)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [72/500] Acc@1:73.76% Acc@5:98.46%: 100%|██████████| 79/79 [00:03<00:00, 22.50it/s]
Train Epoch: [73/500] Loss: 0.8500: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.053116770654694855
tensor(0.8507)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.37it/s]
Test Epoch: [73/500] Acc@1:72.98% Acc@5:98.53%: 100%|██████████| 79/79 [00:03<00:00, 22.62it/s]
Train Epoch: [74/500] Loss: 0.8507: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.05405878305862215
tensor(0.8507)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [74/500] Acc@1:73.69% Acc@5:98.44%: 100%|██████████| 79/79 [00:03<00:00, 22.58it/s]
Train Epoch: [75/500] Loss: 0.8572: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.05500120832552176
tensor(0.8569)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.38it/s]
Test Epoch: [75/500] Acc@1:74.19% Acc@5:98.61%: 100%|██████████| 79/79 [00:03<00:00, 22.40it/s]
Train Epoch: [76/500] Loss: 0.8524: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.055943633062391744
tensor(0.8526)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [76/500] Acc@1:73.93% Acc@5:98.68%: 100%|██████████| 79/79 [00:03<00:00, 22.52it/s]
Train Epoch: [77/500] Loss: 0.8475: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.05688564387646263
tensor(0.8478)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.39it/s]
Test Epoch: [77/500] Acc@1:73.98% Acc@5:98.64%: 100%|██████████| 79/79 [00:03<00:00, 22.48it/s]
Train Epoch: [78/500] Loss: 0.8377: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.05782682755653138
tensor(0.8382)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.39it/s]
Test Epoch: [78/500] Acc@1:74.62% Acc@5:98.57%: 100%|██████████| 79/79 [00:03<00:00, 22.75it/s]
Train Epoch: [79/500] Loss: 0.8483: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.058766771254215665
tensor(0.8478)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.36it/s]
Test Epoch: [79/500] Acc@1:73.66% Acc@5:98.59%: 100%|██████████| 79/79 [00:03<00:00, 22.46it/s]
Train Epoch: [80/500] Loss: 0.8283: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.059705062665049005
tensor(0.8293)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s]
Test Epoch: [80/500] Acc@1:74.01% Acc@5:98.49%: 100%|██████████| 79/79 [00:03<00:00, 22.53it/s]
Train Epoch: [81/500] Loss: 0.8289: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.060641290209337305
tensor(0.8289)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.28it/s]
Test Epoch: [81/500] Acc@1:74.18% Acc@5:98.72%: 100%|██████████| 79/79 [00:03<00:00, 22.51it/s]
Train Epoch: [82/500] Loss: 0.8309: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.06157504321269755
tensor(0.8308)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.32it/s]
Test Epoch: [82/500] Acc@1:74.89% Acc@5:98.73%: 100%|██████████| 79/79 [00:03<00:00, 22.48it/s]
Train Epoch: [83/500] Loss: 0.8245: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.06250591208619921
tensor(0.8249)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.38it/s]
Test Epoch: [83/500] Acc@1:73.90% Acc@5:98.80%: 100%|██████████| 79/79 [00:03<00:00, 22.58it/s]
Train Epoch: [84/500] Loss: 0.8245: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.0634334885060297
tensor(0.8245)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.42it/s]
Test Epoch: [84/500] Acc@1:74.93% Acc@5:98.77%: 100%|██████████| 79/79 [00:03<00:00, 22.38it/s]
Train Epoch: [85/500] Loss: 0.8081: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.06435736559260483
tensor(0.8089)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [85/500] Acc@1:73.25% Acc@5:98.59%: 100%|██████████| 79/79 [00:03<00:00, 22.49it/s]
Train Epoch: [86/500] Loss: 0.8170: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.06527713808904566
tensor(0.8166)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [86/500] Acc@1:75.14% Acc@5:98.69%: 100%|██████████| 79/79 [00:03<00:00, 22.35it/s]
Train Epoch: [87/500] Loss: 0.8117: 100%|██████████| 390/390 [01:32<00:00,  4.21it/s]


0.06619240253894376
tensor(0.8119)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [87/500] Acc@1:74.86% Acc@5:98.77%: 100%|██████████| 79/79 [00:03<00:00, 22.45it/s]
Train Epoch: [88/500] Loss: 0.8109: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.06710275746333647
tensor(0.8110)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.43it/s]
Test Epoch: [88/500] Acc@1:73.95% Acc@5:98.64%: 100%|██████████| 79/79 [00:03<00:00, 22.59it/s]
Train Epoch: [89/500] Loss: 0.8019: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.06800780353681488
tensor(0.8023)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.39it/s]
Test Epoch: [89/500] Acc@1:74.45% Acc@5:98.55%: 100%|██████████| 79/79 [00:03<00:00, 22.69it/s]
Train Epoch: [90/500] Loss: 0.7944: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.06890714376268721
tensor(0.7948)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.30it/s]
Test Epoch: [90/500] Acc@1:74.23% Acc@5:98.49%: 100%|██████████| 79/79 [00:03<00:00, 22.73it/s]
Train Epoch: [91/500] Loss: 0.7945: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.06980038364712053
tensor(0.7945)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.17it/s]
Test Epoch: [91/500] Acc@1:74.83% Acc@5:98.66%: 100%|██████████| 79/79 [00:03<00:00, 22.62it/s]
Train Epoch: [92/500] Loss: 0.7864: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07068713137218484
tensor(0.7868)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.37it/s]
Test Epoch: [92/500] Acc@1:75.23% Acc@5:98.68%: 100%|██████████| 79/79 [00:03<00:00, 22.32it/s]
Train Epoch: [93/500] Loss: 0.7939: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07156699796772308
tensor(0.7935)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.37it/s]
Test Epoch: [93/500] Acc@1:74.72% Acc@5:98.60%: 100%|██████████| 79/79 [00:03<00:00, 22.52it/s]
Train Epoch: [94/500] Loss: 0.7908: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07243959748197226
tensor(0.7909)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.36it/s]
Test Epoch: [94/500] Acc@1:75.51% Acc@5:98.82%: 100%|██████████| 79/79 [00:03<00:00, 22.32it/s]
Train Epoch: [95/500] Loss: 0.7876: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07330454715086035
tensor(0.7878)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.45it/s]
Test Epoch: [95/500] Acc@1:74.81% Acc@5:98.63%: 100%|██████████| 79/79 [00:03<00:00, 22.65it/s]
Train Epoch: [96/500] Loss: 0.7747: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07416146756590514
tensor(0.7754)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.39it/s]
Test Epoch: [96/500] Acc@1:75.66% Acc@5:98.80%: 100%|██████████| 79/79 [00:03<00:00, 22.66it/s]
Train Epoch: [97/500] Loss: 0.7702: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07500998284064084
tensor(0.7704)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [97/500] Acc@1:75.25% Acc@5:98.65%: 100%|██████████| 79/79 [00:03<00:00, 22.49it/s]
Train Epoch: [98/500] Loss: 0.7720: 100%|██████████| 390/390 [01:32<00:00,  4.22it/s]


0.07584972077550003
tensor(0.7720)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.39it/s]
Test Epoch: [98/500] Acc@1:75.73% Acc@5:98.64%: 100%|██████████| 79/79 [00:03<00:00, 22.68it/s]
Train Epoch: [99/500] Loss: 0.7662: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07668031302107836
tensor(0.7665)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.41it/s]
Test Epoch: [99/500] Acc@1:75.98% Acc@5:98.72%: 100%|██████████| 79/79 [00:03<00:00, 22.21it/s]
Train Epoch: [100/500] Loss: 0.7719: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07750139523971013
tensor(0.7717)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.41it/s]
Test Epoch: [100/500] Acc@1:75.49% Acc@5:98.65%: 100%|██████████| 79/79 [00:03<00:00, 22.46it/s]
Train Epoch: [101/500] Loss: 0.7605: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.0783126072652844
tensor(0.7610)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.33it/s]
Test Epoch: [101/500] Acc@1:76.12% Acc@5:98.85%: 100%|██████████| 79/79 [00:03<00:00, 22.26it/s]
Train Epoch: [102/500] Loss: 0.7613: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07911359326123119
tensor(0.7613)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.41it/s]
Test Epoch: [102/500] Acc@1:75.92% Acc@5:98.73%: 100%|██████████| 79/79 [00:03<00:00, 22.57it/s]
Train Epoch: [103/500] Loss: 0.7630: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.07990400187660834
tensor(0.7630)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.46it/s]
Test Epoch: [103/500] Acc@1:75.42% Acc@5:98.57%: 100%|██████████| 79/79 [00:03<00:00, 22.69it/s]
Train Epoch: [104/500] Loss: 0.7629: 100%|██████████| 390/390 [01:32<00:00,  4.23it/s]


0.08068348640022108
tensor(0.7629)


Feature extracting: 100%|██████████| 391/391 [00:13<00:00, 28.34it/s]
Test Epoch: [104/500] Acc@1:75.74% Acc@5:98.82%: 100%|██████████| 79/79 [00:03<00:00, 22.63it/s]
Train Epoch: [105/500] Loss: 0.7673:  30%|███       | 117/390 [00:28<01:03,  4.28it/s]

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the training log written during the run. `csvfilename` is not defined in
# this cell, so it must already exist in the kernel from an earlier cell.
df = pd.read_csv(csvfilename)

# Figure 1: smoothed loss as a function of learning rate (log-scaled x-axis).
f1, ax_loss = plt.subplots()
ax_loss.semilogx(df['lr_epoch'], df['smooth_loss'])
ax_loss.set_xlabel('learning rate')
ax_loss.set_ylabel('smoothed trg epoch loss')
plt.show()

# Figure 2: top-1 test accuracy. The Series is plotted against its own index,
# i.e. the CSV row number — presumably one row per epoch; verify against the
# cell that writes the CSV.
f2, ax_acc = plt.subplots()
ax_acc.plot(df['test_acc@1'])
ax_acc.set_title('Sim CLR with RAdam')
ax_acc.set_xlabel('epoch number')
ax_acc.set_ylabel('epoch Test Accuracy')
plt.show()