# Contrastive Learning on CIFAR10 using ConvNext Backbone

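First, we import the dataset and define the transformations applied to it. Each training image is randomly augmented (random resized crop + horizontal flip + color jitter + grayscale), and the dataset returns two independently transformed views of every image.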

In [1]:
from PIL import Image
from torchvision import transforms
from torchvision.datasets import CIFAR10
import torch

class CIFAR10Pair(CIFAR10):
    """CIFAR10 dataset that yields two views of every image.

    Each item is ``(pos_1, pos_2, target)``: ``self.transform`` is applied
    twice to the same image, so a random transform produces two different
    augmentations of that image.
    """

    def __getitem__(self, index):
        """Return two transformed views of sample ``index`` and its label.

        Args:
            index: position of the sample in ``self.data`` / ``self.targets``.

        Returns:
            Tuple ``(pos_1, pos_2, target)``. When ``self.transform`` is
            ``None`` both views are the same untransformed PIL image.
        """
        img, target = self.data[index], self.targets[index]
        # self.data holds raw arrays; the transforms operate on PIL images.
        img = Image.fromarray(img)

        if self.transform is not None:
            # Two separate calls so that random transforms draw independently.
            pos_1 = self.transform(img)
            pos_2 = self.transform(img)
        else:
            # Previously pos_1 / pos_2 were left unbound here, which raised
            # UnboundLocalError at the return statement.
            pos_1 = pos_2 = img

        if self.target_transform is not None:
            target = self.target_transform(target)

        return pos_1, pos_2, target


# Per-channel mean / std handed to Normalize by both pipelines
# (presumably CIFAR-10 channel statistics — confirm against the data source).
_norm_mean = [0.4914, 0.4822, 0.4465]
_norm_std = [0.2023, 0.1994, 0.2010]

# Training views: random crop resized to 32, random horizontal flip,
# color jitter applied 80% of the time, grayscale 20% of the time,
# then tensor conversion and normalisation.
_train_steps = [
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply(
        [transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1)],
        p=0.8,
    ),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation views: no augmentation, only tensor conversion and normalisation.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
]
test_transform = transforms.Compose(_test_steps)

In [2]:
# check whether run in Colab
import sys
if 'google.colab' in sys.modules:
    print('Running in Colab.')
    # Install the pinned timm release only when running inside Colab.
    !pip3 install timm==0.5.4 

from timm import create_model


# Name of the timm architecture to instantiate.
model_name = "convnext_small"
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("device = ", device)
# create a ConvNeXt model : https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/convnext.py
# pretrained=False asks timm for the architecture without pretrained weights.
convnext_model = create_model(model_name, pretrained=False).to(device)

device =  cuda


In [3]:
# convnext_model.stem[0] = torch.nn.Conv2d(3, 96, kernel_size=(3,3), stride=(1,1), padding=(1,1))
# Keep only the first three sub-modules of the head; the printed result below
# shows these are global_pool, norm and flatten (presumably this drops the
# remaining classification layers — confirm against the timm version in use).
convnext_model.head = convnext_model.head[0:3]
# Display the truncated head to confirm what is left.
convnext_model.head

Sequential(
  (global_pool): SelectAdaptivePool2d (pool_type=avg, flatten=Identity())
  (norm): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)

The original paper uses the commonly used ResNet-50 as its ConvNet encoder for simplicity; in this notebook the encoder is the ConvNeXt backbone created above. Task 1 is to set up the encoder and the projection head. The parameters are adapted from the original paper.

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
#from torchvision.models.resnet import resnet50


class Model(nn.Module):
    """Base encoder ``f`` followed by a projection head ``g``.

    Args:
        feature_dim: size of the projected embedding produced by ``g``.
        backbone: module whose direct children are chained, in registration
            order, to form the encoder. Defaults to the notebook-level
            ``convnext_model``. The child modules are reused, not copied, so
            they share parameters with ``backbone``.
        in_features: width of the flattened encoder output fed into ``g``.
            The default of 768 matches the ``LayerNorm2d((768,), ...)`` head
            of the backbone printed earlier in this notebook.
    """

    def __init__(self, feature_dim=128, backbone=None, in_features=768):
        super(Model, self).__init__()

        # Resolve the default lazily so the class does not need the global
        # backbone to exist until an instance is built without one.
        if backbone is None:
            backbone = convnext_model

        # ----------------------------------------------------------------------
        # START OF YOUR CODE
        # ----------------------------------------------------------------------
        # Task 1
        # set a neural network base encoder self.f
        # hint: nn.Sequential
        self.f = nn.Sequential(*(module for _, module in backbone.named_children()))

        # set a small neural network projection head
        # Dense -> GELU -> Dense (2-layer MLP projecting the in_features-wide
        # representation, 768 by default, to a feature_dim-dimensional latent space)
        self.g = nn.Sequential(nn.Linear(in_features, 1000, bias=True),
                               nn.GELU(),
                               nn.Linear(1000, feature_dim, bias=True))

        # ----------------------------------------------------------------------
        # END OF YOUR CODE
        # ----------------------------------------------------------------------

    def forward(self, x):
        """Encode ``x`` and project it.

        Returns:
            Tuple ``(feature, out)``: the flattened encoder output and the
            projection-head output, each L2-normalised along the last dim.
        """
        x = self.f(x)
        # Collapse everything after the batch dimension into one vector.
        feature = torch.flatten(x, start_dim=1)
        out = self.g(feature)
        return F.normalize(feature, dim=-1), F.normalize(out, dim=-1)


We train the encoder network and the projection head to maximize agreement using a contrastive loss. The default number of epochs is 1 for time efficiency, since a single epoch can take about 10 minutes to run in Google Colab (note that the training cell below sets its own `epochs` value). Task 2 is to calculate the contrastive loss.
To evaluate the influence of the temperature on the contrastive loss, we run this training process 3 times with different temperature values (0.1, 0.5 and 1.0).

In [5]:
import argparse
import os

import numpy as np
import pandas as pd
import torch
import torch.optim as optim
!pip install thop
from thop import profile, clever_format
from torch.utils.data import DataLoader
from tqdm import tqdm


import math

def contrastive_loss(out_1, out_2, temperature):
    """Contrastive loss between two batches of paired embeddings.

    Row ``i`` of ``out_1`` and row ``i`` of ``out_2`` form a positive pair.
    For each of the ``2*B`` embeddings the loss is
    ``-log(exp(pos / t) / sum_j exp(sim_j / t))``, where the sum runs over the
    other ``2*B - 1`` embeddings and similarities are dot products.

    Args:
        out_1: ``[B, D]`` embeddings of the first views.
        out_2: ``[B, D]`` embeddings of the second views.
        temperature: divisor applied to the similarities before ``exp``.

    Returns:
        Scalar tensor: the mean loss over all ``2*B`` embeddings.
    """
    # ------------------------------------------------------------------
    # START OF YOUR CODE
    # ------------------------------------------------------------------
    # Task2: implement contrastive loss function and return loss variable
    # hint: loss formula could refer to the slides
    # input: out_1, out_2, temperature
    # output: loss variable

    # Take the batch size from the inputs instead of a notebook-level global,
    # so the function also works for a batch of any size.
    n = out_1.size(0)

    out = torch.cat([out_1, out_2], dim=0)
    # [2*B, 2*B]
    sim_matrix = torch.exp(torch.mm(out, out.t().contiguous()) / temperature)
    # True everywhere except the diagonal (each embedding vs. itself).
    mask = ~torch.eye(2 * n, dtype=torch.bool, device=sim_matrix.device)
    # [2*B, 2*B-1]
    sim_matrix = sim_matrix.masked_select(mask).view(2 * n, -1)

    # compute loss
    pos_sim = torch.exp(torch.sum(out_1 * out_2, dim=-1) / temperature)
    # [2*B] -- the same positive similarity serves both members of a pair
    pos_sim = torch.cat([pos_sim, pos_sim], dim=0)
    loss = (- torch.log(pos_sim / sim_matrix.sum(dim=-1))).mean()

    # ------------------------------------------------------------------
    # END OF YOUR CODE
    # ------------------------------------------------------------------

    return loss

# train for one epoch to learn unique features
def train(net, data_loader, train_optimizer, train_scheduler, temperature):
    """Train ``net`` for one pass over ``data_loader`` with the contrastive loss.

    Args:
        net: model returning ``(feature, out)`` for a batch of images.
        data_loader: iterable yielding ``(pos_1, pos_2, target)`` batches.
        train_optimizer: optimizer stepped once per batch.
        train_scheduler: LR scheduler, also stepped once per batch.
        temperature: temperature passed to ``contrastive_loss``.

    Returns:
        Average loss per sample over the epoch.

    Note:
        The progress-bar text reads the notebook-level ``epoch`` and
        ``epochs`` variables.
    """
    net.train()
    # Move inputs to wherever the model lives instead of assuming CUDA.
    device = next(net.parameters()).device
    total_loss, total_num, train_bar = 0.0, 0, tqdm(data_loader)
    for pos_1, pos_2, target in train_bar:
        pos_1 = pos_1.to(device, non_blocking=True)
        pos_2 = pos_2.to(device, non_blocking=True)
        feature_1, out_1 = net(pos_1)
        feature_2, out_2 = net(pos_2)

        loss = contrastive_loss(out_1, out_2, temperature)

        train_optimizer.zero_grad()
        loss.backward()
        train_optimizer.step()
        train_scheduler.step()

        # Weight by the real batch size rather than a global constant, so the
        # running average stays correct even if a batch is smaller.
        n = pos_1.size(0)
        total_num += n
        total_loss += loss.item() * n
        train_bar.set_description('Train Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, epochs, total_loss / total_num))

    return total_loss / total_num


# test for one epoch, use weighted knn to find the most similar images' label to assign the test image
def test(net, memory_data_loader, test_data_loader, temperature):
    """Evaluate ``net`` with a similarity-weighted k-nearest-neighbour vote.

    Features of every sample in ``memory_data_loader`` form a bank; each test
    sample is labelled by its top-``k`` most similar bank entries, weighted by
    ``exp(similarity / temperature)``.

    Args:
        net: model returning ``(feature, out)``; only ``feature`` is used.
        memory_data_loader: loader over the labelled reference set. Labels are
            read from ``memory_data_loader.dataset.targets``, so the loader
            must iterate in dataset order for features and labels to line up.
        test_data_loader: loader over the samples to classify.
        temperature: divisor applied to the similarities before ``exp``.

    Returns:
        Tuple ``(top1, top5)`` accuracies in percent.

    Note:
        Reads the notebook-level variables ``k`` (neighbours), ``c`` (number
        of classes), ``epoch`` and ``epochs`` (progress-bar text only).
    """
    net.eval()
    total_top1, total_top5, total_num, feature_bank = 0.0, 0.0, 0, []
    with torch.no_grad():
        # generate feature bank
        for data, _, target in tqdm(memory_data_loader, desc='Feature extracting'):
            feature, out = net(data.cuda(non_blocking=True))
            feature_bank.append(feature)
        # [D, N]
        feature_bank = torch.cat(feature_bank, dim=0).t().contiguous()
        # [N]
        feature_labels = torch.tensor(memory_data_loader.dataset.targets, device=feature_bank.device)
        # loop test data to predict the label by weighted knn search
        test_bar = tqdm(test_data_loader)
        for data, _, target in test_bar:
            data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)
            feature, out = net(data)

            total_num += data.size(0)
            # compute cos similarity between each feature vector and feature bank ---> [B, N]
            sim_matrix = torch.mm(feature, feature_bank)
            # [B, K]
            sim_weight, sim_indices = sim_matrix.topk(k=k, dim=-1)
            # [B, K]
            # look up the label of every selected neighbour
            sim_labels = torch.gather(feature_labels.expand(data.size(0), -1), dim=-1, index=sim_indices)
            # turn similarities into positive vote weights
            sim_weight = (sim_weight / temperature).exp()

            # counts for each class
            one_hot_label = torch.zeros(data.size(0) * k, c, device=sim_labels.device)
            # [B*K, C]
            one_hot_label = one_hot_label.scatter(dim=-1, index=sim_labels.view(-1, 1), value=1.0)
            # weighted score ---> [B, C]
            pred_scores = torch.sum(one_hot_label.view(data.size(0), -1, c) * sim_weight.unsqueeze(dim=-1), dim=1)

            # classes ordered from highest to lowest score for every sample
            pred_labels = pred_scores.argsort(dim=-1, descending=True)
            total_top1 += torch.sum((pred_labels[:, :1] == target.unsqueeze(dim=-1)).any(dim=-1).float()).item()
            total_top5 += torch.sum((pred_labels[:, :5] == target.unsqueeze(dim=-1)).any(dim=-1).float()).item()
            test_bar.set_description('Test Epoch: [{}/{}] Acc@1:{:.2f}% Acc@5:{:.2f}%'
                                     .format(epoch, epochs, total_top1 / total_num * 100, total_top5 / total_num * 100))

    return total_top1 / total_num * 100, total_top5 / total_num * 100



In [6]:
# Re-evaluate the compute device (same expression as in the model-creation
# cell); the checkpoint-resume code in the training cell reads `device`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Train SimCLR
import numpy as np

# Feature dim for latent vector, Temperature used in softmax, Top k most similar images used to predict the label
# NOTE: the training loop in this cell passes `temp0` (below) as the
# temperature; `temp` is kept only for backward compatibility.
feature_dim, temp, k = 128, [0.1], 200
# Number of images in each mini-batch
batch_size = 128

# Learning-rate range swept by the 'lambdalr' range test (exponential ramp).
lr_start = 1.25e-4
lr_end = 0.07943
# Peak LR for 'onecyclelr'. Rule of thumb: peak LR from the range test * 3/8.
max_lrvalue = 0.01562
temp0 = 0.1  # contrastive loss temperature setting

# 'lambdalr' for testing the range of learning rates, 'onecyclelr' for actual training
schedulertype = 'onecyclelr'
# Number of sweeps over the dataset, chosen per scheduler.
if schedulertype == 'lambdalr':
    epochs = 10  # coarse range test: 10 epochs recommended, fine range test: 100
elif schedulertype == 'onecyclelr':
    epochs = 500
else:
    # Fail here rather than printing and hitting a NameError on `epochs` below.
    raise ValueError(
        "choose valid option for scheduler: 'lambdalr' or 'onecyclelr', got {!r}".format(schedulertype))

smoothfactor = 0.95  # weight of the current epoch's loss in the smoothed loss
IterationStr = 'It1'
loadmodel = 0  # loadmodel=0 From scratch or loadmodel=1 Continue from presaved model

# Output locations; file names encode the main hyper-parameters.
pathtosave = '/home/umaiyal/CS5260Project/results/Simclr_convnext_RAdamv2'+schedulertype+'/'+IterationStr+'/'
save_name_pre = '{}_{}_{}_{}_{}'.format(feature_dim, temp0, k, batch_size, epochs)
csvfilename = pathtosave+'{}_statistics.csv'.format(save_name_pre)
modelfilename = pathtosave+'{}_model.pth'.format(save_name_pre)

# data prepare
# Training pairs use the random augmentations and drop the last partial batch.
train_data = CIFAR10Pair(root='data', train=True, transform=train_transform, download=True)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=16, pin_memory=True,
                          drop_last=True)
# Memory bank (training images) and test images use the deterministic
# transform and are read in dataset order (shuffle=False).
memory_data = CIFAR10Pair(root='data', train=True, transform=test_transform, download=True)
memory_loader = DataLoader(memory_data, batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True)
test_data = CIFAR10Pair(root='data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=True)

import torch
# The original selected GPU index 1 unconditionally, which raises on hosts
# with fewer than two GPUs. Keep that choice when the device exists and
# otherwise stay on the default device.
if torch.cuda.device_count() > 1:
    torch.cuda.set_device(1)

# model setup and optimizer config
model = Model(feature_dim).cuda()

# Report parameter count and FLOPs for a single 32x32 RGB input.
flops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32).cuda(),))
flops, params = clever_format([flops, params])
print('# Model Params: {} FLOPs: {}'.format(params, flops))

# Base lr=1.0 so that LambdaLR's multiplicative factor is the learning rate
# itself; OneCycleLR sets the learning rate on its own.
optimizer = optim.RAdam(model.parameters(), lr=1.0)
#optimizer=optim.Adam(model.parameters(),lr=1.0)
#exponentially increase learning rate from low to high
def lrs(batch):
    """Learning-rate factor after ``batch`` scheduler steps.

    Interpolates linearly in log2 space from ``lr_start`` (at step 0) to
    ``lr_end`` (after ``len(train_loader) * epochs`` steps), i.e. the value
    grows exponentially with the step count.
    """
    log_lo = math.log2(lr_start)
    span = math.log2(lr_end) - log_lo
    exponent = log_lo + span * batch / len(train_loader) / epochs
    return 2 ** exponent
   
# Build the LR scheduler; both variants are stepped once per batch by train().
if schedulertype=='lambdalr':
    # LR range test: the optimizer's base lr (1.0) is multiplied by lrs(step).
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lrs)
elif schedulertype=='onecyclelr':
    div_factorvalue=10#round(max_lrvalue/0.0006)
    # total_steps covers every batch of every epoch; initial lr = max_lr / div_factor.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,div_factor=div_factorvalue,max_lr=max_lrvalue,total_steps=len(train_loader)*epochs,epochs=epochs)
else:
    # Fail here instead of printing and leaving `scheduler` undefined.
    raise ValueError(
        "choose valid option for scheduler: 'lambdalr' or 'onecyclelr', got {!r}".format(schedulertype))

if loadmodel==1:
    # Resume: restore model / optimizer / scheduler state and continue from
    # the epoch after the one stored in the checkpoint.
    checkpoint=torch.load(modelfilename)
    model.load_state_dict(checkpoint['model_state_dict'],strict=False)
    model.to(device)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    startepoch=checkpoint['epoch']+1
    best_acc=checkpoint['best_acc']

    print(startepoch)
else:
    startepoch=1
    best_acc=0


# Number of classes, read by test() as a notebook-level variable.
c = len(memory_data.classes)

if not os.path.exists(pathtosave):
    os.makedirs(pathtosave)

# Per-epoch history buffers; index e-1 holds the value for epoch e.
train_loss_epoch=torch.zeros(epochs)
smooth_loss_epoch=torch.zeros(epochs)
test_acc_1_epoch=torch.zeros(epochs)
test_acc_5_epoch=torch.zeros(epochs)
lr_epoch=torch.zeros(epochs)

results = {'train_loss': [], 'test_acc@1': [], 'test_acc@5': [], 'smooth_loss': [], 'lr_epoch': []}

if loadmodel==1:
    # Reload the statistics written by earlier epochs. CSV column 0 is the
    # 'epoch' index; columns 1..5 follow the key order of `results`.
    history = pd.read_csv(csvfilename)
    restore_targets = [
        ('train_loss', train_loss_epoch),
        ('test_acc@1', test_acc_1_epoch),
        ('test_acc@5', test_acc_5_epoch),
        ('smooth_loss', smooth_loss_epoch),
        ('lr_epoch', lr_epoch),
    ]
    for column, (key, buffer) in enumerate(restore_targets, start=1):
        # Only rows for epochs before `startepoch` are kept.
        values = pd.to_numeric(history.iloc[0:startepoch-1, column]).to_numpy(dtype=float)
        buffer[0:values.size] = torch.as_tensor(values, dtype=buffer.dtype)
        # Plain Python floats, so later appends keep the lists homogeneous.
        results[key] = values.tolist()


# Main loop. `epoch` must keep this name: train() and test() read it (and
# `epochs`) as notebook-level variables for their progress bars.
for epoch in range(startepoch, epochs + 1):
    slot = epoch - 1  # zero-based position of this epoch in the history tensors

    train_loss = train(model, train_loader, optimizer, scheduler, temp0)
    train_loss_epoch[slot] = train_loss

    # Blend the current loss (weight smoothfactor) with the previous smoothed
    # value (weight 1 - smoothfactor); epoch 1 has no previous value.
    smooth_loss = (
        float(train_loss_epoch[slot] * smoothfactor + smooth_loss_epoch[slot - 1] * (1.0 - smoothfactor))
        if epoch > 1
        else train_loss
    )
    smooth_loss_epoch[slot] = torch.tensor(smooth_loss)

    # Learning rate as left by the scheduler after this epoch's last step.
    current_lr = optimizer.param_groups[0]['lr']
    print(current_lr)
    print(smooth_loss_epoch[slot])
    lr_epoch[slot] = float(current_lr)

    results['train_loss'].append(train_loss)
    test_acc_1, test_acc_5 = test(model, memory_loader, test_loader, temp0)
    for key, value in (('test_acc@1', test_acc_1),
                       ('test_acc@5', test_acc_5),
                       ('smooth_loss', smooth_loss),
                       ('lr_epoch', current_lr)):
        results[key].append(value)

    # save statistics
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv(csvfilename, index_label='epoch')

    # Checkpoint only when top-1 accuracy improves on the best seen so far.
    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        snapshot = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_acc': best_acc,
        }
        torch.save(snapshot, modelfilename)

    test_acc_1_epoch[slot] = test_acc_1
    test_acc_5_epoch[slot] = test_acc_5

# Report the learning rate recorded at the epoch with the lowest smoothed
# loss, plus the 3/8 and 3/80 rule-of-thumb values derived from it.
minloss_loc = torch.argmin(smooth_loss_epoch)
minloss_loclr = lr_epoch[minloss_loc]
print(f'lr corresponding to minloss={minloss_loclr}')
print(f'suggested maxlr={minloss_loclr*3/8}')
print(f'suggested minlr={minloss_loclr*3/80}')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


  kernel = torch.DoubleTensor([*(x[0].shape[2:])]) // torch.DoubleTensor(list((m.output_size,))).squeeze()


[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[91m[WARN] Cannot find rule for <class 'timm.models.convnext.LayerNorm2d'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.linear.Identity'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.normalization.LayerNorm'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.activation.GELU'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[91m[WARN] Cannot find rule for <class 'timm.models.layers.mlp.Mlp'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 

Train Epoch: [1/500] Loss: 4.3234: 100%|██████████| 390/390 [00:34<00:00, 11.21it/s]


0.0015635416285581846
tensor(4.3234)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.12it/s] 
Test Epoch: [1/500] Acc@1:46.42% Acc@5:91.17%: 100%|██████████| 79/79 [00:01<00:00, 47.94it/s]
Train Epoch: [2/500] Loss: 3.8685: 100%|██████████| 390/390 [00:33<00:00, 11.74it/s]


0.0015681658380003807
tensor(3.8912)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 80.55it/s] 
Test Epoch: [2/500] Acc@1:48.94% Acc@5:92.36%: 100%|██████████| 79/79 [00:01<00:00, 47.01it/s]
Train Epoch: [3/500] Loss: 3.5550: 100%|██████████| 390/390 [00:34<00:00, 11.24it/s]


0.0015758705999261522
tensor(3.5718)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.34it/s] 
Test Epoch: [3/500] Acc@1:50.30% Acc@5:92.67%: 100%|██████████| 79/79 [00:01<00:00, 45.91it/s]
Train Epoch: [4/500] Loss: 3.3203: 100%|██████████| 390/390 [00:33<00:00, 11.61it/s]


0.0015866525346567364
tensor(3.3328)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.91it/s] 
Test Epoch: [4/500] Acc@1:51.22% Acc@5:93.03%: 100%|██████████| 79/79 [00:01<00:00, 49.47it/s]
Train Epoch: [5/500] Loss: 3.1310: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.001600506912717528
tensor(3.1411)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.23it/s] 
Test Epoch: [5/500] Acc@1:51.61% Acc@5:93.32%: 100%|██████████| 79/79 [00:01<00:00, 48.97it/s]
Train Epoch: [6/500] Loss: 2.9788: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.0016174276569126626
tensor(2.9869)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.56it/s] 
Test Epoch: [6/500] Acc@1:52.66% Acc@5:93.67%: 100%|██████████| 79/79 [00:01<00:00, 47.38it/s]
Train Epoch: [7/500] Loss: 2.8193: 100%|██████████| 390/390 [00:33<00:00, 11.64it/s]


0.001637407344990757
tensor(2.8277)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.32it/s] 
Test Epoch: [7/500] Acc@1:53.09% Acc@5:93.77%: 100%|██████████| 79/79 [00:01<00:00, 49.16it/s]
Train Epoch: [8/500] Loss: 2.7039: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.0016604372129006718
tensor(2.7101)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.36it/s] 
Test Epoch: [8/500] Acc@1:52.87% Acc@5:93.77%: 100%|██████████| 79/79 [00:01<00:00, 50.04it/s]
Train Epoch: [9/500] Loss: 2.5963: 100%|██████████| 390/390 [00:34<00:00, 11.41it/s]


0.0016865071586358478
tensor(2.6020)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.16it/s] 
Test Epoch: [9/500] Acc@1:53.83% Acc@5:94.10%: 100%|██████████| 79/79 [00:01<00:00, 49.23it/s]
Train Epoch: [10/500] Loss: 2.4914: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.0017156057466655248
tensor(2.4970)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.65it/s] 
Test Epoch: [10/500] Acc@1:54.58% Acc@5:94.22%: 100%|██████████| 79/79 [00:01<00:00, 47.52it/s]
Train Epoch: [11/500] Loss: 2.4002: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.0017477202129509185
tensor(2.4050)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.88it/s] 
Test Epoch: [11/500] Acc@1:54.33% Acc@5:94.75%: 100%|██████████| 79/79 [00:01<00:00, 48.76it/s]
Train Epoch: [12/500] Loss: 2.3112: 100%|██████████| 390/390 [00:33<00:00, 11.59it/s]


0.0017828364705441396
tensor(2.3159)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.19it/s] 
Test Epoch: [12/500] Acc@1:55.72% Acc@5:95.09%: 100%|██████████| 79/79 [00:01<00:00, 48.04it/s]
Train Epoch: [13/500] Loss: 2.2066: 100%|██████████| 390/390 [00:33<00:00, 11.69it/s]


0.001820939115767405
tensor(2.2120)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.23it/s] 
Test Epoch: [13/500] Acc@1:56.46% Acc@5:94.93%: 100%|██████████| 79/79 [00:01<00:00, 50.94it/s]
Train Epoch: [14/500] Loss: 2.1140: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.0018620114349698347
tensor(2.1189)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.21it/s] 
Test Epoch: [14/500] Acc@1:56.28% Acc@5:95.28%: 100%|██████████| 79/79 [00:01<00:00, 49.36it/s]
Train Epoch: [15/500] Loss: 2.0727: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.0019060354118588634
tensor(2.0750)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.79it/s] 
Test Epoch: [15/500] Acc@1:57.62% Acc@5:95.69%: 100%|██████████| 79/79 [00:01<00:00, 50.00it/s]
Train Epoch: [16/500] Loss: 1.9902: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.0019529917354030417
tensor(1.9944)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.30it/s] 
Test Epoch: [16/500] Acc@1:58.93% Acc@5:95.92%: 100%|██████████| 79/79 [00:01<00:00, 48.32it/s]
Train Epoch: [17/500] Loss: 1.8992: 100%|██████████| 390/390 [00:34<00:00, 11.30it/s]


0.0020028598083027937
tensor(1.9040)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.57it/s] 
Test Epoch: [17/500] Acc@1:59.61% Acc@5:96.09%: 100%|██████████| 79/79 [00:01<00:00, 49.51it/s]
Train Epoch: [18/500] Loss: 1.8453: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.002055617756025377
tensor(1.8482)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.16it/s] 
Test Epoch: [18/500] Acc@1:60.05% Acc@5:96.38%: 100%|██████████| 79/79 [00:01<00:00, 47.10it/s]
Train Epoch: [19/500] Loss: 1.8264: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.002111242436400103
tensor(1.8275)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.34it/s] 
Test Epoch: [19/500] Acc@1:59.98% Acc@5:96.32%: 100%|██████████| 79/79 [00:01<00:00, 48.60it/s]
Train Epoch: [20/500] Loss: 1.7735: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.0021697094497696093
tensor(1.7762)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.17it/s] 
Test Epoch: [20/500] Acc@1:61.17% Acc@5:96.37%: 100%|██████████| 79/79 [00:01<00:00, 48.76it/s]
Train Epoch: [21/500] Loss: 1.7317: 100%|██████████| 390/390 [00:33<00:00, 11.72it/s]


0.0022309931496927154
tensor(1.7339)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.47it/s] 
Test Epoch: [21/500] Acc@1:60.56% Acc@5:96.42%: 100%|██████████| 79/79 [00:01<00:00, 47.86it/s]
Train Epoch: [22/500] Loss: 1.7062: 100%|██████████| 390/390 [00:34<00:00, 11.39it/s]


0.0022950666541941937
tensor(1.7076)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.24it/s] 
Test Epoch: [22/500] Acc@1:61.90% Acc@5:96.41%: 100%|██████████| 79/79 [00:01<00:00, 46.65it/s]
Train Epoch: [23/500] Loss: 1.6986: 100%|██████████| 390/390 [00:33<00:00, 11.60it/s]


0.0023619018575564785
tensor(1.6990)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.04it/s] 
Test Epoch: [23/500] Acc@1:62.72% Acc@5:96.54%: 100%|██████████| 79/79 [00:01<00:00, 47.17it/s]
Train Epoch: [24/500] Loss: 1.6495: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.0024314694426482066
tensor(1.6519)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.69it/s] 
Test Epoch: [24/500] Acc@1:62.26% Acc@5:96.48%: 100%|██████████| 79/79 [00:01<00:00, 48.26it/s]
Train Epoch: [25/500] Loss: 1.6223: 100%|██████████| 390/390 [00:34<00:00, 11.41it/s]


0.0025037388937841007
tensor(1.6238)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.92it/s] 
Test Epoch: [25/500] Acc@1:62.30% Acc@5:96.70%: 100%|██████████| 79/79 [00:01<00:00, 47.00it/s]
Train Epoch: [26/500] Loss: 1.6219: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.002578678510110623
tensor(1.6220)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.28it/s] 
Test Epoch: [26/500] Acc@1:62.78% Acc@5:96.64%: 100%|██████████| 79/79 [00:01<00:00, 48.58it/s]
Train Epoch: [27/500] Loss: 1.5945: 100%|██████████| 390/390 [00:33<00:00, 11.64it/s]


0.002656255419511493
tensor(1.5959)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.06it/s] 
Test Epoch: [27/500] Acc@1:63.03% Acc@5:96.91%: 100%|██████████| 79/79 [00:01<00:00, 48.36it/s]
Train Epoch: [28/500] Loss: 1.5750: 100%|██████████| 390/390 [00:34<00:00, 11.23it/s]


0.0027364355930269754
tensor(1.5760)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.38it/s] 
Test Epoch: [28/500] Acc@1:63.17% Acc@5:96.84%: 100%|██████████| 79/79 [00:01<00:00, 48.69it/s]
Train Epoch: [29/500] Loss: 1.5488: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.0028191838597806194
tensor(1.5502)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.37it/s] 
Test Epoch: [29/500] Acc@1:63.29% Acc@5:96.93%: 100%|██████████| 79/79 [00:01<00:00, 48.11it/s]
Train Epoch: [30/500] Loss: 1.5270: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.00290446392240689
tensor(1.5282)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.10it/s] 
Test Epoch: [30/500] Acc@1:63.88% Acc@5:96.99%: 100%|██████████| 79/79 [00:01<00:00, 46.42it/s]
Train Epoch: [31/500] Loss: 1.5202: 100%|██████████| 390/390 [00:33<00:00, 11.69it/s]


0.0029922383729729437
tensor(1.5206)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.34it/s] 
Test Epoch: [31/500] Acc@1:64.23% Acc@5:97.00%: 100%|██████████| 79/79 [00:01<00:00, 48.51it/s]
Train Epoch: [32/500] Loss: 1.5196: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.0030824687093875362
tensor(1.5196)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.66it/s] 
Test Epoch: [32/500] Acc@1:64.45% Acc@5:97.22%: 100%|██████████| 79/79 [00:01<00:00, 48.43it/s]
Train Epoch: [33/500] Loss: 1.5053: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.0031751153522898926
tensor(1.5060)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.56it/s] 
Test Epoch: [33/500] Acc@1:64.91% Acc@5:97.16%: 100%|██████████| 79/79 [00:01<00:00, 46.57it/s]
Train Epoch: [34/500] Loss: 1.4972: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.0032701376624111253
tensor(1.4976)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.45it/s] 
Test Epoch: [34/500] Acc@1:64.67% Acc@5:97.23%: 100%|██████████| 79/79 [00:01<00:00, 48.79it/s]
Train Epoch: [35/500] Loss: 1.4808: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.003367493958400563
tensor(1.4816)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.84it/s] 
Test Epoch: [35/500] Acc@1:65.37% Acc@5:97.16%: 100%|██████████| 79/79 [00:01<00:00, 47.27it/s]
Train Epoch: [36/500] Loss: 1.4764: 100%|██████████| 390/390 [00:33<00:00, 11.61it/s]


0.0034671415351091826
tensor(1.4766)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.41it/s] 
Test Epoch: [36/500] Acc@1:65.71% Acc@5:97.29%: 100%|██████████| 79/79 [00:01<00:00, 49.61it/s]
Train Epoch: [37/500] Loss: 1.4546: 100%|██████████| 390/390 [00:33<00:00, 11.65it/s]


0.0035690366823221617
tensor(1.4557)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.93it/s] 
Test Epoch: [37/500] Acc@1:66.07% Acc@5:97.40%: 100%|██████████| 79/79 [00:01<00:00, 47.72it/s]
Train Epoch: [38/500] Loss: 1.4408: 100%|██████████| 390/390 [00:33<00:00, 11.67it/s]


0.003673134703932261
tensor(1.4416)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.25it/s] 
Test Epoch: [38/500] Acc@1:66.29% Acc@5:97.49%: 100%|██████████| 79/79 [00:01<00:00, 46.65it/s]
Train Epoch: [39/500] Loss: 1.4377: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.0037793899375456966
tensor(1.4379)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.51it/s] 
Test Epoch: [39/500] Acc@1:65.87% Acc@5:97.26%: 100%|██████████| 79/79 [00:01<00:00, 47.75it/s]
Train Epoch: [40/500] Loss: 1.4170: 100%|██████████| 390/390 [00:34<00:00, 11.46it/s]


0.0038877557745118484
tensor(1.4181)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.63it/s] 
Test Epoch: [40/500] Acc@1:65.82% Acc@5:97.50%: 100%|██████████| 79/79 [00:01<00:00, 49.47it/s]
Train Epoch: [41/500] Loss: 1.4107: 100%|██████████| 390/390 [00:33<00:00, 11.71it/s]


0.003998184680368062
tensor(1.4111)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.32it/s] 
Test Epoch: [41/500] Acc@1:66.02% Acc@5:97.50%: 100%|██████████| 79/79 [00:01<00:00, 47.72it/s]
Train Epoch: [42/500] Loss: 1.4047: 100%|██████████| 390/390 [00:33<00:00, 11.81it/s]


0.004110628215690549
tensor(1.4050)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.79it/s] 
Test Epoch: [42/500] Acc@1:66.15% Acc@5:97.58%: 100%|██████████| 79/79 [00:01<00:00, 48.50it/s]
Train Epoch: [43/500] Loss: 1.3988: 100%|██████████| 390/390 [00:33<00:00, 11.58it/s]


0.004225037057342233
tensor(1.3991)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.80it/s] 
Test Epoch: [43/500] Acc@1:66.82% Acc@5:97.56%: 100%|██████████| 79/79 [00:01<00:00, 48.10it/s]
Train Epoch: [44/500] Loss: 1.3865: 100%|██████████| 390/390 [00:34<00:00, 11.29it/s]


0.004341361020108266
tensor(1.3872)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.75it/s] 
Test Epoch: [44/500] Acc@1:67.03% Acc@5:97.58%: 100%|██████████| 79/79 [00:01<00:00, 50.53it/s]
Train Epoch: [45/500] Loss: 1.3872: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.004459549078709659
tensor(1.3872)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.29it/s] 
Test Epoch: [45/500] Acc@1:66.54% Acc@5:97.36%: 100%|██████████| 79/79 [00:01<00:00, 48.94it/s]
Train Epoch: [46/500] Loss: 1.3784: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.00457954939018543
tensor(1.3788)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.69it/s] 
Test Epoch: [46/500] Acc@1:66.56% Acc@5:97.42%: 100%|██████████| 79/79 [00:01<00:00, 46.55it/s]
Train Epoch: [47/500] Loss: 1.3544: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.0047013093166334095
tensor(1.3556)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.90it/s] 
Test Epoch: [47/500] Acc@1:67.46% Acc@5:97.66%: 100%|██████████| 79/79 [00:01<00:00, 49.10it/s]
Train Epoch: [48/500] Loss: 1.3588: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.0048247754482997635
tensor(1.3586)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.81it/s] 
Test Epoch: [48/500] Acc@1:67.51% Acc@5:97.46%: 100%|██████████| 79/79 [00:01<00:00, 49.36it/s]
Train Epoch: [49/500] Loss: 1.3541: 100%|██████████| 390/390 [00:33<00:00, 11.71it/s]


0.004949893627007072
tensor(1.3543)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.42it/s] 
Test Epoch: [49/500] Acc@1:67.32% Acc@5:97.59%: 100%|██████████| 79/79 [00:01<00:00, 46.37it/s]
Train Epoch: [50/500] Loss: 1.3570: 100%|██████████| 390/390 [00:33<00:00, 11.55it/s]


0.005076608969910711
tensor(1.3569)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.43it/s] 
Test Epoch: [50/500] Acc@1:66.81% Acc@5:97.53%: 100%|██████████| 79/79 [00:01<00:00, 46.15it/s]
Train Epoch: [51/500] Loss: 1.3480: 100%|██████████| 390/390 [00:33<00:00, 11.66it/s]


0.005204865893573092
tensor(1.3484)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.98it/s] 
Test Epoch: [51/500] Acc@1:66.59% Acc@5:97.51%: 100%|██████████| 79/79 [00:01<00:00, 43.22it/s]
Train Epoch: [52/500] Loss: 1.3468: 100%|██████████| 390/390 [00:34<00:00, 11.32it/s]


0.0053346081383452496
tensor(1.3469)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.20it/s] 
Test Epoch: [52/500] Acc@1:67.50% Acc@5:97.61%: 100%|██████████| 79/79 [00:01<00:00, 49.50it/s]
Train Epoch: [53/500] Loss: 1.3342: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.005465778793045013
tensor(1.3349)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.41it/s] 
Test Epoch: [53/500] Acc@1:67.82% Acc@5:97.84%: 100%|██████████| 79/79 [00:01<00:00, 48.20it/s]
Train Epoch: [54/500] Loss: 1.3318: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.005598320319920984
tensor(1.3320)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.64it/s] 
Test Epoch: [54/500] Acc@1:67.63% Acc@5:97.75%: 100%|██████████| 79/79 [00:01<00:00, 46.81it/s]
Train Epoch: [55/500] Loss: 1.3160: 100%|██████████| 390/390 [00:34<00:00, 11.40it/s]


0.005732174579891364
tensor(1.3168)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.17it/s] 
Test Epoch: [55/500] Acc@1:67.94% Acc@5:97.83%: 100%|██████████| 79/79 [00:01<00:00, 49.17it/s]
Train Epoch: [56/500] Loss: 1.3222: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.005867282858046552
tensor(1.3219)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.52it/s] 
Test Epoch: [56/500] Acc@1:68.11% Acc@5:97.63%: 100%|██████████| 79/79 [00:01<00:00, 49.03it/s]
Train Epoch: [57/500] Loss: 1.3030: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.006003585889404315
tensor(1.3039)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.58it/s] 
Test Epoch: [57/500] Acc@1:68.42% Acc@5:97.76%: 100%|██████████| 79/79 [00:01<00:00, 48.54it/s]
Train Epoch: [58/500] Loss: 1.3068: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.0061410238849062635
tensor(1.3066)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.11it/s] 
Test Epoch: [58/500] Acc@1:69.23% Acc@5:97.94%: 100%|██████████| 79/79 [00:01<00:00, 47.29it/s]
Train Epoch: [59/500] Loss: 1.3020: 100%|██████████| 390/390 [00:33<00:00, 11.63it/s]


0.006279536557644206
tensor(1.3022)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.80it/s] 
Test Epoch: [59/500] Acc@1:68.53% Acc@5:97.93%: 100%|██████████| 79/79 [00:01<00:00, 47.12it/s]
Train Epoch: [60/500] Loss: 1.2877: 100%|██████████| 390/390 [00:34<00:00, 11.46it/s]


0.006419063149304875
tensor(1.2885)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.54it/s] 
Test Epoch: [60/500] Acc@1:68.68% Acc@5:97.92%: 100%|██████████| 79/79 [00:01<00:00, 46.95it/s]
Train Epoch: [61/500] Loss: 1.2904: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.006559542456821461
tensor(1.2903)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.67it/s] 
Test Epoch: [61/500] Acc@1:69.11% Acc@5:97.86%: 100%|██████████| 79/79 [00:01<00:00, 46.89it/s]
Train Epoch: [62/500] Loss: 1.2739: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.00670091285922019
tensor(1.2747)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.37it/s] 
Test Epoch: [62/500] Acc@1:68.15% Acc@5:97.67%: 100%|██████████| 79/79 [00:01<00:00, 49.29it/s]
Train Epoch: [63/500] Loss: 1.2648: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.0068431123446502685
tensor(1.2653)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.94it/s] 
Test Epoch: [63/500] Acc@1:68.72% Acc@5:97.89%: 100%|██████████| 79/79 [00:01<00:00, 47.68it/s]
Train Epoch: [64/500] Loss: 1.2597: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.006986078537585254
tensor(1.2599)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.42it/s] 
Test Epoch: [64/500] Acc@1:68.43% Acc@5:97.72%: 100%|██████████| 79/79 [00:01<00:00, 47.35it/s]
Train Epoch: [65/500] Loss: 1.2739: 100%|██████████| 390/390 [00:34<00:00, 11.33it/s]


0.0071297487261839446
tensor(1.2732)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.46it/s] 
Test Epoch: [65/500] Acc@1:69.23% Acc@5:98.02%: 100%|██████████| 79/79 [00:01<00:00, 50.02it/s]
Train Epoch: [66/500] Loss: 1.2676: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.007274059889798826
tensor(1.2678)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.33it/s] 
Test Epoch: [66/500] Acc@1:69.45% Acc@5:97.83%: 100%|██████████| 79/79 [00:01<00:00, 47.89it/s]
Train Epoch: [67/500] Loss: 1.2579: 100%|██████████| 390/390 [00:34<00:00, 11.46it/s]


0.007418948726619939
tensor(1.2584)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.27it/s] 
Test Epoch: [67/500] Acc@1:69.28% Acc@5:97.83%: 100%|██████████| 79/79 [00:01<00:00, 47.05it/s]
Train Epoch: [68/500] Loss: 1.2584: 100%|██████████| 390/390 [00:33<00:00, 11.57it/s]


0.0075643516814421045
tensor(1.2584)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.53it/s] 
Test Epoch: [68/500] Acc@1:68.93% Acc@5:97.80%: 100%|██████████| 79/79 [00:01<00:00, 49.77it/s]
Train Epoch: [69/500] Loss: 1.2564: 100%|██████████| 390/390 [00:33<00:00, 11.58it/s]


0.007710204973543286
tensor(1.2565)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 80.27it/s] 
Test Epoch: [69/500] Acc@1:69.19% Acc@5:97.87%: 100%|██████████| 79/79 [00:01<00:00, 46.38it/s]
Train Epoch: [70/500] Loss: 1.2502: 100%|██████████| 390/390 [00:33<00:00, 11.67it/s]


0.007856444624661878
tensor(1.2505)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.54it/s] 
Test Epoch: [70/500] Acc@1:69.83% Acc@5:97.79%: 100%|██████████| 79/79 [00:01<00:00, 45.17it/s]
Train Epoch: [71/500] Loss: 1.2519: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.008003006487060652
tensor(1.2518)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.06it/s] 
Test Epoch: [71/500] Acc@1:69.26% Acc@5:97.92%: 100%|██████████| 79/79 [00:01<00:00, 47.87it/s]
Train Epoch: [72/500] Loss: 1.2446: 100%|██████████| 390/390 [00:34<00:00, 11.25it/s]


0.008149826271665021
tensor(1.2450)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.23it/s] 
Test Epoch: [72/500] Acc@1:70.02% Acc@5:97.90%: 100%|██████████| 79/79 [00:01<00:00, 48.00it/s]
Train Epoch: [73/500] Loss: 1.2289: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.008296839576263336
tensor(1.2297)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.46it/s] 
Test Epoch: [73/500] Acc@1:69.02% Acc@5:97.84%: 100%|██████████| 79/79 [00:01<00:00, 48.79it/s]
Train Epoch: [74/500] Loss: 1.2250: 100%|██████████| 390/390 [00:34<00:00, 11.39it/s]


0.008443981913756778
tensor(1.2252)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.52it/s] 
Test Epoch: [74/500] Acc@1:69.69% Acc@5:97.85%: 100%|██████████| 79/79 [00:01<00:00, 47.91it/s]
Train Epoch: [75/500] Loss: 1.2221: 100%|██████████| 390/390 [00:33<00:00, 11.63it/s]


0.008591188740446499
tensor(1.2223)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.20it/s] 
Test Epoch: [75/500] Acc@1:69.96% Acc@5:97.92%: 100%|██████████| 79/79 [00:01<00:00, 47.44it/s]
Train Epoch: [76/500] Loss: 1.2288: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.008738395484345591
tensor(1.2285)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 81.60it/s] 
Test Epoch: [76/500] Acc@1:70.32% Acc@5:98.13%: 100%|██████████| 79/79 [00:01<00:00, 48.47it/s]
Train Epoch: [77/500] Loss: 1.2370: 100%|██████████| 390/390 [00:33<00:00, 11.57it/s]


0.008885537573503462
tensor(1.2366)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.60it/s] 
Test Epoch: [77/500] Acc@1:70.41% Acc@5:98.04%: 100%|██████████| 79/79 [00:01<00:00, 47.41it/s]
Train Epoch: [78/500] Loss: 1.2191: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.009032550464330201
tensor(1.2200)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.45it/s] 
Test Epoch: [78/500] Acc@1:69.59% Acc@5:98.00%: 100%|██████████| 79/79 [00:01<00:00, 47.90it/s]
Train Epoch: [79/500] Loss: 1.2162: 100%|██████████| 390/390 [00:34<00:00, 11.47it/s]


0.009179369669908486
tensor(1.2164)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 80.48it/s] 
Test Epoch: [79/500] Acc@1:70.04% Acc@5:97.93%: 100%|██████████| 79/79 [00:01<00:00, 46.93it/s]
Train Epoch: [80/500] Loss: 1.2227: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.009325930788280654
tensor(1.2224)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.40it/s] 
Test Epoch: [80/500] Acc@1:70.67% Acc@5:97.97%: 100%|██████████| 79/79 [00:01<00:00, 48.55it/s]
Train Epoch: [81/500] Loss: 1.2051: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.009472169530698488
tensor(1.2060)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.67it/s] 
Test Epoch: [81/500] Acc@1:70.72% Acc@5:98.09%: 100%|██████████| 79/79 [00:01<00:00, 47.89it/s]
Train Epoch: [82/500] Loss: 1.2111: 100%|██████████| 390/390 [00:34<00:00, 11.24it/s]


0.009618021749823357
tensor(1.2109)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.11it/s] 
Test Epoch: [82/500] Acc@1:70.62% Acc@5:98.19%: 100%|██████████| 79/79 [00:01<00:00, 47.38it/s]
Train Epoch: [83/500] Loss: 1.2058: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.009763423467864315
tensor(1.2061)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.95it/s] 
Test Epoch: [83/500] Acc@1:70.74% Acc@5:98.17%: 100%|██████████| 79/79 [00:01<00:00, 49.02it/s]
Train Epoch: [84/500] Loss: 1.1915: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.00990831090464184
tensor(1.1922)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.68it/s] 
Test Epoch: [84/500] Acc@1:70.82% Acc@5:98.12%: 100%|██████████| 79/79 [00:01<00:00, 48.38it/s]
Train Epoch: [85/500] Loss: 1.1977: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.010052620505564875
tensor(1.1974)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.61it/s] 
Test Epoch: [85/500] Acc@1:70.45% Acc@5:97.86%: 100%|██████████| 79/79 [00:01<00:00, 49.76it/s]
Train Epoch: [86/500] Loss: 1.1864: 100%|██████████| 390/390 [00:34<00:00, 11.28it/s]


0.010196288969508931
tensor(1.1869)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.79it/s] 
Test Epoch: [86/500] Acc@1:70.58% Acc@5:98.07%: 100%|██████████| 79/79 [00:01<00:00, 48.24it/s]
Train Epoch: [87/500] Loss: 1.1830: 100%|██████████| 390/390 [00:33<00:00, 11.79it/s]


0.010339253276583017
tensor(1.1832)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.41it/s] 
Test Epoch: [87/500] Acc@1:70.56% Acc@5:97.95%: 100%|██████████| 79/79 [00:01<00:00, 47.76it/s]
Train Epoch: [88/500] Loss: 1.1835: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.010481450715773156
tensor(1.1835)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.81it/s] 
Test Epoch: [88/500] Acc@1:70.12% Acc@5:98.11%: 100%|██████████| 79/79 [00:01<00:00, 47.02it/s]
Train Epoch: [89/500] Loss: 1.1805: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.010622818912450482
tensor(1.1807)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.71it/s] 
Test Epoch: [89/500] Acc@1:71.17% Acc@5:98.25%: 100%|██████████| 79/79 [00:01<00:00, 46.87it/s]
Train Epoch: [90/500] Loss: 1.1904: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.010763295855731742
tensor(1.1899)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.46it/s] 
Test Epoch: [90/500] Acc@1:71.49% Acc@5:98.05%: 100%|██████████| 79/79 [00:01<00:00, 48.18it/s]
Train Epoch: [91/500] Loss: 1.1751: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.010902819925680226
tensor(1.1759)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.07it/s] 
Test Epoch: [91/500] Acc@1:71.06% Acc@5:98.06%: 100%|██████████| 79/79 [00:01<00:00, 47.19it/s]
Train Epoch: [92/500] Loss: 1.1694: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.01104132992033527
tensor(1.1698)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.77it/s] 
Test Epoch: [92/500] Acc@1:70.55% Acc@5:98.13%: 100%|██████████| 79/79 [00:01<00:00, 48.33it/s]
Train Epoch: [93/500] Loss: 1.1722: 100%|██████████| 390/390 [00:34<00:00, 11.26it/s]


0.011178765082558342
tensor(1.1720)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.62it/s] 
Test Epoch: [93/500] Acc@1:71.17% Acc@5:98.18%: 100%|██████████| 79/79 [00:01<00:00, 47.47it/s]
Train Epoch: [94/500] Loss: 1.1680: 100%|██████████| 390/390 [00:33<00:00, 11.55it/s]


0.011315065126684065
tensor(1.1682)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.00it/s] 
Test Epoch: [94/500] Acc@1:70.78% Acc@5:98.01%: 100%|██████████| 79/79 [00:01<00:00, 48.36it/s]
Train Epoch: [95/500] Loss: 1.1672: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.011450170264964386
tensor(1.1672)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.52it/s] 
Test Epoch: [95/500] Acc@1:71.88% Acc@5:98.10%: 100%|██████████| 79/79 [00:01<00:00, 46.89it/s]
Train Epoch: [96/500] Loss: 1.1562: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.011584021233794383
tensor(1.1567)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.48it/s] 
Test Epoch: [96/500] Acc@1:71.03% Acc@5:98.07%: 100%|██████████| 79/79 [00:01<00:00, 46.93it/s]
Train Epoch: [97/500] Loss: 1.1614: 100%|██████████| 390/390 [00:33<00:00, 11.70it/s]


0.011716559319708098
tensor(1.1612)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.67it/s] 
Test Epoch: [97/500] Acc@1:71.37% Acc@5:98.27%: 100%|██████████| 79/79 [00:01<00:00, 48.57it/s]
Train Epoch: [98/500] Loss: 1.1460: 100%|██████████| 390/390 [00:34<00:00, 11.46it/s]


0.011847726385133105
tensor(1.1467)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.29it/s] 
Test Epoch: [98/500] Acc@1:71.16% Acc@5:98.01%: 100%|██████████| 79/79 [00:01<00:00, 47.59it/s]
Train Epoch: [99/500] Loss: 1.1425: 100%|██████████| 390/390 [00:34<00:00, 11.39it/s]


0.01197746489389244
tensor(1.1427)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.67it/s] 
Test Epoch: [99/500] Acc@1:71.52% Acc@5:98.09%: 100%|██████████| 79/79 [00:01<00:00, 49.44it/s]
Train Epoch: [100/500] Loss: 1.1529: 100%|██████████| 390/390 [00:33<00:00, 11.69it/s]


0.01210571793644272
tensor(1.1523)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.15it/s] 
Test Epoch: [100/500] Acc@1:71.79% Acc@5:98.10%: 100%|██████████| 79/79 [00:01<00:00, 49.08it/s]
Train Epoch: [101/500] Loss: 1.1446: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.012232429254837423
tensor(1.1450)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.22it/s] 
Test Epoch: [101/500] Acc@1:71.67% Acc@5:98.44%: 100%|██████████| 79/79 [00:01<00:00, 45.59it/s]
Train Epoch: [102/500] Loss: 1.1391: 100%|██████████| 390/390 [00:34<00:00, 11.47it/s]


0.01235754326740431
tensor(1.1394)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.30it/s] 
Test Epoch: [102/500] Acc@1:71.10% Acc@5:98.21%: 100%|██████████| 79/79 [00:01<00:00, 47.43it/s]
Train Epoch: [103/500] Loss: 1.1525: 100%|██████████| 390/390 [00:33<00:00, 11.63it/s]


0.012481005093126222
tensor(1.1519)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.87it/s] 
Test Epoch: [103/500] Acc@1:71.73% Acc@5:98.02%: 100%|██████████| 79/79 [00:01<00:00, 48.45it/s]
Train Epoch: [104/500] Loss: 1.1329: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.012602760575714531
tensor(1.1339)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 90.68it/s] 
Test Epoch: [104/500] Acc@1:71.19% Acc@5:98.09%: 100%|██████████| 79/79 [00:01<00:00, 48.40it/s]
Train Epoch: [105/500] Loss: 1.1388: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.012722756307364691
tensor(1.1385)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.13it/s] 
Test Epoch: [105/500] Acc@1:72.04% Acc@5:98.19%: 100%|██████████| 79/79 [00:01<00:00, 48.10it/s]
Train Epoch: [106/500] Loss: 1.1228: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.012840939652183473
tensor(1.1236)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.36it/s] 
Test Epoch: [106/500] Acc@1:71.84% Acc@5:98.07%: 100%|██████████| 79/79 [00:01<00:00, 47.95it/s]
Train Epoch: [107/500] Loss: 1.1283: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.012957258769277578
tensor(1.1280)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 90.06it/s] 
Test Epoch: [107/500] Acc@1:72.73% Acc@5:98.46%: 100%|██████████| 79/79 [00:01<00:00, 48.56it/s]
Train Epoch: [108/500] Loss: 1.1311: 100%|██████████| 390/390 [00:33<00:00, 11.59it/s]


0.013071662635493572
tensor(1.1309)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.09it/s] 
Test Epoch: [108/500] Acc@1:72.28% Acc@5:98.15%: 100%|██████████| 79/79 [00:01<00:00, 49.47it/s]
Train Epoch: [109/500] Loss: 1.1261: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.013184101067799058
tensor(1.1263)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.04it/s] 
Test Epoch: [109/500] Acc@1:71.70% Acc@5:98.06%: 100%|██████████| 79/79 [00:01<00:00, 47.91it/s]
Train Epoch: [110/500] Loss: 1.1159: 100%|██████████| 390/390 [00:33<00:00, 11.60it/s]


0.0132945247452954
tensor(1.1164)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.29it/s] 
Test Epoch: [110/500] Acc@1:71.82% Acc@5:98.43%: 100%|██████████| 79/79 [00:01<00:00, 49.00it/s]
Train Epoch: [111/500] Loss: 1.1003: 100%|██████████| 390/390 [00:33<00:00, 11.57it/s]


0.013402885230852214
tensor(1.1011)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.10it/s] 
Test Epoch: [111/500] Acc@1:72.10% Acc@5:98.29%: 100%|██████████| 79/79 [00:01<00:00, 46.90it/s]
Train Epoch: [112/500] Loss: 1.1084: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.013509134992354229
tensor(1.1080)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.16it/s] 
Test Epoch: [112/500] Acc@1:72.51% Acc@5:98.34%: 100%|██████████| 79/79 [00:01<00:00, 48.63it/s]
Train Epoch: [113/500] Loss: 1.1115: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.013613227423551163
tensor(1.1114)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.47it/s] 
Test Epoch: [113/500] Acc@1:72.28% Acc@5:98.26%: 100%|██████████| 79/79 [00:01<00:00, 46.28it/s]
Train Epoch: [114/500] Loss: 1.1104: 100%|██████████| 390/390 [00:33<00:00, 11.55it/s]


0.013715116864501449
tensor(1.1104)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.19it/s] 
Test Epoch: [114/500] Acc@1:71.74% Acc@5:98.28%: 100%|██████████| 79/79 [00:01<00:00, 49.41it/s]
Train Epoch: [115/500] Loss: 1.1055: 100%|██████████| 390/390 [00:34<00:00, 11.41it/s]


0.013814758621600892
tensor(1.1057)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 90.50it/s] 
Test Epoch: [115/500] Acc@1:71.61% Acc@5:98.10%: 100%|██████████| 79/79 [00:01<00:00, 46.09it/s]
Train Epoch: [116/500] Loss: 1.1032: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.013912108987187423
tensor(1.1033)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.70it/s] 
Test Epoch: [116/500] Acc@1:72.60% Acc@5:98.43%: 100%|██████████| 79/79 [00:01<00:00, 48.96it/s]
Train Epoch: [117/500] Loss: 1.1009: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.014007125258713393
tensor(1.1010)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.57it/s] 
Test Epoch: [117/500] Acc@1:72.51% Acc@5:98.27%: 100%|██████████| 79/79 [00:01<00:00, 46.06it/s]
Train Epoch: [118/500] Loss: 1.0944: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.014099765757476937
tensor(1.0947)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.40it/s] 
Test Epoch: [118/500] Acc@1:71.82% Acc@5:98.28%: 100%|██████████| 79/79 [00:01<00:00, 47.62it/s]
Train Epoch: [119/500] Loss: 1.1021: 100%|██████████| 390/390 [00:34<00:00, 11.35it/s]


0.014189989846904286
tensor(1.1017)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 78.22it/s] 
Test Epoch: [119/500] Acc@1:71.72% Acc@5:98.16%: 100%|██████████| 79/79 [00:01<00:00, 50.02it/s]
Train Epoch: [120/500] Loss: 1.0939: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.01427775795037489
tensor(1.0942)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.08it/s] 
Test Epoch: [120/500] Acc@1:72.63% Acc@5:98.39%: 100%|██████████| 79/79 [00:01<00:00, 47.68it/s]
Train Epoch: [121/500] Loss: 1.0994: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.01436303156858165
tensor(1.0991)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.82it/s] 
Test Epoch: [121/500] Acc@1:72.59% Acc@5:98.33%: 100%|██████████| 79/79 [00:01<00:00, 47.51it/s]
Train Epoch: [122/500] Loss: 1.0928: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.014445773296418555
tensor(1.0931)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.75it/s] 
Test Epoch: [122/500] Acc@1:71.83% Acc@5:98.34%: 100%|██████████| 79/79 [00:01<00:00, 49.70it/s]
Train Epoch: [123/500] Loss: 1.0641: 100%|██████████| 390/390 [00:34<00:00, 11.27it/s]


0.014525946839388352
tensor(1.0655)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.88it/s] 
Test Epoch: [123/500] Acc@1:72.86% Acc@5:98.47%: 100%|██████████| 79/79 [00:01<00:00, 48.55it/s]
Train Epoch: [124/500] Loss: 1.0809: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.014603517029523063
tensor(1.0802)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.96it/s] 
Test Epoch: [124/500] Acc@1:72.37% Acc@5:98.36%: 100%|██████████| 79/79 [00:01<00:00, 48.00it/s]
Train Epoch: [125/500] Loss: 1.0798: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.014678449840810346
tensor(1.0798)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.27it/s] 
Test Epoch: [125/500] Acc@1:72.10% Acc@5:98.32%: 100%|██████████| 79/79 [00:01<00:00, 47.69it/s]
Train Epoch: [126/500] Loss: 1.0713: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.014750712404118942
tensor(1.0718)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.02it/s] 
Test Epoch: [126/500] Acc@1:72.81% Acc@5:98.36%: 100%|██████████| 79/79 [00:01<00:00, 49.05it/s]
Train Epoch: [127/500] Loss: 1.0719: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.014820273021616643
tensor(1.0719)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.65it/s] 
Test Epoch: [127/500] Acc@1:72.40% Acc@5:98.17%: 100%|██████████| 79/79 [00:01<00:00, 48.79it/s]
Train Epoch: [128/500] Loss: 1.0781: 100%|██████████| 390/390 [00:34<00:00, 11.31it/s]


0.014887101180674496
tensor(1.0778)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.19it/s] 
Test Epoch: [128/500] Acc@1:72.81% Acc@5:98.34%: 100%|██████████| 79/79 [00:01<00:00, 45.51it/s]
Train Epoch: [129/500] Loss: 1.0636: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.0149511675672511
tensor(1.0643)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.83it/s] 
Test Epoch: [129/500] Acc@1:72.65% Acc@5:98.40%: 100%|██████████| 79/79 [00:01<00:00, 48.36it/s]
Train Epoch: [130/500] Loss: 1.0663: 100%|██████████| 390/390 [00:33<00:00, 11.77it/s]


0.015012444078751162
tensor(1.0662)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.36it/s] 
Test Epoch: [130/500] Acc@1:73.03% Acc@5:98.40%: 100%|██████████| 79/79 [00:01<00:00, 48.56it/s]
Train Epoch: [131/500] Loss: 1.0641: 100%|██████████| 390/390 [00:33<00:00, 11.47it/s]


0.015070903836352637
tensor(1.0642)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.12it/s] 
Test Epoch: [131/500] Acc@1:72.84% Acc@5:98.30%: 100%|██████████| 79/79 [00:01<00:00, 47.37it/s]
Train Epoch: [132/500] Loss: 1.0566: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.015126521196797076
tensor(1.0570)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.93it/s] 
Test Epoch: [132/500] Acc@1:72.79% Acc@5:98.31%: 100%|██████████| 79/79 [00:01<00:00, 49.20it/s]
Train Epoch: [133/500] Loss: 1.0623: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.015179271763637986
tensor(1.0621)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.99it/s] 
Test Epoch: [133/500] Acc@1:72.59% Acc@5:98.52%: 100%|██████████| 79/79 [00:01<00:00, 47.93it/s]
Train Epoch: [134/500] Loss: 1.0495: 100%|██████████| 390/390 [00:34<00:00, 11.33it/s]


0.01522913239794229
tensor(1.0502)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.00it/s] 
Test Epoch: [134/500] Acc@1:72.78% Acc@5:98.40%: 100%|██████████| 79/79 [00:01<00:00, 47.89it/s]
Train Epoch: [135/500] Loss: 1.0486: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.015276081228440172
tensor(1.0487)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.42it/s] 
Test Epoch: [135/500] Acc@1:72.50% Acc@5:98.41%: 100%|██████████| 79/79 [00:01<00:00, 49.76it/s]
Train Epoch: [136/500] Loss: 1.0475: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.015320097661118864
tensor(1.0476)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.04it/s] 
Test Epoch: [136/500] Acc@1:72.80% Acc@5:98.48%: 100%|██████████| 79/79 [00:01<00:00, 48.05it/s]
Train Epoch: [137/500] Loss: 1.0435: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.015361162388256175
tensor(1.0437)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.73it/s] 
Test Epoch: [137/500] Acc@1:73.06% Acc@5:98.31%: 100%|██████████| 79/79 [00:01<00:00, 49.31it/s]
Train Epoch: [138/500] Loss: 1.0526: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.015399257396889778
tensor(1.0521)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.84it/s] 
Test Epoch: [138/500] Acc@1:73.31% Acc@5:98.38%: 100%|██████████| 79/79 [00:01<00:00, 49.50it/s]
Train Epoch: [139/500] Loss: 1.0321: 100%|██████████| 390/390 [00:34<00:00, 11.31it/s]


0.015434365976718567
tensor(1.0331)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.23it/s] 
Test Epoch: [139/500] Acc@1:73.60% Acc@5:98.59%: 100%|██████████| 79/79 [00:01<00:00, 46.76it/s]
Train Epoch: [140/500] Loss: 1.0405: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.0154664727274326
tensor(1.0401)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.56it/s] 
Test Epoch: [140/500] Acc@1:72.90% Acc@5:98.40%: 100%|██████████| 79/79 [00:01<00:00, 49.27it/s]
Train Epoch: [141/500] Loss: 1.0364: 100%|██████████| 390/390 [00:33<00:00, 11.63it/s]


0.015495563565468409
tensor(1.0366)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.69it/s] 
Test Epoch: [141/500] Acc@1:73.36% Acc@5:98.40%: 100%|██████████| 79/79 [00:01<00:00, 48.92it/s]
Train Epoch: [142/500] Loss: 1.0263: 100%|██████████| 390/390 [00:33<00:00, 11.69it/s]


0.015521625730186726
tensor(1.0268)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.11it/s] 
Test Epoch: [142/500] Acc@1:73.66% Acc@5:98.45%: 100%|██████████| 79/79 [00:01<00:00, 47.90it/s]
Train Epoch: [143/500] Loss: 1.0217: 100%|██████████| 390/390 [00:33<00:00, 11.67it/s]


0.015544647789469918
tensor(1.0219)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 90.02it/s] 
Test Epoch: [143/500] Acc@1:73.78% Acc@5:98.59%: 100%|██████████| 79/79 [00:01<00:00, 47.89it/s]
Train Epoch: [144/500] Loss: 1.0372: 100%|██████████| 390/390 [00:33<00:00, 11.63it/s]


0.015564619644736667
tensor(1.0364)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.32it/s] 
Test Epoch: [144/500] Acc@1:74.17% Acc@5:98.41%: 100%|██████████| 79/79 [00:01<00:00, 47.90it/s]
Train Epoch: [145/500] Loss: 1.0186: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.015581532535371678
tensor(1.0195)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.49it/s] 
Test Epoch: [145/500] Acc@1:73.87% Acc@5:98.60%: 100%|██████████| 79/79 [00:01<00:00, 48.05it/s]
Train Epoch: [146/500] Loss: 1.0151: 100%|██████████| 390/390 [00:33<00:00, 11.61it/s]


0.015595379042568521
tensor(1.0153)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.87it/s] 
Test Epoch: [146/500] Acc@1:73.71% Acc@5:98.44%: 100%|██████████| 79/79 [00:01<00:00, 48.45it/s]
Train Epoch: [147/500] Loss: 1.0230: 100%|██████████| 390/390 [00:34<00:00, 11.31it/s]


0.015606153092583869
tensor(1.0226)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.70it/s] 
Test Epoch: [147/500] Acc@1:74.45% Acc@5:98.56%: 100%|██████████| 79/79 [00:01<00:00, 44.42it/s]
Train Epoch: [148/500] Loss: 1.0151: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.01561384995940173
tensor(1.0155)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.77it/s] 
Test Epoch: [148/500] Acc@1:74.50% Acc@5:98.48%: 100%|██████████| 79/79 [00:01<00:00, 48.28it/s]
Train Epoch: [149/500] Loss: 1.0105: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.015618466266806517
tensor(1.0107)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.11it/s] 
Test Epoch: [149/500] Acc@1:73.94% Acc@5:98.48%: 100%|██████████| 79/79 [00:01<00:00, 48.02it/s]
Train Epoch: [150/500] Loss: 1.0150: 100%|██████████| 390/390 [00:34<00:00, 11.26it/s]


0.015619999997931523
tensor(1.0148)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 92.03it/s] 
Test Epoch: [150/500] Acc@1:74.07% Acc@5:98.57%: 100%|██████████| 79/79 [00:01<00:00, 49.34it/s]
Train Epoch: [151/500] Loss: 1.0037: 100%|██████████| 390/390 [00:33<00:00, 11.68it/s]


0.01561968377098024
tensor(1.0043)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 80.61it/s] 
Test Epoch: [151/500] Acc@1:73.42% Acc@5:98.62%: 100%|██████████| 79/79 [00:01<00:00, 48.57it/s]
Train Epoch: [152/500] Loss: 1.0029: 100%|██████████| 390/390 [00:33<00:00, 11.67it/s]


0.01561873834238813
tensor(1.0030)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.69it/s] 
Test Epoch: [152/500] Acc@1:74.37% Acc@5:98.58%: 100%|██████████| 79/79 [00:01<00:00, 47.00it/s]
Train Epoch: [153/500] Loss: 0.9902: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.01561716378832616
tensor(0.9909)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.58it/s] 
Test Epoch: [153/500] Acc@1:74.65% Acc@5:98.64%: 100%|██████████| 79/79 [00:01<00:00, 46.63it/s]
Train Epoch: [154/500] Loss: 1.0045: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.015614960235652463
tensor(1.0038)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.37it/s] 
Test Epoch: [154/500] Acc@1:74.37% Acc@5:98.47%: 100%|██████████| 79/79 [00:01<00:00, 50.31it/s]
Train Epoch: [155/500] Loss: 1.0074: 100%|██████████| 390/390 [00:33<00:00, 11.47it/s]


0.015612127861902119
tensor(1.0072)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.04it/s] 
Test Epoch: [155/500] Acc@1:74.35% Acc@5:98.61%: 100%|██████████| 79/79 [00:01<00:00, 48.42it/s]
Train Epoch: [156/500] Loss: 0.9934: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.015608666895272849
tensor(0.9941)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.03it/s] 
Test Epoch: [156/500] Acc@1:74.96% Acc@5:98.57%: 100%|██████████| 79/79 [00:01<00:00, 47.10it/s]
Train Epoch: [157/500] Loss: 1.0010: 100%|██████████| 390/390 [00:33<00:00, 11.68it/s]


0.015604577614606627
tensor(1.0006)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.24it/s] 
Test Epoch: [157/500] Acc@1:74.81% Acc@5:98.70%: 100%|██████████| 79/79 [00:01<00:00, 47.51it/s]
Train Epoch: [158/500] Loss: 0.9792: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.015599860349367223
tensor(0.9803)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.33it/s] 
Test Epoch: [158/500] Acc@1:74.41% Acc@5:98.60%: 100%|██████████| 79/79 [00:01<00:00, 48.48it/s]
Train Epoch: [159/500] Loss: 0.9897: 100%|██████████| 390/390 [00:33<00:00, 11.57it/s]


0.015594515479613647
tensor(0.9893)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.93it/s] 
Test Epoch: [159/500] Acc@1:74.71% Acc@5:98.60%: 100%|██████████| 79/79 [00:01<00:00, 47.00it/s]
Train Epoch: [160/500] Loss: 0.9791: 100%|██████████| 390/390 [00:34<00:00, 11.32it/s]


0.015588543435969545
tensor(0.9796)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.71it/s] 
Test Epoch: [160/500] Acc@1:74.62% Acc@5:98.56%: 100%|██████████| 79/79 [00:01<00:00, 48.33it/s]
Train Epoch: [161/500] Loss: 0.9801: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.015581944699588481
tensor(0.9801)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 81.49it/s] 
Test Epoch: [161/500] Acc@1:74.47% Acc@5:98.54%: 100%|██████████| 79/79 [00:01<00:00, 47.99it/s]
Train Epoch: [162/500] Loss: 0.9813: 100%|██████████| 390/390 [00:33<00:00, 11.60it/s]


0.015574719802115198
tensor(0.9812)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.95it/s] 
Test Epoch: [162/500] Acc@1:74.99% Acc@5:98.49%: 100%|██████████| 79/79 [00:01<00:00, 46.46it/s]
Train Epoch: [163/500] Loss: 0.9769: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.015566869325642765
tensor(0.9771)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.42it/s] 
Test Epoch: [163/500] Acc@1:74.68% Acc@5:98.61%: 100%|██████████| 79/79 [00:01<00:00, 45.37it/s]
Train Epoch: [164/500] Loss: 0.9794: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.015558393902665687
tensor(0.9793)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.75it/s] 
Test Epoch: [164/500] Acc@1:74.04% Acc@5:98.51%: 100%|██████████| 79/79 [00:01<00:00, 49.73it/s]
Train Epoch: [165/500] Loss: 0.9699: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.015549294216028945
tensor(0.9703)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.49it/s] 
Test Epoch: [165/500] Acc@1:74.42% Acc@5:98.47%: 100%|██████████| 79/79 [00:01<00:00, 48.32it/s]
Train Epoch: [166/500] Loss: 0.9688: 100%|██████████| 390/390 [00:34<00:00, 11.35it/s]


0.015539570998872983
tensor(0.9689)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.07it/s] 
Test Epoch: [166/500] Acc@1:74.64% Acc@5:98.48%: 100%|██████████| 79/79 [00:01<00:00, 48.50it/s]
Train Epoch: [167/500] Loss: 0.9716: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.01552922503457464
tensor(0.9714)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.52it/s] 
Test Epoch: [167/500] Acc@1:74.99% Acc@5:98.59%: 100%|██████████| 79/79 [00:01<00:00, 49.26it/s]
Train Epoch: [168/500] Loss: 0.9560: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.015518257156684031
tensor(0.9568)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.55it/s] 
Test Epoch: [168/500] Acc@1:74.74% Acc@5:98.65%: 100%|██████████| 79/79 [00:01<00:00, 48.13it/s]
Train Epoch: [169/500] Loss: 0.9585: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.015506668248857393
tensor(0.9584)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.65it/s] 
Test Epoch: [169/500] Acc@1:75.18% Acc@5:98.72%: 100%|██████████| 79/79 [00:01<00:00, 50.78it/s]
Train Epoch: [170/500] Loss: 0.9542: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.015494459244785892
tensor(0.9544)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.20it/s] 
Test Epoch: [170/500] Acc@1:74.98% Acc@5:98.67%: 100%|██████████| 79/79 [00:01<00:00, 48.42it/s]
Train Epoch: [171/500] Loss: 0.9548: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.015481631128120394
tensor(0.9547)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.84it/s] 
Test Epoch: [171/500] Acc@1:74.70% Acc@5:98.59%: 100%|██████████| 79/79 [00:01<00:00, 49.00it/s]
Train Epoch: [172/500] Loss: 0.9552: 100%|██████████| 390/390 [00:33<00:00, 11.63it/s]


0.015468184932392222
tensor(0.9552)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.77it/s] 
Test Epoch: [172/500] Acc@1:75.11% Acc@5:98.65%: 100%|██████████| 79/79 [00:01<00:00, 47.91it/s]
Train Epoch: [173/500] Loss: 0.9453: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.015454121740929874
tensor(0.9458)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.38it/s] 
Test Epoch: [173/500] Acc@1:75.05% Acc@5:98.61%: 100%|██████████| 79/79 [00:01<00:00, 48.47it/s]
Train Epoch: [174/500] Loss: 0.9592: 100%|██████████| 390/390 [00:33<00:00, 11.57it/s]


0.015439442686771757
tensor(0.9585)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.57it/s] 
Test Epoch: [174/500] Acc@1:75.39% Acc@5:98.64%: 100%|██████████| 79/79 [00:01<00:00, 48.33it/s]
Train Epoch: [175/500] Loss: 0.9521: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.015424148952574887
tensor(0.9525)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.62it/s] 
Test Epoch: [175/500] Acc@1:75.61% Acc@5:98.73%: 100%|██████████| 79/79 [00:01<00:00, 45.85it/s]
Train Epoch: [176/500] Loss: 0.9468: 100%|██████████| 390/390 [00:34<00:00, 11.26it/s]


0.015408241770519618
tensor(0.9471)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.22it/s] 
Test Epoch: [176/500] Acc@1:75.44% Acc@5:98.75%: 100%|██████████| 79/79 [00:01<00:00, 47.70it/s]
Train Epoch: [177/500] Loss: 0.9276: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.015391722422210355
tensor(0.9286)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.80it/s] 
Test Epoch: [177/500] Acc@1:75.09% Acc@5:98.66%: 100%|██████████| 79/79 [00:01<00:00, 49.11it/s]
Train Epoch: [178/500] Loss: 0.9532: 100%|██████████| 390/390 [00:33<00:00, 11.55it/s]


0.015374592238572309
tensor(0.9519)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 91.17it/s] 
Test Epoch: [178/500] Acc@1:75.40% Acc@5:98.62%: 100%|██████████| 79/79 [00:01<00:00, 48.66it/s]
Train Epoch: [179/500] Loss: 0.9437: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.015356852599744258
tensor(0.9441)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.70it/s] 
Test Epoch: [179/500] Acc@1:75.73% Acc@5:98.58%: 100%|██████████| 79/79 [00:01<00:00, 49.12it/s]
Train Epoch: [180/500] Loss: 0.9341: 100%|██████████| 390/390 [00:34<00:00, 11.36it/s]


0.015338504934967365
tensor(0.9346)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.40it/s] 
Test Epoch: [180/500] Acc@1:75.17% Acc@5:98.44%: 100%|██████████| 79/79 [00:01<00:00, 47.26it/s]
Train Epoch: [181/500] Loss: 0.9363: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.015319550722470016
tensor(0.9362)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.83it/s] 
Test Epoch: [181/500] Acc@1:75.33% Acc@5:98.58%: 100%|██████████| 79/79 [00:01<00:00, 47.57it/s]
Train Epoch: [182/500] Loss: 0.9266: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.01529999148934872
tensor(0.9270)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.80it/s] 
Test Epoch: [182/500] Acc@1:75.70% Acc@5:98.72%: 100%|██████████| 79/79 [00:01<00:00, 45.21it/s]
Train Epoch: [183/500] Loss: 0.9383: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.01527982881144509
tensor(0.9377)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 91.19it/s] 
Test Epoch: [183/500] Acc@1:75.78% Acc@5:98.60%: 100%|██████████| 79/79 [00:01<00:00, 47.91it/s]
Train Epoch: [184/500] Loss: 0.9227: 100%|██████████| 390/390 [00:34<00:00, 11.30it/s]


0.015259064313218872
tensor(0.9235)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.68it/s] 
Test Epoch: [184/500] Acc@1:75.49% Acc@5:98.64%: 100%|██████████| 79/79 [00:01<00:00, 49.05it/s]
Train Epoch: [185/500] Loss: 0.9243: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.01523769966761706
tensor(0.9243)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.42it/s] 
Test Epoch: [185/500] Acc@1:75.62% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 45.16it/s]
Train Epoch: [186/500] Loss: 0.9247: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.015215736595939116
tensor(0.9247)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.12it/s] 
Test Epoch: [186/500] Acc@1:75.96% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 48.83it/s]
Train Epoch: [187/500] Loss: 0.9166: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.015193176867698301
tensor(0.9170)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.36it/s] 
Test Epoch: [187/500] Acc@1:75.78% Acc@5:98.79%: 100%|██████████| 79/79 [00:01<00:00, 49.67it/s]
Train Epoch: [188/500] Loss: 0.9159: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.015170022300479088
tensor(0.9159)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.02it/s] 
Test Epoch: [188/500] Acc@1:75.94% Acc@5:98.70%: 100%|██████████| 79/79 [00:01<00:00, 47.27it/s]
Train Epoch: [189/500] Loss: 0.9176: 100%|██████████| 390/390 [00:34<00:00, 11.35it/s]


0.015146274759790732
tensor(0.9176)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.82it/s] 
Test Epoch: [189/500] Acc@1:75.81% Acc@5:98.67%: 100%|██████████| 79/79 [00:01<00:00, 47.53it/s]
Train Epoch: [190/500] Loss: 0.9140: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.015121936158916978
tensor(0.9142)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.31it/s] 
Test Epoch: [190/500] Acc@1:75.77% Acc@5:98.67%: 100%|██████████| 79/79 [00:01<00:00, 48.84it/s]
Train Epoch: [191/500] Loss: 0.9068: 100%|██████████| 390/390 [00:35<00:00, 11.14it/s]


0.015097008458761904
tensor(0.9072)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.63it/s] 
Test Epoch: [191/500] Acc@1:75.86% Acc@5:98.62%: 100%|██████████| 79/79 [00:01<00:00, 48.87it/s]
Train Epoch: [192/500] Loss: 0.9117: 100%|██████████| 390/390 [00:34<00:00, 11.46it/s]


0.015071493667691937
tensor(0.9114)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.96it/s] 
Test Epoch: [192/500] Acc@1:76.13% Acc@5:98.68%: 100%|██████████| 79/79 [00:01<00:00, 49.11it/s]
Train Epoch: [193/500] Loss: 0.9033: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.015045393841374044
tensor(0.9037)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.68it/s] 
Test Epoch: [193/500] Acc@1:76.05% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 47.24it/s]
Train Epoch: [194/500] Loss: 0.9164: 100%|██████████| 390/390 [00:34<00:00, 11.28it/s]


0.015018711082610112
tensor(0.9157)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.44it/s] 
Test Epoch: [194/500] Acc@1:75.64% Acc@5:98.67%: 100%|██████████| 79/79 [00:01<00:00, 49.19it/s]
Train Epoch: [195/500] Loss: 0.9171: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.014991447541167532
tensor(0.9171)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.00it/s] 
Test Epoch: [195/500] Acc@1:75.84% Acc@5:98.65%: 100%|██████████| 79/79 [00:01<00:00, 48.83it/s]
Train Epoch: [196/500] Loss: 0.9002: 100%|██████████| 390/390 [00:34<00:00, 11.31it/s]


0.014963605413605996
tensor(0.9010)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.99it/s] 
Test Epoch: [196/500] Acc@1:76.44% Acc@5:98.78%: 100%|██████████| 79/79 [00:01<00:00, 49.37it/s]
Train Epoch: [197/500] Loss: 0.9054: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.014935186943100516
tensor(0.9052)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.45it/s] 
Test Epoch: [197/500] Acc@1:76.38% Acc@5:98.86%: 100%|██████████| 79/79 [00:01<00:00, 48.48it/s]
Train Epoch: [198/500] Loss: 0.9054: 100%|██████████| 390/390 [00:34<00:00, 11.40it/s]


0.014906194419260716
tensor(0.9054)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.77it/s] 
Test Epoch: [198/500] Acc@1:76.03% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 49.90it/s]
Train Epoch: [199/500] Loss: 0.8910: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.014876630177946349
tensor(0.8918)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.18it/s] 
Test Epoch: [199/500] Acc@1:76.12% Acc@5:98.67%: 100%|██████████| 79/79 [00:01<00:00, 48.21it/s]
Train Epoch: [200/500] Loss: 0.9021: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.014846496601079104
tensor(0.9016)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.67it/s] 
Test Epoch: [200/500] Acc@1:76.65% Acc@5:98.85%: 100%|██████████| 79/79 [00:01<00:00, 48.36it/s]
Train Epoch: [201/500] Loss: 0.8931: 100%|██████████| 390/390 [00:33<00:00, 11.60it/s]


0.014815796116450708
tensor(0.8936)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.49it/s] 
Test Epoch: [201/500] Acc@1:76.45% Acc@5:98.84%: 100%|██████████| 79/79 [00:01<00:00, 48.71it/s]
Train Epoch: [202/500] Loss: 0.8824: 100%|██████████| 390/390 [00:33<00:00, 11.47it/s]


0.014784531197527309
tensor(0.8830)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.79it/s] 
Test Epoch: [202/500] Acc@1:76.31% Acc@5:98.74%: 100%|██████████| 79/79 [00:01<00:00, 47.96it/s]
Train Epoch: [203/500] Loss: 0.8901: 100%|██████████| 390/390 [00:33<00:00, 11.66it/s]


0.014752704363250215
tensor(0.8897)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.19it/s] 
Test Epoch: [203/500] Acc@1:76.29% Acc@5:98.80%: 100%|██████████| 79/79 [00:01<00:00, 46.17it/s]
Train Epoch: [204/500] Loss: 0.8960: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.014720318177832932
tensor(0.8957)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.01it/s] 
Test Epoch: [204/500] Acc@1:75.74% Acc@5:98.74%: 100%|██████████| 79/79 [00:01<00:00, 46.54it/s]
Train Epoch: [205/500] Loss: 0.9012: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.014687375250554578
tensor(0.9009)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.06it/s] 
Test Epoch: [205/500] Acc@1:76.30% Acc@5:98.76%: 100%|██████████| 79/79 [00:01<00:00, 47.71it/s]
Train Epoch: [206/500] Loss: 0.8774: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.014653878235549662
tensor(0.8785)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.42it/s] 
Test Epoch: [206/500] Acc@1:76.51% Acc@5:98.86%: 100%|██████████| 79/79 [00:01<00:00, 46.05it/s]
Train Epoch: [207/500] Loss: 0.8944: 100%|██████████| 390/390 [00:34<00:00, 11.29it/s]


0.014619829831594239
tensor(0.8936)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.90it/s] 
Test Epoch: [207/500] Acc@1:76.90% Acc@5:98.85%: 100%|██████████| 79/79 [00:01<00:00, 50.17it/s]
Train Epoch: [208/500] Loss: 0.8857: 100%|██████████| 390/390 [00:34<00:00, 11.40it/s]


0.014585232781888485
tensor(0.8861)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.01it/s] 
Test Epoch: [208/500] Acc@1:76.63% Acc@5:98.82%: 100%|██████████| 79/79 [00:01<00:00, 48.71it/s]
Train Epoch: [209/500] Loss: 0.8704: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.014550089873835678
tensor(0.8711)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.75it/s] 
Test Epoch: [209/500] Acc@1:77.04% Acc@5:98.82%: 100%|██████████| 79/79 [00:01<00:00, 48.52it/s]
Train Epoch: [210/500] Loss: 0.8877: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.014514403938817621
tensor(0.8869)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.72it/s] 
Test Epoch: [210/500] Acc@1:76.72% Acc@5:98.81%: 100%|██████████| 79/79 [00:01<00:00, 46.52it/s]
Train Epoch: [211/500] Loss: 0.8782: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.014478177851966534
tensor(0.8786)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.51it/s] 
Test Epoch: [211/500] Acc@1:76.87% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 48.46it/s]
Train Epoch: [212/500] Loss: 0.8725: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.014441414531933403
tensor(0.8728)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.05it/s] 
Test Epoch: [212/500] Acc@1:76.17% Acc@5:98.71%: 100%|██████████| 79/79 [00:01<00:00, 48.95it/s]
Train Epoch: [213/500] Loss: 0.8740: 100%|██████████| 390/390 [00:34<00:00, 11.42it/s]


0.014404116940652831
tensor(0.8740)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.24it/s] 
Test Epoch: [213/500] Acc@1:77.03% Acc@5:98.83%: 100%|██████████| 79/79 [00:01<00:00, 48.19it/s]
Train Epoch: [214/500] Loss: 0.8658: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.014366288083104407
tensor(0.8662)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.62it/s] 
Test Epoch: [214/500] Acc@1:76.90% Acc@5:98.88%: 100%|██████████| 79/79 [00:01<00:00, 48.33it/s]
Train Epoch: [215/500] Loss: 0.8625: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.014327931007070596
tensor(0.8627)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.49it/s] 
Test Epoch: [215/500] Acc@1:76.83% Acc@5:98.83%: 100%|██████████| 79/79 [00:01<00:00, 48.37it/s]
Train Epoch: [216/500] Loss: 0.8763: 100%|██████████| 390/390 [00:33<00:00, 11.55it/s]


0.014289048802891195
tensor(0.8756)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.25it/s] 
Test Epoch: [216/500] Acc@1:76.91% Acc@5:98.96%: 100%|██████████| 79/79 [00:01<00:00, 47.67it/s]
Train Epoch: [217/500] Loss: 0.8679: 100%|██████████| 390/390 [00:33<00:00, 11.67it/s]


0.014249644603214342
tensor(0.8683)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.87it/s] 
Test Epoch: [217/500] Acc@1:77.09% Acc@5:98.78%: 100%|██████████| 79/79 [00:01<00:00, 47.70it/s]
Train Epoch: [218/500] Loss: 0.8688: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.014209721582744129
tensor(0.8688)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.02it/s] 
Test Epoch: [218/500] Acc@1:76.25% Acc@5:98.73%: 100%|██████████| 79/79 [00:01<00:00, 46.01it/s]
Train Epoch: [219/500] Loss: 0.8637: 100%|██████████| 390/390 [00:33<00:00, 11.64it/s]


0.014169282957984825
tensor(0.8640)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.75it/s] 
Test Epoch: [219/500] Acc@1:77.00% Acc@5:98.80%: 100%|██████████| 79/79 [00:01<00:00, 46.52it/s]
Train Epoch: [220/500] Loss: 0.8550: 100%|██████████| 390/390 [00:33<00:00, 11.64it/s]


0.014128331986981733
tensor(0.8554)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.80it/s] 
Test Epoch: [220/500] Acc@1:76.68% Acc@5:98.69%: 100%|██████████| 79/79 [00:01<00:00, 48.85it/s]
Train Epoch: [221/500] Loss: 0.8523: 100%|██████████| 390/390 [00:34<00:00, 11.41it/s]


0.014086871969058688
tensor(0.8525)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.31it/s] 
Test Epoch: [221/500] Acc@1:76.66% Acc@5:98.74%: 100%|██████████| 79/79 [00:01<00:00, 48.23it/s]
Train Epoch: [222/500] Loss: 0.8551: 100%|██████████| 390/390 [00:33<00:00, 11.71it/s]


0.014044906244552248
tensor(0.8550)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.69it/s] 
Test Epoch: [222/500] Acc@1:77.33% Acc@5:98.82%: 100%|██████████| 79/79 [00:01<00:00, 49.10it/s]
Train Epoch: [223/500] Loss: 0.8533: 100%|██████████| 390/390 [00:33<00:00, 11.55it/s]


0.014002438194542562
tensor(0.8534)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.74it/s] 
Test Epoch: [223/500] Acc@1:77.49% Acc@5:98.83%: 100%|██████████| 79/79 [00:01<00:00, 46.14it/s]
Train Epoch: [224/500] Loss: 0.8578: 100%|██████████| 390/390 [00:34<00:00, 11.39it/s]


0.013959471240580966
tensor(0.8576)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 78.79it/s] 
Test Epoch: [224/500] Acc@1:76.70% Acc@5:98.85%: 100%|██████████| 79/79 [00:01<00:00, 47.96it/s]
Train Epoch: [225/500] Loss: 0.8495: 100%|██████████| 390/390 [00:33<00:00, 11.68it/s]


0.013916008844414329
tensor(0.8499)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.25it/s] 
Test Epoch: [225/500] Acc@1:77.22% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 46.47it/s]
Train Epoch: [226/500] Loss: 0.8524: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.01387205450770613
tensor(0.8523)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.54it/s] 
Test Epoch: [226/500] Acc@1:77.58% Acc@5:98.89%: 100%|██████████| 79/79 [00:01<00:00, 47.70it/s]
Train Epoch: [227/500] Loss: 0.8484: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.01382761177175435
tensor(0.8486)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.08it/s] 
Test Epoch: [227/500] Acc@1:77.42% Acc@5:98.86%: 100%|██████████| 79/79 [00:01<00:00, 49.19it/s]
Train Epoch: [228/500] Loss: 0.8470: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.013782684217206147
tensor(0.8470)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.24it/s] 
Test Epoch: [228/500] Acc@1:77.33% Acc@5:98.80%: 100%|██████████| 79/79 [00:01<00:00, 48.51it/s]
Train Epoch: [229/500] Loss: 0.8489: 100%|██████████| 390/390 [00:34<00:00, 11.33it/s]


0.013737275463769384
tensor(0.8488)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.40it/s] 
Test Epoch: [229/500] Acc@1:77.42% Acc@5:98.91%: 100%|██████████| 79/79 [00:01<00:00, 49.19it/s]
Train Epoch: [230/500] Loss: 0.8348: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.013691389169920992
tensor(0.8355)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.13it/s] 
Test Epoch: [230/500] Acc@1:77.19% Acc@5:98.76%: 100%|██████████| 79/79 [00:01<00:00, 47.76it/s]
Train Epoch: [231/500] Loss: 0.8346: 100%|██████████| 390/390 [00:34<00:00, 11.45it/s]


0.013645029032612211
tensor(0.8347)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.61it/s] 
Test Epoch: [231/500] Acc@1:77.58% Acc@5:98.76%: 100%|██████████| 79/79 [00:01<00:00, 47.44it/s]
Train Epoch: [232/500] Loss: 0.8407: 100%|██████████| 390/390 [00:33<00:00, 11.61it/s]


0.01359819878697074
tensor(0.8404)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.87it/s] 
Test Epoch: [232/500] Acc@1:77.25% Acc@5:98.70%: 100%|██████████| 79/79 [00:01<00:00, 46.12it/s]
Train Epoch: [233/500] Loss: 0.8376: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.013550902205999806
tensor(0.8378)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.19it/s] 
Test Epoch: [233/500] Acc@1:77.64% Acc@5:98.86%: 100%|██████████| 79/79 [00:01<00:00, 48.55it/s]
Train Epoch: [234/500] Loss: 0.8366: 100%|██████████| 390/390 [00:32<00:00, 11.82it/s]


0.013503143100274182
tensor(0.8366)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.92it/s] 
Test Epoch: [234/500] Acc@1:77.38% Acc@5:98.89%: 100%|██████████| 79/79 [00:01<00:00, 47.44it/s]
Train Epoch: [235/500] Loss: 0.8385: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.013454925317633174
tensor(0.8384)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.47it/s] 
Test Epoch: [235/500] Acc@1:77.36% Acc@5:98.90%: 100%|██████████| 79/79 [00:01<00:00, 49.01it/s]
Train Epoch: [236/500] Loss: 0.8363: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.013406252742870621
tensor(0.8364)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.89it/s] 
Test Epoch: [236/500] Acc@1:77.94% Acc@5:98.87%: 100%|██████████| 79/79 [00:01<00:00, 48.68it/s]
Train Epoch: [237/500] Loss: 0.8174: 100%|██████████| 390/390 [00:34<00:00, 11.33it/s]


0.01335712929742189
tensor(0.8183)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.64it/s] 
Test Epoch: [237/500] Acc@1:77.53% Acc@5:98.93%: 100%|██████████| 79/79 [00:01<00:00, 48.82it/s]
Train Epoch: [238/500] Loss: 0.8276: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.013307558939047948
tensor(0.8271)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.85it/s] 
Test Epoch: [238/500] Acc@1:77.81% Acc@5:98.89%: 100%|██████████| 79/79 [00:01<00:00, 46.98it/s]
Train Epoch: [239/500] Loss: 0.8235: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.013257545661516485
tensor(0.8237)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.63it/s] 
Test Epoch: [239/500] Acc@1:77.71% Acc@5:98.99%: 100%|██████████| 79/79 [00:01<00:00, 48.32it/s]
Train Epoch: [240/500] Loss: 0.8320: 100%|██████████| 390/390 [00:34<00:00, 11.47it/s]


0.013207093494280155
tensor(0.8316)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.92it/s] 
Test Epoch: [240/500] Acc@1:77.80% Acc@5:98.97%: 100%|██████████| 79/79 [00:01<00:00, 48.64it/s]
Train Epoch: [241/500] Loss: 0.8230: 100%|██████████| 390/390 [00:34<00:00, 11.29it/s]


0.013156206502151928
tensor(0.8234)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.58it/s] 
Test Epoch: [241/500] Acc@1:78.16% Acc@5:99.02%: 100%|██████████| 79/79 [00:01<00:00, 48.58it/s]
Train Epoch: [242/500] Loss: 0.8214: 100%|██████████| 390/390 [00:34<00:00, 11.43it/s]


0.013104888784977594
tensor(0.8215)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.05it/s] 
Test Epoch: [242/500] Acc@1:77.58% Acc@5:98.88%: 100%|██████████| 79/79 [00:01<00:00, 49.11it/s]
Train Epoch: [243/500] Loss: 0.8207: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.013053144477305451
tensor(0.8207)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.06it/s] 
Test Epoch: [243/500] Acc@1:78.15% Acc@5:99.00%: 100%|██████████| 79/79 [00:01<00:00, 49.14it/s]
Train Epoch: [244/500] Loss: 0.8138: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.0130009777480532
tensor(0.8141)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.99it/s] 
Test Epoch: [244/500] Acc@1:77.91% Acc@5:98.93%: 100%|██████████| 79/79 [00:01<00:00, 49.83it/s]
Train Epoch: [245/500] Loss: 0.8143: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.012948392800172053
tensor(0.8143)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 80.83it/s] 
Test Epoch: [245/500] Acc@1:78.14% Acc@5:98.85%: 100%|██████████| 79/79 [00:01<00:00, 46.57it/s]
Train Epoch: [246/500] Loss: 0.8179: 100%|██████████| 390/390 [00:33<00:00, 11.69it/s]


0.012895393870308122
tensor(0.8177)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.83it/s] 
Test Epoch: [246/500] Acc@1:78.05% Acc@5:98.84%: 100%|██████████| 79/79 [00:01<00:00, 47.47it/s]
Train Epoch: [247/500] Loss: 0.8126: 100%|██████████| 390/390 [00:33<00:00, 11.52it/s]


0.012841985228461075
tensor(0.8128)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.79it/s] 
Test Epoch: [247/500] Acc@1:78.76% Acc@5:99.03%: 100%|██████████| 79/79 [00:01<00:00, 48.52it/s]
Train Epoch: [248/500] Loss: 0.8071: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.012788171177640119
tensor(0.8074)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.67it/s] 
Test Epoch: [248/500] Acc@1:77.91% Acc@5:98.90%: 100%|██████████| 79/79 [00:01<00:00, 49.92it/s]
Train Epoch: [249/500] Loss: 0.8159: 100%|██████████| 390/390 [00:33<00:00, 11.76it/s]


0.012733956053517308
tensor(0.8155)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.19it/s] 
Test Epoch: [249/500] Acc@1:78.30% Acc@5:98.91%: 100%|██████████| 79/79 [00:01<00:00, 49.50it/s]
Train Epoch: [250/500] Loss: 0.8072: 100%|██████████| 390/390 [00:33<00:00, 11.58it/s]


0.012679344224078234
tensor(0.8076)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.62it/s] 
Test Epoch: [250/500] Acc@1:78.17% Acc@5:98.94%: 100%|██████████| 79/79 [00:01<00:00, 48.77it/s]
Train Epoch: [251/500] Loss: 0.8110: 100%|██████████| 390/390 [00:34<00:00, 11.34it/s]


0.012624340089270112
tensor(0.8108)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.08it/s] 
Test Epoch: [251/500] Acc@1:77.98% Acc@5:98.94%: 100%|██████████| 79/79 [00:01<00:00, 48.17it/s]
Train Epoch: [252/500] Loss: 0.8097: 100%|██████████| 390/390 [00:34<00:00, 11.41it/s]


0.012568948080647272
tensor(0.8098)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.56it/s] 
Test Epoch: [252/500] Acc@1:78.26% Acc@5:98.97%: 100%|██████████| 79/79 [00:01<00:00, 48.85it/s]
Train Epoch: [253/500] Loss: 0.8147: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.012513172661014137
tensor(0.8145)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.20it/s] 
Test Epoch: [253/500] Acc@1:77.84% Acc@5:98.89%: 100%|██████████| 79/79 [00:01<00:00, 46.96it/s]
Train Epoch: [254/500] Loss: 0.8127: 100%|██████████| 390/390 [00:34<00:00, 11.46it/s]


0.012457018324065658
tensor(0.8128)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.75it/s] 
Test Epoch: [254/500] Acc@1:78.38% Acc@5:98.95%: 100%|██████████| 79/79 [00:01<00:00, 48.37it/s]
Train Epoch: [255/500] Loss: 0.7981: 100%|██████████| 390/390 [00:34<00:00, 11.47it/s]


0.012400489594025264
tensor(0.7989)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.59it/s] 
Test Epoch: [255/500] Acc@1:78.32% Acc@5:98.96%: 100%|██████████| 79/79 [00:01<00:00, 43.99it/s]
Train Epoch: [256/500] Loss: 0.8019: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.012343591025280353
tensor(0.8017)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.08it/s] 
Test Epoch: [256/500] Acc@1:78.48% Acc@5:98.98%: 100%|██████████| 79/79 [00:01<00:00, 49.89it/s]
Train Epoch: [257/500] Loss: 0.8087: 100%|██████████| 390/390 [00:34<00:00, 11.38it/s]


0.012286327202015373
tensor(0.8083)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.52it/s] 
Test Epoch: [257/500] Acc@1:77.84% Acc@5:98.88%: 100%|██████████| 79/79 [00:01<00:00, 47.71it/s]
Train Epoch: [258/500] Loss: 0.7952: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.012228702737842463
tensor(0.7958)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.48it/s] 
Test Epoch: [258/500] Acc@1:78.69% Acc@5:98.99%: 100%|██████████| 79/79 [00:01<00:00, 48.68it/s]
Train Epoch: [259/500] Loss: 0.8034: 100%|██████████| 390/390 [00:33<00:00, 11.51it/s]


0.012170722275429762
tensor(0.8030)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.74it/s] 
Test Epoch: [259/500] Acc@1:78.21% Acc@5:98.84%: 100%|██████████| 79/79 [00:01<00:00, 47.03it/s]
Train Epoch: [260/500] Loss: 0.7973: 100%|██████████| 390/390 [00:33<00:00, 11.50it/s]


0.012112390486127353
tensor(0.7976)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.16it/s] 
Test Epoch: [260/500] Acc@1:78.24% Acc@5:98.92%: 100%|██████████| 79/79 [00:01<00:00, 49.20it/s]
Train Epoch: [261/500] Loss: 0.7890: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.0120537120695909
tensor(0.7894)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.82it/s] 
Test Epoch: [261/500] Acc@1:78.25% Acc@5:98.97%: 100%|██████████| 79/79 [00:01<00:00, 47.16it/s]
Train Epoch: [262/500] Loss: 0.7913: 100%|██████████| 390/390 [00:34<00:00, 11.32it/s]


0.011994691753403014
tensor(0.7912)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.85it/s] 
Test Epoch: [262/500] Acc@1:78.74% Acc@5:99.03%: 100%|██████████| 79/79 [00:01<00:00, 48.74it/s]
Train Epoch: [263/500] Loss: 0.7901: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.011935334292692359
tensor(0.7902)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.90it/s] 
Test Epoch: [263/500] Acc@1:78.12% Acc@5:98.88%: 100%|██████████| 79/79 [00:01<00:00, 47.62it/s]
Train Epoch: [264/500] Loss: 0.7903: 100%|██████████| 390/390 [00:34<00:00, 11.29it/s]


0.011875644469750545
tensor(0.7903)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 83.30it/s] 
Test Epoch: [264/500] Acc@1:78.21% Acc@5:98.94%: 100%|██████████| 79/79 [00:01<00:00, 48.00it/s]
Train Epoch: [265/500] Loss: 0.7822: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.01181562709364683
tensor(0.7826)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.18it/s] 
Test Epoch: [265/500] Acc@1:78.55% Acc@5:98.95%: 100%|██████████| 79/79 [00:01<00:00, 46.36it/s]
Train Epoch: [266/500] Loss: 0.7852: 100%|██████████| 390/390 [00:33<00:00, 11.62it/s]


0.011755286999840658
tensor(0.7851)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.93it/s] 
Test Epoch: [266/500] Acc@1:78.28% Acc@5:98.95%: 100%|██████████| 79/79 [00:01<00:00, 48.01it/s]
Train Epoch: [267/500] Loss: 0.7846: 100%|██████████| 390/390 [00:33<00:00, 11.48it/s]


0.01169462904979209
tensor(0.7846)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.86it/s] 
Test Epoch: [267/500] Acc@1:78.47% Acc@5:98.98%: 100%|██████████| 79/79 [00:01<00:00, 48.30it/s]
Train Epoch: [268/500] Loss: 0.7729: 100%|██████████| 390/390 [00:33<00:00, 11.49it/s]


0.011633658130570112
tensor(0.7735)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.51it/s] 
Test Epoch: [268/500] Acc@1:78.32% Acc@5:98.89%: 100%|██████████| 79/79 [00:01<00:00, 47.95it/s]
Train Epoch: [269/500] Loss: 0.7849: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.01157237915445891
tensor(0.7843)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.74it/s] 
Test Epoch: [269/500] Acc@1:78.58% Acc@5:98.87%: 100%|██████████| 79/79 [00:01<00:00, 50.10it/s]
Train Epoch: [270/500] Loss: 0.7828: 100%|██████████| 390/390 [00:34<00:00, 11.16it/s]


0.01151079705856209
tensor(0.7829)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 87.81it/s] 
Test Epoch: [270/500] Acc@1:78.78% Acc@5:98.99%: 100%|██████████| 79/79 [00:01<00:00, 49.77it/s]
Train Epoch: [271/500] Loss: 0.7810: 100%|██████████| 390/390 [00:33<00:00, 11.53it/s]


0.01144891680440491
tensor(0.7811)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 89.89it/s] 
Test Epoch: [271/500] Acc@1:78.85% Acc@5:98.87%: 100%|██████████| 79/79 [00:01<00:00, 48.04it/s]
Train Epoch: [272/500] Loss: 0.7763: 100%|██████████| 390/390 [00:34<00:00, 11.27it/s]


0.011386743377534538
tensor(0.7765)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.04it/s] 
Test Epoch: [272/500] Acc@1:78.81% Acc@5:99.02%: 100%|██████████| 79/79 [00:01<00:00, 47.09it/s]
Train Epoch: [273/500] Loss: 0.7849: 100%|██████████| 390/390 [00:34<00:00, 11.21it/s]


0.011324281787118381
tensor(0.7845)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.55it/s] 
Test Epoch: [273/500] Acc@1:78.26% Acc@5:98.93%: 100%|██████████| 79/79 [00:01<00:00, 46.90it/s]
Train Epoch: [274/500] Loss: 0.7679: 100%|██████████| 390/390 [00:33<00:00, 11.70it/s]


0.011261537065540513
tensor(0.7687)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.52it/s] 
Test Epoch: [274/500] Acc@1:78.70% Acc@5:98.92%: 100%|██████████| 79/79 [00:01<00:00, 47.81it/s]
Train Epoch: [275/500] Loss: 0.7722: 100%|██████████| 390/390 [00:34<00:00, 11.44it/s]


0.011198514267996217
tensor(0.7720)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.94it/s] 
Test Epoch: [275/500] Acc@1:79.01% Acc@5:98.92%: 100%|██████████| 79/79 [00:01<00:00, 49.12it/s]
Train Epoch: [276/500] Loss: 0.7717: 100%|██████████| 390/390 [00:33<00:00, 11.77it/s]


0.011135218472084714
tensor(0.7717)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 84.89it/s] 
Test Epoch: [276/500] Acc@1:78.84% Acc@5:98.83%: 100%|██████████| 79/79 [00:01<00:00, 48.05it/s]
Train Epoch: [277/500] Loss: 0.7783: 100%|██████████| 390/390 [00:33<00:00, 11.69it/s]


0.01107165477740005
tensor(0.7780)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 82.43it/s] 
Test Epoch: [277/500] Acc@1:78.63% Acc@5:98.95%: 100%|██████████| 79/79 [00:01<00:00, 47.58it/s]
Train Epoch: [278/500] Loss: 0.7644: 100%|██████████| 390/390 [00:33<00:00, 11.56it/s]


0.01100782830512026
tensor(0.7651)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 88.05it/s] 
Test Epoch: [278/500] Acc@1:78.81% Acc@5:98.95%: 100%|██████████| 79/79 [00:01<00:00, 46.59it/s]
Train Epoch: [279/500] Loss: 0.7619: 100%|██████████| 390/390 [00:34<00:00, 11.37it/s]


0.010943744197594753
tensor(0.7621)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.57it/s] 
Test Epoch: [279/500] Acc@1:78.52% Acc@5:98.77%: 100%|██████████| 79/79 [00:01<00:00, 48.16it/s]
Train Epoch: [280/500] Loss: 0.7715: 100%|██████████| 390/390 [00:33<00:00, 11.54it/s]


0.010879407617929996
tensor(0.7710)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 86.36it/s] 
Test Epoch: [280/500] Acc@1:78.83% Acc@5:98.89%: 100%|██████████| 79/79 [00:01<00:00, 47.24it/s]
Train Epoch: [281/500] Loss: 0.7670: 100%|██████████| 390/390 [00:34<00:00, 11.41it/s]


0.010814823749573562
tensor(0.7672)


Feature extracting: 100%|██████████| 391/391 [00:04<00:00, 85.98it/s] 
Test Epoch: [281/500] Acc@1:78.94% Acc@5:98.93%: 100%|██████████| 79/79 [00:01<00:00, 48.72it/s]
Train Epoch: [282/500] Loss: 0.7411:  18%|█▊        | 71/390 [00:06<00:26, 11.88it/s]

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Load the logged metrics table. `csvfilename` is defined outside this cell
# (presumably by the training cell that wrote the log -- confirm).
df = pd.read_csv(csvfilename)

# Figure 1: the 'smooth_loss' column against 'lr_epoch', log-scaled x-axis.
f1, loss_ax = plt.subplots()
loss_ax.semilogx(df['lr_epoch'], df['smooth_loss'])
loss_ax.set_xlabel('learning rate')
loss_ax.set_ylabel('smoothed trg epoch loss')
plt.show()

# Figure 2: the 'test_acc@1' column plotted against the DataFrame row index.
f2, acc_ax = plt.subplots()
acc_ax.plot(df['test_acc@1'])
acc_ax.set_title('Sim CLR with RAdam')
acc_ax.set_xlabel('epoch number')
acc_ax.set_ylabel('epoch Test Accuracy')
plt.show()