# Contrastive Learning on CIFAR10 using a Vision Transformer (ViT) Backbone

First, we import the dataset and define the transformation operations applied to it. We apply random transformations to the images (crop + flip + color jitter + grayscale).

In [1]:
from PIL import Image
from torchvision import transforms
from torchvision.datasets import CIFAR10
import torch

class CIFAR10Pair(CIFAR10):
    """CIFAR10 dataset that returns two views of every image.

    Each item is ``(pos_1, pos_2, target)``. When ``transform`` is set it is
    applied twice to the same image, so a random transform yields two
    different augmentations of it. When ``transform`` is ``None`` both views
    are the untransformed PIL image.
    """

    def __getitem__(self, index):
        """Return two views of the image at ``index`` together with its target."""
        img, target = self.data[index], self.targets[index]
        img = Image.fromarray(img)

        if self.transform is not None:
            # Two independent calls: a random transform produces two distinct views.
            pos_1 = self.transform(img)
            pos_2 = self.transform(img)
        else:
            # Without a transform the views were previously left undefined,
            # which raised UnboundLocalError at the return statement.
            pos_1 = pos_2 = img

        if self.target_transform is not None:
            target = self.target_transform(target)

        return pos_1, pos_2, target


# Per-channel normalisation constants shared by the train and test pipelines.
_NORM_MEAN = [0.4914, 0.4822, 0.4465]
_NORM_STD = [0.2023, 0.1994, 0.2010]

# Training pipeline: random crop / flip / colour jitter / grayscale, then
# conversion to a normalised tensor.
_train_steps = [
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and normalisation.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
test_transform = transforms.Compose(_test_steps)

The original paper uses the common ResNet-50 as its ConvNet encoder for simplicity; this notebook uses a Vision Transformer (ViT) encoder instead. Task 1 is to set up the encoder and the projection head. The parameters are adapted from the original paper.

In [2]:
# self written ViT code
# referenced from https://github.com/lucidrains/vit-pytorch/blob/main/vit_pytorch/vit.py

!pip install einops

import torch
from torch import nn
import torch.nn.functional as F

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# helpers

def pair(t):
    """Return ``t`` unchanged when it is a tuple, otherwise the 2-tuple ``(t, t)``."""
    if isinstance(t, tuple):
        return t
    return (t, t)

# classes

class PreNorm(nn.Module):
    """Run ``fn`` on a LayerNorm-ed copy of the input.

    Args:
        dim: size of the last dimension, handed to ``nn.LayerNorm``.
        fn: callable invoked with the normalised input; extra keyword
            arguments given to ``forward`` are passed through to it.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        normalised = self.norm(x)
        return self.fn(normalised, **kwargs)

class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim: input and output width.
        hidden_dim: width of the intermediate layer.
        dropout: probability used by both dropout layers.
    """

    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        y = self.net(x)
        return y

class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: width of the input (and output) tokens.
        heads: number of attention heads.
        dim_head: width of each head.
        dropout: dropout probability applied after the output projection.

    The output projection is replaced by an identity when there is a single
    head whose width already equals ``dim``.
    """

    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = heads * dim_head
        needs_projection = heads != 1 or dim_head != dim

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim = -1)
        # One bias-free linear layer produces queries, keys and values at once.
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)

        if needs_projection:
            self.to_out = nn.Sequential(
                nn.Linear(inner_dim, dim),
                nn.Dropout(dropout)
            )
        else:
            self.to_out = nn.Identity()

    def forward(self, x):
        # Split the joint projection into q, k, v and move heads to their own axis.
        q, k, v = (
            rearrange(part, 'b n (h d) -> b h n d', h = self.heads)
            for part in self.to_qkv(x).chunk(3, dim = -1)
        )

        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        weights = self.attend(scores)

        mixed = torch.matmul(weights, v)
        # Merge the heads back into the feature axis before the output projection.
        merged = rearrange(mixed, 'b h n d -> b n (h d)')
        return self.to_out(merged)

class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm blocks, each attention followed by an MLP.

    Both sub-layers of every block are wrapped in ``PreNorm`` and applied with
    a residual connection.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
        super().__init__()
        blocks = []
        for _ in range(depth):
            attention = PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout))
            mlp = PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            blocks.append(nn.ModuleList([attention, mlp]))
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        for attention, mlp in self.layers:
            # Residual connection around each sub-layer.
            x = attention(x) + x
            x = mlp(x) + x
        return x



class ViT(nn.Module):
    """Vision Transformer encoder followed by a two-layer projection head.

    The image is cut into non-overlapping patches, every patch is linearly
    embedded, a learnable class token is prepended and learnable position
    embeddings are added. After the transformer, all tokens (class token and
    patches) are flattened and mapped to an ``mlp_dim``-wide feature by
    ``linear_crossover``; the head ``g`` projects that feature to
    ``feature_dim``.

    Args (keyword-only):
        image_size: int or ``(height, width)`` of the input images.
        patch_size: int or ``(height, width)`` of a patch; must divide the
            image size.
        num_classes: accepted for signature compatibility; not used.
        dim: token embedding width.
        depth: number of transformer blocks.
        heads: attention heads per block.
        mlp_dim: hidden width of the transformer MLPs and width of the
            returned feature.
        pool: must be ``'cls'`` or ``'mean'``; validated but otherwise unused
            because all tokens are flattened.
        channels: number of image channels.
        dim_head: width of each attention head.
        dropout: dropout probability inside the transformer.
        emb_dropout: dropout probability applied to the embedded tokens.
        feature_dim: output width of the projection head.
    """

    def __init__(self, *, image_size=32, patch_size=8, num_classes=1000, dim=1024, depth=6, heads=16, mlp_dim=2048, pool = 'cls', channels = 3, dim_head = 64, dropout = 0.1, emb_dropout = 0.1, feature_dim=128):
        super().__init__()
        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)

        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'

        num_patches = (image_height // patch_height) * (image_width // patch_width)
        patch_dim = channels * patch_height * patch_width
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_height, p2 = patch_width),
            nn.Linear(patch_dim, dim),
        )

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        # forward() flattens every token (patches + class token), so the input
        # width is (num_patches + 1) * dim. This was hard-coded to 17408, which
        # is the value for the defaults only (17 tokens * 1024).
        self.linear_crossover = nn.Linear((num_patches + 1) * dim, mlp_dim)

        # Projection head used for the contrastive loss.
        self.g = nn.Sequential(nn.Linear(mlp_dim, 512, bias=False),
                               nn.BatchNorm1d(512),
                               nn.ReLU(inplace=True),
                               nn.Linear(512, feature_dim, bias=True))

    def forward(self, x):
        """Encode a batch of images.

        Returns:
            ``(feature, out)``: the ``mlp_dim``-wide encoder feature and the
            ``feature_dim``-wide projection, both L2-normalised along the
            last dimension.
        """
        x = self.to_patch_embedding(x)

        b, n, _ = x.shape

        # Prepend one copy of the class token to every sequence in the batch.
        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]

        x = self.dropout(x)

        x = self.transformer(x)

        # Keep all tokens: flatten (tokens, dim) into a single vector per image.
        x = torch.flatten(x, 1)

        feature = self.linear_crossover(x)

        out = self.g(feature)

        return F.normalize(feature, dim=-1), F.normalize(out, dim=-1)

        




We train the encoder network and the projection head to maximize agreement using a contrastive loss. The default number of epochs is 1 for time efficiency, since a single epoch can take about 10 minutes to run in Google Colab. Task 2 is to calculate the contrastive loss.
To evaluate the influence of the temperature value on the contrastive loss, we run this training process 3 times with different temperature values (0.1, 0.5 and 1.0).

In [3]:
import argparse
import os

import numpy as np
import pandas as pd
import torch
import torch.optim as optim
!pip install thop
from thop import profile, clever_format
from torch.utils.data import DataLoader
from tqdm import tqdm


import math

def contrastive_loss(out_1, out_2, temperature):
    """Contrastive loss between two batches of paired projections.

    Row ``i`` of ``out_1`` and row ``i`` of ``out_2`` form a positive pair;
    every other row of the concatenated batch acts as a negative.

    Args:
        out_1: tensor of shape ``[B, D]``, projections of the first views.
        out_2: tensor of shape ``[B, D]``, projections of the second views.
        temperature: positive scalar dividing the similarities before ``exp``.

    Returns:
        Scalar tensor: the mean over all ``2*B`` anchors of
        ``-log(pos_sim / sum of similarities to every other sample)``.
    """
    # ------------------------------------------------------------------
    # START OF YOUR CODE
    # ------------------------------------------------------------------
    # Task2: implement contrastive loss function and return loss variable
    # hint: loss formula could refer to the slides
    # input: out_1, out_2, temperature
    # output: loss variable

    # Take the batch size from the inputs rather than from a notebook global,
    # so the function works for any batch (e.g. a smaller final batch).
    n_pairs = out_1.size(0)

    out = torch.cat([out_1, out_2], dim=0)
    # [2*B, 2*B]
    sim_matrix = torch.exp(torch.mm(out, out.t().contiguous()) / temperature)
    # Drop each sample's similarity with itself (the diagonal).
    mask = ~torch.eye(2 * n_pairs, dtype=torch.bool, device=sim_matrix.device)
    # [2*B, 2*B-1]
    sim_matrix = sim_matrix.masked_select(mask).view(2 * n_pairs, -1)

    # compute loss
    pos_sim = torch.exp(torch.sum(out_1 * out_2, dim=-1) / temperature)
    # [2*B] - the positive similarity is the same for both members of a pair
    pos_sim = torch.cat([pos_sim, pos_sim], dim=0)
    loss = (- torch.log(pos_sim / sim_matrix.sum(dim=-1))).mean()

    # ------------------------------------------------------------------
    # END OF YOUR CODE
    # ------------------------------------------------------------------

    return loss

# train for one epoch to learn unique features
def train(net, data_loader, train_optimizer, train_scheduler, temperature):
    """Run one training epoch and return the mean contrastive loss per sample.

    Args:
        net: model returning ``(feature, out)`` for a batch of images.
        data_loader: yields ``(pos_1, pos_2, target)`` batches.
        train_optimizer: optimizer stepped once per batch.
        train_scheduler: learning-rate scheduler, also stepped once per batch.
        temperature: temperature passed to ``contrastive_loss``.

    The progress-bar text reads the module-level names ``epoch`` and
    ``epochs``, so both must be defined before this is called.
    """
    net.train()
    total_loss, total_num, train_bar = 0.0, 0, tqdm(data_loader)
    for pos_1, pos_2, _ in train_bar:
        pos_1, pos_2 = pos_1.cuda(non_blocking=True), pos_2.cuda(non_blocking=True)
        _, out_1 = net(pos_1)
        _, out_2 = net(pos_2)

        loss = contrastive_loss(out_1, out_2, temperature)

        train_optimizer.zero_grad()
        loss.backward()
        train_optimizer.step()
        train_scheduler.step()

        # Weight the running average by the real batch length instead of the
        # global ``batch_size`` so it stays correct for a smaller final batch.
        num_in_batch = pos_1.size(0)
        total_num += num_in_batch
        total_loss += loss.item() * num_in_batch
        train_bar.set_description('Train Epoch: [{}/{}] Loss: {:.4f}'.format(epoch, epochs, total_loss / total_num))

    return total_loss / total_num


# test for one epoch, use weighted knn to find the most similar images' label to assign the test image
def test(net, memory_data_loader, test_data_loader, temperature):
    """Evaluate ``net`` with a weighted k-NN classifier over a feature bank.

    Features of every memory sample form the bank; each test sample votes with
    its ``k`` most similar bank entries, weighted by ``exp(similarity /
    temperature)``. Reads the module-level names ``k`` (neighbours), ``c``
    (number of classes), ``epoch`` and ``epochs`` (progress text).

    Returns:
        ``(top1, top5)`` accuracies in percent.
    """
    net.eval()
    top1_hits = 0.0
    top5_hits = 0.0
    seen = 0
    with torch.no_grad():
        # generate feature bank
        bank_chunks = []
        for images, _, _ in tqdm(memory_data_loader, desc='Feature extracting'):
            feats, _ = net(images.cuda(non_blocking=True))
            bank_chunks.append(feats)
        # [D, N]
        bank = torch.cat(bank_chunks, dim=0).t().contiguous()
        # [N]
        bank_labels = torch.tensor(memory_data_loader.dataset.targets, device=bank.device)

        # loop test data to predict the label by weighted knn search
        test_bar = tqdm(test_data_loader)
        for images, _, labels in test_bar:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)
            feats, _ = net(images)

            rows = images.size(0)
            seen += rows
            # similarity between each test feature and every bank feature ---> [B, N]
            similarity = torch.mm(feats, bank)
            # [B, K]
            neighbour_weight, neighbour_index = similarity.topk(k=k, dim=-1)
            # [B, K]
            neighbour_labels = torch.gather(bank_labels.expand(rows, -1), dim=-1, index=neighbour_index)
            neighbour_weight = (neighbour_weight / temperature).exp()

            # one-hot vote per neighbour ---> [B*K, C]
            votes = torch.zeros(rows * k, c, device=neighbour_labels.device)
            votes = votes.scatter(dim=-1, index=neighbour_labels.view(-1, 1), value=1.0)
            # weighted score ---> [B, C]
            class_scores = torch.sum(votes.view(rows, -1, c) * neighbour_weight.unsqueeze(dim=-1), dim=1)

            ranking = class_scores.argsort(dim=-1, descending=True)
            wanted = labels.unsqueeze(dim=-1)
            top1_hits += torch.sum((ranking[:, :1] == wanted).any(dim=-1).float()).item()
            top5_hits += torch.sum((ranking[:, :5] == wanted).any(dim=-1).float()).item()
            test_bar.set_description('Test Epoch: [{}/{}] Acc@1:{:.2f}% Acc@5:{:.2f}%'
                                     .format(epoch, epochs, top1_hits / seen * 100, top5_hits / seen * 100))

    return top1_hits / seen * 100, top5_hits / seen * 100



In [4]:
# Device string: 'cuda' when a GPU is visible to torch, otherwise 'cpu'.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Train SimCLR
import numpy as np

# Feature dim for latent vector, Temperature used in softmax, Top k most similar images used to predict the label
feature_dim, temp, k = 128, [0.1], 200
# Number of images in each mini-batch, Number of sweeps over the dataset to train
batch_size=128
#Coarse setting
#Fine setting
lr_start = 2.5e-5
lr_end =0.0158
max_lrvalue=2.35e-04 #Rule of Thumb is peaklr (from lambdalr test)*3/8
temp0 = 0.1 #contrastive loss temperature setting
schedulertype='onecyclelr' #'lambdalr' for testing range of training or 'onecyclelr' for actual training
if schedulertype=='lambdalr':
    epochs=10
    #epochs=10 #coarserg recommend 10 epochs, finerg recommend 100 epochs 
elif schedulertype=='onecyclelr':
    epochs=500
    #epochs=3
else:
    # Fail here: continuing would only hit a NameError on `epochs` a few lines below.
    raise ValueError("choose valid option for scheduler: 'lambdalr' or 'onecyclelr' (got {!r})".format(schedulertype))
smoothfactor=0.95 #Smooth Factor for smoothing contrastive loss    
IterationStr='It1'
loadmodel=0 #loadmodel=0 From scratch or loadmodel=1 Continue from presaved model 
# NOTE(review): absolute, user-specific output directory - adjust before running elsewhere.
pathtosave='/home/umaiyal/CS5260Project/results/Simclr_vit_RAdamv2'+schedulertype+'/'+IterationStr+'/'
# File-name stem encoding the run's main hyper-parameters.
save_name_pre = '{}_{}_{}_{}_{}'.format(feature_dim, temp0, k, batch_size, epochs)
csvfilename=pathtosave+'{}_statistics.csv'.format(save_name_pre)
modelfilename=pathtosave+'{}_model.pth'.format(save_name_pre)

# data prepare
# Options common to all three loaders.
_loader_opts = dict(batch_size=batch_size, num_workers=16, pin_memory=True)

# Augmented pairs for contrastive training; incomplete final batches are dropped.
train_data = CIFAR10Pair(root='data', train=True, transform=train_transform, download=True)
train_loader = DataLoader(train_data, shuffle=True, drop_last=True, **_loader_opts)

# Un-augmented training images: source of the k-NN feature bank in test().
memory_data = CIFAR10Pair(root='data', train=True, transform=test_transform, download=True)
memory_loader = DataLoader(memory_data, shuffle=False, **_loader_opts)

# Un-augmented held-out images.
test_data = CIFAR10Pair(root='data', train=False, transform=test_transform, download=True)
test_loader = DataLoader(test_data, shuffle=False, **_loader_opts)

import torch

# The model and every batch are moved with .cuda(), so a GPU is mandatory.
if not torch.cuda.is_available():
    raise RuntimeError('CUDA is required: the model and the batches are moved with .cuda()')
# Keep the original choice of GPU 2 when it exists; otherwise fall back to
# GPU 0 instead of failing on machines with fewer than three devices.
torch.cuda.set_device(2 if torch.cuda.device_count() > 2 else 0)

# model setup and optimizer config
model = ViT().cuda()

# Report parameter count and FLOPs for a single 3x32x32 input.
flops, params = profile(model, inputs=(torch.randn(1, 3, 32, 32).cuda(),))
flops, params = clever_format([flops, params])
print('# Model Params: {} FLOPs: {}'.format(params, flops))

# Base lr is 1.0 so that, under LambdaLR, the value returned by lrs() is the
# learning rate itself (LambdaLR multiplies the base lr by the lambda's output).
optimizer = optim.RAdam(model.parameters(), lr=1.0)
#optimizer=optim.Adam(model.parameters(),lr=1.0)

#exponentially increase learning rate from low to high
def lrs(batch):
   """Return the LR-range-test multiplier for scheduler step ``batch``.

   Interpolates linearly in log2 space from ``lr_start`` (step 0) to
   ``lr_end`` (step ``len(train_loader) * epochs``).
   """
   low = math.log2(lr_start)
   high = math.log2(lr_end)
   return 2**(low+(high-low)*batch/len(train_loader)/epochs)

# train() steps the scheduler once per batch, hence total_steps = batches * epochs.
if schedulertype=='lambdalr':
   scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lrs)
elif schedulertype=='onecyclelr':
   div_factorvalue=10#round(max_lrvalue/2.0e-5) 
   scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,div_factor=div_factorvalue,max_lr=max_lrvalue,total_steps=len(train_loader)*epochs,epochs=epochs)
else:
   # Fail here: printing and continuing would leave `scheduler` undefined.
   raise ValueError("choose valid option for scheduler: 'lambdalr' or 'onecyclelr' (got {!r})".format(schedulertype))

# Either resume from the checkpoint at `modelfilename` or start from scratch.
if loadmodel==1:
   checkpoint=torch.load(modelfilename)
   # strict=False: keys missing from / unexpected in the checkpoint are tolerated.
   model.load_state_dict(checkpoint['model_state_dict'],strict=False)
   model.to(device)
   optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
   scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
   # Continue with the epoch after the one stored in the checkpoint.
   # NOTE(review): the training loop only writes a checkpoint when test Acc@1
   # improves, so this is the epoch after the best one, not the last one run.
   startepoch=checkpoint['epoch']+1
   best_acc=checkpoint['best_acc']

   print(startepoch)
else:     
   startepoch=1
   best_acc=0
   

# Number of classes; test() reads it through the module-level name `c`.
c = len(memory_data.classes)

# Create the output directory (and parents) if needed; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(pathtosave, exist_ok=True)

# Per-epoch history buffers, indexed by epoch-1.
train_loss_epoch=torch.zeros(epochs)
smooth_loss_epoch=torch.zeros(epochs)
test_acc_1_epoch=torch.zeros(epochs)
test_acc_5_epoch=torch.zeros(epochs)
lr_epoch=torch.zeros(epochs)

# Rebuild the per-epoch history when resuming, otherwise start empty.
if loadmodel==1:

   df=pd.read_csv(csvfilename)
   # (results key, per-epoch buffer, column position in the statistics CSV).
   # Column 0 is the 'epoch' index written by to_csv(index_label='epoch').
   history_columns=[
      ('train_loss', train_loss_epoch, 1),
      ('test_acc@1', test_acc_1_epoch, 2),
      ('test_acc@5', test_acc_5_epoch, 3),
      ('smooth_loss', smooth_loss_epoch, 4),
      ('lr_epoch', lr_epoch, 5),
   ]
   results = {}
   for results_key, epoch_buffer, column in history_columns:
      # Keep only the epochs that precede the one training resumes from.
      values=pd.to_numeric(df.iloc[0:startepoch-1,column])
      epoch_buffer[0:values.size]=torch.tensor(values.to_numpy(), dtype=epoch_buffer.dtype)
      results[results_key]=values.tolist()

else:
   results = {'train_loss': [], 'test_acc@1': [], 'test_acc@5': [], 'smooth_loss': [], 'lr_epoch': []}


# Main loop: one training epoch, then a k-NN evaluation, then bookkeeping.
# `epoch` is also read as a global by train() and test() for their progress text.
for epoch in range(startepoch, epochs + 1):
    train_loss = train(model, train_loader, optimizer, scheduler, temp0)
    train_loss_epoch[epoch-1]=train_loss
    # Blend this epoch's loss with the previous smoothed value.
    # NOTE(review): the current loss gets weight `smoothfactor` (0.95) and the
    # history only 1-smoothfactor, so very little smoothing happens - confirm
    # the weights are not swapped.
    if epoch>1:
       smooth_loss=float(train_loss_epoch[epoch-1]*smoothfactor+smooth_loss_epoch[epoch-2]*(1.0-smoothfactor))
    else:
       smooth_loss=train_loss
    smooth_loss_epoch[epoch-1]=torch.tensor(smooth_loss)


    # Learning rate of the first param group after this epoch's scheduler steps.
    print(optimizer.param_groups[0]['lr'])
    print(smooth_loss_epoch[epoch-1])
    lr_epoch[epoch-1]=float(optimizer.param_groups[0]['lr'])
   
        
    results['train_loss'].append(train_loss)
    test_acc_1, test_acc_5 = test(model, memory_loader, test_loader, temp0)
    results['test_acc@1'].append(test_acc_1)
    results['test_acc@5'].append(test_acc_5)
    results['smooth_loss'].append(smooth_loss)
    results['lr_epoch'].append(optimizer.param_groups[0]['lr'])
    # save statistics
    # The whole history is rewritten every epoch, one row per epoch.
    data_frame = pd.DataFrame(data=results, index=range(1, epoch + 1))
    data_frame.to_csv(csvfilename, index_label='epoch')
    # Checkpoint only when top-1 accuracy improves on the best seen so far.
    if test_acc_1 > best_acc:
        best_acc = test_acc_1
        torch.save({'epoch':epoch,'model_state_dict':model.state_dict(),'optimizer_state_dict':optimizer.state_dict(),'scheduler_state_dict':scheduler.state_dict(),'best_acc':best_acc}, modelfilename)
    test_acc_1_epoch[epoch-1]=test_acc_1
    test_acc_5_epoch[epoch-1]=test_acc_5
    
# Learning rate recorded at the epoch with the lowest smoothed loss, and the
# max/min learning rates derived from it by the 3/8 and 3/80 rules of thumb.
minloss_loc = smooth_loss_epoch.argmin()
minloss_loclr = lr_epoch[minloss_loc]
print(f'lr corresponding to minloss={minloss_loclr}')
print(f'suggested maxlr={minloss_loclr*3/8}')
print(f'suggested minlr={minloss_loclr*3/80}')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
[91m[WARN] Cannot find rule for <class 'einops.layers.torch.Rearrange'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.container.Sequential'>. Treat it as zero Macs and zero Params.[00m
[INFO] Register zero_ops() for <class 'torch.nn.modules.dropout.Dropout'>.
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.normalization.LayerNorm'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class 'torch.nn.modules.activation.Softmax'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class '__main__.Attention'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find rule for <class '__main__.PreNorm'>. Treat it as zero Macs and zero Params.[00m
[91m[WARN] Cannot find r

Train Epoch: [1/500] Loss: 4.5624: 100%|██████████| 390/390 [00:35<00:00, 11.03it/s]


2.352319351544004e-05
tensor(4.5624)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 70.67it/s]
Test Epoch: [1/500] Acc@1:37.40% Acc@5:86.61%: 100%|██████████| 79/79 [00:02<00:00, 37.39it/s]
Train Epoch: [2/500] Loss: 4.1373: 100%|██████████| 390/390 [00:36<00:00, 10.80it/s]


2.3592763887969874e-05
tensor(4.1585)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.59it/s]
Test Epoch: [2/500] Acc@1:40.88% Acc@5:88.77%: 100%|██████████| 79/79 [00:02<00:00, 37.27it/s]
Train Epoch: [3/500] Loss: 3.9545: 100%|██████████| 390/390 [00:36<00:00, 10.74it/s]


2.3708680600681577e-05
tensor(3.9647)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.49it/s]
Test Epoch: [3/500] Acc@1:42.54% Acc@5:89.59%: 100%|██████████| 79/79 [00:02<00:00, 37.88it/s]
Train Epoch: [4/500] Loss: 3.7608: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


2.387089280693556e-05
tensor(3.7709)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.30it/s]
Test Epoch: [4/500] Acc@1:44.54% Acc@5:90.67%: 100%|██████████| 79/79 [00:02<00:00, 37.94it/s]
Train Epoch: [5/500] Loss: 3.5005: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


2.4079329352664483e-05
tensor(3.5141)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.23it/s]
Test Epoch: [5/500] Acc@1:46.95% Acc@5:91.65%: 100%|██████████| 79/79 [00:02<00:00, 37.56it/s]
Train Epoch: [6/500] Loss: 3.2941: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


2.433389880758489e-05
tensor(3.3051)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.13it/s]
Test Epoch: [6/500] Acc@1:48.00% Acc@5:92.21%: 100%|██████████| 79/79 [00:02<00:00, 38.10it/s]
Train Epoch: [7/500] Loss: 3.1402: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


2.4634489505302688e-05
tensor(3.1485)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.83it/s]
Test Epoch: [7/500] Acc@1:49.86% Acc@5:92.78%: 100%|██████████| 79/79 [00:02<00:00, 38.29it/s]
Train Epoch: [8/500] Loss: 3.0056: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


2.498096959229566e-05
tensor(3.0127)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.70it/s]
Test Epoch: [8/500] Acc@1:50.56% Acc@5:92.97%: 100%|██████████| 79/79 [00:02<00:00, 37.96it/s]
Train Epoch: [9/500] Loss: 2.8875: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


2.5373187085750594e-05
tensor(2.8938)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.02it/s]
Test Epoch: [9/500] Acc@1:51.54% Acc@5:92.95%: 100%|██████████| 79/79 [00:02<00:00, 37.40it/s]
Train Epoch: [10/500] Loss: 2.7811: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


2.5810969940230372e-05
tensor(2.7867)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.03it/s]
Test Epoch: [10/500] Acc@1:51.90% Acc@5:93.12%: 100%|██████████| 79/79 [00:02<00:00, 38.45it/s]
Train Epoch: [11/500] Loss: 2.6914: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


2.629412612314123e-05
tensor(2.6962)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.87it/s]
Test Epoch: [11/500] Acc@1:52.20% Acc@5:93.34%: 100%|██████████| 79/79 [00:02<00:00, 38.31it/s]
Train Epoch: [12/500] Loss: 2.5920: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


2.6822443698967563e-05
tensor(2.5972)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.54it/s]
Test Epoch: [12/500] Acc@1:52.27% Acc@5:93.67%: 100%|██████████| 79/79 [00:02<00:00, 37.93it/s]
Train Epoch: [13/500] Loss: 2.5185: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


2.7395690922236895e-05
tensor(2.5224)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [13/500] Acc@1:53.02% Acc@5:93.46%: 100%|██████████| 79/79 [00:02<00:00, 38.41it/s]
Train Epoch: [14/500] Loss: 2.4695: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


2.8013616339174865e-05
tensor(2.4721)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.62it/s]
Test Epoch: [14/500] Acc@1:53.26% Acc@5:93.97%: 100%|██████████| 79/79 [00:02<00:00, 37.98it/s]
Train Epoch: [15/500] Loss: 2.4044: 100%|██████████| 390/390 [00:36<00:00, 10.73it/s]


2.8675948898004692e-05
tensor(2.4078)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.07it/s]
Test Epoch: [15/500] Acc@1:53.95% Acc@5:94.08%: 100%|██████████| 79/79 [00:02<00:00, 37.64it/s]
Train Epoch: [16/500] Loss: 2.3473: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


2.938239806784347e-05
tensor(2.3503)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.69it/s]
Test Epoch: [16/500] Acc@1:53.73% Acc@5:94.30%: 100%|██████████| 79/79 [00:02<00:00, 37.70it/s]
Train Epoch: [17/500] Loss: 2.3069: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


3.013265396614319e-05
tensor(2.3091)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.05it/s]
Test Epoch: [17/500] Acc@1:54.19% Acc@5:94.23%: 100%|██████████| 79/79 [00:02<00:00, 38.40it/s]
Train Epoch: [18/500] Loss: 2.2761: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


3.0926387494619964e-05
tensor(2.2778)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.83it/s]
Test Epoch: [18/500] Acc@1:54.55% Acc@5:94.60%: 100%|██████████| 79/79 [00:02<00:00, 36.95it/s]
Train Epoch: [19/500] Loss: 2.2143: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


3.1763250483612314e-05
tensor(2.2175)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.85it/s]
Test Epoch: [19/500] Acc@1:54.66% Acc@5:94.56%: 100%|██████████| 79/79 [00:02<00:00, 37.72it/s]
Train Epoch: [20/500] Loss: 2.2051: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


3.264287584480528e-05
tensor(2.2057)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [20/500] Acc@1:54.94% Acc@5:94.75%: 100%|██████████| 79/79 [00:02<00:00, 37.86it/s]
Train Epoch: [21/500] Loss: 2.1512: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


3.356487773225277e-05
tensor(2.1540)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.98it/s]
Test Epoch: [21/500] Acc@1:55.31% Acc@5:94.95%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [22/500] Loss: 2.1197: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


3.4528851711628395e-05
tensor(2.1214)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.61it/s]
Test Epoch: [22/500] Acc@1:55.16% Acc@5:94.85%: 100%|██████████| 79/79 [00:02<00:00, 37.09it/s]
Train Epoch: [23/500] Loss: 2.0805: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


3.553437493762949e-05
tensor(2.0825)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.83it/s]
Test Epoch: [23/500] Acc@1:55.23% Acc@5:95.10%: 100%|██████████| 79/79 [00:02<00:00, 37.71it/s]
Train Epoch: [24/500] Loss: 2.0591: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


3.658100633945768e-05
tensor(2.0602)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.86it/s]
Test Epoch: [24/500] Acc@1:55.73% Acc@5:95.04%: 100%|██████████| 79/79 [00:02<00:00, 37.69it/s]
Train Epoch: [25/500] Loss: 2.0400: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


3.766828681429346e-05
tensor(2.0410)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.31it/s]
Test Epoch: [25/500] Acc@1:55.70% Acc@5:95.15%: 100%|██████████| 79/79 [00:02<00:00, 38.14it/s]
Train Epoch: [26/500] Loss: 2.0058: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


3.879573942868096e-05
tensor(2.0076)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.50it/s]
Test Epoch: [26/500] Acc@1:55.61% Acc@5:94.87%: 100%|██████████| 79/79 [00:02<00:00, 37.48it/s]
Train Epoch: [27/500] Loss: 1.9853: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


3.9962869627733724e-05
tensor(1.9864)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.09it/s]
Test Epoch: [27/500] Acc@1:56.92% Acc@5:95.18%: 100%|██████████| 79/79 [00:02<00:00, 37.75it/s]
Train Epoch: [28/500] Loss: 1.9637: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


4.116916545207039e-05
tensor(1.9649)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.40it/s]
Test Epoch: [28/500] Acc@1:56.80% Acc@5:95.05%: 100%|██████████| 79/79 [00:02<00:00, 37.65it/s]
Train Epoch: [29/500] Loss: 1.9163: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


4.241409776238448e-05
tensor(1.9187)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.09it/s]
Test Epoch: [29/500] Acc@1:56.39% Acc@5:95.17%: 100%|██████████| 79/79 [00:02<00:00, 37.99it/s]
Train Epoch: [30/500] Loss: 1.9069: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


4.3697120471550553e-05
tensor(1.9075)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.43it/s]
Test Epoch: [30/500] Acc@1:57.49% Acc@5:95.31%: 100%|██████████| 79/79 [00:02<00:00, 37.47it/s]
Train Epoch: [31/500] Loss: 1.8883: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


4.501767078416403e-05
tensor(1.8892)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.67it/s]
Test Epoch: [31/500] Acc@1:57.51% Acc@5:95.62%: 100%|██████████| 79/79 [00:02<00:00, 37.89it/s]
Train Epoch: [32/500] Loss: 1.8606: 100%|██████████| 390/390 [00:36<00:00, 10.73it/s]


4.637516944341045e-05
tensor(1.8621)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.05it/s]
Test Epoch: [32/500] Acc@1:57.44% Acc@5:95.72%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [33/500] Loss: 1.8341: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


4.776902098515523e-05
tensor(1.8355)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.99it/s]
Test Epoch: [33/500] Acc@1:57.67% Acc@5:95.49%: 100%|██████████| 79/79 [00:02<00:00, 38.23it/s]
Train Epoch: [34/500] Loss: 1.8373: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


4.919861399914306e-05
tensor(1.8372)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [34/500] Acc@1:57.63% Acc@5:95.59%: 100%|██████████| 79/79 [00:02<00:00, 38.15it/s]
Train Epoch: [35/500] Loss: 1.8258: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


5.0663321397191575e-05
tensor(1.8264)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.63it/s]
Test Epoch: [35/500] Acc@1:58.13% Acc@5:95.60%: 100%|██████████| 79/79 [00:02<00:00, 38.40it/s]
Train Epoch: [36/500] Loss: 1.8091: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


5.2162500688262355e-05
tensor(1.8100)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.47it/s]
Test Epoch: [36/500] Acc@1:58.59% Acc@5:95.61%: 100%|██████████| 79/79 [00:02<00:00, 36.98it/s]
Train Epoch: [37/500] Loss: 1.7880: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


5.36954942602886e-05
tensor(1.7891)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [37/500] Acc@1:58.92% Acc@5:95.80%: 100%|██████████| 79/79 [00:02<00:00, 35.91it/s]
Train Epoch: [38/500] Loss: 1.7740: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


5.526162966863518e-05
tensor(1.7748)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.32it/s]
Test Epoch: [38/500] Acc@1:58.55% Acc@5:95.67%: 100%|██████████| 79/79 [00:02<00:00, 38.21it/s]
Train Epoch: [39/500] Loss: 1.7619: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


5.6860219931065216e-05
tensor(1.7625)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.56it/s]
Test Epoch: [39/500] Acc@1:59.32% Acc@5:95.61%: 100%|██████████| 79/79 [00:02<00:00, 38.69it/s]
Train Epoch: [40/500] Loss: 1.7421: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


5.849056382908351e-05
tensor(1.7432)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.34it/s]
Test Epoch: [40/500] Acc@1:59.05% Acc@5:95.61%: 100%|██████████| 79/79 [00:02<00:00, 38.46it/s]
Train Epoch: [41/500] Loss: 1.7343: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


6.015194621552462e-05
tensor(1.7348)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.47it/s]
Test Epoch: [41/500] Acc@1:59.61% Acc@5:95.80%: 100%|██████████| 79/79 [00:02<00:00, 38.06it/s]
Train Epoch: [42/500] Loss: 1.7039: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


6.184363832825089e-05
tensor(1.7054)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.82it/s]
Test Epoch: [42/500] Acc@1:60.07% Acc@5:95.90%: 100%|██████████| 79/79 [00:02<00:00, 37.98it/s]
Train Epoch: [43/500] Loss: 1.6866: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


6.356489810982233e-05
tensor(1.6876)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.76it/s]
Test Epoch: [43/500] Acc@1:59.69% Acc@5:95.74%: 100%|██████████| 79/79 [00:02<00:00, 38.19it/s]
Train Epoch: [44/500] Loss: 1.6779: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


6.53149705329989e-05
tensor(1.6784)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.46it/s]
Test Epoch: [44/500] Acc@1:60.29% Acc@5:95.98%: 100%|██████████| 79/79 [00:02<00:00, 37.18it/s]
Train Epoch: [45/500] Loss: 1.6661: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


6.709308793193149e-05
tensor(1.6667)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.82it/s]
Test Epoch: [45/500] Acc@1:60.27% Acc@5:96.01%: 100%|██████████| 79/79 [00:02<00:00, 37.61it/s]
Train Epoch: [46/500] Loss: 1.6395: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


6.88984703388973e-05
tensor(1.6408)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.24it/s]
Test Epoch: [46/500] Acc@1:60.08% Acc@5:95.86%: 100%|██████████| 79/79 [00:02<00:00, 38.36it/s]
Train Epoch: [47/500] Loss: 1.6421: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


7.073032582643094e-05
tensor(1.6421)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.12it/s]
Test Epoch: [47/500] Acc@1:60.48% Acc@5:96.03%: 100%|██████████| 79/79 [00:02<00:00, 37.75it/s]
Train Epoch: [48/500] Loss: 1.6263: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


7.258785085470195e-05
tensor(1.6271)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.05it/s]
Test Epoch: [48/500] Acc@1:60.84% Acc@5:96.12%: 100%|██████████| 79/79 [00:02<00:00, 37.56it/s]
Train Epoch: [49/500] Loss: 1.5979: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


7.447023062398606e-05
tensor(1.5994)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.70it/s]
Test Epoch: [49/500] Acc@1:61.27% Acc@5:96.24%: 100%|██████████| 79/79 [00:02<00:00, 38.75it/s]
Train Epoch: [50/500] Loss: 1.5968: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


7.637663943207534e-05
tensor(1.5970)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.16it/s]
Test Epoch: [50/500] Acc@1:61.29% Acc@5:96.30%: 100%|██████████| 79/79 [00:02<00:00, 37.61it/s]
Train Epoch: [51/500] Loss: 1.5927: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


7.8306241036471e-05
tensor(1.5929)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.78it/s]
Test Epoch: [51/500] Acc@1:61.68% Acc@5:96.38%: 100%|██████████| 79/79 [00:02<00:00, 37.69it/s]
Train Epoch: [52/500] Loss: 1.5701: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


8.025818902119937e-05
tensor(1.5713)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.33it/s]
Test Epoch: [52/500] Acc@1:61.71% Acc@5:96.39%: 100%|██████████| 79/79 [00:02<00:00, 37.94it/s]
Train Epoch: [53/500] Loss: 1.5572: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


8.223162716809078e-05
tensor(1.5579)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.64it/s]
Test Epoch: [53/500] Acc@1:61.44% Acc@5:96.40%: 100%|██████████| 79/79 [00:02<00:00, 38.51it/s]
Train Epoch: [54/500] Loss: 1.5466: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


8.422568983235796e-05
tensor(1.5472)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.11it/s]
Test Epoch: [54/500] Acc@1:61.93% Acc@5:96.54%: 100%|██████████| 79/79 [00:02<00:00, 36.26it/s]
Train Epoch: [55/500] Loss: 1.5334: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


8.623950232230926e-05
tensor(1.5341)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.39it/s]
Test Epoch: [55/500] Acc@1:62.74% Acc@5:96.63%: 100%|██████████| 79/79 [00:02<00:00, 38.40it/s]
Train Epoch: [56/500] Loss: 1.5264: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


8.827218128303071e-05
tensor(1.5268)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.86it/s]
Test Epoch: [56/500] Acc@1:62.51% Acc@5:96.34%: 100%|██████████| 79/79 [00:02<00:00, 38.13it/s]
Train Epoch: [57/500] Loss: 1.5173: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


9.032283508386775e-05
tensor(1.5178)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.44it/s]
Test Epoch: [57/500] Acc@1:62.14% Acc@5:96.45%: 100%|██████████| 79/79 [00:02<00:00, 38.28it/s]
Train Epoch: [58/500] Loss: 1.5047: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


9.239056420953725e-05
tensor(1.5053)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.65it/s]
Test Epoch: [58/500] Acc@1:62.84% Acc@5:96.61%: 100%|██████████| 79/79 [00:02<00:00, 38.10it/s]
Train Epoch: [59/500] Loss: 1.5109: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


9.447446165469835e-05
tensor(1.5106)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.56it/s]
Test Epoch: [59/500] Acc@1:62.44% Acc@5:96.68%: 100%|██████████| 79/79 [00:02<00:00, 38.04it/s]
Train Epoch: [60/500] Loss: 1.5019: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


9.657361332180832e-05
tensor(1.5024)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.77it/s]
Test Epoch: [60/500] Acc@1:63.63% Acc@5:96.67%: 100%|██████████| 79/79 [00:02<00:00, 38.03it/s]
Train Epoch: [61/500] Loss: 1.4686: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


9.868709842208984e-05
tensor(1.4703)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.52it/s]
Test Epoch: [61/500] Acc@1:63.65% Acc@5:96.97%: 100%|██████████| 79/79 [00:02<00:00, 37.65it/s]
Train Epoch: [62/500] Loss: 1.4752: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00010081398987943309
tensor(1.4750)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.16it/s]
Test Epoch: [62/500] Acc@1:63.36% Acc@5:96.93%: 100%|██████████| 79/79 [00:02<00:00, 38.04it/s]
Train Epoch: [63/500] Loss: 1.4567: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00010295335473705592
tensor(1.4577)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.84it/s]
Test Epoch: [63/500] Acc@1:63.95% Acc@5:96.94%: 100%|██████████| 79/79 [00:02<00:00, 38.59it/s]
Train Epoch: [64/500] Loss: 1.4573: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.0001051042545667436
tensor(1.4574)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.46it/s]
Test Epoch: [64/500] Acc@1:63.65% Acc@5:96.94%: 100%|██████████| 79/79 [00:02<00:00, 38.24it/s]
Train Epoch: [65/500] Loss: 1.4256: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00010726574588048828
tensor(1.4271)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.30it/s]
Test Epoch: [65/500] Acc@1:64.02% Acc@5:97.03%: 100%|██████████| 79/79 [00:02<00:00, 38.28it/s]
Train Epoch: [66/500] Loss: 1.4402: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00010943688054434854
tensor(1.4395)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.94it/s]
Test Epoch: [66/500] Acc@1:63.29% Acc@5:96.95%: 100%|██████████| 79/79 [00:02<00:00, 36.98it/s]
Train Epoch: [67/500] Loss: 1.4335: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00011161670619434607
tensor(1.4338)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.25it/s]
Test Epoch: [67/500] Acc@1:63.91% Acc@5:96.88%: 100%|██████████| 79/79 [00:02<00:00, 38.15it/s]
Train Epoch: [68/500] Loss: 1.4251: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00011380426665421861
tensor(1.4255)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.60it/s]
Test Epoch: [68/500] Acc@1:64.20% Acc@5:97.01%: 100%|██████████| 79/79 [00:02<00:00, 38.61it/s]
Train Epoch: [69/500] Loss: 1.4125: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00011599860235484457
tensor(1.4131)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.61it/s]
Test Epoch: [69/500] Acc@1:64.14% Acc@5:96.79%: 100%|██████████| 79/79 [00:02<00:00, 37.46it/s]
Train Epoch: [70/500] Loss: 1.4068: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00011819875075515632
tensor(1.4071)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.67it/s]
Test Epoch: [70/500] Acc@1:64.57% Acc@5:97.02%: 100%|██████████| 79/79 [00:02<00:00, 37.47it/s]
Train Epoch: [71/500] Loss: 1.4002: 100%|██████████| 390/390 [00:36<00:00, 10.73it/s]


0.00012040374676435679
tensor(1.4006)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.94it/s]
Test Epoch: [71/500] Acc@1:64.49% Acc@5:97.16%: 100%|██████████| 79/79 [00:02<00:00, 37.74it/s]
Train Epoch: [72/500] Loss: 1.3941: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0001226126231652548
tensor(1.3944)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.73it/s]
Test Epoch: [72/500] Acc@1:64.62% Acc@5:97.06%: 100%|██████████| 79/79 [00:02<00:00, 38.11it/s]
Train Epoch: [73/500] Loss: 1.3940: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0001248244110385329
tensor(1.3940)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.50it/s]
Test Epoch: [73/500] Acc@1:64.24% Acc@5:97.04%: 100%|██████████| 79/79 [00:02<00:00, 38.71it/s]
Train Epoch: [74/500] Loss: 1.3764: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00012703814018776206
tensor(1.3772)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.40it/s]
Test Epoch: [74/500] Acc@1:64.68% Acc@5:97.04%: 100%|██████████| 79/79 [00:02<00:00, 38.04it/s]
Train Epoch: [75/500] Loss: 1.3604: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00012925283956497614
tensor(1.3613)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.93it/s]
Test Epoch: [75/500] Acc@1:65.22% Acc@5:97.14%: 100%|██████████| 79/79 [00:02<00:00, 38.69it/s]
Train Epoch: [76/500] Loss: 1.3527: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.0001314675376966206
tensor(1.3532)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.93it/s]
Test Epoch: [76/500] Acc@1:65.98% Acc@5:97.21%: 100%|██████████| 79/79 [00:02<00:00, 38.65it/s]
Train Epoch: [77/500] Loss: 1.3577: 100%|██████████| 390/390 [00:36<00:00, 10.73it/s]


0.00013368126310968716
tensor(1.3575)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.38it/s]
Test Epoch: [77/500] Acc@1:65.44% Acc@5:97.10%: 100%|██████████| 79/79 [00:02<00:00, 38.50it/s]
Train Epoch: [78/500] Loss: 1.3546: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00013589304475784873
tensor(1.3547)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.32it/s]
Test Epoch: [78/500] Acc@1:65.19% Acc@5:97.20%: 100%|██████████| 79/79 [00:02<00:00, 37.99it/s]
Train Epoch: [79/500] Loss: 1.3269: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00013810191244740681
tensor(1.3283)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.86it/s]
Test Epoch: [79/500] Acc@1:65.39% Acc@5:97.22%: 100%|██████████| 79/79 [00:02<00:00, 36.42it/s]
Train Epoch: [80/500] Loss: 1.3418: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00014030689726286517
tensor(1.3412)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.61it/s]
Test Epoch: [80/500] Acc@1:64.86% Acc@5:97.14%: 100%|██████████| 79/79 [00:02<00:00, 38.20it/s]
Train Epoch: [81/500] Loss: 1.3277: 100%|██████████| 390/390 [00:36<00:00, 10.64it/s]


0.00014250703199194267
tensor(1.3283)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.24it/s]
Test Epoch: [81/500] Acc@1:65.72% Acc@5:97.26%: 100%|██████████| 79/79 [00:02<00:00, 38.09it/s]
Train Epoch: [82/500] Loss: 1.3358: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00014470135154983924
tensor(1.3354)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.26it/s]
Test Epoch: [82/500] Acc@1:65.92% Acc@5:97.48%: 100%|██████████| 79/79 [00:02<00:00, 37.91it/s]
Train Epoch: [83/500] Loss: 1.3212: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00014688889340256812
tensor(1.3219)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.04it/s]
Test Epoch: [83/500] Acc@1:65.82% Acc@5:97.24%: 100%|██████████| 79/79 [00:02<00:00, 38.23it/s]
Train Epoch: [84/500] Loss: 1.3251: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0001490686979891698
tensor(1.3249)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.10it/s]
Test Epoch: [84/500] Acc@1:66.20% Acc@5:97.40%: 100%|██████████| 79/79 [00:02<00:00, 37.82it/s]
Train Epoch: [85/500] Loss: 1.3079: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00015123980914262135
tensor(1.3088)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.48it/s]
Test Epoch: [85/500] Acc@1:65.87% Acc@5:97.18%: 100%|██████████| 79/79 [00:02<00:00, 38.03it/s]
Train Epoch: [86/500] Loss: 1.3060: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.0001534012745092573
tensor(1.3061)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.47it/s]
Test Epoch: [86/500] Acc@1:65.81% Acc@5:97.40%: 100%|██████████| 79/79 [00:02<00:00, 38.37it/s]
Train Epoch: [87/500] Loss: 1.3030: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00015555214596651783
tensor(1.3031)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.21it/s]
Test Epoch: [87/500] Acc@1:65.83% Acc@5:97.26%: 100%|██████████| 79/79 [00:02<00:00, 38.43it/s]
Train Epoch: [88/500] Loss: 1.3052: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00015769148003884066
tensor(1.3051)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.87it/s]
Test Epoch: [88/500] Acc@1:65.97% Acc@5:97.40%: 100%|██████████| 79/79 [00:02<00:00, 38.48it/s]
Train Epoch: [89/500] Loss: 1.2851: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00015981833831151497
tensor(1.2861)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.98it/s]
Test Epoch: [89/500] Acc@1:65.97% Acc@5:97.51%: 100%|██████████| 79/79 [00:02<00:00, 37.57it/s]
Train Epoch: [90/500] Loss: 1.2613: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0001619317878423149
tensor(1.2625)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.68it/s]
Test Epoch: [90/500] Acc@1:66.65% Acc@5:97.51%: 100%|██████████| 79/79 [00:02<00:00, 38.08it/s]
Train Epoch: [91/500] Loss: 1.2687: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00016403090157073323
tensor(1.2684)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.75it/s]
Test Epoch: [91/500] Acc@1:66.54% Acc@5:97.42%: 100%|██████████| 79/79 [00:02<00:00, 38.31it/s]
Train Epoch: [92/500] Loss: 1.2639: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00016611475872463436
tensor(1.2641)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.81it/s]
Test Epoch: [92/500] Acc@1:66.77% Acc@5:97.46%: 100%|██████████| 79/79 [00:02<00:00, 37.30it/s]
Train Epoch: [93/500] Loss: 1.2604: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00016818244522414923
tensor(1.2605)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.90it/s]
Test Epoch: [93/500] Acc@1:67.02% Acc@5:97.44%: 100%|██████████| 79/79 [00:02<00:00, 37.90it/s]
Train Epoch: [94/500] Loss: 1.2751: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.0001702330540826348
tensor(1.2744)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.36it/s]
Test Epoch: [94/500] Acc@1:67.00% Acc@5:97.47%: 100%|██████████| 79/79 [00:02<00:00, 38.22it/s]
Train Epoch: [95/500] Loss: 1.2600: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00017226568580452182
tensor(1.2607)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.83it/s]
Test Epoch: [95/500] Acc@1:67.13% Acc@5:97.38%: 100%|██████████| 79/79 [00:02<00:00, 38.20it/s]
Train Epoch: [96/500] Loss: 1.2548: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00017427944877987706
tensor(1.2551)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.86it/s]
Test Epoch: [96/500] Acc@1:67.06% Acc@5:97.58%: 100%|██████████| 79/79 [00:02<00:00, 38.46it/s]
Train Epoch: [97/500] Loss: 1.2417: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00017627345967550595
tensor(1.2424)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.88it/s]
Test Epoch: [97/500] Acc@1:67.17% Acc@5:97.68%: 100%|██████████| 79/79 [00:02<00:00, 38.31it/s]
Train Epoch: [98/500] Loss: 1.2411: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00017824684382242506
tensor(1.2411)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.47it/s]
Test Epoch: [98/500] Acc@1:67.13% Acc@5:97.43%: 100%|██████████| 79/79 [00:02<00:00, 38.35it/s]
Train Epoch: [99/500] Loss: 1.2313: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00018019873559953415
tensor(1.2318)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.55it/s]
Test Epoch: [99/500] Acc@1:66.49% Acc@5:97.58%: 100%|██████████| 79/79 [00:02<00:00, 38.02it/s]
Train Epoch: [100/500] Loss: 1.2397: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00018212827881331877
tensor(1.2393)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.87it/s]
Test Epoch: [100/500] Acc@1:66.93% Acc@5:97.61%: 100%|██████████| 79/79 [00:02<00:00, 38.14it/s]
Train Epoch: [101/500] Loss: 1.2236: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00018403462707341833
tensor(1.2244)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.41it/s]
Test Epoch: [101/500] Acc@1:66.99% Acc@5:97.42%: 100%|██████████| 79/79 [00:02<00:00, 37.34it/s]
Train Epoch: [102/500] Loss: 1.2264: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00018591694416389328
tensor(1.2263)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.78it/s]
Test Epoch: [102/500] Acc@1:67.09% Acc@5:97.63%: 100%|██████████| 79/79 [00:02<00:00, 38.21it/s]
Train Epoch: [103/500] Loss: 1.2199: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0001877744044100296
tensor(1.2203)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.45it/s]
Test Epoch: [103/500] Acc@1:67.63% Acc@5:97.59%: 100%|██████████| 79/79 [00:02<00:00, 38.48it/s]
Train Epoch: [104/500] Loss: 1.2184: 100%|██████████| 390/390 [00:36<00:00, 10.74it/s]


0.0001896061930405195
tensor(1.2185)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.01it/s]
Test Epoch: [104/500] Acc@1:68.47% Acc@5:97.61%: 100%|██████████| 79/79 [00:02<00:00, 38.03it/s]
Train Epoch: [105/500] Loss: 1.2130: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.0001914115065448593
tensor(1.2133)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.88it/s]
Test Epoch: [105/500] Acc@1:67.62% Acc@5:97.76%: 100%|██████████| 79/79 [00:02<00:00, 38.45it/s]
Train Epoch: [106/500] Loss: 1.2056: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00019318955302580768
tensor(1.2060)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.54it/s]
Test Epoch: [106/500] Acc@1:67.69% Acc@5:97.74%: 100%|██████████| 79/79 [00:02<00:00, 38.49it/s]
Train Epoch: [107/500] Loss: 1.1938: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00019493955254674974
tensor(1.1944)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.68it/s]
Test Epoch: [107/500] Acc@1:68.21% Acc@5:97.82%: 100%|██████████| 79/79 [00:02<00:00, 37.15it/s]
Train Epoch: [108/500] Loss: 1.1882: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00019666073747381493
tensor(1.1885)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.50it/s]
Test Epoch: [108/500] Acc@1:67.56% Acc@5:97.66%: 100%|██████████| 79/79 [00:02<00:00, 38.18it/s]
Train Epoch: [109/500] Loss: 1.1843: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00019835235281259788
tensor(1.1845)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.51it/s]
Test Epoch: [109/500] Acc@1:67.49% Acc@5:97.69%: 100%|██████████| 79/79 [00:02<00:00, 38.41it/s]
Train Epoch: [110/500] Loss: 1.1826: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.0002000136565393354
tensor(1.1827)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.77it/s]
Test Epoch: [110/500] Acc@1:67.32% Acc@5:97.73%: 100%|██████████| 79/79 [00:02<00:00, 38.48it/s]
Train Epoch: [111/500] Loss: 1.1743: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00020164391992639374
tensor(1.1747)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.16it/s]
Test Epoch: [111/500] Acc@1:68.12% Acc@5:97.85%: 100%|██████████| 79/79 [00:02<00:00, 38.55it/s]
Train Epoch: [112/500] Loss: 1.1694: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00020324242786192343
tensor(1.1697)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.81it/s]
Test Epoch: [112/500] Acc@1:67.80% Acc@5:97.69%: 100%|██████████| 79/79 [00:02<00:00, 38.80it/s]
Train Epoch: [113/500] Loss: 1.1691: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002048084791635418
tensor(1.1691)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.67it/s]
Test Epoch: [113/500] Acc@1:68.18% Acc@5:97.80%: 100%|██████████| 79/79 [00:02<00:00, 38.08it/s]
Train Epoch: [114/500] Loss: 1.1710: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00020634138688590528
tensor(1.1709)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.53it/s]
Test Epoch: [114/500] Acc@1:68.06% Acc@5:97.79%: 100%|██████████| 79/79 [00:02<00:00, 37.75it/s]
Train Epoch: [115/500] Loss: 1.1653: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00020784047862203644
tensor(1.1656)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.32it/s]
Test Epoch: [115/500] Acc@1:68.23% Acc@5:97.49%: 100%|██████████| 79/79 [00:02<00:00, 38.00it/s]
Train Epoch: [116/500] Loss: 1.1656: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.0002093050967982743
tensor(1.1656)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.04it/s]
Test Epoch: [116/500] Acc@1:68.87% Acc@5:97.79%: 100%|██████████| 79/79 [00:02<00:00, 37.94it/s]
Train Epoch: [117/500] Loss: 1.1444: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.0002107345989627175
tensor(1.1455)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.97it/s]
Test Epoch: [117/500] Acc@1:68.17% Acc@5:97.74%: 100%|██████████| 79/79 [00:02<00:00, 38.05it/s]
Train Epoch: [118/500] Loss: 1.1480: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00021212835806703459
tensor(1.1478)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.25it/s]
Test Epoch: [118/500] Acc@1:68.52% Acc@5:97.92%: 100%|██████████| 79/79 [00:02<00:00, 38.61it/s]
Train Epoch: [119/500] Loss: 1.1411: 100%|██████████| 390/390 [00:36<00:00, 10.65it/s]


0.00021348576274151773
tensor(1.1414)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.82it/s]
Test Epoch: [119/500] Acc@1:68.68% Acc@5:97.74%: 100%|██████████| 79/79 [00:02<00:00, 38.38it/s]
Train Epoch: [120/500] Loss: 1.1245: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00021480621756325856
tensor(1.1253)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.24it/s]
Test Epoch: [120/500] Acc@1:68.54% Acc@5:97.83%: 100%|██████████| 79/79 [00:02<00:00, 37.94it/s]
Train Epoch: [121/500] Loss: 1.1366: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00021608914331732956
tensor(1.1361)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.56it/s]
Test Epoch: [121/500] Acc@1:68.76% Acc@5:97.87%: 100%|██████████| 79/79 [00:02<00:00, 37.97it/s]
Train Epoch: [122/500] Loss: 1.1255: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00021733397725085533
tensor(1.1261)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.28it/s]
Test Epoch: [122/500] Acc@1:69.29% Acc@5:97.89%: 100%|██████████| 79/79 [00:02<00:00, 38.09it/s]
Train Epoch: [123/500] Loss: 1.1240: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00021854017331986316
tensor(1.1241)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.27it/s]
Test Epoch: [123/500] Acc@1:68.99% Acc@5:98.06%: 100%|██████████| 79/79 [00:02<00:00, 38.06it/s]
Train Epoch: [124/500] Loss: 1.1315: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00021970720242880405
tensor(1.1311)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.79it/s]
Test Epoch: [124/500] Acc@1:69.21% Acc@5:97.95%: 100%|██████████| 79/79 [00:02<00:00, 38.78it/s]
Train Epoch: [125/500] Loss: 1.1153: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00022083455266263966
tensor(1.1161)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [125/500] Acc@1:69.08% Acc@5:97.85%: 100%|██████████| 79/79 [00:02<00:00, 38.42it/s]
Train Epoch: [126/500] Loss: 1.1278: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00022192172951139254
tensor(1.1272)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.75it/s]
Test Epoch: [126/500] Acc@1:69.54% Acc@5:98.08%: 100%|██████████| 79/79 [00:02<00:00, 38.46it/s]
Train Epoch: [127/500] Loss: 1.1110: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00022296825608706216
tensor(1.1118)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.29it/s]
Test Epoch: [127/500] Acc@1:69.79% Acc@5:97.95%: 100%|██████████| 79/79 [00:02<00:00, 38.22it/s]
Train Epoch: [128/500] Loss: 1.1079: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.0002239736733328109
tensor(1.1081)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.38it/s]
Test Epoch: [128/500] Acc@1:69.16% Acc@5:98.03%: 100%|██████████| 79/79 [00:02<00:00, 37.59it/s]
Train Epoch: [129/500] Loss: 1.0966: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002249375402243283
tensor(1.0972)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.64it/s]
Test Epoch: [129/500] Acc@1:69.04% Acc@5:97.98%: 100%|██████████| 79/79 [00:02<00:00, 37.99it/s]
Train Epoch: [130/500] Loss: 1.0920: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002258594339632857
tensor(1.0922)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.85it/s]
Test Epoch: [130/500] Acc@1:69.70% Acc@5:98.17%: 100%|██████████| 79/79 [00:02<00:00, 38.00it/s]
Train Epoch: [131/500] Loss: 1.0813: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00022673895016279576
tensor(1.0819)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.93it/s]
Test Epoch: [131/500] Acc@1:69.70% Acc@5:97.92%: 100%|██████████| 79/79 [00:02<00:00, 38.01it/s]
Train Epoch: [132/500] Loss: 1.0824: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00022757570302479595
tensor(1.0823)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.44it/s]
Test Epoch: [132/500] Acc@1:69.98% Acc@5:98.18%: 100%|██████████| 79/79 [00:02<00:00, 37.92it/s]
Train Epoch: [133/500] Loss: 1.0893: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00022836932550927827
tensor(1.0889)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.77it/s]
Test Epoch: [133/500] Acc@1:69.60% Acc@5:98.02%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [134/500] Loss: 1.0775: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00022911946949529053
tensor(1.0780)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.59it/s]
Test Epoch: [134/500] Acc@1:69.51% Acc@5:98.13%: 100%|██████████| 79/79 [00:02<00:00, 37.84it/s]
Train Epoch: [135/500] Loss: 1.0685: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00022982580593363893
tensor(1.0690)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.10it/s]
Test Epoch: [135/500] Acc@1:69.71% Acc@5:98.01%: 100%|██████████| 79/79 [00:02<00:00, 38.38it/s]
Train Epoch: [136/500] Loss: 1.0716: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002304880249912249
tensor(1.0714)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.06it/s]
Test Epoch: [136/500] Acc@1:69.55% Acc@5:97.73%: 100%|██████████| 79/79 [00:02<00:00, 37.55it/s]
Train Epoch: [137/500] Loss: 1.0646: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002311058361869527
tensor(1.0649)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.30it/s]
Test Epoch: [137/500] Acc@1:70.55% Acc@5:97.95%: 100%|██████████| 79/79 [00:02<00:00, 38.57it/s]
Train Epoch: [138/500] Loss: 1.0606: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00023167896851914837
tensor(1.0608)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.27it/s]
Test Epoch: [138/500] Acc@1:70.45% Acc@5:98.23%: 100%|██████████| 79/79 [00:02<00:00, 37.85it/s]
Train Epoch: [139/500] Loss: 1.0573: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00023220717058443425
tensor(1.0574)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.33it/s]
Test Epoch: [139/500] Acc@1:70.15% Acc@5:97.94%: 100%|██████████| 79/79 [00:02<00:00, 38.55it/s]
Train Epoch: [140/500] Loss: 1.0637: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00023269021068800645
tensor(1.0634)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.24it/s]
Test Epoch: [140/500] Acc@1:69.89% Acc@5:97.97%: 100%|██████████| 79/79 [00:02<00:00, 38.30it/s]
Train Epoch: [141/500] Loss: 1.0491: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00023312787694526734
tensor(1.0498)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.19it/s]
Test Epoch: [141/500] Acc@1:70.42% Acc@5:98.14%: 100%|██████████| 79/79 [00:02<00:00, 38.02it/s]
Train Epoch: [142/500] Loss: 1.0530: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00023351997737476826
tensor(1.0528)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.78it/s]
Test Epoch: [142/500] Acc@1:69.81% Acc@5:97.95%: 100%|██████████| 79/79 [00:02<00:00, 37.29it/s]
Train Epoch: [143/500] Loss: 1.0466: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00023386633998242194
tensor(1.0469)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.67it/s]
Test Epoch: [143/500] Acc@1:69.91% Acc@5:98.10%: 100%|██████████| 79/79 [00:02<00:00, 37.90it/s]
Train Epoch: [144/500] Loss: 1.0447: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00023416681283694728
tensor(1.0448)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.13it/s]
Test Epoch: [144/500] Acc@1:70.28% Acc@5:98.07%: 100%|██████████| 79/79 [00:02<00:00, 37.86it/s]
Train Epoch: [145/500] Loss: 1.0451: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00023442126413651372
tensor(1.0450)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.71it/s]
Test Epoch: [145/500] Acc@1:70.88% Acc@5:98.03%: 100%|██████████| 79/79 [00:02<00:00, 38.48it/s]
Train Epoch: [146/500] Loss: 1.0320: 100%|██████████| 390/390 [00:36<00:00, 10.65it/s]


0.00023462958226655583
tensor(1.0326)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.66it/s]
Test Epoch: [146/500] Acc@1:70.54% Acc@5:98.01%: 100%|██████████| 79/79 [00:02<00:00, 38.01it/s]
Train Epoch: [147/500] Loss: 1.0266: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00023479167584873297
tensor(1.0269)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.80it/s]
Test Epoch: [147/500] Acc@1:70.48% Acc@5:98.03%: 100%|██████████| 79/79 [00:02<00:00, 36.89it/s]
Train Epoch: [148/500] Loss: 1.0356: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023490747378101195
tensor(1.0351)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.71it/s]
Test Epoch: [148/500] Acc@1:70.86% Acc@5:98.12%: 100%|██████████| 79/79 [00:02<00:00, 38.52it/s]
Train Epoch: [149/500] Loss: 1.0241: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00023497692526885605
tensor(1.0247)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.93it/s]
Test Epoch: [149/500] Acc@1:70.96% Acc@5:98.15%: 100%|██████████| 79/79 [00:02<00:00, 37.76it/s]
Train Epoch: [150/500] Loss: 1.0125: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00023499999996888014
tensor(1.0131)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.42it/s]
Test Epoch: [150/500] Acc@1:70.87% Acc@5:98.11%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [151/500] Loss: 1.0182: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002349952423931086
tensor(1.0180)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.49it/s]
Test Epoch: [151/500] Acc@1:70.95% Acc@5:98.08%: 100%|██████████| 79/79 [00:02<00:00, 37.40it/s]
Train Epoch: [152/500] Loss: 1.0292: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.000234981018595468
tensor(1.0286)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.88it/s]
Test Epoch: [152/500] Acc@1:71.11% Acc@5:98.11%: 100%|██████████| 79/79 [00:02<00:00, 38.94it/s]
Train Epoch: [153/500] Loss: 1.0116: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00023495732972193646
tensor(1.0124)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.09it/s]
Test Epoch: [153/500] Acc@1:71.14% Acc@5:98.23%: 100%|██████████| 79/79 [00:02<00:00, 37.45it/s]
Train Epoch: [154/500] Loss: 1.0079: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.000234924177681071
tensor(1.0081)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.87it/s]
Test Epoch: [154/500] Acc@1:71.24% Acc@5:98.22%: 100%|██████████| 79/79 [00:02<00:00, 38.09it/s]
Train Epoch: [155/500] Loss: 1.0059: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.0002348815651438539
tensor(1.0060)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.26it/s]
Test Epoch: [155/500] Acc@1:71.54% Acc@5:98.30%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [156/500] Loss: 0.9940: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00023482949554347755
tensor(0.9946)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.82it/s]
Test Epoch: [156/500] Acc@1:71.88% Acc@5:98.23%: 100%|██████████| 79/79 [00:02<00:00, 37.70it/s]
Train Epoch: [157/500] Loss: 0.9864: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00023476797307506766
tensor(0.9868)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.42it/s]
Test Epoch: [157/500] Acc@1:71.89% Acc@5:98.34%: 100%|██████████| 79/79 [00:02<00:00, 37.16it/s]
Train Epoch: [158/500] Loss: 0.9913: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023469700269534554
tensor(0.9911)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.66it/s]
Test Epoch: [158/500] Acc@1:71.66% Acc@5:98.14%: 100%|██████████| 79/79 [00:02<00:00, 37.53it/s]
Train Epoch: [159/500] Loss: 0.9936: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023461659012222836
tensor(0.9935)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.75it/s]
Test Epoch: [159/500] Acc@1:71.65% Acc@5:98.37%: 100%|██████████| 79/79 [00:02<00:00, 37.80it/s]
Train Epoch: [160/500] Loss: 0.9892: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023452674183436893
tensor(0.9894)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.81it/s]
Test Epoch: [160/500] Acc@1:71.52% Acc@5:98.43%: 100%|██████████| 79/79 [00:02<00:00, 37.64it/s]
Train Epoch: [161/500] Loss: 0.9761: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00023442746507063334
tensor(0.9768)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.86it/s]
Test Epoch: [161/500] Acc@1:72.08% Acc@5:98.33%: 100%|██████████| 79/79 [00:02<00:00, 37.27it/s]
Train Epoch: [162/500] Loss: 0.9794: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.000234318767829518
tensor(0.9793)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.73it/s]
Test Epoch: [162/500] Acc@1:71.92% Acc@5:98.46%: 100%|██████████| 79/79 [00:02<00:00, 37.65it/s]
Train Epoch: [163/500] Loss: 0.9886: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00023420065886850508
tensor(0.9881)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.18it/s]
Test Epoch: [163/500] Acc@1:72.51% Acc@5:98.47%: 100%|██████████| 79/79 [00:02<00:00, 36.95it/s]
Train Epoch: [164/500] Loss: 0.9823: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023407314770335698
tensor(0.9826)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.00it/s]
Test Epoch: [164/500] Acc@1:72.15% Acc@5:98.31%: 100%|██████████| 79/79 [00:02<00:00, 38.20it/s]
Train Epoch: [165/500] Loss: 0.9760: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00023393624460734967
tensor(0.9763)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.33it/s]
Test Epoch: [165/500] Acc@1:72.60% Acc@5:98.26%: 100%|██████████| 79/79 [00:02<00:00, 38.19it/s]
Train Epoch: [166/500] Loss: 0.9647: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023378996061044498
tensor(0.9653)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.56it/s]
Test Epoch: [166/500] Acc@1:72.02% Acc@5:98.22%: 100%|██████████| 79/79 [00:02<00:00, 37.66it/s]
Train Epoch: [167/500] Loss: 0.9610: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00023363430749840207
tensor(0.9612)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.42it/s]
Test Epoch: [167/500] Acc@1:72.45% Acc@5:98.36%: 100%|██████████| 79/79 [00:02<00:00, 37.49it/s]
Train Epoch: [168/500] Loss: 0.9686: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00023346929781182762
tensor(0.9682)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.71it/s]
Test Epoch: [168/500] Acc@1:71.87% Acc@5:98.35%: 100%|██████████| 79/79 [00:02<00:00, 37.74it/s]
Train Epoch: [169/500] Loss: 0.9625: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00023329494484516564
tensor(0.9628)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.19it/s]
Test Epoch: [169/500] Acc@1:72.22% Acc@5:98.37%: 100%|██████████| 79/79 [00:02<00:00, 38.33it/s]
Train Epoch: [170/500] Loss: 0.9611: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002331112626456264
tensor(0.9612)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.33it/s]
Test Epoch: [170/500] Acc@1:72.13% Acc@5:98.27%: 100%|██████████| 79/79 [00:02<00:00, 38.09it/s]
Train Epoch: [171/500] Loss: 0.9605: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00023291826601205457
tensor(0.9605)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.12it/s]
Test Epoch: [171/500] Acc@1:72.84% Acc@5:98.26%: 100%|██████████| 79/79 [00:02<00:00, 38.22it/s]
Train Epoch: [172/500] Loss: 0.9525: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.000232715970493737
tensor(0.9529)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.83it/s]
Test Epoch: [172/500] Acc@1:72.25% Acc@5:98.43%: 100%|██████████| 79/79 [00:02<00:00, 37.12it/s]
Train Epoch: [173/500] Loss: 0.9493: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002325043923891498
tensor(0.9495)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [173/500] Acc@1:72.87% Acc@5:98.41%: 100%|██████████| 79/79 [00:02<00:00, 37.65it/s]
Train Epoch: [174/500] Loss: 0.9425: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002322835487446455
tensor(0.9428)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.49it/s]
Test Epoch: [174/500] Acc@1:72.92% Acc@5:98.50%: 100%|██████████| 79/79 [00:02<00:00, 37.43it/s]
Train Epoch: [175/500] Loss: 0.9320: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00023205345735307929
tensor(0.9325)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.29it/s]
Test Epoch: [175/500] Acc@1:72.33% Acc@5:98.40%: 100%|██████████| 79/79 [00:02<00:00, 37.97it/s]
Train Epoch: [176/500] Loss: 0.9363: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002318141367523758
tensor(0.9361)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.85it/s]
Test Epoch: [176/500] Acc@1:72.82% Acc@5:98.37%: 100%|██████████| 79/79 [00:02<00:00, 37.90it/s]
Train Epoch: [177/500] Loss: 0.9334: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00023156560622403542
tensor(0.9335)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.35it/s]
Test Epoch: [177/500] Acc@1:72.89% Acc@5:98.46%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [178/500] Loss: 0.9386: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.0002313078857915808
tensor(0.9383)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.61it/s]
Test Epoch: [178/500] Acc@1:73.46% Acc@5:98.41%: 100%|██████████| 79/79 [00:02<00:00, 37.61it/s]
Train Epoch: [179/500] Loss: 0.9461: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002310409962189437
tensor(0.9457)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.94it/s]
Test Epoch: [179/500] Acc@1:73.14% Acc@5:98.49%: 100%|██████████| 79/79 [00:02<00:00, 37.74it/s]
Train Epoch: [180/500] Loss: 0.9346: 100%|██████████| 390/390 [00:36<00:00, 10.63it/s]


0.00023076495900879198
tensor(0.9352)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.36it/s]
Test Epoch: [180/500] Acc@1:72.84% Acc@5:98.52%: 100%|██████████| 79/79 [00:02<00:00, 38.43it/s]
Train Epoch: [181/500] Loss: 0.9304: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00023047979640079727
tensor(0.9306)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.62it/s]
Test Epoch: [181/500] Acc@1:73.27% Acc@5:98.47%: 100%|██████████| 79/79 [00:02<00:00, 38.47it/s]
Train Epoch: [182/500] Loss: 0.9287: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00023018553136984308
tensor(0.9288)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.15it/s]
Test Epoch: [182/500] Acc@1:73.16% Acc@5:98.40%: 100%|██████████| 79/79 [00:02<00:00, 38.70it/s]
Train Epoch: [183/500] Loss: 0.9247: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002298821876241739
tensor(0.9249)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.48it/s]
Test Epoch: [183/500] Acc@1:72.95% Acc@5:98.42%: 100%|██████████| 79/79 [00:02<00:00, 37.76it/s]
Train Epoch: [184/500] Loss: 0.9281: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00022956978960348494
tensor(0.9279)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.27it/s]
Test Epoch: [184/500] Acc@1:73.40% Acc@5:98.37%: 100%|██████████| 79/79 [00:02<00:00, 37.92it/s]
Train Epoch: [185/500] Loss: 0.9327: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00022924836247695317
tensor(0.9325)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.72it/s]
Test Epoch: [185/500] Acc@1:73.64% Acc@5:98.54%: 100%|██████████| 79/79 [00:02<00:00, 37.83it/s]
Train Epoch: [186/500] Loss: 0.9221: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.0002289179321412095
tensor(0.9227)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.40it/s]
Test Epoch: [186/500] Acc@1:73.32% Acc@5:98.42%: 100%|██████████| 79/79 [00:02<00:00, 38.36it/s]
Train Epoch: [187/500] Loss: 0.9239: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00022857852521825228
tensor(0.9238)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.76it/s]
Test Epoch: [187/500] Acc@1:73.27% Acc@5:98.43%: 100%|██████████| 79/79 [00:02<00:00, 38.36it/s]
Train Epoch: [188/500] Loss: 0.9219: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00022823016905330253
tensor(0.9220)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.85it/s]
Test Epoch: [188/500] Acc@1:73.39% Acc@5:98.42%: 100%|██████████| 79/79 [00:02<00:00, 38.05it/s]
Train Epoch: [189/500] Loss: 0.9127: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00022787289171260063
tensor(0.9132)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.86it/s]
Test Epoch: [189/500] Acc@1:73.54% Acc@5:98.54%: 100%|██████████| 79/79 [00:02<00:00, 38.09it/s]
Train Epoch: [190/500] Loss: 0.9038: 100%|██████████| 390/390 [00:36<00:00, 10.65it/s]


0.0002275067219811453
tensor(0.9042)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.74it/s]
Test Epoch: [190/500] Acc@1:73.71% Acc@5:98.65%: 100%|██████████| 79/79 [00:02<00:00, 38.44it/s]
Train Epoch: [191/500] Loss: 0.9079: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00022713168936037436
tensor(0.9078)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.51it/s]
Test Epoch: [191/500] Acc@1:74.11% Acc@5:98.54%: 100%|██████████| 79/79 [00:02<00:00, 38.30it/s]
Train Epoch: [192/500] Loss: 0.9009: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00022674782406578776
tensor(0.9013)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.52it/s]
Test Epoch: [192/500] Acc@1:73.57% Acc@5:98.50%: 100%|██████████| 79/79 [00:02<00:00, 37.79it/s]
Train Epoch: [193/500] Loss: 0.9144: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00022635515702451345
tensor(0.9138)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.78it/s]
Test Epoch: [193/500] Acc@1:73.41% Acc@5:98.55%: 100%|██████████| 79/79 [00:02<00:00, 38.34it/s]
Train Epoch: [194/500] Loss: 0.9054: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002259537198728154
tensor(0.9058)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.96it/s]
Test Epoch: [194/500] Acc@1:73.32% Acc@5:98.55%: 100%|██████████| 79/79 [00:02<00:00, 37.70it/s]
Train Epoch: [195/500] Loss: 0.8977: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002255435449535448
tensor(0.8981)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.51it/s]
Test Epoch: [195/500] Acc@1:73.41% Acc@5:98.56%: 100%|██████████| 79/79 [00:02<00:00, 37.81it/s]
Train Epoch: [196/500] Loss: 0.9063: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002251246653135345
tensor(0.9059)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.77it/s]
Test Epoch: [196/500] Acc@1:73.75% Acc@5:98.56%: 100%|██████████| 79/79 [00:02<00:00, 38.26it/s]
Train Epoch: [197/500] Loss: 0.8829: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00022469711470093605
tensor(0.8840)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.86it/s]
Test Epoch: [197/500] Acc@1:73.75% Acc@5:98.44%: 100%|██████████| 79/79 [00:02<00:00, 38.16it/s]
Train Epoch: [198/500] Loss: 0.8984: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00022426092756250114
tensor(0.8977)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 69.05it/s]
Test Epoch: [198/500] Acc@1:74.04% Acc@5:98.51%: 100%|██████████| 79/79 [00:02<00:00, 37.96it/s]
Train Epoch: [199/500] Loss: 0.8960: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00022381613904080613
tensor(0.8961)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.66it/s]
Test Epoch: [199/500] Acc@1:73.78% Acc@5:98.42%: 100%|██████████| 79/79 [00:02<00:00, 37.68it/s]
Train Epoch: [200/500] Loss: 0.8982: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00022336278497142055
tensor(0.8981)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.78it/s]
Test Epoch: [200/500] Acc@1:74.49% Acc@5:98.58%: 100%|██████████| 79/79 [00:02<00:00, 38.09it/s]
Train Epoch: [201/500] Loss: 0.8923: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00022290090188002023
tensor(0.8926)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.34it/s]
Test Epoch: [201/500] Acc@1:74.14% Acc@5:98.43%: 100%|██████████| 79/79 [00:02<00:00, 38.21it/s]
Train Epoch: [202/500] Loss: 0.8877: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00022243052697944416
tensor(0.8879)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.40it/s]
Test Epoch: [202/500] Acc@1:73.72% Acc@5:98.50%: 100%|██████████| 79/79 [00:02<00:00, 37.76it/s]
Train Epoch: [203/500] Loss: 0.8729: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00022195169816669656
tensor(0.8736)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.20it/s]
Test Epoch: [203/500] Acc@1:74.43% Acc@5:98.55%: 100%|██████████| 79/79 [00:02<00:00, 37.90it/s]
Train Epoch: [204/500] Loss: 0.8861: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00022146445401989364
tensor(0.8855)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.25it/s]
Test Epoch: [204/500] Acc@1:74.00% Acc@5:98.60%: 100%|██████████| 79/79 [00:02<00:00, 37.62it/s]
Train Epoch: [205/500] Loss: 0.8819: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002209688337951553
tensor(0.8820)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.46it/s]
Test Epoch: [205/500] Acc@1:74.12% Acc@5:98.55%: 100%|██████████| 79/79 [00:02<00:00, 37.94it/s]
Train Epoch: [206/500] Loss: 0.8825: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00022046487742344238
tensor(0.8825)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.68it/s]
Test Epoch: [206/500] Acc@1:74.43% Acc@5:98.67%: 100%|██████████| 79/79 [00:02<00:00, 39.00it/s]
Train Epoch: [207/500] Loss: 0.8732: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002199526255073397
tensor(0.8736)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.29it/s]
Test Epoch: [207/500] Acc@1:74.47% Acc@5:98.66%: 100%|██████████| 79/79 [00:02<00:00, 37.36it/s]
Train Epoch: [208/500] Loss: 0.8793: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002194321193177845
tensor(0.8790)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.30it/s]
Test Epoch: [208/500] Acc@1:74.40% Acc@5:98.49%: 100%|██████████| 79/79 [00:02<00:00, 37.63it/s]
Train Epoch: [209/500] Loss: 0.8701: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.0002189034007907416
tensor(0.8705)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.43it/s]
Test Epoch: [209/500] Acc@1:74.79% Acc@5:98.62%: 100%|██████████| 79/79 [00:02<00:00, 38.38it/s]
Train Epoch: [210/500] Loss: 0.8697: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00021836651252382463
tensor(0.8697)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.51it/s]
Test Epoch: [210/500] Acc@1:74.50% Acc@5:98.48%: 100%|██████████| 79/79 [00:02<00:00, 37.59it/s]
Train Epoch: [211/500] Loss: 0.8733: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00021782149777286398
tensor(0.8731)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.60it/s]
Test Epoch: [211/500] Acc@1:74.29% Acc@5:98.55%: 100%|██████████| 79/79 [00:02<00:00, 36.77it/s]
Train Epoch: [212/500] Loss: 0.8623: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00021726840044842188
tensor(0.8628)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.60it/s]
Test Epoch: [212/500] Acc@1:74.28% Acc@5:98.61%: 100%|██████████| 79/79 [00:02<00:00, 37.95it/s]
Train Epoch: [213/500] Loss: 0.8535: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002167072651122545
tensor(0.8540)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.37it/s]
Test Epoch: [213/500] Acc@1:74.53% Acc@5:98.71%: 100%|██████████| 79/79 [00:02<00:00, 37.72it/s]
Train Epoch: [214/500] Loss: 0.8687: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00021613813697372186
tensor(0.8680)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.25it/s]
Test Epoch: [214/500] Acc@1:74.45% Acc@5:98.62%: 100%|██████████| 79/79 [00:02<00:00, 37.39it/s]
Train Epoch: [215/500] Loss: 0.8568: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00021556106188614532
tensor(0.8574)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.58it/s]
Test Epoch: [215/500] Acc@1:75.15% Acc@5:98.65%: 100%|██████████| 79/79 [00:02<00:00, 38.21it/s]
Train Epoch: [216/500] Loss: 0.8602: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00021497608634311335
tensor(0.8601)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.80it/s]
Test Epoch: [216/500] Acc@1:74.76% Acc@5:98.55%: 100%|██████████| 79/79 [00:02<00:00, 37.35it/s]
Train Epoch: [217/500] Loss: 0.8543: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002143832574747356
tensor(0.8546)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.50it/s]
Test Epoch: [217/500] Acc@1:74.86% Acc@5:98.52%: 100%|██████████| 79/79 [00:02<00:00, 38.32it/s]
Train Epoch: [218/500] Loss: 0.8592: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00021378262304384572
tensor(0.8589)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.28it/s]
Test Epoch: [218/500] Acc@1:74.63% Acc@5:98.75%: 100%|██████████| 79/79 [00:02<00:00, 37.80it/s]
Train Epoch: [219/500] Loss: 0.8548: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00021317423144215323
tensor(0.8550)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.72it/s]
Test Epoch: [219/500] Acc@1:74.54% Acc@5:98.61%: 100%|██████████| 79/79 [00:02<00:00, 38.16it/s]
Train Epoch: [220/500] Loss: 0.8569: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002125581316863449
tensor(0.8568)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.66it/s]
Test Epoch: [220/500] Acc@1:74.73% Acc@5:98.65%: 100%|██████████| 79/79 [00:02<00:00, 35.87it/s]
Train Epoch: [221/500] Loss: 0.8567: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002119343734141352
tensor(0.8567)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.52it/s]
Test Epoch: [221/500] Acc@1:74.95% Acc@5:98.61%: 100%|██████████| 79/79 [00:02<00:00, 37.78it/s]
Train Epoch: [222/500] Loss: 0.8530: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0002113030068802675
tensor(0.8532)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.16it/s]
Test Epoch: [222/500] Acc@1:74.57% Acc@5:98.65%: 100%|██████████| 79/79 [00:02<00:00, 38.27it/s]
Train Epoch: [223/500] Loss: 0.8430: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00021066408295246489
tensor(0.8435)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.29it/s]
Test Epoch: [223/500] Acc@1:75.12% Acc@5:98.73%: 100%|██████████| 79/79 [00:02<00:00, 37.55it/s]
Train Epoch: [224/500] Loss: 0.8476: 100%|██████████| 390/390 [00:36<00:00, 10.64it/s]


0.00021001765310733206
tensor(0.8474)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.33it/s]
Test Epoch: [224/500] Acc@1:74.97% Acc@5:98.58%: 100%|██████████| 79/79 [00:02<00:00, 37.95it/s]
Train Epoch: [225/500] Loss: 0.8445: 100%|██████████| 390/390 [00:36<00:00, 10.65it/s]


0.00020936376942620787
tensor(0.8446)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.45it/s]
Test Epoch: [225/500] Acc@1:74.48% Acc@5:98.46%: 100%|██████████| 79/79 [00:02<00:00, 37.92it/s]
Train Epoch: [226/500] Loss: 0.8341: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0002087024845909693
tensor(0.8346)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.68it/s]
Test Epoch: [226/500] Acc@1:75.42% Acc@5:98.58%: 100%|██████████| 79/79 [00:02<00:00, 37.48it/s]
Train Epoch: [227/500] Loss: 0.8372: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00020803385187978695
tensor(0.8370)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.57it/s]
Test Epoch: [227/500] Acc@1:74.72% Acc@5:98.59%: 100%|██████████| 79/79 [00:02<00:00, 37.81it/s]
Train Epoch: [228/500] Loss: 0.8319: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00020735792516283255
tensor(0.8322)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.25it/s]
Test Epoch: [228/500] Acc@1:74.61% Acc@5:98.48%: 100%|██████████| 79/79 [00:02<00:00, 38.75it/s]
Train Epoch: [229/500] Loss: 0.8400: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00020667475889793888
tensor(0.8396)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.56it/s]
Test Epoch: [229/500] Acc@1:74.39% Acc@5:98.51%: 100%|██████████| 79/79 [00:02<00:00, 37.90it/s]
Train Epoch: [230/500] Loss: 0.8382: 100%|██████████| 390/390 [00:36<00:00, 10.66it/s]


0.00020598440812621212
tensor(0.8383)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.84it/s]
Test Epoch: [230/500] Acc@1:75.36% Acc@5:98.62%: 100%|██████████| 79/79 [00:02<00:00, 38.08it/s]
Train Epoch: [231/500] Loss: 0.8315: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00020528692846759727
tensor(0.8318)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.48it/s]
Test Epoch: [231/500] Acc@1:75.58% Acc@5:98.54%: 100%|██████████| 79/79 [00:02<00:00, 37.78it/s]
Train Epoch: [232/500] Loss: 0.8309: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00020458237611639714
tensor(0.8309)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.08it/s]
Test Epoch: [232/500] Acc@1:75.23% Acc@5:98.56%: 100%|██████████| 79/79 [00:02<00:00, 38.31it/s]
Train Epoch: [233/500] Loss: 0.8354: 100%|██████████| 390/390 [00:36<00:00, 10.72it/s]


0.00020387080783674483
tensor(0.8352)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.33it/s]
Test Epoch: [233/500] Acc@1:75.17% Acc@5:98.68%: 100%|██████████| 79/79 [00:02<00:00, 38.37it/s]
Train Epoch: [234/500] Loss: 0.8241: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00020315228095803026
tensor(0.8246)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.76it/s]
Test Epoch: [234/500] Acc@1:75.54% Acc@5:98.65%: 100%|██████████| 79/79 [00:02<00:00, 37.90it/s]
Train Epoch: [235/500] Loss: 0.8123: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0002024268533702814
tensor(0.8129)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.66it/s]
Test Epoch: [235/500] Acc@1:75.24% Acc@5:98.67%: 100%|██████████| 79/79 [00:02<00:00, 38.63it/s]
Train Epoch: [236/500] Loss: 0.8162: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00020169458351950036
tensor(0.8161)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.97it/s]
Test Epoch: [236/500] Acc@1:75.14% Acc@5:98.56%: 100%|██████████| 79/79 [00:02<00:00, 38.28it/s]
Train Epoch: [237/500] Loss: 0.8235: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00020095553040295417
tensor(0.8231)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.33it/s]
Test Epoch: [237/500] Acc@1:76.04% Acc@5:98.62%: 100%|██████████| 79/79 [00:02<00:00, 38.64it/s]
Train Epoch: [238/500] Loss: 0.8243: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00020020975356442174
tensor(0.8242)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.80it/s]
Test Epoch: [238/500] Acc@1:76.06% Acc@5:98.74%: 100%|██████████| 79/79 [00:02<00:00, 37.71it/s]
Train Epoch: [239/500] Loss: 0.8232: 100%|██████████| 390/390 [00:36<00:00, 10.71it/s]


0.00019945731308939653
tensor(0.8232)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.57it/s]
Test Epoch: [239/500] Acc@1:75.40% Acc@5:98.68%: 100%|██████████| 79/79 [00:02<00:00, 38.14it/s]
Train Epoch: [240/500] Loss: 0.8147: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00019869826960024558
tensor(0.8151)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.91it/s]
Test Epoch: [240/500] Acc@1:75.29% Acc@5:98.81%: 100%|██████████| 79/79 [00:02<00:00, 38.06it/s]
Train Epoch: [241/500] Loss: 0.8200: 100%|██████████| 390/390 [00:36<00:00, 10.65it/s]


0.0001979326842513254
tensor(0.8198)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 67.73it/s]
Test Epoch: [241/500] Acc@1:75.41% Acc@5:98.64%: 100%|██████████| 79/79 [00:02<00:00, 38.21it/s]
Train Epoch: [242/500] Loss: 0.8190: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0001971606187240547
tensor(0.8191)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.51it/s]
Test Epoch: [242/500] Acc@1:75.68% Acc@5:98.56%: 100%|██████████| 79/79 [00:02<00:00, 38.29it/s]
Train Epoch: [243/500] Loss: 0.8126: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.000196382135221945
tensor(0.8129)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.76it/s]
Test Epoch: [243/500] Acc@1:75.38% Acc@5:98.63%: 100%|██████████| 79/79 [00:02<00:00, 38.42it/s]
Train Epoch: [244/500] Loss: 0.8097: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.0001955972964655891
tensor(0.8099)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.73it/s]
Test Epoch: [244/500] Acc@1:75.19% Acc@5:98.51%: 100%|██████████| 79/79 [00:02<00:00, 37.96it/s]
Train Epoch: [245/500] Loss: 0.8114: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0001948061656876077
tensor(0.8113)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.31it/s]
Test Epoch: [245/500] Acc@1:75.40% Acc@5:98.60%: 100%|██████████| 79/79 [00:02<00:00, 38.24it/s]
Train Epoch: [246/500] Loss: 0.8102: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00019400880662755497
tensor(0.8102)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.77it/s]
Test Epoch: [246/500] Acc@1:75.41% Acc@5:98.53%: 100%|██████████| 79/79 [00:02<00:00, 38.57it/s]
Train Epoch: [247/500] Loss: 0.8071: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00019320528352678314
tensor(0.8072)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.10it/s]
Test Epoch: [247/500] Acc@1:75.30% Acc@5:98.64%: 100%|██████████| 79/79 [00:02<00:00, 37.50it/s]
Train Epoch: [248/500] Loss: 0.7978: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.00019239566112326683
tensor(0.7982)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.92it/s]
Test Epoch: [248/500] Acc@1:75.51% Acc@5:98.60%: 100%|██████████| 79/79 [00:02<00:00, 38.15it/s]
Train Epoch: [249/500] Loss: 0.8050: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00019158000464638715
tensor(0.8047)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.29it/s]
Test Epoch: [249/500] Acc@1:75.95% Acc@5:98.71%: 100%|██████████| 79/79 [00:02<00:00, 38.01it/s]
Train Epoch: [250/500] Loss: 0.8127: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.00019075837981167638
tensor(0.8123)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.38it/s]
Test Epoch: [250/500] Acc@1:75.55% Acc@5:98.61%: 100%|██████████| 79/79 [00:02<00:00, 37.15it/s]
Train Epoch: [251/500] Loss: 0.7942: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00018993085281552343
tensor(0.7951)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.49it/s]
Test Epoch: [251/500] Acc@1:75.69% Acc@5:98.64%: 100%|██████████| 79/79 [00:02<00:00, 38.36it/s]
Train Epoch: [252/500] Loss: 0.7996: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.00018909749032984053
tensor(0.7994)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.78it/s]
Test Epoch: [252/500] Acc@1:76.08% Acc@5:98.72%: 100%|██████████| 79/79 [00:02<00:00, 38.22it/s]
Train Epoch: [253/500] Loss: 0.8014: 100%|██████████| 390/390 [00:36<00:00, 10.68it/s]


0.00018825835949669154
tensor(0.8013)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.76it/s]
Test Epoch: [253/500] Acc@1:75.37% Acc@5:98.63%: 100%|██████████| 79/79 [00:02<00:00, 35.97it/s]
Train Epoch: [254/500] Loss: 0.8087: 100%|██████████| 390/390 [00:36<00:00, 10.70it/s]


0.0001874135279228828
tensor(0.8083)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.27it/s]
Test Epoch: [254/500] Acc@1:75.92% Acc@5:98.62%: 100%|██████████| 79/79 [00:02<00:00, 38.42it/s]
Train Epoch: [255/500] Loss: 0.8018: 100%|██████████| 390/390 [00:36<00:00, 10.69it/s]


0.0001865630636745158
tensor(0.8021)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.96it/s]
Test Epoch: [255/500] Acc@1:75.84% Acc@5:98.67%: 100%|██████████| 79/79 [00:02<00:00, 38.13it/s]
Train Epoch: [256/500] Loss: 0.7862: 100%|██████████| 390/390 [00:36<00:00, 10.67it/s]


0.0001857070352715034
tensor(0.7870)


Feature extracting: 100%|██████████| 391/391 [00:05<00:00, 68.35it/s]
Test Epoch: [256/500] Acc@1:75.51% Acc@5:98.86%: 100%|██████████| 79/79 [00:02<00:00, 37.21it/s]
Train Epoch: [257/500] Loss: 0.7796:  29%|██▉       | 115/390 [00:11<00:24, 11.02it/s]

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read the CSV at `csvfilename` (defined in an earlier cell; presumably the
# per-epoch training log -- verify against the training loop).
df = pd.read_csv(csvfilename)

# Figure 1: smoothed training loss versus learning rate, log-scaled x-axis.
f1, loss_ax = plt.subplots()
loss_ax.semilogx(df['lr_epoch'], df['smooth_loss'])
loss_ax.set_xlabel('learning rate')
loss_ax.set_ylabel('smoothed trg epoch loss')
plt.show()

# Figure 2: top-1 test accuracy plotted against the DataFrame's row index.
f2, acc_ax = plt.subplots()
acc_ax.plot(df['test_acc@1'])
acc_ax.set_title('Sim CLR with RAdam')
acc_ax.set_xlabel('epoch number')
acc_ax.set_ylabel('epoch Test Accuracy')
plt.show()