In [136]:
import os
import random
import shutil
import time
import warnings

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models

import PIL

In [137]:
#!curl -O https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_train.tar

In [138]:
#!curl -O https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar

In [139]:
SEED=1

In [140]:
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True

In [141]:
torch.cuda.device_count()

1

In [142]:
START_EPOCH = 0

In [143]:
ARCH = 'resnet18'
EPOCHS = 200
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-4
PRINT_FREQ = 50
TRAIN_BATCH=128
VAL_BATCH=128
WORKERS=2
TRAINDIR="/w251-hw5/train"
VALDIR="/w251-hw5/val"

In [144]:
if not torch.cuda.is_available():
    print('GPU not detected.. did you pass through your GPU?')

In [145]:
GPU = 0

In [146]:
torch.cuda.set_device(GPU)

In [147]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if GPU is not None:
            images = images.cuda(GPU, non_blocking=True)
        if torch.cuda.is_available():
            target = target.cuda(GPU, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

In [148]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if GPU is not None:
                images = images.cuda(GPU, non_blocking=True)
            if torch.cuda.is_available():
                target = target.cuda(GPU, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

In [149]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [150]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [151]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [152]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = LR * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [153]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [154]:
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]
cinic_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
cinic_std_RGB = [0.24205776, 0.23828046, 0.25874835]
cifar_mean_RGB = [0.4914, 0.4822, 0.4465]
cifar_std_RGB = [0.2023, 0.1994, 0.2010]

In [155]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [156]:
IMG_SIZE = 224

In [157]:
NUM_CLASSES = 1000

In [158]:
model = models.__dict__[ARCH]()

In [159]:
inf = model.fc.in_features

In [160]:
model.fc = nn.Linear(inf, NUM_CLASSES)

In [161]:
model.cuda(GPU)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [162]:
criterion = nn.CrossEntropyLoss().cuda(GPU)

In [163]:
optimizer = torch.optim.SGD(model.parameters(), LR,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHT_DECAY)

In [164]:
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [127]:
# transform_train = transforms.Compose([
#     transforms.RandomChoice([transforms.Resize(256), transforms.Resize(480)]),
#     transforms.RandomCrop(IMG_SIZE, padding=4),
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
# ])

In [165]:
transform_train = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.RandomCrop(IMG_SIZE, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

In [166]:
train_dataset = datasets.ImageFolder(
    TRAINDIR, transform=transform_train)

In [167]:
transform_val = transforms.Compose([
    transforms.Resize((256,256)),
    transforms.RandomCrop(IMG_SIZE, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

In [168]:
val_dataset = datasets.ImageFolder(
    VALDIR, transform=transform_val)

In [169]:
train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=TRAIN_BATCH, shuffle=True,
        num_workers=WORKERS, pin_memory=True, sampler=None)

In [170]:
val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=VAL_BATCH, shuffle=False,
        num_workers=WORKERS, pin_memory=True, sampler=None) 

In [171]:
best_acc1 = 0

In [172]:
for epoch in range(START_EPOCH, EPOCHS):
#    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    
    scheduler.step()
    print('lr: ' + str(scheduler.get_last_lr()))

Epoch: [0][    0/10010]	Time  1.489 ( 1.489)	Data  1.228 ( 1.228)	Loss 7.0214e+00 (7.0214e+00)	Acc@1   0.00 (  0.00)	Acc@5   0.00 (  0.00)
Epoch: [0][   50/10010]	Time  0.587 ( 0.601)	Data  0.000 ( 0.035)	Loss 6.9409e+00 (7.1671e+00)	Acc@1   0.00 (  0.14)	Acc@5   0.00 (  0.69)
Epoch: [0][  100/10010]	Time  0.593 ( 0.596)	Data  0.000 ( 0.022)	Loss 6.8117e+00 (7.0271e+00)	Acc@1   0.00 (  0.22)	Acc@5   2.34 (  1.01)
Epoch: [0][  150/10010]	Time  0.589 ( 0.598)	Data  0.000 ( 0.017)	Loss 6.7491e+00 (6.9549e+00)	Acc@1   0.00 (  0.28)	Acc@5   1.56 (  1.25)
Epoch: [0][  200/10010]	Time  0.606 ( 0.600)	Data  0.000 ( 0.015)	Loss 6.7346e+00 (6.9038e+00)	Acc@1   0.00 (  0.35)	Acc@5   0.00 (  1.44)
Epoch: [0][  250/10010]	Time  0.613 ( 0.603)	Data  0.000 ( 0.014)	Loss 6.6537e+00 (6.8605e+00)	Acc@1   0.00 (  0.39)	Acc@5   2.34 (  1.59)
Epoch: [0][  300/10010]	Time  0.625 ( 0.605)	Data  0.000 ( 0.013)	Loss 6.5863e+00 (6.8245e+00)	Acc@1   0.00 (  0.40)	Acc@5   2.34 (  1.72)
Epoch: [0][  350/10010]	Tim

Epoch: [0][ 2950/10010]	Time  0.635 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.5319e+00 (5.6128e+00)	Acc@1   6.25 (  4.86)	Acc@5  28.12 ( 14.21)
Epoch: [0][ 3000/10010]	Time  0.636 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.6137e+00 (5.5974e+00)	Acc@1  12.50 (  4.96)	Acc@5  32.03 ( 14.45)
Epoch: [0][ 3050/10010]	Time  0.631 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.5966e+00 (5.5817e+00)	Acc@1  16.41 (  5.08)	Acc@5  32.81 ( 14.71)
Epoch: [0][ 3100/10010]	Time  0.632 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.5557e+00 (5.5666e+00)	Acc@1  11.72 (  5.19)	Acc@5  28.91 ( 14.94)
Epoch: [0][ 3150/10010]	Time  0.626 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.5847e+00 (5.5517e+00)	Acc@1   8.59 (  5.29)	Acc@5  25.78 ( 15.16)
Epoch: [0][ 3200/10010]	Time  0.635 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.5018e+00 (5.5366e+00)	Acc@1  18.75 (  5.41)	Acc@5  34.38 ( 15.39)
Epoch: [0][ 3250/10010]	Time  0.635 ( 0.632)	Data  0.000 ( 0.009)	Loss 4.3817e+00 (5.5216e+00)	Acc@1  14.84 (  5.53)	Acc@5  33.59 ( 15.64)
Epoch: [0][ 3300/10010]	Tim

Epoch: [0][ 5900/10010]	Time  0.627 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.8504e+00 (4.8899e+00)	Acc@1  21.09 ( 11.11)	Acc@5  43.75 ( 26.42)
Epoch: [0][ 5950/10010]	Time  0.633 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.7890e+00 (4.8801e+00)	Acc@1  25.00 ( 11.22)	Acc@5  43.75 ( 26.60)
Epoch: [0][ 6000/10010]	Time  0.630 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.8917e+00 (4.8699e+00)	Acc@1  23.44 ( 11.32)	Acc@5  43.75 ( 26.78)
Epoch: [0][ 6050/10010]	Time  0.630 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.3733e+00 (4.8599e+00)	Acc@1  28.91 ( 11.43)	Acc@5  53.91 ( 26.96)
Epoch: [0][ 6100/10010]	Time  0.630 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.8204e+00 (4.8502e+00)	Acc@1  21.09 ( 11.53)	Acc@5  45.31 ( 27.14)
Epoch: [0][ 6150/10010]	Time  0.634 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.9303e+00 (4.8409e+00)	Acc@1  21.09 ( 11.63)	Acc@5  42.97 ( 27.30)
Epoch: [0][ 6200/10010]	Time  0.628 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.6640e+00 (4.8314e+00)	Acc@1  27.34 ( 11.73)	Acc@5  46.09 ( 27.47)
Epoch: [0][ 6250/10010]	Tim

Epoch: [0][ 8850/10010]	Time  0.628 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.2083e+00 (4.4166e+00)	Acc@1  30.47 ( 16.43)	Acc@5  53.91 ( 35.04)
Epoch: [0][ 8900/10010]	Time  0.631 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.5698e+00 (4.4102e+00)	Acc@1  23.44 ( 16.51)	Acc@5  53.12 ( 35.16)
Epoch: [0][ 8950/10010]	Time  0.628 ( 0.632)	Data  0.000 ( 0.009)	Loss 3.2349e+00 (4.4040e+00)	Acc@1  34.38 ( 16.59)	Acc@5  58.59 ( 35.27)
Epoch: [0][ 9000/10010]	Time  0.621 ( 0.631)	Data  0.000 ( 0.009)	Loss 3.3172e+00 (4.3974e+00)	Acc@1  28.91 ( 16.66)	Acc@5  57.81 ( 35.40)
Epoch: [0][ 9050/10010]	Time  0.632 ( 0.631)	Data  0.000 ( 0.009)	Loss 3.2078e+00 (4.3908e+00)	Acc@1  30.47 ( 16.75)	Acc@5  60.16 ( 35.52)
Epoch: [0][ 9100/10010]	Time  0.629 ( 0.631)	Data  0.000 ( 0.009)	Loss 3.1004e+00 (4.3846e+00)	Acc@1  39.06 ( 16.82)	Acc@5  57.81 ( 35.63)
Epoch: [0][ 9150/10010]	Time  0.627 ( 0.631)	Data  0.000 ( 0.009)	Loss 2.9085e+00 (4.3783e+00)	Acc@1  38.28 ( 16.90)	Acc@5  60.16 ( 35.75)
Epoch: [0][ 9200/10010]	Tim

Epoch: [1][ 1450/10010]	Time  0.632 ( 0.626)	Data  0.000 ( 0.010)	Loss 3.0917e+00 (3.0496e+00)	Acc@1  32.81 ( 33.66)	Acc@5  53.12 ( 60.35)
Epoch: [1][ 1500/10010]	Time  0.631 ( 0.626)	Data  0.000 ( 0.010)	Loss 3.0171e+00 (3.0479e+00)	Acc@1  37.50 ( 33.67)	Acc@5  57.81 ( 60.39)
Epoch: [1][ 1550/10010]	Time  0.632 ( 0.626)	Data  0.000 ( 0.010)	Loss 2.8785e+00 (3.0459e+00)	Acc@1  38.28 ( 33.70)	Acc@5  60.94 ( 60.43)
Epoch: [1][ 1600/10010]	Time  0.629 ( 0.626)	Data  0.000 ( 0.010)	Loss 2.9102e+00 (3.0442e+00)	Acc@1  36.72 ( 33.76)	Acc@5  62.50 ( 60.46)
Epoch: [1][ 1650/10010]	Time  0.625 ( 0.626)	Data  0.000 ( 0.010)	Loss 2.9943e+00 (3.0411e+00)	Acc@1  31.25 ( 33.81)	Acc@5  66.41 ( 60.52)
Epoch: [1][ 1700/10010]	Time  0.627 ( 0.626)	Data  0.000 ( 0.010)	Loss 2.5545e+00 (3.0388e+00)	Acc@1  40.62 ( 33.85)	Acc@5  69.53 ( 60.56)
Epoch: [1][ 1750/10010]	Time  0.628 ( 0.626)	Data  0.000 ( 0.010)	Loss 3.0450e+00 (3.0370e+00)	Acc@1  33.59 ( 33.91)	Acc@5  53.12 ( 60.59)
Epoch: [1][ 1800/10010]	Tim

Epoch: [1][ 4400/10010]	Time  0.626 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.8477e+00 (2.9577e+00)	Acc@1  33.59 ( 35.25)	Acc@5  64.06 ( 62.01)
Epoch: [1][ 4450/10010]	Time  0.628 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.8730e+00 (2.9569e+00)	Acc@1  38.28 ( 35.26)	Acc@5  64.84 ( 62.04)
Epoch: [1][ 4500/10010]	Time  0.630 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.8162e+00 (2.9560e+00)	Acc@1  37.50 ( 35.28)	Acc@5  60.94 ( 62.05)
Epoch: [1][ 4550/10010]	Time  0.621 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.8684e+00 (2.9550e+00)	Acc@1  33.59 ( 35.30)	Acc@5  70.31 ( 62.07)
Epoch: [1][ 4600/10010]	Time  0.621 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.9389e+00 (2.9536e+00)	Acc@1  37.50 ( 35.32)	Acc@5  63.28 ( 62.10)
Epoch: [1][ 4650/10010]	Time  0.622 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.8675e+00 (2.9521e+00)	Acc@1  36.72 ( 35.35)	Acc@5  68.75 ( 62.12)
Epoch: [1][ 4700/10010]	Time  0.630 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.9499e+00 (2.9517e+00)	Acc@1  32.81 ( 35.36)	Acc@5  64.84 ( 62.13)
Epoch: [1][ 4750/10010]	Tim

Epoch: [1][ 7350/10010]	Time  0.633 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.3759e+00 (2.8906e+00)	Acc@1  44.53 ( 36.42)	Acc@5  69.53 ( 63.21)
Epoch: [1][ 7400/10010]	Time  0.631 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.4017e+00 (2.8891e+00)	Acc@1  45.31 ( 36.45)	Acc@5  68.75 ( 63.24)
Epoch: [1][ 7450/10010]	Time  0.630 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.5806e+00 (2.8878e+00)	Acc@1  38.28 ( 36.47)	Acc@5  68.75 ( 63.26)
Epoch: [1][ 7500/10010]	Time  0.627 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.7925e+00 (2.8868e+00)	Acc@1  37.50 ( 36.48)	Acc@5  62.50 ( 63.28)
Epoch: [1][ 7550/10010]	Time  0.628 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.3643e+00 (2.8850e+00)	Acc@1  46.88 ( 36.52)	Acc@5  74.22 ( 63.31)
Epoch: [1][ 7600/10010]	Time  0.630 ( 0.628)	Data  0.000 ( 0.009)	Loss 3.3351e+00 (2.8839e+00)	Acc@1  30.47 ( 36.54)	Acc@5  57.81 ( 63.34)
Epoch: [1][ 7650/10010]	Time  0.630 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.8880e+00 (2.8826e+00)	Acc@1  37.50 ( 36.56)	Acc@5  64.84 ( 63.36)
Epoch: [1][ 7700/10010]	Tim

Test: [350/391]	Time  0.503 ( 0.465)	Loss 3.2838e+00 (3.1734e+00)	Acc@1  33.59 ( 32.55)	Acc@5  54.69 ( 58.32)
 * Acc@1 32.948 Acc@5 58.798
lr: [0.09997532801828658]
Epoch: [2][    0/10010]	Time  1.303 ( 1.303)	Data  1.132 ( 1.132)	Loss 2.2754e+00 (2.2754e+00)	Acc@1  47.66 ( 47.66)	Acc@5  78.12 ( 78.12)
Epoch: [2][   50/10010]	Time  0.601 ( 0.610)	Data  0.000 ( 0.032)	Loss 2.3474e+00 (2.6483e+00)	Acc@1  39.06 ( 40.13)	Acc@5  79.69 ( 68.09)
Epoch: [2][  100/10010]	Time  0.608 ( 0.608)	Data  0.000 ( 0.020)	Loss 2.4371e+00 (2.5924e+00)	Acc@1  49.22 ( 41.94)	Acc@5  69.53 ( 68.72)
Epoch: [2][  150/10010]	Time  0.617 ( 0.610)	Data  0.000 ( 0.016)	Loss 2.7418e+00 (2.5925e+00)	Acc@1  41.41 ( 41.90)	Acc@5  59.38 ( 68.51)
Epoch: [2][  200/10010]	Time  0.625 ( 0.611)	Data  0.000 ( 0.015)	Loss 2.5781e+00 (2.5891e+00)	Acc@1  42.97 ( 41.91)	Acc@5  67.97 ( 68.43)
Epoch: [2][  250/10010]	Time  0.626 ( 0.613)	Data  0.000 ( 0.013)	Loss 2.4567e+00 (2.5956e+00)	Acc@1  40.62 ( 41.71)	Acc@5  67.97 ( 68.34)
E

Epoch: [2][ 2900/10010]	Time  0.624 ( 0.626)	Data  0.000 ( 0.009)	Loss 2.7144e+00 (2.5751e+00)	Acc@1  34.38 ( 41.90)	Acc@5  65.62 ( 68.78)
Epoch: [2][ 2950/10010]	Time  0.626 ( 0.626)	Data  0.000 ( 0.009)	Loss 2.7605e+00 (2.5741e+00)	Acc@1  38.28 ( 41.91)	Acc@5  65.62 ( 68.79)
Epoch: [2][ 3000/10010]	Time  0.623 ( 0.626)	Data  0.000 ( 0.009)	Loss 2.3702e+00 (2.5740e+00)	Acc@1  44.53 ( 41.91)	Acc@5  72.66 ( 68.78)
Epoch: [2][ 3050/10010]	Time  0.628 ( 0.626)	Data  0.000 ( 0.009)	Loss 2.6470e+00 (2.5734e+00)	Acc@1  34.38 ( 41.91)	Acc@5  71.09 ( 68.79)
Epoch: [2][ 3100/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5931e+00 (2.5730e+00)	Acc@1  45.31 ( 41.92)	Acc@5  70.31 ( 68.80)
Epoch: [2][ 3150/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.3595e+00 (2.5728e+00)	Acc@1  43.75 ( 41.92)	Acc@5  68.75 ( 68.80)
Epoch: [2][ 3200/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.3932e+00 (2.5730e+00)	Acc@1  43.75 ( 41.92)	Acc@5  72.66 ( 68.81)
Epoch: [2][ 3250/10010]	Tim

Epoch: [2][ 5850/10010]	Time  0.630 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4737e+00 (2.5567e+00)	Acc@1  37.50 ( 42.31)	Acc@5  66.41 ( 69.12)
Epoch: [2][ 5900/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5272e+00 (2.5561e+00)	Acc@1  42.97 ( 42.31)	Acc@5  69.53 ( 69.13)
Epoch: [2][ 5950/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4408e+00 (2.5557e+00)	Acc@1  46.09 ( 42.33)	Acc@5  71.88 ( 69.13)
Epoch: [2][ 6000/10010]	Time  0.633 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5700e+00 (2.5556e+00)	Acc@1  42.97 ( 42.33)	Acc@5  68.75 ( 69.14)
Epoch: [2][ 6050/10010]	Time  0.632 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4942e+00 (2.5557e+00)	Acc@1  52.34 ( 42.33)	Acc@5  67.97 ( 69.14)
Epoch: [2][ 6100/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5138e+00 (2.5555e+00)	Acc@1  42.19 ( 42.33)	Acc@5  70.31 ( 69.14)
Epoch: [2][ 6150/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5001e+00 (2.5551e+00)	Acc@1  43.75 ( 42.34)	Acc@5  69.53 ( 69.15)
Epoch: [2][ 6200/10010]	Tim

Epoch: [2][ 8800/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4023e+00 (2.5355e+00)	Acc@1  46.09 ( 42.68)	Acc@5  73.44 ( 69.51)
Epoch: [2][ 8850/10010]	Time  0.624 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5621e+00 (2.5352e+00)	Acc@1  40.62 ( 42.69)	Acc@5  70.31 ( 69.52)
Epoch: [2][ 8900/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2831e+00 (2.5349e+00)	Acc@1  51.56 ( 42.69)	Acc@5  75.00 ( 69.52)
Epoch: [2][ 8950/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5899e+00 (2.5346e+00)	Acc@1  41.41 ( 42.70)	Acc@5  67.19 ( 69.53)
Epoch: [2][ 9000/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2350e+00 (2.5342e+00)	Acc@1  44.53 ( 42.70)	Acc@5  73.44 ( 69.53)
Epoch: [2][ 9050/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.8233e+00 (2.5340e+00)	Acc@1  40.62 ( 42.71)	Acc@5  68.75 ( 69.54)
Epoch: [2][ 9100/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5444e+00 (2.5337e+00)	Acc@1  42.97 ( 42.72)	Acc@5  71.88 ( 69.54)
Epoch: [2][ 9150/10010]	Tim

Epoch: [3][ 1400/10010]	Time  0.630 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.2763e+00 (2.3912e+00)	Acc@1  49.22 ( 45.18)	Acc@5  72.66 ( 71.89)
Epoch: [3][ 1450/10010]	Time  0.634 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.6225e+00 (2.3932e+00)	Acc@1  39.84 ( 45.17)	Acc@5  65.62 ( 71.86)
Epoch: [3][ 1500/10010]	Time  0.629 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.2310e+00 (2.3930e+00)	Acc@1  46.88 ( 45.16)	Acc@5  77.34 ( 71.86)
Epoch: [3][ 1550/10010]	Time  0.627 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.3479e+00 (2.3935e+00)	Acc@1  46.09 ( 45.15)	Acc@5  75.00 ( 71.87)
Epoch: [3][ 1600/10010]	Time  0.628 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.6138e+00 (2.3954e+00)	Acc@1  35.94 ( 45.14)	Acc@5  73.44 ( 71.83)
Epoch: [3][ 1650/10010]	Time  0.626 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.4933e+00 (2.3969e+00)	Acc@1  45.31 ( 45.10)	Acc@5  69.53 ( 71.82)
Epoch: [3][ 1700/10010]	Time  0.626 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.1811e+00 (2.3959e+00)	Acc@1  46.88 ( 45.14)	Acc@5  78.12 ( 71.85)
Epoch: [3][ 1750/10010]	Tim

Epoch: [3][ 4350/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4346e+00 (2.4043e+00)	Acc@1  50.78 ( 45.11)	Acc@5  73.44 ( 71.75)
Epoch: [3][ 4400/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2424e+00 (2.4040e+00)	Acc@1  46.09 ( 45.11)	Acc@5  76.56 ( 71.76)
Epoch: [3][ 4450/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2973e+00 (2.4036e+00)	Acc@1  52.34 ( 45.11)	Acc@5  77.34 ( 71.77)
Epoch: [3][ 4500/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.7363e+00 (2.4036e+00)	Acc@1  35.16 ( 45.12)	Acc@5  68.75 ( 71.76)
Epoch: [3][ 4550/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4322e+00 (2.4041e+00)	Acc@1  47.66 ( 45.11)	Acc@5  71.88 ( 71.76)
Epoch: [3][ 4600/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4513e+00 (2.4042e+00)	Acc@1  39.06 ( 45.10)	Acc@5  71.09 ( 71.76)
Epoch: [3][ 4650/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.3893e+00 (2.4041e+00)	Acc@1  39.06 ( 45.11)	Acc@5  72.66 ( 71.76)
Epoch: [3][ 4700/10010]	Tim

Epoch: [3][ 7300/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.3765e+00 (2.4004e+00)	Acc@1  47.66 ( 45.19)	Acc@5  73.44 ( 71.83)
Epoch: [3][ 7350/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4434e+00 (2.4006e+00)	Acc@1  39.84 ( 45.18)	Acc@5  69.53 ( 71.83)
Epoch: [3][ 7400/10010]	Time  0.630 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.6434e+00 (2.4006e+00)	Acc@1  39.84 ( 45.18)	Acc@5  71.88 ( 71.83)
Epoch: [3][ 7450/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5032e+00 (2.4004e+00)	Acc@1  42.97 ( 45.18)	Acc@5  72.66 ( 71.82)
Epoch: [3][ 7500/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1048e+00 (2.4003e+00)	Acc@1  51.56 ( 45.19)	Acc@5  76.56 ( 71.82)
Epoch: [3][ 7550/10010]	Time  0.624 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1516e+00 (2.4004e+00)	Acc@1  50.78 ( 45.19)	Acc@5  73.44 ( 71.83)
Epoch: [3][ 7600/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.6614e+00 (2.4000e+00)	Acc@1  38.28 ( 45.19)	Acc@5  67.97 ( 71.84)
Epoch: [3][ 7650/10010]	Tim

Test: [250/391]	Time  0.688 ( 0.473)	Loss 2.9699e+00 (2.7088e+00)	Acc@1  41.41 ( 39.39)	Acc@5  64.06 ( 66.96)
Test: [300/391]	Time  0.238 ( 0.469)	Loss 2.9074e+00 (2.8015e+00)	Acc@1  39.84 ( 38.12)	Acc@5  66.41 ( 65.36)
Test: [350/391]	Time  0.177 ( 0.467)	Loss 2.8808e+00 (2.8528e+00)	Acc@1  47.66 ( 37.41)	Acc@5  62.50 ( 64.64)
 * Acc@1 37.530 Acc@5 64.660
lr: [0.09990133642141359]
Epoch: [4][    0/10010]	Time  1.373 ( 1.373)	Data  1.202 ( 1.202)	Loss 2.2339e+00 (2.2339e+00)	Acc@1  42.97 ( 42.97)	Acc@5  72.66 ( 72.66)
Epoch: [4][   50/10010]	Time  0.597 ( 0.609)	Data  0.000 ( 0.033)	Loss 2.2171e+00 (2.3701e+00)	Acc@1  44.53 ( 46.31)	Acc@5  73.44 ( 72.56)
Epoch: [4][  100/10010]	Time  0.609 ( 0.606)	Data  0.000 ( 0.021)	Loss 2.4182e+00 (2.3296e+00)	Acc@1  40.62 ( 46.85)	Acc@5  70.31 ( 73.17)
Epoch: [4][  150/10010]	Time  0.615 ( 0.607)	Data  0.000 ( 0.017)	Loss 2.3545e+00 (2.3188e+00)	Acc@1  47.66 ( 46.78)	Acc@5  74.22 ( 73.11)
Epoch: [4][  200/10010]	Time  0.618 ( 0.609)	Data  0.000 ( 

Epoch: [4][ 2850/10010]	Time  0.624 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.4822e+00 (2.3118e+00)	Acc@1  43.75 ( 46.74)	Acc@5  70.31 ( 73.34)
Epoch: [4][ 2900/10010]	Time  0.625 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.0967e+00 (2.3121e+00)	Acc@1  50.78 ( 46.73)	Acc@5  78.12 ( 73.34)
Epoch: [4][ 2950/10010]	Time  0.623 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.2222e+00 (2.3117e+00)	Acc@1  49.22 ( 46.73)	Acc@5  73.44 ( 73.35)
Epoch: [4][ 3000/10010]	Time  0.619 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.0669e+00 (2.3125e+00)	Acc@1  53.12 ( 46.70)	Acc@5  78.12 ( 73.34)
Epoch: [4][ 3050/10010]	Time  0.628 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1900e+00 (2.3134e+00)	Acc@1  50.00 ( 46.69)	Acc@5  72.66 ( 73.33)
Epoch: [4][ 3100/10010]	Time  0.627 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.0409e+00 (2.3139e+00)	Acc@1  48.44 ( 46.68)	Acc@5  78.12 ( 73.32)
Epoch: [4][ 3150/10010]	Time  0.604 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1493e+00 (2.3143e+00)	Acc@1  53.91 ( 46.68)	Acc@5  71.88 ( 73.31)
Epoch: [4][ 3200/10010]	Tim

Epoch: [4][ 5800/10010]	Time  0.626 ( 0.617)	Data  0.000 ( 0.009)	Loss 2.2859e+00 (2.3199e+00)	Acc@1  44.53 ( 46.58)	Acc@5  69.53 ( 73.24)
Epoch: [4][ 5850/10010]	Time  0.626 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3542e+00 (2.3200e+00)	Acc@1  44.53 ( 46.58)	Acc@5  67.97 ( 73.23)
Epoch: [4][ 5900/10010]	Time  0.626 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0975e+00 (2.3200e+00)	Acc@1  51.56 ( 46.58)	Acc@5  75.00 ( 73.23)
Epoch: [4][ 5950/10010]	Time  0.626 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0875e+00 (2.3197e+00)	Acc@1  49.22 ( 46.58)	Acc@5  78.12 ( 73.24)
Epoch: [4][ 6000/10010]	Time  0.630 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3689e+00 (2.3198e+00)	Acc@1  44.53 ( 46.58)	Acc@5  73.44 ( 73.24)
Epoch: [4][ 6050/10010]	Time  0.620 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.4821e+00 (2.3195e+00)	Acc@1  43.75 ( 46.58)	Acc@5  69.53 ( 73.24)
Epoch: [4][ 6100/10010]	Time  0.628 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2394e+00 (2.3199e+00)	Acc@1  49.22 ( 46.57)	Acc@5  72.66 ( 73.23)
Epoch: [4][ 6150/10010]	Tim

Epoch: [4][ 8750/10010]	Time  0.624 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.6607e+00 (2.3199e+00)	Acc@1  38.28 ( 46.59)	Acc@5  70.31 ( 73.22)
Epoch: [4][ 8800/10010]	Time  0.624 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.7777e+00 (2.3196e+00)	Acc@1  55.47 ( 46.60)	Acc@5  81.25 ( 73.22)
Epoch: [4][ 8850/10010]	Time  0.627 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0660e+00 (2.3197e+00)	Acc@1  46.09 ( 46.59)	Acc@5  78.12 ( 73.22)
Epoch: [4][ 8900/10010]	Time  0.622 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.6411e+00 (2.3200e+00)	Acc@1  40.62 ( 46.59)	Acc@5  67.97 ( 73.21)
Epoch: [4][ 8950/10010]	Time  0.625 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0481e+00 (2.3200e+00)	Acc@1  41.41 ( 46.59)	Acc@5  77.34 ( 73.21)
Epoch: [4][ 9000/10010]	Time  0.627 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.8038e+00 (2.3203e+00)	Acc@1  42.97 ( 46.59)	Acc@5  59.38 ( 73.21)
Epoch: [4][ 9050/10010]	Time  0.628 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3944e+00 (2.3204e+00)	Acc@1  42.97 ( 46.58)	Acc@5  71.09 ( 73.21)
Epoch: [4][ 9100/10010]	Tim

Epoch: [5][ 1350/10010]	Time  0.619 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.1203e+00 (2.2574e+00)	Acc@1  47.66 ( 47.83)	Acc@5  73.44 ( 74.25)
Epoch: [5][ 1400/10010]	Time  0.621 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.3177e+00 (2.2571e+00)	Acc@1  49.22 ( 47.84)	Acc@5  75.78 ( 74.28)
Epoch: [5][ 1450/10010]	Time  0.619 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.2813e+00 (2.2578e+00)	Acc@1  51.56 ( 47.80)	Acc@5  75.78 ( 74.27)
Epoch: [5][ 1500/10010]	Time  0.620 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.0518e+00 (2.2559e+00)	Acc@1  53.12 ( 47.84)	Acc@5  76.56 ( 74.28)
Epoch: [5][ 1550/10010]	Time  0.620 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.3534e+00 (2.2563e+00)	Acc@1  42.19 ( 47.82)	Acc@5  71.88 ( 74.29)
Epoch: [5][ 1600/10010]	Time  0.617 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.5554e+00 (2.2559e+00)	Acc@1  43.75 ( 47.81)	Acc@5  73.44 ( 74.32)
Epoch: [5][ 1650/10010]	Time  0.619 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.2624e+00 (2.2562e+00)	Acc@1  50.00 ( 47.81)	Acc@5  72.66 ( 74.31)
Epoch: [5][ 1700/10010]	Tim

Epoch: [5][ 4300/10010]	Time  0.623 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0490e+00 (2.2646e+00)	Acc@1  55.47 ( 47.66)	Acc@5  77.34 ( 74.14)
Epoch: [5][ 4350/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.3537e+00 (2.2651e+00)	Acc@1  47.66 ( 47.66)	Acc@5  71.88 ( 74.13)
Epoch: [5][ 4400/10010]	Time  0.617 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.6673e+00 (2.2648e+00)	Acc@1  39.06 ( 47.66)	Acc@5  67.97 ( 74.13)
Epoch: [5][ 4450/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1496e+00 (2.2643e+00)	Acc@1  50.78 ( 47.66)	Acc@5  71.88 ( 74.14)
Epoch: [5][ 4500/10010]	Time  0.621 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4164e+00 (2.2646e+00)	Acc@1  41.41 ( 47.65)	Acc@5  75.00 ( 74.14)
Epoch: [5][ 4550/10010]	Time  0.623 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4326e+00 (2.2649e+00)	Acc@1  44.53 ( 47.64)	Acc@5  72.66 ( 74.13)
Epoch: [5][ 4600/10010]	Time  0.619 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4857e+00 (2.2651e+00)	Acc@1  42.97 ( 47.64)	Acc@5  69.53 ( 74.13)
Epoch: [5][ 4650/10010]	Tim

Epoch: [5][ 7250/10010]	Time  0.622 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4330e+00 (2.2707e+00)	Acc@1  36.72 ( 47.55)	Acc@5  69.53 ( 74.05)
Epoch: [5][ 7300/10010]	Time  0.617 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2606e+00 (2.2711e+00)	Acc@1  42.97 ( 47.55)	Acc@5  72.66 ( 74.05)
Epoch: [5][ 7350/10010]	Time  0.621 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4535e+00 (2.2708e+00)	Acc@1  50.00 ( 47.55)	Acc@5  74.22 ( 74.05)
Epoch: [5][ 7400/10010]	Time  0.620 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4400e+00 (2.2711e+00)	Acc@1  42.97 ( 47.54)	Acc@5  71.09 ( 74.04)
Epoch: [5][ 7450/10010]	Time  0.622 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4663e+00 (2.2711e+00)	Acc@1  42.19 ( 47.54)	Acc@5  72.66 ( 74.04)
Epoch: [5][ 7500/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0957e+00 (2.2713e+00)	Acc@1  46.88 ( 47.54)	Acc@5  76.56 ( 74.04)
Epoch: [5][ 7550/10010]	Time  0.621 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2908e+00 (2.2716e+00)	Acc@1  50.00 ( 47.53)	Acc@5  75.00 ( 74.04)
Epoch: [5][ 7600/10010]	Tim

Test: [200/391]	Time  0.662 ( 0.479)	Loss 3.0906e+00 (2.4857e+00)	Acc@1  28.91 ( 43.03)	Acc@5  63.28 ( 71.23)
Test: [250/391]	Time  0.366 ( 0.479)	Loss 2.6005e+00 (2.5587e+00)	Acc@1  45.31 ( 42.37)	Acc@5  64.06 ( 69.73)
Test: [300/391]	Time  0.190 ( 0.475)	Loss 2.3439e+00 (2.6345e+00)	Acc@1  47.66 ( 41.33)	Acc@5  78.12 ( 68.34)
Test: [350/391]	Time  0.167 ( 0.473)	Loss 2.5119e+00 (2.6728e+00)	Acc@1  43.75 ( 40.73)	Acc@5  73.44 ( 67.70)
 * Acc@1 40.828 Acc@5 67.830
lr: [0.09977809823015402]
Epoch: [6][    0/10010]	Time  1.377 ( 1.377)	Data  1.197 ( 1.197)	Loss 2.4498e+00 (2.4498e+00)	Acc@1  41.41 ( 41.41)	Acc@5  72.66 ( 72.66)
Epoch: [6][   50/10010]	Time  0.634 ( 0.644)	Data  0.000 ( 0.034)	Loss 2.2757e+00 (2.2054e+00)	Acc@1  51.56 ( 48.96)	Acc@5  72.66 ( 75.09)
Epoch: [6][  100/10010]	Time  0.619 ( 0.636)	Data  0.000 ( 0.022)	Loss 2.1069e+00 (2.2058e+00)	Acc@1  50.78 ( 48.85)	Acc@5  78.12 ( 75.00)
Epoch: [6][  150/10010]	Time  0.630 ( 0.633)	Data  0.000 ( 0.017)	Loss 2.7214e+00 (2.204

Epoch: [6][ 2800/10010]	Time  0.627 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.2831e+00 (2.2218e+00)	Acc@1  50.78 ( 48.43)	Acc@5  74.22 ( 74.87)
Epoch: [6][ 2850/10010]	Time  0.626 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.4049e+00 (2.2224e+00)	Acc@1  39.84 ( 48.41)	Acc@5  73.44 ( 74.86)
Epoch: [6][ 2900/10010]	Time  0.628 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.2990e+00 (2.2232e+00)	Acc@1  49.22 ( 48.38)	Acc@5  74.22 ( 74.84)
Epoch: [6][ 2950/10010]	Time  0.622 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.4433e+00 (2.2235e+00)	Acc@1  46.88 ( 48.38)	Acc@5  71.09 ( 74.84)
Epoch: [6][ 3000/10010]	Time  0.623 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.3464e+00 (2.2252e+00)	Acc@1  48.44 ( 48.35)	Acc@5  72.66 ( 74.81)
Epoch: [6][ 3050/10010]	Time  0.626 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.3545e+00 (2.2261e+00)	Acc@1  45.31 ( 48.32)	Acc@5  72.66 ( 74.80)
Epoch: [6][ 3100/10010]	Time  0.622 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.1072e+00 (2.2258e+00)	Acc@1  43.75 ( 48.33)	Acc@5  78.91 ( 74.80)
Epoch: [6][ 3150/10010]	Tim

Epoch: [6][ 5750/10010]	Time  0.624 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.3473e+00 (2.2332e+00)	Acc@1  47.66 ( 48.27)	Acc@5  72.66 ( 74.67)
Epoch: [6][ 5800/10010]	Time  0.630 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.2071e+00 (2.2333e+00)	Acc@1  49.22 ( 48.27)	Acc@5  75.00 ( 74.67)
Epoch: [6][ 5850/10010]	Time  0.623 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.6949e+00 (2.2335e+00)	Acc@1  42.97 ( 48.26)	Acc@5  66.41 ( 74.66)
Epoch: [6][ 5900/10010]	Time  0.629 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.5241e+00 (2.2339e+00)	Acc@1  48.44 ( 48.26)	Acc@5  68.75 ( 74.66)
Epoch: [6][ 5950/10010]	Time  0.623 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.1838e+00 (2.2342e+00)	Acc@1  46.88 ( 48.25)	Acc@5  75.78 ( 74.65)
Epoch: [6][ 6000/10010]	Time  0.627 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.3914e+00 (2.2340e+00)	Acc@1  50.00 ( 48.26)	Acc@5  71.09 ( 74.65)
Epoch: [6][ 6050/10010]	Time  0.624 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.3475e+00 (2.2348e+00)	Acc@1  46.09 ( 48.25)	Acc@5  75.00 ( 74.65)
Epoch: [6][ 6100/10010]	Tim

Epoch: [6][ 8700/10010]	Time  0.628 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.1271e+00 (2.2392e+00)	Acc@1  46.88 ( 48.18)	Acc@5  76.56 ( 74.56)
Epoch: [6][ 8750/10010]	Time  0.618 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.6206e+00 (2.2394e+00)	Acc@1  42.97 ( 48.18)	Acc@5  72.66 ( 74.55)
Epoch: [6][ 8800/10010]	Time  0.628 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.0477e+00 (2.2395e+00)	Acc@1  52.34 ( 48.18)	Acc@5  75.78 ( 74.55)
Epoch: [6][ 8850/10010]	Time  0.623 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.4699e+00 (2.2396e+00)	Acc@1  42.19 ( 48.17)	Acc@5  74.22 ( 74.55)
Epoch: [6][ 8900/10010]	Time  0.623 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.1812e+00 (2.2397e+00)	Acc@1  46.88 ( 48.17)	Acc@5  78.12 ( 74.55)
Epoch: [6][ 8950/10010]	Time  0.622 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.0940e+00 (2.2398e+00)	Acc@1  44.53 ( 48.17)	Acc@5  76.56 ( 74.55)
Epoch: [6][ 9000/10010]	Time  0.629 ( 0.625)	Data  0.000 ( 0.009)	Loss 2.5176e+00 (2.2398e+00)	Acc@1  46.88 ( 48.17)	Acc@5  71.09 ( 74.55)
Epoch: [6][ 9050/10010]	Tim

Epoch: [7][ 1300/10010]	Time  0.623 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.2466e+00 (2.1905e+00)	Acc@1  53.91 ( 48.89)	Acc@5  76.56 ( 75.43)
Epoch: [7][ 1350/10010]	Time  0.619 ( 0.622)	Data  0.000 ( 0.010)	Loss 1.7661e+00 (2.1906e+00)	Acc@1  60.16 ( 48.86)	Acc@5  78.91 ( 75.41)
Epoch: [7][ 1400/10010]	Time  0.618 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.3039e+00 (2.1925e+00)	Acc@1  46.09 ( 48.83)	Acc@5  73.44 ( 75.38)
Epoch: [7][ 1450/10010]	Time  0.622 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.0575e+00 (2.1927e+00)	Acc@1  50.00 ( 48.83)	Acc@5  80.47 ( 75.39)
Epoch: [7][ 1500/10010]	Time  0.620 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.3375e+00 (2.1937e+00)	Acc@1  43.75 ( 48.80)	Acc@5  73.44 ( 75.38)
Epoch: [7][ 1550/10010]	Time  0.620 ( 0.622)	Data  0.000 ( 0.010)	Loss 2.4518e+00 (2.1931e+00)	Acc@1  46.09 ( 48.82)	Acc@5  74.22 ( 75.39)
Epoch: [7][ 1600/10010]	Time  0.620 ( 0.622)	Data  0.000 ( 0.010)	Loss 1.9535e+00 (2.1922e+00)	Acc@1  46.09 ( 48.83)	Acc@5  79.69 ( 75.39)
Epoch: [7][ 1650/10010]	Tim

Epoch: [7][ 4250/10010]	Time  0.625 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.3489e+00 (2.2039e+00)	Acc@1  48.44 ( 48.71)	Acc@5  72.66 ( 75.12)
Epoch: [7][ 4300/10010]	Time  0.621 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.6359e+00 (2.2045e+00)	Acc@1  42.19 ( 48.70)	Acc@5  67.97 ( 75.11)
Epoch: [7][ 4350/10010]	Time  0.622 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0914e+00 (2.2044e+00)	Acc@1  47.66 ( 48.71)	Acc@5  79.69 ( 75.11)
Epoch: [7][ 4400/10010]	Time  0.621 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0667e+00 (2.2050e+00)	Acc@1  50.78 ( 48.70)	Acc@5  71.88 ( 75.10)
Epoch: [7][ 4450/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2431e+00 (2.2058e+00)	Acc@1  47.66 ( 48.68)	Acc@5  73.44 ( 75.09)
Epoch: [7][ 4500/10010]	Time  0.616 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1237e+00 (2.2056e+00)	Acc@1  49.22 ( 48.69)	Acc@5  77.34 ( 75.08)
Epoch: [7][ 4550/10010]	Time  0.617 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0616e+00 (2.2057e+00)	Acc@1  50.00 ( 48.68)	Acc@5  75.78 ( 75.08)
Epoch: [7][ 4600/10010]	Tim

Epoch: [7][ 7200/10010]	Time  0.623 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2953e+00 (2.2119e+00)	Acc@1  48.44 ( 48.61)	Acc@5  74.22 ( 74.99)
Epoch: [7][ 7250/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1203e+00 (2.2122e+00)	Acc@1  50.78 ( 48.61)	Acc@5  75.00 ( 74.99)
Epoch: [7][ 7300/10010]	Time  0.623 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.5498e+00 (2.2126e+00)	Acc@1  37.50 ( 48.60)	Acc@5  67.97 ( 74.98)
Epoch: [7][ 7350/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1807e+00 (2.2129e+00)	Acc@1  46.09 ( 48.59)	Acc@5  76.56 ( 74.98)
Epoch: [7][ 7400/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1211e+00 (2.2130e+00)	Acc@1  55.47 ( 48.59)	Acc@5  76.56 ( 74.98)
Epoch: [7][ 7450/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.5440e+00 (2.2128e+00)	Acc@1  41.41 ( 48.60)	Acc@5  73.44 ( 74.98)
Epoch: [7][ 7500/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.5255e+00 (2.2130e+00)	Acc@1  46.09 ( 48.61)	Acc@5  65.62 ( 74.98)
Epoch: [7][ 7550/10010]	Tim

Test: [150/391]	Time  1.034 ( 0.472)	Loss 1.6558e+00 (2.1463e+00)	Acc@1  51.56 ( 47.57)	Acc@5  87.50 ( 76.28)
Test: [200/391]	Time  0.618 ( 0.468)	Loss 2.9068e+00 (2.3474e+00)	Acc@1  33.59 ( 44.92)	Acc@5  58.59 ( 73.26)
Test: [250/391]	Time  0.675 ( 0.464)	Loss 2.5365e+00 (2.4659e+00)	Acc@1  45.31 ( 43.56)	Acc@5  69.53 ( 71.18)
Test: [300/391]	Time  0.188 ( 0.465)	Loss 2.2737e+00 (2.5485e+00)	Acc@1  56.25 ( 42.54)	Acc@5  77.34 ( 69.64)
Test: [350/391]	Time  0.163 ( 0.466)	Loss 2.1851e+00 (2.6186e+00)	Acc@1  53.91 ( 41.64)	Acc@5  72.66 ( 68.41)
 * Acc@1 41.930 Acc@5 68.816
lr: [0.09960573506572391]
Epoch: [8][    0/10010]	Time  1.430 ( 1.430)	Data  1.254 ( 1.254)	Loss 2.1792e+00 (2.1792e+00)	Acc@1  45.31 ( 45.31)	Acc@5  74.22 ( 74.22)
Epoch: [8][   50/10010]	Time  0.628 ( 0.634)	Data  0.000 ( 0.035)	Loss 2.3409e+00 (2.1860e+00)	Acc@1  46.09 ( 48.94)	Acc@5  71.09 ( 75.49)
Epoch: [8][  100/10010]	Time  0.629 ( 0.632)	Data  0.000 ( 0.022)	Loss 2.0015e+00 (2.1775e+00)	Acc@1  53.91 ( 49.56)	

Epoch: [8][ 2750/10010]	Time  0.629 ( 0.628)	Data  0.000 ( 0.010)	Loss 2.2995e+00 (2.1830e+00)	Acc@1  46.88 ( 49.18)	Acc@5  74.22 ( 75.52)
Epoch: [8][ 2800/10010]	Time  0.632 ( 0.628)	Data  0.000 ( 0.010)	Loss 2.2333e+00 (2.1829e+00)	Acc@1  47.66 ( 49.17)	Acc@5  74.22 ( 75.52)
Epoch: [8][ 2850/10010]	Time  0.629 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.1913e+00 (2.1828e+00)	Acc@1  48.44 ( 49.17)	Acc@5  75.78 ( 75.52)
Epoch: [8][ 2900/10010]	Time  0.627 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.2696e+00 (2.1826e+00)	Acc@1  46.88 ( 49.17)	Acc@5  71.09 ( 75.51)
Epoch: [8][ 2950/10010]	Time  0.627 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.2026e+00 (2.1829e+00)	Acc@1  43.75 ( 49.17)	Acc@5  73.44 ( 75.51)
Epoch: [8][ 3000/10010]	Time  0.625 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.1022e+00 (2.1831e+00)	Acc@1  56.25 ( 49.16)	Acc@5  72.66 ( 75.49)
Epoch: [8][ 3050/10010]	Time  0.631 ( 0.628)	Data  0.000 ( 0.009)	Loss 2.1209e+00 (2.1832e+00)	Acc@1  45.31 ( 49.16)	Acc@5  79.69 ( 75.49)
Epoch: [8][ 3100/10010]	Tim

Epoch: [8][ 5700/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 1.8354e+00 (2.1913e+00)	Acc@1  51.56 ( 49.03)	Acc@5  79.69 ( 75.34)
Epoch: [8][ 5750/10010]	Time  0.624 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2421e+00 (2.1917e+00)	Acc@1  42.19 ( 49.02)	Acc@5  75.78 ( 75.33)
Epoch: [8][ 5800/10010]	Time  0.631 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1859e+00 (2.1916e+00)	Acc@1  46.09 ( 49.01)	Acc@5  75.78 ( 75.34)
Epoch: [8][ 5850/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0336e+00 (2.1916e+00)	Acc@1  52.34 ( 49.02)	Acc@5  78.12 ( 75.34)
Epoch: [8][ 5900/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0397e+00 (2.1915e+00)	Acc@1  49.22 ( 49.02)	Acc@5  76.56 ( 75.33)
Epoch: [8][ 5950/10010]	Time  0.630 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.4274e+00 (2.1915e+00)	Acc@1  42.97 ( 49.03)	Acc@5  71.09 ( 75.33)
Epoch: [8][ 6000/10010]	Time  0.630 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0706e+00 (2.1915e+00)	Acc@1  57.81 ( 49.04)	Acc@5  73.44 ( 75.33)
Epoch: [8][ 6050/10010]	Tim

Epoch: [8][ 8650/10010]	Time  0.623 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2450e+00 (2.1956e+00)	Acc@1  51.56 ( 48.98)	Acc@5  71.88 ( 75.25)
Epoch: [8][ 8700/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.3004e+00 (2.1956e+00)	Acc@1  45.31 ( 48.98)	Acc@5  74.22 ( 75.25)
Epoch: [8][ 8750/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 1.9629e+00 (2.1955e+00)	Acc@1  49.22 ( 48.98)	Acc@5  79.69 ( 75.25)
Epoch: [8][ 8800/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0227e+00 (2.1952e+00)	Acc@1  54.69 ( 48.98)	Acc@5  82.03 ( 75.26)
Epoch: [8][ 8850/10010]	Time  0.630 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1212e+00 (2.1954e+00)	Acc@1  48.44 ( 48.98)	Acc@5  75.78 ( 75.25)
Epoch: [8][ 8900/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0714e+00 (2.1952e+00)	Acc@1  52.34 ( 48.99)	Acc@5  72.66 ( 75.26)
Epoch: [8][ 8950/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2690e+00 (2.1953e+00)	Acc@1  48.44 ( 48.98)	Acc@5  75.78 ( 75.25)
Epoch: [8][ 9000/10010]	Tim

Epoch: [9][ 1250/10010]	Time  0.624 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.0361e+00 (2.1385e+00)	Acc@1  47.66 ( 49.96)	Acc@5  77.34 ( 76.26)
Epoch: [9][ 1300/10010]	Time  0.626 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.2301e+00 (2.1386e+00)	Acc@1  51.56 ( 49.98)	Acc@5  75.00 ( 76.25)
Epoch: [9][ 1350/10010]	Time  0.627 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.0165e+00 (2.1391e+00)	Acc@1  54.69 ( 49.95)	Acc@5  78.91 ( 76.24)
Epoch: [9][ 1400/10010]	Time  0.626 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.1965e+00 (2.1395e+00)	Acc@1  49.22 ( 49.96)	Acc@5  72.66 ( 76.23)
Epoch: [9][ 1450/10010]	Time  0.621 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.2328e+00 (2.1403e+00)	Acc@1  43.75 ( 49.97)	Acc@5  77.34 ( 76.22)
Epoch: [9][ 1500/10010]	Time  0.620 ( 0.625)	Data  0.000 ( 0.010)	Loss 2.2063e+00 (2.1399e+00)	Acc@1  49.22 ( 49.98)	Acc@5  76.56 ( 76.24)
Epoch: [9][ 1550/10010]	Time  0.624 ( 0.625)	Data  0.000 ( 0.010)	Loss 1.9627e+00 (2.1387e+00)	Acc@1  53.91 ( 49.99)	Acc@5  77.34 ( 76.25)
Epoch: [9][ 1600/10010]	Tim

Epoch: [9][ 4200/10010]	Time  0.622 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1456e+00 (2.1630e+00)	Acc@1  50.00 ( 49.55)	Acc@5  77.34 ( 75.85)
Epoch: [9][ 4250/10010]	Time  0.626 ( 0.624)	Data  0.000 ( 0.009)	Loss 1.9489e+00 (2.1630e+00)	Acc@1  53.91 ( 49.55)	Acc@5  78.12 ( 75.85)
Epoch: [9][ 4300/10010]	Time  0.625 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1900e+00 (2.1634e+00)	Acc@1  47.66 ( 49.54)	Acc@5  75.00 ( 75.83)
Epoch: [9][ 4350/10010]	Time  0.625 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1180e+00 (2.1639e+00)	Acc@1  53.12 ( 49.54)	Acc@5  77.34 ( 75.83)
Epoch: [9][ 4400/10010]	Time  0.623 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.2510e+00 (2.1641e+00)	Acc@1  50.00 ( 49.54)	Acc@5  75.00 ( 75.83)
Epoch: [9][ 4450/10010]	Time  0.628 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.0645e+00 (2.1641e+00)	Acc@1  56.25 ( 49.54)	Acc@5  77.34 ( 75.83)
Epoch: [9][ 4500/10010]	Time  0.620 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1971e+00 (2.1646e+00)	Acc@1  52.34 ( 49.54)	Acc@5  75.78 ( 75.82)
Epoch: [9][ 4550/10010]	Tim

Epoch: [9][ 7150/10010]	Time  0.623 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.4266e+00 (2.1729e+00)	Acc@1  42.97 ( 49.44)	Acc@5  69.53 ( 75.67)
Epoch: [9][ 7200/10010]	Time  0.622 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.0695e+00 (2.1732e+00)	Acc@1  53.12 ( 49.43)	Acc@5  76.56 ( 75.67)
Epoch: [9][ 7250/10010]	Time  0.624 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1335e+00 (2.1737e+00)	Acc@1  54.69 ( 49.42)	Acc@5  77.34 ( 75.66)
Epoch: [9][ 7300/10010]	Time  0.621 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.1961e+00 (2.1737e+00)	Acc@1  50.78 ( 49.41)	Acc@5  75.78 ( 75.66)
Epoch: [9][ 7350/10010]	Time  0.624 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.2620e+00 (2.1740e+00)	Acc@1  46.09 ( 49.41)	Acc@5  73.44 ( 75.65)
Epoch: [9][ 7400/10010]	Time  0.624 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.2695e+00 (2.1739e+00)	Acc@1  50.78 ( 49.41)	Acc@5  75.78 ( 75.66)
Epoch: [9][ 7450/10010]	Time  0.626 ( 0.624)	Data  0.000 ( 0.009)	Loss 2.3920e+00 (2.1737e+00)	Acc@1  42.97 ( 49.42)	Acc@5  75.78 ( 75.66)
Epoch: [9][ 7500/10010]	Tim

Test: [100/391]	Time  0.538 ( 0.467)	Loss 2.1966e+00 (2.2081e+00)	Acc@1  39.84 ( 46.72)	Acc@5  78.12 ( 75.77)
Test: [150/391]	Time  1.121 ( 0.477)	Loss 1.7026e+00 (2.1497e+00)	Acc@1  49.22 ( 47.63)	Acc@5  85.94 ( 76.67)
Test: [200/391]	Time  0.669 ( 0.475)	Loss 2.5616e+00 (2.3341e+00)	Acc@1  39.84 ( 45.29)	Acc@5  70.31 ( 73.61)
Test: [250/391]	Time  0.696 ( 0.473)	Loss 2.1257e+00 (2.4286e+00)	Acc@1  57.03 ( 44.42)	Acc@5  76.56 ( 71.89)
Test: [300/391]	Time  0.517 ( 0.473)	Loss 2.4910e+00 (2.5164e+00)	Acc@1  48.44 ( 43.33)	Acc@5  72.66 ( 70.29)
Test: [350/391]	Time  0.571 ( 0.474)	Loss 2.4261e+00 (2.5831e+00)	Acc@1  53.12 ( 42.46)	Acc@5  71.09 ( 69.22)
 * Acc@1 42.648 Acc@5 69.390
lr: [0.09938441702975691]
Epoch: [10][    0/10010]	Time  1.341 ( 1.341)	Data  1.165 ( 1.165)	Loss 1.9318e+00 (1.9318e+00)	Acc@1  50.78 ( 50.78)	Acc@5  81.25 ( 81.25)
Epoch: [10][   50/10010]	Time  0.635 ( 0.642)	Data  0.000 ( 0.035)	Loss 1.8979e+00 (2.1450e+00)	Acc@1  57.03 ( 49.42)	Acc@5  83.59 ( 76.26)
Epoch

Epoch: [10][ 2700/10010]	Time  0.619 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1377e+00 (2.1459e+00)	Acc@1  57.03 ( 49.88)	Acc@5  75.78 ( 76.10)
Epoch: [10][ 2750/10010]	Time  0.614 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2532e+00 (2.1465e+00)	Acc@1  51.56 ( 49.88)	Acc@5  70.31 ( 76.09)
Epoch: [10][ 2800/10010]	Time  0.616 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0144e+00 (2.1470e+00)	Acc@1  43.75 ( 49.88)	Acc@5  79.69 ( 76.08)
Epoch: [10][ 2850/10010]	Time  0.619 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.4136e+00 (2.1479e+00)	Acc@1  50.00 ( 49.86)	Acc@5  71.09 ( 76.05)
Epoch: [10][ 2900/10010]	Time  0.620 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2639e+00 (2.1478e+00)	Acc@1  46.09 ( 49.86)	Acc@5  72.66 ( 76.06)
Epoch: [10][ 2950/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 1.9352e+00 (2.1477e+00)	Acc@1  50.78 ( 49.86)	Acc@5  77.34 ( 76.06)
Epoch: [10][ 3000/10010]	Time  0.620 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0310e+00 (2.1481e+00)	Acc@1  47.66 ( 49.84)	Acc@5  76.56 ( 76.05)
Epoch: [10][ 3050/10

Epoch: [10][ 5650/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3459e+00 (2.1590e+00)	Acc@1  43.75 ( 49.65)	Acc@5  74.22 ( 75.91)
Epoch: [10][ 5700/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2589e+00 (2.1588e+00)	Acc@1  49.22 ( 49.66)	Acc@5  73.44 ( 75.91)
Epoch: [10][ 5750/10010]	Time  0.622 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.4494e+00 (2.1593e+00)	Acc@1  42.19 ( 49.65)	Acc@5  72.66 ( 75.90)
Epoch: [10][ 5800/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2737e+00 (2.1596e+00)	Acc@1  49.22 ( 49.65)	Acc@5  74.22 ( 75.90)
Epoch: [10][ 5850/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1982e+00 (2.1597e+00)	Acc@1  49.22 ( 49.64)	Acc@5  74.22 ( 75.90)
Epoch: [10][ 5900/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8719e+00 (2.1596e+00)	Acc@1  54.69 ( 49.64)	Acc@5  79.69 ( 75.90)
Epoch: [10][ 5950/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1901e+00 (2.1602e+00)	Acc@1  52.34 ( 49.63)	Acc@5  78.91 ( 75.89)
Epoch: [10][ 6000/10

Epoch: [10][ 8600/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2936e+00 (2.1675e+00)	Acc@1  48.44 ( 49.49)	Acc@5  71.88 ( 75.75)
Epoch: [10][ 8650/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.9965e+00 (2.1675e+00)	Acc@1  51.56 ( 49.49)	Acc@5  74.22 ( 75.75)
Epoch: [10][ 8700/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1479e+00 (2.1676e+00)	Acc@1  47.66 ( 49.49)	Acc@5  75.00 ( 75.75)
Epoch: [10][ 8750/10010]	Time  0.614 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3179e+00 (2.1676e+00)	Acc@1  51.56 ( 49.49)	Acc@5  72.66 ( 75.75)
Epoch: [10][ 8800/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1609e+00 (2.1677e+00)	Acc@1  55.47 ( 49.49)	Acc@5  72.66 ( 75.75)
Epoch: [10][ 8850/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.9665e+00 (2.1680e+00)	Acc@1  46.88 ( 49.48)	Acc@5  78.12 ( 75.74)
Epoch: [10][ 8900/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0891e+00 (2.1682e+00)	Acc@1  51.56 ( 49.48)	Acc@5  78.91 ( 75.74)
Epoch: [10][ 8950/10

Epoch: [11][ 1150/10010]	Time  0.619 ( 0.621)	Data  0.000 ( 0.010)	Loss 1.9679e+00 (2.1102e+00)	Acc@1  51.56 ( 50.32)	Acc@5  77.34 ( 76.69)
Epoch: [11][ 1200/10010]	Time  0.615 ( 0.620)	Data  0.000 ( 0.010)	Loss 2.2481e+00 (2.1085e+00)	Acc@1  53.91 ( 50.36)	Acc@5  76.56 ( 76.74)
Epoch: [11][ 1250/10010]	Time  0.616 ( 0.620)	Data  0.000 ( 0.010)	Loss 2.0762e+00 (2.1095e+00)	Acc@1  52.34 ( 50.38)	Acc@5  78.12 ( 76.71)
Epoch: [11][ 1300/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.010)	Loss 2.2896e+00 (2.1096e+00)	Acc@1  46.09 ( 50.37)	Acc@5  74.22 ( 76.71)
Epoch: [11][ 1350/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.010)	Loss 2.1346e+00 (2.1114e+00)	Acc@1  48.44 ( 50.36)	Acc@5  76.56 ( 76.65)
Epoch: [11][ 1400/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.010)	Loss 2.2132e+00 (2.1132e+00)	Acc@1  51.56 ( 50.35)	Acc@5  75.00 ( 76.64)
Epoch: [11][ 1450/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.010)	Loss 2.0975e+00 (2.1147e+00)	Acc@1  46.09 ( 50.31)	Acc@5  73.44 ( 76.61)
Epoch: [11][ 1500/10

Epoch: [11][ 4100/10010]	Time  0.614 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.0312e+00 (2.1357e+00)	Acc@1  46.88 ( 50.01)	Acc@5  76.56 ( 76.29)
Epoch: [11][ 4150/10010]	Time  0.611 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.1012e+00 (2.1362e+00)	Acc@1  50.00 ( 50.00)	Acc@5  79.69 ( 76.29)
Epoch: [11][ 4200/10010]	Time  0.614 ( 0.619)	Data  0.000 ( 0.009)	Loss 1.9937e+00 (2.1363e+00)	Acc@1  50.00 ( 50.00)	Acc@5  76.56 ( 76.28)
Epoch: [11][ 4250/10010]	Time  0.620 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.4963e+00 (2.1368e+00)	Acc@1  48.44 ( 50.00)	Acc@5  71.09 ( 76.28)
Epoch: [11][ 4300/10010]	Time  0.617 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.1978e+00 (2.1378e+00)	Acc@1  49.22 ( 49.98)	Acc@5  73.44 ( 76.26)
Epoch: [11][ 4350/10010]	Time  0.615 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3871e+00 (2.1383e+00)	Acc@1  45.31 ( 49.97)	Acc@5  77.34 ( 76.25)
Epoch: [11][ 4400/10010]	Time  0.616 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2531e+00 (2.1386e+00)	Acc@1  49.22 ( 49.97)	Acc@5  72.66 ( 76.24)
Epoch: [11][ 4450/10

Epoch: [11][ 7050/10010]	Time  0.617 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.8758e+00 (2.1484e+00)	Acc@1  57.81 ( 49.83)	Acc@5  80.47 ( 76.06)
Epoch: [11][ 7100/10010]	Time  0.617 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0954e+00 (2.1486e+00)	Acc@1  53.12 ( 49.84)	Acc@5  79.69 ( 76.05)
Epoch: [11][ 7150/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.9396e+00 (2.1487e+00)	Acc@1  50.00 ( 49.83)	Acc@5  77.34 ( 76.05)
Epoch: [11][ 7200/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.1085e+00 (2.1488e+00)	Acc@1  47.66 ( 49.83)	Acc@5  79.69 ( 76.05)
Epoch: [11][ 7250/10010]	Time  0.616 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3860e+00 (2.1491e+00)	Acc@1  46.88 ( 49.82)	Acc@5  74.22 ( 76.05)
Epoch: [11][ 7300/10010]	Time  0.624 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2377e+00 (2.1494e+00)	Acc@1  46.09 ( 49.82)	Acc@5  77.34 ( 76.04)
Epoch: [11][ 7350/10010]	Time  0.624 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3331e+00 (2.1493e+00)	Acc@1  47.66 ( 49.83)	Acc@5  72.66 ( 76.04)
Epoch: [11][ 7400/10

Epoch: [11][10000/10010]	Time  0.617 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.8416e+00 (2.1565e+00)	Acc@1  57.03 ( 49.72)	Acc@5  80.47 ( 75.92)
Test: [  0/391]	Time  1.298 ( 1.298)	Loss 1.4858e+00 (1.4858e+00)	Acc@1  66.41 ( 66.41)	Acc@5  85.16 ( 85.16)
Test: [ 50/391]	Time  0.720 ( 0.488)	Loss 1.6792e+00 (2.1064e+00)	Acc@1  53.12 ( 50.38)	Acc@5  85.16 ( 76.65)
Test: [100/391]	Time  0.529 ( 0.471)	Loss 2.1368e+00 (2.1471e+00)	Acc@1  49.22 ( 47.84)	Acc@5  76.56 ( 76.52)
Test: [150/391]	Time  1.099 ( 0.479)	Loss 1.6330e+00 (2.1298e+00)	Acc@1  50.78 ( 48.47)	Acc@5  89.84 ( 76.90)
Test: [200/391]	Time  0.636 ( 0.476)	Loss 2.9116e+00 (2.3049e+00)	Acc@1  35.16 ( 46.08)	Acc@5  66.41 ( 74.11)
Test: [250/391]	Time  0.484 ( 0.473)	Loss 2.9423e+00 (2.4054e+00)	Acc@1  36.72 ( 45.07)	Acc@5  61.72 ( 72.41)
Test: [300/391]	Time  0.187 ( 0.470)	Loss 3.1783e+00 (2.4658e+00)	Acc@1  33.59 ( 44.25)	Acc@5  62.50 ( 71.31)
Test: [350/391]	Time  0.166 ( 0.469)	Loss 1.8316e+00 (2.5394e+00)	Acc@1  58.59 ( 43.20)	Ac

Epoch: [12][ 2550/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.8983e+00 (2.1205e+00)	Acc@1  50.78 ( 50.31)	Acc@5  84.38 ( 76.59)
Epoch: [12][ 2600/10010]	Time  0.612 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0172e+00 (2.1222e+00)	Acc@1  50.00 ( 50.28)	Acc@5  80.47 ( 76.56)
Epoch: [12][ 2650/10010]	Time  0.617 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2927e+00 (2.1228e+00)	Acc@1  45.31 ( 50.27)	Acc@5  74.22 ( 76.55)
Epoch: [12][ 2700/10010]	Time  0.621 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0233e+00 (2.1235e+00)	Acc@1  50.78 ( 50.26)	Acc@5  81.25 ( 76.54)
Epoch: [12][ 2750/10010]	Time  0.620 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.8583e+00 (2.1235e+00)	Acc@1  56.25 ( 50.27)	Acc@5  82.03 ( 76.54)
Epoch: [12][ 2800/10010]	Time  0.621 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.5486e+00 (2.1232e+00)	Acc@1  45.31 ( 50.28)	Acc@5  68.75 ( 76.55)
Epoch: [12][ 2850/10010]	Time  0.619 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.1461e+00 (2.1232e+00)	Acc@1  51.56 ( 50.28)	Acc@5  72.66 ( 76.55)
Epoch: [12][ 2900/10

Epoch: [12][ 5500/10010]	Time  0.620 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0518e+00 (2.1341e+00)	Acc@1  53.91 ( 50.10)	Acc@5  77.34 ( 76.35)
Epoch: [12][ 5550/10010]	Time  0.617 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.8425e+00 (2.1343e+00)	Acc@1  56.25 ( 50.10)	Acc@5  77.34 ( 76.35)
Epoch: [12][ 5600/10010]	Time  0.620 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0097e+00 (2.1345e+00)	Acc@1  52.34 ( 50.10)	Acc@5  79.69 ( 76.34)
Epoch: [12][ 5650/10010]	Time  0.615 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3796e+00 (2.1346e+00)	Acc@1  45.31 ( 50.09)	Acc@5  71.09 ( 76.34)
Epoch: [12][ 5700/10010]	Time  0.620 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0437e+00 (2.1351e+00)	Acc@1  52.34 ( 50.08)	Acc@5  76.56 ( 76.33)
Epoch: [12][ 5750/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.9872e+00 (2.1351e+00)	Acc@1  50.00 ( 50.08)	Acc@5  82.03 ( 76.33)
Epoch: [12][ 5800/10010]	Time  0.621 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.1331e+00 (2.1356e+00)	Acc@1  48.44 ( 50.08)	Acc@5  74.22 ( 76.32)
Epoch: [12][ 5850/10

Epoch: [12][ 8450/10010]	Time  0.622 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2923e+00 (2.1451e+00)	Acc@1  46.88 ( 49.94)	Acc@5  75.00 ( 76.15)
Epoch: [12][ 8500/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3437e+00 (2.1452e+00)	Acc@1  47.66 ( 49.94)	Acc@5  72.66 ( 76.16)
Epoch: [12][ 8550/10010]	Time  0.619 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.4023e+00 (2.1453e+00)	Acc@1  50.00 ( 49.94)	Acc@5  71.88 ( 76.15)
Epoch: [12][ 8600/10010]	Time  0.619 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2181e+00 (2.1454e+00)	Acc@1  47.66 ( 49.94)	Acc@5  75.00 ( 76.15)
Epoch: [12][ 8650/10010]	Time  0.620 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.4764e+00 (2.1458e+00)	Acc@1  46.09 ( 49.93)	Acc@5  69.53 ( 76.15)
Epoch: [12][ 8700/10010]	Time  0.615 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.3467e+00 (2.1459e+00)	Acc@1  44.53 ( 49.93)	Acc@5  72.66 ( 76.15)
Epoch: [12][ 8750/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.4359e+00 (2.1464e+00)	Acc@1  43.75 ( 49.92)	Acc@5  73.44 ( 76.14)
Epoch: [12][ 8800/10

Epoch: [13][ 1000/10010]	Time  0.621 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.1251e+00 (2.0962e+00)	Acc@1  51.56 ( 50.71)	Acc@5  75.00 ( 76.83)
Epoch: [13][ 1050/10010]	Time  0.619 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.0905e+00 (2.0949e+00)	Acc@1  48.44 ( 50.73)	Acc@5  78.12 ( 76.85)
Epoch: [13][ 1100/10010]	Time  0.624 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.1306e+00 (2.0963e+00)	Acc@1  51.56 ( 50.70)	Acc@5  75.00 ( 76.84)
Epoch: [13][ 1150/10010]	Time  0.621 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.0160e+00 (2.0964e+00)	Acc@1  47.66 ( 50.73)	Acc@5  77.34 ( 76.85)
Epoch: [13][ 1200/10010]	Time  0.621 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.1557e+00 (2.0961e+00)	Acc@1  51.56 ( 50.74)	Acc@5  76.56 ( 76.85)
Epoch: [13][ 1250/10010]	Time  0.613 ( 0.623)	Data  0.000 ( 0.010)	Loss 2.4139e+00 (2.0966e+00)	Acc@1  44.53 ( 50.75)	Acc@5  71.09 ( 76.84)
Epoch: [13][ 1300/10010]	Time  0.621 ( 0.623)	Data  0.000 ( 0.010)	Loss 1.9474e+00 (2.0985e+00)	Acc@1  53.91 ( 50.71)	Acc@5  75.78 ( 76.81)
Epoch: [13][ 1350/10

Epoch: [13][ 3950/10010]	Time  0.619 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0609e+00 (2.1209e+00)	Acc@1  52.34 ( 50.36)	Acc@5  75.78 ( 76.41)
Epoch: [13][ 4000/10010]	Time  0.617 ( 0.621)	Data  0.000 ( 0.009)	Loss 1.8762e+00 (2.1204e+00)	Acc@1  58.59 ( 50.37)	Acc@5  80.47 ( 76.42)
Epoch: [13][ 4050/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0916e+00 (2.1208e+00)	Acc@1  55.47 ( 50.36)	Acc@5  76.56 ( 76.42)
Epoch: [13][ 4100/10010]	Time  0.619 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.3920e+00 (2.1210e+00)	Acc@1  46.88 ( 50.35)	Acc@5  71.88 ( 76.42)
Epoch: [13][ 4150/10010]	Time  0.620 ( 0.621)	Data  0.000 ( 0.009)	Loss 1.9240e+00 (2.1210e+00)	Acc@1  53.12 ( 50.35)	Acc@5  78.91 ( 76.42)
Epoch: [13][ 4200/10010]	Time  0.620 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1706e+00 (2.1207e+00)	Acc@1  51.56 ( 50.36)	Acc@5  76.56 ( 76.43)
Epoch: [13][ 4250/10010]	Time  0.623 ( 0.621)	Data  0.000 ( 0.009)	Loss 1.9430e+00 (2.1210e+00)	Acc@1  56.25 ( 50.35)	Acc@5  79.69 ( 76.43)
Epoch: [13][ 4300/10

Epoch: [13][ 6900/10010]	Time  0.615 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1549e+00 (2.1314e+00)	Acc@1  51.56 ( 50.16)	Acc@5  75.00 ( 76.26)
Epoch: [13][ 6950/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1766e+00 (2.1313e+00)	Acc@1  47.66 ( 50.16)	Acc@5  72.66 ( 76.26)
Epoch: [13][ 7000/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3635e+00 (2.1314e+00)	Acc@1  48.44 ( 50.16)	Acc@5  64.84 ( 76.26)
Epoch: [13][ 7050/10010]	Time  0.622 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2135e+00 (2.1318e+00)	Acc@1  49.22 ( 50.15)	Acc@5  76.56 ( 76.26)
Epoch: [13][ 7100/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2928e+00 (2.1320e+00)	Acc@1  46.88 ( 50.14)	Acc@5  74.22 ( 76.25)
Epoch: [13][ 7150/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.9273e+00 (2.1325e+00)	Acc@1  55.47 ( 50.14)	Acc@5  79.69 ( 76.25)
Epoch: [13][ 7200/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3864e+00 (2.1325e+00)	Acc@1  48.44 ( 50.14)	Acc@5  72.66 ( 76.25)
Epoch: [13][ 7250/10

Epoch: [13][ 9850/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2214e+00 (2.1382e+00)	Acc@1  49.22 ( 50.05)	Acc@5  74.22 ( 76.18)
Epoch: [13][ 9900/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0675e+00 (2.1383e+00)	Acc@1  53.12 ( 50.05)	Acc@5  74.22 ( 76.17)
Epoch: [13][ 9950/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.4905e+00 (2.1381e+00)	Acc@1  48.44 ( 50.06)	Acc@5  72.66 ( 76.18)
Epoch: [13][10000/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1475e+00 (2.1381e+00)	Acc@1  50.78 ( 50.06)	Acc@5  75.00 ( 76.18)
Test: [  0/391]	Time  1.279 ( 1.279)	Loss 1.1586e+00 (1.1586e+00)	Acc@1  75.00 ( 75.00)	Acc@5  89.06 ( 89.06)
Test: [ 50/391]	Time  0.740 ( 0.481)	Loss 1.5719e+00 (2.2129e+00)	Acc@1  59.38 ( 49.25)	Acc@5  85.16 ( 75.12)
Test: [100/391]	Time  0.683 ( 0.463)	Loss 1.8973e+00 (2.1289e+00)	Acc@1  51.56 ( 48.55)	Acc@5  81.25 ( 77.04)
Test: [150/391]	Time  1.064 ( 0.469)	Loss 2.1025e+00 (2.0808e+00)	Acc@1  38.28 ( 49.61)	Acc@5  87.50 ( 77.97)


Epoch: [14][ 2400/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.4815e+00 (2.1040e+00)	Acc@1  42.97 ( 50.64)	Acc@5  73.44 ( 76.79)
Epoch: [14][ 2450/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0717e+00 (2.1037e+00)	Acc@1  52.34 ( 50.65)	Acc@5  72.66 ( 76.80)
Epoch: [14][ 2500/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2378e+00 (2.1036e+00)	Acc@1  46.88 ( 50.66)	Acc@5  70.31 ( 76.80)
Epoch: [14][ 2550/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0029e+00 (2.1040e+00)	Acc@1  53.91 ( 50.67)	Acc@5  76.56 ( 76.79)
Epoch: [14][ 2600/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3146e+00 (2.1040e+00)	Acc@1  53.12 ( 50.67)	Acc@5  71.09 ( 76.79)
Epoch: [14][ 2650/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.7174e+00 (2.1044e+00)	Acc@1  57.81 ( 50.66)	Acc@5  85.94 ( 76.79)
Epoch: [14][ 2700/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1771e+00 (2.1046e+00)	Acc@1  49.22 ( 50.65)	Acc@5  75.00 ( 76.79)
Epoch: [14][ 2750/10

Epoch: [14][ 5350/10010]	Time  0.618 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.0104e+00 (2.1182e+00)	Acc@1  52.34 ( 50.41)	Acc@5  77.34 ( 76.55)
Epoch: [14][ 5400/10010]	Time  0.612 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.1105e+00 (2.1188e+00)	Acc@1  47.66 ( 50.40)	Acc@5  75.78 ( 76.54)
Epoch: [14][ 5450/10010]	Time  0.614 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.0983e+00 (2.1189e+00)	Acc@1  49.22 ( 50.40)	Acc@5  76.56 ( 76.54)
Epoch: [14][ 5500/10010]	Time  0.619 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.5187e+00 (2.1190e+00)	Acc@1  44.53 ( 50.40)	Acc@5  74.22 ( 76.54)
Epoch: [14][ 5550/10010]	Time  0.613 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.3059e+00 (2.1191e+00)	Acc@1  48.44 ( 50.40)	Acc@5  68.75 ( 76.54)
Epoch: [14][ 5600/10010]	Time  0.618 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.2346e+00 (2.1189e+00)	Acc@1  55.47 ( 50.40)	Acc@5  73.44 ( 76.55)
Epoch: [14][ 5650/10010]	Time  0.616 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.0790e+00 (2.1192e+00)	Acc@1  51.56 ( 50.39)	Acc@5  75.78 ( 76.55)
Epoch: [14][ 5700/10

Epoch: [14][ 8300/10010]	Time  0.620 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.1091e+00 (2.1258e+00)	Acc@1  46.88 ( 50.29)	Acc@5  82.03 ( 76.43)
Epoch: [14][ 8350/10010]	Time  0.616 ( 0.619)	Data  0.000 ( 0.009)	Loss 1.9091e+00 (2.1263e+00)	Acc@1  54.69 ( 50.28)	Acc@5  81.25 ( 76.42)
Epoch: [14][ 8400/10010]	Time  0.616 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.0889e+00 (2.1260e+00)	Acc@1  50.78 ( 50.28)	Acc@5  77.34 ( 76.43)
Epoch: [14][ 8450/10010]	Time  0.615 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.0889e+00 (2.1261e+00)	Acc@1  50.00 ( 50.29)	Acc@5  78.91 ( 76.42)
Epoch: [14][ 8500/10010]	Time  0.615 ( 0.619)	Data  0.000 ( 0.009)	Loss 1.9928e+00 (2.1261e+00)	Acc@1  46.88 ( 50.28)	Acc@5  78.12 ( 76.43)
Epoch: [14][ 8550/10010]	Time  0.614 ( 0.619)	Data  0.000 ( 0.009)	Loss 1.8888e+00 (2.1262e+00)	Acc@1  52.34 ( 50.28)	Acc@5  84.38 ( 76.43)
Epoch: [14][ 8600/10010]	Time  0.614 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.1504e+00 (2.1264e+00)	Acc@1  51.56 ( 50.27)	Acc@5  79.69 ( 76.42)
Epoch: [14][ 8650/10

Epoch: [15][  850/10010]	Time  0.621 ( 0.624)	Data  0.000 ( 0.010)	Loss 1.9383e+00 (2.0824e+00)	Acc@1  50.78 ( 50.93)	Acc@5  80.47 ( 77.21)
Epoch: [15][  900/10010]	Time  0.619 ( 0.624)	Data  0.000 ( 0.010)	Loss 1.9693e+00 (2.0844e+00)	Acc@1  53.12 ( 50.90)	Acc@5  79.69 ( 77.17)
Epoch: [15][  950/10010]	Time  0.619 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.1490e+00 (2.0851e+00)	Acc@1  51.56 ( 50.90)	Acc@5  78.12 ( 77.15)
Epoch: [15][ 1000/10010]	Time  0.622 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.0032e+00 (2.0863e+00)	Acc@1  57.03 ( 50.91)	Acc@5  78.91 ( 77.13)
Epoch: [15][ 1050/10010]	Time  0.622 ( 0.623)	Data  0.000 ( 0.010)	Loss 2.6011e+00 (2.0855e+00)	Acc@1  41.41 ( 50.93)	Acc@5  67.97 ( 77.13)
Epoch: [15][ 1100/10010]	Time  0.622 ( 0.623)	Data  0.000 ( 0.010)	Loss 2.1692e+00 (2.0848e+00)	Acc@1  51.56 ( 50.91)	Acc@5  74.22 ( 77.12)
Epoch: [15][ 1150/10010]	Time  0.617 ( 0.623)	Data  0.000 ( 0.010)	Loss 2.0090e+00 (2.0850e+00)	Acc@1  48.44 ( 50.91)	Acc@5  80.47 ( 77.09)
Epoch: [15][ 1200/10

Epoch: [15][ 3800/10010]	Time  0.625 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2227e+00 (2.1080e+00)	Acc@1  46.88 ( 50.59)	Acc@5  71.09 ( 76.77)
Epoch: [15][ 3850/10010]	Time  0.622 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2516e+00 (2.1086e+00)	Acc@1  52.34 ( 50.59)	Acc@5  78.91 ( 76.75)
Epoch: [15][ 3900/10010]	Time  0.621 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1526e+00 (2.1083e+00)	Acc@1  55.47 ( 50.59)	Acc@5  73.44 ( 76.76)
Epoch: [15][ 3950/10010]	Time  0.615 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0976e+00 (2.1093e+00)	Acc@1  46.09 ( 50.58)	Acc@5  75.00 ( 76.74)
Epoch: [15][ 4000/10010]	Time  0.614 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0831e+00 (2.1093e+00)	Acc@1  49.22 ( 50.59)	Acc@5  75.78 ( 76.74)
Epoch: [15][ 4050/10010]	Time  0.615 ( 0.621)	Data  0.000 ( 0.009)	Loss 1.7414e+00 (2.1095e+00)	Acc@1  57.81 ( 50.59)	Acc@5  85.16 ( 76.75)
Epoch: [15][ 4100/10010]	Time  0.616 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0379e+00 (2.1099e+00)	Acc@1  57.81 ( 50.58)	Acc@5  75.78 ( 76.74)
Epoch: [15][ 4150/10

Epoch: [15][ 6750/10010]	Time  0.622 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1145e+00 (2.1183e+00)	Acc@1  50.78 ( 50.42)	Acc@5  76.56 ( 76.57)
Epoch: [15][ 6800/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1196e+00 (2.1182e+00)	Acc@1  53.12 ( 50.42)	Acc@5  77.34 ( 76.57)
Epoch: [15][ 6850/10010]	Time  0.616 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8014e+00 (2.1181e+00)	Acc@1  59.38 ( 50.43)	Acc@5  79.69 ( 76.57)
Epoch: [15][ 6900/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0579e+00 (2.1184e+00)	Acc@1  53.12 ( 50.43)	Acc@5  75.78 ( 76.56)
Epoch: [15][ 6950/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1133e+00 (2.1180e+00)	Acc@1  52.34 ( 50.44)	Acc@5  75.78 ( 76.57)
Epoch: [15][ 7000/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0910e+00 (2.1181e+00)	Acc@1  49.22 ( 50.43)	Acc@5  77.34 ( 76.56)
Epoch: [15][ 7050/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1447e+00 (2.1184e+00)	Acc@1  53.91 ( 50.43)	Acc@5  76.56 ( 76.56)
Epoch: [15][ 7100/10

Epoch: [15][ 9700/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3764e+00 (2.1238e+00)	Acc@1  47.66 ( 50.33)	Acc@5  73.44 ( 76.48)
Epoch: [15][ 9750/10010]	Time  0.615 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.9802e+00 (2.1236e+00)	Acc@1  50.78 ( 50.34)	Acc@5  82.03 ( 76.48)
Epoch: [15][ 9800/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.7596e+00 (2.1237e+00)	Acc@1  65.62 ( 50.34)	Acc@5  83.59 ( 76.48)
Epoch: [15][ 9850/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3469e+00 (2.1238e+00)	Acc@1  47.66 ( 50.34)	Acc@5  73.44 ( 76.48)
Epoch: [15][ 9900/10010]	Time  0.622 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0556e+00 (2.1238e+00)	Acc@1  55.47 ( 50.34)	Acc@5  78.91 ( 76.48)
Epoch: [15][ 9950/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.6605e+00 (2.1238e+00)	Acc@1  42.97 ( 50.34)	Acc@5  64.06 ( 76.48)
Epoch: [15][10000/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1403e+00 (2.1238e+00)	Acc@1  50.00 ( 50.35)	Acc@5  78.12 ( 76.48)
Test: [  0/391]	Time

Epoch: [16][ 2250/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.010)	Loss 2.0281e+00 (2.0923e+00)	Acc@1  54.69 ( 50.99)	Acc@5  75.78 ( 77.01)
Epoch: [16][ 2300/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.010)	Loss 2.1001e+00 (2.0936e+00)	Acc@1  50.00 ( 50.97)	Acc@5  77.34 ( 76.98)
Epoch: [16][ 2350/10010]	Time  0.624 ( 0.627)	Data  0.000 ( 0.010)	Loss 1.8011e+00 (2.0935e+00)	Acc@1  54.69 ( 50.96)	Acc@5  84.38 ( 76.99)
Epoch: [16][ 2400/10010]	Time  0.631 ( 0.627)	Data  0.000 ( 0.010)	Loss 1.9705e+00 (2.0935e+00)	Acc@1  50.00 ( 50.94)	Acc@5  79.69 ( 76.98)
Epoch: [16][ 2450/10010]	Time  0.628 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1099e+00 (2.0940e+00)	Acc@1  50.00 ( 50.95)	Acc@5  79.69 ( 76.96)
Epoch: [16][ 2500/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 1.7511e+00 (2.0937e+00)	Acc@1  57.81 ( 50.94)	Acc@5  85.94 ( 76.97)
Epoch: [16][ 2550/10010]	Time  0.622 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1343e+00 (2.0935e+00)	Acc@1  46.09 ( 50.93)	Acc@5  75.78 ( 76.98)
Epoch: [16][ 2600/10

Epoch: [16][ 5200/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 1.9251e+00 (2.1076e+00)	Acc@1  51.56 ( 50.61)	Acc@5  78.91 ( 76.76)
Epoch: [16][ 5250/10010]	Time  0.624 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0631e+00 (2.1074e+00)	Acc@1  49.22 ( 50.62)	Acc@5  78.91 ( 76.76)
Epoch: [16][ 5300/10010]	Time  0.633 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.2932e+00 (2.1077e+00)	Acc@1  46.09 ( 50.61)	Acc@5  74.22 ( 76.76)
Epoch: [16][ 5350/10010]	Time  0.630 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.5667e+00 (2.1082e+00)	Acc@1  39.06 ( 50.60)	Acc@5  66.41 ( 76.76)
Epoch: [16][ 5400/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0959e+00 (2.1082e+00)	Acc@1  52.34 ( 50.60)	Acc@5  75.78 ( 76.76)
Epoch: [16][ 5450/10010]	Time  0.626 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0049e+00 (2.1086e+00)	Acc@1  56.25 ( 50.59)	Acc@5  74.22 ( 76.76)
Epoch: [16][ 5500/10010]	Time  0.624 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1498e+00 (2.1084e+00)	Acc@1  52.34 ( 50.59)	Acc@5  75.78 ( 76.76)
Epoch: [16][ 5550/10

Epoch: [16][ 8150/10010]	Time  0.627 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0892e+00 (2.1143e+00)	Acc@1  49.22 ( 50.53)	Acc@5  78.91 ( 76.65)
Epoch: [16][ 8200/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1111e+00 (2.1147e+00)	Acc@1  57.03 ( 50.52)	Acc@5  78.12 ( 76.63)
Epoch: [16][ 8250/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.0909e+00 (2.1145e+00)	Acc@1  47.66 ( 50.53)	Acc@5  77.34 ( 76.64)
Epoch: [16][ 8300/10010]	Time  0.625 ( 0.627)	Data  0.000 ( 0.009)	Loss 1.9790e+00 (2.1143e+00)	Acc@1  56.25 ( 50.53)	Acc@5  74.22 ( 76.65)
Epoch: [16][ 8350/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.3059e+00 (2.1145e+00)	Acc@1  43.75 ( 50.53)	Acc@5  73.44 ( 76.64)
Epoch: [16][ 8400/10010]	Time  0.629 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1014e+00 (2.1148e+00)	Acc@1  44.53 ( 50.52)	Acc@5  80.47 ( 76.64)
Epoch: [16][ 8450/10010]	Time  0.631 ( 0.627)	Data  0.000 ( 0.009)	Loss 2.1733e+00 (2.1150e+00)	Acc@1  48.44 ( 50.51)	Acc@5  75.00 ( 76.63)
Epoch: [16][ 8500/10

Epoch: [17][  700/10010]	Time  0.619 ( 0.625)	Data  0.000 ( 0.011)	Loss 2.0459e+00 (2.0633e+00)	Acc@1  49.22 ( 51.41)	Acc@5  76.56 ( 77.33)
Epoch: [17][  750/10010]	Time  0.618 ( 0.625)	Data  0.000 ( 0.011)	Loss 1.9262e+00 (2.0606e+00)	Acc@1  57.81 ( 51.46)	Acc@5  80.47 ( 77.37)
Epoch: [17][  800/10010]	Time  0.618 ( 0.624)	Data  0.000 ( 0.011)	Loss 1.9445e+00 (2.0605e+00)	Acc@1  56.25 ( 51.41)	Acc@5  82.03 ( 77.39)
Epoch: [17][  850/10010]	Time  0.618 ( 0.624)	Data  0.000 ( 0.011)	Loss 1.9523e+00 (2.0591e+00)	Acc@1  50.00 ( 51.43)	Acc@5  81.25 ( 77.42)
Epoch: [17][  900/10010]	Time  0.622 ( 0.624)	Data  0.000 ( 0.010)	Loss 2.1995e+00 (2.0611e+00)	Acc@1  50.78 ( 51.41)	Acc@5  73.44 ( 77.41)
Epoch: [17][  950/10010]	Time  0.618 ( 0.623)	Data  0.000 ( 0.010)	Loss 2.0711e+00 (2.0619e+00)	Acc@1  55.47 ( 51.41)	Acc@5  75.78 ( 77.39)
Epoch: [17][ 1000/10010]	Time  0.621 ( 0.623)	Data  0.000 ( 0.010)	Loss 2.2005e+00 (2.0631e+00)	Acc@1  46.09 ( 51.39)	Acc@5  72.66 ( 77.38)
Epoch: [17][ 1050/10

Epoch: [17][ 3650/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2871e+00 (2.0834e+00)	Acc@1  42.19 ( 51.08)	Acc@5  75.00 ( 77.05)
Epoch: [17][ 3700/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0757e+00 (2.0837e+00)	Acc@1  51.56 ( 51.07)	Acc@5  76.56 ( 77.04)
Epoch: [17][ 3750/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1919e+00 (2.0846e+00)	Acc@1  48.44 ( 51.06)	Acc@5  81.25 ( 77.04)
Epoch: [17][ 3800/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0752e+00 (2.0846e+00)	Acc@1  50.78 ( 51.06)	Acc@5  76.56 ( 77.04)
Epoch: [17][ 3850/10010]	Time  0.612 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8890e+00 (2.0853e+00)	Acc@1  53.12 ( 51.04)	Acc@5  79.69 ( 77.03)
Epoch: [17][ 3900/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3608e+00 (2.0859e+00)	Acc@1  44.53 ( 51.03)	Acc@5  67.97 ( 77.02)
Epoch: [17][ 3950/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8816e+00 (2.0865e+00)	Acc@1  57.81 ( 51.02)	Acc@5  80.47 ( 77.01)
Epoch: [17][ 4000/10

Epoch: [17][ 6600/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8472e+00 (2.1018e+00)	Acc@1  54.69 ( 50.77)	Acc@5  81.25 ( 76.76)
Epoch: [17][ 6650/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8938e+00 (2.1016e+00)	Acc@1  54.69 ( 50.78)	Acc@5  81.25 ( 76.76)
Epoch: [17][ 6700/10010]	Time  0.623 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3331e+00 (2.1017e+00)	Acc@1  44.53 ( 50.77)	Acc@5  70.31 ( 76.76)
Epoch: [17][ 6750/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2872e+00 (2.1026e+00)	Acc@1  50.00 ( 50.75)	Acc@5  75.00 ( 76.75)
Epoch: [17][ 6800/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.7455e+00 (2.1028e+00)	Acc@1  54.69 ( 50.75)	Acc@5  84.38 ( 76.75)
Epoch: [17][ 6850/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0599e+00 (2.1030e+00)	Acc@1  47.66 ( 50.74)	Acc@5  80.47 ( 76.75)
Epoch: [17][ 6900/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0796e+00 (2.1028e+00)	Acc@1  47.66 ( 50.75)	Acc@5  78.12 ( 76.76)
Epoch: [17][ 6950/10

Epoch: [17][ 9550/10010]	Time  0.617 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.2100e+00 (2.1090e+00)	Acc@1  49.22 ( 50.63)	Acc@5  74.22 ( 76.67)
Epoch: [17][ 9600/10010]	Time  0.625 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.2568e+00 (2.1090e+00)	Acc@1  47.66 ( 50.63)	Acc@5  71.88 ( 76.66)
Epoch: [17][ 9650/10010]	Time  0.620 ( 0.619)	Data  0.000 ( 0.009)	Loss 1.8368e+00 (2.1089e+00)	Acc@1  53.91 ( 50.64)	Acc@5  82.81 ( 76.66)
Epoch: [17][ 9700/10010]	Time  0.616 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.2282e+00 (2.1087e+00)	Acc@1  50.00 ( 50.64)	Acc@5  72.66 ( 76.67)
Epoch: [17][ 9750/10010]	Time  0.619 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.1720e+00 (2.1089e+00)	Acc@1  47.66 ( 50.64)	Acc@5  76.56 ( 76.66)
Epoch: [17][ 9800/10010]	Time  0.621 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.3246e+00 (2.1091e+00)	Acc@1  50.00 ( 50.64)	Acc@5  73.44 ( 76.66)
Epoch: [17][ 9850/10010]	Time  0.618 ( 0.619)	Data  0.000 ( 0.009)	Loss 2.3896e+00 (2.1095e+00)	Acc@1  47.66 ( 50.64)	Acc@5  71.09 ( 76.65)
Epoch: [17][ 9900/10

Epoch: [18][ 2100/10010]	Time  0.625 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1202e+00 (2.0675e+00)	Acc@1  49.22 ( 51.25)	Acc@5  77.34 ( 77.35)
Epoch: [18][ 2150/10010]	Time  0.618 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.1506e+00 (2.0679e+00)	Acc@1  50.00 ( 51.24)	Acc@5  76.56 ( 77.35)
Epoch: [18][ 2200/10010]	Time  0.624 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.3151e+00 (2.0687e+00)	Acc@1  46.88 ( 51.25)	Acc@5  75.78 ( 77.35)
Epoch: [18][ 2250/10010]	Time  0.615 ( 0.621)	Data  0.000 ( 0.009)	Loss 1.7747e+00 (2.0692e+00)	Acc@1  60.94 ( 51.25)	Acc@5  81.25 ( 77.34)
Epoch: [18][ 2300/10010]	Time  0.642 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2424e+00 (2.0702e+00)	Acc@1  48.44 ( 51.24)	Acc@5  73.44 ( 77.33)
Epoch: [18][ 2350/10010]	Time  0.623 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.2225e+00 (2.0707e+00)	Acc@1  48.44 ( 51.23)	Acc@5  74.22 ( 77.32)
Epoch: [18][ 2400/10010]	Time  0.612 ( 0.621)	Data  0.000 ( 0.009)	Loss 2.0919e+00 (2.0706e+00)	Acc@1  47.66 ( 51.22)	Acc@5  79.69 ( 77.33)
Epoch: [18][ 2450/10

Epoch: [18][ 5050/10010]	Time  0.622 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1750e+00 (2.0915e+00)	Acc@1  50.78 ( 50.85)	Acc@5  77.34 ( 76.98)
Epoch: [18][ 5100/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3200e+00 (2.0918e+00)	Acc@1  46.88 ( 50.84)	Acc@5  71.09 ( 76.97)
Epoch: [18][ 5150/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3044e+00 (2.0923e+00)	Acc@1  46.88 ( 50.84)	Acc@5  75.00 ( 76.97)
Epoch: [18][ 5200/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1546e+00 (2.0924e+00)	Acc@1  47.66 ( 50.83)	Acc@5  70.31 ( 76.97)
Epoch: [18][ 5250/10010]	Time  0.619 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.2945e+00 (2.0930e+00)	Acc@1  51.56 ( 50.82)	Acc@5  73.44 ( 76.96)
Epoch: [18][ 5300/10010]	Time  0.621 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.5167e+00 (2.0936e+00)	Acc@1  46.09 ( 50.81)	Acc@5  66.41 ( 76.95)
Epoch: [18][ 5350/10010]	Time  0.614 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8485e+00 (2.0937e+00)	Acc@1  57.03 ( 50.82)	Acc@5  80.47 ( 76.94)
Epoch: [18][ 5400/10

Epoch: [18][ 8000/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.3744e+00 (2.1010e+00)	Acc@1  49.22 ( 50.72)	Acc@5  72.66 ( 76.81)
Epoch: [18][ 8050/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1148e+00 (2.1011e+00)	Acc@1  51.56 ( 50.72)	Acc@5  75.78 ( 76.81)
Epoch: [18][ 8100/10010]	Time  0.614 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1150e+00 (2.1012e+00)	Acc@1  55.47 ( 50.73)	Acc@5  70.31 ( 76.81)
Epoch: [18][ 8150/10010]	Time  0.620 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.0948e+00 (2.1011e+00)	Acc@1  56.25 ( 50.73)	Acc@5  78.12 ( 76.81)
Epoch: [18][ 8200/10010]	Time  0.615 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8393e+00 (2.1014e+00)	Acc@1  51.56 ( 50.72)	Acc@5  78.12 ( 76.80)
Epoch: [18][ 8250/10010]	Time  0.617 ( 0.620)	Data  0.000 ( 0.009)	Loss 1.8871e+00 (2.1015e+00)	Acc@1  55.47 ( 50.72)	Acc@5  78.91 ( 76.81)
Epoch: [18][ 8300/10010]	Time  0.618 ( 0.620)	Data  0.000 ( 0.009)	Loss 2.1950e+00 (2.1014e+00)	Acc@1  47.66 ( 50.72)	Acc@5  74.22 ( 76.80)
Epoch: [18][ 8350/10

Epoch: [19][  550/10010]	Time  0.617 ( 0.623)	Data  0.000 ( 0.012)	Loss 2.0195e+00 (2.0376e+00)	Acc@1  50.78 ( 51.79)	Acc@5  82.81 ( 77.88)
Epoch: [19][  600/10010]	Time  0.612 ( 0.623)	Data  0.000 ( 0.011)	Loss 2.1093e+00 (2.0359e+00)	Acc@1  51.56 ( 51.87)	Acc@5  76.56 ( 77.88)
Epoch: [19][  650/10010]	Time  0.616 ( 0.622)	Data  0.000 ( 0.011)	Loss 1.8844e+00 (2.0397e+00)	Acc@1  53.12 ( 51.73)	Acc@5  81.25 ( 77.82)
Epoch: [19][  700/10010]	Time  0.616 ( 0.622)	Data  0.000 ( 0.011)	Loss 2.0375e+00 (2.0415e+00)	Acc@1  47.66 ( 51.74)	Acc@5  80.47 ( 77.80)
Epoch: [19][  750/10010]	Time  0.613 ( 0.622)	Data  0.000 ( 0.011)	Loss 2.0252e+00 (2.0426e+00)	Acc@1  57.03 ( 51.74)	Acc@5  78.91 ( 77.75)
Epoch: [19][  800/10010]	Time  0.611 ( 0.621)	Data  0.000 ( 0.011)	Loss 2.0851e+00 (2.0442e+00)	Acc@1  46.88 ( 51.69)	Acc@5  75.78 ( 77.74)
Epoch: [19][  850/10010]	Time  0.616 ( 0.621)	Data  0.000 ( 0.011)	Loss 2.4364e+00 (2.0487e+00)	Acc@1  44.53 ( 51.63)	Acc@5  67.97 ( 77.69)
Epoch: [19][  900/10

Epoch: [19][ 3500/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0118e+00 (2.0768e+00)	Acc@1  50.00 ( 51.14)	Acc@5  75.00 ( 77.23)
Epoch: [19][ 3550/10010]	Time  0.619 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.0285e+00 (2.0768e+00)	Acc@1  50.78 ( 51.14)	Acc@5  77.34 ( 77.23)
Epoch: [19][ 3600/10010]	Time  0.618 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.1705e+00 (2.0774e+00)	Acc@1  46.88 ( 51.14)	Acc@5  75.78 ( 77.23)
Epoch: [19][ 3650/10010]	Time  0.611 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.1077e+00 (2.0778e+00)	Acc@1  55.47 ( 51.14)	Acc@5  77.34 ( 77.21)
Epoch: [19][ 3700/10010]	Time  0.621 ( 0.618)	Data  0.000 ( 0.009)	Loss 1.9452e+00 (2.0780e+00)	Acc@1  53.12 ( 51.13)	Acc@5  77.34 ( 77.20)
Epoch: [19][ 3750/10010]	Time  0.613 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.1912e+00 (2.0785e+00)	Acc@1  45.31 ( 51.12)	Acc@5  76.56 ( 77.21)
Epoch: [19][ 3800/10010]	Time  0.619 ( 0.618)	Data  0.000 ( 0.009)	Loss 2.2510e+00 (2.0789e+00)	Acc@1  46.09 ( 51.11)	Acc@5  73.44 ( 77.19)
Epoch: [19][ 3850/10

KeyboardInterrupt: 

In [173]:
weights = list(model.parameters())

In [174]:
weights

[Parameter containing:
 tensor([[[[ 3.8362e-02, -1.4293e-01, -2.8124e-01,  ..., -2.0649e-01,
            -4.5692e-02,  4.9017e-02],
           [-8.4938e-02, -3.2173e-01, -3.8361e-01,  ..., -1.5756e-01,
             1.4969e-01,  2.1292e-01],
           [-2.2489e-01, -4.1997e-01, -4.4556e-01,  ...,  3.6018e-02,
             3.3540e-01,  3.8651e-01],
           ...,
           [-3.0152e-01, -3.1979e-01, -2.1017e-01,  ...,  3.9903e-01,
             4.7362e-01,  3.1599e-01],
           [-2.4259e-01, -1.6649e-01, -7.5633e-03,  ...,  4.6768e-01,
             3.9492e-01,  1.5240e-01],
           [-1.5382e-01, -6.3702e-02,  4.9371e-02,  ...,  3.9127e-01,
             2.5934e-01,  4.4165e-02]],
 
          [[-5.4772e-02,  1.4883e-02,  8.1780e-02,  ..., -1.1096e-02,
            -6.5899e-02, -6.2331e-03],
           [-3.5377e-02,  2.3624e-02,  1.9307e-01,  ...,  2.2843e-03,
            -5.9878e-02, -4.6343e-02],
           [ 4.3764e-03,  1.0042e-01,  2.4756e-01,  ..., -1.6909e-02,
            -1.2