In [1]:
# Copyright (c) 2015-present, Facebook, Inc.
# All rights reserved.
import argparse
import datetime
import numpy as np
import time
import torch
import torch.backends.cudnn as cudnn
import json

from pathlib import Path

from timm.data import Mixup
from timm.models import create_model
from timm.loss import LabelSmoothingCrossEntropy, SoftTargetCrossEntropy
from timm.scheduler import create_scheduler
from timm.optim import create_optimizer
from timm.utils import NativeScaler, get_state_dict, ModelEma

from datasets import build_dataset
from engine import train_one_epoch, evaluate
import SwinTransformer
import utils
from PIL import ImageFile
# Global PIL switch, set once for the whole session.
# NOTE(review): presumably this lets PIL decode partially written/truncated
# image files instead of raising mid-epoch -- confirm against the Pillow docs.
ImageFile.LOAD_TRUNCATED_IMAGES = True

import warnings
# NOTE(review): installs a blanket 'ignore' filter, so warnings raised after
# this cell (including deprecation notices from torch/timm) are not shown.
warnings.filterwarnings('ignore')

In [2]:
def get_args_parser():
    """Build the argument parser holding every training/evaluation option.

    The parser is created with ``add_help=False`` so that it can be passed as
    a ``parents=[...]`` entry to another ``argparse.ArgumentParser`` without
    the ``-h/--help`` option being registered twice.

    Returns:
        argparse.ArgumentParser: parser whose namespace provides the model,
        optimizer, LR-schedule, augmentation, dataset, runtime and
        distributed-training settings read by ``main``.
    """
    parser = argparse.ArgumentParser('Training Swin Transformer', add_help=False)
    parser.add_argument('--batch-size', default=256, type=int)
    parser.add_argument('--epochs', default=300, type=int)

    # Model parameters
    parser.add_argument('--model', default='Swin_T', type=str, metavar='MODEL',
                        help='Name of model to train')
    parser.add_argument('--input-size', default=224, type=int, help='images input size')

    parser.add_argument('--drop-path', type=float, default=0.2, metavar='PCT',
                        help='Drop path rate (default: 0.2)')

    # Optimizer parameters
    parser.add_argument('--opt', default='adamw', type=str, metavar='OPTIMIZER',
                        help='Optimizer (default: "adamw")')
    parser.add_argument('--opt-eps', default=1e-8, type=float, metavar='EPSILON',
                        help='Optimizer Epsilon (default: 1e-8)')
    parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA',
                        help='Optimizer Betas (default: None, use opt default)')
    parser.add_argument('--clip-grad', type=float, default=5.0, metavar='NORM',
                        help='Clip gradient norm (default: 5.0)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--weight-decay', type=float, default=0.05,
                        help='weight decay (default: 0.05)')

    # Learning rate schedule parameters
    parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER',
                        help='LR scheduler (default: "cosine")')
    parser.add_argument('--lr', type=float, default=5e-4, metavar='LR',
                        help='learning rate (default: 5e-4)')
    parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct',
                        help='learning rate noise on/off epoch percentages')
    parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT',
                        help='learning rate noise limit percent (default: 0.67)')
    parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV',
                        help='learning rate noise std-dev (default: 1.0)')
    parser.add_argument('--warmup-lr', type=float, default=5e-7, metavar='LR',
                        help='warmup learning rate (default: 5e-7)')
    parser.add_argument('--min-lr', type=float, default=5e-6, metavar='LR',
                        help='lower lr bound for cyclic schedulers that hit 0 (default: 5e-6)')

    parser.add_argument('--decay-epochs', type=float, default=30, metavar='N',
                        help='epoch interval to decay LR')
    parser.add_argument('--warmup-epochs', type=int, default=20, metavar='N',
                        help='epochs to warmup LR, if scheduler supports')
    parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N',
                        help='epochs to cooldown LR at min_lr, after cyclic schedule ends')
    parser.add_argument('--patience-epochs', type=int, default=10, metavar='N',
                        help='patience epochs for Plateau LR scheduler (default: 10)')
    parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE',
                        help='LR decay rate (default: 0.1)')

    # Augmentation parameters
    parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT',
                        help='Color jitter factor (default: 0.4)')
    # The help text is assembled from adjacent literals; the previous version
    # had the `" + \` continuation inside the string, which leaked into --help.
    parser.add_argument('--aa', type=str, default='rand-m9-mstd0.5-inc1', metavar='NAME',
                        help='Use AutoAugment policy. "v0" or "original" '
                             '(default: rand-m9-mstd0.5-inc1)')
    parser.add_argument('--smoothing', type=float, default=0.1, help='Label smoothing (default: 0.1)')
    parser.add_argument('--train-interpolation', type=str, default='bicubic',
                        help='Training interpolation (random, bilinear, bicubic default: "bicubic")')

    # * Random Erase params
    parser.add_argument('--reprob', type=float, default=0.25, metavar='PCT',
                        help='Random erase prob (default: 0.25)')
    parser.add_argument('--remode', type=str, default='pixel',
                        help='Random erase mode (default: "pixel")')
    parser.add_argument('--recount', type=int, default=1,
                        help='Random erase count (default: 1)')
    parser.add_argument('--resplit', action='store_true', default=False,
                        help='Do not random erase first (clean) augmentation split')

    # * Mixup params
    parser.add_argument('--mixup', type=float, default=0.8,
                        help='mixup alpha, mixup enabled if > 0. (default: 0.8)')
    parser.add_argument('--cutmix', type=float, default=1.0,
                        help='cutmix alpha, cutmix enabled if > 0. (default: 1.0)')
    parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None,
                        help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)')
    parser.add_argument('--mixup-prob', type=float, default=1.0,
                        help='Probability of performing mixup or cutmix when either/both is enabled')
    parser.add_argument('--mixup-switch-prob', type=float, default=0.5,
                        help='Probability of switching to cutmix when both mixup and cutmix enabled')
    parser.add_argument('--mixup-mode', type=str, default='batch',
                        help='How to apply mixup/cutmix params. Per "batch", "pair", or "elem"')

    # Dataset parameters
    parser.add_argument('--data-path', default='/datasets01/imagenet_full_size/061417/', type=str,
                        help='dataset path')
    parser.add_argument('--data-set', default='IMNET', choices=['CIFAR', 'IMNET', 'INAT', 'INAT19'],
                        type=str, help='dataset type (default: IMNET)')

    parser.add_argument('--output_dir', default='',
                        help='path where to save, empty for no saving')
    parser.add_argument('--device', default='cuda',
                        help='device to use for training / testing')
    parser.add_argument('--seed', default=0, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--eval', action='store_true', help='Perform evaluation only')
    parser.add_argument('--dist-eval', action='store_true', default=False, help='Enabling distributed evaluation')
    parser.add_argument('--num_workers', default=10, type=int)
    # --pin-mem / --no-pin-mem share the `pin_mem` destination; set_defaults
    # below makes pinning the default, so only --no-pin-mem changes anything.
    parser.add_argument('--pin-mem', action='store_true',
                        help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
    parser.add_argument('--no-pin-mem', action='store_false', dest='pin_mem',
                        help='Do not pin CPU memory in DataLoader')
    parser.set_defaults(pin_mem=True)

    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
    return parser

In [3]:
def main(args):
    """Train a Swin Transformer, or only evaluate it when ``args.eval`` is set.

    Steps, in order: initialise (optional) distributed mode, seed torch/numpy,
    build the train/val datasets and loaders, build the model, scale the
    learning rates by the global batch size, create optimizer / scheduler /
    loss, optionally resume from ``args.resume``, then either run a single
    evaluation or the epoch loop (train -> checkpoint -> evaluate -> log).

    Args:
        args: namespace produced by the parser from ``get_args_parser``. It is
            mutated in place: ``nb_classes`` is added, ``lr`` and
            ``warmup_lr`` are replaced by their batch-size-scaled values and
            ``start_epoch`` is overwritten when a full checkpoint is resumed.
            ``args.distributed`` and ``args.gpu`` are read here but not
            defined by the parser; presumably ``utils.init_distributed_mode``
            sets them -- verify against ``utils``.

    Returns:
        None. Progress is printed; when ``args.output_dir`` is non-empty a
        ``checkpoint.pth`` is written after every epoch and one JSON line per
        epoch is appended to ``log.txt`` (main process only).
    """
    utils.init_distributed_mode(args)

    print(args)

    device = torch.device(args.device)

    # fix the seed for reproducibility (offset by rank so workers differ)
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    # random.seed(seed)

    cudnn.benchmark = True

    dataset_train, args.nb_classes = build_dataset(is_train=True, args=args)
    dataset_val, _ = build_dataset(is_train=False, args=args)

    # A DistributedSampler is used for training in every mode; the replica
    # count and rank come from utils, so it is also what a single-process run
    # gets (the previous `if True:` made the RandomSampler branch unreachable).
    num_tasks = utils.get_world_size()
    global_rank = utils.get_rank()
    sampler_train = torch.utils.data.DistributedSampler(
        dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True
    )
    if args.dist_eval:
        if len(dataset_val) % num_tasks != 0:
            print('Warning: Enabling distributed evaluation with an eval dataset not divisible by process number. '
                  'This will slightly alter validation results as extra duplicate entries are added to achieve '
                  'equal num of samples per-process.')
        sampler_val = torch.utils.data.DistributedSampler(
            dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=False)
    else:
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, sampler=sampler_train,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=args.pin_mem,
        drop_last=True,
    )

    data_loader_val = torch.utils.data.DataLoader(
        dataset_val, sampler=sampler_val,
        batch_size=int(1.0 * args.batch_size),
        num_workers=args.num_workers,
        pin_memory=args.pin_mem,
        drop_last=False
    )

    # Mixup/CutMix is enabled by any of the three switches below.
    mixup_fn = None
    mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None
    if mixup_active:
        mixup_fn = Mixup(
            mixup_alpha=args.mixup, cutmix_alpha=args.cutmix, cutmix_minmax=args.cutmix_minmax,
            prob=args.mixup_prob, switch_prob=args.mixup_switch_prob, mode=args.mixup_mode,
            label_smoothing=args.smoothing, num_classes=args.nb_classes)

    print(f"Creating model: {args.model}")
    # The model constructor is looked up by name on the local SwinTransformer module.
    model = getattr(SwinTransformer, args.model)(num_classes=args.nb_classes, drop_path_rate=args.drop_path)
    model.to(device)

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        model_without_ddp = model.module
    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of params:', n_parameters)

    # Linear LR scaling: the configured rates are per 512 samples of global batch.
    global_batch_scale = args.batch_size * utils.get_world_size() / 512.0
    args.lr = args.lr * global_batch_scale
    args.warmup_lr = args.warmup_lr * global_batch_scale

    optimizer = create_optimizer(args, model_without_ddp)
    loss_scaler = NativeScaler()

    lr_scheduler, _ = create_scheduler(args, optimizer)

    # The loss must match the targets: whenever the Mixup transform is active
    # (mixup OR cutmix OR cutmix_minmax) a soft-target loss is required, so the
    # test is `mixup_active` rather than `args.mixup > 0.` alone.
    if mixup_active:
        # smoothing is handled with mixup label transform
        criterion = SoftTargetCrossEntropy()
    elif args.smoothing:
        criterion = LabelSmoothingCrossEntropy(smoothing=args.smoothing)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    output_dir = Path(args.output_dir)
    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'])
        # Training state is restored only for a full training checkpoint.
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            args.start_epoch = checkpoint['epoch'] + 1
            if 'scaler' in checkpoint:
                loss_scaler.load_state_dict(checkpoint['scaler'])

    if args.eval:
        test_stats = evaluate(data_loader_val, model, device)
        print(f"Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%")
        return

    print(f"Start training for {args.epochs} epochs")
    start_time = time.time()
    max_accuracy = 0.0
    for epoch in range(args.start_epoch, args.epochs):
        # Re-seed the sampler every epoch, in every mode: the training sampler
        # is always a DistributedSampler, and without set_epoch it would yield
        # the same shuffle order each epoch in single-process runs.
        sampler_train.set_epoch(epoch)

        # The scheduler is advanced before the epoch runs, to index epoch + 1.
        lr_scheduler.step(epoch+1)
        train_stats = train_one_epoch(
            model, criterion, data_loader_train,
            optimizer, device, epoch, loss_scaler,
            args.clip_grad, mixup_fn,
            set_training_mode=True  # training mode is requested from train_one_epoch
        )

        if args.output_dir:
            # A single rolling checkpoint, overwritten each epoch.
            checkpoint_paths = [output_dir / 'checkpoint.pth']
            for checkpoint_path in checkpoint_paths:
                utils.save_on_master({
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'scaler': loss_scaler.state_dict(),
                    'args': args,
                }, checkpoint_path)

        test_stats = evaluate(data_loader_val, model, device)
        print(f"Accuracy of the network on the {len(dataset_val)} test images: {test_stats['acc1']:.1f}%")
        max_accuracy = max(max_accuracy, test_stats["acc1"])
        print(f'Max accuracy: {max_accuracy:.2f}%')

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     **{f'test_{k}': v for k, v in test_stats.items()},
                     'epoch': epoch,
                     'n_parameters': n_parameters}

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\n")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

In [4]:
# Top-level parser: inherits every option from get_args_parser() via `parents`.
# The parent is built with add_help=False, so -h/--help is registered only here.
parser = argparse.ArgumentParser(
    prog='Swin Transformer',
    parents=[get_args_parser()],
)

In [5]:
# Notebook run configuration: an explicit argv list is parsed instead of
# sys.argv, one option per line so individual settings are easy to edit.
args = parser.parse_args([
    '--model=Swin_T',
    '--num_workers=4',
    '--batch-size=64',
    '--epochs=100',
    '--drop-path=0.2',
    '--data-path=data/ILSVRC2012/',
    '--output_dir=data/SwinT/',
])

In [6]:
# Create the output directory (and any missing parents) before training starts;
# an empty --output_dir means "do not save", so nothing is created in that case.
if args.output_dir:
    Path(args.output_dir).mkdir(parents=True, exist_ok=True)

In [7]:
# Launch the full run with the parsed configuration. This is the long-running
# cell: it trains for args.epochs epochs and evaluates after each one, printing
# the per-iteration "Epoch:" / "Test:" log lines captured below.
main(args)

Not using distributed mode
Namespace(batch_size=64, epochs=100, model='Swin_T', input_size=224, drop_path=0.2, opt='adamw', opt_eps=1e-08, opt_betas=None, clip_grad=5.0, momentum=0.9, weight_decay=0.05, sched='cosine', lr=0.0005, lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, warmup_lr=5e-07, min_lr=5e-06, decay_epochs=30, warmup_epochs=20, cooldown_epochs=10, patience_epochs=10, decay_rate=0.1, color_jitter=0.4, aa='rand-m9-mstd0.5-inc1', smoothing=0.1, train_interpolation='bicubic', reprob=0.25, remode='pixel', recount=1, resplit=False, mixup=0.8, cutmix=1.0, cutmix_minmax=None, mixup_prob=1.0, mixup_switch_prob=0.5, mixup_mode='batch', data_path='data/ILSVRC2012/', data_set='IMNET', output_dir='data/SwinT/', device='cuda', seed=0, resume='', start_epoch=0, eval=False, dist_eval=False, num_workers=4, pin_mem=True, world_size=1, dist_url='env://', distributed=False)
Creating model: Swin_T
Block Initial Type: W, drop_path_rate:0.000000
Block Initial Type: SW, drop_path_rate:0.0181

Test:  [280/782]  eta: 0:01:01  loss: 6.6912 (6.5339)  acc1: 0.0000 (0.5449)  acc5: 0.0000 (3.5309)  time: 0.1133  data: 0.0483  max mem: 4817
Test:  [290/782]  eta: 0:00:59  loss: 6.6712 (6.5366)  acc1: 0.0000 (0.5262)  acc5: 0.0000 (3.4418)  time: 0.1131  data: 0.0473  max mem: 4817
Test:  [300/782]  eta: 0:00:58  loss: 6.5873 (6.5344)  acc1: 0.0000 (0.5347)  acc5: 0.0000 (3.5039)  time: 0.1286  data: 0.0633  max mem: 4817
Test:  [310/782]  eta: 0:00:57  loss: 6.4777 (6.5350)  acc1: 0.0000 (0.5778)  acc5: 0.0000 (3.5772)  time: 0.1322  data: 0.0674  max mem: 4817
Test:  [320/782]  eta: 0:00:56  loss: 6.4240 (6.5307)  acc1: 0.0000 (0.6036)  acc5: 1.5625 (3.6458)  time: 0.1077  data: 0.0422  max mem: 4817
Test:  [330/782]  eta: 0:00:54  loss: 6.5552 (6.5341)  acc1: 0.0000 (0.5853)  acc5: 0.0000 (3.5498)  time: 0.1155  data: 0.0498  max mem: 4817
Test:  [340/782]  eta: 0:00:53  loss: 6.6985 (6.5396)  acc1: 0.0000 (0.5682)  acc5: 0.0000 (3.4457)  time: 0.1117  data: 0.0453  max mem: 4817

Epoch: [1]  [ 6000/20018]  eta: 0:42:21  lr: 0.000006  loss: 6.6659 (6.7421)  time: 0.1814  data: 0.0001  max mem: 4818
Epoch: [1]  [ 7000/20018]  eta: 0:39:19  lr: 0.000006  loss: 6.6748 (6.7372)  time: 0.1810  data: 0.0001  max mem: 4818
Epoch: [1]  [ 8000/20018]  eta: 0:36:18  lr: 0.000006  loss: 6.6842 (6.7316)  time: 0.1812  data: 0.0001  max mem: 4818
Epoch: [1]  [ 9000/20018]  eta: 0:33:16  lr: 0.000006  loss: 6.6679 (6.7262)  time: 0.1812  data: 0.0001  max mem: 4818
Epoch: [1]  [10000/20018]  eta: 0:30:15  lr: 0.000006  loss: 6.6752 (6.7212)  time: 0.1810  data: 0.0001  max mem: 4818
Epoch: [1]  [11000/20018]  eta: 0:27:13  lr: 0.000006  loss: 6.6733 (6.7167)  time: 0.1808  data: 0.0001  max mem: 4818
Epoch: [1]  [12000/20018]  eta: 0:24:12  lr: 0.000006  loss: 6.6828 (6.7124)  time: 0.1799  data: 0.0001  max mem: 4818
Epoch: [1]  [13000/20018]  eta: 0:21:10  lr: 0.000006  loss: 6.6756 (6.7078)  time: 0.1801  data: 0.0001  max mem: 4818
Epoch: [1]  [14000/20018]  eta: 0:18:09 

Test:  [440/782]  eta: 0:00:40  loss: 5.9671 (6.0823)  acc1: 0.0000 (2.4589)  acc5: 1.5625 (9.3360)  time: 0.1144  data: 0.0497  max mem: 4818
Test:  [450/782]  eta: 0:00:38  loss: 6.0289 (6.0812)  acc1: 0.0000 (2.4460)  acc5: 4.6875 (9.2849)  time: 0.1212  data: 0.0567  max mem: 4818
Test:  [460/782]  eta: 0:00:37  loss: 6.1602 (6.0829)  acc1: 0.0000 (2.4573)  acc5: 7.8125 (9.2801)  time: 0.1143  data: 0.0499  max mem: 4818
Test:  [470/782]  eta: 0:00:36  loss: 6.3499 (6.0898)  acc1: 0.0000 (2.4151)  acc5: 3.1250 (9.2158)  time: 0.1238  data: 0.0594  max mem: 4818
Test:  [480/782]  eta: 0:00:35  loss: 6.3226 (6.0873)  acc1: 0.0000 (2.4916)  acc5: 1.5625 (9.3295)  time: 0.1140  data: 0.0500  max mem: 4818
Test:  [490/782]  eta: 0:00:34  loss: 6.1056 (6.0875)  acc1: 0.0000 (2.5554)  acc5: 4.6875 (9.4418)  time: 0.1007  data: 0.0371  max mem: 4818
Test:  [500/782]  eta: 0:00:32  loss: 6.1248 (6.0877)  acc1: 3.1250 (2.6260)  acc5: 9.3750 (9.6089)  time: 0.1087  data: 0.0448  max mem: 4818

Test:  [ 10/782]  eta: 0:02:10  loss: 5.4711 (5.3798)  acc1: 1.5625 (4.9716)  acc5: 17.1875 (26.5625)  time: 0.1689  data: 0.1045  max mem: 4818
Test:  [ 20/782]  eta: 0:01:54  loss: 5.3996 (5.4018)  acc1: 4.6875 (7.5149)  acc5: 12.5000 (25.3720)  time: 0.1242  data: 0.0598  max mem: 4818
Test:  [ 30/782]  eta: 0:01:43  loss: 5.5631 (5.4864)  acc1: 3.1250 (5.8468)  acc5: 12.5000 (20.1109)  time: 0.1197  data: 0.0553  max mem: 4818
Test:  [ 40/782]  eta: 0:01:39  loss: 5.6514 (5.5178)  acc1: 1.5625 (5.2210)  acc5: 9.3750 (18.1021)  time: 0.1168  data: 0.0530  max mem: 4818
Test:  [ 50/782]  eta: 0:01:33  loss: 5.6118 (5.5455)  acc1: 0.0000 (4.4424)  acc5: 7.8125 (16.8199)  time: 0.1126  data: 0.0489  max mem: 4818
Test:  [ 60/782]  eta: 0:01:34  loss: 5.7426 (5.5502)  acc1: 0.0000 (4.2777)  acc5: 7.8125 (16.7777)  time: 0.1258  data: 0.0620  max mem: 4818
Test:  [ 70/782]  eta: 0:01:30  loss: 5.7426 (5.5908)  acc1: 1.5625 (4.2254)  acc5: 7.8125 (15.9111)  time: 0.1258  data: 0.0615  max

Test:  [580/782]  eta: 0:00:23  loss: 5.8873 (5.7318)  acc1: 0.0000 (5.0048)  acc5: 6.2500 (15.1893)  time: 0.1116  data: 0.0472  max mem: 4818
Test:  [590/782]  eta: 0:00:22  loss: 5.8620 (5.7357)  acc1: 1.5625 (4.9730)  acc5: 7.8125 (15.0962)  time: 0.1105  data: 0.0461  max mem: 4818
Test:  [600/782]  eta: 0:00:21  loss: 6.0327 (5.7400)  acc1: 0.0000 (5.0125)  acc5: 7.8125 (15.0660)  time: 0.1063  data: 0.0419  max mem: 4818
Test:  [610/782]  eta: 0:00:20  loss: 6.1151 (5.7420)  acc1: 0.0000 (5.0660)  acc5: 4.6875 (15.0854)  time: 0.1089  data: 0.0446  max mem: 4818
Test:  [620/782]  eta: 0:00:18  loss: 6.2385 (5.7472)  acc1: 1.5625 (5.0322)  acc5: 6.2500 (15.0136)  time: 0.1168  data: 0.0526  max mem: 4818
Test:  [630/782]  eta: 0:00:17  loss: 6.1653 (5.7479)  acc1: 1.5625 (5.0366)  acc5: 6.2500 (15.0208)  time: 0.1133  data: 0.0491  max mem: 4818
Test:  [640/782]  eta: 0:00:16  loss: 5.8446 (5.7495)  acc1: 1.5625 (5.0190)  acc5: 7.8125 (15.0034)  time: 0.1085  data: 0.0448  max me

Test:  [150/782]  eta: 0:01:15  loss: 5.4448 (5.2476)  acc1: 0.0000 (7.9988)  acc5: 7.8125 (22.5993)  time: 0.1211  data: 0.0568  max mem: 4818
Test:  [160/782]  eta: 0:01:14  loss: 5.2294 (5.2475)  acc1: 1.5625 (7.8028)  acc5: 14.0625 (22.3797)  time: 0.1132  data: 0.0490  max mem: 4818
Test:  [170/782]  eta: 0:01:12  loss: 5.1938 (5.2459)  acc1: 3.1250 (7.8765)  acc5: 18.7500 (22.6151)  time: 0.1153  data: 0.0506  max mem: 4818
Test:  [180/782]  eta: 0:01:11  loss: 5.2673 (5.2456)  acc1: 4.6875 (7.9075)  acc5: 18.7500 (22.6519)  time: 0.1156  data: 0.0503  max mem: 4818
Test:  [190/782]  eta: 0:01:09  loss: 5.1387 (5.2362)  acc1: 4.6875 (8.0497)  acc5: 20.3125 (23.2003)  time: 0.1069  data: 0.0418  max mem: 4818
Test:  [200/782]  eta: 0:01:08  loss: 5.1033 (5.2403)  acc1: 4.6875 (8.0924)  acc5: 21.8750 (23.1732)  time: 0.1049  data: 0.0401  max mem: 4818
Test:  [210/782]  eta: 0:01:06  loss: 5.3066 (5.2467)  acc1: 1.5625 (7.9236)  acc5: 9.3750 (22.8377)  time: 0.1113  data: 0.0467  m

Test:  [720/782]  eta: 0:00:07  loss: 5.3054 (5.4033)  acc1: 3.1250 (7.6348)  acc5: 12.5000 (20.7893)  time: 0.1058  data: 0.0419  max mem: 4818
Test:  [730/782]  eta: 0:00:05  loss: 4.7755 (5.3941)  acc1: 7.8125 (7.7099)  acc5: 32.8125 (20.9986)  time: 0.1084  data: 0.0441  max mem: 4818
Test:  [740/782]  eta: 0:00:04  loss: 4.5699 (5.3810)  acc1: 12.5000 (7.8209)  acc5: 39.0625 (21.3204)  time: 0.1173  data: 0.0528  max mem: 4818
Test:  [750/782]  eta: 0:00:03  loss: 4.4543 (5.3688)  acc1: 12.5000 (7.9706)  acc5: 39.0625 (21.6191)  time: 0.1176  data: 0.0532  max mem: 4818
Test:  [760/782]  eta: 0:00:02  loss: 4.7950 (5.3635)  acc1: 12.5000 (8.0630)  acc5: 35.9375 (21.7641)  time: 0.1105  data: 0.0459  max mem: 4818
Test:  [770/782]  eta: 0:00:01  loss: 4.1765 (5.3459)  acc1: 14.0625 (8.4043)  acc5: 50.0000 (22.2702)  time: 0.1094  data: 0.0448  max mem: 4818
Test:  [780/782]  eta: 0:00:00  loss: 4.3725 (5.3402)  acc1: 18.7500 (8.5267)  acc5: 43.7500 (22.3992)  time: 0.1180  data: 0.

Test:  [290/782]  eta: 0:00:58  loss: 4.7967 (4.7954)  acc1: 6.2500 (13.1980)  acc5: 32.8125 (32.4527)  time: 0.1125  data: 0.0474  max mem: 4818
Test:  [300/782]  eta: 0:00:57  loss: 4.7047 (4.7854)  acc1: 12.5000 (13.4084)  acc5: 37.5000 (32.6516)  time: 0.1290  data: 0.0645  max mem: 4818
Test:  [310/782]  eta: 0:00:56  loss: 4.5550 (4.7818)  acc1: 18.7500 (13.7008)  acc5: 39.0625 (32.8929)  time: 0.1329  data: 0.0680  max mem: 4818
Test:  [320/782]  eta: 0:00:55  loss: 4.7498 (4.7851)  acc1: 12.5000 (13.5903)  acc5: 29.6875 (32.8271)  time: 0.1078  data: 0.0427  max mem: 4818
Test:  [330/782]  eta: 0:00:53  loss: 5.4618 (4.8121)  acc1: 3.1250 (13.3072)  acc5: 12.5000 (32.1469)  time: 0.1105  data: 0.0461  max mem: 4818
Test:  [340/782]  eta: 0:00:52  loss: 5.5190 (4.8269)  acc1: 3.1250 (13.2011)  acc5: 9.3750 (31.8594)  time: 0.1032  data: 0.0389  max mem: 4818
Test:  [350/782]  eta: 0:00:51  loss: 5.3716 (4.8420)  acc1: 3.1250 (12.9986)  acc5: 15.6250 (31.5839)  time: 0.1044  data

Epoch: [5]  [ 5000/20018]  eta: 0:45:03  lr: 0.000019  loss: 6.0719 (6.1716)  time: 0.1803  data: 0.0001  max mem: 4818
Epoch: [5]  [ 6000/20018]  eta: 0:42:03  lr: 0.000019  loss: 6.1016 (6.1686)  time: 0.1794  data: 0.0001  max mem: 4818
Epoch: [5]  [ 7000/20018]  eta: 0:39:03  lr: 0.000019  loss: 6.2235 (6.1653)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [5]  [ 8000/20018]  eta: 0:36:03  lr: 0.000019  loss: 6.1546 (6.1630)  time: 0.1806  data: 0.0001  max mem: 4818
Epoch: [5]  [ 9000/20018]  eta: 0:33:03  lr: 0.000019  loss: 6.1820 (6.1590)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [5]  [10000/20018]  eta: 0:30:03  lr: 0.000019  loss: 6.2511 (6.1551)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [5]  [11000/20018]  eta: 0:27:03  lr: 0.000019  loss: 6.2106 (6.1519)  time: 0.1797  data: 0.0001  max mem: 4818
Epoch: [5]  [12000/20018]  eta: 0:24:03  lr: 0.000019  loss: 5.9346 (6.1483)  time: 0.1805  data: 0.0001  max mem: 4818
Epoch: [5]  [13000/20018]  eta: 0:21:03 

Test:  [420/782]  eta: 0:00:42  loss: 4.9488 (4.5211)  acc1: 7.8125 (16.5677)  acc5: 23.4375 (37.2625)  time: 0.1181  data: 0.0542  max mem: 4818
Test:  [430/782]  eta: 0:00:41  loss: 4.8974 (4.5283)  acc1: 12.5000 (16.5821)  acc5: 29.6875 (37.1121)  time: 0.1132  data: 0.0491  max mem: 4818
Test:  [440/782]  eta: 0:00:40  loss: 4.6930 (4.5308)  acc1: 14.0625 (16.5781)  acc5: 29.6875 (37.0819)  time: 0.1123  data: 0.0478  max mem: 4818
Test:  [450/782]  eta: 0:00:38  loss: 4.8286 (4.5361)  acc1: 10.9375 (16.5015)  acc5: 26.5625 (36.9457)  time: 0.1191  data: 0.0542  max mem: 4818
Test:  [460/782]  eta: 0:00:37  loss: 4.8745 (4.5476)  acc1: 10.9375 (16.4317)  acc5: 26.5625 (36.7645)  time: 0.1148  data: 0.0504  max mem: 4818
Test:  [470/782]  eta: 0:00:36  loss: 5.2638 (4.5646)  acc1: 4.6875 (16.2188)  acc5: 17.1875 (36.4351)  time: 0.1222  data: 0.0580  max mem: 4818
Test:  [480/782]  eta: 0:00:35  loss: 5.2638 (4.5691)  acc1: 4.6875 (16.1902)  acc5: 20.3125 (36.3371)  time: 0.1134  da

Epoch: [6]  [20017/20018]  eta: 0:00:00  lr: 0.000022  loss: 5.9665 (5.9971)  time: 0.1793  data: 0.0005  max mem: 4818
Epoch: [6] Total time: 1:00:06 (0.1802 s / it)
Averaged stats: lr: 0.000022  loss: 5.9665 (5.9971)
Test:  [  0/782]  eta: 0:08:56  loss: 3.4639 (3.4639)  acc1: 59.3750 (59.3750)  acc5: 76.5625 (76.5625)  time: 0.6855  data: 0.6221  max mem: 4818
Test:  [ 10/782]  eta: 0:02:08  loss: 3.5766 (3.8019)  acc1: 17.1875 (26.1364)  acc5: 62.5000 (56.9602)  time: 0.1662  data: 0.1023  max mem: 4818
Test:  [ 20/782]  eta: 0:01:54  loss: 3.5766 (3.7029)  acc1: 26.5625 (29.5387)  acc5: 62.5000 (57.6637)  time: 0.1234  data: 0.0590  max mem: 4818
Test:  [ 30/782]  eta: 0:01:42  loss: 4.0479 (3.9540)  acc1: 23.4375 (25.5544)  acc5: 48.4375 (50.8569)  time: 0.1205  data: 0.0561  max mem: 4818
Test:  [ 40/782]  eta: 0:01:39  loss: 4.6180 (4.1240)  acc1: 10.9375 (22.5229)  acc5: 28.1250 (46.0747)  time: 0.1168  data: 0.0532  max mem: 4818
Test:  [ 50/782]  eta: 0:01:33  loss: 4.5516 (

Test:  [550/782]  eta: 0:00:27  loss: 4.5341 (4.2759)  acc1: 18.7500 (19.4816)  acc5: 32.8125 (41.7763)  time: 0.1092  data: 0.0449  max mem: 4818
Test:  [560/782]  eta: 0:00:25  loss: 4.6616 (4.2842)  acc1: 6.2500 (19.3739)  acc5: 28.1250 (41.5859)  time: 0.1057  data: 0.0410  max mem: 4818
Test:  [570/782]  eta: 0:00:24  loss: 4.7949 (4.2967)  acc1: 6.2500 (19.2426)  acc5: 28.1250 (41.3611)  time: 0.1166  data: 0.0524  max mem: 4818
Test:  [580/782]  eta: 0:00:23  loss: 4.7949 (4.3049)  acc1: 14.0625 (19.1238)  acc5: 35.9375 (41.2140)  time: 0.1175  data: 0.0533  max mem: 4818
Test:  [590/782]  eta: 0:00:22  loss: 4.7054 (4.3137)  acc1: 9.3750 (18.9879)  acc5: 31.2500 (40.9872)  time: 0.1034  data: 0.0392  max mem: 4818
Test:  [600/782]  eta: 0:00:21  loss: 4.8093 (4.3210)  acc1: 10.9375 (18.9710)  acc5: 26.5625 (40.8460)  time: 0.1056  data: 0.0415  max mem: 4818
Test:  [610/782]  eta: 0:00:19  loss: 4.8152 (4.3265)  acc1: 12.5000 (18.9469)  acc5: 28.1250 (40.7298)  time: 0.1105  da

Test:  [120/782]  eta: 0:01:20  loss: 3.0256 (3.6158)  acc1: 39.0625 (30.2299)  acc5: 67.1875 (55.1265)  time: 0.1151  data: 0.0501  max mem: 4818
Test:  [130/782]  eta: 0:01:18  loss: 3.5357 (3.6477)  acc1: 26.5625 (29.0792)  acc5: 51.5625 (54.1150)  time: 0.1164  data: 0.0512  max mem: 4818
Test:  [140/782]  eta: 0:01:17  loss: 4.2173 (3.6898)  acc1: 10.9375 (28.2580)  acc5: 39.0625 (52.8923)  time: 0.1152  data: 0.0498  max mem: 4818
Test:  [150/782]  eta: 0:01:15  loss: 4.1955 (3.7243)  acc1: 12.5000 (27.2454)  acc5: 39.0625 (51.7695)  time: 0.1111  data: 0.0458  max mem: 4818
Test:  [160/782]  eta: 0:01:14  loss: 3.9906 (3.7315)  acc1: 14.0625 (26.4557)  acc5: 42.1875 (51.3975)  time: 0.1092  data: 0.0436  max mem: 4818
Test:  [170/782]  eta: 0:01:12  loss: 3.7967 (3.7357)  acc1: 14.0625 (26.1422)  acc5: 50.0000 (51.3249)  time: 0.1109  data: 0.0455  max mem: 4818
Test:  [180/782]  eta: 0:01:11  loss: 3.7967 (3.7375)  acc1: 18.7500 (25.9755)  acc5: 46.8750 (51.1913)  time: 0.1098 

Test:  [680/782]  eta: 0:00:11  loss: 4.4892 (4.0977)  acc1: 9.3750 (21.7740)  acc5: 31.2500 (44.1768)  time: 0.1157  data: 0.0510  max mem: 4818
Test:  [690/782]  eta: 0:00:10  loss: 4.5590 (4.1014)  acc1: 9.3750 (21.7619)  acc5: 31.2500 (44.1321)  time: 0.1111  data: 0.0464  max mem: 4818
Test:  [700/782]  eta: 0:00:09  loss: 4.3810 (4.1050)  acc1: 17.1875 (21.7479)  acc5: 39.0625 (44.0933)  time: 0.1042  data: 0.0394  max mem: 4818
Test:  [710/782]  eta: 0:00:08  loss: 4.5487 (4.1123)  acc1: 20.3125 (21.6904)  acc5: 34.3750 (43.9302)  time: 0.1220  data: 0.0572  max mem: 4818
Test:  [720/782]  eta: 0:00:07  loss: 4.2668 (4.1095)  acc1: 14.0625 (21.7406)  acc5: 34.3750 (43.9840)  time: 0.1223  data: 0.0576  max mem: 4818
Test:  [730/782]  eta: 0:00:06  loss: 3.3258 (4.1000)  acc1: 29.6875 (21.8942)  acc5: 60.9375 (44.2416)  time: 0.1089  data: 0.0442  max mem: 4818
Test:  [740/782]  eta: 0:00:04  loss: 3.1812 (4.0847)  acc1: 29.6875 (22.1344)  acc5: 67.1875 (44.6082)  time: 0.1227  d

Test:  [250/782]  eta: 0:01:04  loss: 3.4033 (3.4578)  acc1: 28.1250 (29.8120)  acc5: 57.8125 (56.4741)  time: 0.1301  data: 0.0654  max mem: 4818
Test:  [260/782]  eta: 0:01:03  loss: 3.2708 (3.4292)  acc1: 32.8125 (30.5496)  acc5: 56.2500 (57.0342)  time: 0.1389  data: 0.0746  max mem: 4818
Test:  [270/782]  eta: 0:01:02  loss: 3.1862 (3.4335)  acc1: 32.8125 (30.5524)  acc5: 54.6875 (56.8842)  time: 0.1277  data: 0.0628  max mem: 4818
Test:  [280/782]  eta: 0:01:00  loss: 3.7110 (3.4437)  acc1: 26.5625 (30.1879)  acc5: 50.0000 (56.6003)  time: 0.1180  data: 0.0527  max mem: 4818
Test:  [290/782]  eta: 0:00:59  loss: 3.4113 (3.4436)  acc1: 26.5625 (30.2781)  acc5: 53.1250 (56.6044)  time: 0.1118  data: 0.0471  max mem: 4818
Test:  [300/782]  eta: 0:00:58  loss: 3.3743 (3.4386)  acc1: 31.2500 (30.3312)  acc5: 59.3750 (56.6497)  time: 0.1295  data: 0.0653  max mem: 4818
Test:  [310/782]  eta: 0:00:57  loss: 3.0341 (3.4236)  acc1: 31.2500 (30.6722)  acc5: 65.6250 (56.9835)  time: 0.1372 

Epoch: [9]  [    0/20018]  eta: 4:22:11  lr: 0.000031  loss: 5.2734 (5.2734)  time: 0.7859  data: 0.6073  max mem: 4818
Epoch: [9]  [ 1000/20018]  eta: 0:57:08  lr: 0.000031  loss: 5.6014 (5.7035)  time: 0.1802  data: 0.0001  max mem: 4818
Epoch: [9]  [ 2000/20018]  eta: 0:54:06  lr: 0.000031  loss: 5.8563 (5.7119)  time: 0.1802  data: 0.0001  max mem: 4818
Epoch: [9]  [ 3000/20018]  eta: 0:51:05  lr: 0.000031  loss: 5.7997 (5.7053)  time: 0.1796  data: 0.0001  max mem: 4818
Epoch: [9]  [ 4000/20018]  eta: 0:48:04  lr: 0.000031  loss: 5.5811 (5.7039)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [9]  [ 5000/20018]  eta: 0:45:04  lr: 0.000031  loss: 5.9986 (5.7031)  time: 0.1796  data: 0.0001  max mem: 4818
Epoch: [9]  [ 6000/20018]  eta: 0:42:03  lr: 0.000031  loss: 5.7273 (5.7029)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [9]  [ 7000/20018]  eta: 0:39:03  lr: 0.000031  loss: 5.5267 (5.7018)  time: 0.1790  data: 0.0001  max mem: 4818
Epoch: [9]  [ 8000/20018]  eta: 0:36:03 

Test:  [380/782]  eta: 0:00:47  loss: 3.5425 (3.2838)  acc1: 25.0000 (32.9273)  acc5: 43.7500 (59.1002)  time: 0.1161  data: 0.0520  max mem: 4818
Test:  [390/782]  eta: 0:00:46  loss: 4.1172 (3.3097)  acc1: 21.8750 (32.6447)  acc5: 42.1875 (58.6717)  time: 0.1081  data: 0.0437  max mem: 4818
Test:  [400/782]  eta: 0:00:44  loss: 4.3708 (3.3288)  acc1: 17.1875 (32.3488)  acc5: 39.0625 (58.3151)  time: 0.1007  data: 0.0364  max mem: 4818
Test:  [410/782]  eta: 0:00:43  loss: 4.5137 (3.3564)  acc1: 17.1875 (31.9685)  acc5: 37.5000 (57.7973)  time: 0.1110  data: 0.0467  max mem: 4818
Test:  [420/782]  eta: 0:00:42  loss: 3.9593 (3.3598)  acc1: 18.7500 (31.9515)  acc5: 45.3125 (57.8348)  time: 0.1193  data: 0.0549  max mem: 4818
Test:  [430/782]  eta: 0:00:41  loss: 3.9543 (3.3719)  acc1: 25.0000 (31.8264)  acc5: 51.5625 (57.5732)  time: 0.1080  data: 0.0433  max mem: 4818
Test:  [440/782]  eta: 0:00:40  loss: 3.8795 (3.3791)  acc1: 25.0000 (31.7460)  acc5: 51.5625 (57.4476)  time: 0.1159 

Epoch: [10]  [16000/20018]  eta: 0:12:04  lr: 0.000034  loss: 5.5836 (5.5898)  time: 0.1799  data: 0.0001  max mem: 4818
Epoch: [10]  [17000/20018]  eta: 0:09:03  lr: 0.000034  loss: 5.6625 (5.5870)  time: 0.1801  data: 0.0001  max mem: 4818
Epoch: [10]  [18000/20018]  eta: 0:06:03  lr: 0.000034  loss: 5.4185 (5.5860)  time: 0.1804  data: 0.0001  max mem: 4818
Epoch: [10]  [19000/20018]  eta: 0:03:03  lr: 0.000034  loss: 5.6499 (5.5827)  time: 0.1801  data: 0.0001  max mem: 4818
Epoch: [10]  [20000/20018]  eta: 0:00:03  lr: 0.000034  loss: 5.6811 (5.5791)  time: 0.1796  data: 0.0001  max mem: 4818
Epoch: [10]  [20017/20018]  eta: 0:00:00  lr: 0.000034  loss: 5.5339 (5.5790)  time: 0.1792  data: 0.0005  max mem: 4818
Epoch: [10] Total time: 1:00:07 (0.1802 s / it)
Averaged stats: lr: 0.000034  loss: 5.5339 (5.5790)
Test:  [  0/782]  eta: 0:08:52  loss: 2.0838 (2.0838)  acc1: 73.4375 (73.4375)  acc5: 82.8125 (82.8125)  time: 0.6812  data: 0.6145  max mem: 4818
Test:  [ 10/782]  eta: 0:02

Test:  [510/782]  eta: 0:00:31  loss: 3.8985 (3.2776)  acc1: 21.8750 (33.5494)  acc5: 45.3125 (59.3475)  time: 0.1170  data: 0.0527  max mem: 4818
Test:  [520/782]  eta: 0:00:30  loss: 3.8985 (3.2836)  acc1: 15.6250 (33.2624)  acc5: 45.3125 (59.1981)  time: 0.1192  data: 0.0548  max mem: 4818
Test:  [530/782]  eta: 0:00:29  loss: 3.6234 (3.3003)  acc1: 12.5000 (33.0185)  acc5: 50.0000 (58.8836)  time: 0.1129  data: 0.0489  max mem: 4818
Test:  [540/782]  eta: 0:00:28  loss: 3.8906 (3.3068)  acc1: 20.3125 (32.9771)  acc5: 39.0625 (58.6876)  time: 0.1139  data: 0.0504  max mem: 4818
Test:  [550/782]  eta: 0:00:27  loss: 3.8906 (3.3134)  acc1: 25.0000 (32.9174)  acc5: 48.4375 (58.5895)  time: 0.1097  data: 0.0454  max mem: 4818
Test:  [560/782]  eta: 0:00:25  loss: 3.9050 (3.3231)  acc1: 21.8750 (32.7373)  acc5: 48.4375 (58.3807)  time: 0.1100  data: 0.0455  max mem: 4818
Test:  [570/782]  eta: 0:00:24  loss: 3.9343 (3.3362)  acc1: 21.8750 (32.5826)  acc5: 45.3125 (58.1847)  time: 0.1176 

Test:  [ 70/782]  eta: 0:01:31  loss: 2.4166 (2.8318)  acc1: 48.4375 (42.5616)  acc5: 68.7500 (67.0555)  time: 0.1245  data: 0.0597  max mem: 4818
Test:  [ 80/782]  eta: 0:01:29  loss: 2.0274 (2.7329)  acc1: 54.6875 (44.5602)  acc5: 79.6875 (68.5764)  time: 0.1086  data: 0.0443  max mem: 4818
Test:  [ 90/782]  eta: 0:01:25  loss: 2.3092 (2.7514)  acc1: 51.5625 (44.2136)  acc5: 75.0000 (68.3551)  time: 0.1086  data: 0.0442  max mem: 4818
Test:  [100/782]  eta: 0:01:25  loss: 3.0207 (2.8105)  acc1: 34.3750 (42.9610)  acc5: 62.5000 (67.2494)  time: 0.1181  data: 0.0531  max mem: 4818
Test:  [110/782]  eta: 0:01:23  loss: 2.4237 (2.7202)  acc1: 48.4375 (44.8902)  acc5: 71.8750 (68.6796)  time: 0.1219  data: 0.0568  max mem: 4818
Test:  [120/782]  eta: 0:01:21  loss: 1.9482 (2.7056)  acc1: 60.9375 (44.8735)  acc5: 81.2500 (68.8533)  time: 0.1087  data: 0.0441  max mem: 4818
Test:  [130/782]  eta: 0:01:18  loss: 2.8557 (2.7454)  acc1: 32.8125 (43.3922)  acc5: 68.7500 (68.0463)  time: 0.1044 

Test:  [630/782]  eta: 0:00:17  loss: 3.9939 (3.2311)  acc1: 23.4375 (34.5830)  acc5: 45.3125 (59.8158)  time: 0.1074  data: 0.0437  max mem: 4818
Test:  [640/782]  eta: 0:00:16  loss: 3.7106 (3.2382)  acc1: 21.8750 (34.4506)  acc5: 50.0000 (59.7041)  time: 0.1142  data: 0.0504  max mem: 4818
Test:  [650/782]  eta: 0:00:15  loss: 3.6544 (3.2444)  acc1: 21.8750 (34.3294)  acc5: 45.3125 (59.6054)  time: 0.1148  data: 0.0513  max mem: 4818
Test:  [660/782]  eta: 0:00:14  loss: 4.2449 (3.2619)  acc1: 15.6250 (33.9991)  acc5: 40.6250 (59.2781)  time: 0.1045  data: 0.0410  max mem: 4818
Test:  [670/782]  eta: 0:00:12  loss: 4.1507 (3.2675)  acc1: 17.1875 (33.8743)  acc5: 42.1875 (59.1980)  time: 0.1096  data: 0.0459  max mem: 4818
Test:  [680/782]  eta: 0:00:11  loss: 3.7350 (3.2769)  acc1: 21.8750 (33.6454)  acc5: 48.4375 (59.0102)  time: 0.1109  data: 0.0472  max mem: 4818
Test:  [690/782]  eta: 0:00:10  loss: 3.8325 (3.2812)  acc1: 18.7500 (33.5677)  acc5: 45.3125 (58.9182)  time: 0.1063 

Test:  [190/782]  eta: 0:01:10  loss: 2.5988 (2.6653)  acc1: 37.5000 (42.4493)  acc5: 75.0000 (69.3635)  time: 0.1065  data: 0.0414  max mem: 4818
Test:  [200/782]  eta: 0:01:08  loss: 2.3693 (2.6597)  acc1: 37.5000 (42.2497)  acc5: 78.1250 (69.5818)  time: 0.1049  data: 0.0401  max mem: 4818
Test:  [210/782]  eta: 0:01:07  loss: 2.5684 (2.6688)  acc1: 34.3750 (41.9283)  acc5: 71.8750 (69.3646)  time: 0.1119  data: 0.0475  max mem: 4818
Test:  [220/782]  eta: 0:01:07  loss: 2.7734 (2.6662)  acc1: 35.9375 (41.8411)  acc5: 70.3125 (69.5136)  time: 0.1362  data: 0.0713  max mem: 4818
Test:  [230/782]  eta: 0:01:05  loss: 2.3827 (2.6399)  acc1: 48.4375 (42.3295)  acc5: 79.6875 (70.0690)  time: 0.1272  data: 0.0624  max mem: 4818
Test:  [240/782]  eta: 0:01:04  loss: 2.3765 (2.6340)  acc1: 48.4375 (42.4598)  acc5: 78.1250 (70.2930)  time: 0.1107  data: 0.0465  max mem: 4818
Test:  [250/782]  eta: 0:01:03  loss: 2.4595 (2.6452)  acc1: 40.6250 (42.1813)  acc5: 73.4375 (70.0884)  time: 0.1302 

Test:  [750/782]  eta: 0:00:03  loss: 2.0812 (3.0784)  acc1: 51.5625 (36.5492)  acc5: 79.6875 (61.9320)  time: 0.1251  data: 0.0602  max mem: 4818
Test:  [760/782]  eta: 0:00:02  loss: 2.6510 (3.0798)  acc1: 42.1875 (36.5165)  acc5: 65.6250 (61.8984)  time: 0.1149  data: 0.0503  max mem: 4818
Test:  [770/782]  eta: 0:00:01  loss: 2.8133 (3.0677)  acc1: 35.9375 (36.7481)  acc5: 67.1875 (62.1190)  time: 0.1141  data: 0.0499  max mem: 4818
Test:  [780/782]  eta: 0:00:00  loss: 2.0845 (3.0547)  acc1: 51.5625 (37.0499)  acc5: 79.6875 (62.2979)  time: 0.1174  data: 0.0535  max mem: 4818
Test:  [781/782]  eta: 0:00:00  loss: 2.3011 (3.0577)  acc1: 51.5625 (37.0400)  acc5: 75.0000 (62.2860)  time: 0.1151  data: 0.0535  max mem: 4818
Test: Total time: 0:01:30 (0.1161 s / it)
* Acc@1 37.040 Acc@5 62.286 loss 3.058
Accuracy of the network on the 50000 test images: 37.0%
Max accuracy: 37.04%
Epoch: [13]  [    0/20018]  eta: 4:14:21  lr: 0.000044  loss: 5.5735 (5.5735)  time: 0.7624  data: 0.5796  

Test:  [310/782]  eta: 0:00:56  loss: 2.1695 (2.5305)  acc1: 53.1250 (45.3577)  acc5: 76.5625 (72.3674)  time: 0.1331  data: 0.0678  max mem: 4818
Test:  [320/782]  eta: 0:00:55  loss: 2.5087 (2.5374)  acc1: 43.7500 (45.2249)  acc5: 73.4375 (72.2157)  time: 0.1088  data: 0.0431  max mem: 4818
Test:  [330/782]  eta: 0:00:54  loss: 3.1466 (2.5713)  acc1: 34.3750 (44.7508)  acc5: 59.3750 (71.6154)  time: 0.1119  data: 0.0464  max mem: 4818
Test:  [340/782]  eta: 0:00:52  loss: 3.3904 (2.5868)  acc1: 28.1250 (44.5473)  acc5: 60.9375 (71.3526)  time: 0.1038  data: 0.0387  max mem: 4818
Test:  [350/782]  eta: 0:00:51  loss: 3.2781 (2.6112)  acc1: 28.1250 (44.1551)  acc5: 60.9375 (70.8957)  time: 0.1048  data: 0.0398  max mem: 4818
Test:  [360/782]  eta: 0:00:50  loss: 3.5885 (2.6368)  acc1: 25.0000 (43.6461)  acc5: 53.1250 (70.3991)  time: 0.1186  data: 0.0535  max mem: 4818
Test:  [370/782]  eta: 0:00:49  loss: 3.5885 (2.6573)  acc1: 26.5625 (43.4426)  acc5: 51.5625 (69.9292)  time: 0.1219 

Epoch: [14]  [ 7000/20018]  eta: 0:39:12  lr: 0.000047  loss: 5.1675 (5.2882)  time: 0.1801  data: 0.0001  max mem: 4818
Epoch: [14]  [ 8000/20018]  eta: 0:36:10  lr: 0.000047  loss: 5.3023 (5.2832)  time: 0.1803  data: 0.0001  max mem: 4818
Epoch: [14]  [ 9000/20018]  eta: 0:33:08  lr: 0.000047  loss: 5.1761 (5.2835)  time: 0.1801  data: 0.0001  max mem: 4818
Epoch: [14]  [10000/20018]  eta: 0:30:07  lr: 0.000047  loss: 5.4932 (5.2815)  time: 0.1796  data: 0.0001  max mem: 4818
Epoch: [14]  [11000/20018]  eta: 0:27:07  lr: 0.000047  loss: 5.4804 (5.2788)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [14]  [12000/20018]  eta: 0:24:06  lr: 0.000047  loss: 4.9645 (5.2790)  time: 0.1796  data: 0.0001  max mem: 4818
Epoch: [14]  [13000/20018]  eta: 0:21:05  lr: 0.000047  loss: 5.3569 (5.2799)  time: 0.1805  data: 0.0001  max mem: 4818
Epoch: [14]  [14000/20018]  eta: 0:18:04  lr: 0.000047  loss: 5.5914 (5.2781)  time: 0.1800  data: 0.0001  max mem: 4818
Epoch: [14]  [15000/20018]  eta:

Test:  [430/782]  eta: 0:00:41  loss: 3.0161 (2.6178)  acc1: 39.0625 (43.8153)  acc5: 67.1875 (70.4176)  time: 0.1135  data: 0.0488  max mem: 4818
Test:  [440/782]  eta: 0:00:40  loss: 3.0161 (2.6249)  acc1: 39.0625 (43.8173)  acc5: 64.0625 (70.2452)  time: 0.1112  data: 0.0465  max mem: 4818
Test:  [450/782]  eta: 0:00:39  loss: 2.7680 (2.6244)  acc1: 43.7500 (43.8990)  acc5: 67.1875 (70.2501)  time: 0.1175  data: 0.0528  max mem: 4818
Test:  [460/782]  eta: 0:00:38  loss: 2.9720 (2.6395)  acc1: 37.5000 (43.6585)  acc5: 62.5000 (69.9770)  time: 0.1160  data: 0.0513  max mem: 4818
Test:  [470/782]  eta: 0:00:36  loss: 3.3072 (2.6598)  acc1: 31.2500 (43.3187)  acc5: 57.8125 (69.6523)  time: 0.1244  data: 0.0598  max mem: 4818
Test:  [480/782]  eta: 0:00:35  loss: 3.0875 (2.6635)  acc1: 32.8125 (43.3439)  acc5: 65.6250 (69.6076)  time: 0.1157  data: 0.0510  max mem: 4818
Test:  [490/782]  eta: 0:00:34  loss: 3.0875 (2.6786)  acc1: 32.8125 (43.1231)  acc5: 65.6250 (69.3483)  time: 0.0997 

Test:  [  0/782]  eta: 0:08:54  loss: 1.2558 (1.2558)  acc1: 78.1250 (78.1250)  acc5: 89.0625 (89.0625)  time: 0.6838  data: 0.6171  max mem: 4818
Test:  [ 10/782]  eta: 0:02:10  loss: 1.7807 (1.8500)  acc1: 64.0625 (61.0795)  acc5: 87.5000 (83.6648)  time: 0.1686  data: 0.1045  max mem: 4818
Test:  [ 20/782]  eta: 0:01:55  loss: 1.6343 (1.8025)  acc1: 57.8125 (62.7976)  acc5: 87.5000 (83.7054)  time: 0.1244  data: 0.0603  max mem: 4818
Test:  [ 30/782]  eta: 0:01:43  loss: 2.1694 (2.0904)  acc1: 56.2500 (57.7117)  acc5: 78.1250 (79.1835)  time: 0.1202  data: 0.0552  max mem: 4818
Test:  [ 40/782]  eta: 0:01:39  loss: 2.6964 (2.2605)  acc1: 45.3125 (54.1540)  acc5: 68.7500 (76.0671)  time: 0.1163  data: 0.0512  max mem: 4818
Test:  [ 50/782]  eta: 0:01:33  loss: 2.7913 (2.4029)  acc1: 40.6250 (50.6434)  acc5: 65.6250 (73.6520)  time: 0.1130  data: 0.0483  max mem: 4818
Test:  [ 60/782]  eta: 0:01:34  loss: 2.7910 (2.4143)  acc1: 39.0625 (49.8719)  acc5: 71.8750 (73.5143)  time: 0.1251 

Test:  [560/782]  eta: 0:00:26  loss: 3.3244 (2.6556)  acc1: 32.8125 (43.8280)  acc5: 59.3750 (69.6886)  time: 0.1095  data: 0.0455  max mem: 4818
Test:  [570/782]  eta: 0:00:24  loss: 3.1763 (2.6665)  acc1: 35.9375 (43.7308)  acc5: 57.8125 (69.5189)  time: 0.1181  data: 0.0543  max mem: 4818
Test:  [580/782]  eta: 0:00:23  loss: 3.0076 (2.6773)  acc1: 37.5000 (43.6290)  acc5: 62.5000 (69.3390)  time: 0.1171  data: 0.0530  max mem: 4818
Test:  [590/782]  eta: 0:00:22  loss: 3.0094 (2.6886)  acc1: 32.8125 (43.3693)  acc5: 60.9375 (69.0831)  time: 0.1056  data: 0.0413  max mem: 4818
Test:  [600/782]  eta: 0:00:21  loss: 3.2132 (2.6975)  acc1: 34.3750 (43.2924)  acc5: 56.2500 (68.9346)  time: 0.1072  data: 0.0431  max mem: 4818
Test:  [610/782]  eta: 0:00:20  loss: 3.2132 (2.7050)  acc1: 37.5000 (43.1772)  acc5: 57.8125 (68.7653)  time: 0.1121  data: 0.0481  max mem: 4818
Test:  [620/782]  eta: 0:00:18  loss: 3.3641 (2.7185)  acc1: 34.3750 (42.9826)  acc5: 54.6875 (68.5462)  time: 0.1136 

Test:  [120/782]  eta: 0:01:20  loss: 1.5763 (2.0591)  acc1: 70.3125 (55.7464)  acc5: 89.0625 (78.5770)  time: 0.1084  data: 0.0436  max mem: 4818
Test:  [130/782]  eta: 0:01:18  loss: 2.3211 (2.1081)  acc1: 45.3125 (54.1508)  acc5: 78.1250 (78.0892)  time: 0.1047  data: 0.0399  max mem: 4818
Test:  [140/782]  eta: 0:01:17  loss: 2.6402 (2.1451)  acc1: 35.9375 (53.0696)  acc5: 65.6250 (77.5488)  time: 0.1137  data: 0.0486  max mem: 4818
Test:  [150/782]  eta: 0:01:15  loss: 2.6673 (2.1805)  acc1: 34.3750 (51.8626)  acc5: 70.3125 (77.0488)  time: 0.1204  data: 0.0557  max mem: 4818
Test:  [160/782]  eta: 0:01:14  loss: 2.5716 (2.2045)  acc1: 34.3750 (50.9705)  acc5: 75.0000 (76.9410)  time: 0.1133  data: 0.0491  max mem: 4818
Test:  [170/782]  eta: 0:01:12  loss: 2.2922 (2.1980)  acc1: 43.7500 (51.0508)  acc5: 79.6875 (77.2478)  time: 0.1148  data: 0.0503  max mem: 4818
Test:  [180/782]  eta: 0:01:11  loss: 2.1617 (2.2059)  acc1: 50.0000 (50.7942)  acc5: 81.2500 (77.1927)  time: 0.1151 

Test:  [680/782]  eta: 0:00:11  loss: 3.0287 (2.6294)  acc1: 31.2500 (43.8395)  acc5: 64.0625 (69.6609)  time: 0.1170  data: 0.0521  max mem: 4818
Test:  [690/782]  eta: 0:00:10  loss: 3.1103 (2.6328)  acc1: 28.1250 (43.7523)  acc5: 54.6875 (69.6025)  time: 0.1112  data: 0.0470  max mem: 4818
Test:  [700/782]  eta: 0:00:09  loss: 2.8292 (2.6358)  acc1: 35.9375 (43.7188)  acc5: 60.9375 (69.5435)  time: 0.1028  data: 0.0380  max mem: 4818
Test:  [710/782]  eta: 0:00:08  loss: 3.2008 (2.6471)  acc1: 34.3750 (43.5258)  acc5: 59.3750 (69.2884)  time: 0.1210  data: 0.0558  max mem: 4818
Test:  [720/782]  eta: 0:00:07  loss: 2.5763 (2.6427)  acc1: 42.1875 (43.6351)  acc5: 67.1875 (69.3611)  time: 0.1228  data: 0.0582  max mem: 4818
Test:  [730/782]  eta: 0:00:06  loss: 2.2109 (2.6376)  acc1: 50.0000 (43.6987)  acc5: 78.1250 (69.4810)  time: 0.1088  data: 0.0447  max mem: 4818
Test:  [740/782]  eta: 0:00:04  loss: 1.9564 (2.6256)  acc1: 53.1250 (43.9229)  acc5: 85.9375 (69.7242)  time: 0.1224 

Test:  [240/782]  eta: 0:01:04  loss: 1.8100 (2.0818)  acc1: 60.9375 (53.5075)  acc5: 84.3750 (79.7005)  time: 0.1096  data: 0.0455  max mem: 4818
Test:  [250/782]  eta: 0:01:03  loss: 1.9314 (2.0942)  acc1: 53.1250 (53.1686)  acc5: 79.6875 (79.5132)  time: 0.1294  data: 0.0651  max mem: 4818
Test:  [260/782]  eta: 0:01:02  loss: 1.9346 (2.0717)  acc1: 54.6875 (53.7476)  acc5: 82.8125 (79.7833)  time: 0.1344  data: 0.0704  max mem: 4818
Test:  [270/782]  eta: 0:01:00  loss: 2.0058 (2.0744)  acc1: 54.6875 (53.7304)  acc5: 78.1250 (79.7221)  time: 0.1194  data: 0.0554  max mem: 4818
Test:  [280/782]  eta: 0:00:59  loss: 2.1942 (2.0792)  acc1: 50.0000 (53.5420)  acc5: 75.0000 (79.7042)  time: 0.1143  data: 0.0498  max mem: 4818
Test:  [290/782]  eta: 0:00:58  loss: 2.1076 (2.0783)  acc1: 48.4375 (53.6405)  acc5: 79.6875 (79.7519)  time: 0.1129  data: 0.0481  max mem: 4818
Test:  [300/782]  eta: 0:00:57  loss: 2.0619 (2.0787)  acc1: 54.6875 (53.6597)  acc5: 79.6875 (79.6304)  time: 0.1314 

Epoch: [18]  [    0/20018]  eta: 4:25:55  lr: 0.000059  loss: 4.8567 (4.8567)  time: 0.7971  data: 0.6180  max mem: 4818
Epoch: [18]  [ 1000/20018]  eta: 0:57:06  lr: 0.000059  loss: 5.2076 (5.0522)  time: 0.1794  data: 0.0001  max mem: 4818
Epoch: [18]  [ 2000/20018]  eta: 0:54:04  lr: 0.000059  loss: 5.3614 (5.0398)  time: 0.1806  data: 0.0001  max mem: 4818
Epoch: [18]  [ 3000/20018]  eta: 0:51:07  lr: 0.000059  loss: 5.1868 (5.0429)  time: 0.1813  data: 0.0001  max mem: 4818
Epoch: [18]  [ 4000/20018]  eta: 0:48:08  lr: 0.000059  loss: 5.1426 (5.0419)  time: 0.1809  data: 0.0001  max mem: 4818
Epoch: [18]  [ 5000/20018]  eta: 0:45:08  lr: 0.000059  loss: 5.1233 (5.0446)  time: 0.1807  data: 0.0001  max mem: 4818
Epoch: [18]  [ 6000/20018]  eta: 0:42:08  lr: 0.000059  loss: 5.1712 (5.0452)  time: 0.1803  data: 0.0001  max mem: 4818
Epoch: [18]  [ 7000/20018]  eta: 0:39:07  lr: 0.000059  loss: 5.3616 (5.0419)  time: 0.1809  data: 0.0001  max mem: 4818
Epoch: [18]  [ 8000/20018]  eta:

Test:  [370/782]  eta: 0:00:49  loss: 2.9635 (2.1406)  acc1: 34.3750 (52.3332)  acc5: 64.0625 (78.6346)  time: 0.1183  data: 0.0546  max mem: 4818
Test:  [380/782]  eta: 0:00:48  loss: 2.5419 (2.1507)  acc1: 35.9375 (52.1202)  acc5: 60.9375 (78.3752)  time: 0.1202  data: 0.0562  max mem: 4818
Test:  [390/782]  eta: 0:00:46  loss: 2.9863 (2.1791)  acc1: 34.3750 (51.6224)  acc5: 67.1875 (77.8772)  time: 0.1075  data: 0.0434  max mem: 4818
Test:  [400/782]  eta: 0:00:45  loss: 3.2604 (2.2034)  acc1: 31.2500 (51.0871)  acc5: 60.9375 (77.4158)  time: 0.1063  data: 0.0424  max mem: 4818
Test:  [410/782]  eta: 0:00:44  loss: 3.2604 (2.2275)  acc1: 32.8125 (50.6501)  acc5: 57.8125 (77.0263)  time: 0.1092  data: 0.0446  max mem: 4818
Test:  [420/782]  eta: 0:00:43  loss: 2.7506 (2.2305)  acc1: 40.6250 (50.6198)  acc5: 67.1875 (76.9893)  time: 0.1108  data: 0.0462  max mem: 4818
Test:  [430/782]  eta: 0:00:41  loss: 2.5344 (2.2427)  acc1: 42.1875 (50.4677)  acc5: 67.1875 (76.7873)  time: 0.1030 

Epoch: [19]  [15000/20018]  eta: 0:15:07  lr: 0.000057  loss: 5.0819 (4.9509)  time: 0.1851  data: 0.0001  max mem: 4818
Epoch: [19]  [16000/20018]  eta: 0:12:07  lr: 0.000057  loss: 4.7290 (4.9496)  time: 0.1873  data: 0.0001  max mem: 4818
Epoch: [19]  [17000/20018]  eta: 0:09:06  lr: 0.000057  loss: 5.0542 (4.9492)  time: 0.1835  data: 0.0001  max mem: 4818
Epoch: [19]  [18000/20018]  eta: 0:06:05  lr: 0.000057  loss: 5.0265 (4.9476)  time: 0.1837  data: 0.0001  max mem: 4818
Epoch: [19]  [19000/20018]  eta: 0:03:04  lr: 0.000057  loss: 4.6097 (4.9470)  time: 0.1839  data: 0.0001  max mem: 4818
Epoch: [19]  [20000/20018]  eta: 0:00:03  lr: 0.000057  loss: 5.0065 (4.9457)  time: 0.1820  data: 0.0001  max mem: 4818
Epoch: [19]  [20017/20018]  eta: 0:00:00  lr: 0.000057  loss: 4.4947 (4.9454)  time: 0.1802  data: 0.0005  max mem: 4818
Epoch: [19] Total time: 1:00:33 (0.1815 s / it)
Averaged stats: lr: 0.000057  loss: 4.4947 (4.9454)
Test:  [  0/782]  eta: 0:11:31  loss: 1.1298 (1.1298)

Test:  [500/782]  eta: 0:00:33  loss: 2.8066 (2.1647)  acc1: 32.8125 (52.1332)  acc5: 67.1875 (77.1956)  time: 0.1055  data: 0.0387  max mem: 4818
Test:  [510/782]  eta: 0:00:32  loss: 2.8066 (2.1749)  acc1: 39.0625 (51.9906)  acc5: 67.1875 (76.9906)  time: 0.1064  data: 0.0410  max mem: 4818
Test:  [520/782]  eta: 0:00:30  loss: 2.8674 (2.1833)  acc1: 39.0625 (51.7514)  acc5: 65.6250 (76.8954)  time: 0.1162  data: 0.0514  max mem: 4818
Test:  [530/782]  eta: 0:00:29  loss: 2.4170 (2.1967)  acc1: 32.8125 (51.5537)  acc5: 70.3125 (76.6567)  time: 0.1156  data: 0.0495  max mem: 4818
Test:  [540/782]  eta: 0:00:28  loss: 2.4088 (2.2020)  acc1: 43.7500 (51.4701)  acc5: 67.1875 (76.4961)  time: 0.1124  data: 0.0447  max mem: 4818
Test:  [550/782]  eta: 0:00:27  loss: 2.4542 (2.2109)  acc1: 43.7500 (51.3838)  acc5: 67.1875 (76.3527)  time: 0.1096  data: 0.0431  max mem: 4818
Test:  [560/782]  eta: 0:00:26  loss: 2.9525 (2.2210)  acc1: 40.6250 (51.1893)  acc5: 65.6250 (76.2143)  time: 0.1179 

Test:  [ 60/782]  eta: 0:01:37  loss: 2.0076 (1.8917)  acc1: 43.7500 (58.6322)  acc5: 78.1250 (82.1209)  time: 0.1277  data: 0.0599  max mem: 4818
Test:  [ 70/782]  eta: 0:01:32  loss: 1.3478 (1.8292)  acc1: 67.1875 (60.4974)  acc5: 87.5000 (82.8345)  time: 0.1266  data: 0.0576  max mem: 4818
Test:  [ 80/782]  eta: 0:01:29  loss: 1.2933 (1.7572)  acc1: 75.0000 (62.3457)  acc5: 90.6250 (83.7191)  time: 0.1079  data: 0.0408  max mem: 4818
Test:  [ 90/782]  eta: 0:01:26  loss: 1.4045 (1.7732)  acc1: 70.3125 (62.4313)  acc5: 84.3750 (83.4650)  time: 0.1085  data: 0.0431  max mem: 4818
Test:  [100/782]  eta: 0:01:26  loss: 1.8343 (1.8146)  acc1: 60.9375 (61.5873)  acc5: 81.2500 (82.6733)  time: 0.1203  data: 0.0549  max mem: 4818
Test:  [110/782]  eta: 0:01:23  loss: 1.3598 (1.7457)  acc1: 67.1875 (62.8941)  acc5: 85.9375 (83.5726)  time: 0.1218  data: 0.0565  max mem: 4818
Test:  [120/782]  eta: 0:01:21  loss: 1.2175 (1.7336)  acc1: 75.0000 (63.0553)  acc5: 90.6250 (83.7164)  time: 0.1080 

Test:  [620/782]  eta: 0:00:18  loss: 2.7866 (2.2207)  acc1: 37.5000 (51.6002)  acc5: 62.5000 (76.3084)  time: 0.1130  data: 0.0486  max mem: 4818
Test:  [630/782]  eta: 0:00:17  loss: 2.9873 (2.2268)  acc1: 37.5000 (51.5254)  acc5: 62.5000 (76.1737)  time: 0.1094  data: 0.0450  max mem: 4818
Test:  [640/782]  eta: 0:00:16  loss: 2.8601 (2.2344)  acc1: 37.5000 (51.3066)  acc5: 65.6250 (76.0262)  time: 0.1156  data: 0.0508  max mem: 4818
Test:  [650/782]  eta: 0:00:15  loss: 2.4787 (2.2387)  acc1: 35.9375 (51.2145)  acc5: 71.8750 (75.9673)  time: 0.1153  data: 0.0501  max mem: 4818
Test:  [660/782]  eta: 0:00:14  loss: 2.7760 (2.2552)  acc1: 32.8125 (50.9054)  acc5: 62.5000 (75.7139)  time: 0.1043  data: 0.0393  max mem: 4818
Test:  [670/782]  eta: 0:00:13  loss: 2.7760 (2.2582)  acc1: 40.6250 (50.8872)  acc5: 70.3125 (75.7009)  time: 0.1141  data: 0.0491  max mem: 4818
Test:  [680/782]  eta: 0:00:11  loss: 2.5197 (2.2678)  acc1: 39.0625 (50.6631)  acc5: 70.3125 (75.5415)  time: 0.1160 

Test:  [180/782]  eta: 0:01:14  loss: 1.7765 (1.8090)  acc1: 56.2500 (58.4254)  acc5: 82.8125 (83.3736)  time: 0.1211  data: 0.0512  max mem: 4818
Test:  [190/782]  eta: 0:01:12  loss: 1.9995 (1.8088)  acc1: 51.5625 (58.1152)  acc5: 84.3750 (83.5160)  time: 0.1108  data: 0.0407  max mem: 4818
Test:  [200/782]  eta: 0:01:11  loss: 1.7095 (1.8016)  acc1: 51.5625 (58.0846)  acc5: 87.5000 (83.6909)  time: 0.1087  data: 0.0387  max mem: 4818
Test:  [210/782]  eta: 0:01:09  loss: 1.7136 (1.8050)  acc1: 53.1250 (57.8940)  acc5: 84.3750 (83.7011)  time: 0.1154  data: 0.0464  max mem: 4818
Test:  [220/782]  eta: 0:01:09  loss: 1.8813 (1.8165)  acc1: 51.5625 (57.6004)  acc5: 81.2500 (83.5549)  time: 0.1407  data: 0.0715  max mem: 4818
Test:  [230/782]  eta: 0:01:08  loss: 1.4218 (1.7925)  acc1: 68.7500 (58.2725)  acc5: 87.5000 (83.8609)  time: 0.1323  data: 0.0628  max mem: 4818
Test:  [240/782]  eta: 0:01:06  loss: 1.4408 (1.7915)  acc1: 68.7500 (58.2923)  acc5: 87.5000 (83.8758)  time: 0.1158 

Test:  [740/782]  eta: 0:00:05  loss: 1.6602 (2.2006)  acc1: 56.2500 (51.7080)  acc5: 90.6250 (76.6405)  time: 0.1312  data: 0.0607  max mem: 4818
Test:  [750/782]  eta: 0:00:03  loss: 1.4251 (2.1913)  acc1: 68.7500 (51.9557)  acc5: 92.1875 (76.7893)  time: 0.1291  data: 0.0588  max mem: 4818
Test:  [760/782]  eta: 0:00:02  loss: 1.6262 (2.1951)  acc1: 53.1250 (51.8520)  acc5: 87.5000 (76.7432)  time: 0.1162  data: 0.0469  max mem: 4818
Test:  [770/782]  eta: 0:00:01  loss: 2.2200 (2.1874)  acc1: 51.5625 (52.0246)  acc5: 79.6875 (76.8746)  time: 0.1154  data: 0.0466  max mem: 4818
Test:  [780/782]  eta: 0:00:00  loss: 1.2425 (2.1781)  acc1: 71.8750 (52.2747)  acc5: 89.0625 (77.0146)  time: 0.1257  data: 0.0567  max mem: 4818
Test:  [781/782]  eta: 0:00:00  loss: 1.2714 (2.1816)  acc1: 65.6250 (52.2640)  acc5: 87.5000 (77.0020)  time: 0.1232  data: 0.0567  max mem: 4818
Test: Total time: 0:01:34 (0.1212 s / it)
* Acc@1 52.264 Acc@5 77.002 loss 2.182
Accuracy of the network on the 50000 

Test:  [300/782]  eta: 0:00:58  loss: 1.3428 (1.7064)  acc1: 67.1875 (60.5586)  acc5: 87.5000 (85.2004)  time: 0.1302  data: 0.0652  max mem: 4818
Test:  [310/782]  eta: 0:00:57  loss: 1.3428 (1.6979)  acc1: 71.8750 (60.8169)  acc5: 89.0625 (85.2693)  time: 0.1353  data: 0.0699  max mem: 4818
Test:  [320/782]  eta: 0:00:55  loss: 1.5193 (1.7085)  acc1: 60.9375 (60.6600)  acc5: 85.9375 (85.1246)  time: 0.1111  data: 0.0455  max mem: 4818
Test:  [330/782]  eta: 0:00:54  loss: 2.3810 (1.7415)  acc1: 48.4375 (60.0972)  acc5: 76.5625 (84.5591)  time: 0.1133  data: 0.0453  max mem: 4818
Test:  [340/782]  eta: 0:00:53  loss: 2.4151 (1.7559)  acc1: 46.8750 (59.8561)  acc5: 73.4375 (84.3383)  time: 0.1060  data: 0.0379  max mem: 4818
Test:  [350/782]  eta: 0:00:51  loss: 2.2962 (1.7795)  acc1: 50.0000 (59.4240)  acc5: 76.5625 (83.9209)  time: 0.1083  data: 0.0424  max mem: 4818
Test:  [360/782]  eta: 0:00:50  loss: 2.5543 (1.8022)  acc1: 42.1875 (58.9162)  acc5: 71.8750 (83.5570)  time: 0.1187 

KeyboardInterrupt: 