In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

FOLDERNAME = 'cs361_project/adala-optimizer/classification_cifar10'
assert FOLDERNAME is not None, "[!] Enter the foldername."

import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))

%cd drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/cs361_project/adala-optimizer/classification_cifar10


In [2]:
!pip install torch
!pip install adabound

%load_ext autoreload
%autoreload 2



In [3]:
from __future__ import print_function

import torch.optim as optim
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms

import gc
import os
import time
import argparse
from models import *
from tqdm import tqdm
from copy import deepcopy
from adabound import AdaBound
from torch.optim import Adam, SGD
from optimizers import *

In [4]:
def get_parser():
    parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
    parser.add_argument('--total_epoch', default=200, type=int, help='Total number of training epochs')
    parser.add_argument('--decay_epoch', default=150, type=int, help='Number of epochs to decay learning rate')
    parser.add_argument('--model', default='resnet', type=str, help='model',
                        choices=['resnet', 'densenet', 'vgg'])
    parser.add_argument('--optim', default='sgd', type=str, help='optimizer',
                        choices=['sgd', 'adam', 'adamw', 'adabelief', 'adala', 'yogi', 'msvag', 'radam', 'fromage', 'adabound',
                                 ])
    parser.add_argument('--run', default=0, type=int, help='number of runs')
    parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
    parser.add_argument('--lr-gamma', default=0.1, type=float, help='learning rate')
    parser.add_argument('--final_lr', default=0.1, type=float,
                        help='final learning rate of AdaBound')
    parser.add_argument('--gamma', default=1e-3, type=float,
                        help='convergence speed term of AdaBound')

    parser.add_argument('--eps', default=1e-8, type=float, help='eps for var adam')

    parser.add_argument('--momentum', default=0.9, type=float, help='momentum term')
    parser.add_argument('--beta1', default=0.9, type=float, help='Adam coefficients beta_1')
    parser.add_argument('--beta2', default=0.999, type=float, help='Adam coefficients beta_2')
    parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
    parser.add_argument('--batchsize', type=int, default=128, help='batch size')
    parser.add_argument('--weight_decay', default=5e-4, type=float,
                        help='weight decay for optimizers')
    parser.add_argument('--reset', action='store_true',
                        help='whether reset optimizer at learning rate decay')
    return parser

In [5]:
def build_dataset(args):
    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
                                            transform=transform_train)
    train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batchsize, shuffle=True,
                                               num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True,
                                           transform=transform_test)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=args.batchsize, shuffle=False, num_workers=2)

    # classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    return train_loader, test_loader

In [6]:
def get_ckpt_name(model='resnet', optimizer='sgd', lr=0.1, final_lr=0.1, momentum=0.9,
                  beta1=0.9, beta2=0.999, gamma=1e-3, eps=1e-8, weight_decay=5e-4,
                  reset = False, run = 0, weight_decouple = False, rectify = False):
    name = {
        'sgd': 'lr{}-momentum{}-wdecay{}-run{}'.format(lr, momentum,weight_decay, run),
        'adam': 'lr{}-betas{}-{}-wdecay{}-eps{}-run{}'.format(lr, beta1, beta2,weight_decay, eps, run),
        'fromage': 'lr{}-betas{}-{}-wdecay{}-eps{}-run{}'.format(lr, beta1, beta2,weight_decay, eps, run),
        'radam': 'lr{}-betas{}-{}-wdecay{}-eps{}-run{}'.format(lr, beta1, beta2,weight_decay, eps, run),
        'adamw': 'lr{}-betas{}-{}-wdecay{}-eps{}-run{}'.format(lr, beta1, beta2,weight_decay, eps, run),
        'adabelief': 'lr{}-betas{}-{}-eps{}-wdecay{}-run{}'.format(lr, beta1, beta2, eps, weight_decay, run),
        'adala': 'lr{}-betas{}-{}-eps{}-wdecay{}-run{}'.format(lr, beta1, beta2, eps, weight_decay, run),
        'adabound': 'lr{}-betas{}-{}-final_lr{}-gamma{}-wdecay{}-run{}'.format(lr, beta1, beta2, final_lr, gamma,weight_decay, run),
        'yogi':'lr{}-betas{}-{}-eps{}-wdecay{}-run{}'.format(lr, beta1, beta2, eps,weight_decay, run),
        'msvag': 'lr{}-betas{}-{}-eps{}-wdecay{}-run{}'.format(lr, beta1, beta2, eps,
                                                                    weight_decay, run),
    }[optimizer]
    return '{}-{}-{}-reset{}'.format(model, optimizer, name, str(reset))

In [7]:
def load_checkpoint(ckpt_name):
    print('==> Resuming from checkpoint..')
    path = os.path.join('checkpoint', ckpt_name)
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    assert os.path.exists(path), 'Error: checkpoint {} not found'.format(ckpt_name)
    return torch.load(path)

In [8]:
def build_model(args, device, ckpt=None):
    print('==> Building model..')
    net = {
        'resnet': ResNet34,
        'densenet': DenseNet121,
        'vgg':vgg11,
    }[args.model]()
    net = net.to(device)
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    if ckpt:
        net.load_state_dict(ckpt['net'])

    return net

In [9]:
def create_optimizer(args, model_params):
    args.optim = args.optim.lower()
    if args.optim == 'sgd':
        return optim.SGD(model_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optim == 'adam':
        return Adam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, eps=args.eps)
    elif args.optim == 'fromage':
        return Fromage(model_params, args.lr)
    elif args.optim == 'radam':
        return RAdam(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, eps=args.eps)
    elif args.optim == 'adamw':
        return AdamW(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, eps=args.eps)
    elif args.optim == 'adabelief':
        return AdaBelief(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, eps=args.eps)
    elif args.optim == 'adala':
        return AdaLA(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay, eps=args.eps)
    elif args.optim == 'yogi':
        return Yogi(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optim == 'msvag':
        return MSVAG(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optim == 'adabound':
        return AdaBound(model_params, args.lr, betas=(args.beta1, args.beta2),
                        final_lr=args.final_lr, gamma=args.gamma,
                        weight_decay=args.weight_decay)
    else:
        print('Optimizer not found')

In [10]:
def copy_state(optimizer):
    state = deepcopy(optimizer.param_groups)
    for group_idx, group in enumerate(optimizer.param_groups):
        state[group_idx] = deepcopy(group)
        state[group_idx]['params'] = deepcopy(group['params'])
        for p_idx, p in enumerate(group['params']):
            state[group_idx]['params'][p_idx] = deepcopy(p)
            if p.grad is None:
                continue
            state[group_idx]['params'][p_idx].grad = deepcopy(p.grad)
            state[group_idx]['params'][p_idx].grad.data = deepcopy(p.grad.data)
    return state

In [11]:
def train(net, epoch, device, data_loader, optimizer, criterion, args):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in tqdm(enumerate(data_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        # Save current state of params and grads
        loss = criterion(outputs, targets)
        loss.backward(retain_graph=True)
        old_state = copy_state(optimizer)
        optimizer.step()
        # Look ahead and save next state of params and grads
        loss = criterion(outputs, targets)
        loss.backward()
        new_state = copy_state(optimizer)
        optimizer.step(old_state=old_state, new_state=new_state)
        # Collect garbage
        old_state.clear()
        new_state.clear()
        gc.collect()
        
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    accuracy = 100. * correct / total
    print('train acc %.3f' % accuracy)

    return accuracy

In [12]:
def test(net, device, data_loader, criterion):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(data_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    accuracy = 100. * correct / total
    print(' test acc %.3f' % accuracy)

    return accuracy

In [13]:
def adjust_learning_rate(optimizer, epoch, step_size=150, gamma=0.1, reset=False):
    for param_group in optimizer.param_groups:
        if epoch % step_size == 0 and epoch > 0:
            param_group['lr'] *= gamma

    if epoch % step_size == 0 and epoch > 0 and reset:
        optimizer.reset()

In [None]:
input_args = ['--optim', 'adala',
              '--lr', '1e-3',
              '--eps', '1e-8',
              '--beta1', '0.9',
              '--beta2', '0.999',
              '--momentum', '0.9']
parser = get_parser()
args = parser.parse_args(args=input_args)

train_loader, test_loader = build_dataset(args)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

ckpt_name = get_ckpt_name(model=args.model, optimizer=args.optim, lr=args.lr,
                          final_lr=args.final_lr, momentum=args.momentum,
                          beta1=args.beta1, beta2=args.beta2, gamma=args.gamma,
                          eps=args.eps,
                          reset=args.reset, run=args.run,
                          weight_decay=args.weight_decay)
print('ckpt_name')
if args.resume:
    ckpt = load_checkpoint(ckpt_name)
    best_acc = ckpt['acc']
    start_epoch = ckpt['epoch']

    curve = os.path.join('curve', ckpt_name)     
    curve = torch.load(curve)
    train_accuracies = curve['train_acc']
    test_accuracies = curve['test_acc']
else:
    ckpt = None
    best_acc = 0
    start_epoch = -1
    train_accuracies = []
    test_accuracies = []

net = build_model(args, device, ckpt=ckpt)
criterion = nn.CrossEntropyLoss()
optimizer = create_optimizer(args, net.parameters())
# scheduler = optim.lr_scheduler.StepLR(optimizer,
# step_size=args.decay_epoch, gamma=0.1, last_epoch=start_epoch)

for epoch in range(start_epoch + 1, args.total_epoch):
    start = time.time()
    # scheduler.step()
    adjust_learning_rate(optimizer, epoch, step_size=args.decay_epoch, gamma=args.lr_gamma, reset = args.reset)
    train_acc = train(net, epoch, device, train_loader, optimizer, criterion, args)
    test_acc = test(net, device, test_loader, criterion)
    end = time.time()
    print('Time: {}'.format(end-start))

    # Save checkpoint.
    if test_acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': test_acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, os.path.join('checkpoint', ckpt_name))
        best_acc = test_acc

    train_accuracies.append(train_acc)
    test_accuracies.append(test_acc)
    if not os.path.isdir('curve'):
        os.mkdir('curve')
    torch.save({'train_acc': train_accuracies, 'test_acc': test_accuracies},
              os.path.join('curve', ckpt_name))

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified
ckpt_name
==> Building model..

Epoch: 0


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:1005.)
  grad.add_(group['weight_decay'], p.data)
391it [01:47,  3.65it/s]

train acc 13.244





 test acc 12.800
Time: 111.4666862487793
Saving..

Epoch: 1


391it [01:48,  3.59it/s]

train acc 13.232





 test acc 12.990
Time: 113.388188123703
Saving..

Epoch: 2


391it [01:50,  3.55it/s]

train acc 13.288





 test acc 12.770
Time: 114.46118545532227

Epoch: 3


391it [01:50,  3.55it/s]

train acc 13.304





 test acc 13.010
Time: 114.66189527511597
Saving..

Epoch: 4


391it [01:50,  3.55it/s]

train acc 13.222





 test acc 12.970
Time: 114.61824941635132

Epoch: 5


391it [01:49,  3.56it/s]

train acc 13.286





 test acc 13.380
Time: 114.27499008178711
Saving..

Epoch: 6


391it [01:50,  3.54it/s]

train acc 13.200





 test acc 13.260
Time: 114.6921501159668

Epoch: 7


391it [01:50,  3.55it/s]

train acc 13.302





 test acc 13.040
Time: 114.63577318191528

Epoch: 8


391it [01:50,  3.55it/s]

train acc 13.222





 test acc 12.940
Time: 114.56573677062988

Epoch: 9


391it [01:50,  3.55it/s]

train acc 13.264





 test acc 13.070
Time: 114.50615334510803

Epoch: 10


391it [01:50,  3.55it/s]

train acc 13.216





 test acc 13.020
Time: 114.73196840286255

Epoch: 11


391it [01:50,  3.55it/s]

train acc 13.270





 test acc 13.020
Time: 114.61421012878418

Epoch: 12


391it [01:50,  3.55it/s]

train acc 13.260





 test acc 12.880
Time: 114.54746198654175

Epoch: 13


391it [01:50,  3.55it/s]

train acc 13.244





 test acc 13.020
Time: 114.61385679244995

Epoch: 14


391it [01:50,  3.55it/s]

train acc 13.258





 test acc 13.020
Time: 114.50799655914307

Epoch: 15


391it [01:50,  3.55it/s]

train acc 13.302





 test acc 12.920
Time: 114.58191084861755

Epoch: 16


391it [01:50,  3.55it/s]

train acc 13.214





 test acc 13.190
Time: 114.68262815475464

Epoch: 17


391it [01:50,  3.55it/s]

train acc 13.258





 test acc 13.140
Time: 114.43263244628906

Epoch: 18


391it [01:50,  3.55it/s]

train acc 13.260





 test acc 13.130
Time: 114.52320146560669

Epoch: 19


391it [01:50,  3.55it/s]

train acc 13.300





 test acc 13.010
Time: 114.37342810630798

Epoch: 20


391it [01:50,  3.55it/s]

train acc 13.296





 test acc 13.030
Time: 114.51922059059143

Epoch: 21


391it [01:50,  3.55it/s]

train acc 13.304





 test acc 13.060
Time: 114.51600193977356

Epoch: 22


391it [01:50,  3.54it/s]

train acc 13.332





 test acc 13.010
Time: 114.7346544265747

Epoch: 23


391it [01:50,  3.55it/s]

train acc 13.214





 test acc 12.900
Time: 114.54729962348938

Epoch: 24


391it [01:50,  3.55it/s]

train acc 13.398





 test acc 12.990
Time: 114.4878830909729

Epoch: 25


391it [01:50,  3.55it/s]

train acc 13.262





 test acc 12.960
Time: 114.61688017845154

Epoch: 26


391it [01:50,  3.54it/s]

train acc 13.248





 test acc 13.080
Time: 114.78729629516602

Epoch: 27


391it [01:50,  3.55it/s]

train acc 13.174





 test acc 12.980
Time: 114.6393473148346

Epoch: 28


391it [01:50,  3.55it/s]

train acc 13.266





 test acc 12.920
Time: 114.65873622894287

Epoch: 29


391it [01:50,  3.55it/s]

train acc 13.258





 test acc 13.050
Time: 114.63162040710449

Epoch: 30


391it [01:50,  3.55it/s]

train acc 13.232





 test acc 13.020
Time: 114.61432838439941

Epoch: 31


391it [01:50,  3.55it/s]

train acc 13.244





 test acc 12.940
Time: 114.54699873924255

Epoch: 32


391it [01:50,  3.55it/s]

train acc 13.228





 test acc 13.090
Time: 114.53917002677917

Epoch: 33


391it [01:50,  3.55it/s]

train acc 13.300





 test acc 12.990
Time: 114.48391485214233

Epoch: 34


391it [01:50,  3.55it/s]

train acc 13.218





 test acc 13.070
Time: 114.50893330574036

Epoch: 35


391it [01:49,  3.56it/s]

train acc 13.264





 test acc 12.970
Time: 114.3005633354187

Epoch: 36


391it [01:49,  3.56it/s]

train acc 13.334





 test acc 13.330
Time: 114.3199234008789

Epoch: 37


391it [01:50,  3.55it/s]

train acc 13.268





 test acc 12.930
Time: 114.46770143508911

Epoch: 38


391it [01:49,  3.56it/s]

train acc 13.298





 test acc 13.050
Time: 114.3418116569519

Epoch: 39


391it [01:49,  3.56it/s]

train acc 13.342





 test acc 12.990
Time: 114.37392830848694

Epoch: 40


391it [01:50,  3.55it/s]

train acc 13.272





 test acc 13.160
Time: 114.48680257797241

Epoch: 41


391it [01:50,  3.55it/s]

train acc 13.304





 test acc 13.000
Time: 114.486004114151

Epoch: 42


391it [01:49,  3.56it/s]

train acc 13.208





 test acc 12.970
Time: 114.35910487174988

Epoch: 43


391it [01:50,  3.55it/s]

train acc 13.324





 test acc 12.970
Time: 114.43287801742554

Epoch: 44


391it [01:50,  3.55it/s]

train acc 13.240





 test acc 12.930
Time: 114.44747138023376

Epoch: 45


391it [01:49,  3.55it/s]

train acc 13.288





 test acc 12.890
Time: 114.37949061393738

Epoch: 46


391it [01:49,  3.56it/s]

train acc 13.244





 test acc 13.060
Time: 114.3214316368103

Epoch: 47


391it [01:49,  3.56it/s]

train acc 13.262





 test acc 12.940
Time: 114.35826444625854

Epoch: 48


391it [01:50,  3.55it/s]

train acc 13.268





 test acc 12.960
Time: 114.50083446502686

Epoch: 49


391it [01:49,  3.55it/s]

train acc 13.222





 test acc 13.090
Time: 114.39790892601013

Epoch: 50


391it [01:49,  3.56it/s]

train acc 13.182





 test acc 13.070
Time: 114.38711214065552

Epoch: 51


391it [01:49,  3.56it/s]

train acc 13.244





 test acc 12.820
Time: 114.29351425170898

Epoch: 52


391it [01:49,  3.56it/s]

train acc 13.328





 test acc 13.070
Time: 114.3301088809967

Epoch: 53


391it [01:50,  3.55it/s]

train acc 13.286





 test acc 13.060
Time: 114.42831563949585

Epoch: 54


391it [01:50,  3.55it/s]

train acc 13.380





 test acc 13.100
Time: 114.68014335632324

Epoch: 55


391it [01:49,  3.56it/s]

train acc 13.244





 test acc 13.080
Time: 114.38075399398804

Epoch: 56


391it [01:49,  3.56it/s]

train acc 13.154





 test acc 12.920
Time: 114.33176708221436

Epoch: 57


391it [01:50,  3.55it/s]

train acc 13.332





 test acc 12.790
Time: 114.45092296600342

Epoch: 58


391it [01:50,  3.55it/s]

train acc 13.324





 test acc 13.030
Time: 114.49656414985657

Epoch: 59


391it [01:49,  3.56it/s]

train acc 13.334





 test acc 12.840
Time: 114.36707901954651

Epoch: 60


391it [01:49,  3.56it/s]

train acc 13.374





 test acc 12.950
Time: 114.2289834022522

Epoch: 61


391it [01:49,  3.56it/s]

train acc 13.276





 test acc 13.090
Time: 114.22317790985107

Epoch: 62


391it [01:50,  3.55it/s]

train acc 13.324





 test acc 12.920
Time: 114.40523409843445

Epoch: 63


391it [01:49,  3.55it/s]

train acc 13.390





 test acc 13.010
Time: 114.40054845809937

Epoch: 64


391it [01:50,  3.55it/s]

train acc 13.342





 test acc 13.170
Time: 114.46528100967407

Epoch: 65


391it [01:49,  3.56it/s]

train acc 13.240





 test acc 13.160
Time: 114.36330723762512

Epoch: 66


391it [01:49,  3.56it/s]

train acc 13.162





 test acc 12.960
Time: 114.21023464202881

Epoch: 67


391it [01:49,  3.55it/s]

train acc 13.258





 test acc 12.840
Time: 114.37399435043335

Epoch: 68


391it [01:49,  3.56it/s]

train acc 13.268





 test acc 12.940
Time: 114.30404019355774

Epoch: 69


391it [01:50,  3.55it/s]

train acc 13.226





 test acc 12.830
Time: 114.43123531341553

Epoch: 70


391it [01:50,  3.55it/s]

train acc 13.292





 test acc 13.270
Time: 114.48373532295227

Epoch: 71


391it [01:49,  3.56it/s]

train acc 13.250





 test acc 13.040
Time: 114.30153632164001

Epoch: 72


391it [01:50,  3.55it/s]

train acc 13.256





 test acc 13.080
Time: 114.50616765022278

Epoch: 73


391it [01:50,  3.55it/s]

train acc 13.338





 test acc 12.810
Time: 114.48736119270325

Epoch: 74


391it [01:50,  3.55it/s]

train acc 13.256





 test acc 12.980
Time: 114.4878625869751

Epoch: 75


391it [01:50,  3.55it/s]

train acc 13.266





 test acc 13.000
Time: 114.51837062835693

Epoch: 76


391it [01:50,  3.54it/s]

train acc 13.266





 test acc 13.160
Time: 114.71679949760437

Epoch: 77


391it [01:50,  3.55it/s]

train acc 13.266





 test acc 13.060
Time: 114.68739938735962

Epoch: 78


391it [01:50,  3.55it/s]

train acc 13.290





 test acc 13.090
Time: 114.52599740028381

Epoch: 79


391it [01:50,  3.55it/s]

train acc 13.238





 test acc 12.900
Time: 114.44741702079773

Epoch: 80


391it [01:50,  3.54it/s]

train acc 13.194





 test acc 12.950
Time: 114.7434937953949

Epoch: 81


391it [01:50,  3.54it/s]

train acc 13.430





 test acc 12.920
Time: 114.73312735557556

Epoch: 82


391it [01:50,  3.54it/s]

train acc 13.238





 test acc 13.030
Time: 114.76172161102295

Epoch: 83


391it [01:50,  3.54it/s]

train acc 13.282





 test acc 12.890
Time: 114.72297835350037

Epoch: 84


391it [01:50,  3.55it/s]

train acc 13.266





 test acc 12.870
Time: 114.57467794418335

Epoch: 85


391it [01:50,  3.55it/s]

train acc 13.274





 test acc 12.820
Time: 114.64622235298157

Epoch: 86


391it [01:50,  3.55it/s]

train acc 13.284





 test acc 12.870
Time: 114.62022590637207

Epoch: 87


391it [01:50,  3.55it/s]

train acc 13.254





 test acc 12.620
Time: 114.42890977859497

Epoch: 88


391it [01:50,  3.55it/s]

train acc 13.272





 test acc 12.910
Time: 114.68992757797241

Epoch: 89


391it [01:50,  3.54it/s]

train acc 13.190





 test acc 13.170
Time: 115.06710362434387

Epoch: 90


391it [01:50,  3.54it/s]

train acc 13.278





 test acc 12.990
Time: 114.99604868888855

Epoch: 91


391it [01:50,  3.54it/s]

train acc 13.380





 test acc 13.030
Time: 114.84407138824463

Epoch: 92


391it [01:50,  3.53it/s]

train acc 13.296





 test acc 12.920
Time: 115.10388684272766

Epoch: 93


391it [01:50,  3.54it/s]

train acc 13.296





 test acc 13.040
Time: 115.00669121742249

Epoch: 94


391it [01:50,  3.54it/s]

train acc 13.186





 test acc 13.020
Time: 114.85005736351013

Epoch: 95


391it [01:50,  3.54it/s]

train acc 13.290





 test acc 13.350
Time: 114.89198303222656

Epoch: 96


391it [01:50,  3.54it/s]

train acc 13.242





 test acc 12.910
Time: 114.83400797843933

Epoch: 97


391it [01:50,  3.54it/s]

train acc 13.306





 test acc 13.010
Time: 114.81599974632263

Epoch: 98


391it [01:50,  3.54it/s]

train acc 13.300





 test acc 12.950
Time: 114.79477858543396

Epoch: 99


391it [01:50,  3.54it/s]

train acc 13.334





 test acc 12.920
Time: 114.91419553756714

Epoch: 100


391it [01:50,  3.54it/s]

train acc 13.222





 test acc 13.010
Time: 114.94024515151978

Epoch: 101


391it [01:50,  3.54it/s]

train acc 13.286





 test acc 13.010
Time: 114.96898818016052

Epoch: 102


391it [01:50,  3.55it/s]

train acc 13.304





 test acc 12.930
Time: 114.65218949317932

Epoch: 103


391it [01:50,  3.55it/s]

train acc 13.400





 test acc 13.020
Time: 114.68791890144348

Epoch: 104


391it [01:50,  3.55it/s]

train acc 13.304





 test acc 13.140
Time: 114.66675591468811

Epoch: 105


391it [01:50,  3.55it/s]

train acc 13.290





 test acc 12.920
Time: 114.6364483833313

Epoch: 106


391it [01:50,  3.54it/s]

train acc 13.290





 test acc 12.840
Time: 114.7059268951416

Epoch: 107


391it [01:50,  3.54it/s]

train acc 13.242





 test acc 13.150
Time: 114.72527146339417

Epoch: 108


391it [01:50,  3.54it/s]

train acc 13.352





 test acc 12.850
Time: 114.7243139743805

Epoch: 109


391it [01:50,  3.55it/s]

train acc 13.394





 test acc 12.890
Time: 114.50804591178894

Epoch: 110


391it [01:50,  3.55it/s]

train acc 13.286





 test acc 13.040
Time: 114.62556385993958

Epoch: 111


391it [01:50,  3.55it/s]

train acc 13.324





 test acc 13.070
Time: 114.6696240901947

Epoch: 112


391it [01:50,  3.55it/s]

train acc 13.240





 test acc 13.110
Time: 114.61208868026733

Epoch: 113


391it [01:50,  3.55it/s]

train acc 13.286





 test acc 12.930
Time: 114.69432759284973

Epoch: 114


391it [01:50,  3.55it/s]

train acc 13.354





 test acc 13.280
Time: 114.61167192459106

Epoch: 115


391it [01:50,  3.55it/s]

train acc 13.256





 test acc 12.880
Time: 114.53248882293701

Epoch: 116


391it [01:50,  3.55it/s]

train acc 13.370





 test acc 13.270
Time: 114.5056483745575

Epoch: 117


391it [01:50,  3.55it/s]

train acc 13.392





 test acc 13.100
Time: 114.57805180549622

Epoch: 118


391it [01:50,  3.54it/s]

train acc 13.208





 test acc 13.080
Time: 114.79434061050415

Epoch: 119


391it [01:50,  3.54it/s]

train acc 13.364





 test acc 12.860
Time: 114.82635378837585

Epoch: 120


391it [01:50,  3.55it/s]

train acc 13.178





 test acc 12.980
Time: 114.73786902427673

Epoch: 121


391it [01:50,  3.54it/s]

train acc 13.194





 test acc 12.960
Time: 114.85124278068542

Epoch: 122


391it [01:50,  3.55it/s]

train acc 13.290





 test acc 12.960
Time: 114.67134737968445

Epoch: 123


391it [01:50,  3.55it/s]

train acc 13.268





 test acc 13.040
Time: 114.6124517917633

Epoch: 124


391it [01:50,  3.55it/s]

train acc 13.328





 test acc 13.030
Time: 114.6811671257019

Epoch: 125


391it [01:50,  3.55it/s]

train acc 13.282





 test acc 13.120
Time: 114.58889865875244

Epoch: 126


391it [01:50,  3.55it/s]

train acc 13.256





 test acc 12.800
Time: 114.4131338596344

Epoch: 127


391it [01:50,  3.55it/s]

train acc 13.252





 test acc 13.110
Time: 114.50227046012878

Epoch: 128


391it [01:50,  3.55it/s]

train acc 13.210





 test acc 13.040
Time: 114.57064628601074

Epoch: 129


391it [01:50,  3.54it/s]

train acc 13.342





 test acc 13.140
Time: 114.82231426239014

Epoch: 130


391it [01:50,  3.54it/s]

train acc 13.332





 test acc 12.960
Time: 115.0273060798645

Epoch: 131


391it [01:50,  3.54it/s]

train acc 13.296





 test acc 13.020
Time: 115.00888872146606

Epoch: 132


391it [01:50,  3.55it/s]

train acc 13.190





 test acc 12.910
Time: 114.67325448989868

Epoch: 133


391it [01:50,  3.54it/s]

train acc 13.344





 test acc 13.020
Time: 114.96141028404236

Epoch: 134


391it [01:50,  3.55it/s]

train acc 13.302





 test acc 13.000
Time: 114.65943932533264

Epoch: 135


391it [01:50,  3.55it/s]

train acc 13.296





 test acc 12.810
Time: 114.68285751342773

Epoch: 136


391it [01:50,  3.55it/s]

train acc 13.332





 test acc 13.120
Time: 114.61738729476929

Epoch: 137


391it [01:50,  3.55it/s]

train acc 13.254





 test acc 13.150
Time: 114.64185643196106

Epoch: 138


391it [01:50,  3.55it/s]

train acc 13.304





 test acc 13.320
Time: 114.55103254318237

Epoch: 139


391it [01:50,  3.54it/s]

train acc 13.160





 test acc 13.060
Time: 114.81753396987915

Epoch: 140


391it [01:50,  3.55it/s]

train acc 13.266





 test acc 13.000
Time: 114.66296553611755

Epoch: 141


391it [01:50,  3.54it/s]

train acc 13.228





 test acc 13.010
Time: 114.88889479637146

Epoch: 142


391it [01:50,  3.54it/s]

train acc 13.316





 test acc 12.880
Time: 114.76487159729004

Epoch: 143


391it [01:50,  3.54it/s]

train acc 13.314





 test acc 13.060
Time: 114.98465490341187

Epoch: 144


391it [01:50,  3.54it/s]

train acc 13.314





 test acc 12.840
Time: 114.80000925064087

Epoch: 145


391it [01:50,  3.54it/s]

train acc 13.278





 test acc 13.060
Time: 114.82019114494324

Epoch: 146


391it [01:50,  3.54it/s]

train acc 13.330





 test acc 13.000
Time: 114.99300193786621

Epoch: 147


391it [01:50,  3.54it/s]

train acc 13.200





 test acc 12.930
Time: 114.94864392280579

Epoch: 148


391it [01:50,  3.54it/s]

train acc 13.308





 test acc 13.070
Time: 114.81685543060303

Epoch: 149


391it [01:50,  3.54it/s]

train acc 13.306





 test acc 12.950
Time: 114.78715229034424

Epoch: 150


258it [01:12,  3.52it/s]

In [None]:
while True:
    pass