In [1]:
!nvidia-smi

Wed Sep 29 20:45:48 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  RTX A6000           On   | 00000000:09:00.0 Off |                  Off |
| 71%   81C    P2   109W / 300W |  11643MiB / 48682MiB |     15%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [2]:
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33marutema47[0m (use `wandb login --relogin` to force relogin)


In [3]:
import argparse
import os
import shutil
import time
import pandas as pd
import matplotlib.pyplot as plt
import network.resnet_orig as resnet

import time
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

model_names = sorted(name for name in resnet.__dict__
    if name.islower() and not name.startswith("__")
                     and name.startswith("resnet")
                     and callable(resnet.__dict__[name]))

DATA_DIR = "train"

print_freq = 50

K = 4

In [4]:
train_df = pd.read_csv("trainLabels.csv")
train_df.head()

Unnamed: 0,id,label
0,1,frog
1,2,truck
2,3,truck
3,4,deer
4,5,automobile


In [5]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
train_df['label_enc'] = le.fit_transform(train_df['label'])
train_df.head()

Unnamed: 0,id,label,label_enc
0,1,frog,6
1,2,truck,9
2,3,truck,9
3,4,deer,4
4,5,automobile,1


In [6]:
# 5-flod
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

train_df["fold"] = -1
for i, (train_index, test_index) in enumerate(skf.split(train_df.id, train_df.label_enc)):
    train_df.loc[test_index, 'fold'] = i
train_df.head()

Unnamed: 0,id,label,label_enc,fold
0,1,frog,6,1
1,2,truck,9,1
2,3,truck,9,2
3,4,deer,4,1
4,5,automobile,1,2


# prepare dataset

In [7]:
mixup = False

class cifarDataset(Dataset):
    def __init__(self,
                 df,
                 rand=False,
                 transform=None,
                 test=False
                ):

        self.df = df.reset_index(drop=True)
        self.rand = rand
        self.transform = transform
        self.test = test

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        row = self.df.iloc[index]
        img_id = row.id
        
        images = cv2.imread(os.path.join(DATA_DIR, str(img_id)+".png"))
        
        # Load labels
        label = row.label_enc
        
        # aug
        if self.transform is not None:
            images = self.transform(image=images)['image']
        
        # Mixup part
        """
        rd = torch.rand(1)
        label2 = label
        gamma = np.array(np.ones(1)).astype(np.float32)[0]
        if mixup and rd < 0.3 and self.transform is not None and not self.test:
            mix_idx = np.random.random_integers(0, len(self.df))
            row2 = self.df.iloc[mix_idx]
            img_id2 = row2.id
            images2 = cv2.imread(os.path.join(DATA_DIR, str(img_id2)+".png"))
            
            if self.transform is not None:
                images2 = self.transform(image=images2)['image']
            
            # blend image
            gamma = np.array(np.random.beta(1,1)).astype(np.float32)
            images = ((images*gamma + images2*(1-gamma))).astype(np.uint8)
            # blend labels
            label2 = row2.label_enc
        """
              
        #images = images.astype(np.float32)
        #images /= 255
        images = images.transpose(2, 0, 1)
        
        label = label.astype(np.float32)
        #label2 = label2.astype(np.float32)
        return torch.tensor(images), torch.tensor(label),

In [8]:
import albumentations as A
import albumentations

imsize = 32
transforms_train = albumentations.Compose([
    albumentations.ShiftScaleRotate(scale_limit=0.3, rotate_limit=180,p=0.25),
    A.Cutout(num_holes=12, max_h_size=4, max_w_size=4, fill_value=0, p=0.25),
    #albumentations.Rotate(p=0.5),
    #albumentations.Transpose(p=0.5),
    #albumentations.VerticalFlip(p=0.5),
    albumentations.HorizontalFlip(p=0.5),   
    albumentations.RandomCrop(imsize, imsize, p=1.0), 
    albumentations.Normalize(mean=(0.485, 0.456, 0.406),
                       std=(0.229, 0.224, 0.225), p=1),
])

transforms_val = albumentations.Compose([albumentations.Resize(imsize, imsize, p=1.0),
                                         albumentations.Normalize(mean=(0.485, 0.456, 0.406),
                       std=(0.229, 0.224, 0.225), p=1),])



show imgs

In [9]:
dataset_show = cifarDataset(train_df, transform=transforms_train)
from pylab import rcParams
rcParams['figure.figsize'] = 20,10
for i in range(3):
    f, axarr = plt.subplots(1,5)
    for p in range(5):
        idx = np.random.randint(0, len(dataset_show))
        img, label = dataset_show[idx]
        img = img.flip(0) #BGR2RGB
        axarr[p].imshow(img.transpose(0,1).transpose(1,2))
        axarr[p].set_title(str(label))

Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping i

# Trainer

In [10]:
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [11]:
def train(train_loader, model, criterion, optimizer, epoch):
    """
        Run one train epoch
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    lambda_alpha = 0.0002
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)
        target2 = target

        target = target.long().cuda()
        target2 = target2.long().cuda()
        input_var = input.cuda()
        target_var = target

        if not mixup:
          alpha = 0
        else:
          alpha = 1
        

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # L2 regularization
        l2_alpha = 0.0
        for name, param in model.named_parameters():
            if "alpha" in name:
                l2_alpha += torch.pow(param, 2)
        loss += lambda_alpha * l2_alpha

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        output = output.float()
        loss = loss.float()
        # measure accuracy and record loss
        prec1 = accuracy(output.data, target)[0]
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1))
    return losses.avg

def validate(val_loader, model, criterion):
    """
    Run evaluation
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            target = target.long().cuda()
            input_var = input.cuda()
            target_var = target.cuda()

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            output = output.float()
            loss = loss.float()

            # measure accuracy and record loss
            prec1 = accuracy(output.data, target)[0]
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time, loss=losses,
                          top1=top1))

    print(' * Prec@1 {top1.avg:.3f}'
          .format(top1=top1))
    for name, param in model.named_parameters():
        if "alpha" in name:
            print(name, param.item())
    return top1.avg, losses.avg

def save_checkpoint(state, filename='checkpoint.pth'):
    """
    Save the training model
    """
    torch.save(state, filename)

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res

# Train loop

In [12]:
train_dataset = cifarDataset(train_df[train_df.fold!=0], transform=transforms_train)
val_dataset = cifarDataset(train_df[train_df.fold==0], transform=transforms_val, test=True)

train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=128, shuffle=True,
        num_workers=4, pin_memory=True)

val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=512, shuffle=False,
    num_workers=4, pin_memory=True)

In [13]:
for K in range(3,8):
    model = resnet.__dict__["resnet20"](K)
    model = model.cuda()

    import wandb
    watermark = "resnet20_paper_quant{}".format(K)
    wandb.init(project="pact",
                name=watermark)

    # define loss function (criterion) and pptimizer
    mixup = False
    criterion = nn.CrossEntropyLoss().cuda()
    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    #optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-1,
                                momentum=0.9,
                                weight_decay=0.0002)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, 
                                                        milestones=[80, 150],)

    best_prec1 = 0
    os.makedirs("models", exist_ok=True)

    for epoch in range(200):
        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        tloss = train(train_loader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1, valloss = validate(val_loader, model, criterion)

        # wandb
        wandb.log({'epoch': epoch, "prec":prec1, "train_loss": tloss, 'val_loss': valloss, "lr": optimizer.param_groups[0]["lr"],})

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        print("Best prec1 : ", best_prec1)
        if is_best:
            torch.save(model.state_dict(), os.path.join(f'models/resnet20_paper_quant{K}.pth'))

bit width: 3


[34m[1mwandb[0m: Currently logged in as: [33marutema47[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


current lr 1.00000e-01
Epoch: [0][0/313]	Time 0.680 (0.680)	Data 0.154 (0.154)	Loss 7.6419 (7.6419)	Prec@1 7.812 (7.812)
Epoch: [0][50/313]	Time 0.063 (0.076)	Data 0.000 (0.003)	Loss 2.5179 (13.8807)	Prec@1 13.281 (11.091)
Epoch: [0][100/313]	Time 0.063 (0.069)	Data 0.000 (0.002)	Loss 2.5170 (8.2726)	Prec@1 17.969 (12.539)
Epoch: [0][150/313]	Time 0.063 (0.068)	Data 0.000 (0.001)	Loss 2.4125 (6.3605)	Prec@1 17.188 (13.680)
Epoch: [0][200/313]	Time 0.064 (0.067)	Data 0.000 (0.001)	Loss 2.3817 (5.3871)	Prec@1 20.312 (14.509)
Epoch: [0][250/313]	Time 0.064 (0.066)	Data 0.000 (0.001)	Loss 2.4851 (4.7933)	Prec@1 14.844 (15.326)
Epoch: [0][300/313]	Time 0.063 (0.066)	Data 0.000 (0.001)	Loss 2.3333 (4.3882)	Prec@1 32.812 (16.276)
Test: [0/20]	Time 0.282 (0.282)	Loss 2.0648 (2.0648)	Prec@1 21.289 (21.289)
 * Prec@1 20.690
alpha1 8.324317932128906
layer1.0.alpha1 8.324336051940918
layer1.0.alpha2 8.313596725463867
layer1.1.alpha1 8.32434368133545
layer1.1.alpha2 8.163472175598145
layer2.0.alpha

Epoch: [6][0/313]	Time 0.230 (0.230)	Data 0.164 (0.164)	Loss 1.5490 (1.5490)	Prec@1 45.312 (45.312)
Epoch: [6][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 1.5559 (1.5614)	Prec@1 41.406 (44.378)
Epoch: [6][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 1.4484 (1.5570)	Prec@1 44.531 (44.214)
Epoch: [6][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 1.4204 (1.5441)	Prec@1 47.656 (44.800)
Epoch: [6][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 1.4971 (1.5331)	Prec@1 43.750 (45.208)
Epoch: [6][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 1.4182 (1.5245)	Prec@1 45.312 (45.477)
Epoch: [6][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 1.5748 (1.5141)	Prec@1 52.344 (45.855)
Test: [0/20]	Time 0.288 (0.288)	Loss 1.2951 (1.2951)	Prec@1 52.539 (52.539)
 * Prec@1 51.910
alpha1 2.6199843883514404
layer1.0.alpha1 2.6515064239501953
layer1.0.alpha2 2.7493338584899902
layer1.1.alpha1 2.675130605697632
layer1.1.alpha2 2.459699869155884
layer2.0.alpha1 2.69876360893249

Epoch: [12][0/313]	Time 0.223 (0.223)	Data 0.157 (0.157)	Loss 0.9380 (0.9380)	Prec@1 68.750 (68.750)
Epoch: [12][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 1.0353 (1.0800)	Prec@1 62.500 (62.515)
Epoch: [12][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 1.0983 (1.0841)	Prec@1 64.062 (62.160)
Epoch: [12][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.9383 (1.0783)	Prec@1 71.094 (62.567)
Epoch: [12][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.9182 (1.0733)	Prec@1 62.500 (62.729)
Epoch: [12][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 1.1681 (1.0734)	Prec@1 61.719 (62.609)
Epoch: [12][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 1.1283 (1.0707)	Prec@1 54.688 (62.731)
Test: [0/20]	Time 0.271 (0.271)	Loss 0.9213 (0.9213)	Prec@1 66.992 (66.992)
 * Prec@1 66.790
alpha1 1.9391868114471436
layer1.0.alpha1 1.5461310148239136
layer1.0.alpha2 1.839910626411438
layer1.1.alpha1 1.3620824813842773
layer1.1.alpha2 1.5869815349578857
layer2.0.alpha1 1.405866

Epoch: [18][0/313]	Time 0.220 (0.220)	Data 0.154 (0.154)	Loss 0.9770 (0.9770)	Prec@1 67.188 (67.188)
Epoch: [18][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.8024 (0.8387)	Prec@1 72.656 (71.078)
Epoch: [18][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.9632 (0.8357)	Prec@1 69.531 (71.248)
Epoch: [18][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.8020 (0.8363)	Prec@1 75.781 (71.058)
Epoch: [18][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.7932 (0.8248)	Prec@1 69.531 (71.482)
Epoch: [18][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 1.1037 (0.8268)	Prec@1 64.062 (71.271)
Epoch: [18][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.8663 (0.8236)	Prec@1 72.656 (71.512)
Test: [0/20]	Time 0.277 (0.277)	Loss 0.6410 (0.6410)	Prec@1 78.320 (78.320)
 * Prec@1 77.300
alpha1 2.0189168453216553
layer1.0.alpha1 1.4483788013458252
layer1.0.alpha2 1.7573471069335938
layer1.1.alpha1 0.9365665912628174
layer1.1.alpha2 1.6601488590240479
layer2.0.alpha1 1.17944

Epoch: [24][0/313]	Time 0.229 (0.229)	Data 0.163 (0.163)	Loss 0.5340 (0.5340)	Prec@1 82.812 (82.812)
Epoch: [24][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.7885 (0.6917)	Prec@1 74.219 (76.716)
Epoch: [24][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.5250 (0.7046)	Prec@1 81.250 (75.797)
Epoch: [24][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.6674 (0.7014)	Prec@1 75.000 (75.735)
Epoch: [24][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.6783 (0.7009)	Prec@1 74.219 (75.894)
Epoch: [24][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5539 (0.6965)	Prec@1 84.375 (76.033)
Epoch: [24][300/313]	Time 0.066 (0.065)	Data 0.000 (0.001)	Loss 0.6536 (0.6957)	Prec@1 78.125 (76.030)
Test: [0/20]	Time 0.283 (0.283)	Loss 0.5599 (0.5599)	Prec@1 81.641 (81.641)
 * Prec@1 80.110
alpha1 1.9048415422439575
layer1.0.alpha1 1.4087637662887573
layer1.0.alpha2 1.812269687652588
layer1.1.alpha1 0.8957857489585876
layer1.1.alpha2 1.5665775537490845
layer2.0.alpha1 1.133915

Epoch: [30][0/313]	Time 0.229 (0.229)	Data 0.164 (0.164)	Loss 0.7391 (0.7391)	Prec@1 72.656 (72.656)
Epoch: [30][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.6292 (0.6184)	Prec@1 80.469 (78.906)
Epoch: [30][100/313]	Time 0.067 (0.066)	Data 0.000 (0.002)	Loss 0.6590 (0.6034)	Prec@1 78.125 (79.401)
Epoch: [30][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5311 (0.6003)	Prec@1 82.812 (79.672)
Epoch: [30][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.8311 (0.6041)	Prec@1 67.969 (79.485)
Epoch: [30][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.6962 (0.6084)	Prec@1 78.125 (79.320)
Epoch: [30][300/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5169 (0.6050)	Prec@1 81.250 (79.451)
Test: [0/20]	Time 0.269 (0.269)	Loss 0.5224 (0.5224)	Prec@1 81.641 (81.641)
 * Prec@1 80.400
alpha1 1.8708416223526
layer1.0.alpha1 1.4916024208068848
layer1.0.alpha2 1.8458505868911743
layer1.1.alpha1 0.9318808913230896
layer1.1.alpha2 1.7146272659301758
layer2.0.alpha1 1.02827394

Epoch: [36][0/313]	Time 0.232 (0.232)	Data 0.167 (0.167)	Loss 0.5262 (0.5262)	Prec@1 82.812 (82.812)
Epoch: [36][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.5165 (0.5474)	Prec@1 84.375 (81.173)
Epoch: [36][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.6474 (0.5399)	Prec@1 79.688 (81.498)
Epoch: [36][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5182 (0.5396)	Prec@1 85.938 (81.560)
Epoch: [36][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.6037 (0.5466)	Prec@1 82.812 (81.386)
Epoch: [36][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.6125 (0.5493)	Prec@1 73.438 (81.287)
Epoch: [36][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.6648 (0.5467)	Prec@1 78.125 (81.359)
Test: [0/20]	Time 0.275 (0.275)	Loss 0.4767 (0.4767)	Prec@1 83.594 (83.594)
 * Prec@1 82.330
alpha1 1.8703505992889404
layer1.0.alpha1 1.4418513774871826
layer1.0.alpha2 1.8360381126403809
layer1.1.alpha1 0.9505946040153503
layer1.1.alpha2 1.594623327255249
layer2.0.alpha1 1.185833

Epoch: [42][0/313]	Time 0.222 (0.222)	Data 0.155 (0.155)	Loss 0.4624 (0.4624)	Prec@1 87.500 (87.500)
Epoch: [42][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.6439 (0.5008)	Prec@1 78.906 (83.287)
Epoch: [42][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.6578 (0.4959)	Prec@1 78.906 (83.315)
Epoch: [42][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4379 (0.4964)	Prec@1 89.062 (83.226)
Epoch: [42][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5190 (0.5032)	Prec@1 81.250 (82.925)
Epoch: [42][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4402 (0.5053)	Prec@1 82.031 (82.806)
Epoch: [42][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4532 (0.5110)	Prec@1 87.500 (82.693)
Test: [0/20]	Time 0.287 (0.287)	Loss 0.4413 (0.4413)	Prec@1 84.570 (84.570)
 * Prec@1 84.890
alpha1 1.804575800895691
layer1.0.alpha1 1.3902270793914795
layer1.0.alpha2 1.8357515335083008
layer1.1.alpha1 0.9368909597396851
layer1.1.alpha2 1.6638256311416626
layer2.0.alpha1 1.087038

Epoch: [48][0/313]	Time 0.220 (0.220)	Data 0.153 (0.153)	Loss 0.5513 (0.5513)	Prec@1 80.469 (80.469)
Epoch: [48][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.4109 (0.4964)	Prec@1 86.719 (82.950)
Epoch: [48][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.4596 (0.4850)	Prec@1 84.375 (83.578)
Epoch: [48][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4616 (0.4882)	Prec@1 83.594 (83.397)
Epoch: [48][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.7214 (0.4914)	Prec@1 73.438 (83.287)
Epoch: [48][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5410 (0.4905)	Prec@1 81.250 (83.370)
Epoch: [48][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3669 (0.4958)	Prec@1 87.500 (83.202)
Test: [0/20]	Time 0.283 (0.283)	Loss 0.6272 (0.6272)	Prec@1 80.273 (80.273)
 * Prec@1 80.740
alpha1 1.8569681644439697
layer1.0.alpha1 1.420851707458496
layer1.0.alpha2 1.9816648960113525
layer1.1.alpha1 1.0056577920913696
layer1.1.alpha2 1.665847659111023
layer2.0.alpha1 1.1259827

Epoch: [54][0/313]	Time 0.222 (0.222)	Data 0.151 (0.151)	Loss 0.5211 (0.5211)	Prec@1 82.031 (82.031)
Epoch: [54][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.4644 (0.4571)	Prec@1 79.688 (84.191)
Epoch: [54][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.4499 (0.4660)	Prec@1 85.938 (84.158)
Epoch: [54][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4994 (0.4665)	Prec@1 82.031 (83.977)
Epoch: [54][200/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.4074 (0.4747)	Prec@1 86.719 (83.648)
Epoch: [54][250/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.4949 (0.4738)	Prec@1 82.812 (83.737)
Epoch: [54][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.4764 (0.4751)	Prec@1 86.719 (83.703)
Test: [0/20]	Time 0.279 (0.279)	Loss 0.4150 (0.4150)	Prec@1 85.547 (85.547)
 * Prec@1 84.710
alpha1 1.9322068691253662
layer1.0.alpha1 1.3569093942642212
layer1.0.alpha2 1.6607632637023926
layer1.1.alpha1 0.9763513803482056
layer1.1.alpha2 1.6769917011260986
layer2.0.alpha1 0.96141

Epoch: [60][0/313]	Time 0.221 (0.221)	Data 0.152 (0.152)	Loss 0.4785 (0.4785)	Prec@1 84.375 (84.375)
Epoch: [60][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.3643 (0.4430)	Prec@1 89.844 (85.156)
Epoch: [60][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.5315 (0.4498)	Prec@1 82.812 (84.592)
Epoch: [60][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.4089 (0.4445)	Prec@1 85.156 (84.856)
Epoch: [60][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5317 (0.4512)	Prec@1 82.031 (84.806)
Epoch: [60][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.5992 (0.4522)	Prec@1 79.688 (84.752)
Epoch: [60][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.2748 (0.4553)	Prec@1 90.625 (84.658)
Test: [0/20]	Time 0.281 (0.281)	Loss 0.4221 (0.4221)	Prec@1 86.523 (86.523)
 * Prec@1 84.820
alpha1 1.9414316415786743
layer1.0.alpha1 1.3166186809539795
layer1.0.alpha2 1.7453601360321045
layer1.1.alpha1 1.0095441341400146
layer1.1.alpha2 1.670580267906189
layer2.0.alpha1 1.138301

Epoch: [66][0/313]	Time 0.225 (0.225)	Data 0.157 (0.157)	Loss 0.2841 (0.2841)	Prec@1 92.188 (92.188)
Epoch: [66][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.3546 (0.4408)	Prec@1 90.625 (85.263)
Epoch: [66][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.4007 (0.4380)	Prec@1 86.719 (85.032)
Epoch: [66][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4822 (0.4393)	Prec@1 81.250 (85.058)
Epoch: [66][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.3899 (0.4438)	Prec@1 86.719 (85.075)
Epoch: [66][250/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.5109 (0.4491)	Prec@1 84.375 (84.913)
Epoch: [66][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.6356 (0.4498)	Prec@1 77.344 (84.858)
Test: [0/20]	Time 0.281 (0.281)	Loss 0.4562 (0.4562)	Prec@1 84.375 (84.375)
 * Prec@1 85.880
alpha1 1.8779462575912476
layer1.0.alpha1 1.3774847984313965
layer1.0.alpha2 1.7369581460952759
layer1.1.alpha1 0.9751850366592407
layer1.1.alpha2 1.6027064323425293
layer2.0.alpha1 1.02102

Epoch: [72][0/313]	Time 0.216 (0.216)	Data 0.151 (0.151)	Loss 0.4890 (0.4890)	Prec@1 81.250 (81.250)
Epoch: [72][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.5553 (0.4417)	Prec@1 84.375 (85.034)
Epoch: [72][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.4855 (0.4389)	Prec@1 82.812 (85.187)
Epoch: [72][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3871 (0.4319)	Prec@1 85.938 (85.353)
Epoch: [72][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4250 (0.4327)	Prec@1 85.156 (85.218)
Epoch: [72][250/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.4347 (0.4349)	Prec@1 83.594 (85.138)
Epoch: [72][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.3629 (0.4355)	Prec@1 90.625 (85.151)
Test: [0/20]	Time 0.271 (0.271)	Loss 0.4883 (0.4883)	Prec@1 82.812 (82.812)
 * Prec@1 83.370
alpha1 1.7922556400299072
layer1.0.alpha1 1.2867354154586792
layer1.0.alpha2 1.617356300354004
layer1.1.alpha1 0.9544793367385864
layer1.1.alpha2 1.4564393758773804
layer2.0.alpha1 1.033100

Epoch: [78][0/313]	Time 0.228 (0.228)	Data 0.162 (0.162)	Loss 0.3479 (0.3479)	Prec@1 85.938 (85.938)
Epoch: [78][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.6517 (0.4252)	Prec@1 82.031 (85.662)
Epoch: [78][100/313]	Time 0.064 (0.065)	Data 0.000 (0.002)	Loss 0.4362 (0.4267)	Prec@1 86.719 (85.489)
Epoch: [78][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5597 (0.4293)	Prec@1 80.469 (85.405)
Epoch: [78][200/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.3787 (0.4287)	Prec@1 89.062 (85.428)
Epoch: [78][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.3866 (0.4303)	Prec@1 84.375 (85.387)
Epoch: [78][300/313]	Time 0.065 (0.064)	Data 0.000 (0.001)	Loss 0.5066 (0.4312)	Prec@1 82.812 (85.283)
Test: [0/20]	Time 0.286 (0.286)	Loss 0.3928 (0.3928)	Prec@1 87.500 (87.500)
 * Prec@1 86.150
alpha1 1.9162756204605103
layer1.0.alpha1 1.1628316640853882
layer1.0.alpha2 1.7884160280227661
layer1.1.alpha1 1.0185248851776123
layer1.1.alpha2 1.6287226676940918
layer2.0.alpha1 1.10237

Epoch: [84][0/313]	Time 0.225 (0.225)	Data 0.160 (0.160)	Loss 0.2132 (0.2132)	Prec@1 92.188 (92.188)
Epoch: [84][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.2316 (0.2394)	Prec@1 89.844 (91.896)
Epoch: [84][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.2402 (0.2394)	Prec@1 92.188 (91.986)
Epoch: [84][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1577 (0.2428)	Prec@1 94.531 (91.918)
Epoch: [84][200/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.3537 (0.2441)	Prec@1 87.500 (91.880)
Epoch: [84][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.2592 (0.2407)	Prec@1 91.406 (91.988)
Epoch: [84][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.2826 (0.2392)	Prec@1 89.062 (92.078)
Test: [0/20]	Time 0.390 (0.390)	Loss 0.2254 (0.2254)	Prec@1 92.383 (92.383)
 * Prec@1 91.500
alpha1 1.7269822359085083
layer1.0.alpha1 1.1726799011230469
layer1.0.alpha2 1.639939308166504
layer1.1.alpha1 0.8998175859451294
layer1.1.alpha2 1.5402483940124512
layer2.0.alpha1 0.946188

Epoch: [90][0/313]	Time 0.212 (0.212)	Data 0.147 (0.147)	Loss 0.2948 (0.2948)	Prec@1 89.844 (89.844)
Epoch: [90][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1703 (0.2115)	Prec@1 95.312 (93.214)
Epoch: [90][100/313]	Time 0.065 (0.066)	Data 0.000 (0.002)	Loss 0.3292 (0.2143)	Prec@1 90.625 (93.062)
Epoch: [90][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2001 (0.2139)	Prec@1 93.750 (93.108)
Epoch: [90][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2434 (0.2136)	Prec@1 91.406 (93.089)
Epoch: [90][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1595 (0.2125)	Prec@1 95.312 (93.112)
Epoch: [90][300/313]	Time 0.065 (0.064)	Data 0.000 (0.001)	Loss 0.2153 (0.2138)	Prec@1 92.188 (93.028)
Test: [0/20]	Time 0.273 (0.273)	Loss 0.2061 (0.2061)	Prec@1 93.164 (93.164)
 * Prec@1 92.180
alpha1 1.5915887355804443
layer1.0.alpha1 1.1519449949264526
layer1.0.alpha2 1.5275835990905762
layer1.1.alpha1 0.8552111387252808
layer1.1.alpha2 1.4785513877868652
layer2.0.alpha1 0.87449

Epoch: [96][0/313]	Time 0.222 (0.222)	Data 0.157 (0.157)	Loss 0.2566 (0.2566)	Prec@1 92.969 (92.969)
Epoch: [96][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.1833 (0.2142)	Prec@1 95.312 (92.800)
Epoch: [96][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.2223 (0.2040)	Prec@1 94.531 (93.193)
Epoch: [96][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2025 (0.2055)	Prec@1 92.188 (93.196)
Epoch: [96][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1979 (0.2045)	Prec@1 93.750 (93.249)
Epoch: [96][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1464 (0.2032)	Prec@1 95.312 (93.252)
Epoch: [96][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1995 (0.2048)	Prec@1 92.188 (93.272)
Test: [0/20]	Time 0.279 (0.279)	Loss 0.1939 (0.1939)	Prec@1 92.969 (92.969)
 * Prec@1 91.810
alpha1 1.5265684127807617
layer1.0.alpha1 1.0550504922866821
layer1.0.alpha2 1.475843906402588
layer1.1.alpha1 0.8355960249900818
layer1.1.alpha2 1.472395420074463
layer2.0.alpha1 0.8737686

Epoch: [102][0/313]	Time 0.229 (0.229)	Data 0.159 (0.159)	Loss 0.2424 (0.2424)	Prec@1 90.625 (90.625)
Epoch: [102][50/313]	Time 0.064 (0.068)	Data 0.000 (0.003)	Loss 0.2276 (0.1936)	Prec@1 89.844 (93.842)
Epoch: [102][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1625 (0.1954)	Prec@1 93.750 (93.765)
Epoch: [102][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1557 (0.1947)	Prec@1 96.094 (93.750)
Epoch: [102][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1620 (0.1939)	Prec@1 94.531 (93.742)
Epoch: [102][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.0667 (0.1931)	Prec@1 99.219 (93.753)
Epoch: [102][300/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 0.1454 (0.1913)	Prec@1 95.312 (93.802)
Test: [0/20]	Time 0.289 (0.289)	Loss 0.1843 (0.1843)	Prec@1 93.555 (93.555)
 * Prec@1 92.080
alpha1 1.4783384799957275
layer1.0.alpha1 1.0332605838775635
layer1.0.alpha2 1.429980993270874
layer1.1.alpha1 0.7888737916946411
layer1.1.alpha2 1.365931510925293
layer2.0.alpha1 0.

Epoch: [108][0/313]	Time 0.223 (0.223)	Data 0.154 (0.154)	Loss 0.1545 (0.1545)	Prec@1 94.531 (94.531)
Epoch: [108][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.1106 (0.1916)	Prec@1 97.656 (93.658)
Epoch: [108][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.1456 (0.1872)	Prec@1 96.094 (93.866)
Epoch: [108][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1700 (0.1886)	Prec@1 96.094 (93.885)
Epoch: [108][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.2917 (0.1850)	Prec@1 90.625 (93.975)
Epoch: [108][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1030 (0.1830)	Prec@1 96.094 (94.030)
Epoch: [108][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.2345 (0.1823)	Prec@1 92.188 (94.059)
Test: [0/20]	Time 0.271 (0.271)	Loss 0.1503 (0.1503)	Prec@1 94.727 (94.727)
 * Prec@1 92.260
alpha1 1.4056273698806763
layer1.0.alpha1 0.9751306176185608
layer1.0.alpha2 1.3745098114013672
layer1.1.alpha1 0.7840749025344849
layer1.1.alpha2 1.3252818584442139
layer2.0.alpha1 

Epoch: [114][0/313]	Time 0.223 (0.223)	Data 0.152 (0.152)	Loss 0.1880 (0.1880)	Prec@1 94.531 (94.531)
Epoch: [114][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.1286 (0.1741)	Prec@1 98.438 (94.256)
Epoch: [114][100/313]	Time 0.064 (0.065)	Data 0.000 (0.002)	Loss 0.1964 (0.1760)	Prec@1 92.969 (94.137)
Epoch: [114][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1289 (0.1758)	Prec@1 95.312 (94.138)
Epoch: [114][200/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.2908 (0.1777)	Prec@1 89.062 (94.010)
Epoch: [114][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.2139 (0.1802)	Prec@1 94.531 (93.962)
Epoch: [114][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.2913 (0.1804)	Prec@1 91.406 (93.965)
Test: [0/20]	Time 0.270 (0.270)	Loss 0.2252 (0.2252)	Prec@1 92.773 (92.773)
 * Prec@1 92.020
alpha1 1.3683103322982788
layer1.0.alpha1 0.9124371409416199
layer1.0.alpha2 1.3009636402130127
layer1.1.alpha1 0.7530561685562134
layer1.1.alpha2 1.3160446882247925
layer2.0.alpha1 

Epoch: [120][0/313]	Time 0.230 (0.230)	Data 0.161 (0.161)	Loss 0.2110 (0.2110)	Prec@1 92.188 (92.188)
Epoch: [120][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1074 (0.1824)	Prec@1 96.094 (93.827)
Epoch: [120][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.2158 (0.1797)	Prec@1 93.750 (94.059)
Epoch: [120][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1864 (0.1755)	Prec@1 93.750 (94.200)
Epoch: [120][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1201 (0.1767)	Prec@1 95.312 (94.166)
Epoch: [120][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1835 (0.1774)	Prec@1 92.969 (94.158)
Epoch: [120][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1531 (0.1763)	Prec@1 94.531 (94.204)
Test: [0/20]	Time 0.274 (0.274)	Loss 0.2218 (0.2218)	Prec@1 93.164 (93.164)
 * Prec@1 91.870
alpha1 1.3688344955444336
layer1.0.alpha1 0.8822048306465149
layer1.0.alpha2 1.2888875007629395
layer1.1.alpha1 0.756005585193634
layer1.1.alpha2 1.2651903629302979
layer2.0.alpha1 0

Epoch: [126][0/313]	Time 0.231 (0.231)	Data 0.165 (0.165)	Loss 0.1665 (0.1665)	Prec@1 94.531 (94.531)
Epoch: [126][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1754 (0.1741)	Prec@1 94.531 (94.393)
Epoch: [126][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1604 (0.1756)	Prec@1 96.875 (94.291)
Epoch: [126][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1371 (0.1741)	Prec@1 92.969 (94.319)
Epoch: [126][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1940 (0.1711)	Prec@1 92.188 (94.372)
Epoch: [126][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1322 (0.1717)	Prec@1 95.312 (94.385)
Epoch: [126][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1894 (0.1738)	Prec@1 94.531 (94.334)
Test: [0/20]	Time 0.278 (0.278)	Loss 0.2075 (0.2075)	Prec@1 93.359 (93.359)
 * Prec@1 92.210
alpha1 1.2469340562820435
layer1.0.alpha1 0.8712072968482971
layer1.0.alpha2 1.2223013639450073
layer1.1.alpha1 0.692564070224762
layer1.1.alpha2 1.2393964529037476
layer2.0.alpha1 0

Epoch: [132][0/313]	Time 0.220 (0.220)	Data 0.152 (0.152)	Loss 0.1145 (0.1145)	Prec@1 96.094 (96.094)
Epoch: [132][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.2180 (0.1828)	Prec@1 92.969 (93.796)
Epoch: [132][100/313]	Time 0.064 (0.065)	Data 0.000 (0.002)	Loss 0.1354 (0.1789)	Prec@1 96.875 (93.951)
Epoch: [132][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1983 (0.1757)	Prec@1 92.188 (94.174)
Epoch: [132][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0817 (0.1755)	Prec@1 96.875 (94.205)
Epoch: [132][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1247 (0.1742)	Prec@1 96.094 (94.257)
Epoch: [132][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.2185 (0.1764)	Prec@1 89.844 (94.186)
Test: [0/20]	Time 0.294 (0.294)	Loss 0.1929 (0.1929)	Prec@1 93.359 (93.359)
 * Prec@1 92.090
alpha1 1.2455114126205444
layer1.0.alpha1 0.8407143354415894
layer1.0.alpha2 1.2002911567687988
layer1.1.alpha1 0.6703093647956848
layer1.1.alpha2 1.2308095693588257
layer2.0.alpha1 

Epoch: [138][0/313]	Time 0.219 (0.219)	Data 0.149 (0.149)	Loss 0.1024 (0.1024)	Prec@1 96.875 (96.875)
Epoch: [138][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1223 (0.1608)	Prec@1 95.312 (94.638)
Epoch: [138][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1823 (0.1603)	Prec@1 93.750 (94.678)
Epoch: [138][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1993 (0.1674)	Prec@1 93.750 (94.417)
Epoch: [138][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1731 (0.1661)	Prec@1 95.312 (94.504)
Epoch: [138][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.0778 (0.1693)	Prec@1 98.438 (94.357)
Epoch: [138][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.1285 (0.1693)	Prec@1 98.438 (94.344)
Test: [0/20]	Time 0.282 (0.282)	Loss 0.2376 (0.2376)	Prec@1 93.164 (93.164)
 * Prec@1 91.930
alpha1 1.2157950401306152
layer1.0.alpha1 0.8011056780815125
layer1.0.alpha2 1.1642976999282837
layer1.1.alpha1 0.6817216277122498
layer1.1.alpha2 1.1624313592910767
layer2.0.alpha1 

Epoch: [144][0/313]	Time 0.218 (0.218)	Data 0.152 (0.152)	Loss 0.1852 (0.1852)	Prec@1 92.969 (92.969)
Epoch: [144][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1084 (0.1711)	Prec@1 97.656 (94.378)
Epoch: [144][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.2112 (0.1586)	Prec@1 93.750 (94.709)
Epoch: [144][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1757 (0.1623)	Prec@1 93.750 (94.588)
Epoch: [144][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1715 (0.1658)	Prec@1 92.969 (94.488)
Epoch: [144][250/313]	Time 0.066 (0.065)	Data 0.000 (0.001)	Loss 0.1750 (0.1647)	Prec@1 97.656 (94.572)
Epoch: [144][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1629 (0.1659)	Prec@1 94.531 (94.479)
Test: [0/20]	Time 0.273 (0.273)	Loss 0.2130 (0.2130)	Prec@1 93.555 (93.555)
 * Prec@1 92.140
alpha1 1.2074092626571655
layer1.0.alpha1 0.8090922236442566
layer1.0.alpha2 1.1776087284088135
layer1.1.alpha1 0.6504364013671875
layer1.1.alpha2 1.1471823453903198
layer2.0.alpha1 

Epoch: [150][0/313]	Time 0.232 (0.232)	Data 0.160 (0.160)	Loss 0.2197 (0.2197)	Prec@1 91.406 (91.406)
Epoch: [150][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1616 (0.1596)	Prec@1 95.312 (94.730)
Epoch: [150][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.1950 (0.1563)	Prec@1 94.531 (94.856)
Epoch: [150][150/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 0.2059 (0.1558)	Prec@1 92.969 (94.785)
Epoch: [150][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1351 (0.1560)	Prec@1 94.531 (94.834)
Epoch: [150][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1725 (0.1571)	Prec@1 94.531 (94.793)
Epoch: [150][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1285 (0.1579)	Prec@1 95.312 (94.767)
Test: [0/20]	Time 0.274 (0.274)	Loss 0.2188 (0.2188)	Prec@1 94.141 (94.141)
 * Prec@1 92.390
alpha1 1.1718254089355469
layer1.0.alpha1 0.8190909624099731
layer1.0.alpha2 1.1114410161972046
layer1.1.alpha1 0.6588855385780334
layer1.1.alpha2 1.1026997566223145
layer2.0.alpha1 

Epoch: [156][0/313]	Time 0.247 (0.247)	Data 0.179 (0.179)	Loss 0.1281 (0.1281)	Prec@1 96.094 (96.094)
Epoch: [156][50/313]	Time 0.064 (0.068)	Data 0.000 (0.004)	Loss 0.1124 (0.1441)	Prec@1 97.656 (95.328)
Epoch: [156][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1563 (0.1431)	Prec@1 92.188 (95.390)
Epoch: [156][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0760 (0.1382)	Prec@1 96.875 (95.525)
Epoch: [156][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1035 (0.1364)	Prec@1 95.312 (95.577)
Epoch: [156][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2055 (0.1403)	Prec@1 92.969 (95.434)
Epoch: [156][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2181 (0.1428)	Prec@1 92.969 (95.364)
Test: [0/20]	Time 0.280 (0.280)	Loss 0.1934 (0.1934)	Prec@1 93.750 (93.750)
 * Prec@1 92.460
alpha1 1.1557707786560059
layer1.0.alpha1 0.7990321516990662
layer1.0.alpha2 1.1047917604446411
layer1.1.alpha1 0.6486882567405701
layer1.1.alpha2 1.1070643663406372
layer2.0.alpha1 

Epoch: [162][0/313]	Time 0.248 (0.248)	Data 0.180 (0.180)	Loss 0.1614 (0.1614)	Prec@1 95.312 (95.312)
Epoch: [162][50/313]	Time 0.064 (0.068)	Data 0.000 (0.004)	Loss 0.1180 (0.1400)	Prec@1 96.094 (95.129)
Epoch: [162][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.1502 (0.1419)	Prec@1 93.750 (95.235)
Epoch: [162][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1080 (0.1376)	Prec@1 96.875 (95.416)
Epoch: [162][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.0817 (0.1351)	Prec@1 97.656 (95.557)
Epoch: [162][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0744 (0.1358)	Prec@1 98.438 (95.558)
Epoch: [162][300/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1350 (0.1377)	Prec@1 96.094 (95.497)
Test: [0/20]	Time 0.285 (0.285)	Loss 0.1931 (0.1931)	Prec@1 93.555 (93.555)
 * Prec@1 92.490
alpha1 1.139035940170288
layer1.0.alpha1 0.7917624115943909
layer1.0.alpha2 1.1009272336959839
layer1.1.alpha1 0.6469837427139282
layer1.1.alpha2 1.1071739196777344
layer2.0.alpha1 0

Epoch: [168][0/313]	Time 0.225 (0.225)	Data 0.157 (0.157)	Loss 0.1988 (0.1988)	Prec@1 93.750 (93.750)
Epoch: [168][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.1760 (0.1351)	Prec@1 93.750 (95.665)
Epoch: [168][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.1157 (0.1336)	Prec@1 95.312 (95.645)
Epoch: [168][150/313]	Time 0.067 (0.065)	Data 0.000 (0.001)	Loss 0.1111 (0.1318)	Prec@1 96.094 (95.726)
Epoch: [168][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1625 (0.1329)	Prec@1 95.312 (95.705)
Epoch: [168][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1523 (0.1346)	Prec@1 97.656 (95.652)
Epoch: [168][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1348 (0.1357)	Prec@1 96.094 (95.608)
Test: [0/20]	Time 0.277 (0.277)	Loss 0.1932 (0.1932)	Prec@1 93.750 (93.750)
 * Prec@1 92.410
alpha1 1.1234204769134521
layer1.0.alpha1 0.788337767124176
layer1.0.alpha2 1.0947664976119995
layer1.1.alpha1 0.6284970641136169
layer1.1.alpha2 1.1011725664138794
layer2.0.alpha1 0

Epoch: [174][0/313]	Time 0.223 (0.223)	Data 0.156 (0.156)	Loss 0.0533 (0.0533)	Prec@1 98.438 (98.438)
Epoch: [174][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1104 (0.1394)	Prec@1 96.094 (95.297)
Epoch: [174][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.1135 (0.1375)	Prec@1 96.875 (95.483)
Epoch: [174][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1078 (0.1363)	Prec@1 97.656 (95.468)
Epoch: [174][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.0987 (0.1335)	Prec@1 96.094 (95.600)
Epoch: [174][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1512 (0.1357)	Prec@1 94.531 (95.456)
Epoch: [174][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1448 (0.1364)	Prec@1 96.094 (95.434)
Test: [0/20]	Time 0.281 (0.281)	Loss 0.1840 (0.1840)	Prec@1 94.336 (94.336)
 * Prec@1 92.550
alpha1 1.130601406097412
layer1.0.alpha1 0.7826254963874817
layer1.0.alpha2 1.0903105735778809
layer1.1.alpha1 0.6303487420082092
layer1.1.alpha2 1.099852442741394
layer2.0.alpha1 0.

Epoch: [180][0/313]	Time 0.228 (0.228)	Data 0.158 (0.158)	Loss 0.0986 (0.0986)	Prec@1 97.656 (97.656)
Epoch: [180][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.2337 (0.1265)	Prec@1 90.625 (95.864)
Epoch: [180][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1290 (0.1231)	Prec@1 96.094 (95.869)
Epoch: [180][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1678 (0.1262)	Prec@1 92.969 (95.747)
Epoch: [180][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1085 (0.1269)	Prec@1 96.875 (95.740)
Epoch: [180][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1265 (0.1279)	Prec@1 95.312 (95.745)
Epoch: [180][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1330 (0.1286)	Prec@1 95.312 (95.697)
Test: [0/20]	Time 0.279 (0.279)	Loss 0.1825 (0.1825)	Prec@1 95.117 (95.117)
 * Prec@1 92.660
alpha1 1.1321260929107666
layer1.0.alpha1 0.7748621702194214
layer1.0.alpha2 1.0762444734573364
layer1.1.alpha1 0.6238200068473816
layer1.1.alpha2 1.0939215421676636
layer2.0.alpha1 

Epoch: [186][0/313]	Time 0.217 (0.217)	Data 0.151 (0.151)	Loss 0.1584 (0.1584)	Prec@1 94.531 (94.531)
Epoch: [186][50/313]	Time 0.065 (0.067)	Data 0.000 (0.003)	Loss 0.0967 (0.1285)	Prec@1 96.094 (95.772)
Epoch: [186][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1933 (0.1350)	Prec@1 95.312 (95.521)
Epoch: [186][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.0882 (0.1359)	Prec@1 97.656 (95.473)
Epoch: [186][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1051 (0.1329)	Prec@1 96.094 (95.581)
Epoch: [186][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1314 (0.1314)	Prec@1 96.094 (95.574)
Epoch: [186][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1882 (0.1314)	Prec@1 92.969 (95.575)
Test: [0/20]	Time 0.271 (0.271)	Loss 0.1877 (0.1877)	Prec@1 94.141 (94.141)
 * Prec@1 92.510
alpha1 1.1276816129684448
layer1.0.alpha1 0.7752315402030945
layer1.0.alpha2 1.072249174118042
layer1.1.alpha1 0.6173899173736572
layer1.1.alpha2 1.0916328430175781
layer2.0.alpha1 0

Epoch: [192][0/313]	Time 0.217 (0.217)	Data 0.147 (0.147)	Loss 0.0760 (0.0760)	Prec@1 95.312 (95.312)
Epoch: [192][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1794 (0.1316)	Prec@1 96.094 (95.665)
Epoch: [192][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1548 (0.1356)	Prec@1 92.969 (95.452)
Epoch: [192][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1297 (0.1328)	Prec@1 95.312 (95.613)
Epoch: [192][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1933 (0.1320)	Prec@1 95.312 (95.631)
Epoch: [192][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0926 (0.1308)	Prec@1 96.094 (95.642)
Epoch: [192][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0914 (0.1314)	Prec@1 96.875 (95.629)
Test: [0/20]	Time 0.287 (0.287)	Loss 0.1823 (0.1823)	Prec@1 94.336 (94.336)
 * Prec@1 92.660
alpha1 1.1196235418319702
layer1.0.alpha1 0.76402747631073
layer1.0.alpha2 1.0699259042739868
layer1.1.alpha1 0.616664469242096
layer1.1.alpha2 1.0854147672653198
layer2.0.alpha1 0.6

Epoch: [198][0/313]	Time 0.231 (0.231)	Data 0.165 (0.165)	Loss 0.1085 (0.1085)	Prec@1 96.875 (96.875)
Epoch: [198][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1603 (0.1208)	Prec@1 94.531 (95.971)
Epoch: [198][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1657 (0.1278)	Prec@1 94.531 (95.730)
Epoch: [198][150/313]	Time 0.067 (0.065)	Data 0.000 (0.001)	Loss 0.1966 (0.1255)	Prec@1 93.750 (95.680)
Epoch: [198][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0777 (0.1270)	Prec@1 98.438 (95.732)
Epoch: [198][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1365 (0.1284)	Prec@1 95.312 (95.702)
Epoch: [198][300/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 0.0565 (0.1282)	Prec@1 98.438 (95.681)
Test: [0/20]	Time 0.277 (0.277)	Loss 0.1774 (0.1774)	Prec@1 93.359 (93.359)
 * Prec@1 92.820
alpha1 1.1115409135818481
layer1.0.alpha1 0.759184718132019
layer1.0.alpha2 1.070306420326233
layer1.1.alpha1 0.6189409494400024
layer1.1.alpha2 1.085792064666748
layer2.0.alpha1 0.6

VBox(children=(Label(value=' 1.67MB of 1.67MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,199.0
prec,92.76
train_loss,0.12839
val_loss,0.2524
lr,0.001
_runtime,4321.0
_timestamp,1632920275.0
_step,199.0


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
prec,▁▃▅▆▆▇▇▆▇▇▇▇▇▇▇▇████████████████████████
train_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▃▃▂▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████████▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


current lr 1.00000e-01
Epoch: [0][0/313]	Time 0.252 (0.252)	Data 0.188 (0.188)	Loss 10.9425 (10.9425)	Prec@1 9.375 (9.375)
Epoch: [0][50/313]	Time 0.063 (0.068)	Data 0.000 (0.004)	Loss 2.6274 (9.3513)	Prec@1 8.594 (10.815)
Epoch: [0][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 2.6179 (6.0125)	Prec@1 8.594 (11.641)
Epoch: [0][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 2.5088 (4.8666)	Prec@1 11.719 (12.717)
Epoch: [0][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 2.4084 (4.2712)	Prec@1 17.969 (13.942)
Epoch: [0][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 2.3742 (3.8960)	Prec@1 21.875 (15.183)
Epoch: [0][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 2.2554 (3.6355)	Prec@1 23.438 (16.178)
Test: [0/20]	Time 0.280 (0.280)	Loss 2.0525 (2.0525)	Prec@1 22.461 (22.461)
 * Prec@1 23.220
alpha1 8.324328422546387
layer1.0.alpha1 8.324204444885254
layer1.0.alpha2 8.32494068145752
layer1.1.alpha1 8.323994636535645
layer1.1.alpha2 8.885551452636719
layer2.0.alpha1

Epoch: [6][0/313]	Time 0.235 (0.235)	Data 0.170 (0.170)	Loss 1.2678 (1.2678)	Prec@1 61.719 (61.719)
Epoch: [6][50/313]	Time 0.065 (0.068)	Data 0.000 (0.003)	Loss 1.4092 (1.2814)	Prec@1 50.781 (56.189)
Epoch: [6][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 1.3579 (1.2850)	Prec@1 49.219 (55.925)
Epoch: [6][150/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 1.2538 (1.2790)	Prec@1 56.250 (55.940)
Epoch: [6][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 1.1452 (1.2702)	Prec@1 62.500 (56.079)
Epoch: [6][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 1.0789 (1.2641)	Prec@1 59.375 (56.294)
Epoch: [6][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 1.2734 (1.2613)	Prec@1 53.906 (56.408)
Test: [0/20]	Time 0.285 (0.285)	Loss 0.9615 (0.9615)	Prec@1 64.844 (64.844)
 * Prec@1 64.440
alpha1 2.6103062629699707
layer1.0.alpha1 2.7855210304260254
layer1.0.alpha2 2.930011510848999
layer1.1.alpha1 2.7036685943603516
layer1.1.alpha2 2.83477783203125
layer2.0.alpha1 2.701510906219482

Epoch: [12][0/313]	Time 0.213 (0.213)	Data 0.147 (0.147)	Loss 0.8384 (0.8384)	Prec@1 72.656 (72.656)
Epoch: [12][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.8536 (0.8363)	Prec@1 73.438 (72.059)
Epoch: [12][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.6746 (0.8318)	Prec@1 76.562 (71.635)
Epoch: [12][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.8467 (0.8368)	Prec@1 67.969 (71.316)
Epoch: [12][200/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.9468 (0.8345)	Prec@1 68.750 (71.389)
Epoch: [12][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.8752 (0.8311)	Prec@1 72.656 (71.604)
Epoch: [12][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.8809 (0.8272)	Prec@1 69.531 (71.730)
Test: [0/20]	Time 0.277 (0.277)	Loss 0.6376 (0.6376)	Prec@1 78.906 (78.906)
 * Prec@1 76.390
alpha1 1.8400545120239258
layer1.0.alpha1 1.8201607465744019
layer1.0.alpha2 2.0092668533325195
layer1.1.alpha1 1.2870190143585205
layer1.1.alpha2 1.806977391242981
layer2.0.alpha1 1.459859

Epoch: [18][0/313]	Time 0.221 (0.221)	Data 0.156 (0.156)	Loss 0.6235 (0.6235)	Prec@1 79.688 (79.688)
Epoch: [18][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.6650 (0.6330)	Prec@1 77.344 (78.814)
Epoch: [18][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.6945 (0.6418)	Prec@1 78.906 (78.504)
Epoch: [18][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.7476 (0.6407)	Prec@1 73.438 (78.311)
Epoch: [18][200/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 0.7568 (0.6504)	Prec@1 74.219 (77.977)
Epoch: [18][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5408 (0.6500)	Prec@1 81.250 (77.938)
Epoch: [18][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.7144 (0.6526)	Prec@1 74.219 (77.780)
Test: [0/20]	Time 0.284 (0.284)	Loss 0.5139 (0.5139)	Prec@1 83.594 (83.594)
 * Prec@1 81.100
alpha1 1.6190019845962524
layer1.0.alpha1 1.442848801612854
layer1.0.alpha2 1.7937651872634888
layer1.1.alpha1 0.9150967001914978
layer1.1.alpha2 1.6958974599838257
layer2.0.alpha1 1.126021

Epoch: [24][0/313]	Time 0.222 (0.222)	Data 0.153 (0.153)	Loss 0.5544 (0.5544)	Prec@1 78.906 (78.906)
Epoch: [24][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.5748 (0.5383)	Prec@1 75.781 (81.939)
Epoch: [24][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.4961 (0.5446)	Prec@1 85.156 (81.590)
Epoch: [24][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5145 (0.5549)	Prec@1 82.812 (81.147)
Epoch: [24][200/313]	Time 0.066 (0.065)	Data 0.000 (0.001)	Loss 0.5461 (0.5616)	Prec@1 80.469 (80.873)
Epoch: [24][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5300 (0.5654)	Prec@1 83.594 (80.618)
Epoch: [24][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5955 (0.5665)	Prec@1 76.562 (80.560)
Test: [0/20]	Time 0.277 (0.277)	Loss 0.5083 (0.5083)	Prec@1 83.203 (83.203)
 * Prec@1 81.690
alpha1 1.5264626741409302
layer1.0.alpha1 1.5499436855316162
layer1.0.alpha2 1.6326168775558472
layer1.1.alpha1 0.9657810926437378
layer1.1.alpha2 1.5174269676208496
layer2.0.alpha1 1.22468

Epoch: [30][0/313]	Time 0.217 (0.217)	Data 0.153 (0.153)	Loss 0.6153 (0.6153)	Prec@1 78.125 (78.125)
Epoch: [30][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.5222 (0.4869)	Prec@1 82.812 (83.379)
Epoch: [30][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.4884 (0.5020)	Prec@1 81.250 (82.720)
Epoch: [30][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5400 (0.5084)	Prec@1 81.250 (82.585)
Epoch: [30][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.6095 (0.5040)	Prec@1 79.688 (82.859)
Epoch: [30][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5692 (0.5080)	Prec@1 80.469 (82.682)
Epoch: [30][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.6233 (0.5118)	Prec@1 78.906 (82.569)
Test: [0/20]	Time 0.282 (0.282)	Loss 0.4552 (0.4552)	Prec@1 83.203 (83.203)
 * Prec@1 82.500
alpha1 1.5303521156311035
layer1.0.alpha1 1.3064537048339844
layer1.0.alpha2 1.7160687446594238
layer1.1.alpha1 0.9269543290138245
layer1.1.alpha2 1.741851568222046
layer2.0.alpha1 1.087385

Epoch: [36][0/313]	Time 0.238 (0.238)	Data 0.169 (0.169)	Loss 0.4248 (0.4248)	Prec@1 85.156 (85.156)
Epoch: [36][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.3995 (0.4621)	Prec@1 87.500 (84.743)
Epoch: [36][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.5098 (0.4723)	Prec@1 84.375 (84.213)
Epoch: [36][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5907 (0.4741)	Prec@1 79.688 (84.059)
Epoch: [36][200/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.5061 (0.4767)	Prec@1 83.594 (83.854)
Epoch: [36][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.4444 (0.4814)	Prec@1 85.156 (83.675)
Epoch: [36][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.6037 (0.4836)	Prec@1 79.688 (83.537)
Test: [0/20]	Time 0.285 (0.285)	Loss 0.4820 (0.4820)	Prec@1 84.570 (84.570)
 * Prec@1 82.640
alpha1 1.7487399578094482
layer1.0.alpha1 1.2603309154510498
layer1.0.alpha2 1.687070369720459
layer1.1.alpha1 0.9599250555038452
layer1.1.alpha2 1.5862985849380493
layer2.0.alpha1 1.063980

Epoch: [42][0/313]	Time 0.228 (0.228)	Data 0.163 (0.163)	Loss 0.4534 (0.4534)	Prec@1 85.156 (85.156)
Epoch: [42][50/313]	Time 0.065 (0.067)	Data 0.000 (0.003)	Loss 0.4076 (0.4449)	Prec@1 85.938 (84.789)
Epoch: [42][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.3944 (0.4489)	Prec@1 85.938 (84.723)
Epoch: [42][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3638 (0.4481)	Prec@1 86.719 (84.644)
Epoch: [42][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.6313 (0.4460)	Prec@1 74.219 (84.888)
Epoch: [42][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5773 (0.4513)	Prec@1 77.344 (84.590)
Epoch: [42][300/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 0.3569 (0.4507)	Prec@1 88.281 (84.666)
Test: [0/20]	Time 0.276 (0.276)	Loss 0.3660 (0.3660)	Prec@1 87.500 (87.500)
 * Prec@1 85.790
alpha1 1.6327511072158813
layer1.0.alpha1 1.2534054517745972
layer1.0.alpha2 1.6206413507461548
layer1.1.alpha1 0.9271547794342041
layer1.1.alpha2 1.5056549310684204
layer2.0.alpha1 1.07064

Epoch: [48][0/313]	Time 0.217 (0.217)	Data 0.150 (0.150)	Loss 0.3308 (0.3308)	Prec@1 85.938 (85.938)
Epoch: [48][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.4035 (0.4111)	Prec@1 84.375 (86.229)
Epoch: [48][100/313]	Time 0.064 (0.065)	Data 0.000 (0.002)	Loss 0.6007 (0.4311)	Prec@1 84.375 (85.651)
Epoch: [48][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4747 (0.4388)	Prec@1 83.594 (85.172)
Epoch: [48][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5420 (0.4385)	Prec@1 82.031 (85.176)
Epoch: [48][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4678 (0.4394)	Prec@1 84.375 (85.078)
Epoch: [48][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.4036 (0.4446)	Prec@1 86.719 (84.866)
Test: [0/20]	Time 0.273 (0.273)	Loss 0.4036 (0.4036)	Prec@1 85.742 (85.742)
 * Prec@1 84.420
alpha1 1.7715815305709839
layer1.0.alpha1 1.1915165185928345
layer1.0.alpha2 1.7204935550689697
layer1.1.alpha1 0.9276249408721924
layer1.1.alpha2 1.5076696872711182
layer2.0.alpha1 1.13908

Epoch: [54][0/313]	Time 0.224 (0.224)	Data 0.158 (0.158)	Loss 0.4912 (0.4912)	Prec@1 84.375 (84.375)
Epoch: [54][50/313]	Time 0.064 (0.068)	Data 0.000 (0.003)	Loss 0.4880 (0.4039)	Prec@1 83.594 (86.612)
Epoch: [54][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.4846 (0.4105)	Prec@1 84.375 (86.231)
Epoch: [54][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4448 (0.4192)	Prec@1 85.938 (86.082)
Epoch: [54][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5695 (0.4159)	Prec@1 83.594 (86.144)
Epoch: [54][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.3914 (0.4182)	Prec@1 89.062 (85.994)
Epoch: [54][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.6006 (0.4196)	Prec@1 82.031 (85.878)
Test: [0/20]	Time 0.272 (0.272)	Loss 0.4048 (0.4048)	Prec@1 87.695 (87.695)
 * Prec@1 86.100
alpha1 1.5947597026824951
layer1.0.alpha1 1.2868624925613403
layer1.0.alpha2 1.4726094007492065
layer1.1.alpha1 0.9595670700073242
layer1.1.alpha2 1.516987681388855
layer2.0.alpha1 0.979729

Epoch: [60][0/313]	Time 0.223 (0.223)	Data 0.158 (0.158)	Loss 0.3568 (0.3568)	Prec@1 89.062 (89.062)
Epoch: [60][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.4230 (0.4248)	Prec@1 85.156 (85.662)
Epoch: [60][100/313]	Time 0.065 (0.066)	Data 0.000 (0.002)	Loss 0.4072 (0.4211)	Prec@1 86.719 (85.744)
Epoch: [60][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2855 (0.4282)	Prec@1 92.969 (85.643)
Epoch: [60][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4155 (0.4208)	Prec@1 86.719 (85.895)
Epoch: [60][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.5793 (0.4239)	Prec@1 83.594 (85.748)
Epoch: [60][300/313]	Time 0.066 (0.065)	Data 0.000 (0.001)	Loss 0.3897 (0.4233)	Prec@1 84.375 (85.706)
Test: [0/20]	Time 0.288 (0.288)	Loss 0.4622 (0.4622)	Prec@1 84.961 (84.961)
 * Prec@1 84.050
alpha1 1.5975183248519897
layer1.0.alpha1 1.2413227558135986
layer1.0.alpha2 1.5816023349761963
layer1.1.alpha1 0.9637518525123596
layer1.1.alpha2 1.5465573072433472
layer2.0.alpha1 0.95251

Epoch: [66][0/313]	Time 0.230 (0.230)	Data 0.160 (0.160)	Loss 0.4132 (0.4132)	Prec@1 84.375 (84.375)
Epoch: [66][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.3979 (0.3883)	Prec@1 84.375 (87.025)
Epoch: [66][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.3999 (0.3950)	Prec@1 86.719 (86.719)
Epoch: [66][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3777 (0.4090)	Prec@1 85.156 (86.238)
Epoch: [66][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4079 (0.4098)	Prec@1 87.500 (86.283)
Epoch: [66][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3512 (0.4073)	Prec@1 89.844 (86.271)
Epoch: [66][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3744 (0.4049)	Prec@1 85.938 (86.278)
Test: [0/20]	Time 0.283 (0.283)	Loss 0.4342 (0.4342)	Prec@1 86.523 (86.523)
 * Prec@1 85.640
alpha1 1.6047402620315552
layer1.0.alpha1 1.1896926164627075
layer1.0.alpha2 1.6273208856582642
layer1.1.alpha1 0.9778171181678772
layer1.1.alpha2 1.5319898128509521
layer2.0.alpha1 1.02085

Epoch: [72][0/313]	Time 0.220 (0.220)	Data 0.153 (0.153)	Loss 0.4242 (0.4242)	Prec@1 86.719 (86.719)
Epoch: [72][50/313]	Time 0.065 (0.067)	Data 0.000 (0.003)	Loss 0.3911 (0.3662)	Prec@1 89.062 (87.561)
Epoch: [72][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.3302 (0.3704)	Prec@1 92.188 (87.384)
Epoch: [72][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.4512 (0.3741)	Prec@1 84.375 (87.283)
Epoch: [72][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3039 (0.3786)	Prec@1 90.625 (87.139)
Epoch: [72][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.3787 (0.3839)	Prec@1 86.719 (86.962)
Epoch: [72][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4501 (0.3876)	Prec@1 82.031 (86.773)
Test: [0/20]	Time 0.276 (0.276)	Loss 0.4119 (0.4119)	Prec@1 85.352 (85.352)
 * Prec@1 85.580
alpha1 1.5188660621643066
layer1.0.alpha1 1.1751124858856201
layer1.0.alpha2 1.5412185192108154
layer1.1.alpha1 0.8817414045333862
layer1.1.alpha2 1.5409083366394043
layer2.0.alpha1 0.93324

Epoch: [78][0/313]	Time 0.228 (0.228)	Data 0.157 (0.157)	Loss 0.3350 (0.3350)	Prec@1 88.281 (88.281)
Epoch: [78][50/313]	Time 0.065 (0.069)	Data 0.000 (0.004)	Loss 0.3534 (0.3729)	Prec@1 90.625 (87.377)
Epoch: [78][100/313]	Time 0.064 (0.067)	Data 0.000 (0.002)	Loss 0.2988 (0.3719)	Prec@1 89.844 (87.438)
Epoch: [78][150/313]	Time 0.065 (0.066)	Data 0.000 (0.001)	Loss 0.4579 (0.3729)	Prec@1 84.375 (87.469)
Epoch: [78][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.5848 (0.3786)	Prec@1 81.250 (87.240)
Epoch: [78][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.4011 (0.3822)	Prec@1 82.031 (87.092)
Epoch: [78][300/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.4721 (0.3888)	Prec@1 85.156 (86.854)
Test: [0/20]	Time 0.273 (0.273)	Loss 0.2924 (0.2924)	Prec@1 90.234 (90.234)
 * Prec@1 88.020
alpha1 1.4557338953018188
layer1.0.alpha1 1.005427598953247
layer1.0.alpha2 1.562502384185791
layer1.1.alpha1 0.9704911112785339
layer1.1.alpha2 1.5024282932281494
layer2.0.alpha1 0.9740174

Epoch: [84][0/313]	Time 0.234 (0.234)	Data 0.165 (0.165)	Loss 0.2577 (0.2577)	Prec@1 91.406 (91.406)
Epoch: [84][50/313]	Time 0.065 (0.067)	Data 0.000 (0.003)	Loss 0.2006 (0.2168)	Prec@1 94.531 (92.862)
Epoch: [84][100/313]	Time 0.063 (0.066)	Data 0.000 (0.002)	Loss 0.2458 (0.2064)	Prec@1 91.406 (93.131)
Epoch: [84][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1966 (0.2055)	Prec@1 92.188 (93.191)
Epoch: [84][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1526 (0.2073)	Prec@1 92.969 (93.210)
Epoch: [84][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1229 (0.2093)	Prec@1 96.094 (93.131)
Epoch: [84][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1553 (0.2100)	Prec@1 95.312 (93.112)
Test: [0/20]	Time 0.280 (0.280)	Loss 0.2178 (0.2178)	Prec@1 92.578 (92.578)
 * Prec@1 91.830
alpha1 1.4786955118179321
layer1.0.alpha1 0.9459094405174255
layer1.0.alpha2 1.417222023010254
layer1.1.alpha1 0.8526462912559509
layer1.1.alpha2 1.3538826704025269
layer2.0.alpha1 0.845228

Epoch: [90][0/313]	Time 0.229 (0.229)	Data 0.163 (0.163)	Loss 0.1368 (0.1368)	Prec@1 95.312 (95.312)
Epoch: [90][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.2024 (0.1952)	Prec@1 92.969 (93.673)
Epoch: [90][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1890 (0.1949)	Prec@1 92.188 (93.502)
Epoch: [90][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1359 (0.1956)	Prec@1 96.875 (93.377)
Epoch: [90][200/313]	Time 0.065 (0.065)	Data 0.000 (0.001)	Loss 0.2671 (0.1963)	Prec@1 91.406 (93.389)
Epoch: [90][250/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.2374 (0.1942)	Prec@1 92.188 (93.473)
Epoch: [90][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.2003 (0.1934)	Prec@1 92.969 (93.519)
Test: [0/20]	Time 0.281 (0.281)	Loss 0.2069 (0.2069)	Prec@1 93.359 (93.359)
 * Prec@1 91.990
alpha1 1.4338957071304321
layer1.0.alpha1 0.9274486303329468
layer1.0.alpha2 1.370319128036499
layer1.1.alpha1 0.8214725255966187
layer1.1.alpha2 1.314862847328186
layer2.0.alpha1 0.8085950

Epoch: [96][0/313]	Time 0.215 (0.215)	Data 0.149 (0.149)	Loss 0.1163 (0.1163)	Prec@1 96.094 (96.094)
Epoch: [96][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1123 (0.1686)	Prec@1 96.094 (94.409)
Epoch: [96][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1687 (0.1708)	Prec@1 93.750 (94.307)
Epoch: [96][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1053 (0.1708)	Prec@1 96.094 (94.304)
Epoch: [96][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1221 (0.1740)	Prec@1 95.312 (94.255)
Epoch: [96][250/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.1979 (0.1729)	Prec@1 92.188 (94.323)
Epoch: [96][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.2482 (0.1740)	Prec@1 91.406 (94.264)
Test: [0/20]	Time 0.282 (0.282)	Loss 0.2023 (0.2023)	Prec@1 93.359 (93.359)
 * Prec@1 92.180
alpha1 1.2873338460922241
layer1.0.alpha1 0.9046310186386108
layer1.0.alpha2 1.292419195175171
layer1.1.alpha1 0.7981685996055603
layer1.1.alpha2 1.23208487033844
layer2.0.alpha1 0.74915623

Epoch: [102][0/313]	Time 0.239 (0.239)	Data 0.171 (0.171)	Loss 0.1953 (0.1953)	Prec@1 92.969 (92.969)
Epoch: [102][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.1862 (0.1593)	Prec@1 92.969 (94.455)
Epoch: [102][100/313]	Time 0.064 (0.065)	Data 0.000 (0.002)	Loss 0.0986 (0.1672)	Prec@1 97.656 (94.322)
Epoch: [102][150/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.2282 (0.1685)	Prec@1 92.969 (94.371)
Epoch: [102][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1834 (0.1702)	Prec@1 92.188 (94.360)
Epoch: [102][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2368 (0.1713)	Prec@1 92.188 (94.329)
Epoch: [102][300/313]	Time 0.064 (0.064)	Data 0.000 (0.001)	Loss 0.1912 (0.1699)	Prec@1 92.188 (94.318)
Test: [0/20]	Time 0.296 (0.296)	Loss 0.1974 (0.1974)	Prec@1 93.164 (93.164)
 * Prec@1 92.330
alpha1 1.2306195497512817
layer1.0.alpha1 0.8903988599777222
layer1.0.alpha2 1.2045658826828003
layer1.1.alpha1 0.7452350854873657
layer1.1.alpha2 1.2201539278030396
layer2.0.alpha1 

Epoch: [108][0/313]	Time 0.235 (0.235)	Data 0.167 (0.167)	Loss 0.2411 (0.2411)	Prec@1 90.625 (90.625)
Epoch: [108][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1109 (0.1695)	Prec@1 96.875 (94.256)
Epoch: [108][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.2029 (0.1661)	Prec@1 92.969 (94.369)
Epoch: [108][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0659 (0.1671)	Prec@1 99.219 (94.464)
Epoch: [108][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1882 (0.1640)	Prec@1 92.969 (94.523)
Epoch: [108][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1885 (0.1632)	Prec@1 93.750 (94.565)
Epoch: [108][300/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.1641 (0.1632)	Prec@1 94.531 (94.544)
Test: [0/20]	Time 0.308 (0.308)	Loss 0.2088 (0.2088)	Prec@1 92.969 (92.969)
 * Prec@1 92.470
alpha1 1.2288254499435425
layer1.0.alpha1 0.7947936058044434
layer1.0.alpha2 1.1362614631652832
layer1.1.alpha1 0.7089576721191406
layer1.1.alpha2 1.1610450744628906
layer2.0.alpha1 

Epoch: [114][0/313]	Time 0.218 (0.218)	Data 0.152 (0.152)	Loss 0.1586 (0.1586)	Prec@1 92.969 (92.969)
Epoch: [114][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.0807 (0.1362)	Prec@1 97.656 (95.604)
Epoch: [114][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1547 (0.1488)	Prec@1 93.750 (94.941)
Epoch: [114][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0994 (0.1513)	Prec@1 97.656 (94.935)
Epoch: [114][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2215 (0.1517)	Prec@1 92.969 (94.873)
Epoch: [114][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1613 (0.1535)	Prec@1 95.312 (94.833)
Epoch: [114][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1135 (0.1542)	Prec@1 96.094 (94.827)
Test: [0/20]	Time 0.276 (0.276)	Loss 0.1724 (0.1724)	Prec@1 94.727 (94.727)
 * Prec@1 92.300
alpha1 1.154839277267456
layer1.0.alpha1 0.7938284873962402
layer1.0.alpha2 1.1153298616409302
layer1.1.alpha1 0.661801815032959
layer1.1.alpha2 1.1279408931732178
layer2.0.alpha1 0.

Epoch: [120][0/313]	Time 0.216 (0.216)	Data 0.149 (0.149)	Loss 0.0689 (0.0689)	Prec@1 98.438 (98.438)
Epoch: [120][50/313]	Time 0.064 (0.066)	Data 0.000 (0.003)	Loss 0.1334 (0.1518)	Prec@1 94.531 (95.205)
Epoch: [120][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.1842 (0.1515)	Prec@1 92.969 (95.150)
Epoch: [120][150/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1273 (0.1535)	Prec@1 94.531 (94.961)
Epoch: [120][200/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1313 (0.1506)	Prec@1 96.094 (95.025)
Epoch: [120][250/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.1299 (0.1509)	Prec@1 94.531 (95.032)
Epoch: [120][300/313]	Time 0.063 (0.064)	Data 0.000 (0.001)	Loss 0.2148 (0.1501)	Prec@1 92.188 (95.087)
Test: [0/20]	Time 0.284 (0.284)	Loss 0.1967 (0.1967)	Prec@1 94.336 (94.336)
 * Prec@1 92.490
alpha1 1.162499189376831
layer1.0.alpha1 0.7788605690002441
layer1.0.alpha2 1.1066889762878418
layer1.1.alpha1 0.6630980968475342
layer1.1.alpha2 1.0834914445877075
layer2.0.alpha1 0

Epoch: [126][0/313]	Time 0.222 (0.222)	Data 0.153 (0.153)	Loss 0.1188 (0.1188)	Prec@1 95.312 (95.312)
Epoch: [126][50/313]	Time 0.065 (0.067)	Data 0.000 (0.003)	Loss 0.1612 (0.1486)	Prec@1 94.531 (94.991)
Epoch: [126][100/313]	Time 0.063 (0.065)	Data 0.000 (0.002)	Loss 0.0994 (0.1531)	Prec@1 98.438 (94.926)
Epoch: [126][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1753 (0.1504)	Prec@1 95.312 (94.971)
Epoch: [126][200/313]	Time 0.063 (0.065)	Data 0.000 (0.001)	Loss 0.0842 (0.1499)	Prec@1 96.875 (95.072)
Epoch: [126][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1944 (0.1510)	Prec@1 93.750 (95.048)
Epoch: [126][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0965 (0.1509)	Prec@1 96.875 (95.045)
Test: [0/20]	Time 0.284 (0.284)	Loss 0.1797 (0.1797)	Prec@1 94.531 (94.531)
 * Prec@1 92.270
alpha1 1.0849709510803223
layer1.0.alpha1 0.7231065630912781
layer1.0.alpha2 1.0446244478225708
layer1.1.alpha1 0.614225447177887
layer1.1.alpha2 1.0505894422531128
layer2.0.alpha1 0

Epoch: [132][0/313]	Time 0.215 (0.215)	Data 0.148 (0.148)	Loss 0.1984 (0.1984)	Prec@1 94.531 (94.531)
Epoch: [132][50/313]	Time 0.064 (0.067)	Data 0.000 (0.003)	Loss 0.1729 (0.1469)	Prec@1 95.312 (95.236)
Epoch: [132][100/313]	Time 0.064 (0.065)	Data 0.000 (0.002)	Loss 0.1201 (0.1453)	Prec@1 96.875 (95.181)
Epoch: [132][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.0815 (0.1436)	Prec@1 96.875 (95.266)
Epoch: [132][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1195 (0.1428)	Prec@1 96.875 (95.208)
Epoch: [132][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1269 (0.1442)	Prec@1 97.656 (95.210)
Epoch: [132][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1466 (0.1457)	Prec@1 96.094 (95.183)
Test: [0/20]	Time 0.276 (0.276)	Loss 0.1767 (0.1767)	Prec@1 94.141 (94.141)
 * Prec@1 92.140
alpha1 1.082770824432373
layer1.0.alpha1 0.7136817574501038
layer1.0.alpha2 1.0130186080932617
layer1.1.alpha1 0.5905248522758484
layer1.1.alpha2 1.006473183631897
layer2.0.alpha1 0.

Epoch: [138][0/313]	Time 0.233 (0.233)	Data 0.163 (0.163)	Loss 0.1001 (0.1001)	Prec@1 96.094 (96.094)
Epoch: [138][50/313]	Time 0.063 (0.067)	Data 0.000 (0.003)	Loss 0.1601 (0.1478)	Prec@1 94.531 (94.853)
Epoch: [138][100/313]	Time 0.064 (0.066)	Data 0.000 (0.002)	Loss 0.1684 (0.1464)	Prec@1 94.531 (95.057)
Epoch: [138][150/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1183 (0.1464)	Prec@1 95.312 (95.116)
Epoch: [138][200/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1832 (0.1451)	Prec@1 95.312 (95.173)
Epoch: [138][250/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.1202 (0.1448)	Prec@1 96.094 (95.210)
Epoch: [138][300/313]	Time 0.064 (0.065)	Data 0.000 (0.001)	Loss 0.2624 (0.1443)	Prec@1 92.969 (95.232)
Test: [0/20]	Time 0.277 (0.277)	Loss 0.1701 (0.1701)	Prec@1 94.727 (94.727)
 * Prec@1 92.510
alpha1 1.0693154335021973
layer1.0.alpha1 0.6775432825088501
layer1.0.alpha2 0.9726573824882507
layer1.1.alpha1 0.5515686869621277
layer1.1.alpha2 1.0357176065444946
layer2.0.alpha1 

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
arch = "resnet34"

for K in range(3,8):
    model = resnet.__dict__[arch](K)
    model = model.cuda()

    import wandb
    watermark = "{}_paper_quant{}".format(arch,K)
    wandb.init(project="pact",
                name=watermark)

    # define loss function (criterion) and pptimizer
    mixup = False
    criterion = nn.CrossEntropyLoss().cuda()
    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    #optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-1,
                                momentum=0.9,
                                weight_decay=0.0002)

    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, 
                                                        milestones=[80, 150],)

    best_prec1 = 0
    os.makedirs("models", exist_ok=True)

    for epoch in range(200):
        # train for one epoch
        print('current lr {:.5e}'.format(optimizer.param_groups[0]['lr']))
        tloss = train(train_loader, model, criterion, optimizer, epoch)
        lr_scheduler.step()

        # evaluate on validation set
        prec1, valloss = validate(val_loader, model, criterion)

        # wandb
        wandb.log({'epoch': epoch, "prec":prec1, "train_loss": tloss, 'val_loss': valloss, "lr": optimizer.param_groups[0]["lr"],})

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)

        print("Best prec1 : ", best_prec1)
        if is_best:
            torch.save(model.state_dict(), os.path.join(f'models/{arch}_paper_quant{K}.pth'))

bit width: 3


VBox(children=(Label(value=' 0.78MB of 0.78MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,199.0
prec,93.06
train_loss,0.10556
val_loss,0.23313
lr,0.001
_runtime,4326.0
_timestamp,1632937600.0
_step,199.0


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
prec,▁▄▆▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████████
train_loss,█▆▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▃▂▂▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████████▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


current lr 1.00000e-01
Epoch: [0][0/313]	Time 0.256 (0.256)	Data 0.148 (0.148)	Loss 9.5734 (9.5734)	Prec@1 9.375 (9.375)
Epoch: [0][50/313]	Time 0.106 (0.109)	Data 0.000 (0.003)	Loss 2.8731 (16.2324)	Prec@1 16.406 (11.734)
Epoch: [0][100/313]	Time 0.105 (0.107)	Data 0.000 (0.002)	Loss 2.7176 (9.6006)	Prec@1 21.094 (13.436)
Epoch: [0][150/313]	Time 0.105 (0.107)	Data 0.000 (0.001)	Loss 2.6285 (7.3266)	Prec@1 22.656 (15.066)
Epoch: [0][200/313]	Time 0.106 (0.106)	Data 0.000 (0.001)	Loss 2.6343 (6.1689)	Prec@1 20.312 (16.142)
Epoch: [0][250/313]	Time 0.107 (0.106)	Data 0.000 (0.001)	Loss 2.6166 (5.4592)	Prec@1 22.656 (17.013)
Epoch: [0][300/313]	Time 0.105 (0.106)	Data 0.000 (0.001)	Loss 2.5649 (4.9759)	Prec@1 10.938 (17.546)
Test: [0/20]	Time 0.334 (0.334)	Loss 2.0656 (2.0656)	Prec@1 22.070 (22.070)
 * Prec@1 23.070
alpha1 8.321855545043945
layer1.0.alpha1 8.326095581054688
layer1.0.alpha2 8.322935104370117
layer1.1.alpha1 8.324823379516602
layer1.1.alpha2 8.318604469299316
layer1.2.alph

Epoch: [4][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 1.8708 (1.8563)	Prec@1 32.031 (35.432)
Epoch: [4][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 1.7260 (1.8446)	Prec@1 46.875 (35.782)
Epoch: [4][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 1.6501 (1.8332)	Prec@1 41.406 (36.153)
Test: [0/20]	Time 0.371 (0.371)	Loss 1.5524 (1.5524)	Prec@1 39.258 (39.258)
 * Prec@1 42.000
alpha1 4.060616970062256
layer1.0.alpha1 3.9133152961730957
layer1.0.alpha2 3.9867937564849854
layer1.1.alpha1 3.914029598236084
layer1.1.alpha2 4.045048713684082
layer1.2.alpha1 3.910189390182495
layer1.2.alpha2 3.957794427871704
layer2.0.alpha1 3.9102394580841064
layer2.0.alpha2 3.9165899753570557
layer2.1.alpha1 3.9112741947174072
layer2.1.alpha2 3.9555954933166504
layer2.2.alpha1 3.910571813583374
layer2.2.alpha2 3.957385778427124
layer2.3.alpha1 3.91054630279541
layer2.3.alpha2 4.1303606033325195
layer3.0.alpha1 3.910491943359375
layer3.0.alpha2 3.9590961933135986
layer3.1.alpha1 3.9120171

Test: [0/20]	Time 0.331 (0.331)	Loss 1.2153 (1.2153)	Prec@1 55.078 (55.078)
 * Prec@1 53.300
alpha1 2.4854331016540527
layer1.0.alpha1 2.2082362174987793
layer1.0.alpha2 2.5536744594573975
layer1.1.alpha1 2.068439483642578
layer1.1.alpha2 2.5889062881469727
layer1.2.alpha1 1.9150809049606323
layer1.2.alpha2 2.479336738586426
layer2.0.alpha1 1.9868701696395874
layer2.0.alpha2 2.3227949142456055
layer2.1.alpha1 1.9942318201065063
layer2.1.alpha2 2.4511759281158447
layer2.2.alpha1 1.8792163133621216
layer2.2.alpha2 2.3562610149383545
layer2.3.alpha1 1.8770252466201782
layer2.3.alpha2 2.3816778659820557
layer3.0.alpha1 1.949143648147583
layer3.0.alpha2 2.3994429111480713
layer3.1.alpha1 2.072840929031372
layer3.1.alpha2 2.5608184337615967
layer3.2.alpha1 2.0304040908813477
layer3.2.alpha2 2.5894787311553955
layer3.3.alpha1 1.9411303997039795
layer3.3.alpha2 2.539820432662964
layer3.4.alpha1 1.9218138456344604
layer3.4.alpha2 2.525484323501587
layer3.5.alpha1 1.8905878067016602
layer3.5.alp

current lr 1.00000e-01
Epoch: [13][0/313]	Time 0.272 (0.272)	Data 0.161 (0.161)	Loss 1.0986 (1.0986)	Prec@1 61.719 (61.719)
Epoch: [13][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.9360 (1.0278)	Prec@1 67.188 (65.028)
Epoch: [13][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 1.0687 (1.0140)	Prec@1 61.719 (65.377)
Epoch: [13][150/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 1.0452 (1.0156)	Prec@1 61.719 (65.490)
Epoch: [13][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 1.1471 (1.0167)	Prec@1 57.031 (65.368)
Epoch: [13][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.9622 (1.0153)	Prec@1 64.062 (65.304)
Epoch: [13][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.9453 (1.0117)	Prec@1 69.531 (65.459)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.7437 (0.7437)	Prec@1 75.391 (75.391)
 * Prec@1 72.650
alpha1 2.1108577251434326
layer1.0.alpha1 1.6903074979782104
layer1.0.alpha2 2.0422000885009766
layer1.1.alpha1 1.4067150354385376
layer1.1.alpha2 2.0855798721313477


Epoch: [17][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.8978 (0.8677)	Prec@1 72.656 (70.738)
Epoch: [17][150/313]	Time 0.107 (0.109)	Data 0.000 (0.001)	Loss 0.9819 (0.8563)	Prec@1 70.312 (71.114)
Epoch: [17][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6660 (0.8484)	Prec@1 79.688 (71.455)
Epoch: [17][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.9725 (0.8465)	Prec@1 63.281 (71.492)
Epoch: [17][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.7943 (0.8463)	Prec@1 69.531 (71.377)
Test: [0/20]	Time 0.319 (0.319)	Loss 0.7457 (0.7457)	Prec@1 75.000 (75.000)
 * Prec@1 74.930
alpha1 1.9819176197052002
layer1.0.alpha1 1.4056282043457031
layer1.0.alpha2 1.9331109523773193
layer1.1.alpha1 1.176755666732788
layer1.1.alpha2 1.93061363697052
layer1.2.alpha1 1.0800811052322388
layer1.2.alpha2 1.7883485555648804
layer2.0.alpha1 1.176218032836914
layer2.0.alpha2 1.5806957483291626
layer2.1.alpha1 1.1490944623947144
layer2.1.alpha2 1.6301072835922241
layer2.2.alpha1 0.91

Epoch: [21][200/313]	Time 0.110 (0.109)	Data 0.000 (0.001)	Loss 0.8893 (0.7365)	Prec@1 63.281 (74.949)
Epoch: [21][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.6982 (0.7412)	Prec@1 78.125 (74.723)
Epoch: [21][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.6596 (0.7414)	Prec@1 75.000 (74.714)
Test: [0/20]	Time 0.315 (0.315)	Loss 0.5426 (0.5426)	Prec@1 82.031 (82.031)
 * Prec@1 80.410
alpha1 1.9434674978256226
layer1.0.alpha1 1.2528518438339233
layer1.0.alpha2 1.8665118217468262
layer1.1.alpha1 1.112221598625183
layer1.1.alpha2 1.8211034536361694
layer1.2.alpha1 0.9241588115692139
layer1.2.alpha2 1.7093100547790527
layer2.0.alpha1 1.1084797382354736
layer2.0.alpha2 1.4914220571517944
layer2.1.alpha1 0.983445405960083
layer2.1.alpha2 1.6124904155731201
layer2.2.alpha1 0.9349696040153503
layer2.2.alpha2 1.5497866868972778
layer2.3.alpha1 0.7696075439453125
layer2.3.alpha2 1.4298713207244873
layer3.0.alpha1 1.077661156654358
layer3.0.alpha2 1.5256102085113525
layer3.1.alpha1

Epoch: [25][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6780 (0.6726)	Prec@1 81.250 (77.209)
Test: [0/20]	Time 0.336 (0.336)	Loss 0.5436 (0.5436)	Prec@1 81.055 (81.055)
 * Prec@1 80.530
alpha1 1.799073576927185
layer1.0.alpha1 1.1836446523666382
layer1.0.alpha2 1.8554584980010986
layer1.1.alpha1 1.02292001247406
layer1.1.alpha2 1.89149010181427
layer1.2.alpha1 1.0018460750579834
layer1.2.alpha2 1.8214091062545776
layer2.0.alpha1 1.0975052118301392
layer2.0.alpha2 1.4611828327178955
layer2.1.alpha1 0.9942193031311035
layer2.1.alpha2 1.484343409538269
layer2.2.alpha1 0.820849597454071
layer2.2.alpha2 1.4529608488082886
layer2.3.alpha1 0.6845669150352478
layer2.3.alpha2 1.422597885131836
layer3.0.alpha1 1.0079327821731567
layer3.0.alpha2 1.4162877798080444
layer3.1.alpha1 0.9149126410484314
layer3.1.alpha2 1.3967851400375366
layer3.2.alpha1 0.7294459342956543
layer3.2.alpha2 1.2892156839370728
layer3.3.alpha1 0.6693984866142273
layer3.3.alpha2 1.2005912065505981
layer3.4.alpha1 

 * Prec@1 80.710
alpha1 1.7721890211105347
layer1.0.alpha1 1.0755642652511597
layer1.0.alpha2 1.7942698001861572
layer1.1.alpha1 1.0075095891952515
layer1.1.alpha2 1.8297055959701538
layer1.2.alpha1 0.9427851438522339
layer1.2.alpha2 1.7490026950836182
layer2.0.alpha1 1.1989494562149048
layer2.0.alpha2 1.4634528160095215
layer2.1.alpha1 0.923129677772522
layer2.1.alpha2 1.49673593044281
layer2.2.alpha1 0.7603487968444824
layer2.2.alpha2 1.451796293258667
layer2.3.alpha1 0.7481597065925598
layer2.3.alpha2 1.445694923400879
layer3.0.alpha1 0.9698783755302429
layer3.0.alpha2 1.419838309288025
layer3.1.alpha1 0.9495017528533936
layer3.1.alpha2 1.286944031715393
layer3.2.alpha1 0.7641656994819641
layer3.2.alpha2 1.2334822416305542
layer3.3.alpha1 0.49877601861953735
layer3.3.alpha2 1.1728349924087524
layer3.4.alpha1 0.4793642461299896
layer3.4.alpha2 1.1100969314575195
layer3.5.alpha1 0.3221105933189392
layer3.5.alpha2 1.0655303001403809
layer4.0.alpha1 0.479258269071579
layer4.0.alpha2 2.0

Epoch: [34][0/313]	Time 0.285 (0.285)	Data 0.176 (0.176)	Loss 0.4760 (0.4760)	Prec@1 82.031 (82.031)
Epoch: [34][50/313]	Time 0.109 (0.112)	Data 0.000 (0.004)	Loss 0.4851 (0.5524)	Prec@1 82.812 (81.235)
Epoch: [34][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.6292 (0.5644)	Prec@1 77.344 (80.778)
Epoch: [34][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.6151 (0.5634)	Prec@1 75.781 (80.821)
Epoch: [34][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4639 (0.5638)	Prec@1 84.375 (80.896)
Epoch: [34][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.6608 (0.5708)	Prec@1 76.562 (80.677)
Epoch: [34][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5420 (0.5718)	Prec@1 80.469 (80.617)
Test: [0/20]	Time 0.320 (0.320)	Loss 0.4380 (0.4380)	Prec@1 84.180 (84.180)
 * Prec@1 82.930
alpha1 1.7062182426452637
layer1.0.alpha1 1.1650251150131226
layer1.0.alpha2 1.7264695167541504
layer1.1.alpha1 1.0198241472244263
layer1.1.alpha2 1.6846799850463867
layer1.2.alpha1 0.99528

Epoch: [38][100/313]	Time 0.111 (0.110)	Data 0.000 (0.002)	Loss 0.5138 (0.5308)	Prec@1 82.031 (82.132)
Epoch: [38][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 0.5720 (0.5350)	Prec@1 82.031 (82.031)
Epoch: [38][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5449 (0.5368)	Prec@1 79.688 (81.868)
Epoch: [38][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4190 (0.5397)	Prec@1 85.938 (81.757)
Epoch: [38][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5232 (0.5400)	Prec@1 87.500 (81.756)
Test: [0/20]	Time 0.329 (0.329)	Loss 0.4677 (0.4677)	Prec@1 82.227 (82.227)
 * Prec@1 82.580
alpha1 1.7552944421768188
layer1.0.alpha1 1.1190208196640015
layer1.0.alpha2 1.6469979286193848
layer1.1.alpha1 0.9680771231651306
layer1.1.alpha2 1.6892757415771484
layer1.2.alpha1 0.8978970646858215
layer1.2.alpha2 1.8444130420684814
layer2.0.alpha1 1.0793882608413696
layer2.0.alpha2 1.407577395439148
layer2.1.alpha1 0.8439041972160339
layer2.1.alpha2 1.4725449085235596
layer2.2.alpha1 0

Epoch: [42][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6148 (0.5153)	Prec@1 81.250 (82.649)
Epoch: [42][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4799 (0.5172)	Prec@1 82.812 (82.479)
Epoch: [42][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5084 (0.5197)	Prec@1 83.594 (82.358)
Test: [0/20]	Time 0.319 (0.319)	Loss 0.4577 (0.4577)	Prec@1 85.156 (85.156)
 * Prec@1 83.370
alpha1 1.7859910726547241
layer1.0.alpha1 1.0459802150726318
layer1.0.alpha2 1.7713754177093506
layer1.1.alpha1 0.93184494972229
layer1.1.alpha2 1.6972694396972656
layer1.2.alpha1 0.9404841065406799
layer1.2.alpha2 1.7217833995819092
layer2.0.alpha1 1.1572312116622925
layer2.0.alpha2 1.4313372373580933
layer2.1.alpha1 0.7630910277366638
layer2.1.alpha2 1.450812816619873
layer2.2.alpha1 0.7956041693687439
layer2.2.alpha2 1.4243770837783813
layer2.3.alpha1 0.6397523880004883
layer2.3.alpha2 1.394796371459961
layer3.0.alpha1 1.037121295928955
layer3.0.alpha2 1.4384632110595703
layer3.1.alpha1 0

Epoch: [46][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.6748 (0.5030)	Prec@1 75.000 (82.929)
Test: [0/20]	Time 0.321 (0.321)	Loss 0.3848 (0.3848)	Prec@1 87.109 (87.109)
 * Prec@1 86.260
alpha1 1.8276519775390625
layer1.0.alpha1 1.0734286308288574
layer1.0.alpha2 1.7173362970352173
layer1.1.alpha1 0.887374758720398
layer1.1.alpha2 1.7482136487960815
layer1.2.alpha1 0.888933002948761
layer1.2.alpha2 1.785308837890625
layer2.0.alpha1 1.1344915628433228
layer2.0.alpha2 1.468488097190857
layer2.1.alpha1 0.7248337268829346
layer2.1.alpha2 1.453420639038086
layer2.2.alpha1 0.6621859073638916
layer2.2.alpha2 1.4808579683303833
layer2.3.alpha1 0.5694865584373474
layer2.3.alpha2 1.4728511571884155
layer3.0.alpha1 1.0634994506835938
layer3.0.alpha2 1.4982621669769287
layer3.1.alpha1 0.9062255620956421
layer3.1.alpha2 1.4032161235809326
layer3.2.alpha1 0.7041922807693481
layer3.2.alpha2 1.2707009315490723
layer3.3.alpha1 0.5321378707885742
layer3.3.alpha2 1.1436247825622559
layer3.4.alph

 * Prec@1 82.190
alpha1 1.7474561929702759
layer1.0.alpha1 1.1429226398468018
layer1.0.alpha2 1.6133537292480469
layer1.1.alpha1 0.9412088990211487
layer1.1.alpha2 1.7882955074310303
layer1.2.alpha1 0.9442703127861023
layer1.2.alpha2 1.7309110164642334
layer2.0.alpha1 1.0235145092010498
layer2.0.alpha2 1.4759567975997925
layer2.1.alpha1 0.694650411605835
layer2.1.alpha2 1.4338783025741577
layer2.2.alpha1 0.7249498963356018
layer2.2.alpha2 1.4490269422531128
layer2.3.alpha1 0.6580537557601929
layer2.3.alpha2 1.3777680397033691
layer3.0.alpha1 1.1014434099197388
layer3.0.alpha2 1.4899063110351562
layer3.1.alpha1 0.8961964249610901
layer3.1.alpha2 1.4299741983413696
layer3.2.alpha1 0.8293885588645935
layer3.2.alpha2 1.3235046863555908
layer3.3.alpha1 0.47841402888298035
layer3.3.alpha2 1.2362210750579834
layer3.4.alpha1 0.28266462683677673
layer3.4.alpha2 1.1884740591049194
layer3.5.alpha1 0.26209789514541626
layer3.5.alpha2 1.1027967929840088
layer4.0.alpha1 0.5063794255256653
layer4.0.a

Epoch: [55][0/313]	Time 0.267 (0.267)	Data 0.157 (0.157)	Loss 0.5199 (0.5199)	Prec@1 82.812 (82.812)
Epoch: [55][50/313]	Time 0.109 (0.111)	Data 0.000 (0.003)	Loss 0.5491 (0.4797)	Prec@1 79.688 (83.900)
Epoch: [55][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.5246 (0.4672)	Prec@1 82.031 (84.220)
Epoch: [55][150/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5175 (0.4706)	Prec@1 84.375 (83.935)
Epoch: [55][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4770 (0.4759)	Prec@1 85.938 (83.877)
Epoch: [55][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4032 (0.4730)	Prec@1 87.500 (83.865)
Epoch: [55][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.3809 (0.4736)	Prec@1 86.719 (83.838)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.4851 (0.4851)	Prec@1 83.594 (83.594)
 * Prec@1 84.170
alpha1 1.809998631477356
layer1.0.alpha1 1.0913623571395874
layer1.0.alpha2 1.6794095039367676
layer1.1.alpha1 0.8012081980705261
layer1.1.alpha2 1.6431105136871338
layer1.2.alpha1 0.911132

Epoch: [59][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.5075 (0.4651)	Prec@1 84.375 (84.352)
Epoch: [59][150/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5705 (0.4709)	Prec@1 78.125 (83.956)
Epoch: [59][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4394 (0.4749)	Prec@1 84.375 (83.850)
Epoch: [59][250/313]	Time 0.108 (0.108)	Data 0.000 (0.001)	Loss 0.5632 (0.4698)	Prec@1 81.250 (83.986)
Epoch: [59][300/313]	Time 0.108 (0.108)	Data 0.000 (0.001)	Loss 0.4328 (0.4705)	Prec@1 84.375 (83.957)
Test: [0/20]	Time 0.322 (0.322)	Loss 0.4150 (0.4150)	Prec@1 86.328 (86.328)
 * Prec@1 85.460
alpha1 1.874718189239502
layer1.0.alpha1 1.0956451892852783
layer1.0.alpha2 1.6593761444091797
layer1.1.alpha1 0.8582870960235596
layer1.1.alpha2 1.6967496871948242
layer1.2.alpha1 0.9345124959945679
layer1.2.alpha2 1.7540189027786255
layer2.0.alpha1 0.9662125110626221
layer2.0.alpha2 1.430991291999817
layer2.1.alpha1 0.7153216600418091
layer2.1.alpha2 1.4089515209197998
layer2.2.alpha1 0.

Epoch: [63][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.6222 (0.4548)	Prec@1 75.781 (84.367)
Epoch: [63][250/313]	Time 0.107 (0.109)	Data 0.000 (0.001)	Loss 0.4398 (0.4549)	Prec@1 85.156 (84.437)
Epoch: [63][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4558 (0.4584)	Prec@1 85.156 (84.297)
Test: [0/20]	Time 0.329 (0.329)	Loss 0.3586 (0.3586)	Prec@1 88.477 (88.477)
 * Prec@1 86.720
alpha1 1.942375659942627
layer1.0.alpha1 1.1392000913619995
layer1.0.alpha2 1.6375370025634766
layer1.1.alpha1 0.7135739922523499
layer1.1.alpha2 1.695646047592163
layer1.2.alpha1 0.8505682945251465
layer1.2.alpha2 1.716306447982788
layer2.0.alpha1 1.0829722881317139
layer2.0.alpha2 1.4662879705429077
layer2.1.alpha1 0.8222036361694336
layer2.1.alpha2 1.4872395992279053
layer2.2.alpha1 0.6325063705444336
layer2.2.alpha2 1.428015112876892
layer2.3.alpha1 0.657255232334137
layer2.3.alpha2 1.4972835779190063
layer3.0.alpha1 1.0875474214553833
layer3.0.alpha2 1.5228217840194702
layer3.1.alpha1 0

Test: [0/20]	Time 0.322 (0.322)	Loss 0.4137 (0.4137)	Prec@1 87.500 (87.500)
 * Prec@1 86.520
alpha1 1.7690379619598389
layer1.0.alpha1 1.1449075937271118
layer1.0.alpha2 1.6518770456314087
layer1.1.alpha1 0.8681900501251221
layer1.1.alpha2 1.6676623821258545
layer1.2.alpha1 0.8981747031211853
layer1.2.alpha2 1.6737031936645508
layer2.0.alpha1 1.0597636699676514
layer2.0.alpha2 1.4700764417648315
layer2.1.alpha1 0.6961566805839539
layer2.1.alpha2 1.457113265991211
layer2.2.alpha1 0.6311626434326172
layer2.2.alpha2 1.433872938156128
layer2.3.alpha1 0.6472292542457581
layer2.3.alpha2 1.5189050436019897
layer3.0.alpha1 1.0412858724594116
layer3.0.alpha2 1.4906766414642334
layer3.1.alpha1 0.7995349764823914
layer3.1.alpha2 1.4284274578094482
layer3.2.alpha1 0.7094268798828125
layer3.2.alpha2 1.330067753791809
layer3.3.alpha1 0.5720114707946777
layer3.3.alpha2 1.2856769561767578
layer3.4.alpha1 0.288417786359787
layer3.4.alpha2 1.2149815559387207
layer3.5.alpha1 0.2067163735628128
layer3.5.a

Epoch: [72][0/313]	Time 0.271 (0.271)	Data 0.156 (0.156)	Loss 0.2562 (0.2562)	Prec@1 89.062 (89.062)
Epoch: [72][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.4001 (0.4438)	Prec@1 85.156 (85.233)
Epoch: [72][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.4071 (0.4472)	Prec@1 88.281 (85.102)
Epoch: [72][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.4270 (0.4519)	Prec@1 84.375 (84.841)
Epoch: [72][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6029 (0.4496)	Prec@1 81.250 (84.826)
Epoch: [72][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4866 (0.4462)	Prec@1 85.156 (84.907)
Epoch: [72][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5896 (0.4474)	Prec@1 83.594 (84.928)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.3522 (0.3522)	Prec@1 88.672 (88.672)
 * Prec@1 86.890
alpha1 1.7999329566955566
layer1.0.alpha1 1.0779958963394165
layer1.0.alpha2 1.7223830223083496
layer1.1.alpha1 0.7352118492126465
layer1.1.alpha2 1.6721761226654053
layer1.2.alpha1 0.83596

Epoch: [76][100/313]	Time 0.110 (0.117)	Data 0.000 (0.002)	Loss 0.3923 (0.4301)	Prec@1 86.719 (85.543)
Epoch: [76][150/313]	Time 0.140 (0.117)	Data 0.000 (0.001)	Loss 0.3963 (0.4350)	Prec@1 83.594 (85.182)
Epoch: [76][200/313]	Time 0.108 (0.116)	Data 0.000 (0.001)	Loss 0.4552 (0.4349)	Prec@1 84.375 (85.281)
Epoch: [76][250/313]	Time 0.109 (0.116)	Data 0.000 (0.001)	Loss 0.4205 (0.4381)	Prec@1 87.500 (85.122)
Epoch: [76][300/313]	Time 0.109 (0.116)	Data 0.000 (0.001)	Loss 0.4121 (0.4359)	Prec@1 86.719 (85.203)
Test: [0/20]	Time 0.333 (0.333)	Loss 0.3708 (0.3708)	Prec@1 87.305 (87.305)
 * Prec@1 84.040
alpha1 1.753173589706421
layer1.0.alpha1 1.1785317659378052
layer1.0.alpha2 1.5606915950775146
layer1.1.alpha1 0.8137753009796143
layer1.1.alpha2 1.6278825998306274
layer1.2.alpha1 0.8101941347122192
layer1.2.alpha2 1.7991752624511719
layer2.0.alpha1 1.0742900371551514
layer2.0.alpha2 1.48679780960083
layer2.1.alpha1 0.6967580914497375
layer2.1.alpha2 1.4568206071853638
layer2.2.alpha1 0.6

Epoch: [80][200/313]	Time 0.109 (0.116)	Data 0.000 (0.001)	Loss 0.2878 (0.3349)	Prec@1 89.844 (88.930)
Epoch: [80][250/313]	Time 0.108 (0.116)	Data 0.000 (0.001)	Loss 0.2432 (0.3270)	Prec@1 92.188 (89.209)
Epoch: [80][300/313]	Time 0.109 (0.116)	Data 0.000 (0.001)	Loss 0.3606 (0.3182)	Prec@1 87.500 (89.506)
Test: [0/20]	Time 0.401 (0.401)	Loss 0.1846 (0.1846)	Prec@1 94.141 (94.141)
 * Prec@1 91.950
alpha1 1.6547191143035889
layer1.0.alpha1 1.0677624940872192
layer1.0.alpha2 1.567168116569519
layer1.1.alpha1 0.8466954231262207
layer1.1.alpha2 1.584686517715454
layer1.2.alpha1 0.818475067615509
layer1.2.alpha2 1.6428755521774292
layer2.0.alpha1 0.9676830768585205
layer2.0.alpha2 1.429107904434204
layer2.1.alpha1 0.7305823564529419
layer2.1.alpha2 1.4525209665298462
layer2.2.alpha1 0.6900168657302856
layer2.2.alpha2 1.4510332345962524
layer2.3.alpha1 0.6124082207679749
layer2.3.alpha2 1.4246540069580078
layer3.0.alpha1 1.0749471187591553
layer3.0.alpha2 1.4868580102920532
layer3.1.alpha1 

Epoch: [84][300/313]	Time 0.123 (0.116)	Data 0.000 (0.001)	Loss 0.1372 (0.2329)	Prec@1 95.312 (92.476)
Test: [0/20]	Time 0.389 (0.389)	Loss 0.2144 (0.2144)	Prec@1 93.164 (93.164)
 * Prec@1 92.600
alpha1 1.5946367979049683
layer1.0.alpha1 1.0141961574554443
layer1.0.alpha2 1.4996004104614258
layer1.1.alpha1 0.7954422831535339
layer1.1.alpha2 1.5128214359283447
layer1.2.alpha1 0.7692194581031799
layer1.2.alpha2 1.5724291801452637
layer2.0.alpha1 0.9252026081085205
layer2.0.alpha2 1.3328216075897217
layer2.1.alpha1 0.6471471190452576
layer2.1.alpha2 1.335288166999817
layer2.2.alpha1 0.6154276132583618
layer2.2.alpha2 1.3798960447311401
layer2.3.alpha1 0.5810850262641907
layer2.3.alpha2 1.3821769952774048
layer3.0.alpha1 1.0317678451538086
layer3.0.alpha2 1.3913865089416504
layer3.1.alpha1 0.8280764818191528
layer3.1.alpha2 1.3661199808120728
layer3.2.alpha1 0.6934848427772522
layer3.2.alpha2 1.2722185850143433
layer3.3.alpha1 0.48502644896507263
layer3.3.alpha2 1.2090293169021606
layer3.4

 * Prec@1 92.600
alpha1 1.5457289218902588
layer1.0.alpha1 0.9756566882133484
layer1.0.alpha2 1.4513094425201416
layer1.1.alpha1 0.7501845359802246
layer1.1.alpha2 1.5156314373016357
layer1.2.alpha1 0.7429919838905334
layer1.2.alpha2 1.4933143854141235
layer2.0.alpha1 0.8976156115531921
layer2.0.alpha2 1.2569096088409424
layer2.1.alpha1 0.6089692115783691
layer2.1.alpha2 1.2903274297714233
layer2.2.alpha1 0.592948317527771
layer2.2.alpha2 1.321402668952942
layer2.3.alpha1 0.5613116025924683
layer2.3.alpha2 1.3589611053466797
layer3.0.alpha1 1.0157524347305298
layer3.0.alpha2 1.336031198501587
layer3.1.alpha1 0.7923497557640076
layer3.1.alpha2 1.294767141342163
layer3.2.alpha1 0.6355655193328857
layer3.2.alpha2 1.2016217708587646
layer3.3.alpha1 0.4607127010822296
layer3.3.alpha2 1.14681077003479
layer3.4.alpha1 0.2770111560821533
layer3.4.alpha2 1.086606502532959
layer3.5.alpha1 0.19621224701404572
layer3.5.alpha2 1.0857200622558594
layer4.0.alpha1 0.41702061891555786
layer4.0.alpha2 1

Epoch: [93][0/313]	Time 0.269 (0.269)	Data 0.154 (0.154)	Loss 0.2288 (0.2288)	Prec@1 95.312 (95.312)
Epoch: [93][50/313]	Time 0.114 (0.120)	Data 0.000 (0.003)	Loss 0.1804 (0.2130)	Prec@1 96.094 (93.352)
Epoch: [93][100/313]	Time 0.109 (0.118)	Data 0.000 (0.002)	Loss 0.0699 (0.2025)	Prec@1 99.219 (93.549)
Epoch: [93][150/313]	Time 0.108 (0.117)	Data 0.000 (0.001)	Loss 0.1119 (0.2003)	Prec@1 97.656 (93.605)
Epoch: [93][200/313]	Time 0.110 (0.117)	Data 0.000 (0.001)	Loss 0.2131 (0.2025)	Prec@1 93.750 (93.455)
Epoch: [93][250/313]	Time 0.109 (0.117)	Data 0.000 (0.001)	Loss 0.2155 (0.2025)	Prec@1 93.750 (93.470)
Epoch: [93][300/313]	Time 0.109 (0.116)	Data 0.000 (0.001)	Loss 0.1396 (0.2032)	Prec@1 96.094 (93.426)
Test: [0/20]	Time 0.362 (0.362)	Loss 0.1921 (0.1921)	Prec@1 94.336 (94.336)
 * Prec@1 92.870
alpha1 1.500410795211792
layer1.0.alpha1 0.9148073196411133
layer1.0.alpha2 1.4120557308197021
layer1.1.alpha1 0.6998856067657471
layer1.1.alpha2 1.4334625005722046
layer1.2.alpha1 0.731171

Epoch: [97][100/313]	Time 0.109 (0.118)	Data 0.000 (0.002)	Loss 0.1987 (0.1947)	Prec@1 95.312 (93.742)
Epoch: [97][150/313]	Time 0.121 (0.117)	Data 0.000 (0.001)	Loss 0.2101 (0.1983)	Prec@1 92.969 (93.564)
Epoch: [97][200/313]	Time 0.109 (0.117)	Data 0.000 (0.001)	Loss 0.1982 (0.1963)	Prec@1 93.750 (93.653)
Epoch: [97][250/313]	Time 0.109 (0.117)	Data 0.000 (0.001)	Loss 0.2230 (0.1954)	Prec@1 92.969 (93.675)
Epoch: [97][300/313]	Time 0.109 (0.116)	Data 0.000 (0.001)	Loss 0.2173 (0.1953)	Prec@1 94.531 (93.685)
Test: [0/20]	Time 0.399 (0.399)	Loss 0.1964 (0.1964)	Prec@1 94.922 (94.922)
 * Prec@1 92.800
alpha1 1.4765255451202393
layer1.0.alpha1 0.9005546569824219
layer1.0.alpha2 1.3886820077896118
layer1.1.alpha1 0.6575896143913269
layer1.1.alpha2 1.4010095596313477
layer1.2.alpha1 0.6709086298942566
layer1.2.alpha2 1.4071789979934692
layer2.0.alpha1 0.8145642280578613
layer2.0.alpha2 1.2076257467269897
layer2.1.alpha1 0.5408515930175781
layer2.1.alpha2 1.205383539199829
layer2.2.alpha1 0

Epoch: [101][200/313]	Time 0.123 (0.117)	Data 0.000 (0.001)	Loss 0.1838 (0.1873)	Prec@1 94.531 (93.855)
Epoch: [101][250/313]	Time 0.114 (0.117)	Data 0.000 (0.001)	Loss 0.1804 (0.1893)	Prec@1 92.969 (93.871)
Epoch: [101][300/313]	Time 0.120 (0.117)	Data 0.000 (0.001)	Loss 0.1356 (0.1882)	Prec@1 96.094 (93.939)
Test: [0/20]	Time 0.344 (0.344)	Loss 0.1994 (0.1994)	Prec@1 94.141 (94.141)
 * Prec@1 92.820
alpha1 1.42070472240448
layer1.0.alpha1 0.8865939378738403
layer1.0.alpha2 1.3576805591583252
layer1.1.alpha1 0.6523462533950806
layer1.1.alpha2 1.3572964668273926
layer1.2.alpha1 0.6738380193710327
layer1.2.alpha2 1.375844120979309
layer2.0.alpha1 0.8055495023727417
layer2.0.alpha2 1.187220573425293
layer2.1.alpha1 0.5434094667434692
layer2.1.alpha2 1.172606348991394
layer2.2.alpha1 0.5265334248542786
layer2.2.alpha2 1.2042059898376465
layer2.3.alpha1 0.47590142488479614
layer2.3.alpha2 1.248658299446106
layer3.0.alpha1 0.9402987957000732
layer3.0.alpha2 1.19547438621521
layer3.1.alpha1 

Epoch: [105][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1807 (0.1834)	Prec@1 92.188 (94.025)
Test: [0/20]	Time 0.335 (0.335)	Loss 0.2014 (0.2014)	Prec@1 93.555 (93.555)
 * Prec@1 92.870
alpha1 1.4266929626464844
layer1.0.alpha1 0.8682205677032471
layer1.0.alpha2 1.3036717176437378
layer1.1.alpha1 0.6208968162536621
layer1.1.alpha2 1.335862636566162
layer1.2.alpha1 0.68681401014328
layer1.2.alpha2 1.3635956048965454
layer2.0.alpha1 0.766722559928894
layer2.0.alpha2 1.1623610258102417
layer2.1.alpha1 0.5093597769737244
layer2.1.alpha2 1.1474742889404297
layer2.2.alpha1 0.5055888295173645
layer2.2.alpha2 1.1744581460952759
layer2.3.alpha1 0.4707814157009125
layer2.3.alpha2 1.1973986625671387
layer3.0.alpha1 0.9182600975036621
layer3.0.alpha2 1.158768653869629
layer3.1.alpha1 0.6984725594520569
layer3.1.alpha2 1.1207271814346313
layer3.2.alpha1 0.4839012324810028
layer3.2.alpha2 1.0386658906936646
layer3.3.alpha1 0.36073023080825806
layer3.3.alpha2 1.0018810033798218
layer3.4.al

 * Prec@1 93.080
alpha1 1.398389458656311
layer1.0.alpha1 0.8389256596565247
layer1.0.alpha2 1.2643662691116333
layer1.1.alpha1 0.6199294924736023
layer1.1.alpha2 1.278612732887268
layer1.2.alpha1 0.6578540205955505
layer1.2.alpha2 1.369733214378357
layer2.0.alpha1 0.7978963851928711
layer2.0.alpha2 1.1114006042480469
layer2.1.alpha1 0.5107442140579224
layer2.1.alpha2 1.1402579545974731
layer2.2.alpha1 0.5057092905044556
layer2.2.alpha2 1.1634881496429443
layer2.3.alpha1 0.4848504066467285
layer2.3.alpha2 1.1891279220581055
layer3.0.alpha1 0.9123969674110413
layer3.0.alpha2 1.138243317604065
layer3.1.alpha1 0.658606231212616
layer3.1.alpha2 1.0887829065322876
layer3.2.alpha1 0.4707837998867035
layer3.2.alpha2 1.017424464225769
layer3.3.alpha1 0.3352021276950836
layer3.3.alpha2 0.9491218328475952
layer3.4.alpha1 0.2280673235654831
layer3.4.alpha2 0.9292393326759338
layer3.5.alpha1 0.1631244271993637
layer3.5.alpha2 0.919111967086792
layer4.0.alpha1 0.3263901174068451
layer4.0.alpha2 1.3

Epoch: [114][0/313]	Time 0.271 (0.271)	Data 0.155 (0.155)	Loss 0.2694 (0.2694)	Prec@1 90.625 (90.625)
Epoch: [114][50/313]	Time 0.108 (0.112)	Data 0.000 (0.003)	Loss 0.1167 (0.1744)	Prec@1 96.875 (94.072)
Epoch: [114][100/313]	Time 0.107 (0.110)	Data 0.000 (0.002)	Loss 0.1463 (0.1787)	Prec@1 92.969 (94.075)
Epoch: [114][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 0.1911 (0.1800)	Prec@1 94.531 (94.024)
Epoch: [114][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1429 (0.1773)	Prec@1 95.312 (94.166)
Epoch: [114][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.3814 (0.1773)	Prec@1 88.281 (94.183)
Epoch: [114][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1238 (0.1783)	Prec@1 96.875 (94.113)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.1622 (0.1622)	Prec@1 94.141 (94.141)
 * Prec@1 92.820
alpha1 1.3685739040374756
layer1.0.alpha1 0.7948209047317505
layer1.0.alpha2 1.2555660009384155
layer1.1.alpha1 0.6104722619056702
layer1.1.alpha2 1.2682812213897705
layer1.2.alpha1 

Epoch: [118][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.1316 (0.1764)	Prec@1 96.875 (94.392)
Epoch: [118][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.2020 (0.1736)	Prec@1 92.188 (94.392)
Epoch: [118][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.0708 (0.1754)	Prec@1 98.438 (94.372)
Epoch: [118][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1219 (0.1742)	Prec@1 98.438 (94.466)
Epoch: [118][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.2507 (0.1747)	Prec@1 92.969 (94.412)
Test: [0/20]	Time 0.315 (0.315)	Loss 0.1764 (0.1764)	Prec@1 94.727 (94.727)
 * Prec@1 92.790
alpha1 1.3259938955307007
layer1.0.alpha1 0.7587750554084778
layer1.0.alpha2 1.2264297008514404
layer1.1.alpha1 0.5921331644058228
layer1.1.alpha2 1.2450515031814575
layer1.2.alpha1 0.6090666651725769
layer1.2.alpha2 1.2913451194763184
layer2.0.alpha1 0.7557665705680847
layer2.0.alpha2 1.0607892274856567
layer2.1.alpha1 0.46517643332481384
layer2.1.alpha2 1.0746971368789673
layer2.2.a

Epoch: [122][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.2230 (0.1674)	Prec@1 94.531 (94.660)
Epoch: [122][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.3607 (0.1685)	Prec@1 88.281 (94.578)
Epoch: [122][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1551 (0.1682)	Prec@1 96.094 (94.635)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.1520 (0.1520)	Prec@1 94.531 (94.531)
 * Prec@1 92.980
alpha1 1.2811026573181152
layer1.0.alpha1 0.7495992183685303
layer1.0.alpha2 1.1831096410751343
layer1.1.alpha1 0.5388903021812439
layer1.1.alpha2 1.2093883752822876
layer1.2.alpha1 0.5944240093231201
layer1.2.alpha2 1.2765907049179077
layer2.0.alpha1 0.7242487668991089
layer2.0.alpha2 1.0724809169769287
layer2.1.alpha1 0.48153120279312134
layer2.1.alpha2 1.0820709466934204
layer2.2.alpha1 0.4636611044406891
layer2.2.alpha2 1.1177736520767212
layer2.3.alpha1 0.4212799370288849
layer2.3.alpha2 1.1586496829986572
layer3.0.alpha1 0.8481519818305969
layer3.0.alpha2 1.0868170261383057
layer3.1

Epoch: [126][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1292 (0.1717)	Prec@1 96.875 (94.326)
Epoch: [126][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1337 (0.1717)	Prec@1 93.750 (94.329)
Test: [0/20]	Time 0.318 (0.318)	Loss 0.1915 (0.1915)	Prec@1 94.141 (94.141)
 * Prec@1 92.940
alpha1 1.2671467065811157
layer1.0.alpha1 0.760596513748169
layer1.0.alpha2 1.1920392513275146
layer1.1.alpha1 0.5607871413230896
layer1.1.alpha2 1.15924870967865
layer1.2.alpha1 0.603197455406189
layer1.2.alpha2 1.2615488767623901
layer2.0.alpha1 0.7307007312774658
layer2.0.alpha2 1.0395824909210205
layer2.1.alpha1 0.46395689249038696
layer2.1.alpha2 1.0653074979782104
layer2.2.alpha1 0.4485989511013031
layer2.2.alpha2 1.1001923084259033
layer2.3.alpha1 0.43477222323417664
layer2.3.alpha2 1.1117994785308838
layer3.0.alpha1 0.8409996032714844
layer3.0.alpha2 1.0683975219726562
layer3.1.alpha1 0.5937931537628174
layer3.1.alpha2 1.0444858074188232
layer3.2.alpha1 0.4262304902076721
layer3.2.a

Test: [0/20]	Time 0.331 (0.331)	Loss 0.1943 (0.1943)	Prec@1 94.336 (94.336)
 * Prec@1 92.660
alpha1 1.2492605447769165
layer1.0.alpha1 0.7540556788444519
layer1.0.alpha2 1.1812463998794556
layer1.1.alpha1 0.5530132055282593
layer1.1.alpha2 1.1909265518188477
layer1.2.alpha1 0.5990074872970581
layer1.2.alpha2 1.2355616092681885
layer2.0.alpha1 0.7043567299842834
layer2.0.alpha2 1.0087846517562866
layer2.1.alpha1 0.44861099123954773
layer2.1.alpha2 1.0630345344543457
layer2.2.alpha1 0.40532809495925903
layer2.2.alpha2 1.0858508348464966
layer2.3.alpha1 0.4397321343421936
layer2.3.alpha2 1.1025967597961426
layer3.0.alpha1 0.8302791118621826
layer3.0.alpha2 1.0492135286331177
layer3.1.alpha1 0.6038914918899536
layer3.1.alpha2 1.006611943244934
layer3.2.alpha1 0.3869554102420807
layer3.2.alpha2 0.9522848129272461
layer3.3.alpha1 0.34103628993034363
layer3.3.alpha2 0.8927245140075684
layer3.4.alpha1 0.2018795907497406
layer3.4.alpha2 0.8818264603614807
layer3.5.alpha1 0.13805080950260162
lay

Epoch: [135][0/313]	Time 0.282 (0.282)	Data 0.167 (0.167)	Loss 0.1391 (0.1391)	Prec@1 95.312 (95.312)
Epoch: [135][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.1632 (0.1450)	Prec@1 94.531 (95.404)
Epoch: [135][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.2444 (0.1599)	Prec@1 93.750 (94.926)
Epoch: [135][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 0.1389 (0.1601)	Prec@1 95.312 (94.919)
Epoch: [135][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.0711 (0.1612)	Prec@1 98.438 (94.846)
Epoch: [135][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.0598 (0.1646)	Prec@1 98.438 (94.693)
Epoch: [135][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1932 (0.1654)	Prec@1 93.750 (94.658)
Test: [0/20]	Time 0.319 (0.319)	Loss 0.1997 (0.1997)	Prec@1 94.336 (94.336)
 * Prec@1 92.730
alpha1 1.2185629606246948
layer1.0.alpha1 0.7271982431411743
layer1.0.alpha2 1.123504638671875
layer1.1.alpha1 0.5152406096458435
layer1.1.alpha2 1.144272804260254
layer1.2.alpha1 0.

Epoch: [139][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.1430 (0.1669)	Prec@1 92.969 (94.454)
Epoch: [139][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 0.1650 (0.1630)	Prec@1 96.875 (94.630)
Epoch: [139][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1587 (0.1648)	Prec@1 94.531 (94.566)
Epoch: [139][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.2725 (0.1651)	Prec@1 92.188 (94.547)
Epoch: [139][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1328 (0.1638)	Prec@1 95.312 (94.601)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.1872 (0.1872)	Prec@1 94.922 (94.922)
 * Prec@1 92.800
alpha1 1.2298240661621094
layer1.0.alpha1 0.7080333232879639
layer1.0.alpha2 1.127779245376587
layer1.1.alpha1 0.5405083298683167
layer1.1.alpha2 1.1167216300964355
layer1.2.alpha1 0.5842786431312561
layer1.2.alpha2 1.2504233121871948
layer2.0.alpha1 0.6875380277633667
layer2.0.alpha2 0.9514802694320679
layer2.1.alpha1 0.4739231765270233
layer2.1.alpha2 1.0009688138961792
layer2.2.alp

Epoch: [143][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1989 (0.1569)	Prec@1 94.531 (94.924)
Epoch: [143][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1503 (0.1603)	Prec@1 96.094 (94.796)
Epoch: [143][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1781 (0.1598)	Prec@1 95.312 (94.793)
Test: [0/20]	Time 0.330 (0.330)	Loss 0.1835 (0.1835)	Prec@1 93.945 (93.945)
 * Prec@1 92.890
alpha1 1.2139036655426025
layer1.0.alpha1 0.7019639015197754
layer1.0.alpha2 1.0978015661239624
layer1.1.alpha1 0.5164363384246826
layer1.1.alpha2 1.1324706077575684
layer1.2.alpha1 0.5501953959465027
layer1.2.alpha2 1.170135259628296
layer2.0.alpha1 0.6897860765457153
layer2.0.alpha2 0.9496346712112427
layer2.1.alpha1 0.455206036567688
layer2.1.alpha2 0.9867278933525085
layer2.2.alpha1 0.4001162052154541
layer2.2.alpha2 1.0426020622253418
layer2.3.alpha1 0.41833487153053284
layer2.3.alpha2 1.0981134176254272
layer3.0.alpha1 0.7982817888259888
layer3.0.alpha2 1.0405341386795044
layer3.1.a

Epoch: [147][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1805 (0.1605)	Prec@1 92.969 (94.905)
Epoch: [147][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.0655 (0.1613)	Prec@1 98.438 (94.895)
Test: [0/20]	Time 0.329 (0.329)	Loss 0.1744 (0.1744)	Prec@1 94.336 (94.336)
 * Prec@1 92.900
alpha1 1.1649295091629028
layer1.0.alpha1 0.6828012466430664
layer1.0.alpha2 1.0676630735397339
layer1.1.alpha1 0.5216249227523804
layer1.1.alpha2 1.1100579500198364
layer1.2.alpha1 0.5494924187660217
layer1.2.alpha2 1.1765598058700562
layer2.0.alpha1 0.6892198920249939
layer2.0.alpha2 0.9541911482810974
layer2.1.alpha1 0.4646047055721283
layer2.1.alpha2 1.0141199827194214
layer2.2.alpha1 0.40337803959846497
layer2.2.alpha2 1.022439956665039
layer2.3.alpha1 0.4140152931213379
layer2.3.alpha2 1.090598702430725
layer3.0.alpha1 0.7943000793457031
layer3.0.alpha2 1.0489708185195923
layer3.1.alpha1 0.5444164872169495
layer3.1.alpha2 0.9897273778915405
layer3.2.alpha1 0.43935444951057434
layer3.2

Epoch: [151][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1227 (0.1394)	Prec@1 96.875 (95.492)
Test: [0/20]	Time 0.331 (0.331)	Loss 0.1509 (0.1509)	Prec@1 94.531 (94.531)
 * Prec@1 93.130
alpha1 1.1450355052947998
layer1.0.alpha1 0.6796496510505676
layer1.0.alpha2 1.0847922563552856
layer1.1.alpha1 0.491433322429657
layer1.1.alpha2 1.1015028953552246
layer1.2.alpha1 0.5593260526657104
layer1.2.alpha2 1.1706759929656982
layer2.0.alpha1 0.6844503879547119
layer2.0.alpha2 0.9612757563591003
layer2.1.alpha1 0.4320349097251892
layer2.1.alpha2 0.961780309677124
layer2.2.alpha1 0.41214650869369507
layer2.2.alpha2 1.0036017894744873
layer2.3.alpha1 0.4050374925136566
layer2.3.alpha2 1.0781729221343994
layer3.0.alpha1 0.7922537922859192
layer3.0.alpha2 1.0309466123580933
layer3.1.alpha1 0.5446218252182007
layer3.1.alpha2 0.9842133522033691
layer3.2.alpha1 0.44940194487571716
layer3.2.alpha2 0.8967893123626709
layer3.3.alpha1 0.28010210394859314
layer3.3.alpha2 0.849279522895813
layer3.

Test: [0/20]	Time 0.326 (0.326)	Loss 0.1552 (0.1552)	Prec@1 95.312 (95.312)
 * Prec@1 93.580
alpha1 1.1468119621276855
layer1.0.alpha1 0.6764621734619141
layer1.0.alpha2 1.0770772695541382
layer1.1.alpha1 0.492013543844223
layer1.1.alpha2 1.1024389266967773
layer1.2.alpha1 0.553909182548523
layer1.2.alpha2 1.1651054620742798
layer2.0.alpha1 0.6833129525184631
layer2.0.alpha2 0.9532438516616821
layer2.1.alpha1 0.42893049120903015
layer2.1.alpha2 0.9593490958213806
layer2.2.alpha1 0.4101560711860657
layer2.2.alpha2 1.0071934461593628
layer2.3.alpha1 0.4047885537147522
layer2.3.alpha2 1.0752649307250977
layer3.0.alpha1 0.7879959344863892
layer3.0.alpha2 1.0295883417129517
layer3.1.alpha1 0.545925498008728
layer3.1.alpha2 0.982002854347229
layer3.2.alpha1 0.44290557503700256
layer3.2.alpha2 0.8935194611549377
layer3.3.alpha1 0.28106996417045593
layer3.3.alpha2 0.8417251706123352
layer3.4.alpha1 0.17852339148521423
layer3.4.alpha2 0.8438495993614197
layer3.5.alpha1 0.1291918009519577
layer3

Epoch: [160][0/313]	Time 0.282 (0.282)	Data 0.166 (0.166)	Loss 0.1157 (0.1157)	Prec@1 96.875 (96.875)
Epoch: [160][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.0988 (0.1354)	Prec@1 96.094 (95.374)
Epoch: [160][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.1146 (0.1262)	Prec@1 96.875 (95.885)
Epoch: [160][150/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.0897 (0.1306)	Prec@1 97.656 (95.742)
Epoch: [160][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1131 (0.1314)	Prec@1 96.875 (95.709)
Epoch: [160][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1686 (0.1314)	Prec@1 93.750 (95.723)
Epoch: [160][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.0738 (0.1320)	Prec@1 97.656 (95.689)
Test: [0/20]	Time 0.321 (0.321)	Loss 0.1372 (0.1372)	Prec@1 95.703 (95.703)
 * Prec@1 93.580
alpha1 1.1359068155288696
layer1.0.alpha1 0.668702244758606
layer1.0.alpha2 1.0697113275527954
layer1.1.alpha1 0.4833233654499054
layer1.1.alpha2 1.0993314981460571
layer1.2.alpha1 0

Epoch: [164][100/313]	Time 0.107 (0.110)	Data 0.000 (0.002)	Loss 0.0675 (0.1292)	Prec@1 99.219 (95.692)
Epoch: [164][150/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.2105 (0.1269)	Prec@1 94.531 (95.825)
Epoch: [164][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1212 (0.1258)	Prec@1 96.094 (95.857)
Epoch: [164][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1635 (0.1273)	Prec@1 94.531 (95.829)
Epoch: [164][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1338 (0.1263)	Prec@1 97.656 (95.884)
Test: [0/20]	Time 0.320 (0.320)	Loss 0.1631 (0.1631)	Prec@1 95.508 (95.508)
 * Prec@1 93.550
alpha1 1.1346478462219238
layer1.0.alpha1 0.6654086709022522
layer1.0.alpha2 1.0641953945159912
layer1.1.alpha1 0.47867244482040405
layer1.1.alpha2 1.0938374996185303
layer1.2.alpha1 0.5462289452552795
layer1.2.alpha2 1.147471308708191
layer2.0.alpha1 0.6796857118606567
layer2.0.alpha2 0.9473450183868408
layer2.1.alpha1 0.4135299324989319
layer2.1.alpha2 0.9629179239273071
layer2.2.al

Epoch: [168][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1664 (0.1259)	Prec@1 93.750 (95.888)
Epoch: [168][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1224 (0.1248)	Prec@1 96.094 (95.885)
Epoch: [168][300/313]	Time 0.112 (0.109)	Data 0.000 (0.001)	Loss 0.1594 (0.1269)	Prec@1 95.312 (95.816)
Test: [0/20]	Time 0.321 (0.321)	Loss 0.1563 (0.1563)	Prec@1 95.898 (95.898)
 * Prec@1 93.370
alpha1 1.1308811902999878
layer1.0.alpha1 0.6682103872299194
layer1.0.alpha2 1.0579099655151367
layer1.1.alpha1 0.4793223440647125
layer1.1.alpha2 1.0927743911743164
layer1.2.alpha1 0.5384210348129272
layer1.2.alpha2 1.1430765390396118
layer2.0.alpha1 0.6832829117774963
layer2.0.alpha2 0.9462653398513794
layer2.1.alpha1 0.4175577461719513
layer2.1.alpha2 0.9606724381446838
layer2.2.alpha1 0.4097422957420349
layer2.2.alpha2 1.0010019540786743
layer2.3.alpha1 0.4015011787414551
layer2.3.alpha2 1.06105637550354
layer3.0.alpha1 0.7820574641227722
layer3.0.alpha2 1.0169492959976196
layer3.1.al

Epoch: [172][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.2352 (0.1296)	Prec@1 92.969 (95.772)
Test: [0/20]	Time 0.333 (0.333)	Loss 0.1730 (0.1730)	Prec@1 93.945 (93.945)
 * Prec@1 93.410
alpha1 1.1232537031173706
layer1.0.alpha1 0.6623971462249756
layer1.0.alpha2 1.0514527559280396
layer1.1.alpha1 0.4751683473587036
layer1.1.alpha2 1.081626534461975
layer1.2.alpha1 0.5355322360992432
layer1.2.alpha2 1.1390118598937988
layer2.0.alpha1 0.6788932681083679
layer2.0.alpha2 0.9396935701370239
layer2.1.alpha1 0.41555628180503845
layer2.1.alpha2 0.958233118057251
layer2.2.alpha1 0.4064897894859314
layer2.2.alpha2 0.9992625713348389
layer2.3.alpha1 0.40225258469581604
layer2.3.alpha2 1.057071328163147
layer3.0.alpha1 0.7813675999641418
layer3.0.alpha2 1.013181447982788
layer3.1.alpha1 0.5462113618850708
layer3.1.alpha2 0.9714510440826416
layer3.2.alpha1 0.43357375264167786
layer3.2.alpha2 0.8723371624946594
layer3.3.alpha1 0.2939956486225128
layer3.3.alpha2 0.8151694536209106
layer3.4

Test: [0/20]	Time 0.325 (0.325)	Loss 0.1913 (0.1913)	Prec@1 93.750 (93.750)
 * Prec@1 93.270
alpha1 1.1229842901229858
layer1.0.alpha1 0.658052921295166
layer1.0.alpha2 1.0526448488235474
layer1.1.alpha1 0.4724486768245697
layer1.1.alpha2 1.082424521446228
layer1.2.alpha1 0.5326019525527954
layer1.2.alpha2 1.134487509727478
layer2.0.alpha1 0.6700423955917358
layer2.0.alpha2 0.9364484548568726
layer2.1.alpha1 0.41017934679985046
layer2.1.alpha2 0.9503216743469238
layer2.2.alpha1 0.40435710549354553
layer2.2.alpha2 0.9883547425270081
layer2.3.alpha1 0.3990665376186371
layer2.3.alpha2 1.0567697286605835
layer3.0.alpha1 0.7782252430915833
layer3.0.alpha2 1.0063115358352661
layer3.1.alpha1 0.5416474342346191
layer3.1.alpha2 0.965814471244812
layer3.2.alpha1 0.43137118220329285
layer3.2.alpha2 0.8678380846977234
layer3.3.alpha1 0.2983505129814148
layer3.3.alpha2 0.8127109408378601
layer3.4.alpha1 0.17757289111614227
layer3.4.alpha2 0.8179670572280884
layer3.5.alpha1 0.1328008770942688
layer3

Epoch: [181][0/313]	Time 0.270 (0.270)	Data 0.161 (0.161)	Loss 0.0491 (0.0491)	Prec@1 98.438 (98.438)
Epoch: [181][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.1946 (0.1227)	Prec@1 95.312 (95.757)
Epoch: [181][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.1351 (0.1221)	Prec@1 96.094 (95.947)
Epoch: [181][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.1758 (0.1229)	Prec@1 94.531 (95.970)
Epoch: [181][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1432 (0.1227)	Prec@1 96.094 (95.997)
Epoch: [181][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1404 (0.1231)	Prec@1 95.312 (95.979)
Epoch: [181][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1854 (0.1237)	Prec@1 92.188 (95.951)
Test: [0/20]	Time 0.323 (0.323)	Loss 0.1667 (0.1667)	Prec@1 95.703 (95.703)
 * Prec@1 93.600
alpha1 1.1184478998184204
layer1.0.alpha1 0.6538568735122681
layer1.0.alpha2 1.046498417854309
layer1.1.alpha1 0.4736551344394684
layer1.1.alpha2 1.077471375465393
layer1.2.alpha1 0.

Epoch: [185][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.1144 (0.1190)	Prec@1 96.875 (96.179)
Epoch: [185][150/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1313 (0.1200)	Prec@1 95.312 (96.089)
Epoch: [185][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.0699 (0.1216)	Prec@1 99.219 (96.086)
Epoch: [185][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1170 (0.1208)	Prec@1 96.094 (96.109)
Epoch: [185][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1587 (0.1221)	Prec@1 96.875 (96.068)
Test: [0/20]	Time 0.323 (0.323)	Loss 0.1796 (0.1796)	Prec@1 94.727 (94.727)
 * Prec@1 93.640
alpha1 1.113624095916748
layer1.0.alpha1 0.6496083736419678
layer1.0.alpha2 1.0386254787445068
layer1.1.alpha1 0.47180286049842834
layer1.1.alpha2 1.0735379457473755
layer1.2.alpha1 0.5237091183662415
layer1.2.alpha2 1.1257151365280151
layer2.0.alpha1 0.6647774577140808
layer2.0.alpha2 0.9235512614250183
layer2.1.alpha1 0.4073537290096283
layer2.1.alpha2 0.9353235363960266
layer2.2.al

Epoch: [189][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1197 (0.1223)	Prec@1 94.531 (95.892)
Epoch: [189][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.1172 (0.1220)	Prec@1 96.094 (95.907)
Epoch: [189][300/313]	Time 0.107 (0.109)	Data 0.000 (0.001)	Loss 0.0981 (0.1235)	Prec@1 96.875 (95.881)
Test: [0/20]	Time 0.329 (0.329)	Loss 0.1764 (0.1764)	Prec@1 94.922 (94.922)
 * Prec@1 93.580
alpha1 1.1156200170516968
layer1.0.alpha1 0.6470284461975098
layer1.0.alpha2 1.0313042402267456
layer1.1.alpha1 0.46277695894241333
layer1.1.alpha2 1.0675678253173828
layer1.2.alpha1 0.5201451778411865
layer1.2.alpha2 1.1260441541671753
layer2.0.alpha1 0.6671329140663147
layer2.0.alpha2 0.922121524810791
layer2.1.alpha1 0.4099356234073639
layer2.1.alpha2 0.9369917511940002
layer2.2.alpha1 0.4012620151042938
layer2.2.alpha2 0.9787195920944214
layer2.3.alpha1 0.4061983525753021
layer2.3.alpha2 1.0476830005645752
layer3.0.alpha1 0.7727958559989929
layer3.0.alpha2 0.9981332421302795
layer3.1.

Epoch: [193][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.1356 (0.1248)	Prec@1 96.094 (95.886)
Test: [0/20]	Time 0.327 (0.327)	Loss 0.1906 (0.1906)	Prec@1 94.531 (94.531)
 * Prec@1 93.660
alpha1 1.1117303371429443
layer1.0.alpha1 0.6459274291992188
layer1.0.alpha2 1.028954029083252
layer1.1.alpha1 0.4608455300331116
layer1.1.alpha2 1.0628037452697754
layer1.2.alpha1 0.5193889737129211
layer1.2.alpha2 1.1167371273040771
layer2.0.alpha1 0.6645382046699524
layer2.0.alpha2 0.922364354133606
layer2.1.alpha1 0.41263484954833984
layer2.1.alpha2 0.9349226355552673
layer2.2.alpha1 0.4032549262046814
layer2.2.alpha2 0.9828205704689026
layer2.3.alpha1 0.40585076808929443
layer2.3.alpha2 1.0517452955245972
layer3.0.alpha1 0.7735390663146973
layer3.0.alpha2 0.9934795498847961
layer3.1.alpha1 0.541293203830719
layer3.1.alpha2 0.9508224129676819
layer3.2.alpha1 0.4288386106491089
layer3.2.alpha2 0.8521942496299744
layer3.3.alpha1 0.29235631227493286
layer3.3.alpha2 0.8026523590087891
layer3.

 * Prec@1 93.230
alpha1 1.1030535697937012
layer1.0.alpha1 0.6424785852432251
layer1.0.alpha2 1.0300673246383667
layer1.1.alpha1 0.46058857440948486
layer1.1.alpha2 1.0582972764968872
layer1.2.alpha1 0.5238473415374756
layer1.2.alpha2 1.115735650062561
layer2.0.alpha1 0.6676346063613892
layer2.0.alpha2 0.9225144982337952
layer2.1.alpha1 0.415228933095932
layer2.1.alpha2 0.9342049956321716
layer2.2.alpha1 0.4037855863571167
layer2.2.alpha2 0.9806639552116394
layer2.3.alpha1 0.40250152349472046
layer2.3.alpha2 1.049782633781433
layer3.0.alpha1 0.7709841728210449
layer3.0.alpha2 0.9873412847518921
layer3.1.alpha1 0.5361757278442383
layer3.1.alpha2 0.9476996064186096
layer3.2.alpha1 0.4266394078731537
layer3.2.alpha2 0.8513166904449463
layer3.3.alpha1 0.2937779724597931
layer3.3.alpha2 0.7942644953727722
layer3.4.alpha1 0.16577759385108948
layer3.4.alpha2 0.797274649143219
layer3.5.alpha1 0.12080103158950806
layer3.5.alpha2 0.830065131187439
layer4.0.alpha1 0.26748502254486084
layer4.0.alp

VBox(children=(Label(value=' 1.38MB of 1.38MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,199.0
prec,93.28
train_loss,0.11651
val_loss,0.23478
lr,0.001
_runtime,7351.0
_timestamp,1632968163.0
_step,199.0


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
prec,▁▃▅▆▇▇▇▆▇▇▇▇▇▇▇▇████████████████████████
train_loss,█▆▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▃▃▂▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████████▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


current lr 1.00000e-01
Epoch: [0][0/313]	Time 0.273 (0.273)	Data 0.161 (0.161)	Loss 12.4745 (12.4745)	Prec@1 11.719 (11.719)
Epoch: [0][50/313]	Time 0.108 (0.111)	Data 0.000 (0.003)	Loss 2.9527 (12.0631)	Prec@1 14.062 (11.581)
Epoch: [0][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 2.8935 (7.5485)	Prec@1 17.188 (11.989)
Epoch: [0][150/313]	Time 0.107 (0.109)	Data 0.000 (0.001)	Loss 2.8530 (6.0033)	Prec@1 15.625 (12.821)
Epoch: [0][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 2.7364 (5.2068)	Prec@1 15.625 (13.604)
Epoch: [0][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 2.6846 (4.7121)	Prec@1 22.656 (14.579)
Epoch: [0][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 2.5865 (4.3700)	Prec@1 24.219 (15.428)
Test: [0/20]	Time 0.325 (0.325)	Loss 2.0874 (2.0874)	Prec@1 20.703 (20.703)
 * Prec@1 21.190
alpha1 8.319682121276855
layer1.0.alpha1 8.322556495666504
layer1.0.alpha2 8.279410362243652
layer1.1.alpha1 8.32204532623291
layer1.1.alpha2 8.585256576538086
layer1.2.a

Epoch: [4][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 1.8017 (1.8446)	Prec@1 42.969 (37.505)
Epoch: [4][200/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 1.6869 (1.8273)	Prec@1 48.438 (38.192)
Epoch: [4][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 1.6881 (1.8195)	Prec@1 39.844 (38.350)
Epoch: [4][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 1.6581 (1.8077)	Prec@1 48.438 (38.445)
Test: [0/20]	Time 0.355 (0.355)	Loss 1.5024 (1.5024)	Prec@1 40.430 (40.430)
 * Prec@1 43.290
alpha1 3.882970094680786
layer1.0.alpha1 3.9033429622650146
layer1.0.alpha2 3.9082183837890625
layer1.1.alpha1 3.915431022644043
layer1.1.alpha2 4.054253578186035
layer1.2.alpha1 3.9072186946868896
layer1.2.alpha2 4.192047595977783
layer2.0.alpha1 3.909991502761841
layer2.0.alpha2 3.94856858253479
layer2.1.alpha1 3.91153621673584
layer2.1.alpha2 4.051334857940674
layer2.2.alpha1 3.9142391681671143
layer2.2.alpha2 4.117570877075195
layer2.3.alpha1 3.913560152053833
layer2.3.alpha2 4.539513587951

Epoch: [8][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 1.1826 (1.3100)	Prec@1 62.500 (54.734)
Test: [0/20]	Time 0.320 (0.320)	Loss 1.0356 (1.0356)	Prec@1 63.867 (63.867)
 * Prec@1 61.760
alpha1 2.2428016662597656
layer1.0.alpha1 2.1623361110687256
layer1.0.alpha2 2.3375463485717773
layer1.1.alpha1 2.0049984455108643
layer1.1.alpha2 2.352893590927124
layer1.2.alpha1 1.9843194484710693
layer1.2.alpha2 2.161364793777466
layer2.0.alpha1 1.9070497751235962
layer2.0.alpha2 2.29935884475708
layer2.1.alpha1 2.030308485031128
layer2.1.alpha2 2.381417751312256
layer2.2.alpha1 2.0689756870269775
layer2.2.alpha2 2.325901985168457
layer2.3.alpha1 1.9715300798416138
layer2.3.alpha2 2.3418126106262207
layer3.0.alpha1 1.9619793891906738
layer3.0.alpha2 2.4698643684387207
layer3.1.alpha1 2.162968635559082
layer3.1.alpha2 2.7466483116149902
layer3.2.alpha1 2.2901828289031982
layer3.2.alpha2 2.7835803031921387
layer3.3.alpha1 2.234790086746216
layer3.3.alpha2 2.844963312149048
layer3.4.alpha1 2.2

current lr 1.00000e-01
Epoch: [13][0/313]	Time 0.277 (0.277)	Data 0.167 (0.167)	Loss 0.8611 (0.8611)	Prec@1 70.312 (70.312)
Epoch: [13][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.8593 (0.9656)	Prec@1 67.969 (67.111)
Epoch: [13][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 1.0036 (0.9598)	Prec@1 65.625 (67.234)
Epoch: [13][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 1.0153 (0.9587)	Prec@1 61.719 (67.188)
Epoch: [13][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.8473 (0.9557)	Prec@1 71.875 (67.390)
Epoch: [13][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 1.0136 (0.9547)	Prec@1 64.062 (67.331)
Epoch: [13][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 1.1212 (0.9482)	Prec@1 60.156 (67.569)
Test: [0/20]	Time 0.329 (0.329)	Loss 0.7820 (0.7820)	Prec@1 74.219 (74.219)
 * Prec@1 70.400
alpha1 1.8887394666671753
layer1.0.alpha1 1.4385530948638916
layer1.0.alpha2 1.9451532363891602
layer1.1.alpha1 1.369049310684204
layer1.1.alpha2 1.7916715145111084
l

Epoch: [17][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.7684 (0.7828)	Prec@1 77.344 (73.523)
Epoch: [17][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.8527 (0.7838)	Prec@1 72.656 (73.293)
Epoch: [17][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.8069 (0.7747)	Prec@1 72.656 (73.694)
Epoch: [17][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.7609 (0.7734)	Prec@1 76.562 (73.749)
Epoch: [17][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 1.0409 (0.7769)	Prec@1 63.281 (73.557)
Test: [0/20]	Time 0.330 (0.330)	Loss 0.5847 (0.5847)	Prec@1 80.859 (80.859)
 * Prec@1 78.200
alpha1 1.9761608839035034
layer1.0.alpha1 1.3676644563674927
layer1.0.alpha2 1.945632815361023
layer1.1.alpha1 1.0230052471160889
layer1.1.alpha2 1.8106708526611328
layer1.2.alpha1 0.8862714171409607
layer1.2.alpha2 1.71625554561615
layer2.0.alpha1 0.9787718653678894
layer2.0.alpha2 1.5660370588302612
layer2.1.alpha1 0.9946480393409729
layer2.1.alpha2 1.648918867111206
layer2.2.alpha1 1.01

Epoch: [21][200/313]	Time 0.107 (0.109)	Data 0.000 (0.001)	Loss 0.6345 (0.6737)	Prec@1 78.125 (76.862)
Epoch: [21][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6068 (0.6769)	Prec@1 82.812 (76.868)
Epoch: [21][300/313]	Time 0.108 (0.108)	Data 0.000 (0.001)	Loss 0.8182 (0.6802)	Prec@1 74.219 (76.832)
Test: [0/20]	Time 0.333 (0.333)	Loss 0.7283 (0.7283)	Prec@1 76.953 (76.953)
 * Prec@1 75.380
alpha1 1.784737467765808
layer1.0.alpha1 1.1493327617645264
layer1.0.alpha2 1.8357861042022705
layer1.1.alpha1 0.9083999395370483
layer1.1.alpha2 1.6954426765441895
layer1.2.alpha1 0.770496666431427
layer1.2.alpha2 1.7286823987960815
layer2.0.alpha1 1.0085501670837402
layer2.0.alpha2 1.5356848239898682
layer2.1.alpha1 0.791534423828125
layer2.1.alpha2 1.589536428451538
layer2.2.alpha1 0.8945930004119873
layer2.2.alpha2 1.6041333675384521
layer2.3.alpha1 0.8167982697486877
layer2.3.alpha2 1.635270595550537
layer3.0.alpha1 1.146070957183838
layer3.0.alpha2 1.5573482513427734
layer3.1.alpha1 0.

Epoch: [25][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6453 (0.6284)	Prec@1 77.344 (78.823)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.4988 (0.4988)	Prec@1 84.375 (84.375)
 * Prec@1 82.380
alpha1 1.907018780708313
layer1.0.alpha1 1.068959355354309
layer1.0.alpha2 1.7906323671340942
layer1.1.alpha1 0.9078099727630615
layer1.1.alpha2 1.6970165967941284
layer1.2.alpha1 0.7185609936714172
layer1.2.alpha2 1.7679599523544312
layer2.0.alpha1 0.9795563220977783
layer2.0.alpha2 1.5412085056304932
layer2.1.alpha1 0.712599515914917
layer2.1.alpha2 1.561362385749817
layer2.2.alpha1 0.7323519587516785
layer2.2.alpha2 1.5042937994003296
layer2.3.alpha1 0.756216287612915
layer2.3.alpha2 1.5114690065383911
layer3.0.alpha1 0.9882108569145203
layer3.0.alpha2 1.4934139251708984
layer3.1.alpha1 0.8608906269073486
layer3.1.alpha2 1.5467759370803833
layer3.2.alpha1 0.6163215041160583
layer3.2.alpha2 1.4807026386260986
layer3.3.alpha1 0.695418655872345
layer3.3.alpha2 1.5166727304458618
layer3.4.alpha

Epoch: [30][0/313]	Time 0.279 (0.279)	Data 0.164 (0.164)	Loss 0.6462 (0.6462)	Prec@1 79.688 (79.688)
Epoch: [30][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.4051 (0.5744)	Prec@1 84.375 (80.162)
Epoch: [30][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.7300 (0.5849)	Prec@1 76.562 (79.912)
Epoch: [30][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 0.5361 (0.5853)	Prec@1 82.031 (79.920)
Epoch: [30][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6163 (0.5820)	Prec@1 77.344 (80.072)
Epoch: [30][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4901 (0.5834)	Prec@1 82.812 (79.983)
Epoch: [30][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.7014 (0.5849)	Prec@1 75.781 (80.028)
Test: [0/20]	Time 0.322 (0.322)	Loss 0.3665 (0.3665)	Prec@1 86.523 (86.523)
 * Prec@1 84.220
alpha1 1.8841830492019653
layer1.0.alpha1 1.0218169689178467
layer1.0.alpha2 1.9163575172424316
layer1.1.alpha1 0.8819392919540405
layer1.1.alpha2 1.7575873136520386
layer1.2.alpha1 0.73957

Epoch: [34][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.5481 (0.5208)	Prec@1 77.344 (82.294)
Epoch: [34][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.3441 (0.5284)	Prec@1 84.375 (82.062)
Epoch: [34][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4219 (0.5329)	Prec@1 84.375 (81.922)
Epoch: [34][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4707 (0.5350)	Prec@1 82.031 (81.785)
Epoch: [34][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5552 (0.5398)	Prec@1 82.031 (81.580)
Test: [0/20]	Time 0.323 (0.323)	Loss 0.7377 (0.7377)	Prec@1 77.539 (77.539)
 * Prec@1 75.570
alpha1 1.8822927474975586
layer1.0.alpha1 1.0764466524124146
layer1.0.alpha2 1.836973786354065
layer1.1.alpha1 0.7986162900924683
layer1.1.alpha2 1.696980357170105
layer1.2.alpha1 0.735164225101471
layer1.2.alpha2 1.7377445697784424
layer2.0.alpha1 0.9747268557548523
layer2.0.alpha2 1.5784121751785278
layer2.1.alpha1 0.6372524499893188
layer2.1.alpha2 1.5790425539016724
layer2.2.alpha1 0.7

Epoch: [38][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5812 (0.5241)	Prec@1 82.812 (82.241)
Epoch: [38][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5658 (0.5230)	Prec@1 77.344 (82.234)
Epoch: [38][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5044 (0.5250)	Prec@1 82.812 (82.231)
Test: [0/20]	Time 0.317 (0.317)	Loss 0.4346 (0.4346)	Prec@1 85.352 (85.352)
 * Prec@1 84.040
alpha1 1.780940055847168
layer1.0.alpha1 1.0165907144546509
layer1.0.alpha2 1.7834035158157349
layer1.1.alpha1 0.810481071472168
layer1.1.alpha2 1.7229740619659424
layer1.2.alpha1 0.6797521710395813
layer1.2.alpha2 1.6801339387893677
layer2.0.alpha1 0.873680055141449
layer2.0.alpha2 1.4844492673873901
layer2.1.alpha1 0.6723861694335938
layer2.1.alpha2 1.4674649238586426
layer2.2.alpha1 0.7076550722122192
layer2.2.alpha2 1.5050084590911865
layer2.3.alpha1 0.7448632121086121
layer2.3.alpha2 1.423767328262329
layer3.0.alpha1 0.9871334433555603
layer3.0.alpha2 1.495739459991455
layer3.1.alpha1 0

Epoch: [42][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4778 (0.5000)	Prec@1 85.938 (83.194)
Test: [0/20]	Time 0.315 (0.315)	Loss 0.4299 (0.4299)	Prec@1 85.547 (85.547)
 * Prec@1 84.720
alpha1 1.8126742839813232
layer1.0.alpha1 0.9972949028015137
layer1.0.alpha2 1.8510987758636475
layer1.1.alpha1 0.9395171403884888
layer1.1.alpha2 1.7025121450424194
layer1.2.alpha1 0.6273969411849976
layer1.2.alpha2 1.7806425094604492
layer2.0.alpha1 0.8956920504570007
layer2.0.alpha2 1.5959795713424683
layer2.1.alpha1 0.6477232575416565
layer2.1.alpha2 1.554557204246521
layer2.2.alpha1 0.7100924253463745
layer2.2.alpha2 1.4638986587524414
layer2.3.alpha1 0.762972354888916
layer2.3.alpha2 1.5394949913024902
layer3.0.alpha1 0.9412092566490173
layer3.0.alpha2 1.5626986026763916
layer3.1.alpha1 0.8358116149902344
layer3.1.alpha2 1.4938091039657593
layer3.2.alpha1 0.5666998624801636
layer3.2.alpha2 1.4824464321136475
layer3.3.alpha1 0.548715353012085
layer3.3.alpha2 1.452666997909546
layer3.4.alp

 * Prec@1 83.860
alpha1 1.8538012504577637
layer1.0.alpha1 1.1395044326782227
layer1.0.alpha2 1.7433216571807861
layer1.1.alpha1 0.7575498819351196
layer1.1.alpha2 1.6984732151031494
layer1.2.alpha1 0.6868067979812622
layer1.2.alpha2 1.7142950296401978
layer2.0.alpha1 0.9878934621810913
layer2.0.alpha2 1.5037704706192017
layer2.1.alpha1 0.5785092711448669
layer2.1.alpha2 1.4831448793411255
layer2.2.alpha1 0.7505292892456055
layer2.2.alpha2 1.4736419916152954
layer2.3.alpha1 0.7316408753395081
layer2.3.alpha2 1.6210423707962036
layer3.0.alpha1 0.9893857836723328
layer3.0.alpha2 1.5142487287521362
layer3.1.alpha1 0.8279698491096497
layer3.1.alpha2 1.4495095014572144
layer3.2.alpha1 0.5203103423118591
layer3.2.alpha2 1.429093360900879
layer3.3.alpha1 0.46552714705467224
layer3.3.alpha2 1.4284096956253052
layer3.4.alpha1 0.43859514594078064
layer3.4.alpha2 1.3844345808029175
layer3.5.alpha1 0.3894367516040802
layer3.5.alpha2 1.494179606437683
layer4.0.alpha1 0.6050871014595032
layer4.0.alp

Epoch: [51][0/313]	Time 0.282 (0.282)	Data 0.171 (0.171)	Loss 0.4159 (0.4159)	Prec@1 85.156 (85.156)
Epoch: [51][50/313]	Time 0.109 (0.112)	Data 0.000 (0.003)	Loss 0.4797 (0.4507)	Prec@1 84.375 (84.283)
Epoch: [51][100/313]	Time 0.107 (0.110)	Data 0.000 (0.002)	Loss 0.4897 (0.4544)	Prec@1 81.250 (84.228)
Epoch: [51][150/313]	Time 0.108 (0.110)	Data 0.000 (0.001)	Loss 0.5614 (0.4626)	Prec@1 80.469 (84.194)
Epoch: [51][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5392 (0.4658)	Prec@1 79.688 (84.122)
Epoch: [51][250/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4494 (0.4681)	Prec@1 83.594 (84.036)
Epoch: [51][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.5851 (0.4721)	Prec@1 81.250 (83.993)
Test: [0/20]	Time 0.340 (0.340)	Loss 0.4002 (0.4002)	Prec@1 87.305 (87.305)
 * Prec@1 84.990
alpha1 1.8097227811813354
layer1.0.alpha1 0.9530755281448364
layer1.0.alpha2 1.7613568305969238
layer1.1.alpha1 0.8094350695610046
layer1.1.alpha2 1.699447751045227
layer1.2.alpha1 0.738144

Epoch: [55][150/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.3028 (0.4426)	Prec@1 90.625 (85.244)
Epoch: [55][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.6079 (0.4479)	Prec@1 79.688 (85.016)
Epoch: [55][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5932 (0.4527)	Prec@1 82.812 (84.761)
Epoch: [55][300/313]	Time 0.107 (0.108)	Data 0.000 (0.001)	Loss 0.4318 (0.4556)	Prec@1 85.938 (84.653)
Test: [0/20]	Time 0.318 (0.318)	Loss 0.4285 (0.4285)	Prec@1 86.914 (86.914)
 * Prec@1 84.630
alpha1 1.9388550519943237
layer1.0.alpha1 1.0450515747070312
layer1.0.alpha2 1.815983772277832
layer1.1.alpha1 0.8801010251045227
layer1.1.alpha2 1.7302041053771973
layer1.2.alpha1 0.7086475491523743
layer1.2.alpha2 1.683657169342041
layer2.0.alpha1 1.037672519683838
layer2.0.alpha2 1.4768396615982056
layer2.1.alpha1 0.6430075764656067
layer2.1.alpha2 1.5517566204071045
layer2.2.alpha1 0.7573657035827637
layer2.2.alpha2 1.5216673612594604
layer2.3.alpha1 0.6996403932571411
layer2.3.alpha2 1

Epoch: [59][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4437 (0.4396)	Prec@1 85.938 (85.159)
Epoch: [59][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4717 (0.4410)	Prec@1 83.594 (85.125)
Test: [0/20]	Time 0.334 (0.334)	Loss 0.4257 (0.4257)	Prec@1 88.477 (88.477)
 * Prec@1 84.050
alpha1 1.8107248544692993
layer1.0.alpha1 0.9281637668609619
layer1.0.alpha2 1.6760931015014648
layer1.1.alpha1 0.8518319129943848
layer1.1.alpha2 1.6429719924926758
layer1.2.alpha1 0.7417388558387756
layer1.2.alpha2 1.853608250617981
layer2.0.alpha1 0.9985876679420471
layer2.0.alpha2 1.5165520906448364
layer2.1.alpha1 0.6447191834449768
layer2.1.alpha2 1.5388869047164917
layer2.2.alpha1 0.6583399176597595
layer2.2.alpha2 1.5543057918548584
layer2.3.alpha1 0.7122235894203186
layer2.3.alpha2 1.447913646697998
layer3.0.alpha1 1.0237224102020264
layer3.0.alpha2 1.5125808715820312
layer3.1.alpha1 0.7493532299995422
layer3.1.alpha2 1.511820912361145
layer3.2.alpha1 0.6463478207588196
layer3.2.alph

Test: [0/20]	Time 0.323 (0.323)	Loss 0.3745 (0.3745)	Prec@1 87.891 (87.891)
 * Prec@1 86.940
alpha1 1.7253901958465576
layer1.0.alpha1 1.0016907453536987
layer1.0.alpha2 1.6958547830581665
layer1.1.alpha1 0.7921651005744934
layer1.1.alpha2 1.784792184829712
layer1.2.alpha1 0.6633573770523071
layer1.2.alpha2 1.720357060432434
layer2.0.alpha1 1.0553357601165771
layer2.0.alpha2 1.5173906087875366
layer2.1.alpha1 0.5400073528289795
layer2.1.alpha2 1.4920690059661865
layer2.2.alpha1 0.6485477089881897
layer2.2.alpha2 1.4991203546524048
layer2.3.alpha1 0.714129626750946
layer2.3.alpha2 1.5096633434295654
layer3.0.alpha1 1.0505820512771606
layer3.0.alpha2 1.5141454935073853
layer3.1.alpha1 0.8217334747314453
layer3.1.alpha2 1.5213518142700195
layer3.2.alpha1 0.5625447630882263
layer3.2.alpha2 1.442961573600769
layer3.3.alpha1 0.5864977836608887
layer3.3.alpha2 1.4334439039230347
layer3.4.alpha1 0.4162198007106781
layer3.4.alpha2 1.4072941541671753
layer3.5.alpha1 0.4667569696903229
layer3.5.a

Epoch: [68][0/313]	Time 0.273 (0.273)	Data 0.158 (0.158)	Loss 0.3014 (0.3014)	Prec@1 91.406 (91.406)
Epoch: [68][50/313]	Time 0.108 (0.112)	Data 0.000 (0.003)	Loss 0.4491 (0.4110)	Prec@1 85.938 (85.968)
Epoch: [68][100/313]	Time 0.109 (0.110)	Data 0.000 (0.002)	Loss 0.3317 (0.4165)	Prec@1 89.062 (85.675)
Epoch: [68][150/313]	Time 0.109 (0.110)	Data 0.000 (0.001)	Loss 0.4144 (0.4162)	Prec@1 85.156 (85.803)
Epoch: [68][200/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.4601 (0.4243)	Prec@1 84.375 (85.541)
Epoch: [68][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4399 (0.4275)	Prec@1 82.031 (85.427)
Epoch: [68][300/313]	Time 0.109 (0.109)	Data 0.000 (0.001)	Loss 0.3192 (0.4263)	Prec@1 91.406 (85.561)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.4621 (0.4621)	Prec@1 84.961 (84.961)
 * Prec@1 83.670
alpha1 1.7310223579406738
layer1.0.alpha1 1.0406577587127686
layer1.0.alpha2 1.7575775384902954
layer1.1.alpha1 0.8237764239311218
layer1.1.alpha2 1.768979787826538
layer1.2.alpha1 0.761687

Epoch: [72][100/313]	Time 0.108 (0.110)	Data 0.000 (0.002)	Loss 0.5215 (0.4276)	Prec@1 82.812 (85.752)
Epoch: [72][150/313]	Time 0.107 (0.110)	Data 0.000 (0.001)	Loss 0.4687 (0.4291)	Prec@1 81.250 (85.648)
Epoch: [72][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.3914 (0.4328)	Prec@1 85.938 (85.487)
Epoch: [72][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.4743 (0.4345)	Prec@1 84.375 (85.393)
Epoch: [72][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.3790 (0.4312)	Prec@1 81.250 (85.455)
Test: [0/20]	Time 0.436 (0.436)	Loss 0.3882 (0.3882)	Prec@1 87.305 (87.305)
 * Prec@1 85.940
alpha1 1.8326767683029175
layer1.0.alpha1 0.9077818393707275
layer1.0.alpha2 1.674963116645813
layer1.1.alpha1 0.8663891553878784
layer1.1.alpha2 1.6023969650268555
layer1.2.alpha1 0.8540298342704773
layer1.2.alpha2 1.73549222946167
layer2.0.alpha1 0.948043167591095
layer2.0.alpha2 1.4793667793273926
layer2.1.alpha1 0.5452113151550293
layer2.1.alpha2 1.5432305335998535
layer2.2.alpha1 0.58

Epoch: [76][200/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.3948 (0.4138)	Prec@1 85.156 (86.132)
Epoch: [76][250/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.5429 (0.4190)	Prec@1 84.375 (85.953)
Epoch: [76][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.3693 (0.4174)	Prec@1 87.500 (86.018)
Test: [0/20]	Time 0.326 (0.326)	Loss 0.3714 (0.3714)	Prec@1 88.086 (88.086)
 * Prec@1 85.420
alpha1 1.7099756002426147
layer1.0.alpha1 1.0761703252792358
layer1.0.alpha2 1.6252111196517944
layer1.1.alpha1 0.6693291664123535
layer1.1.alpha2 1.7636651992797852
layer1.2.alpha1 0.7872782349586487
layer1.2.alpha2 1.863651156425476
layer2.0.alpha1 1.0493569374084473
layer2.0.alpha2 1.4772311449050903
layer2.1.alpha1 0.5273674130439758
layer2.1.alpha2 1.4543719291687012
layer2.2.alpha1 0.6986212134361267
layer2.2.alpha2 1.4506765604019165
layer2.3.alpha1 0.8040326833724976
layer2.3.alpha2 1.476707935333252
layer3.0.alpha1 1.0027281045913696
layer3.0.alpha2 1.5661274194717407
layer3.1.alpha

Epoch: [80][300/313]	Time 0.108 (0.109)	Data 0.000 (0.001)	Loss 0.2060 (0.2944)	Prec@1 93.750 (90.545)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.1976 (0.1976)	Prec@1 94.141 (94.141)
 * Prec@1 91.610
alpha1 1.7647191286087036
layer1.0.alpha1 0.9794735312461853
layer1.0.alpha2 1.5899333953857422
layer1.1.alpha1 0.7568426132202148
layer1.1.alpha2 1.67721426486969
layer1.2.alpha1 0.7693309187889099
layer1.2.alpha2 1.726096272468567
layer2.0.alpha1 0.9175556898117065
layer2.0.alpha2 1.4557971954345703
layer2.1.alpha1 0.6423971652984619
layer2.1.alpha2 1.3887097835540771
layer2.2.alpha1 0.615533709526062
layer2.2.alpha2 1.413305640220642
layer2.3.alpha1 0.7747532725334167
layer2.3.alpha2 1.4280091524124146
layer3.0.alpha1 1.0597890615463257
layer3.0.alpha2 1.4744327068328857
layer3.1.alpha1 0.6675503253936768
layer3.1.alpha2 1.4742155075073242
layer3.2.alpha1 0.661984920501709
layer3.2.alpha2 1.4755213260650635
layer3.3.alpha1 0.5929980278015137
layer3.3.alpha2 1.462079405784607
layer3.4.alpha1

 * Prec@1 92.150
alpha1 1.7053297758102417
layer1.0.alpha1 0.9488076567649841
layer1.0.alpha2 1.5363144874572754
layer1.1.alpha1 0.6942862272262573
layer1.1.alpha2 1.5614702701568604
layer1.2.alpha1 0.716280460357666
layer1.2.alpha2 1.6340223550796509
layer2.0.alpha1 0.8743224143981934
layer2.0.alpha2 1.365022897720337
layer2.1.alpha1 0.5777949690818787
layer2.1.alpha2 1.3362241983413696
layer2.2.alpha1 0.5766196250915527
layer2.2.alpha2 1.3487480878829956
layer2.3.alpha1 0.7084704041481018
layer2.3.alpha2 1.378822922706604
layer3.0.alpha1 1.0026510953903198
layer3.0.alpha2 1.3580902814865112
layer3.1.alpha1 0.6546390056610107
layer3.1.alpha2 1.3556196689605713
layer3.2.alpha1 0.6004029512405396
layer3.2.alpha2 1.3684604167938232
layer3.3.alpha1 0.5516225695610046
layer3.3.alpha2 1.3554024696350098
layer3.4.alpha1 0.43586283922195435
layer3.4.alpha2 1.3056795597076416
layer3.5.alpha1 0.35765308141708374
layer3.5.alpha2 1.3730463981628418
layer4.0.alpha1 0.5917247533798218
layer4.0.alph

current lr 1.00000e-02
Epoch: [89][0/313]	Time 0.269 (0.269)	Data 0.158 (0.158)	Loss 0.2468 (0.2468)	Prec@1 91.406 (91.406)
Epoch: [89][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.2935 (0.1948)	Prec@1 90.625 (93.842)
Epoch: [89][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1966 (0.1939)	Prec@1 92.188 (93.881)
Epoch: [89][150/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1525 (0.1964)	Prec@1 96.094 (93.791)
Epoch: [89][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1719 (0.1973)	Prec@1 92.969 (93.742)
Epoch: [89][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2359 (0.1995)	Prec@1 91.406 (93.669)
Epoch: [89][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.2898 (0.2001)	Prec@1 89.844 (93.641)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.2154 (0.2154)	Prec@1 94.531 (94.531)
 * Prec@1 92.420
alpha1 1.6596522331237793
layer1.0.alpha1 0.9129429459571838
layer1.0.alpha2 1.4614015817642212
layer1.1.alpha1 0.647330641746521
layer1.1.alpha2 1.4770913124084473
l

Epoch: [93][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1687 (0.1921)	Prec@1 93.750 (93.804)
Epoch: [93][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1857 (0.1882)	Prec@1 92.969 (93.864)
Epoch: [93][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.3482 (0.1890)	Prec@1 89.844 (93.820)
Epoch: [93][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1542 (0.1873)	Prec@1 94.531 (93.834)
Epoch: [93][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1714 (0.1880)	Prec@1 96.094 (93.781)
Test: [0/20]	Time 0.334 (0.334)	Loss 0.2285 (0.2285)	Prec@1 94.727 (94.727)
 * Prec@1 92.640
alpha1 1.5791964530944824
layer1.0.alpha1 0.8779440522193909
layer1.0.alpha2 1.382239818572998
layer1.1.alpha1 0.6349606513977051
layer1.1.alpha2 1.4469072818756104
layer1.2.alpha1 0.6558891534805298
layer1.2.alpha2 1.5048902034759521
layer2.0.alpha1 0.8466687798500061
layer2.0.alpha2 1.2620911598205566
layer2.1.alpha1 0.4808007776737213
layer2.1.alpha2 1.2257391214370728
layer2.2.alpha1 0

Epoch: [97][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1584 (0.1830)	Prec@1 94.531 (94.096)
Epoch: [97][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.3074 (0.1817)	Prec@1 92.188 (94.127)
Epoch: [97][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2384 (0.1845)	Prec@1 92.969 (94.046)
Test: [0/20]	Time 0.327 (0.327)	Loss 0.2175 (0.2175)	Prec@1 94.141 (94.141)
 * Prec@1 92.260
alpha1 1.4813716411590576
layer1.0.alpha1 0.8756899833679199
layer1.0.alpha2 1.3199659585952759
layer1.1.alpha1 0.6111630201339722
layer1.1.alpha2 1.4092336893081665
layer1.2.alpha1 0.6400418877601624
layer1.2.alpha2 1.452349305152893
layer2.0.alpha1 0.8009157776832581
layer2.0.alpha2 1.2250388860702515
layer2.1.alpha1 0.44294029474258423
layer2.1.alpha2 1.1898306608200073
layer2.2.alpha1 0.510308027267456
layer2.2.alpha2 1.2117003202438354
layer2.3.alpha1 0.5946788787841797
layer2.3.alpha2 1.2383249998092651
layer3.0.alpha1 0.8965728282928467
layer3.0.alpha2 1.191408634185791
layer3.1.alpha

Epoch: [101][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1630 (0.1766)	Prec@1 94.531 (94.251)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.1996 (0.1996)	Prec@1 94.336 (94.336)
 * Prec@1 92.440
alpha1 1.472294807434082
layer1.0.alpha1 0.8227015733718872
layer1.0.alpha2 1.298641562461853
layer1.1.alpha1 0.6012040376663208
layer1.1.alpha2 1.3547084331512451
layer1.2.alpha1 0.6203675270080566
layer1.2.alpha2 1.4025174379348755
layer2.0.alpha1 0.7905340194702148
layer2.0.alpha2 1.1698707342147827
layer2.1.alpha1 0.3995635509490967
layer2.1.alpha2 1.1626566648483276
layer2.2.alpha1 0.4679735600948334
layer2.2.alpha2 1.1815972328186035
layer2.3.alpha1 0.5848652720451355
layer2.3.alpha2 1.2136707305908203
layer3.0.alpha1 0.8824895024299622
layer3.0.alpha2 1.1723568439483643
layer3.1.alpha1 0.5871111154556274
layer3.1.alpha2 1.1588225364685059
layer3.2.alpha1 0.45895883440971375
layer3.2.alpha2 1.1327849626541138
layer3.3.alpha1 0.433402955532074
layer3.3.alpha2 1.1259996891021729
layer3.4.

 * Prec@1 92.570
alpha1 1.4214564561843872
layer1.0.alpha1 0.7974430918693542
layer1.0.alpha2 1.2764816284179688
layer1.1.alpha1 0.5799079537391663
layer1.1.alpha2 1.290103793144226
layer1.2.alpha1 0.5838562250137329
layer1.2.alpha2 1.3825775384902954
layer2.0.alpha1 0.7416152954101562
layer2.0.alpha2 1.1445362567901611
layer2.1.alpha1 0.4021916687488556
layer2.1.alpha2 1.1337361335754395
layer2.2.alpha1 0.43890058994293213
layer2.2.alpha2 1.128088116645813
layer2.3.alpha1 0.5781058073043823
layer2.3.alpha2 1.174701452255249
layer3.0.alpha1 0.8742339611053467
layer3.0.alpha2 1.1395776271820068
layer3.1.alpha1 0.5711097121238708
layer3.1.alpha2 1.1268996000289917
layer3.2.alpha1 0.4279705882072449
layer3.2.alpha2 1.1092967987060547
layer3.3.alpha1 0.4136439263820648
layer3.3.alpha2 1.0883105993270874
layer3.4.alpha1 0.3358227610588074
layer3.4.alpha2 1.085891842842102
layer3.5.alpha1 0.2974868714809418
layer3.5.alpha2 1.1079450845718384
layer4.0.alpha1 0.49998214840888977
layer4.0.alpha

Epoch: [110][0/313]	Time 0.272 (0.272)	Data 0.160 (0.160)	Loss 0.1862 (0.1862)	Prec@1 92.969 (92.969)
Epoch: [110][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.1667 (0.1590)	Prec@1 95.312 (94.914)
Epoch: [110][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.1571 (0.1660)	Prec@1 95.312 (94.748)
Epoch: [110][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1945 (0.1592)	Prec@1 95.312 (94.924)
Epoch: [110][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1498 (0.1612)	Prec@1 95.312 (94.838)
Epoch: [110][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1545 (0.1619)	Prec@1 94.531 (94.746)
Epoch: [110][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0972 (0.1646)	Prec@1 96.875 (94.591)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.2051 (0.2051)	Prec@1 94.922 (94.922)
 * Prec@1 92.350
alpha1 1.3676320314407349
layer1.0.alpha1 0.7473031878471375
layer1.0.alpha2 1.2314270734786987
layer1.1.alpha1 0.5385185480117798
layer1.1.alpha2 1.282785177230835
layer1.2.alpha1 0

Epoch: [114][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.2078 (0.1568)	Prec@1 92.969 (94.872)
Epoch: [114][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.0606 (0.1569)	Prec@1 99.219 (94.945)
Epoch: [114][200/313]	Time 0.127 (0.115)	Data 0.000 (0.001)	Loss 0.1694 (0.1588)	Prec@1 93.750 (94.776)
Epoch: [114][250/313]	Time 0.113 (0.115)	Data 0.000 (0.001)	Loss 0.1072 (0.1598)	Prec@1 97.656 (94.783)
Epoch: [114][300/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1284 (0.1616)	Prec@1 96.094 (94.749)
Test: [0/20]	Time 0.345 (0.345)	Loss 0.1969 (0.1969)	Prec@1 94.336 (94.336)
 * Prec@1 92.590
alpha1 1.3071528673171997
layer1.0.alpha1 0.7246663570404053
layer1.0.alpha2 1.2106691598892212
layer1.1.alpha1 0.5452737212181091
layer1.1.alpha2 1.1731995344161987
layer1.2.alpha1 0.5045931339263916
layer1.2.alpha2 1.3061143159866333
layer2.0.alpha1 0.7481468915939331
layer2.0.alpha2 1.070803165435791
layer2.1.alpha1 0.43854308128356934
layer2.1.alpha2 1.0760771036148071
layer2.2.al

Epoch: [118][200/313]	Time 0.114 (0.113)	Data 0.000 (0.001)	Loss 0.1763 (0.1611)	Prec@1 93.750 (94.729)
Epoch: [118][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1449 (0.1614)	Prec@1 98.438 (94.737)
Epoch: [118][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2246 (0.1618)	Prec@1 93.750 (94.703)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.1695 (0.1695)	Prec@1 95.508 (95.508)
 * Prec@1 92.580
alpha1 1.2860691547393799
layer1.0.alpha1 0.6976274251937866
layer1.0.alpha2 1.1582708358764648
layer1.1.alpha1 0.49344223737716675
layer1.1.alpha2 1.1873095035552979
layer1.2.alpha1 0.5103411078453064
layer1.2.alpha2 1.28341805934906
layer2.0.alpha1 0.7235788106918335
layer2.0.alpha2 1.0743088722229004
layer2.1.alpha1 0.3673408329486847
layer2.1.alpha2 1.0791929960250854
layer2.2.alpha1 0.41419416666030884
layer2.2.alpha2 1.077913761138916
layer2.3.alpha1 0.5337836742401123
layer2.3.alpha2 1.0831694602966309
layer3.0.alpha1 0.8627842664718628
layer3.0.alpha2 1.0429496765136719
layer3.1.a

Epoch: [122][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1632 (0.1529)	Prec@1 92.188 (94.936)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.1747 (0.1747)	Prec@1 94.727 (94.727)
 * Prec@1 92.620
alpha1 1.2634190320968628
layer1.0.alpha1 0.6739222407341003
layer1.0.alpha2 1.147173285484314
layer1.1.alpha1 0.5111408233642578
layer1.1.alpha2 1.168901801109314
layer1.2.alpha1 0.49820250272750854
layer1.2.alpha2 1.2426930665969849
layer2.0.alpha1 0.7110960483551025
layer2.0.alpha2 1.045596718788147
layer2.1.alpha1 0.36396822333335876
layer2.1.alpha2 1.0477451086044312
layer2.2.alpha1 0.40728652477264404
layer2.2.alpha2 1.0756824016571045
layer2.3.alpha1 0.5520244240760803
layer2.3.alpha2 1.108285903930664
layer3.0.alpha1 0.8368632793426514
layer3.0.alpha2 1.0522592067718506
layer3.1.alpha1 0.4652263820171356
layer3.1.alpha2 0.9989350438117981
layer3.2.alpha1 0.3791411221027374
layer3.2.alpha2 1.0091158151626587
layer3.3.alpha1 0.34292271733283997
layer3.3.alpha2 0.9932039976119995
layer3.

 * Prec@1 92.460
alpha1 1.2212501764297485
layer1.0.alpha1 0.6642706394195557
layer1.0.alpha2 1.1202208995819092
layer1.1.alpha1 0.4893471598625183
layer1.1.alpha2 1.0932749509811401
layer1.2.alpha1 0.4862286448478699
layer1.2.alpha2 1.2143903970718384
layer2.0.alpha1 0.6958761811256409
layer2.0.alpha2 1.0221973657608032
layer2.1.alpha1 0.3364322781562805
layer2.1.alpha2 1.0160841941833496
layer2.2.alpha1 0.39558911323547363
layer2.2.alpha2 1.024579644203186
layer2.3.alpha1 0.5261295437812805
layer2.3.alpha2 1.0779463052749634
layer3.0.alpha1 0.7957108616828918
layer3.0.alpha2 1.0144644975662231
layer3.1.alpha1 0.4617701768875122
layer3.1.alpha2 0.9782395362854004
layer3.2.alpha1 0.37861424684524536
layer3.2.alpha2 0.9875746369361877
layer3.3.alpha1 0.35622626543045044
layer3.3.alpha2 0.9926220774650574
layer3.4.alpha1 0.2706485390663147
layer3.4.alpha2 0.9792220592498779
layer3.5.alpha1 0.24273960292339325
layer3.5.alpha2 1.0438787937164307
layer4.0.alpha1 0.43439528346061707
layer4.0

current lr 1.00000e-02
Epoch: [131][0/313]	Time 0.281 (0.281)	Data 0.168 (0.168)	Loss 0.1490 (0.1490)	Prec@1 94.531 (94.531)
Epoch: [131][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 0.1273 (0.1500)	Prec@1 96.094 (95.420)
Epoch: [131][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1386 (0.1465)	Prec@1 95.312 (95.436)
Epoch: [131][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1055 (0.1484)	Prec@1 96.875 (95.235)
Epoch: [131][200/313]	Time 0.114 (0.114)	Data 0.000 (0.001)	Loss 0.0787 (0.1509)	Prec@1 98.438 (95.095)
Epoch: [131][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1751 (0.1513)	Prec@1 92.188 (95.063)
Epoch: [131][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2238 (0.1526)	Prec@1 92.969 (95.001)
Test: [0/20]	Time 0.337 (0.337)	Loss 0.1830 (0.1830)	Prec@1 94.336 (94.336)
 * Prec@1 92.750
alpha1 1.1941198110580444
layer1.0.alpha1 0.6629917621612549
layer1.0.alpha2 1.0918266773223877
layer1.1.alpha1 0.505323052406311
layer1.1.alpha2 1.07768738269

Epoch: [135][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.2136 (0.1496)	Prec@1 92.969 (94.838)
Epoch: [135][100/313]	Time 0.113 (0.115)	Data 0.000 (0.002)	Loss 0.0881 (0.1472)	Prec@1 96.094 (95.088)
Epoch: [135][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.2054 (0.1458)	Prec@1 93.750 (95.111)
Epoch: [135][200/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.1294 (0.1469)	Prec@1 94.531 (95.110)
Epoch: [135][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1559 (0.1482)	Prec@1 95.312 (95.048)
Epoch: [135][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2669 (0.1478)	Prec@1 90.625 (95.105)
Test: [0/20]	Time 0.327 (0.327)	Loss 0.1680 (0.1680)	Prec@1 94.922 (94.922)
 * Prec@1 92.530
alpha1 1.1388791799545288
layer1.0.alpha1 0.6092663407325745
layer1.0.alpha2 1.0819501876831055
layer1.1.alpha1 0.46747520565986633
layer1.1.alpha2 1.0649281740188599
layer1.2.alpha1 0.4466436505317688
layer1.2.alpha2 1.1522276401519775
layer2.0.alpha1 0.6814097166061401
layer2.0.alp

Epoch: [139][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.2054 (0.1465)	Prec@1 93.750 (95.111)
Epoch: [139][200/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.1761 (0.1436)	Prec@1 94.531 (95.188)
Epoch: [139][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1130 (0.1448)	Prec@1 96.094 (95.172)
Epoch: [139][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2038 (0.1456)	Prec@1 93.750 (95.146)
Test: [0/20]	Time 0.322 (0.322)	Loss 0.2020 (0.2020)	Prec@1 93.750 (93.750)
 * Prec@1 92.610
alpha1 1.1622295379638672
layer1.0.alpha1 0.6107863783836365
layer1.0.alpha2 1.054941177368164
layer1.1.alpha1 0.4397140145301819
layer1.1.alpha2 1.0598331689834595
layer1.2.alpha1 0.45297905802726746
layer1.2.alpha2 1.1313048601150513
layer2.0.alpha1 0.6786460876464844
layer2.0.alpha2 0.940723717212677
layer2.1.alpha1 0.31388404965400696
layer2.1.alpha2 0.9572860598564148
layer2.2.alpha1 0.3946801424026489
layer2.2.alpha2 0.9851776361465454
layer2.3.alpha1 0.5233539342880249
layer2.3.a

Epoch: [143][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1686 (0.1489)	Prec@1 94.531 (95.123)
Epoch: [143][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1086 (0.1492)	Prec@1 97.656 (95.128)
Test: [0/20]	Time 0.439 (0.439)	Loss 0.1736 (0.1736)	Prec@1 94.922 (94.922)
 * Prec@1 92.600
alpha1 1.1159290075302124
layer1.0.alpha1 0.5739445686340332
layer1.0.alpha2 1.015246033668518
layer1.1.alpha1 0.4606685936450958
layer1.1.alpha2 1.0270217657089233
layer1.2.alpha1 0.47246161103248596
layer1.2.alpha2 1.128257155418396
layer2.0.alpha1 0.6568231582641602
layer2.0.alpha2 0.9288172125816345
layer2.1.alpha1 0.3080238997936249
layer2.1.alpha2 0.9213945269584656
layer2.2.alpha1 0.3955511748790741
layer2.2.alpha2 0.9487242698669434
layer2.3.alpha1 0.4736863374710083
layer2.3.alpha2 1.009718418121338
layer3.0.alpha1 0.7612686157226562
layer3.0.alpha2 0.9653663039207458
layer3.1.alpha1 0.4608444571495056
layer3.1.alpha2 0.9471209645271301
layer3.2.alpha1 0.3804279863834381
layer3.2.a

Test: [0/20]	Time 0.322 (0.322)	Loss 0.1614 (0.1614)	Prec@1 94.922 (94.922)
 * Prec@1 92.480
alpha1 1.1192556619644165
layer1.0.alpha1 0.5785315632820129
layer1.0.alpha2 0.9970577359199524
layer1.1.alpha1 0.4383142292499542
layer1.1.alpha2 1.0394325256347656
layer1.2.alpha1 0.4731394648551941
layer1.2.alpha2 1.0958316326141357
layer2.0.alpha1 0.6529314517974854
layer2.0.alpha2 0.8943079710006714
layer2.1.alpha1 0.2741074860095978
layer2.1.alpha2 0.8922221064567566
layer2.2.alpha1 0.3915617763996124
layer2.2.alpha2 0.9531420469284058
layer2.3.alpha1 0.4785401225090027
layer2.3.alpha2 0.9853346943855286
layer3.0.alpha1 0.7561541795730591
layer3.0.alpha2 0.9437347650527954
layer3.1.alpha1 0.45997804403305054
layer3.1.alpha2 0.9425432085990906
layer3.2.alpha1 0.33028969168663025
layer3.2.alpha2 0.9419581890106201
layer3.3.alpha1 0.33685508370399475
layer3.3.alpha2 0.9155288338661194
layer3.4.alpha1 0.25200721621513367
layer3.4.alpha2 0.894603967666626
layer3.5.alpha1 0.24676761031150818
la

Epoch: [152][0/313]	Time 0.277 (0.277)	Data 0.165 (0.165)	Loss 0.1601 (0.1601)	Prec@1 95.312 (95.312)
Epoch: [152][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.0943 (0.1208)	Prec@1 96.875 (96.094)
Epoch: [152][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.0798 (0.1222)	Prec@1 97.656 (95.978)
Epoch: [152][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.1170 (0.1207)	Prec@1 96.094 (96.068)
Epoch: [152][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.0548 (0.1230)	Prec@1 98.438 (95.907)
Epoch: [152][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1094 (0.1237)	Prec@1 96.875 (95.898)
Epoch: [152][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1468 (0.1239)	Prec@1 95.312 (95.907)
Test: [0/20]	Time 0.331 (0.331)	Loss 0.1641 (0.1641)	Prec@1 94.727 (94.727)
 * Prec@1 93.000
alpha1 1.1153968572616577
layer1.0.alpha1 0.5437688231468201
layer1.0.alpha2 0.9727777242660522
layer1.1.alpha1 0.43934908509254456
layer1.1.alpha2 1.022209644317627
layer1.2.alpha1 

Epoch: [156][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.1640 (0.1243)	Prec@1 92.969 (95.864)
Epoch: [156][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.2046 (0.1263)	Prec@1 93.750 (95.753)
Epoch: [156][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.1988 (0.1251)	Prec@1 93.750 (95.856)
Epoch: [156][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1121 (0.1240)	Prec@1 93.750 (95.841)
Epoch: [156][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.0819 (0.1251)	Prec@1 97.656 (95.814)
Epoch: [156][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1840 (0.1238)	Prec@1 93.750 (95.860)
Test: [0/20]	Time 0.332 (0.332)	Loss 0.1606 (0.1606)	Prec@1 95.117 (95.117)
 * Prec@1 93.010
alpha1 1.11174738407135
layer1.0.alpha1 0.5451809763908386
layer1.0.alpha2 0.9683294892311096
layer1.1.alpha1 0.435057669878006
layer1.1.alpha2 1.0124647617340088
layer1.2.alpha1 0.4660838842391968
layer1.2.alpha2 1.1003577709197998
layer2.0.alpha1 0.6057285070419312
layer2.0.alpha2 

Epoch: [160][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.1472 (0.1168)	Prec@1 97.656 (96.130)
Epoch: [160][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0667 (0.1147)	Prec@1 98.438 (96.214)
Epoch: [160][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0813 (0.1164)	Prec@1 97.656 (96.156)
Epoch: [160][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1343 (0.1178)	Prec@1 95.312 (96.117)
Test: [0/20]	Time 0.349 (0.349)	Loss 0.1700 (0.1700)	Prec@1 94.531 (94.531)
 * Prec@1 93.070
alpha1 1.1130945682525635
layer1.0.alpha1 0.5495141744613647
layer1.0.alpha2 0.9642055034637451
layer1.1.alpha1 0.4369734525680542
layer1.1.alpha2 1.0042943954467773
layer1.2.alpha1 0.46600180864334106
layer1.2.alpha2 1.0907026529312134
layer2.0.alpha1 0.5972998142242432
layer2.0.alpha2 0.89454185962677
layer2.1.alpha1 0.2865443229675293
layer2.1.alpha2 0.9079381823539734
layer2.2.alpha1 0.3760441839694977
layer2.2.alpha2 0.9344829320907593
layer2.3.alpha1 0.46200448274612427
layer2.3.a

Epoch: [164][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1203 (0.1174)	Prec@1 96.875 (96.050)
Epoch: [164][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1365 (0.1163)	Prec@1 96.094 (96.094)
Test: [0/20]	Time 0.326 (0.326)	Loss 0.1565 (0.1565)	Prec@1 94.531 (94.531)
 * Prec@1 93.090
alpha1 1.1000919342041016
layer1.0.alpha1 0.5442543625831604
layer1.0.alpha2 0.959929347038269
layer1.1.alpha1 0.43434613943099976
layer1.1.alpha2 1.000208854675293
layer1.2.alpha1 0.4647614657878876
layer1.2.alpha2 1.0834964513778687
layer2.0.alpha1 0.5922088027000427
layer2.0.alpha2 0.8982036113739014
layer2.1.alpha1 0.2908024787902832
layer2.1.alpha2 0.9048601984977722
layer2.2.alpha1 0.3747413456439972
layer2.2.alpha2 0.9299123287200928
layer2.3.alpha1 0.4599943459033966
layer2.3.alpha2 0.9601032137870789
layer3.0.alpha1 0.7521041035652161
layer3.0.alpha2 0.9253883361816406
layer3.1.alpha1 0.46345701813697815
layer3.1.alpha2 0.8885858654975891
layer3.2.alpha1 0.33858275413513184
layer3.

Epoch: [168][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1235 (0.1145)	Prec@1 94.531 (96.211)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.1632 (0.1632)	Prec@1 94.141 (94.141)
 * Prec@1 93.100
alpha1 1.0965769290924072
layer1.0.alpha1 0.5464356541633606
layer1.0.alpha2 0.9671499729156494
layer1.1.alpha1 0.4289654493331909
layer1.1.alpha2 0.999322235584259
layer1.2.alpha1 0.46079957485198975
layer1.2.alpha2 1.0741733312606812
layer2.0.alpha1 0.5965515971183777
layer2.0.alpha2 0.8952062129974365
layer2.1.alpha1 0.2900770306587219
layer2.1.alpha2 0.8968219757080078
layer2.2.alpha1 0.37362566590309143
layer2.2.alpha2 0.9311570525169373
layer2.3.alpha1 0.45559412240982056
layer2.3.alpha2 0.9576173424720764
layer3.0.alpha1 0.7500870823860168
layer3.0.alpha2 0.9186102151870728
layer3.1.alpha1 0.4620518982410431
layer3.1.alpha2 0.8841605186462402
layer3.2.alpha1 0.3417576849460602
layer3.2.alpha2 0.8952198028564453
layer3.3.alpha1 0.32741186022758484
layer3.3.alpha2 0.8987287282943726
laye

 * Prec@1 93.180
alpha1 1.092434048652649
layer1.0.alpha1 0.5472463369369507
layer1.0.alpha2 0.9673143625259399
layer1.1.alpha1 0.430134654045105
layer1.1.alpha2 0.9939377903938293
layer1.2.alpha1 0.4574475884437561
layer1.2.alpha2 1.072361707687378
layer2.0.alpha1 0.5947561264038086
layer2.0.alpha2 0.8871841430664062
layer2.1.alpha1 0.290879487991333
layer2.1.alpha2 0.902023434638977
layer2.2.alpha1 0.3733171820640564
layer2.2.alpha2 0.9271097183227539
layer2.3.alpha1 0.45422470569610596
layer2.3.alpha2 0.9563208818435669
layer3.0.alpha1 0.7433863282203674
layer3.0.alpha2 0.911683976650238
layer3.1.alpha1 0.4600583016872406
layer3.1.alpha2 0.8757942318916321
layer3.2.alpha1 0.34097820520401
layer3.2.alpha2 0.8862953186035156
layer3.3.alpha1 0.326937198638916
layer3.3.alpha2 0.8887351751327515
layer3.4.alpha1 0.2444377988576889
layer3.4.alpha2 0.9012912511825562
layer3.5.alpha1 0.2440585345029831
layer3.5.alpha2 0.9448226094245911
layer4.0.alpha1 0.4282858073711395
layer4.0.alpha2 0.80

Epoch: [177][0/313]	Time 0.278 (0.278)	Data 0.161 (0.161)	Loss 0.1315 (0.1315)	Prec@1 95.312 (95.312)
Epoch: [177][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 0.1457 (0.1071)	Prec@1 94.531 (96.553)
Epoch: [177][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1144 (0.1114)	Prec@1 96.094 (96.388)
Epoch: [177][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.0566 (0.1092)	Prec@1 97.656 (96.389)
Epoch: [177][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.0444 (0.1099)	Prec@1 98.438 (96.308)
Epoch: [177][250/313]	Time 0.116 (0.113)	Data 0.000 (0.001)	Loss 0.1130 (0.1127)	Prec@1 96.094 (96.237)
Epoch: [177][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1426 (0.1135)	Prec@1 93.750 (96.208)
Test: [0/20]	Time 0.323 (0.323)	Loss 0.1545 (0.1545)	Prec@1 95.117 (95.117)
 * Prec@1 93.060
alpha1 1.0851489305496216
layer1.0.alpha1 0.5377792119979858
layer1.0.alpha2 0.9634898900985718
layer1.1.alpha1 0.4295738637447357
layer1.1.alpha2 0.9877682328224182
layer1.2.alpha1 

Epoch: [181][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.0947 (0.1006)	Prec@1 96.094 (96.744)
Epoch: [181][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1568 (0.1088)	Prec@1 95.312 (96.456)
Epoch: [181][200/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.1320 (0.1106)	Prec@1 96.094 (96.343)
Epoch: [181][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1556 (0.1113)	Prec@1 95.312 (96.312)
Epoch: [181][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1071 (0.1115)	Prec@1 96.875 (96.312)
Test: [0/20]	Time 0.333 (0.333)	Loss 0.1464 (0.1464)	Prec@1 95.117 (95.117)
 * Prec@1 93.120
alpha1 1.0850530862808228
layer1.0.alpha1 0.5349990725517273
layer1.0.alpha2 0.9548889398574829
layer1.1.alpha1 0.4289332330226898
layer1.1.alpha2 0.9894623160362244
layer1.2.alpha1 0.4513067603111267
layer1.2.alpha2 1.0577064752578735
layer2.0.alpha1 0.5917385816574097
layer2.0.alpha2 0.8838465809822083
layer2.1.alpha1 0.285757452249527
layer2.1.alpha2 0.8960022330284119
layer2.2.alp

Epoch: [185][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.2042 (0.1076)	Prec@1 93.750 (96.385)
Epoch: [185][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0255 (0.1083)	Prec@1 100.000 (96.358)
Epoch: [185][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1184 (0.1116)	Prec@1 96.094 (96.273)
Test: [0/20]	Time 0.343 (0.343)	Loss 0.1602 (0.1602)	Prec@1 94.336 (94.336)
 * Prec@1 93.130
alpha1 1.0798449516296387
layer1.0.alpha1 0.5313196182250977
layer1.0.alpha2 0.9497300386428833
layer1.1.alpha1 0.41958877444267273
layer1.1.alpha2 0.9737870693206787
layer1.2.alpha1 0.44499626755714417
layer1.2.alpha2 1.0486350059509277
layer2.0.alpha1 0.5877702236175537
layer2.0.alpha2 0.8770334124565125
layer2.1.alpha1 0.27574628591537476
layer2.1.alpha2 0.8910754919052124
layer2.2.alpha1 0.37324070930480957
layer2.2.alpha2 0.9155635237693787
layer2.3.alpha1 0.45359447598457336
layer2.3.alpha2 0.958828866481781
layer3.0.alpha1 0.7364436388015747
layer3.0.alpha2 0.8917693495750427
laye

Epoch: [189][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0665 (0.1086)	Prec@1 98.438 (96.400)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.1621 (0.1621)	Prec@1 95.312 (95.312)
 * Prec@1 93.050
alpha1 1.0765873193740845
layer1.0.alpha1 0.5242995023727417
layer1.0.alpha2 0.9444072246551514
layer1.1.alpha1 0.42604535818099976
layer1.1.alpha2 0.9713408350944519
layer1.2.alpha1 0.4447229206562042
layer1.2.alpha2 1.046927809715271
layer2.0.alpha1 0.591794490814209
layer2.0.alpha2 0.8729018568992615
layer2.1.alpha1 0.2737687826156616
layer2.1.alpha2 0.884434163570404
layer2.2.alpha1 0.3758562505245209
layer2.2.alpha2 0.9128630757331848
layer2.3.alpha1 0.4532380998134613
layer2.3.alpha2 0.957648754119873
layer3.0.alpha1 0.7339549660682678
layer3.0.alpha2 0.8882333040237427
layer3.1.alpha1 0.4506010413169861
layer3.1.alpha2 0.8560747504234314
layer3.2.alpha1 0.3349214196205139
layer3.2.alpha2 0.8628091812133789
layer3.3.alpha1 0.3199484944343567
layer3.3.alpha2 0.8721007704734802
layer3.4.a

 * Prec@1 93.230
alpha1 1.0731861591339111
layer1.0.alpha1 0.5264849066734314
layer1.0.alpha2 0.9340859651565552
layer1.1.alpha1 0.4220932722091675
layer1.1.alpha2 0.9752089977264404
layer1.2.alpha1 0.44484585523605347
layer1.2.alpha2 1.0461006164550781
layer2.0.alpha1 0.5895176529884338
layer2.0.alpha2 0.8715388774871826
layer2.1.alpha1 0.2788379490375519
layer2.1.alpha2 0.8762028813362122
layer2.2.alpha1 0.3727741241455078
layer2.2.alpha2 0.9087214469909668
layer2.3.alpha1 0.4507286846637726
layer2.3.alpha2 0.9528959393501282
layer3.0.alpha1 0.7326732277870178
layer3.0.alpha2 0.8817688226699829
layer3.1.alpha1 0.453853577375412
layer3.1.alpha2 0.8481449484825134
layer3.2.alpha1 0.33285075426101685
layer3.2.alpha2 0.8606818914413452
layer3.3.alpha1 0.3205607235431671
layer3.3.alpha2 0.8690642714500427
layer3.4.alpha1 0.2417013794183731
layer3.4.alpha2 0.8859503269195557
layer3.5.alpha1 0.23706425726413727
layer3.5.alpha2 0.9393324851989746
layer4.0.alpha1 0.4141312539577484
layer4.0.a

current lr 1.00000e-03
Epoch: [198][0/313]	Time 0.270 (0.270)	Data 0.155 (0.155)	Loss 0.0992 (0.0992)	Prec@1 95.312 (95.312)
Epoch: [198][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 0.1548 (0.1134)	Prec@1 96.094 (96.400)
Epoch: [198][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.0875 (0.1091)	Prec@1 96.875 (96.411)
Epoch: [198][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.1394 (0.1063)	Prec@1 93.750 (96.492)
Epoch: [198][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.0856 (0.1060)	Prec@1 98.438 (96.479)
Epoch: [198][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0886 (0.1075)	Prec@1 96.875 (96.461)
Epoch: [198][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1916 (0.1086)	Prec@1 94.531 (96.439)
Test: [0/20]	Time 0.363 (0.363)	Loss 0.1516 (0.1516)	Prec@1 95.117 (95.117)
 * Prec@1 93.150
alpha1 1.0761743783950806
layer1.0.alpha1 0.5229523181915283
layer1.0.alpha2 0.9355753660202026
layer1.1.alpha1 0.4190711975097656
layer1.1.alpha2 0.9639326930

VBox(children=(Label(value=' 2.09MB of 2.09MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,199.0
prec,93.22
train_loss,0.10932
val_loss,0.23564
lr,0.001
_runtime,7446.0
_timestamp,1632975613.0
_step,199.0


0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
prec,▁▃▅▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████████████
train_loss,█▆▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▄▃▃▂▂▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
lr,████████████████▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███


[34m[1mwandb[0m: wandb version 0.12.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


current lr 1.00000e-01
Epoch: [0][0/313]	Time 0.315 (0.315)	Data 0.202 (0.202)	Loss 12.9456 (12.9456)	Prec@1 10.938 (10.938)
Epoch: [0][50/313]	Time 0.113 (0.116)	Data 0.000 (0.004)	Loss 2.9236 (11.5568)	Prec@1 15.625 (12.178)
Epoch: [0][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 2.9062 (7.2798)	Prec@1 7.812 (12.933)
Epoch: [0][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 2.8976 (5.8012)	Prec@1 15.625 (14.218)
Epoch: [0][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 2.6214 (5.0342)	Prec@1 19.531 (15.563)
Epoch: [0][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 2.7026 (4.5631)	Prec@1 10.156 (16.276)
Epoch: [0][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 2.6052 (4.2388)	Prec@1 19.531 (16.951)
Test: [0/20]	Time 0.333 (0.333)	Loss 2.0699 (2.0699)	Prec@1 19.727 (19.727)
 * Prec@1 21.060
alpha1 8.296899795532227
layer1.0.alpha1 8.269583702087402
layer1.0.alpha2 8.184185028076172
layer1.1.alpha1 8.327836990356445
layer1.1.alpha2 8.125539779663086
layer1.2.a

Epoch: [4][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 1.8057 (1.8495)	Prec@1 42.188 (38.198)
Epoch: [4][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 1.7660 (1.8399)	Prec@1 40.625 (38.114)
Epoch: [4][250/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 1.9109 (1.8282)	Prec@1 37.500 (38.527)
Epoch: [4][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 1.8093 (1.8155)	Prec@1 44.531 (38.891)
Test: [0/20]	Time 0.326 (0.326)	Loss 1.5412 (1.5412)	Prec@1 45.898 (45.898)
 * Prec@1 45.240
alpha1 4.318203449249268
layer1.0.alpha1 4.073920249938965
layer1.0.alpha2 4.309177875518799
layer1.1.alpha1 3.915515184402466
layer1.1.alpha2 3.962339162826538
layer1.2.alpha1 3.9160187244415283
layer1.2.alpha2 3.628145933151245
layer2.0.alpha1 3.9107048511505127
layer2.0.alpha2 4.096043586730957
layer2.1.alpha1 3.9133412837982178
layer2.1.alpha2 4.161936283111572
layer2.2.alpha1 3.91274356842041
layer2.2.alpha2 4.1607890129089355
layer2.3.alpha1 3.9197580814361572
layer2.3.alpha2 4.5063638687

Epoch: [8][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 1.3670 (1.3280)	Prec@1 52.344 (54.202)
Test: [0/20]	Time 0.321 (0.321)	Loss 1.1403 (1.1403)	Prec@1 59.961 (59.961)
 * Prec@1 57.940
alpha1 3.095108985900879
layer1.0.alpha1 2.4139935970306396
layer1.0.alpha2 2.8652358055114746
layer1.1.alpha1 2.127312421798706
layer1.1.alpha2 2.628413438796997
layer1.2.alpha1 2.100795030593872
layer1.2.alpha2 2.2840285301208496
layer2.0.alpha1 2.0722124576568604
layer2.0.alpha2 2.569230318069458
layer2.1.alpha1 2.1124234199523926
layer2.1.alpha2 2.387746572494507
layer2.2.alpha1 2.116887092590332
layer2.2.alpha2 2.45918345451355
layer2.3.alpha1 2.0723280906677246
layer2.3.alpha2 2.435136318206787
layer3.0.alpha1 2.076763391494751
layer3.0.alpha2 2.6237661838531494
layer3.1.alpha1 2.3031857013702393
layer3.1.alpha2 2.770275592803955
layer3.2.alpha1 2.3239967823028564
layer3.2.alpha2 2.6948976516723633
layer3.3.alpha1 2.2382967472076416
layer3.3.alpha2 2.767385482788086
layer3.4.alpha1 2.2331

current lr 1.00000e-01
Epoch: [13][0/313]	Time 0.264 (0.264)	Data 0.152 (0.152)	Loss 0.8676 (0.8676)	Prec@1 71.875 (71.875)
Epoch: [13][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 1.0396 (0.9964)	Prec@1 63.281 (66.268)
Epoch: [13][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.8336 (0.9875)	Prec@1 71.094 (66.097)
Epoch: [13][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.9254 (0.9753)	Prec@1 76.562 (66.572)
Epoch: [13][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.9992 (0.9722)	Prec@1 65.625 (66.682)
Epoch: [13][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.8805 (0.9665)	Prec@1 71.094 (66.985)
Epoch: [13][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.8671 (0.9601)	Prec@1 71.875 (67.193)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.7950 (0.7950)	Prec@1 71.680 (71.680)
 * Prec@1 70.490
alpha1 2.538998603820801
layer1.0.alpha1 1.564520239830017
layer1.0.alpha2 2.268977403640747
layer1.1.alpha1 1.3753604888916016
layer1.1.alpha2 2.186842679977417
laye

Epoch: [17][100/313]	Time 0.112 (0.115)	Data 0.000 (0.002)	Loss 0.7395 (0.7648)	Prec@1 76.562 (74.257)
Epoch: [17][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.7820 (0.7666)	Prec@1 73.438 (74.115)
Epoch: [17][200/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.7057 (0.7677)	Prec@1 76.562 (74.137)
Epoch: [17][250/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.7215 (0.7680)	Prec@1 76.562 (74.091)
Epoch: [17][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.7426 (0.7733)	Prec@1 73.438 (74.009)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.5940 (0.5940)	Prec@1 80.273 (80.273)
 * Prec@1 77.800
alpha1 2.406421661376953
layer1.0.alpha1 1.2067656517028809
layer1.0.alpha2 2.1967175006866455
layer1.1.alpha1 1.150789499282837
layer1.1.alpha2 2.129369020462036
layer1.2.alpha1 1.2046093940734863
layer1.2.alpha2 2.132814645767212
layer2.0.alpha1 1.6234540939331055
layer2.0.alpha2 1.809770941734314
layer2.1.alpha1 1.1816887855529785
layer2.1.alpha2 1.7033628225326538
layer2.2.alpha1 1.011

Epoch: [21][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.7150 (0.6655)	Prec@1 71.094 (77.398)
Epoch: [21][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.9438 (0.6707)	Prec@1 65.625 (77.247)
Epoch: [21][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.7786 (0.6700)	Prec@1 73.438 (77.336)
Test: [0/20]	Time 0.332 (0.332)	Loss 0.6101 (0.6101)	Prec@1 78.711 (78.711)
 * Prec@1 80.000
alpha1 2.1895623207092285
layer1.0.alpha1 1.0798999071121216
layer1.0.alpha2 2.054888963699341
layer1.1.alpha1 0.9051435589790344
layer1.1.alpha2 2.0247201919555664
layer1.2.alpha1 1.0181177854537964
layer1.2.alpha2 1.9243954420089722
layer2.0.alpha1 1.4947773218154907
layer2.0.alpha2 1.7371262311935425
layer2.1.alpha1 1.0060968399047852
layer2.1.alpha2 1.5361303091049194
layer2.2.alpha1 0.85627681016922
layer2.2.alpha2 1.5822291374206543
layer2.3.alpha1 0.9075496792793274
layer2.3.alpha2 1.6849291324615479
layer3.0.alpha1 1.0302107334136963
layer3.0.alpha2 1.8028757572174072
layer3.1.alpha1

Epoch: [25][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.6134 (0.6033)	Prec@1 79.688 (79.612)
Test: [0/20]	Time 0.322 (0.322)	Loss 0.5509 (0.5509)	Prec@1 82.227 (82.227)
 * Prec@1 79.800
alpha1 1.968705415725708
layer1.0.alpha1 1.0215657949447632
layer1.0.alpha2 1.815796136856079
layer1.1.alpha1 0.817852795124054
layer1.1.alpha2 1.8678098917007446
layer1.2.alpha1 0.9455430507659912
layer1.2.alpha2 1.949877381324768
layer2.0.alpha1 1.4736100435256958
layer2.0.alpha2 1.583796739578247
layer2.1.alpha1 0.9285959005355835
layer2.1.alpha2 1.5553271770477295
layer2.2.alpha1 0.7270565629005432
layer2.2.alpha2 1.4705700874328613
layer2.3.alpha1 0.7369717955589294
layer2.3.alpha2 1.5265640020370483
layer3.0.alpha1 0.8703287839889526
layer3.0.alpha2 1.6793365478515625
layer3.1.alpha1 0.6991864442825317
layer3.1.alpha2 1.7950000762939453
layer3.2.alpha1 0.7014470100402832
layer3.2.alpha2 1.7419373989105225
layer3.3.alpha1 0.6999014616012573
layer3.3.alpha2 1.7640478610992432
layer3.4.alph

 * Prec@1 81.700
alpha1 1.8439342975616455
layer1.0.alpha1 1.0109037160873413
layer1.0.alpha2 1.8832095861434937
layer1.1.alpha1 0.8338772058486938
layer1.1.alpha2 1.8252049684524536
layer1.2.alpha1 0.8155490159988403
layer1.2.alpha2 1.9147686958312988
layer2.0.alpha1 1.3985971212387085
layer2.0.alpha2 1.5763568878173828
layer2.1.alpha1 0.8428449630737305
layer2.1.alpha2 1.5263818502426147
layer2.2.alpha1 0.7258032560348511
layer2.2.alpha2 1.5696985721588135
layer2.3.alpha1 0.7360451817512512
layer2.3.alpha2 1.749210238456726
layer3.0.alpha1 0.9736800193786621
layer3.0.alpha2 1.6755577325820923
layer3.1.alpha1 0.7586974501609802
layer3.1.alpha2 1.6335818767547607
layer3.2.alpha1 0.6612590551376343
layer3.2.alpha2 1.6849172115325928
layer3.3.alpha1 0.6197009086608887
layer3.3.alpha2 1.6841764450073242
layer3.4.alpha1 0.601250171661377
layer3.4.alpha2 1.6565048694610596
layer3.5.alpha1 0.6111422181129456
layer3.5.alpha2 1.664263367652893
layer4.0.alpha1 0.746074914932251
layer4.0.alpha2 

Epoch: [34][0/313]	Time 0.272 (0.272)	Data 0.157 (0.157)	Loss 0.5358 (0.5358)	Prec@1 80.469 (80.469)
Epoch: [34][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 0.5147 (0.5182)	Prec@1 85.938 (82.675)
Epoch: [34][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.5450 (0.5218)	Prec@1 83.594 (82.410)
Epoch: [34][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.4392 (0.5258)	Prec@1 84.375 (82.373)
Epoch: [34][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.5017 (0.5227)	Prec@1 82.812 (82.517)
Epoch: [34][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.6389 (0.5236)	Prec@1 77.344 (82.458)
Epoch: [34][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.5315 (0.5252)	Prec@1 82.812 (82.506)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.5380 (0.5380)	Prec@1 82.617 (82.617)
 * Prec@1 80.150
alpha1 1.8004775047302246
layer1.0.alpha1 0.968390703201294
layer1.0.alpha2 1.690621018409729
layer1.1.alpha1 0.7021170854568481
layer1.1.alpha2 1.829395055770874
layer1.2.alpha1 0.87311893

Epoch: [38][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.4999 (0.4897)	Prec@1 85.938 (83.493)
Epoch: [38][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.5096 (0.4874)	Prec@1 83.594 (83.475)
Epoch: [38][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.4869 (0.4944)	Prec@1 82.812 (83.368)
Epoch: [38][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.4369 (0.4957)	Prec@1 83.594 (83.342)
Epoch: [38][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.3501 (0.4979)	Prec@1 87.500 (83.300)
Test: [0/20]	Time 0.330 (0.330)	Loss 0.5351 (0.5351)	Prec@1 84.570 (84.570)
 * Prec@1 80.770
alpha1 1.7124158143997192
layer1.0.alpha1 1.031803011894226
layer1.0.alpha2 1.7210028171539307
layer1.1.alpha1 0.7293437123298645
layer1.1.alpha2 1.8483843803405762
layer1.2.alpha1 1.0382380485534668
layer1.2.alpha2 1.9391307830810547
layer2.0.alpha1 1.3739031553268433
layer2.0.alpha2 1.5290457010269165
layer2.1.alpha1 0.8238979578018188
layer2.1.alpha2 1.4852423667907715
layer2.2.alpha1 0

Epoch: [42][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4912 (0.4705)	Prec@1 82.031 (84.208)
Epoch: [42][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.3928 (0.4749)	Prec@1 88.281 (84.054)
Epoch: [42][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4625 (0.4724)	Prec@1 83.594 (84.196)
Test: [0/20]	Time 0.321 (0.321)	Loss 0.4352 (0.4352)	Prec@1 85.352 (85.352)
 * Prec@1 83.220
alpha1 1.729960322380066
layer1.0.alpha1 1.1425762176513672
layer1.0.alpha2 1.8084653615951538
layer1.1.alpha1 0.7602584362030029
layer1.1.alpha2 1.8467743396759033
layer1.2.alpha1 0.9871776700019836
layer1.2.alpha2 1.9834095239639282
layer2.0.alpha1 1.3196853399276733
layer2.0.alpha2 1.5931826829910278
layer2.1.alpha1 0.764388918876648
layer2.1.alpha2 1.54983389377594
layer2.2.alpha1 0.5793132781982422
layer2.2.alpha2 1.5548654794692993
layer2.3.alpha1 0.6586652398109436
layer2.3.alpha2 1.4769556522369385
layer3.0.alpha1 0.8671568036079407
layer3.0.alpha2 1.6218684911727905
layer3.1.alpha1 

Epoch: [46][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.5488 (0.4606)	Prec@1 82.812 (84.645)
Test: [0/20]	Time 0.328 (0.328)	Loss 0.4470 (0.4470)	Prec@1 84.570 (84.570)
 * Prec@1 83.330
alpha1 1.7156531810760498
layer1.0.alpha1 1.033280849456787
layer1.0.alpha2 1.732951045036316
layer1.1.alpha1 0.7880529165267944
layer1.1.alpha2 1.7032727003097534
layer1.2.alpha1 0.956667959690094
layer1.2.alpha2 1.9768567085266113
layer2.0.alpha1 1.4158166646957397
layer2.0.alpha2 1.5504043102264404
layer2.1.alpha1 0.7554389238357544
layer2.1.alpha2 1.4818115234375
layer2.2.alpha1 0.5647150874137878
layer2.2.alpha2 1.4635831117630005
layer2.3.alpha1 0.6651610732078552
layer2.3.alpha2 1.424095869064331
layer3.0.alpha1 0.949503481388092
layer3.0.alpha2 1.55253005027771
layer3.1.alpha1 0.6211035847663879
layer3.1.alpha2 1.6465181112289429
layer3.2.alpha1 0.5766919255256653
layer3.2.alpha2 1.6306482553482056
layer3.3.alpha1 0.6288729310035706
layer3.3.alpha2 1.5874077081680298
layer3.4.alpha1 0.

 * Prec@1 84.400
alpha1 1.7758461236953735
layer1.0.alpha1 1.0933480262756348
layer1.0.alpha2 1.7530274391174316
layer1.1.alpha1 0.8080283999443054
layer1.1.alpha2 1.7652910947799683
layer1.2.alpha1 1.0251784324645996
layer1.2.alpha2 1.9366551637649536
layer2.0.alpha1 1.3889933824539185
layer2.0.alpha2 1.4488105773925781
layer2.1.alpha1 0.7921507954597473
layer2.1.alpha2 1.5330262184143066
layer2.2.alpha1 0.5198239088058472
layer2.2.alpha2 1.5047526359558105
layer2.3.alpha1 0.5891201496124268
layer2.3.alpha2 1.4443026781082153
layer3.0.alpha1 0.943736732006073
layer3.0.alpha2 1.660888671875
layer3.1.alpha1 0.5659950971603394
layer3.1.alpha2 1.5706727504730225
layer3.2.alpha1 0.5252725481987
layer3.2.alpha2 1.5421619415283203
layer3.3.alpha1 0.6314228177070618
layer3.3.alpha2 1.5858805179595947
layer3.4.alpha1 0.5264595746994019
layer3.4.alpha2 1.5932443141937256
layer3.5.alpha1 0.4758683443069458
layer3.5.alpha2 1.5397202968597412
layer4.0.alpha1 0.8356930613517761
layer4.0.alpha2 1.38

Epoch: [55][0/313]	Time 0.279 (0.279)	Data 0.166 (0.166)	Loss 0.3367 (0.3367)	Prec@1 90.625 (90.625)
Epoch: [55][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.4642 (0.4208)	Prec@1 82.812 (86.060)
Epoch: [55][100/313]	Time 0.113 (0.115)	Data 0.000 (0.002)	Loss 0.3643 (0.4289)	Prec@1 85.938 (85.582)
Epoch: [55][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.5233 (0.4282)	Prec@1 78.906 (85.487)
Epoch: [55][200/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.5105 (0.4326)	Prec@1 78.125 (85.316)
Epoch: [55][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.6141 (0.4409)	Prec@1 77.344 (85.010)
Epoch: [55][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.4852 (0.4446)	Prec@1 84.375 (84.928)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.3625 (0.3625)	Prec@1 88.281 (88.281)
 * Prec@1 86.090
alpha1 1.8192936182022095
layer1.0.alpha1 1.0722712278366089
layer1.0.alpha2 1.7076568603515625
layer1.1.alpha1 0.7931718230247498
layer1.1.alpha2 1.8291584253311157
layer1.2.alpha1 1.00476

Epoch: [59][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.4714 (0.4120)	Prec@1 82.031 (86.471)
Epoch: [59][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.4270 (0.4171)	Prec@1 83.594 (86.124)
Epoch: [59][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4583 (0.4258)	Prec@1 80.469 (85.716)
Epoch: [59][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4827 (0.4249)	Prec@1 85.938 (85.816)
Epoch: [59][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.4217 (0.4275)	Prec@1 82.812 (85.670)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.3489 (0.3489)	Prec@1 87.891 (87.891)
 * Prec@1 86.300
alpha1 1.8283532857894897
layer1.0.alpha1 1.1092313528060913
layer1.0.alpha2 1.6840736865997314
layer1.1.alpha1 0.8359290361404419
layer1.1.alpha2 1.7435791492462158
layer1.2.alpha1 1.0670675039291382
layer1.2.alpha2 1.79726243019104
layer2.0.alpha1 1.2858327627182007
layer2.0.alpha2 1.5858830213546753
layer2.1.alpha1 0.6579084396362305
layer2.1.alpha2 1.461936116218567
layer2.2.alpha1 0.5

Epoch: [63][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.4641 (0.4220)	Prec@1 86.719 (85.693)
Epoch: [63][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.5218 (0.4247)	Prec@1 81.250 (85.688)
Epoch: [63][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4560 (0.4304)	Prec@1 85.156 (85.429)
Test: [0/20]	Time 0.323 (0.323)	Loss 0.5323 (0.5323)	Prec@1 83.008 (83.008)
 * Prec@1 82.250
alpha1 1.8044991493225098
layer1.0.alpha1 1.0927729606628418
layer1.0.alpha2 1.7488880157470703
layer1.1.alpha1 0.8340362310409546
layer1.1.alpha2 1.762656331062317
layer1.2.alpha1 1.0641860961914062
layer1.2.alpha2 1.839215636253357
layer2.0.alpha1 1.3559887409210205
layer2.0.alpha2 1.5194799900054932
layer2.1.alpha1 0.7361751198768616
layer2.1.alpha2 1.4090749025344849
layer2.2.alpha1 0.49445638060569763
layer2.2.alpha2 1.4790048599243164
layer2.3.alpha1 0.651683509349823
layer2.3.alpha2 1.407348394393921
layer3.0.alpha1 0.9188858866691589
layer3.0.alpha2 1.5950648784637451
layer3.1.alpha1

Epoch: [67][300/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.3726 (0.4189)	Prec@1 85.156 (85.984)
Test: [0/20]	Time 0.320 (0.320)	Loss 0.4852 (0.4852)	Prec@1 82.812 (82.812)
 * Prec@1 81.680
alpha1 1.7663862705230713
layer1.0.alpha1 1.0896251201629639
layer1.0.alpha2 1.5956085920333862
layer1.1.alpha1 0.7541103959083557
layer1.1.alpha2 1.732513666152954
layer1.2.alpha1 0.9428903460502625
layer1.2.alpha2 1.7634185552597046
layer2.0.alpha1 1.3123478889465332
layer2.0.alpha2 1.4045273065567017
layer2.1.alpha1 0.656425952911377
layer2.1.alpha2 1.3385570049285889
layer2.2.alpha1 0.4512464702129364
layer2.2.alpha2 1.4037117958068848
layer2.3.alpha1 0.6318394541740417
layer2.3.alpha2 1.4285515546798706
layer3.0.alpha1 0.8476644158363342
layer3.0.alpha2 1.5533605813980103
layer3.1.alpha1 0.6190032958984375
layer3.1.alpha2 1.5935243368148804
layer3.2.alpha1 0.5568645000457764
layer3.2.alpha2 1.559248924255371
layer3.3.alpha1 0.4797109365463257
layer3.3.alpha2 1.513376235961914
layer3.4.alp

 * Prec@1 86.220
alpha1 1.7368119955062866
layer1.0.alpha1 1.0317476987838745
layer1.0.alpha2 1.7510955333709717
layer1.1.alpha1 0.7407091856002808
layer1.1.alpha2 1.7586127519607544
layer1.2.alpha1 1.0491840839385986
layer1.2.alpha2 1.8171067237854004
layer2.0.alpha1 1.3390367031097412
layer2.0.alpha2 1.4828801155090332
layer2.1.alpha1 0.6987463235855103
layer2.1.alpha2 1.4087342023849487
layer2.2.alpha1 0.5719484686851501
layer2.2.alpha2 1.5117425918579102
layer2.3.alpha1 0.6336897015571594
layer2.3.alpha2 1.4383699893951416
layer3.0.alpha1 1.0333069562911987
layer3.0.alpha2 1.6075656414031982
layer3.1.alpha1 0.5603563189506531
layer3.1.alpha2 1.543966293334961
layer3.2.alpha1 0.44297850131988525
layer3.2.alpha2 1.563188910484314
layer3.3.alpha1 0.48008403182029724
layer3.3.alpha2 1.4773985147476196
layer3.4.alpha1 0.5997443795204163
layer3.4.alpha2 1.3975130319595337
layer3.5.alpha1 0.5238873362541199
layer3.5.alpha2 1.4790492057800293
layer4.0.alpha1 0.882109522819519
layer4.0.alph

Epoch: [76][0/313]	Time 0.272 (0.272)	Data 0.156 (0.156)	Loss 0.4161 (0.4161)	Prec@1 86.719 (86.719)
Epoch: [76][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 0.3334 (0.3933)	Prec@1 90.625 (87.255)
Epoch: [76][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.3014 (0.3949)	Prec@1 92.188 (86.889)
Epoch: [76][150/313]	Time 0.113 (0.114)	Data 0.000 (0.001)	Loss 0.5361 (0.4086)	Prec@1 82.031 (86.305)
Epoch: [76][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4014 (0.4158)	Prec@1 86.719 (86.039)
Epoch: [76][250/313]	Time 0.116 (0.113)	Data 0.000 (0.001)	Loss 0.4025 (0.4169)	Prec@1 83.594 (85.969)
Epoch: [76][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.4952 (0.4165)	Prec@1 80.469 (85.940)
Test: [0/20]	Time 0.318 (0.318)	Loss 0.3521 (0.3521)	Prec@1 88.672 (88.672)
 * Prec@1 85.680
alpha1 1.6811467409133911
layer1.0.alpha1 0.9915149807929993
layer1.0.alpha2 1.6707289218902588
layer1.1.alpha1 0.8560322523117065
layer1.1.alpha2 1.6999961137771606
layer1.2.alpha1 0.98749

Epoch: [80][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.3761 (0.3158)	Prec@1 85.156 (89.743)
Epoch: [80][150/313]	Time 0.112 (0.114)	Data 0.000 (0.001)	Loss 0.2505 (0.3066)	Prec@1 93.750 (90.025)
Epoch: [80][200/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.2055 (0.2963)	Prec@1 92.188 (90.326)
Epoch: [80][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.2210 (0.2897)	Prec@1 93.750 (90.585)
Epoch: [80][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2698 (0.2828)	Prec@1 90.625 (90.866)
Test: [0/20]	Time 0.335 (0.335)	Loss 0.2324 (0.2324)	Prec@1 91.992 (91.992)
 * Prec@1 91.140
alpha1 1.623401403427124
layer1.0.alpha1 0.9175552129745483
layer1.0.alpha2 1.633007287979126
layer1.1.alpha1 0.9028005599975586
layer1.1.alpha2 1.7257694005966187
layer1.2.alpha1 0.9625542759895325
layer1.2.alpha2 1.7206707000732422
layer2.0.alpha1 1.3606352806091309
layer2.0.alpha2 1.426994800567627
layer2.1.alpha1 0.4994387924671173
layer2.1.alpha2 1.3926841020584106
layer2.2.alpha1 0.5

Epoch: [84][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1506 (0.1975)	Prec@1 96.094 (93.696)
Epoch: [84][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.2558 (0.2002)	Prec@1 91.406 (93.641)
Epoch: [84][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1872 (0.1996)	Prec@1 93.750 (93.607)
Test: [0/20]	Time 0.345 (0.345)	Loss 0.2121 (0.2121)	Prec@1 93.750 (93.750)
 * Prec@1 91.570
alpha1 1.586804986000061
layer1.0.alpha1 0.8500450253486633
layer1.0.alpha2 1.5611190795898438
layer1.1.alpha1 0.8332880735397339
layer1.1.alpha2 1.6139510869979858
layer1.2.alpha1 0.9251516461372375
layer1.2.alpha2 1.6400549411773682
layer2.0.alpha1 1.2798880338668823
layer2.0.alpha2 1.342710256576538
layer2.1.alpha1 0.4241524636745453
layer2.1.alpha2 1.3434494733810425
layer2.2.alpha1 0.47271522879600525
layer2.2.alpha2 1.3071154356002808
layer2.3.alpha1 0.6387619972229004
layer2.3.alpha2 1.3074389696121216
layer3.0.alpha1 0.8843603134155273
layer3.0.alpha2 1.4181756973266602
layer3.1.alph

Epoch: [88][300/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.2393 (0.1925)	Prec@1 88.281 (93.820)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.1977 (0.1977)	Prec@1 93.750 (93.750)
 * Prec@1 91.700
alpha1 1.5444235801696777
layer1.0.alpha1 0.8763720393180847
layer1.0.alpha2 1.4783861637115479
layer1.1.alpha1 0.7676342129707336
layer1.1.alpha2 1.519484519958496
layer1.2.alpha1 0.8899394869804382
layer1.2.alpha2 1.5749989748001099
layer2.0.alpha1 1.2161736488342285
layer2.0.alpha2 1.3143808841705322
layer2.1.alpha1 0.3955088257789612
layer2.1.alpha2 1.2623554468154907
layer2.2.alpha1 0.4167103171348572
layer2.2.alpha2 1.2449883222579956
layer2.3.alpha1 0.5790937542915344
layer2.3.alpha2 1.2288126945495605
layer3.0.alpha1 0.8841807246208191
layer3.0.alpha2 1.3627411127090454
layer3.1.alpha1 0.4364205598831177
layer3.1.alpha2 1.3694175481796265
layer3.2.alpha1 0.39473041892051697
layer3.2.alpha2 1.3575193881988525
layer3.3.alpha1 0.4175489544868469
layer3.3.alpha2 1.3124581575393677
layer3.4

 * Prec@1 91.790
alpha1 1.5022218227386475
layer1.0.alpha1 0.8375179767608643
layer1.0.alpha2 1.4105688333511353
layer1.1.alpha1 0.7095540761947632
layer1.1.alpha2 1.4353407621383667
layer1.2.alpha1 0.8767784833908081
layer1.2.alpha2 1.5357251167297363
layer2.0.alpha1 1.1695153713226318
layer2.0.alpha2 1.2725470066070557
layer2.1.alpha1 0.3802175223827362
layer2.1.alpha2 1.234453558921814
layer2.2.alpha1 0.4125763177871704
layer2.2.alpha2 1.2202290296554565
layer2.3.alpha1 0.5443848967552185
layer2.3.alpha2 1.195401668548584
layer3.0.alpha1 0.80417799949646
layer3.0.alpha2 1.28573739528656
layer3.1.alpha1 0.4473029375076294
layer3.1.alpha2 1.299448847770691
layer3.2.alpha1 0.380603164434433
layer3.2.alpha2 1.2752058506011963
layer3.3.alpha1 0.40187472105026245
layer3.3.alpha2 1.2699462175369263
layer3.4.alpha1 0.433748722076416
layer3.4.alpha2 1.2931030988693237
layer3.5.alpha1 0.42282745242118835
layer3.5.alpha2 1.3247480392456055
layer4.0.alpha1 0.7562211155891418
layer4.0.alpha2 1.0

Epoch: [97][0/313]	Time 0.286 (0.286)	Data 0.164 (0.164)	Loss 0.1861 (0.1861)	Prec@1 94.531 (94.531)
Epoch: [97][50/313]	Time 0.113 (0.116)	Data 0.000 (0.003)	Loss 0.1893 (0.1725)	Prec@1 94.531 (94.439)
Epoch: [97][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.2424 (0.1711)	Prec@1 92.969 (94.431)
Epoch: [97][150/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1770 (0.1743)	Prec@1 96.875 (94.304)
Epoch: [97][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1001 (0.1761)	Prec@1 96.875 (94.286)
Epoch: [97][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1675 (0.1763)	Prec@1 95.312 (94.316)
Epoch: [97][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.0989 (0.1786)	Prec@1 96.875 (94.225)
Test: [0/20]	Time 0.331 (0.331)	Loss 0.2015 (0.2015)	Prec@1 93.945 (93.945)
 * Prec@1 91.960
alpha1 1.4222906827926636
layer1.0.alpha1 0.7696807980537415
layer1.0.alpha2 1.3536838293075562
layer1.1.alpha1 0.6449597477912903
layer1.1.alpha2 1.3669742345809937
layer1.2.alpha1 0.83416

Epoch: [101][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.1780 (0.1583)	Prec@1 94.531 (94.964)
Epoch: [101][150/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.2690 (0.1641)	Prec@1 88.281 (94.759)
Epoch: [101][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1526 (0.1650)	Prec@1 96.094 (94.718)
Epoch: [101][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1876 (0.1634)	Prec@1 92.188 (94.709)
Epoch: [101][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1935 (0.1665)	Prec@1 93.750 (94.632)
Test: [0/20]	Time 0.343 (0.343)	Loss 0.2064 (0.2064)	Prec@1 94.727 (94.727)
 * Prec@1 91.840
alpha1 1.3572415113449097
layer1.0.alpha1 0.7527919411659241
layer1.0.alpha2 1.2841899394989014
layer1.1.alpha1 0.5945157408714294
layer1.1.alpha2 1.3024804592132568
layer1.2.alpha1 0.7764060497283936
layer1.2.alpha2 1.393701434135437
layer2.0.alpha1 1.066273808479309
layer2.0.alpha2 1.1727399826049805
layer2.1.alpha1 0.39793235063552856
layer2.1.alpha2 1.1328678131103516
layer2.2.alp

Epoch: [105][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1663 (0.1655)	Prec@1 93.750 (94.667)
Epoch: [105][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1321 (0.1635)	Prec@1 95.312 (94.718)
Epoch: [105][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.2641 (0.1645)	Prec@1 93.750 (94.710)
Test: [0/20]	Time 0.322 (0.322)	Loss 0.2110 (0.2110)	Prec@1 93.945 (93.945)
 * Prec@1 92.070
alpha1 1.321645736694336
layer1.0.alpha1 0.7285133600234985
layer1.0.alpha2 1.2364994287490845
layer1.1.alpha1 0.550487220287323
layer1.1.alpha2 1.2507386207580566
layer1.2.alpha1 0.7652901411056519
layer1.2.alpha2 1.3401565551757812
layer2.0.alpha1 1.010663390159607
layer2.0.alpha2 1.1310861110687256
layer2.1.alpha1 0.3193560838699341
layer2.1.alpha2 1.080666422843933
layer2.2.alpha1 0.33368048071861267
layer2.2.alpha2 1.1158558130264282
layer2.3.alpha1 0.5252248644828796
layer2.3.alpha2 1.136214017868042
layer3.0.alpha1 0.7694860696792603
layer3.0.alpha2 1.129623532295227
layer3.1.alpha

Epoch: [109][300/313]	Time 0.286 (0.287)	Data 0.000 (0.001)	Loss 0.1606 (0.1605)	Prec@1 92.969 (94.716)
Test: [0/20]	Time 0.488 (0.488)	Loss 0.2320 (0.2320)	Prec@1 93.359 (93.359)
 * Prec@1 91.940
alpha1 1.3033877611160278
layer1.0.alpha1 0.716600239276886
layer1.0.alpha2 1.1916022300720215
layer1.1.alpha1 0.5114133954048157
layer1.1.alpha2 1.1801308393478394
layer1.2.alpha1 0.7648587226867676
layer1.2.alpha2 1.279441475868225
layer2.0.alpha1 0.9788507223129272
layer2.0.alpha2 1.1111425161361694
layer2.1.alpha1 0.3719399571418762
layer2.1.alpha2 1.0625709295272827
layer2.2.alpha1 0.2964096963405609
layer2.2.alpha2 1.0725287199020386
layer2.3.alpha1 0.49131256341934204
layer2.3.alpha2 1.1065905094146729
layer3.0.alpha1 0.7275149822235107
layer3.0.alpha2 1.1248080730438232
layer3.1.alpha1 0.377206951379776
layer3.1.alpha2 1.1267837285995483
layer3.2.alpha1 0.34257906675338745
layer3.2.alpha2 1.1206579208374023
layer3.3.alpha1 0.33016061782836914
layer3.3.alpha2 1.0901037454605103
layer3.

 * Prec@1 91.560
alpha1 1.2705390453338623
layer1.0.alpha1 0.7052340507507324
layer1.0.alpha2 1.1436278820037842
layer1.1.alpha1 0.4755876064300537
layer1.1.alpha2 1.1700350046157837
layer1.2.alpha1 0.701068639755249
layer1.2.alpha2 1.2405450344085693
layer2.0.alpha1 0.965907871723175
layer2.0.alpha2 1.074822187423706
layer2.1.alpha1 0.3419960141181946
layer2.1.alpha2 1.043221354484558
layer2.2.alpha1 0.3224770426750183
layer2.2.alpha2 1.033981204032898
layer2.3.alpha1 0.5094463229179382
layer2.3.alpha2 1.0854958295822144
layer3.0.alpha1 0.7217486500740051
layer3.0.alpha2 1.0887809991836548
layer3.1.alpha1 0.37799981236457825
layer3.1.alpha2 1.0930941104888916
layer3.2.alpha1 0.3356018662452698
layer3.2.alpha2 1.087699055671692
layer3.3.alpha1 0.35652148723602295
layer3.3.alpha2 1.0872142314910889
layer3.4.alpha1 0.3770224153995514
layer3.4.alpha2 1.1054662466049194
layer3.5.alpha1 0.3763757646083832
layer3.5.alpha2 1.137145757675171
layer4.0.alpha1 0.6198739409446716
layer4.0.alpha2 0

Epoch: [118][0/313]	Time 0.514 (0.514)	Data 0.236 (0.236)	Loss 0.1771 (0.1771)	Prec@1 94.531 (94.531)
Epoch: [118][50/313]	Time 0.287 (0.262)	Data 0.000 (0.005)	Loss 0.2732 (0.1674)	Prec@1 92.969 (94.470)
Epoch: [118][100/313]	Time 0.287 (0.272)	Data 0.000 (0.002)	Loss 0.1120 (0.1564)	Prec@1 96.875 (94.841)
Epoch: [118][150/313]	Time 0.281 (0.275)	Data 0.000 (0.002)	Loss 0.1862 (0.1598)	Prec@1 92.969 (94.702)
Epoch: [118][200/313]	Time 0.274 (0.277)	Data 0.000 (0.001)	Loss 0.1680 (0.1579)	Prec@1 94.531 (94.776)
Epoch: [118][250/313]	Time 0.116 (0.272)	Data 0.000 (0.001)	Loss 0.1994 (0.1575)	Prec@1 92.969 (94.743)
Epoch: [118][300/313]	Time 0.280 (0.269)	Data 0.000 (0.001)	Loss 0.0873 (0.1563)	Prec@1 96.875 (94.809)
Test: [0/20]	Time 0.514 (0.514)	Loss 0.2400 (0.2400)	Prec@1 92.383 (92.383)
 * Prec@1 91.740
alpha1 1.1795412302017212
layer1.0.alpha1 0.6850040555000305
layer1.0.alpha2 1.133172631263733
layer1.1.alpha1 0.44407761096954346
layer1.1.alpha2 1.1237895488739014
layer1.2.alpha1 

Epoch: [122][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.1862 (0.1474)	Prec@1 95.312 (95.150)
Epoch: [122][150/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1132 (0.1493)	Prec@1 95.312 (95.095)
Epoch: [122][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1042 (0.1472)	Prec@1 96.875 (95.180)
Epoch: [122][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1849 (0.1453)	Prec@1 95.312 (95.244)
Epoch: [122][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1108 (0.1478)	Prec@1 96.094 (95.136)
Test: [0/20]	Time 0.336 (0.336)	Loss 0.2102 (0.2102)	Prec@1 94.336 (94.336)
 * Prec@1 91.900
alpha1 1.1642227172851562
layer1.0.alpha1 0.6498845219612122
layer1.0.alpha2 1.0990065336227417
layer1.1.alpha1 0.40691447257995605
layer1.1.alpha2 1.067932367324829
layer1.2.alpha1 0.6792550086975098
layer1.2.alpha2 1.1860169172286987
layer2.0.alpha1 0.9469277262687683
layer2.0.alpha2 1.0157614946365356
layer2.1.alpha1 0.3107360601425171
layer2.1.alpha2 0.9587801098823547
layer2.2.al

Epoch: [126][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1170 (0.1488)	Prec@1 96.094 (95.219)
Epoch: [126][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1197 (0.1475)	Prec@1 96.094 (95.244)
Epoch: [126][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.2098 (0.1488)	Prec@1 92.969 (95.216)
Test: [0/20]	Time 0.332 (0.332)	Loss 0.1911 (0.1911)	Prec@1 94.141 (94.141)
 * Prec@1 92.100
alpha1 1.1813791990280151
layer1.0.alpha1 0.6692636609077454
layer1.0.alpha2 1.0701322555541992
layer1.1.alpha1 0.3714771568775177
layer1.1.alpha2 1.0611836910247803
layer1.2.alpha1 0.6570581197738647
layer1.2.alpha2 1.1423735618591309
layer2.0.alpha1 0.9184214472770691
layer2.0.alpha2 0.9697834253311157
layer2.1.alpha1 0.2994523048400879
layer2.1.alpha2 0.946312427520752
layer2.2.alpha1 0.31112560629844666
layer2.2.alpha2 0.9503462314605713
layer2.3.alpha1 0.457406222820282
layer2.3.alpha2 0.9937443137168884
layer3.0.alpha1 0.7049042582511902
layer3.0.alpha2 1.0207725763320923
layer3.1.a

Epoch: [130][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1405 (0.1473)	Prec@1 92.969 (95.128)
Test: [0/20]	Time 0.323 (0.323)	Loss 0.2098 (0.2098)	Prec@1 94.336 (94.336)
 * Prec@1 92.140
alpha1 1.1506264209747314
layer1.0.alpha1 0.6406971216201782
layer1.0.alpha2 1.0323032140731812
layer1.1.alpha1 0.33459073305130005
layer1.1.alpha2 1.0316054821014404
layer1.2.alpha1 0.6339995265007019
layer1.2.alpha2 1.1147679090499878
layer2.0.alpha1 0.8926094174385071
layer2.0.alpha2 0.9457962512969971
layer2.1.alpha1 0.31245890259742737
layer2.1.alpha2 0.904199481010437
layer2.2.alpha1 0.2978653609752655
layer2.2.alpha2 0.9385619163513184
layer2.3.alpha1 0.48407235741615295
layer2.3.alpha2 0.9739779233932495
layer3.0.alpha1 0.6833189725875854
layer3.0.alpha2 1.0361621379852295
layer3.1.alpha1 0.3053538203239441
layer3.1.alpha2 0.9920864105224609
layer3.2.alpha1 0.319474458694458
layer3.2.alpha2 1.006119966506958
layer3.3.alpha1 0.3279719650745392
layer3.3.alpha2 0.9739017486572266
layer3.

 * Prec@1 91.650
alpha1 1.091330647468567
layer1.0.alpha1 0.6053661704063416
layer1.0.alpha2 1.0381063222885132
layer1.1.alpha1 0.31462669372558594
layer1.1.alpha2 1.0185983180999756
layer1.2.alpha1 0.6016893982887268
layer1.2.alpha2 1.0999228954315186
layer2.0.alpha1 0.8683057427406311
layer2.0.alpha2 0.942062258720398
layer2.1.alpha1 0.3221735954284668
layer2.1.alpha2 0.9060779213905334
layer2.2.alpha1 0.315742164850235
layer2.2.alpha2 0.9141366481781006
layer2.3.alpha1 0.47071000933647156
layer2.3.alpha2 0.9409716725349426
layer3.0.alpha1 0.6819683313369751
layer3.0.alpha2 1.0155386924743652
layer3.1.alpha1 0.31646981835365295
layer3.1.alpha2 0.9674218893051147
layer3.2.alpha1 0.3107856512069702
layer3.2.alpha2 0.963545024394989
layer3.3.alpha1 0.3212665617465973
layer3.3.alpha2 0.9827788472175598
layer3.4.alpha1 0.31446361541748047
layer3.4.alpha2 0.9951710104942322
layer3.5.alpha1 0.320316344499588
layer3.5.alpha2 0.9902312159538269
layer4.0.alpha1 0.5688416957855225
layer4.0.alph

Epoch: [139][0/313]	Time 0.276 (0.276)	Data 0.165 (0.165)	Loss 0.0819 (0.0819)	Prec@1 97.656 (97.656)
Epoch: [139][50/313]	Time 0.112 (0.115)	Data 0.000 (0.003)	Loss 0.1326 (0.1480)	Prec@1 95.312 (95.021)
Epoch: [139][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1022 (0.1460)	Prec@1 96.875 (95.258)
Epoch: [139][150/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.2417 (0.1448)	Prec@1 92.188 (95.219)
Epoch: [139][200/313]	Time 0.114 (0.113)	Data 0.000 (0.001)	Loss 0.2025 (0.1447)	Prec@1 94.531 (95.184)
Epoch: [139][250/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1785 (0.1451)	Prec@1 93.750 (95.241)
Epoch: [139][300/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.1360 (0.1447)	Prec@1 96.094 (95.250)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.1969 (0.1969)	Prec@1 94.141 (94.141)
 * Prec@1 92.020
alpha1 1.0761992931365967
layer1.0.alpha1 0.6280133128166199
layer1.0.alpha2 0.978952944278717
layer1.1.alpha1 0.2794559895992279
layer1.1.alpha2 0.978503406047821
layer1.2.alpha1 0.

Epoch: [143][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.0798 (0.1332)	Prec@1 97.656 (95.552)
Epoch: [143][150/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1449 (0.1341)	Prec@1 94.531 (95.519)
Epoch: [143][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0944 (0.1329)	Prec@1 96.875 (95.631)
Epoch: [143][250/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.1827 (0.1336)	Prec@1 92.969 (95.627)
Epoch: [143][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1525 (0.1337)	Prec@1 96.094 (95.624)
Test: [0/20]	Time 0.331 (0.331)	Loss 0.1806 (0.1806)	Prec@1 94.727 (94.727)
 * Prec@1 92.000
alpha1 1.0320273637771606
layer1.0.alpha1 0.5994497537612915
layer1.0.alpha2 0.9866262674331665
layer1.1.alpha1 0.276113897562027
layer1.1.alpha2 1.001635193824768
layer1.2.alpha1 0.6050394177436829
layer1.2.alpha2 1.0486546754837036
layer2.0.alpha1 0.8443580865859985
layer2.0.alpha2 0.9024224281311035
layer2.1.alpha1 0.3429507911205292
layer2.1.alpha2 0.8797049522399902
layer2.2.alph

Epoch: [147][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.2083 (0.1352)	Prec@1 92.969 (95.546)
Epoch: [147][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0656 (0.1330)	Prec@1 97.656 (95.586)
Epoch: [147][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0792 (0.1352)	Prec@1 96.094 (95.518)
Test: [0/20]	Time 0.336 (0.336)	Loss 0.2090 (0.2090)	Prec@1 93.359 (93.359)
 * Prec@1 91.920
alpha1 1.0241389274597168
layer1.0.alpha1 0.5449215173721313
layer1.0.alpha2 0.9680302739143372
layer1.1.alpha1 0.26728618144989014
layer1.1.alpha2 0.9820877909660339
layer1.2.alpha1 0.6101440191268921
layer1.2.alpha2 1.0278819799423218
layer2.0.alpha1 0.8200972676277161
layer2.0.alpha2 0.8960495591163635
layer2.1.alpha1 0.30096882581710815
layer2.1.alpha2 0.8655206561088562
layer2.2.alpha1 0.2858949303627014
layer2.2.alpha2 0.870937705039978
layer2.3.alpha1 0.47548267245292664
layer2.3.alpha2 0.92377769947052
layer3.0.alpha1 0.6566519737243652
layer3.0.alpha2 0.9500207901000977
layer3.1.

Epoch: [151][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1197 (0.1248)	Prec@1 96.875 (95.834)
Test: [0/20]	Time 0.324 (0.324)	Loss 0.1874 (0.1874)	Prec@1 95.703 (95.703)
 * Prec@1 92.560
alpha1 1.020270586013794
layer1.0.alpha1 0.5530939698219299
layer1.0.alpha2 0.923821210861206
layer1.1.alpha1 0.2684800624847412
layer1.1.alpha2 0.9475619196891785
layer1.2.alpha1 0.609154999256134
layer1.2.alpha2 1.010238528251648
layer2.0.alpha1 0.801335334777832
layer2.0.alpha2 0.8954261541366577
layer2.1.alpha1 0.30531975626945496
layer2.1.alpha2 0.8514695763587952
layer2.2.alpha1 0.27426180243492126
layer2.2.alpha2 0.8675251007080078
layer2.3.alpha1 0.44095516204833984
layer2.3.alpha2 0.9093227982521057
layer3.0.alpha1 0.6580292582511902
layer3.0.alpha2 0.949799120426178
layer3.1.alpha1 0.3143974244594574
layer3.1.alpha2 0.922074556350708
layer3.2.alpha1 0.29696956276893616
layer3.2.alpha2 0.8952234983444214
layer3.3.alpha1 0.27119651436805725
layer3.3.alpha2 0.8948243260383606
layer3.4.

 * Prec@1 92.520
alpha1 1.0086296796798706
layer1.0.alpha1 0.5462715029716492
layer1.0.alpha2 0.9273387789726257
layer1.1.alpha1 0.2675423324108124
layer1.1.alpha2 0.9414269924163818
layer1.2.alpha1 0.606637179851532
layer1.2.alpha2 1.0050071477890015
layer2.0.alpha1 0.803136944770813
layer2.0.alpha2 0.8928000926971436
layer2.1.alpha1 0.3048475384712219
layer2.1.alpha2 0.852593719959259
layer2.2.alpha1 0.26899999380111694
layer2.2.alpha2 0.8655034303665161
layer2.3.alpha1 0.43693169951438904
layer2.3.alpha2 0.902621865272522
layer3.0.alpha1 0.6546005606651306
layer3.0.alpha2 0.9371508359909058
layer3.1.alpha1 0.3132783770561218
layer3.1.alpha2 0.9129887819290161
layer3.2.alpha1 0.2959230840206146
layer3.2.alpha2 0.8900638818740845
layer3.3.alpha1 0.26744142174720764
layer3.3.alpha2 0.892966091632843
layer3.4.alpha1 0.2960052788257599
layer3.4.alpha2 0.9343701601028442
layer3.5.alpha1 0.3333071172237396
layer3.5.alpha2 0.9793855547904968
layer4.0.alpha1 0.5715852975845337
layer4.0.alpha

current lr 1.00000e-03
Epoch: [160][0/313]	Time 0.272 (0.272)	Data 0.161 (0.161)	Loss 0.1741 (0.1741)	Prec@1 93.750 (93.750)
Epoch: [160][50/313]	Time 0.113 (0.115)	Data 0.000 (0.003)	Loss 0.0713 (0.1167)	Prec@1 97.656 (95.971)
Epoch: [160][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1612 (0.1222)	Prec@1 96.094 (95.846)
Epoch: [160][150/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1795 (0.1163)	Prec@1 95.312 (96.068)
Epoch: [160][200/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.1263 (0.1191)	Prec@1 97.656 (95.989)
Epoch: [160][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1489 (0.1182)	Prec@1 96.094 (96.007)
Epoch: [160][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0649 (0.1184)	Prec@1 98.438 (96.026)
Test: [0/20]	Time 0.325 (0.325)	Loss 0.1989 (0.1989)	Prec@1 95.117 (95.117)
 * Prec@1 92.670
alpha1 0.9977473616600037
layer1.0.alpha1 0.5482935309410095
layer1.0.alpha2 0.9295732975006104
layer1.1.alpha1 0.26660293340682983
layer1.1.alpha2 0.933772683

Epoch: [164][50/313]	Time 0.112 (0.116)	Data 0.000 (0.003)	Loss 0.1662 (0.1151)	Prec@1 95.312 (96.278)
Epoch: [164][100/313]	Time 0.112 (0.114)	Data 0.000 (0.002)	Loss 0.1719 (0.1166)	Prec@1 94.531 (96.078)
Epoch: [164][150/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.0731 (0.1186)	Prec@1 96.875 (96.006)
Epoch: [164][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1050 (0.1155)	Prec@1 96.094 (96.164)
Epoch: [164][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0851 (0.1152)	Prec@1 96.875 (96.184)
Epoch: [164][300/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.1331 (0.1162)	Prec@1 94.531 (96.135)
Test: [0/20]	Time 0.336 (0.336)	Loss 0.1864 (0.1864)	Prec@1 94.727 (94.727)
 * Prec@1 92.760
alpha1 0.9991459846496582
layer1.0.alpha1 0.5430333614349365
layer1.0.alpha2 0.9237130880355835
layer1.1.alpha1 0.26378604769706726
layer1.1.alpha2 0.9351170659065247
layer1.2.alpha1 0.6040881276130676
layer1.2.alpha2 0.9931049346923828
layer2.0.alpha1 0.793732762336731
layer2.0.alph

Epoch: [168][100/313]	Time 0.113 (0.114)	Data 0.000 (0.002)	Loss 0.1736 (0.1190)	Prec@1 92.969 (96.024)
Epoch: [168][150/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0781 (0.1156)	Prec@1 96.094 (96.058)
Epoch: [168][200/313]	Time 0.113 (0.113)	Data 0.000 (0.001)	Loss 0.0667 (0.1181)	Prec@1 97.656 (96.020)
Epoch: [168][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1244 (0.1171)	Prec@1 96.094 (96.025)
Epoch: [168][300/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0950 (0.1155)	Prec@1 96.875 (96.078)
Test: [0/20]	Time 0.331 (0.331)	Loss 0.1931 (0.1931)	Prec@1 94.727 (94.727)
 * Prec@1 92.700
alpha1 0.9905424118041992
layer1.0.alpha1 0.5392095446586609
layer1.0.alpha2 0.9195411801338196
layer1.1.alpha1 0.26224735379219055
layer1.1.alpha2 0.9273332953453064
layer1.2.alpha1 0.6051778793334961
layer1.2.alpha2 0.990781843662262
layer2.0.alpha1 0.7904358506202698
layer2.0.alpha2 0.8781182765960693
layer2.1.alpha1 0.2958199381828308
layer2.1.alpha2 0.8327129483222961
layer2.2.al

Epoch: [172][200/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.1031 (0.1166)	Prec@1 96.875 (96.156)
Epoch: [172][250/313]	Time 0.112 (0.113)	Data 0.000 (0.001)	Loss 0.0936 (0.1146)	Prec@1 96.094 (96.231)
Epoch: [172][300/313]	Time 0.111 (0.113)	Data 0.000 (0.001)	Loss 0.0614 (0.1147)	Prec@1 96.875 (96.187)
Test: [0/20]	Time 0.321 (0.321)	Loss 0.1863 (0.1863)	Prec@1 95.312 (95.312)
 * Prec@1 92.690
alpha1 0.9921781420707703
layer1.0.alpha1 0.5375100374221802
layer1.0.alpha2 0.9162448048591614
layer1.1.alpha1 0.26041439175605774
layer1.1.alpha2 0.9240275621414185
layer1.2.alpha1 0.6004846096038818
layer1.2.alpha2 0.9896199107170105
layer2.0.alpha1 0.7848914265632629
layer2.0.alpha2 0.8698849081993103
layer2.1.alpha1 0.29622215032577515
layer2.1.alpha2 0.8221192359924316
layer2.2.alpha1 0.2731914222240448
layer2.2.alpha2 0.8436102271080017
layer2.3.alpha1 0.4327099323272705
layer2.3.alpha2 0.8907290101051331
layer3.0.alpha1 0.648765504360199
layer3.0.alpha2 0.8807600140571594
layer3.1

Epoch: [176][300/313]	Time 0.266 (0.160)	Data 0.000 (0.001)	Loss 0.0843 (0.1097)	Prec@1 98.438 (96.392)
Test: [0/20]	Time 0.495 (0.495)	Loss 0.1903 (0.1903)	Prec@1 95.117 (95.117)
 * Prec@1 92.730
alpha1 0.987460196018219
layer1.0.alpha1 0.5338073968887329
layer1.0.alpha2 0.9107036590576172
layer1.1.alpha1 0.25799912214279175
layer1.1.alpha2 0.9177005887031555
layer1.2.alpha1 0.5973081588745117
layer1.2.alpha2 0.9951046705245972
layer2.0.alpha1 0.7808435559272766
layer2.0.alpha2 0.8681884407997131
layer2.1.alpha1 0.2964380383491516
layer2.1.alpha2 0.8322550654411316
layer2.2.alpha1 0.2663118243217468
layer2.2.alpha2 0.8401656746864319
layer2.3.alpha1 0.4273487329483032
layer2.3.alpha2 0.8875317573547363
layer3.0.alpha1 0.6486174464225769
layer3.0.alpha2 0.8763926029205322
layer3.1.alpha1 0.30929872393608093
layer3.1.alpha2 0.8838664293289185
layer3.2.alpha1 0.28555670380592346
layer3.2.alpha2 0.8743069767951965
layer3.3.alpha1 0.26507481932640076
layer3.3.alpha2 0.8808720111846924
laye