In [1]:
import torch
import timm
import numpy as np

from einops import repeat, rearrange
from einops.layers.torch import Rearrange

from timm.models.layers import trunc_normal_
from timm.models.vision_transformer import Block

In [2]:
def random_indexes(size : int):
    forward_indexes = np.arange(size)
    np.random.shuffle(forward_indexes)
    backward_indexes = np.argsort(forward_indexes)
    return forward_indexes, backward_indexes

random_indexes(5)

(array([4, 2, 3, 1, 0]), array([4, 3, 1, 2, 0], dtype=int64))

In [3]:
def take_indexes(sequences, indexes):
    return torch.gather(sequences, 0, repeat(indexes, 't b -> t b c', c=sequences.shape[-1]))

In [4]:
class PatchShuffle(torch.nn.Module):
    def __init__(self, ratio) -> None:
        super().__init__()
        self.ratio = ratio

    def forward(self, patches : torch.Tensor):
        T, B, C = patches.shape
        remain_T = int(T * (1 - self.ratio))

        indexes = [random_indexes(T) for _ in range(B)]
        forward_indexes = torch.as_tensor(np.stack([i[0] for i in indexes], axis=-1), dtype=torch.long).to(patches.device)
        backward_indexes = torch.as_tensor(np.stack([i[1] for i in indexes], axis=-1), dtype=torch.long).to(patches.device)

        patches = take_indexes(patches, forward_indexes)
        patches = patches[:remain_T]

        return patches, forward_indexes, backward_indexes

In [5]:
class MAE_Encoder(torch.nn.Module):
    def __init__(self,
                 image_size=32,
                 patch_size=2,
                 emb_dim=192,
                 num_layer=12,
                 num_head=3,
                 mask_ratio=0.75,
                 ) -> None:
        super().__init__()

        self.cls_token = torch.nn.Parameter(torch.zeros(1, 1, emb_dim))
        self.pos_embedding = torch.nn.Parameter(torch.zeros((image_size // patch_size) ** 2, 1, emb_dim))
        self.shuffle = PatchShuffle(mask_ratio)

        self.patchify = torch.nn.Conv2d(3, emb_dim, patch_size, patch_size)

        self.transformer = torch.nn.Sequential(*[Block(emb_dim, num_head) for _ in range(num_layer)])

        self.layer_norm = torch.nn.LayerNorm(emb_dim)

        self.init_weight()
        
    def init_weight(self):
        trunc_normal_(self.cls_token, std=.02)
        trunc_normal_(self.pos_embedding, std=.02)

    def forward(self, img):
        patches = self.patchify(img)
        patches = rearrange(patches, 'b c h w -> (h w) b c')
        patches = patches + self.pos_embedding

        patches, forward_indexes, backward_indexes = self.shuffle(patches)

        patches = torch.cat([self.cls_token.expand(-1, patches.shape[1], -1), patches], dim=0)
        patches = rearrange(patches, 't b c -> b t c')
        features = self.layer_norm(self.transformer(patches))
        features = rearrange(features, 'b t c -> t b c')

        return features, backward_indexes

In [6]:
class MAE_Decoder(torch.nn.Module):
    def __init__(self,
                 image_size=32,
                 patch_size=2,
                 emb_dim=192,
                 num_layer=4,
                 num_head=3,
                 ) -> None:
        super().__init__()

        self.mask_token = torch.nn.Parameter(torch.zeros(1, 1, emb_dim))
        self.pos_embedding = torch.nn.Parameter(torch.zeros((image_size // patch_size) ** 2 + 1, 1, emb_dim))

        self.transformer = torch.nn.Sequential(*[Block(emb_dim, num_head) for _ in range(num_layer)])

        self.head = torch.nn.Linear(emb_dim, 3 * patch_size ** 2)
        self.patch2img = Rearrange('(h w) b (c p1 p2) -> b c (h p1) (w p2)', p1=patch_size, p2=patch_size, h=image_size//patch_size)

        self.init_weight()

    def init_weight(self):
        trunc_normal_(self.mask_token, std=.02)
        trunc_normal_(self.pos_embedding, std=.02)

    def forward(self, features, backward_indexes):
        T = features.shape[0]
        backward_indexes = torch.cat([torch.zeros(1, backward_indexes.shape[1]).to(backward_indexes), backward_indexes + 1], dim=0)
        features = torch.cat([features, self.mask_token.expand(backward_indexes.shape[0] - features.shape[0], features.shape[1], -1)], dim=0)
        features = take_indexes(features, backward_indexes)
        features = features + self.pos_embedding

        features = rearrange(features, 't b c -> b t c')
        features = self.transformer(features)
        features = rearrange(features, 'b t c -> t b c')
        features = features[1:] # remove global feature

        patches = self.head(features)
        mask = torch.zeros_like(patches)
        mask[T:] = 1
        mask = take_indexes(mask, backward_indexes[1:] - 1)
        img = self.patch2img(patches)
        mask = self.patch2img(mask)

        return img, mask

In [7]:
class MAE_ViT(torch.nn.Module):
    def __init__(self,
                 image_size=32,
                 patch_size=2,
                 emb_dim=192,
                 encoder_layer=12,
                 encoder_head=3,
                 decoder_layer=4,
                 decoder_head=3,
                 mask_ratio=0.75,
                 ) -> None:
        super().__init__()

        self.encoder = MAE_Encoder(image_size, patch_size, emb_dim, encoder_layer, encoder_head, mask_ratio)
        self.decoder = MAE_Decoder(image_size, patch_size, emb_dim, decoder_layer, decoder_head)

    def forward(self, img):
        features, backward_indexes = self.encoder(img)
        predicted_img, mask = self.decoder(features,  backward_indexes)
        return predicted_img, mask

In [8]:
class ViT_Classifier(torch.nn.Module):
    def __init__(self, encoder : MAE_Encoder, num_classes=10) -> None:
        super().__init__()
        self.cls_token = encoder.cls_token
        self.pos_embedding = encoder.pos_embedding
        self.patchify = encoder.patchify
        self.transformer = encoder.transformer
        self.layer_norm = encoder.layer_norm
        self.head = torch.nn.Linear(self.pos_embedding.shape[-1], num_classes)

    def forward(self, img):
        patches = self.patchify(img)
        patches = rearrange(patches, 'b c h w -> (h w) b c')
        patches = patches + self.pos_embedding
        patches = torch.cat([self.cls_token.expand(-1, patches.shape[1], -1), patches], dim=0)
        patches = rearrange(patches, 't b c -> b t c')
        features = self.layer_norm(self.transformer(patches))
        features = rearrange(features, 'b t c -> t b c')
        logits = self.head(features[0])
        return logits


In [9]:
shuffle = PatchShuffle(0.75)
a = torch.rand(16, 2, 10)
b, forward_indexes, backward_indexes = shuffle(a)
print(b.shape)

img = torch.rand(2, 3, 32, 32)
encoder = MAE_Encoder()
decoder = MAE_Decoder()
features, backward_indexes = encoder(img)
print(forward_indexes.shape)
predicted_img, mask = decoder(features, backward_indexes)
print(predicted_img.shape)
loss = torch.mean((predicted_img - img) ** 2 * mask / 0.75)
print(loss)

torch.Size([4, 2, 10])
torch.Size([16, 2])
torch.Size([2, 3, 32, 32])
tensor(0.4309, grad_fn=<MeanBackward0>)


In [10]:
import argparse
from tqdm import tqdm
import torchvision
from torchvision.transforms import ToTensor, Compose, Normalize
import math 
import cv2


parser = argparse.ArgumentParser()
seed = 42
batch_size = 4096
max_device_batch_size = 512
base_learning_rate = 1.5e-4
weight_decay =0.05
mask_ratio =0.75
total_epoch = 1000
warmup_epoch = 200
# model_path='vit-t-mae.pt'



batch_size = batch_size
load_batch_size = min(max_device_batch_size, batch_size)

assert batch_size % load_batch_size == 0
steps_per_update = batch_size // load_batch_size

train_dataset = torchvision.datasets.CIFAR10('data', train=True, download=True, transform=Compose([ToTensor(), Normalize(0.5, 0.5)]))
val_dataset = torchvision.datasets.CIFAR10('data', train=False, download=True, transform=Compose([ToTensor(), Normalize(0.5, 0.5)]))
dataloader = torch.utils.data.DataLoader(train_dataset, load_batch_size, shuffle=True, num_workers=4)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = MAE_ViT(mask_ratio=mask_ratio).to(device)
optim = torch.optim.AdamW(model.parameters(), lr=base_learning_rate * batch_size / 256, betas=(0.9, 0.95), weight_decay=weight_decay)
lr_func = lambda epoch: min((epoch + 1) / (warmup_epoch + 1e-8), 0.5 * (math.cos(epoch / total_epoch * math.pi) + 1))
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda=lr_func, verbose=True)

step_count = 0
optim.zero_grad()
for e in range(total_epoch):
    model.train()
    losses = []
    for img, label in tqdm(iter(dataloader)):
        step_count += 1
        img = img.to(device)
        predicted_img, mask = model(img)
        loss = torch.mean((predicted_img - img) ** 2 * mask) / mask_ratio
        loss.backward()
        if step_count % steps_per_update == 0:
            optim.step()
            optim.zero_grad()
        losses.append(loss.item())
    lr_scheduler.step()
    avg_loss = sum(losses) / len(losses)
    print(f'In epoch {e}, average traning loss is {avg_loss}.')

    ''' visualize the first 16 predicted images on val dataset'''
    if (e % 50 == 0) or (e == total_epoch - 1):
        model.eval()
        with torch.no_grad():
            val_img = torch.stack([val_dataset[i][0] for i in range(16)])
            val_img = val_img.to(device)
            predicted_val_img, mask = model(val_img)
            predicted_val_img = predicted_val_img * mask + val_img * (1 - mask)
            img = torch.cat([val_img * (1 - mask), predicted_val_img, val_img], dim=0)
            img = rearrange(img, '(v h1 w1) c h w -> c (h1 h) (w1 v w)', w1=2, v=3)
            cv2.imwrite('mae_image_' + f'{e}.jpg', (((img.permute(1, 2, 0) + 1)/2)*255).detach().cpu().numpy())
        
        ''' save model '''
        torch.save(model, model_path)

Files already downloaded and verified
Files already downloaded and verified
Adjusting learning rate of group 0 to 1.2000e-05.


100%|██████████| 98/98 [01:05<00:00,  1.50it/s]


Adjusting learning rate of group 0 to 2.4000e-05.
In epoch 0, average traning loss is 0.27295249837393665.


100%|██████████| 98/98 [01:01<00:00,  1.59it/s]


Adjusting learning rate of group 0 to 3.6000e-05.
In epoch 1, average traning loss is 0.19851451792887279.


100%|██████████| 98/98 [01:02<00:00,  1.58it/s]


Adjusting learning rate of group 0 to 4.8000e-05.
In epoch 2, average traning loss is 0.18619874864816666.


100%|██████████| 98/98 [01:02<00:00,  1.57it/s]


Adjusting learning rate of group 0 to 6.0000e-05.
In epoch 3, average traning loss is 0.18266101941770438.


100%|██████████| 98/98 [01:02<00:00,  1.56it/s]


Adjusting learning rate of group 0 to 7.2000e-05.
In epoch 4, average traning loss is 0.18090699856378595.


100%|██████████| 98/98 [01:01<00:00,  1.59it/s]


Adjusting learning rate of group 0 to 8.4000e-05.
In epoch 5, average traning loss is 0.17992701913629258.


100%|██████████| 98/98 [01:01<00:00,  1.59it/s]


Adjusting learning rate of group 0 to 9.6000e-05.
In epoch 6, average traning loss is 0.1807910679858558.


100%|██████████| 98/98 [00:51<00:00,  1.91it/s]


Adjusting learning rate of group 0 to 1.0800e-04.
In epoch 7, average traning loss is 0.17859188132748313.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.2000e-04.
In epoch 8, average traning loss is 0.17588642908602345.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.3200e-04.
In epoch 9, average traning loss is 0.17516805383623862.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.4400e-04.
In epoch 10, average traning loss is 0.1710838160040427.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.5600e-04.
In epoch 11, average traning loss is 0.1676954614872835.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.6800e-04.
In epoch 12, average traning loss is 0.167855047297721.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 1.8000e-04.
In epoch 13, average traning loss is 0.16157110263498461.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.9200e-04.
In epoch 14, average traning loss is 0.15879778913697418.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.0400e-04.
In epoch 15, average traning loss is 0.15864003951452216.


100%|██████████| 98/98 [00:47<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.1600e-04.
In epoch 16, average traning loss is 0.15728990155823377.


100%|██████████| 98/98 [00:47<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.2800e-04.
In epoch 17, average traning loss is 0.15403562191189552.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.4000e-04.
In epoch 18, average traning loss is 0.1541944181128424.


100%|██████████| 98/98 [00:47<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.5200e-04.
In epoch 19, average traning loss is 0.14930743404797145.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.6400e-04.
In epoch 20, average traning loss is 0.14874873143069597.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.7600e-04.
In epoch 21, average traning loss is 0.14414711007658315.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.8800e-04.
In epoch 22, average traning loss is 0.1443442198999074.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 3.0000e-04.
In epoch 23, average traning loss is 0.1433783879085463.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 3.1200e-04.
In epoch 24, average traning loss is 0.1382181627713904.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 3.2400e-04.
In epoch 25, average traning loss is 0.1391430460676855.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 3.3600e-04.
In epoch 26, average traning loss is 0.13468567266756173.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 3.4800e-04.
In epoch 27, average traning loss is 0.13009593012381573.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 3.6000e-04.
In epoch 28, average traning loss is 0.1324664732175214.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 3.7200e-04.
In epoch 29, average traning loss is 0.12798260388021565.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 3.8400e-04.
In epoch 30, average traning loss is 0.12942881844177537.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 3.9600e-04.
In epoch 31, average traning loss is 0.1272678874737146.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 4.0800e-04.
In epoch 32, average traning loss is 0.12379453612529502.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 4.2000e-04.
In epoch 33, average traning loss is 0.12527056198034967.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 4.3200e-04.
In epoch 34, average traning loss is 0.1217560371269985.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 4.4400e-04.
In epoch 35, average traning loss is 0.1202228123284116.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 4.5600e-04.
In epoch 36, average traning loss is 0.11583706265201374.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 4.6800e-04.
In epoch 37, average traning loss is 0.1156429722905159.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 4.8000e-04.
In epoch 38, average traning loss is 0.11017781382008475.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 4.9200e-04.
In epoch 39, average traning loss is 0.10788457110828283.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 5.0400e-04.
In epoch 40, average traning loss is 0.10463742911815643.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 5.1600e-04.
In epoch 41, average traning loss is 0.10584770827269067.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 5.2800e-04.
In epoch 42, average traning loss is 0.10127151483783917.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 5.4000e-04.
In epoch 43, average traning loss is 0.09973940032781387.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 5.5200e-04.
In epoch 44, average traning loss is 0.09520459920167923.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 5.6400e-04.
In epoch 45, average traning loss is 0.09147444998427313.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 5.7600e-04.
In epoch 46, average traning loss is 0.08651330337232473.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 5.8800e-04.
In epoch 47, average traning loss is 0.08269242721856856.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 6.0000e-04.
In epoch 48, average traning loss is 0.080051453518016.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 6.1200e-04.
In epoch 49, average traning loss is 0.07680720455792485.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 6.2400e-04.
In epoch 50, average traning loss is 0.07901771587072587.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 6.3600e-04.
In epoch 51, average traning loss is 0.07313387035107126.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 6.4800e-04.
In epoch 52, average traning loss is 0.07072342019908283.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 6.6000e-04.
In epoch 53, average traning loss is 0.0706451843891825.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 6.7200e-04.
In epoch 54, average traning loss is 0.06715548715117026.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 6.8400e-04.
In epoch 55, average traning loss is 0.07037262542515385.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 6.9600e-04.
In epoch 56, average traning loss is 0.06580673496485973.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 7.0800e-04.
In epoch 57, average traning loss is 0.06767912885668326.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 7.2000e-04.
In epoch 58, average traning loss is 0.06435580038446553.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 7.3200e-04.
In epoch 59, average traning loss is 0.06294436319446077.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 7.4400e-04.
In epoch 60, average traning loss is 0.06466725818356689.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 7.5600e-04.
In epoch 61, average traning loss is 0.06353111367444603.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 7.6800e-04.
In epoch 62, average traning loss is 0.06170638943357127.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 7.8000e-04.
In epoch 63, average traning loss is 0.06084853714826156.


100%|██████████| 98/98 [00:49<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 7.9200e-04.
In epoch 64, average traning loss is 0.06262806863809119.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 8.0400e-04.
In epoch 65, average traning loss is 0.06138432504875319.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 8.1600e-04.
In epoch 66, average traning loss is 0.059975499135194996.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 8.2800e-04.
In epoch 67, average traning loss is 0.06043288552639436.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 8.4000e-04.
In epoch 68, average traning loss is 0.058746415551523774.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 8.5200e-04.
In epoch 69, average traning loss is 0.05936106199360624.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 8.6400e-04.
In epoch 70, average traning loss is 0.05792064770904123.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 8.7600e-04.
In epoch 71, average traning loss is 0.05822570097385621.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 8.8800e-04.
In epoch 72, average traning loss is 0.05615034418142572.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 9.0000e-04.
In epoch 73, average traning loss is 0.05898232465343816.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 9.1200e-04.
In epoch 74, average traning loss is 0.05577024247269241.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 9.2400e-04.
In epoch 75, average traning loss is 0.05572625253425569.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 9.3600e-04.
In epoch 76, average traning loss is 0.0549082909995804.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 9.4800e-04.
In epoch 77, average traning loss is 0.054054112116597136.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 9.6000e-04.
In epoch 78, average traning loss is 0.05606801230080274.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 9.7200e-04.
In epoch 79, average traning loss is 0.054333031937784076.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 9.8400e-04.
In epoch 80, average traning loss is 0.05372871044186913.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 9.9600e-04.
In epoch 81, average traning loss is 0.0537213390807108.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.0080e-03.
In epoch 82, average traning loss is 0.05409204819220669.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.0200e-03.
In epoch 83, average traning loss is 0.05416967655170937.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.0320e-03.
In epoch 84, average traning loss is 0.0522055373508103.


100%|██████████| 98/98 [00:47<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 1.0440e-03.
In epoch 85, average traning loss is 0.0521726951535259.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.0560e-03.
In epoch 86, average traning loss is 0.05135234316088715.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.0680e-03.
In epoch 87, average traning loss is 0.051815224735408415.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.0800e-03.
In epoch 88, average traning loss is 0.0514844681459422.


100%|██████████| 98/98 [00:48<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 1.0920e-03.
In epoch 89, average traning loss is 0.05293973698755916.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.1040e-03.
In epoch 90, average traning loss is 0.050325787683226625.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.1160e-03.
In epoch 91, average traning loss is 0.050226236788593996.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.1280e-03.
In epoch 92, average traning loss is 0.0504988290901695.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.1400e-03.
In epoch 93, average traning loss is 0.049642108342781356.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.1520e-03.
In epoch 94, average traning loss is 0.053479649240569194.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.1640e-03.
In epoch 95, average traning loss is 0.05306856524275274.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.1760e-03.
In epoch 96, average traning loss is 0.04932223207184246.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.1880e-03.
In epoch 97, average traning loss is 0.04941611584960198.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.2000e-03.
In epoch 98, average traning loss is 0.05015747902952895.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.2120e-03.
In epoch 99, average traning loss is 0.05016468571765082.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.2240e-03.
In epoch 100, average traning loss is 0.048459527785984835.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.2360e-03.
In epoch 101, average traning loss is 0.04765983488486738.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.2480e-03.
In epoch 102, average traning loss is 0.04913550448052737.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.2600e-03.
In epoch 103, average traning loss is 0.047827720528050344.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.2720e-03.
In epoch 104, average traning loss is 0.047783982198761434.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.2840e-03.
In epoch 105, average traning loss is 0.04847418703138828.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.2960e-03.
In epoch 106, average traning loss is 0.046881327329545606.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.3080e-03.
In epoch 107, average traning loss is 0.04769625305673297.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.3200e-03.
In epoch 108, average traning loss is 0.047597789718788495.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.3320e-03.
In epoch 109, average traning loss is 0.04765727066872071.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.3440e-03.
In epoch 110, average traning loss is 0.047822912508735854.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.3560e-03.
In epoch 111, average traning loss is 0.04744537262131973.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.3680e-03.
In epoch 112, average traning loss is 0.04670039119617063.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.3800e-03.
In epoch 113, average traning loss is 0.0454982802728001.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.3920e-03.
In epoch 114, average traning loss is 0.04614334500261715.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.4040e-03.
In epoch 115, average traning loss is 0.04607484257799022.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.4160e-03.
In epoch 116, average traning loss is 0.0457027686417711.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.4280e-03.
In epoch 117, average traning loss is 0.04587738022056161.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.4400e-03.
In epoch 118, average traning loss is 0.04516118118653492.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.4520e-03.
In epoch 119, average traning loss is 0.04577925808879794.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.4640e-03.
In epoch 120, average traning loss is 0.044495997950434685.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.4760e-03.
In epoch 121, average traning loss is 0.04747165089511141.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.4880e-03.
In epoch 122, average traning loss is 0.04403412912269028.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.5000e-03.
In epoch 123, average traning loss is 0.04773498277122877.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.5120e-03.
In epoch 124, average traning loss is 0.0455625059501249.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.5240e-03.
In epoch 125, average traning loss is 0.04292927811644515.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.5360e-03.
In epoch 126, average traning loss is 0.04661263293605678.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.5480e-03.
In epoch 127, average traning loss is 0.044465827546557604.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.5600e-03.
In epoch 128, average traning loss is 0.04394427880796851.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.5720e-03.
In epoch 129, average traning loss is 0.04300621824757177.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.5840e-03.
In epoch 130, average traning loss is 0.042648148437847894.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.5960e-03.
In epoch 131, average traning loss is 0.04716615451081675.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.6080e-03.
In epoch 132, average traning loss is 0.04610431445192317.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.6200e-03.
In epoch 133, average traning loss is 0.04273960944645259.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.6320e-03.
In epoch 134, average traning loss is 0.04457531386644256.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.6440e-03.
In epoch 135, average traning loss is 0.04313508569433981.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.6560e-03.
In epoch 136, average traning loss is 0.04231540908162691.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.6680e-03.
In epoch 137, average traning loss is 0.0419439936474878.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.6800e-03.
In epoch 138, average traning loss is 0.04841600320473009.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.6920e-03.
In epoch 139, average traning loss is 0.043379293412578346.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7040e-03.
In epoch 140, average traning loss is 0.04195283360931338.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7160e-03.
In epoch 141, average traning loss is 0.04181042636687658.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7280e-03.
In epoch 142, average traning loss is 0.0426051201564925.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7400e-03.
In epoch 143, average traning loss is 0.04591545930170283.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7520e-03.
In epoch 144, average traning loss is 0.04258250251260339.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.7640e-03.
In epoch 145, average traning loss is 0.04096376770461092.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7760e-03.
In epoch 146, average traning loss is 0.04316643823166283.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7880e-03.
In epoch 147, average traning loss is 0.04257755373053405.


100%|██████████| 98/98 [00:49<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 1.8000e-03.
In epoch 148, average traning loss is 0.040372736904085896.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8120e-03.
In epoch 149, average traning loss is 0.045591894879328965.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.8240e-03.
In epoch 150, average traning loss is 0.04210205417962707.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.8360e-03.
In epoch 151, average traning loss is 0.04167002591551566.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8480e-03.
In epoch 152, average traning loss is 0.04066181513575875.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8600e-03.
In epoch 153, average traning loss is 0.04358991246898564.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.8720e-03.
In epoch 154, average traning loss is 0.041347812138953985.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 1.8840e-03.
In epoch 155, average traning loss is 0.04027933133195857.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8960e-03.
In epoch 156, average traning loss is 0.04118249858064311.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.9080e-03.
In epoch 157, average traning loss is 0.04035805405250618.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.9200e-03.
In epoch 158, average traning loss is 0.03979928628066365.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.9320e-03.
In epoch 159, average traning loss is 0.04115965521457244.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.9440e-03.
In epoch 160, average traning loss is 0.039877367034858585.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.9560e-03.
In epoch 161, average traning loss is 0.039741689187227466.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.9680e-03.
In epoch 162, average traning loss is 0.04254444498492747.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.9800e-03.
In epoch 163, average traning loss is 0.041269670297600786.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.9920e-03.
In epoch 164, average traning loss is 0.03913467369821607.


100%|██████████| 98/98 [00:51<00:00,  1.90it/s]


Adjusting learning rate of group 0 to 2.0040e-03.
In epoch 165, average traning loss is 0.04037761844086404.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.0160e-03.
In epoch 166, average traning loss is 0.03903907784545908.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.0280e-03.
In epoch 167, average traning loss is 0.04157185014717433.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.0400e-03.
In epoch 168, average traning loss is 0.039899234108778894.


100%|██████████| 98/98 [00:55<00:00,  1.78it/s]


Adjusting learning rate of group 0 to 2.0520e-03.
In epoch 169, average traning loss is 0.03914760802017183.


100%|██████████| 98/98 [00:55<00:00,  1.77it/s]


Adjusting learning rate of group 0 to 2.0640e-03.
In epoch 170, average traning loss is 0.03868262500179057.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.0760e-03.
In epoch 171, average traning loss is 0.04046000722719698.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.0880e-03.
In epoch 172, average traning loss is 0.039515724671738486.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1000e-03.
In epoch 173, average traning loss is 0.03905528935850883.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1120e-03.
In epoch 174, average traning loss is 0.038522549467731496.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1240e-03.
In epoch 175, average traning loss is 0.04017701570172699.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1360e-03.
In epoch 176, average traning loss is 0.03961659633383459.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1480e-03.
In epoch 177, average traning loss is 0.0387342060174869.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1600e-03.
In epoch 178, average traning loss is 0.04031720666252837.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1720e-03.
In epoch 179, average traning loss is 0.03878893733632808.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1840e-03.
In epoch 180, average traning loss is 0.037905546665495754.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1960e-03.
In epoch 181, average traning loss is 0.03819340288791121.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.2071e-03.
In epoch 182, average traning loss is 0.038714327108190984.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.2050e-03.
In epoch 183, average traning loss is 0.03828232446495367.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.2030e-03.
In epoch 184, average traning loss is 0.037859956372757346.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.2009e-03.
In epoch 185, average traning loss is 0.04009444623881457.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1988e-03.
In epoch 186, average traning loss is 0.039656163288318384.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1967e-03.
In epoch 187, average traning loss is 0.03787833581469497.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1946e-03.
In epoch 188, average traning loss is 0.037206110791588316.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1925e-03.
In epoch 189, average traning loss is 0.03964568933053893.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1904e-03.
In epoch 190, average traning loss is 0.03879153393969244.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1882e-03.
In epoch 191, average traning loss is 0.03738203618143286.


100%|██████████| 98/98 [00:51<00:00,  1.89it/s]


Adjusting learning rate of group 0 to 2.1861e-03.
In epoch 192, average traning loss is 0.03693487495183945.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1839e-03.
In epoch 193, average traning loss is 0.037816732247569124.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1818e-03.
In epoch 194, average traning loss is 0.03713677831146182.


100%|██████████| 98/98 [00:53<00:00,  1.84it/s]


Adjusting learning rate of group 0 to 2.1796e-03.
In epoch 195, average traning loss is 0.0400621661422204.


100%|██████████| 98/98 [00:53<00:00,  1.84it/s]


Adjusting learning rate of group 0 to 2.1774e-03.
In epoch 196, average traning loss is 0.038382017642867805.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1752e-03.
In epoch 197, average traning loss is 0.037081468014084566.


100%|██████████| 98/98 [00:53<00:00,  1.83it/s]


Adjusting learning rate of group 0 to 2.1730e-03.
In epoch 198, average traning loss is 0.036690287786174794.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1708e-03.
In epoch 199, average traning loss is 0.03744367711550119.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1686e-03.
In epoch 200, average traning loss is 0.03741241189442119.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1664e-03.
In epoch 201, average traning loss is 0.03688110392160562.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.1641e-03.
In epoch 202, average traning loss is 0.03732839374974066.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.1619e-03.
In epoch 203, average traning loss is 0.036730109847017696.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.1596e-03.
In epoch 204, average traning loss is 0.03646559420288825.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.1574e-03.
In epoch 205, average traning loss is 0.03703498372770086.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1551e-03.
In epoch 206, average traning loss is 0.036744738871953925.


100%|██████████| 98/98 [00:55<00:00,  1.77it/s]


Adjusting learning rate of group 0 to 2.1528e-03.
In epoch 207, average traning loss is 0.03665417345354752.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1505e-03.
In epoch 208, average traning loss is 0.03632574441025452.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1482e-03.
In epoch 209, average traning loss is 0.036463393620690523.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1459e-03.
In epoch 210, average traning loss is 0.03655762297614497.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1435e-03.
In epoch 211, average traning loss is 0.03604898921080998.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1412e-03.
In epoch 212, average traning loss is 0.03709493965214612.


100%|██████████| 98/98 [00:54<00:00,  1.79it/s]


Adjusting learning rate of group 0 to 2.1389e-03.
In epoch 213, average traning loss is 0.03611301212590568.


100%|██████████| 98/98 [00:54<00:00,  1.81it/s]


Adjusting learning rate of group 0 to 2.1365e-03.
In epoch 214, average traning loss is 0.035814407148531506.


100%|██████████| 98/98 [00:51<00:00,  1.89it/s]


Adjusting learning rate of group 0 to 2.1342e-03.
In epoch 215, average traning loss is 0.03647011176359897.


100%|██████████| 98/98 [00:53<00:00,  1.82it/s]


Adjusting learning rate of group 0 to 2.1318e-03.
In epoch 216, average traning loss is 0.036610872123618514.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1294e-03.
In epoch 217, average traning loss is 0.03661472137485232.


100%|██████████| 98/98 [00:54<00:00,  1.78it/s]


Adjusting learning rate of group 0 to 2.1270e-03.
In epoch 218, average traning loss is 0.03556267586441673.


100%|██████████| 98/98 [00:52<00:00,  1.88it/s]


Adjusting learning rate of group 0 to 2.1246e-03.
In epoch 219, average traning loss is 0.036601184620236864.


100%|██████████| 98/98 [00:52<00:00,  1.87it/s]


Adjusting learning rate of group 0 to 2.1222e-03.
In epoch 220, average traning loss is 0.03563220890200868.


100%|██████████| 98/98 [00:54<00:00,  1.80it/s]


Adjusting learning rate of group 0 to 2.1198e-03.
In epoch 221, average traning loss is 0.03602472016093682.


100%|██████████| 98/98 [00:50<00:00,  1.92it/s]


Adjusting learning rate of group 0 to 2.1174e-03.
In epoch 222, average traning loss is 0.035597040641064545.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 2.1149e-03.
In epoch 223, average traning loss is 0.03594388573297432.


100%|██████████| 98/98 [00:47<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.1125e-03.
In epoch 224, average traning loss is 0.035600628209661464.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.1100e-03.
In epoch 225, average traning loss is 0.03571120767417003.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.1076e-03.
In epoch 226, average traning loss is 0.03570309623467679.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.1051e-03.
In epoch 227, average traning loss is 0.03542748469935388.


100%|██████████| 98/98 [00:47<00:00,  2.04it/s]


Adjusting learning rate of group 0 to 2.1026e-03.
In epoch 228, average traning loss is 0.03531457639622445.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.1001e-03.
In epoch 229, average traning loss is 0.03533664881726917.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0976e-03.
In epoch 230, average traning loss is 0.035525476598009774.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0951e-03.
In epoch 231, average traning loss is 0.03525545660938535.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0926e-03.
In epoch 232, average traning loss is 0.0350201212401901.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0901e-03.
In epoch 233, average traning loss is 0.03516883060944324.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0876e-03.
In epoch 234, average traning loss is 0.03513873938699158.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.0850e-03.
In epoch 235, average traning loss is 0.03518579086782981.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0825e-03.
In epoch 236, average traning loss is 0.034750871930499465.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0799e-03.
In epoch 237, average traning loss is 0.03571610297171437.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0773e-03.
In epoch 238, average traning loss is 0.03494448435245728.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0748e-03.
In epoch 239, average traning loss is 0.03468332610720275.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0722e-03.
In epoch 240, average traning loss is 0.03520362881221333.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0696e-03.
In epoch 241, average traning loss is 0.034677740186452866.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0670e-03.
In epoch 242, average traning loss is 0.034857168451559783.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0644e-03.
In epoch 243, average traning loss is 0.034924483725002835.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0618e-03.
In epoch 244, average traning loss is 0.03505381934192716.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0591e-03.
In epoch 245, average traning loss is 0.034696170566033344.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0565e-03.
In epoch 246, average traning loss is 0.03484261492077185.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.0538e-03.
In epoch 247, average traning loss is 0.03453785952712808.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0512e-03.
In epoch 248, average traning loss is 0.03460591639943269.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0485e-03.
In epoch 249, average traning loss is 0.03462323573018823.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0459e-03.
In epoch 250, average traning loss is 0.034172519395241935.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0432e-03.
In epoch 251, average traning loss is 0.034910950087467016.


100%|██████████| 98/98 [00:47<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 2.0405e-03.
In epoch 252, average traning loss is 0.03436466002342652.


100%|██████████| 98/98 [00:47<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 2.0378e-03.
In epoch 253, average traning loss is 0.03467571495899132.


100%|██████████| 98/98 [00:47<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 2.0351e-03.
In epoch 254, average traning loss is 0.03433482379329448.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0324e-03.
In epoch 255, average traning loss is 0.034198498695480584.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0297e-03.
In epoch 256, average traning loss is 0.034119393828572056.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0269e-03.
In epoch 257, average traning loss is 0.03409423637298905.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0242e-03.
In epoch 258, average traning loss is 0.03424390523257304.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 2.0215e-03.
In epoch 259, average traning loss is 0.034163435008756966.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0187e-03.
In epoch 260, average traning loss is 0.03414778331560748.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0159e-03.
In epoch 261, average traning loss is 0.03413593274902324.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 2.0132e-03.
In epoch 262, average traning loss is 0.03410787907029901.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0104e-03.
In epoch 263, average traning loss is 0.0339534088817178.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0076e-03.
In epoch 264, average traning loss is 0.034124555256293744.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0048e-03.
In epoch 265, average traning loss is 0.033836210108533195.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 2.0020e-03.
In epoch 266, average traning loss is 0.03379817078916394.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9992e-03.
In epoch 267, average traning loss is 0.03397014448229147.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9964e-03.
In epoch 268, average traning loss is 0.03365381202679508.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9936e-03.
In epoch 269, average traning loss is 0.03419874426053495.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9907e-03.
In epoch 270, average traning loss is 0.03356399877491046.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9879e-03.
In epoch 271, average traning loss is 0.034493559211188435.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9851e-03.
In epoch 272, average traning loss is 0.03382229683350543.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.9822e-03.
In epoch 273, average traning loss is 0.033577789419463704.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9793e-03.
In epoch 274, average traning loss is 0.03363641032150814.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9765e-03.
In epoch 275, average traning loss is 0.033437329135379015.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.9736e-03.
In epoch 276, average traning loss is 0.03385248278476754.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9707e-03.
In epoch 277, average traning loss is 0.033463624895227195.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.9678e-03.
In epoch 278, average traning loss is 0.033525969833135605.


100%|██████████| 98/98 [00:47<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 1.9649e-03.
In epoch 279, average traning loss is 0.03362568241677114.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9620e-03.
In epoch 280, average traning loss is 0.03332395380248829.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9591e-03.
In epoch 281, average traning loss is 0.03339789937041244.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9562e-03.
In epoch 282, average traning loss is 0.033242130880149046.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9532e-03.
In epoch 283, average traning loss is 0.03337744836296354.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9503e-03.
In epoch 284, average traning loss is 0.03359309777769507.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9473e-03.
In epoch 285, average traning loss is 0.03320988001567977.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9444e-03.
In epoch 286, average traning loss is 0.033277894332244685.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9414e-03.
In epoch 287, average traning loss is 0.03306905854949538.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.9385e-03.
In epoch 288, average traning loss is 0.03365138549433679.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9355e-03.
In epoch 289, average traning loss is 0.03304068793599703.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.9325e-03.
In epoch 290, average traning loss is 0.03310423519234268.


100%|██████████| 98/98 [00:47<00:00,  2.06it/s]


Adjusting learning rate of group 0 to 1.9295e-03.
In epoch 291, average traning loss is 0.03289948521676112.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 1.9265e-03.
In epoch 292, average traning loss is 0.03324171072062181.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.9235e-03.
In epoch 293, average traning loss is 0.03327696314271616.


100%|██████████| 98/98 [00:48<00:00,  2.02it/s]


Adjusting learning rate of group 0 to 1.9205e-03.
In epoch 294, average traning loss is 0.032831959293356965.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.9175e-03.
In epoch 295, average traning loss is 0.03321009513218792.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.9145e-03.
In epoch 296, average traning loss is 0.032907002358412256.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.9114e-03.
In epoch 297, average traning loss is 0.03279982054872172.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.9084e-03.
In epoch 298, average traning loss is 0.03294341826849446.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.9053e-03.
In epoch 299, average traning loss is 0.032898638321428884.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.9023e-03.
In epoch 300, average traning loss is 0.03280939081949847.


100%|██████████| 98/98 [00:48<00:00,  2.03it/s]


Adjusting learning rate of group 0 to 1.8992e-03.
In epoch 301, average traning loss is 0.03320905562414198.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.8962e-03.
In epoch 302, average traning loss is 0.03284107454653297.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8931e-03.
In epoch 303, average traning loss is 0.032612082700492165.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8900e-03.
In epoch 304, average traning loss is 0.03279564415617865.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8869e-03.
In epoch 305, average traning loss is 0.03266524747774309.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.8838e-03.
In epoch 306, average traning loss is 0.032815191229539256.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.8807e-03.
In epoch 307, average traning loss is 0.03248154113487321.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8776e-03.
In epoch 308, average traning loss is 0.03287822106967167.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8745e-03.
In epoch 309, average traning loss is 0.03252928280176557.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8714e-03.
In epoch 310, average traning loss is 0.03264050708361426.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8683e-03.
In epoch 311, average traning loss is 0.03244656759637351.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8651e-03.
In epoch 312, average traning loss is 0.032807069754570116.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8620e-03.
In epoch 313, average traning loss is 0.03253775907262248.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8588e-03.
In epoch 314, average traning loss is 0.03244191459475123.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8557e-03.
In epoch 315, average traning loss is 0.03243631166310943.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8525e-03.
In epoch 316, average traning loss is 0.032556455487347376.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8493e-03.
In epoch 317, average traning loss is 0.032619462044415425.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8462e-03.
In epoch 318, average traning loss is 0.032566871265975794.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8430e-03.
In epoch 319, average traning loss is 0.032174277841588675.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8398e-03.
In epoch 320, average traning loss is 0.03262948856822082.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8366e-03.
In epoch 321, average traning loss is 0.03220600891402181.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8334e-03.
In epoch 322, average traning loss is 0.03243520394043655.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8302e-03.
In epoch 323, average traning loss is 0.03214460582833509.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8270e-03.
In epoch 324, average traning loss is 0.03221532067626107.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.8238e-03.
In epoch 325, average traning loss is 0.03228423643705188.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8206e-03.
In epoch 326, average traning loss is 0.03212127258659017.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8173e-03.
In epoch 327, average traning loss is 0.03218575249597126.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8141e-03.
In epoch 328, average traning loss is 0.032175216253618806.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8108e-03.
In epoch 329, average traning loss is 0.03219335345665411.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8076e-03.
In epoch 330, average traning loss is 0.032142042087353.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.8043e-03.
In epoch 331, average traning loss is 0.031992487514353525.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.8011e-03.
In epoch 332, average traning loss is 0.032715254406235655.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 1.7978e-03.
In epoch 333, average traning loss is 0.03203119240625172.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7946e-03.
In epoch 334, average traning loss is 0.031937488031630615.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7913e-03.
In epoch 335, average traning loss is 0.03206896382783141.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7880e-03.
In epoch 336, average traning loss is 0.03190462040353795.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7847e-03.
In epoch 337, average traning loss is 0.03196742123334992.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7814e-03.
In epoch 338, average traning loss is 0.03208227957389793.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7781e-03.
In epoch 339, average traning loss is 0.0318701388694498.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7748e-03.
In epoch 340, average traning loss is 0.031855501975788146.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7715e-03.
In epoch 341, average traning loss is 0.03184217174670526.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7682e-03.
In epoch 342, average traning loss is 0.03180577163109366.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7648e-03.
In epoch 343, average traning loss is 0.03197601422363398.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7615e-03.
In epoch 344, average traning loss is 0.03174722877007966.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7582e-03.
In epoch 345, average traning loss is 0.03180245832749167.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 1.7548e-03.
In epoch 346, average traning loss is 0.03178277294322544.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7515e-03.
In epoch 347, average traning loss is 0.03161999782813447.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 1.7481e-03.
In epoch 348, average traning loss is 0.03180244072739567.


100%|██████████| 98/98 [00:49<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 1.7448e-03.
In epoch 349, average traning loss is 0.031771368290088614.


100%|██████████| 98/98 [00:50<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 1.7414e-03.
In epoch 350, average traning loss is 0.03163176288410109.


100%|██████████| 98/98 [00:50<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 1.7381e-03.
In epoch 351, average traning loss is 0.031589464018387456.


100%|██████████| 98/98 [00:49<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 1.7347e-03.
In epoch 352, average traning loss is 0.0317450245006048.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 1.7313e-03.
In epoch 353, average traning loss is 0.03199529581304107.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.7279e-03.
In epoch 354, average traning loss is 0.03150696603923428.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.7245e-03.
In epoch 355, average traning loss is 0.031569416558712114.


100%|██████████| 98/98 [00:46<00:00,  2.09it/s]


Adjusting learning rate of group 0 to 1.7211e-03.
In epoch 356, average traning loss is 0.03161662727670402.


100%|██████████| 98/98 [00:46<00:00,  2.09it/s]


Adjusting learning rate of group 0 to 1.7177e-03.
In epoch 357, average traning loss is 0.03143694164345459.


100%|██████████| 98/98 [00:46<00:00,  2.09it/s]


Adjusting learning rate of group 0 to 1.7143e-03.
In epoch 358, average traning loss is 0.03139843751809427.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.7109e-03.
In epoch 359, average traning loss is 0.03149865058307745.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.7075e-03.
In epoch 360, average traning loss is 0.03133790428769224.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.7041e-03.
In epoch 361, average traning loss is 0.031440088164289386.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.7007e-03.
In epoch 362, average traning loss is 0.03138395607927624.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.6973e-03.
In epoch 363, average traning loss is 0.03134460192249746.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6938e-03.
In epoch 364, average traning loss is 0.031218701954550888.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6904e-03.
In epoch 365, average traning loss is 0.03125285494084261.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6869e-03.
In epoch 366, average traning loss is 0.03120577707886696.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6835e-03.
In epoch 367, average traning loss is 0.03123995199875564.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.6800e-03.
In epoch 368, average traning loss is 0.031197576821610635.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6766e-03.
In epoch 369, average traning loss is 0.03147530209805284.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6731e-03.
In epoch 370, average traning loss is 0.0309983780797647.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6696e-03.
In epoch 371, average traning loss is 0.031158488433884114.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6662e-03.
In epoch 372, average traning loss is 0.031241286507978732.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6627e-03.
In epoch 373, average traning loss is 0.031007770638988942.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6592e-03.
In epoch 374, average traning loss is 0.03102772549859115.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6557e-03.
In epoch 375, average traning loss is 0.03104968462139368.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6522e-03.
In epoch 376, average traning loss is 0.031062131443498085.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6488e-03.
In epoch 377, average traning loss is 0.03112194554081985.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6453e-03.
In epoch 378, average traning loss is 0.03108006618840962.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6417e-03.
In epoch 379, average traning loss is 0.03085618129722318.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.6382e-03.
In epoch 380, average traning loss is 0.03092913806666525.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6347e-03.
In epoch 381, average traning loss is 0.0308885535187259.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6312e-03.
In epoch 382, average traning loss is 0.03097105174496466.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6277e-03.
In epoch 383, average traning loss is 0.03095189029616969.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.6242e-03.
In epoch 384, average traning loss is 0.030739958112945363.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6206e-03.
In epoch 385, average traning loss is 0.03119057882577181.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6171e-03.
In epoch 386, average traning loss is 0.030972224736244093.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6136e-03.
In epoch 387, average traning loss is 0.03075360200767006.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6100e-03.
In epoch 388, average traning loss is 0.03100731818727693.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6065e-03.
In epoch 389, average traning loss is 0.030907214866304884.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.6029e-03.
In epoch 390, average traning loss is 0.030771002401502764.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5994e-03.
In epoch 391, average traning loss is 0.03077909243958337.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5958e-03.
In epoch 392, average traning loss is 0.030619075236727997.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5923e-03.
In epoch 393, average traning loss is 0.030618042304959833.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5887e-03.
In epoch 394, average traning loss is 0.030677263301854232.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5851e-03.
In epoch 395, average traning loss is 0.03071242377009927.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.5816e-03.
In epoch 396, average traning loss is 0.030738064962230166.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5780e-03.
In epoch 397, average traning loss is 0.030555648634172216.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5744e-03.
In epoch 398, average traning loss is 0.030596581553774222.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5708e-03.
In epoch 399, average traning loss is 0.03078769576944867.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5672e-03.
In epoch 400, average traning loss is 0.03054251234826385.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5636e-03.
In epoch 401, average traning loss is 0.030670705992652446.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5600e-03.
In epoch 402, average traning loss is 0.03080013702262421.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.5564e-03.
In epoch 403, average traning loss is 0.030484181372638867.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5528e-03.
In epoch 404, average traning loss is 0.030487524744655405.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5492e-03.
In epoch 405, average traning loss is 0.03051884816389303.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.5456e-03.
In epoch 406, average traning loss is 0.030501926374830762.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5420e-03.
In epoch 407, average traning loss is 0.030362402526091556.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.5384e-03.
In epoch 408, average traning loss is 0.030718570404058815.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5348e-03.
In epoch 409, average traning loss is 0.030392837330546915.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]

Adjusting learning rate of group 0 to 1.5312e-03.
In epoch 410, average traning loss is 0.03027088432667815.



100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5275e-03.
In epoch 411, average traning loss is 0.030545476025768688.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5239e-03.
In epoch 412, average traning loss is 0.03043415100902927.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.5203e-03.
In epoch 413, average traning loss is 0.030243465277765478.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5166e-03.
In epoch 414, average traning loss is 0.030480914521126116.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5130e-03.
In epoch 415, average traning loss is 0.0303364705142318.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5094e-03.
In epoch 416, average traning loss is 0.030334670077629234.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5057e-03.
In epoch 417, average traning loss is 0.030416417904958432.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.5021e-03.
In epoch 418, average traning loss is 0.030271404042687952.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4984e-03.
In epoch 419, average traning loss is 0.030653156397141973.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4948e-03.
In epoch 420, average traning loss is 0.03019227152120094.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4911e-03.
In epoch 421, average traning loss is 0.030122466879535695.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4875e-03.
In epoch 422, average traning loss is 0.03027340551192055.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4838e-03.
In epoch 423, average traning loss is 0.03027042077512157.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4801e-03.
In epoch 424, average traning loss is 0.030231122801802596.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4765e-03.
In epoch 425, average traning loss is 0.03024050253158321.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4728e-03.
In epoch 426, average traning loss is 0.030106564299488554.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4691e-03.
In epoch 427, average traning loss is 0.030178409658980613.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4654e-03.
In epoch 428, average traning loss is 0.030087885085721404.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4618e-03.
In epoch 429, average traning loss is 0.03024293835826066.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4581e-03.
In epoch 430, average traning loss is 0.030199686160349116.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4544e-03.
In epoch 431, average traning loss is 0.030173787961200793.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4507e-03.
In epoch 432, average traning loss is 0.030044666206350133.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4470e-03.
In epoch 433, average traning loss is 0.030039968748329853.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4433e-03.
In epoch 434, average traning loss is 0.030093477240630558.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4397e-03.
In epoch 435, average traning loss is 0.030017871884819195.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.4360e-03.
In epoch 436, average traning loss is 0.030001080404891044.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4323e-03.
In epoch 437, average traning loss is 0.030107258058780312.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.4286e-03.
In epoch 438, average traning loss is 0.02995533642492124.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4249e-03.
In epoch 439, average traning loss is 0.029947122311865797.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4212e-03.
In epoch 440, average traning loss is 0.02998506439355563.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.4174e-03.
In epoch 441, average traning loss is 0.029913201852112402.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4137e-03.
In epoch 442, average traning loss is 0.03003845105365831.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.4100e-03.
In epoch 443, average traning loss is 0.029879192840688084.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4063e-03.
In epoch 444, average traning loss is 0.029863224383823727.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4026e-03.
In epoch 445, average traning loss is 0.029839422439738195.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3989e-03.
In epoch 446, average traning loss is 0.02992229560884286.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3952e-03.
In epoch 447, average traning loss is 0.02986795842951658.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3914e-03.
In epoch 448, average traning loss is 0.02987234649837625.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3877e-03.
In epoch 449, average traning loss is 0.029922729814235046.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3840e-03.
In epoch 450, average traning loss is 0.029878650620883823.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3803e-03.
In epoch 451, average traning loss is 0.029805838833658064.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3765e-03.
In epoch 452, average traning loss is 0.029802178222762078.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3728e-03.
In epoch 453, average traning loss is 0.029780267547740012.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3691e-03.
In epoch 454, average traning loss is 0.02983638494066438.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3653e-03.
In epoch 455, average traning loss is 0.029795921958830893.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3616e-03.
In epoch 456, average traning loss is 0.029690944551661307.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3579e-03.
In epoch 457, average traning loss is 0.02982263455205426.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3541e-03.
In epoch 458, average traning loss is 0.029695241103823086.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3504e-03.
In epoch 459, average traning loss is 0.02970142968531166.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.3467e-03.
In epoch 460, average traning loss is 0.029735301849337255.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3429e-03.
In epoch 461, average traning loss is 0.029694272699404736.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3392e-03.
In epoch 462, average traning loss is 0.029675823104168688.


100%|██████████| 98/98 [00:47<00:00,  2.07it/s]


Adjusting learning rate of group 0 to 1.3354e-03.
In epoch 463, average traning loss is 0.02966183581750612.


100%|██████████| 98/98 [00:47<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 1.3317e-03.
In epoch 464, average traning loss is 0.029624987070505718.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3279e-03.
In epoch 465, average traning loss is 0.029728471168449948.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3242e-03.
In epoch 466, average traning loss is 0.029602249000905727.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3204e-03.
In epoch 467, average traning loss is 0.02957351794656442.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3167e-03.
In epoch 468, average traning loss is 0.029536439942158.


100%|██████████| 98/98 [00:46<00:00,  2.09it/s]


Adjusting learning rate of group 0 to 1.3129e-03.
In epoch 469, average traning loss is 0.029648256716223394.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3092e-03.
In epoch 470, average traning loss is 0.029511920062406938.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3054e-03.
In epoch 471, average traning loss is 0.029700648058585976.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3017e-03.
In epoch 472, average traning loss is 0.029528055073959485.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2979e-03.
In epoch 473, average traning loss is 0.02942888531833887.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2942e-03.
In epoch 474, average traning loss is 0.02951988551233496.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.2904e-03.
In epoch 475, average traning loss is 0.02955705408311012.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2866e-03.
In epoch 476, average traning loss is 0.029416124849599237.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2829e-03.
In epoch 477, average traning loss is 0.02950044957046606.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2791e-03.
In epoch 478, average traning loss is 0.029444587588006138.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2753e-03.
In epoch 479, average traning loss is 0.029462491162121296.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2716e-03.
In epoch 480, average traning loss is 0.029632240476808985.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2678e-03.
In epoch 481, average traning loss is 0.029419703238016487.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2641e-03.
In epoch 482, average traning loss is 0.029426166464631656.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2603e-03.
In epoch 483, average traning loss is 0.029501793849072894.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2565e-03.
In epoch 484, average traning loss is 0.029428011946836297.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2528e-03.
In epoch 485, average traning loss is 0.029313944546239718.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2490e-03.
In epoch 486, average traning loss is 0.029342090444905416.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2452e-03.
In epoch 487, average traning loss is 0.029244195688896035.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2415e-03.
In epoch 488, average traning loss is 0.029306083898611213.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2377e-03.
In epoch 489, average traning loss is 0.029384252881365164.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2339e-03.
In epoch 490, average traning loss is 0.029269248380192688.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2302e-03.
In epoch 491, average traning loss is 0.029300810959266156.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2264e-03.
In epoch 492, average traning loss is 0.029330559930174936.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2226e-03.
In epoch 493, average traning loss is 0.029281935521534512.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2188e-03.
In epoch 494, average traning loss is 0.02930925246708247.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2151e-03.
In epoch 495, average traning loss is 0.029285603164866263.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2113e-03.
In epoch 496, average traning loss is 0.029300319981210087.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.2075e-03.
In epoch 497, average traning loss is 0.029192874436171686.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2038e-03.
In epoch 498, average traning loss is 0.029126186551032017.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2000e-03.
In epoch 499, average traning loss is 0.02921743324140505.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1962e-03.
In epoch 500, average traning loss is 0.029236737353613183.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1925e-03.
In epoch 501, average traning loss is 0.029225705305532534.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1887e-03.
In epoch 502, average traning loss is 0.029167383795185964.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.1849e-03.
In epoch 503, average traning loss is 0.029215635522743876.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1812e-03.
In epoch 504, average traning loss is 0.029313697688737695.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1774e-03.
In epoch 505, average traning loss is 0.029164130818478917.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.1736e-03.
In epoch 506, average traning loss is 0.028997327638219814.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1698e-03.
In epoch 507, average traning loss is 0.029152199340870186.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1661e-03.
In epoch 508, average traning loss is 0.029074148636083215.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1623e-03.
In epoch 509, average traning loss is 0.029167491562512457.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1585e-03.
In epoch 510, average traning loss is 0.02901568923297585.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1548e-03.
In epoch 511, average traning loss is 0.029049107443769366.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1510e-03.
In epoch 512, average traning loss is 0.02907012216746807.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1472e-03.
In epoch 513, average traning loss is 0.029018183732975503.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1435e-03.
In epoch 514, average traning loss is 0.029086125881544182.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1397e-03.
In epoch 515, average traning loss is 0.028977826351718028.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1359e-03.
In epoch 516, average traning loss is 0.02906026016464647.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1322e-03.
In epoch 517, average traning loss is 0.02894748667521136.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1284e-03.
In epoch 518, average traning loss is 0.029008225727902383.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1247e-03.
In epoch 519, average traning loss is 0.02896945174707442.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1209e-03.
In epoch 520, average traning loss is 0.02907172301594092.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.1171e-03.
In epoch 521, average traning loss is 0.02892490453561958.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1134e-03.
In epoch 522, average traning loss is 0.028990643688154464.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1096e-03.
In epoch 523, average traning loss is 0.028913432865270546.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1058e-03.
In epoch 524, average traning loss is 0.028944259870569318.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1021e-03.
In epoch 525, average traning loss is 0.028889217394955303.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0983e-03.
In epoch 526, average traning loss is 0.028890020442100203.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0946e-03.
In epoch 527, average traning loss is 0.028856861576133847.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0908e-03.
In epoch 528, average traning loss is 0.02889762223907271.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0871e-03.
In epoch 529, average traning loss is 0.028916263382653802.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0833e-03.
In epoch 530, average traning loss is 0.028748916062925543.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0796e-03.
In epoch 531, average traning loss is 0.028877442436558858.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0758e-03.
In epoch 532, average traning loss is 0.02882149969512711.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0721e-03.
In epoch 533, average traning loss is 0.028820819240443562.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0683e-03.
In epoch 534, average traning loss is 0.0288907879469346.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0646e-03.
In epoch 535, average traning loss is 0.028817793012273555.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0608e-03.
In epoch 536, average traning loss is 0.028763489569632376.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0571e-03.
In epoch 537, average traning loss is 0.028696228807069818.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0533e-03.
In epoch 538, average traning loss is 0.028973796518937667.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0496e-03.
In epoch 539, average traning loss is 0.028679686237354667.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0459e-03.
In epoch 540, average traning loss is 0.028721376398236166.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0421e-03.
In epoch 541, average traning loss is 0.028718553711565172.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.0384e-03.
In epoch 542, average traning loss is 0.028658777703436052.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0347e-03.
In epoch 543, average traning loss is 0.028709578004722694.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.0309e-03.
In epoch 544, average traning loss is 0.02875669749111545.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0272e-03.
In epoch 545, average traning loss is 0.02869295608252287.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0235e-03.
In epoch 546, average traning loss is 0.028713005746961857.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0197e-03.
In epoch 547, average traning loss is 0.02861633953847447.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0160e-03.
In epoch 548, average traning loss is 0.02864618287706862.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.0123e-03.
In epoch 549, average traning loss is 0.028692657051950086.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0086e-03.
In epoch 550, average traning loss is 0.02855096679484966.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0048e-03.
In epoch 551, average traning loss is 0.028596895157682652.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.0011e-03.
In epoch 552, average traning loss is 0.02865496657940806.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.9740e-04.
In epoch 553, average traning loss is 0.028544026845115787.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.9369e-04.
In epoch 554, average traning loss is 0.028534533284908653.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.8997e-04.
In epoch 555, average traning loss is 0.02854086091855959.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.8626e-04.
In epoch 556, average traning loss is 0.028543891670296386.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.8255e-04.
In epoch 557, average traning loss is 0.028592731138425215.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.7885e-04.
In epoch 558, average traning loss is 0.028543962146706726.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.7514e-04.
In epoch 559, average traning loss is 0.028558165196101278.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.7144e-04.
In epoch 560, average traning loss is 0.028504981089155286.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.6774e-04.
In epoch 561, average traning loss is 0.028585084866048122.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.6404e-04.
In epoch 562, average traning loss is 0.028475988523236344.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.6035e-04.
In epoch 563, average traning loss is 0.02849047840097729.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 9.5666e-04.
In epoch 564, average traning loss is 0.02846822212925371.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.5296e-04.
In epoch 565, average traning loss is 0.028547505125859563.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.4928e-04.
In epoch 566, average traning loss is 0.02846167209957327.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.4559e-04.
In epoch 567, average traning loss is 0.02843662068171769.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.4191e-04.
In epoch 568, average traning loss is 0.028640204774481908.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.3823e-04.
In epoch 569, average traning loss is 0.02842920049264723.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.3455e-04.
In epoch 570, average traning loss is 0.028437468926517332.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.3088e-04.
In epoch 571, average traning loss is 0.02835410498842901.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.2720e-04.
In epoch 572, average traning loss is 0.028445402350352735.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.2353e-04.
In epoch 573, average traning loss is 0.028441029448746418.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 9.1987e-04.
In epoch 574, average traning loss is 0.028420152009597847.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 9.1620e-04.
In epoch 575, average traning loss is 0.028365928033480838.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.1254e-04.
In epoch 576, average traning loss is 0.028378410321869413.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.0888e-04.
In epoch 577, average traning loss is 0.02844148783051238.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.0523e-04.
In epoch 578, average traning loss is 0.02829411127889643.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 9.0157e-04.
In epoch 579, average traning loss is 0.0282548762173677.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.9792e-04.
In epoch 580, average traning loss is 0.028341190300273652.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.9428e-04.
In epoch 581, average traning loss is 0.028226193838885853.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 8.9063e-04.
In epoch 582, average traning loss is 0.028445218043515876.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.8699e-04.
In epoch 583, average traning loss is 0.028344349460486248.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.8335e-04.
In epoch 584, average traning loss is 0.028217886403507114.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.7972e-04.
In epoch 585, average traning loss is 0.028176045691480443.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.7609e-04.
In epoch 586, average traning loss is 0.028267857523597017.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 8.7246e-04.
In epoch 587, average traning loss is 0.02819933310835337.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.6883e-04.
In epoch 588, average traning loss is 0.028343851126882494.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.6521e-04.
In epoch 589, average traning loss is 0.028249924223185802.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.6159e-04.
In epoch 590, average traning loss is 0.028195150234565442.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.5798e-04.
In epoch 591, average traning loss is 0.028204242793881163.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.5437e-04.
In epoch 592, average traning loss is 0.028210476687063977.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.5076e-04.
In epoch 593, average traning loss is 0.028202478146674682.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.4715e-04.
In epoch 594, average traning loss is 0.028194090997686192.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.4355e-04.
In epoch 595, average traning loss is 0.028194572339404603.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.3995e-04.
In epoch 596, average traning loss is 0.02823061480814097.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.3636e-04.
In epoch 597, average traning loss is 0.028171752161365384.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.3277e-04.
In epoch 598, average traning loss is 0.028130612849276895.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 8.2918e-04.
In epoch 599, average traning loss is 0.02819025318841545.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.2560e-04.
In epoch 600, average traning loss is 0.028030079181249043.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.2202e-04.
In epoch 601, average traning loss is 0.028093329706818472.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.1844e-04.
In epoch 602, average traning loss is 0.028064820155197262.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.1487e-04.
In epoch 603, average traning loss is 0.02811306391899683.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.1130e-04.
In epoch 604, average traning loss is 0.028059214334554817.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 8.0773e-04.
In epoch 605, average traning loss is 0.028076189627148668.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 8.0417e-04.
In epoch 606, average traning loss is 0.028085079215162873.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 8.0062e-04.
In epoch 607, average traning loss is 0.028030307051174496.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.9706e-04.
In epoch 608, average traning loss is 0.028121056719398012.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.9351e-04.
In epoch 609, average traning loss is 0.028034593415807704.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 7.8997e-04.
In epoch 610, average traning loss is 0.028009527495929172.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 7.8643e-04.
In epoch 611, average traning loss is 0.027995422691562955.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 7.8289e-04.
In epoch 612, average traning loss is 0.02801368246805303.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 7.7936e-04.
In epoch 613, average traning loss is 0.02796825738053541.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 7.7583e-04.
In epoch 614, average traning loss is 0.027947595419020067.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 7.7231e-04.
In epoch 615, average traning loss is 0.027928562283667982.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.6879e-04.
In epoch 616, average traning loss is 0.02792764411364891.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.6527e-04.
In epoch 617, average traning loss is 0.027975150498048384.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.6176e-04.
In epoch 618, average traning loss is 0.02788633172761421.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 7.5825e-04.
In epoch 619, average traning loss is 0.02792241205746422.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.5475e-04.
In epoch 620, average traning loss is 0.02790987292038543.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 7.5125e-04.
In epoch 621, average traning loss is 0.027909585901973198.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.4775e-04.
In epoch 622, average traning loss is 0.027997817273954957.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 7.4427e-04.
In epoch 623, average traning loss is 0.02783031394819216.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 7.4078e-04.
In epoch 624, average traning loss is 0.027881558946504886.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 7.3730e-04.
In epoch 625, average traning loss is 0.02784928700373489.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.3382e-04.
In epoch 626, average traning loss is 0.02789994407141087.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.3035e-04.
In epoch 627, average traning loss is 0.027928759438955054.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.2688e-04.
In epoch 628, average traning loss is 0.027890358823446596.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.2342e-04.
In epoch 629, average traning loss is 0.02784388407836763.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.1997e-04.
In epoch 630, average traning loss is 0.027779132697959334.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.1651e-04.
In epoch 631, average traning loss is 0.02782632203354519.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.1306e-04.
In epoch 632, average traning loss is 0.027825382043968658.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 7.0962e-04.
In epoch 633, average traning loss is 0.027784246400150717.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.0618e-04.
In epoch 634, average traning loss is 0.0277998329957529.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 7.0275e-04.
In epoch 635, average traning loss is 0.02782748533146722.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.9932e-04.
In epoch 636, average traning loss is 0.027753783427939122.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.9590e-04.
In epoch 637, average traning loss is 0.027772402048719173.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 6.9248e-04.
In epoch 638, average traning loss is 0.02769384356405662.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.8906e-04.
In epoch 639, average traning loss is 0.027742885832427715.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 6.8566e-04.
In epoch 640, average traning loss is 0.027732689902946656.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.8225e-04.
In epoch 641, average traning loss is 0.027781235073141907.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.7885e-04.
In epoch 642, average traning loss is 0.02775249744252283.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.7546e-04.
In epoch 643, average traning loss is 0.02767132078202403.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.7207e-04.
In epoch 644, average traning loss is 0.027664115994560475.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.6869e-04.
In epoch 645, average traning loss is 0.02776709130528022.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.6531e-04.
In epoch 646, average traning loss is 0.027655484648991605.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 6.6194e-04.
In epoch 647, average traning loss is 0.027670345174110666.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.5857e-04.
In epoch 648, average traning loss is 0.027609405297862023.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.5521e-04.
In epoch 649, average traning loss is 0.027662467952741653.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 6.5186e-04.
In epoch 650, average traning loss is 0.027639340514279142.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 6.4850e-04.
In epoch 651, average traning loss is 0.027658941281237165.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.4516e-04.
In epoch 652, average traning loss is 0.02764268955025746.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 6.4182e-04.
In epoch 653, average traning loss is 0.027595127609615425.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 6.3848e-04.
In epoch 654, average traning loss is 0.027531947921581413.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.3516e-04.
In epoch 655, average traning loss is 0.027603339004729475.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.3183e-04.
In epoch 656, average traning loss is 0.027577878907322884.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.2851e-04.
In epoch 657, average traning loss is 0.027546714193054607.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.2520e-04.
In epoch 658, average traning loss is 0.027579548673666254.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.2190e-04.
In epoch 659, average traning loss is 0.027596920804709803.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.1859e-04.
In epoch 660, average traning loss is 0.027583945808666094.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.1530e-04.
In epoch 661, average traning loss is 0.027537561192804455.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.1201e-04.
In epoch 662, average traning loss is 0.027602080103694176.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 6.0873e-04.
In epoch 663, average traning loss is 0.027525560588252788.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 6.0545e-04.
In epoch 664, average traning loss is 0.02755198150645105.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 6.0218e-04.
In epoch 665, average traning loss is 0.02748053030548047.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.9891e-04.
In epoch 666, average traning loss is 0.027522303867248857.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 5.9565e-04.
In epoch 667, average traning loss is 0.027471143828362833.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 5.9240e-04.
In epoch 668, average traning loss is 0.02746844190952121.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 5.8915e-04.
In epoch 669, average traning loss is 0.02751290756372773.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.8591e-04.
In epoch 670, average traning loss is 0.027489975798038805.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 5.8267e-04.
In epoch 671, average traning loss is 0.027427524575317393.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.7944e-04.
In epoch 672, average traning loss is 0.027419340131538256.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.7622e-04.
In epoch 673, average traning loss is 0.027485604930136884.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.7300e-04.
In epoch 674, average traning loss is 0.027362651438737402.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 5.6979e-04.
In epoch 675, average traning loss is 0.02745480223425797.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.6659e-04.
In epoch 676, average traning loss is 0.02741805245453606.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.6339e-04.
In epoch 677, average traning loss is 0.027397027239203453.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.6019e-04.
In epoch 678, average traning loss is 0.02739075521881483.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 5.5701e-04.
In epoch 679, average traning loss is 0.027399619984231433.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.5383e-04.
In epoch 680, average traning loss is 0.027413678260482088.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 5.5065e-04.
In epoch 681, average traning loss is 0.027318487704104305.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 5.4749e-04.
In epoch 682, average traning loss is 0.027384986796853493.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 5.4433e-04.
In epoch 683, average traning loss is 0.027343296426899578.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 5.4117e-04.
In epoch 684, average traning loss is 0.027381084783344854.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 5.3802e-04.
In epoch 685, average traning loss is 0.02736882681065068.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.3488e-04.
In epoch 686, average traning loss is 0.027316822727419893.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.3175e-04.
In epoch 687, average traning loss is 0.027285217744659404.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.2862e-04.
In epoch 688, average traning loss is 0.027321519558222925.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 5.2550e-04.
In epoch 689, average traning loss is 0.02729692838477845.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 5.2239e-04.
In epoch 690, average traning loss is 0.027339711614257217.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.1928e-04.
In epoch 691, average traning loss is 0.02730346414051494.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.1618e-04.
In epoch 692, average traning loss is 0.02731804103039357.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.1308e-04.
In epoch 693, average traning loss is 0.0273086420659508.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.0999e-04.
In epoch 694, average traning loss is 0.027244091185988212.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 5.0691e-04.
In epoch 695, average traning loss is 0.02721010213147621.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.0384e-04.
In epoch 696, average traning loss is 0.02719049307764793.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 5.0077e-04.
In epoch 697, average traning loss is 0.027193479372986724.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.9771e-04.
In epoch 698, average traning loss is 0.02722069756032861.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.9466e-04.
In epoch 699, average traning loss is 0.027237308781822116.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.9161e-04.
In epoch 700, average traning loss is 0.02719413348454602.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 4.8857e-04.
In epoch 701, average traning loss is 0.02718626407488268.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 4.8554e-04.
In epoch 702, average traning loss is 0.02720139436043647.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.8251e-04.
In epoch 703, average traning loss is 0.027240807798748115.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.7950e-04.
In epoch 704, average traning loss is 0.027234991290131395.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.7648e-04.
In epoch 705, average traning loss is 0.027174316802803352.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 4.7348e-04.
In epoch 706, average traning loss is 0.02714228094080273.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.7048e-04.
In epoch 707, average traning loss is 0.027098818663127567.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.6749e-04.
In epoch 708, average traning loss is 0.027137063918825314.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.6451e-04.
In epoch 709, average traning loss is 0.02709769740776748.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.6154e-04.
In epoch 710, average traning loss is 0.02712409451071705.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.5857e-04.
In epoch 711, average traning loss is 0.02709724047050184.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.5561e-04.
In epoch 712, average traning loss is 0.02709979714103499.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.5265e-04.
In epoch 713, average traning loss is 0.0271386966603447.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.4971e-04.
In epoch 714, average traning loss is 0.027068407312795823.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.4677e-04.
In epoch 715, average traning loss is 0.027043172519425958.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.4384e-04.
In epoch 716, average traning loss is 0.02708593729351248.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.4092e-04.
In epoch 717, average traning loss is 0.027147331313059037.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.3800e-04.
In epoch 718, average traning loss is 0.027033235212521895.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.3509e-04.
In epoch 719, average traning loss is 0.027039860185159713.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.3219e-04.
In epoch 720, average traning loss is 0.02707923068760001.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.2930e-04.
In epoch 721, average traning loss is 0.027067609188355962.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.2641e-04.
In epoch 722, average traning loss is 0.027017001176671107.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.2353e-04.
In epoch 723, average traning loss is 0.027059584209809497.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.2066e-04.
In epoch 724, average traning loss is 0.02695676186407099.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.1780e-04.
In epoch 725, average traning loss is 0.026999240646100774.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.1494e-04.
In epoch 726, average traning loss is 0.026955304629340465.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.1210e-04.
In epoch 727, average traning loss is 0.02697912599815398.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.0926e-04.
In epoch 728, average traning loss is 0.02701310334461076.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 4.0643e-04.
In epoch 729, average traning loss is 0.026951117479071324.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 4.0360e-04.
In epoch 730, average traning loss is 0.026938991260011584.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 4.0079e-04.
In epoch 731, average traning loss is 0.026991054377689654.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.9798e-04.
In epoch 732, average traning loss is 0.02692686832909073.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.9518e-04.
In epoch 733, average traning loss is 0.02688535664002506.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.9238e-04.
In epoch 734, average traning loss is 0.02692620851555649.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.8960e-04.
In epoch 735, average traning loss is 0.02686671180916684.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.8682e-04.
In epoch 736, average traning loss is 0.02693848296695826.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.8406e-04.
In epoch 737, average traning loss is 0.026929411828061755.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.8130e-04.
In epoch 738, average traning loss is 0.026884524284728935.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.7854e-04.
In epoch 739, average traning loss is 0.026935244606313656.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.7580e-04.
In epoch 740, average traning loss is 0.026868929307223583.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.7306e-04.
In epoch 741, average traning loss is 0.02688302122512642.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 3.7034e-04.
In epoch 742, average traning loss is 0.026854890892852326.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.6762e-04.
In epoch 743, average traning loss is 0.026871278825006922.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.6490e-04.
In epoch 744, average traning loss is 0.026856514340152546.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.6220e-04.
In epoch 745, average traning loss is 0.026879480469743817.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.5951e-04.
In epoch 746, average traning loss is 0.0268766423497273.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.5682e-04.
In epoch 747, average traning loss is 0.0268315461521246.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.5414e-04.
In epoch 748, average traning loss is 0.026816961982724617.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.5147e-04.
In epoch 749, average traning loss is 0.026752819516220872.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.4881e-04.
In epoch 750, average traning loss is 0.02676867883728475.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.4616e-04.
In epoch 751, average traning loss is 0.026818086621256506.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.4351e-04.
In epoch 752, average traning loss is 0.02677967475385082.


100%|██████████| 98/98 [00:45<00:00,  2.15it/s]


Adjusting learning rate of group 0 to 3.4088e-04.
In epoch 753, average traning loss is 0.026716445325588693.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.3825e-04.
In epoch 754, average traning loss is 0.026811165906184792.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.3563e-04.
In epoch 755, average traning loss is 0.026795197582366516.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.3302e-04.
In epoch 756, average traning loss is 0.026726039278568054.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.3042e-04.
In epoch 757, average traning loss is 0.026794288155375694.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.2782e-04.
In epoch 758, average traning loss is 0.026762736086942712.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.2524e-04.
In epoch 759, average traning loss is 0.026710566912530636.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.2266e-04.
In epoch 760, average traning loss is 0.026743123687955797.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 3.2009e-04.
In epoch 761, average traning loss is 0.026731630825266545.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.1753e-04.
In epoch 762, average traning loss is 0.02674722165933677.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.1498e-04.
In epoch 763, average traning loss is 0.026765434394533535.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.1244e-04.
In epoch 764, average traning loss is 0.026772823488833954.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.0991e-04.
In epoch 765, average traning loss is 0.026743182627370164.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 3.0739e-04.
In epoch 766, average traning loss is 0.026669526016529724.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 3.0487e-04.
In epoch 767, average traning loss is 0.026672792906055644.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 3.0236e-04.
In epoch 768, average traning loss is 0.026689374154167517.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.9987e-04.
In epoch 769, average traning loss is 0.026668021341367643.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.9738e-04.
In epoch 770, average traning loss is 0.0266715319142962.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.9490e-04.
In epoch 771, average traning loss is 0.026691323317283272.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 2.9243e-04.
In epoch 772, average traning loss is 0.026673096213109638.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 2.8997e-04.
In epoch 773, average traning loss is 0.026634170008557185.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.8751e-04.
In epoch 774, average traning loss is 0.026712308162633255.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.8507e-04.
In epoch 775, average traning loss is 0.02669082344414628.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.8263e-04.
In epoch 776, average traning loss is 0.02661987563253057.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 2.8021e-04.
In epoch 777, average traning loss is 0.026650739985765243.


100%|██████████| 98/98 [00:45<00:00,  2.15it/s]


Adjusting learning rate of group 0 to 2.7779e-04.
In epoch 778, average traning loss is 0.02665363176136601.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 2.7538e-04.
In epoch 779, average traning loss is 0.026588273367711475.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 2.7299e-04.
In epoch 780, average traning loss is 0.0265598970332316.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.7060e-04.
In epoch 781, average traning loss is 0.026590918323823383.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 2.6822e-04.
In epoch 782, average traning loss is 0.026604412047534574.


100%|██████████| 98/98 [00:45<00:00,  2.15it/s]


Adjusting learning rate of group 0 to 2.6585e-04.
In epoch 783, average traning loss is 0.026673067171050578.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.6348e-04.
In epoch 784, average traning loss is 0.026585015563332304.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 2.6113e-04.
In epoch 785, average traning loss is 0.02656486145771888.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.5879e-04.
In epoch 786, average traning loss is 0.02658744728458779.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.5645e-04.
In epoch 787, average traning loss is 0.02658743491130216.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.5413e-04.
In epoch 788, average traning loss is 0.026552166067519967.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.5181e-04.
In epoch 789, average traning loss is 0.02655830160163495.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.4951e-04.
In epoch 790, average traning loss is 0.02655850706279886.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.4721e-04.
In epoch 791, average traning loss is 0.02660909656207172.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.4492e-04.
In epoch 792, average traning loss is 0.02646583938324938.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.4265e-04.
In epoch 793, average traning loss is 0.026512888678330546.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.4038e-04.
In epoch 794, average traning loss is 0.026565312312877908.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.3812e-04.
In epoch 795, average traning loss is 0.026510479346830016.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.3587e-04.
In epoch 796, average traning loss is 0.026490333071928853.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.3363e-04.
In epoch 797, average traning loss is 0.026541901505267133.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.3140e-04.
In epoch 798, average traning loss is 0.026524267976685445.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.2918e-04.
In epoch 799, average traning loss is 0.02653990957733928.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.2697e-04.
In epoch 800, average traning loss is 0.02650851872274462.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.2477e-04.
In epoch 801, average traning loss is 0.026498640431281254.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.2258e-04.
In epoch 802, average traning loss is 0.02646691055626285.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 2.2039e-04.
In epoch 803, average traning loss is 0.02649174069947734.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.1822e-04.
In epoch 804, average traning loss is 0.02646724062458593.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.1606e-04.
In epoch 805, average traning loss is 0.026412611028977802.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.1390e-04.
In epoch 806, average traning loss is 0.026445034910373543.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.1176e-04.
In epoch 807, average traning loss is 0.026514090027432054.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.0963e-04.
In epoch 808, average traning loss is 0.02643422341468383.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.0750e-04.
In epoch 809, average traning loss is 0.026407512513046363.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 2.0539e-04.
In epoch 810, average traning loss is 0.026429761068097184.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 2.0328e-04.
In epoch 811, average traning loss is 0.026390798180839236.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 2.0119e-04.
In epoch 812, average traning loss is 0.02641665692232093.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.9911e-04.
In epoch 813, average traning loss is 0.026427989939645846.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.9703e-04.
In epoch 814, average traning loss is 0.026403392608068427.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.9497e-04.
In epoch 815, average traning loss is 0.026398380933215424.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.9291e-04.
In epoch 816, average traning loss is 0.02644446724075444.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.9087e-04.
In epoch 817, average traning loss is 0.026359326054095005.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.8883e-04.
In epoch 818, average traning loss is 0.026371115628553897.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.8681e-04.
In epoch 819, average traning loss is 0.026374032796949755.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.8479e-04.
In epoch 820, average traning loss is 0.026327690128617142.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.8279e-04.
In epoch 821, average traning loss is 0.026357819516287774.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.8079e-04.
In epoch 822, average traning loss is 0.02637962869615579.


100%|██████████| 98/98 [00:45<00:00,  2.15it/s]


Adjusting learning rate of group 0 to 1.7881e-04.
In epoch 823, average traning loss is 0.026352852811010515.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.7683e-04.
In epoch 824, average traning loss is 0.02634546985583646.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.7487e-04.
In epoch 825, average traning loss is 0.02635905889756217.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.7291e-04.
In epoch 826, average traning loss is 0.02632478210238778.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.7097e-04.
In epoch 827, average traning loss is 0.026356882681804045.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.6903e-04.
In epoch 828, average traning loss is 0.02632936928421259.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.6711e-04.
In epoch 829, average traning loss is 0.026372590178281675.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.6520e-04.
In epoch 830, average traning loss is 0.02631370210069783.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.6329e-04.
In epoch 831, average traning loss is 0.026287691002445563.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.6140e-04.
In epoch 832, average traning loss is 0.02634447508928727.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.5952e-04.
In epoch 833, average traning loss is 0.02631008512891677.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.5764e-04.
In epoch 834, average traning loss is 0.026314149914803554.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5578e-04.
In epoch 835, average traning loss is 0.02629929454046853.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.5393e-04.
In epoch 836, average traning loss is 0.02633795130769817.


100%|██████████| 98/98 [00:45<00:00,  2.15it/s]


Adjusting learning rate of group 0 to 1.5209e-04.
In epoch 837, average traning loss is 0.026324623682517177.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.5025e-04.
In epoch 838, average traning loss is 0.026262514768358395.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4843e-04.
In epoch 839, average traning loss is 0.026284228288093393.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4662e-04.
In epoch 840, average traning loss is 0.02632197547627955.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.4482e-04.
In epoch 841, average traning loss is 0.026263612987739698.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.4303e-04.
In epoch 842, average traning loss is 0.026300970864083086.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.4125e-04.
In epoch 843, average traning loss is 0.026269812269934585.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3948e-04.
In epoch 844, average traning loss is 0.026223782544993624.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.3772e-04.
In epoch 845, average traning loss is 0.02624167477217864.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3597e-04.
In epoch 846, average traning loss is 0.026240575925580094.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.3424e-04.
In epoch 847, average traning loss is 0.026187387257054145.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3251e-04.
In epoch 848, average traning loss is 0.0261974462060904.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.3079e-04.
In epoch 849, average traning loss is 0.026227671139854556.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2909e-04.
In epoch 850, average traning loss is 0.026280709124189252.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2739e-04.
In epoch 851, average traning loss is 0.02622250824862597.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2571e-04.
In epoch 852, average traning loss is 0.02620301351939537.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.2403e-04.
In epoch 853, average traning loss is 0.026256867228265927.


100%|██████████| 98/98 [00:46<00:00,  2.10it/s]


Adjusting learning rate of group 0 to 1.2237e-04.
In epoch 854, average traning loss is 0.0262210465473484.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.2071e-04.
In epoch 855, average traning loss is 0.026117114863377446.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.1907e-04.
In epoch 856, average traning loss is 0.026210820511439626.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.1744e-04.
In epoch 857, average traning loss is 0.026207467255999848.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.1582e-04.
In epoch 858, average traning loss is 0.026148825940885106.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.1421e-04.
In epoch 859, average traning loss is 0.0261653032419937.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 1.1261e-04.
In epoch 860, average traning loss is 0.02622059736476869.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.1102e-04.
In epoch 861, average traning loss is 0.0262383495704556.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.0944e-04.
In epoch 862, average traning loss is 0.026184077583709543.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0787e-04.
In epoch 863, average traning loss is 0.026213122037600498.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0632e-04.
In epoch 864, average traning loss is 0.026210763643742824.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0477e-04.
In epoch 865, average traning loss is 0.02619964272087934.


100%|██████████| 98/98 [00:46<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 1.0323e-04.
In epoch 866, average traning loss is 0.02611280430336388.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 1.0171e-04.
In epoch 867, average traning loss is 0.02618120076628972.


100%|██████████| 98/98 [00:46<00:00,  2.12it/s]


Adjusting learning rate of group 0 to 1.0020e-04.
In epoch 868, average traning loss is 0.026123653089969744.


100%|██████████| 98/98 [00:46<00:00,  2.11it/s]


Adjusting learning rate of group 0 to 9.8694e-05.
In epoch 869, average traning loss is 0.026090808421829526.


100%|██████████| 98/98 [00:45<00:00,  2.13it/s]


Adjusting learning rate of group 0 to 9.7203e-05.
In epoch 870, average traning loss is 0.02619081524637889.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 9.5722e-05.
In epoch 871, average traning loss is 0.02615093591870094.


100%|██████████| 98/98 [00:45<00:00,  2.14it/s]


Adjusting learning rate of group 0 to 9.4252e-05.
In epoch 872, average traning loss is 0.02613701296932235.


100%|██████████| 98/98 [00:47<00:00,  2.08it/s]


Adjusting learning rate of group 0 to 9.2793e-05.
In epoch 873, average traning loss is 0.026158344266670092.


100%|██████████| 98/98 [00:47<00:00,  2.05it/s]


Adjusting learning rate of group 0 to 9.1345e-05.
In epoch 874, average traning loss is 0.026145514823040183.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 8.9907e-05.
In epoch 875, average traning loss is 0.026108010139848505.


100%|██████████| 98/98 [00:48<00:00,  2.01it/s]


Adjusting learning rate of group 0 to 8.8481e-05.
In epoch 876, average traning loss is 0.026121151006343414.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 8.7066e-05.
In epoch 877, average traning loss is 0.026083076505788734.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 8.5662e-05.
In epoch 878, average traning loss is 0.026018740020084138.


100%|██████████| 98/98 [00:48<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 8.4268e-05.
In epoch 879, average traning loss is 0.026081882056076915.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 8.2886e-05.
In epoch 880, average traning loss is 0.026050927478592008.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 8.1515e-05.
In epoch 881, average traning loss is 0.026050173677504063.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 8.0154e-05.
In epoch 882, average traning loss is 0.026087890854295418.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 7.8805e-05.
In epoch 883, average traning loss is 0.02614241494436045.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 7.7467e-05.
In epoch 884, average traning loss is 0.026107794795261353.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 7.6140e-05.
In epoch 885, average traning loss is 0.02605511181588684.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 7.4824e-05.
In epoch 886, average traning loss is 0.026107458492779.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 7.3519e-05.
In epoch 887, average traning loss is 0.02604892716876098.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 7.2226e-05.
In epoch 888, average traning loss is 0.02607301080941546.


100%|██████████| 98/98 [00:49<00:00,  1.96it/s]


Adjusting learning rate of group 0 to 7.0943e-05.
In epoch 889, average traning loss is 0.026114975976548632.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 6.9672e-05.
In epoch 890, average traning loss is 0.02605040002690286.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 6.8411e-05.
In epoch 891, average traning loss is 0.02603280340910566.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 6.7162e-05.
In epoch 892, average traning loss is 0.02615874800450948.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 6.5924e-05.
In epoch 893, average traning loss is 0.026075662873989464.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 6.4698e-05.
In epoch 894, average traning loss is 0.026036115154167826.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 6.3482e-05.
In epoch 895, average traning loss is 0.026011917207922255.


100%|██████████| 98/98 [00:49<00:00,  2.00it/s]


Adjusting learning rate of group 0 to 6.2278e-05.
In epoch 896, average traning loss is 0.026113497587491055.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 6.1085e-05.
In epoch 897, average traning loss is 0.026042936730901807.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 5.9903e-05.
In epoch 898, average traning loss is 0.02607777467643728.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 5.8732e-05.
In epoch 899, average traning loss is 0.02603578261498894.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 5.7573e-05.
In epoch 900, average traning loss is 0.026070851794615085.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 5.6425e-05.
In epoch 901, average traning loss is 0.026051815105031947.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 5.5288e-05.
In epoch 902, average traning loss is 0.026025753069136823.


100%|██████████| 98/98 [00:49<00:00,  1.97it/s]


Adjusting learning rate of group 0 to 5.4163e-05.
In epoch 903, average traning loss is 0.026076982482051363.


100%|██████████| 98/98 [00:49<00:00,  1.98it/s]


Adjusting learning rate of group 0 to 5.3048e-05.
In epoch 904, average traning loss is 0.026024821974641205.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 5.1946e-05.
In epoch 905, average traning loss is 0.025962269503851325.


100%|██████████| 98/98 [00:49<00:00,  1.99it/s]


Adjusting learning rate of group 0 to 5.0854e-05.
In epoch 906, average traning loss is 0.025973026032502552.


KeyboardInterrupt: 

# Training the Classifier

In [12]:
import os
import argparse
import math
import torch
import torchvision
from torchvision.transforms import ToTensor, Compose, Normalize
from tqdm import tqdm


seed = 42
batch_size = 128
max_device_batch_size = 256
base_learning_rate = 1e-3
weight_decay = 0.05
total_epoch = 100
warmup_epoch = 5
pretrained_model_path = 'vit-t-mae.pt'
output_model_path ='vit-t-classifier-from_pretrained.pt'


batch_size = batch_size
load_batch_size = min(max_device_batch_size, batch_size)

assert batch_size % load_batch_size == 0
steps_per_update = batch_size // load_batch_size

train_dataset = torchvision.datasets.CIFAR10('data', train=True, download=True, transform=Compose([ToTensor(), Normalize(0.5, 0.5)]))
val_dataset = torchvision.datasets.CIFAR10('data', train=False, download=True, transform=Compose([ToTensor(), Normalize(0.5, 0.5)]))
train_dataloader = torch.utils.data.DataLoader(train_dataset, load_batch_size, shuffle=True, num_workers=4)
val_dataloader = torch.utils.data.DataLoader(val_dataset, load_batch_size, shuffle=False, num_workers=4)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

if pretrained_model_path is not None:
    model = torch.load(pretrained_model_path, map_location='cpu')
else:
    model = MAE_ViT()
model = ViT_Classifier(model.encoder, num_classes=10).to(device)



Files already downloaded and verified
Files already downloaded and verified


In [13]:
loss_fn = torch.nn.CrossEntropyLoss()
acc_fn = lambda logit, label: torch.mean((logit.argmax(dim=-1) == label).float())

optim = torch.optim.AdamW(model.parameters(), lr=base_learning_rate * batch_size / 256, betas=(0.9, 0.999), weight_decay=weight_decay)
lr_func = lambda epoch: min((epoch + 1) / (warmup_epoch + 1e-8), 0.5 * (math.cos(epoch / total_epoch * math.pi) + 1))
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optim, lr_lambda=lr_func, verbose=True)

Adjusting learning rate of group 0 to 1.0000e-04.


In [14]:


best_val_acc = 0
step_count = 0
optim.zero_grad()
for e in range(total_epoch):
    model.train()
    losses = []
    acces = []
    for img, label in tqdm(iter(train_dataloader)):
        step_count += 1
        img = img.to(device)
        label = label.to(device)
        logits = model(img)
        loss = loss_fn(logits, label)
        acc = acc_fn(logits, label)
        loss.backward()
        if step_count % steps_per_update == 0:
            optim.step()
            optim.zero_grad()
        losses.append(loss.item())
        acces.append(acc.item())
    lr_scheduler.step()
    avg_train_loss = sum(losses) / len(losses)
    avg_train_acc = sum(acces) / len(acces)
    print(f'In epoch {e}, average training loss is {avg_train_loss}, average training acc is {avg_train_acc}.')

    model.eval()
    with torch.no_grad():
        losses = []
        acces = []
        for img, label in tqdm(iter(val_dataloader)):
            img = img.to(device)
            label = label.to(device)
            logits = model(img)
            loss = loss_fn(logits, label)
            acc = acc_fn(logits, label)
            losses.append(loss.item())
            acces.append(acc.item())
        avg_val_loss = sum(losses) / len(losses)
        avg_val_acc = sum(acces) / len(acces)
        print(f'In epoch {e}, average validation loss is {avg_val_loss}, average validation acc is {avg_val_acc}.')  

    if avg_val_acc > best_val_acc:
        best_val_acc = avg_val_acc
        print(f'saving best model with acc {best_val_acc} at {e} epoch!')       
        torch.save(model, output_model_path)

100%|██████████| 391/391 [01:41<00:00,  3.87it/s]


Adjusting learning rate of group 0 to 2.0000e-04.
In epoch 0, average training loss is 0.8092588828805157, average training acc is 0.749932065186903.


100%|██████████| 79/79 [00:06<00:00, 12.38it/s]


In epoch 0, average validation loss is 0.5546492560754849, average validation acc is 0.8222903481012658.
saving best model with acc 0.8222903481012658 at 0 epoch!


100%|██████████| 391/391 [01:43<00:00,  3.79it/s]


Adjusting learning rate of group 0 to 3.0000e-04.
In epoch 1, average training loss is 0.4715146217733393, average training acc is 0.8454483695652174.


100%|██████████| 79/79 [00:06<00:00, 12.56it/s]


In epoch 1, average validation loss is 0.4984543546091152, average validation acc is 0.834256329113924.
saving best model with acc 0.834256329113924 at 1 epoch!


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 4.0000e-04.
In epoch 2, average training loss is 0.3823876475053065, average training acc is 0.8727701406954499.


100%|██████████| 79/79 [00:06<00:00, 12.46it/s]


In epoch 2, average validation loss is 0.4729167155072659, average validation acc is 0.840684335443038.
saving best model with acc 0.840684335443038 at 2 epoch!


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 4.9803e-04.
In epoch 3, average training loss is 0.33977085889300424, average training acc is 0.885246163743841.


100%|██████████| 79/79 [00:06<00:00, 12.54it/s]


In epoch 3, average validation loss is 0.48131733472588695, average validation acc is 0.8379153481012658.


100%|██████████| 391/391 [01:47<00:00,  3.63it/s]


Adjusting learning rate of group 0 to 4.9692e-04.
In epoch 4, average training loss is 0.30732533465261047, average training acc is 0.8955522697904835.


100%|██████████| 79/79 [00:06<00:00, 12.22it/s]


In epoch 4, average validation loss is 0.4718705702431594, average validation acc is 0.8449367088607594.
saving best model with acc 0.8449367088607594 at 4 epoch!


100%|██████████| 391/391 [01:47<00:00,  3.62it/s]


Adjusting learning rate of group 0 to 4.9557e-04.
In epoch 5, average training loss is 0.25423051270148944, average training acc is 0.914122442455243.


100%|██████████| 79/79 [00:06<00:00, 12.51it/s]


In epoch 5, average validation loss is 0.48160936372189583, average validation acc is 0.8443433544303798.


100%|██████████| 391/391 [01:47<00:00,  3.62it/s]


Adjusting learning rate of group 0 to 4.9398e-04.
In epoch 6, average training loss is 0.2108131613763397, average training acc is 0.9272858056875751.


100%|██████████| 79/79 [00:06<00:00, 12.35it/s]


In epoch 6, average validation loss is 0.47957642583907406, average validation acc is 0.8446400316455697.


100%|██████████| 391/391 [01:48<00:00,  3.60it/s]


Adjusting learning rate of group 0 to 4.9215e-04.
In epoch 7, average training loss is 0.18042440136985097, average training acc is 0.9387468030995421.


100%|██████████| 79/79 [00:06<00:00, 12.40it/s]


In epoch 7, average validation loss is 0.4901231825351715, average validation acc is 0.8485957278481012.
saving best model with acc 0.8485957278481012 at 7 epoch!


100%|██████████| 391/391 [01:48<00:00,  3.60it/s]


Adjusting learning rate of group 0 to 4.9007e-04.
In epoch 8, average training loss is 0.15164572181527877, average training acc is 0.9480498721227621.


100%|██████████| 79/79 [00:06<00:00, 12.53it/s]


In epoch 8, average validation loss is 0.4912558986416346, average validation acc is 0.8559137658227848.
saving best model with acc 0.8559137658227848 at 8 epoch!


100%|██████████| 391/391 [01:49<00:00,  3.57it/s]


Adjusting learning rate of group 0 to 4.8776e-04.
In epoch 9, average training loss is 0.13950247662927945, average training acc is 0.9512827686031761.


100%|██████████| 79/79 [00:06<00:00, 12.38it/s]


In epoch 9, average validation loss is 0.4892511794084235, average validation acc is 0.8582871835443038.
saving best model with acc 0.8582871835443038 at 9 epoch!


100%|██████████| 391/391 [01:48<00:00,  3.60it/s]


Adjusting learning rate of group 0 to 4.8522e-04.
In epoch 10, average training loss is 0.1161088297319839, average training acc is 0.9597106777188723.


100%|██████████| 79/79 [00:06<00:00, 12.38it/s]


In epoch 10, average validation loss is 0.5005949692258352, average validation acc is 0.857001582278481.


100%|██████████| 391/391 [01:48<00:00,  3.60it/s]


Adjusting learning rate of group 0 to 4.8244e-04.
In epoch 11, average training loss is 0.11460668337352746, average training acc is 0.9612372122762148.


100%|██████████| 79/79 [00:06<00:00, 12.74it/s]


In epoch 11, average validation loss is 0.5008450482465043, average validation acc is 0.8605617088607594.
saving best model with acc 0.8605617088607594 at 11 epoch!


100%|██████████| 391/391 [01:48<00:00,  3.59it/s]


Adjusting learning rate of group 0 to 4.7944e-04.
In epoch 12, average training loss is 0.10881925941637867, average training acc is 0.9621443414627133.


100%|██████████| 79/79 [00:06<00:00, 12.69it/s]


In epoch 12, average validation loss is 0.503437374966054, average validation acc is 0.8592761075949367.


100%|██████████| 391/391 [01:49<00:00,  3.58it/s]


Adjusting learning rate of group 0 to 4.7621e-04.
In epoch 13, average training loss is 0.09473688315952677, average training acc is 0.9675231777188723.


100%|██████████| 79/79 [00:06<00:00, 12.51it/s]


In epoch 13, average validation loss is 0.5620209970806218, average validation acc is 0.8441455696202531.


100%|██████████| 391/391 [01:49<00:00,  3.57it/s]


Adjusting learning rate of group 0 to 4.7275e-04.
In epoch 14, average training loss is 0.08537368332762318, average training acc is 0.9709239131044549.


100%|██████████| 79/79 [00:06<00:00, 12.37it/s]


In epoch 14, average validation loss is 0.5909798284874687, average validation acc is 0.8423655063291139.


100%|██████████| 391/391 [01:49<00:00,  3.57it/s]


Adjusting learning rate of group 0 to 4.6908e-04.
In epoch 15, average training loss is 0.08503256307538513, average training acc is 0.9707800511204069.


100%|██████████| 79/79 [00:06<00:00, 12.32it/s]


In epoch 15, average validation loss is 0.521845360156856, average validation acc is 0.8626384493670886.
saving best model with acc 0.8626384493670886 at 15 epoch!


100%|██████████| 391/391 [01:50<00:00,  3.54it/s]


Adjusting learning rate of group 0 to 4.6519e-04.
In epoch 16, average training loss is 0.08151754396526939, average training acc is 0.9715752877542735.


100%|██████████| 79/79 [00:06<00:00, 12.38it/s]


In epoch 16, average validation loss is 0.5232162524627734, average validation acc is 0.8558148734177216.


100%|██████████| 391/391 [01:49<00:00,  3.56it/s]


Adjusting learning rate of group 0 to 4.6108e-04.
In epoch 17, average training loss is 0.07273908439890274, average training acc is 0.9755155050846012.


100%|██████████| 79/79 [00:06<00:00, 12.59it/s]


In epoch 17, average validation loss is 0.5645346075673646, average validation acc is 0.8512658227848101.


100%|██████████| 391/391 [01:48<00:00,  3.59it/s]


Adjusting learning rate of group 0 to 4.5677e-04.
In epoch 18, average training loss is 0.07176914598668932, average training acc is 0.9756473786080889.


100%|██████████| 79/79 [00:06<00:00, 12.57it/s]


In epoch 18, average validation loss is 0.5463785500843313, average validation acc is 0.861748417721519.


100%|██████████| 391/391 [01:48<00:00,  3.60it/s]


Adjusting learning rate of group 0 to 4.5225e-04.
In epoch 19, average training loss is 0.0643666934977045, average training acc is 0.9779171995494677.


100%|██████████| 79/79 [00:06<00:00, 12.34it/s]


In epoch 19, average validation loss is 0.5443740796439255, average validation acc is 0.8612539556962026.


100%|██████████| 391/391 [01:48<00:00,  3.60it/s]


Adjusting learning rate of group 0 to 4.4754e-04.
In epoch 20, average training loss is 0.06376871205342319, average training acc is 0.9776414641943734.


100%|██████████| 79/79 [00:06<00:00, 12.76it/s]


In epoch 20, average validation loss is 0.6261422279514844, average validation acc is 0.8467167721518988.


100%|██████████| 391/391 [01:49<00:00,  3.57it/s]


Adjusting learning rate of group 0 to 4.4263e-04.
In epoch 21, average training loss is 0.06677096930172895, average training acc is 0.9767862852577054.


100%|██████████| 79/79 [00:06<00:00, 12.96it/s]


In epoch 21, average validation loss is 0.584846763671199, average validation acc is 0.8527492088607594.


100%|██████████| 391/391 [01:46<00:00,  3.67it/s]


Adjusting learning rate of group 0 to 4.3753e-04.
In epoch 22, average training loss is 0.05644998595456752, average training acc is 0.9811940537694165.


100%|██████████| 79/79 [00:06<00:00, 13.05it/s]


In epoch 22, average validation loss is 0.5825380577316767, average validation acc is 0.8476068037974683.


100%|██████████| 391/391 [01:47<00:00,  3.65it/s]


Adjusting learning rate of group 0 to 4.3224e-04.
In epoch 23, average training loss is 0.05097952040503054, average training acc is 0.9818853900560638.


100%|██████████| 79/79 [00:06<00:00, 12.90it/s]


In epoch 23, average validation loss is 0.5979391211950327, average validation acc is 0.8542325949367089.


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 4.2678e-04.
In epoch 24, average training loss is 0.05708466205612549, average training acc is 0.9801310741383097.


100%|██████████| 79/79 [00:06<00:00, 12.82it/s]


In epoch 24, average validation loss is 0.6013690927360631, average validation acc is 0.8553204113924051.


100%|██████████| 391/391 [01:45<00:00,  3.71it/s]


Adjusting learning rate of group 0 to 4.2114e-04.
In epoch 25, average training loss is 0.053713259125804845, average training acc is 0.9818214514981145.


100%|██████████| 79/79 [00:06<00:00, 12.91it/s]


In epoch 25, average validation loss is 0.5990769021873232, average validation acc is 0.8549248417721519.


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 4.1533e-04.
In epoch 26, average training loss is 0.048606577087574834, average training acc is 0.9838515026185214.


100%|██████████| 79/79 [00:06<00:00, 12.82it/s]


In epoch 26, average validation loss is 0.5582658720167377, average validation acc is 0.8620450949367089.


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 4.0936e-04.
In epoch 27, average training loss is 0.051527983944891664, average training acc is 0.9821771099744245.


100%|██████████| 79/79 [00:06<00:00, 12.93it/s]


In epoch 27, average validation loss is 0.5628088478800617, average validation acc is 0.8637262658227848.
saving best model with acc 0.8637262658227848 at 27 epoch!


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 4.0323e-04.
In epoch 28, average training loss is 0.046747243334956064, average training acc is 0.9842750959079284.


100%|██████████| 79/79 [00:06<00:00, 13.00it/s]


In epoch 28, average validation loss is 0.5547978240477888, average validation acc is 0.861748417721519.


100%|██████████| 391/391 [01:46<00:00,  3.66it/s]


Adjusting learning rate of group 0 to 3.9695e-04.
In epoch 29, average training loss is 0.04064882244757565, average training acc is 0.9859015345573425.


100%|██████████| 79/79 [00:06<00:00, 12.82it/s]


In epoch 29, average validation loss is 0.6186274806909924, average validation acc is 0.8601661392405063.


100%|██████████| 391/391 [01:46<00:00,  3.67it/s]


Adjusting learning rate of group 0 to 3.9052e-04.
In epoch 30, average training loss is 0.0400939331475712, average training acc is 0.9857696611862963.


100%|██████████| 79/79 [00:06<00:00, 12.64it/s]


In epoch 30, average validation loss is 0.6476732284962377, average validation acc is 0.8517602848101266.


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 3.8396e-04.
In epoch 31, average training loss is 0.040834049848468064, average training acc is 0.9862731777493606.


100%|██████████| 79/79 [00:06<00:00, 12.64it/s]


In epoch 31, average validation loss is 0.5751183393258082, average validation acc is 0.8644185126582279.
saving best model with acc 0.8644185126582279 at 31 epoch!


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 3.7726e-04.
In epoch 32, average training loss is 0.04018619831989679, average training acc is 0.9860174233651222.


100%|██████████| 79/79 [00:06<00:00, 12.70it/s]


In epoch 32, average validation loss is 0.5866644812535636, average validation acc is 0.8628362341772152.


100%|██████████| 391/391 [01:46<00:00,  3.69it/s]


Adjusting learning rate of group 0 to 3.7044e-04.
In epoch 33, average training loss is 0.03711950368411086, average training acc is 0.9868326406649617.


100%|██████████| 79/79 [00:06<00:00, 12.54it/s]


In epoch 33, average validation loss is 0.5789365078075023, average validation acc is 0.8631329113924051.


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 3.6350e-04.
In epoch 34, average training loss is 0.03259238757459861, average training acc is 0.9890864770430738.


100%|██████████| 79/79 [00:06<00:00, 12.82it/s]


In epoch 34, average validation loss is 0.5854387224852284, average validation acc is 0.8663963607594937.
saving best model with acc 0.8663963607594937 at 34 epoch!


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 3.5644e-04.
In epoch 35, average training loss is 0.030739561260125274, average training acc is 0.9895580243271636.


100%|██████████| 79/79 [00:06<00:00, 12.76it/s]


In epoch 35, average validation loss is 0.5823520010785211, average validation acc is 0.8697587025316456.
saving best model with acc 0.8697587025316456 at 35 epoch!


100%|██████████| 391/391 [01:48<00:00,  3.59it/s]


Adjusting learning rate of group 0 to 3.4929e-04.
In epoch 36, average training loss is 0.03310428506246937, average training acc is 0.9886468990379588.


100%|██████████| 79/79 [00:06<00:00, 12.85it/s]


In epoch 36, average validation loss is 0.6676604329030725, average validation acc is 0.8506724683544303.


100%|██████████| 391/391 [01:47<00:00,  3.64it/s]


Adjusting learning rate of group 0 to 3.4203e-04.
In epoch 37, average training loss is 0.03147744701441635, average training acc is 0.9891783887772914.


100%|██████████| 79/79 [00:06<00:00, 12.66it/s]


In epoch 37, average validation loss is 0.5992068904864637, average validation acc is 0.8674841772151899.


100%|██████████| 391/391 [01:45<00:00,  3.69it/s]


Adjusting learning rate of group 0 to 3.3468e-04.
In epoch 38, average training loss is 0.026480471644469577, average training acc is 0.990948689258312.


100%|██████████| 79/79 [00:06<00:00, 12.82it/s]


In epoch 38, average validation loss is 0.579425045206577, average validation acc is 0.8694620253164557.


100%|██████████| 391/391 [01:46<00:00,  3.69it/s]


Adjusting learning rate of group 0 to 3.2725e-04.
In epoch 39, average training loss is 0.026454390139471443, average training acc is 0.9911764706187236.


100%|██████████| 79/79 [00:06<00:00, 12.63it/s]


In epoch 39, average validation loss is 0.5703269058390509, average validation acc is 0.8725276898734177.
saving best model with acc 0.8725276898734177 at 39 epoch!


100%|██████████| 391/391 [01:45<00:00,  3.69it/s]


Adjusting learning rate of group 0 to 3.1975e-04.
In epoch 40, average training loss is 0.028274875346814162, average training acc is 0.9902373721532505.


100%|██████████| 79/79 [00:06<00:00, 12.83it/s]


In epoch 40, average validation loss is 0.5923376984988586, average validation acc is 0.8668908227848101.


100%|██████████| 391/391 [01:45<00:00,  3.69it/s]


Adjusting learning rate of group 0 to 3.1217e-04.
In epoch 41, average training loss is 0.02531788310677985, average training acc is 0.9916080562659847.


100%|██████████| 79/79 [00:06<00:00, 12.96it/s]


In epoch 41, average validation loss is 0.6034003292457967, average validation acc is 0.8685719936708861.


100%|██████████| 391/391 [01:46<00:00,  3.67it/s]


Adjusting learning rate of group 0 to 3.0454e-04.
In epoch 42, average training loss is 0.02403855361633927, average training acc is 0.9919956841737109.


100%|██████████| 79/79 [00:06<00:00, 12.76it/s]


In epoch 42, average validation loss is 0.6420080673091019, average validation acc is 0.8614517405063291.


100%|██████████| 391/391 [01:45<00:00,  3.71it/s]


Adjusting learning rate of group 0 to 2.9685e-04.
In epoch 43, average training loss is 0.022164296506650392, average training acc is 0.992746962915601.


100%|██████████| 79/79 [00:06<00:00, 13.07it/s]


In epoch 43, average validation loss is 0.6420605209054826, average validation acc is 0.8658030063291139.


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 2.8911e-04.
In epoch 44, average training loss is 0.02092304814126357, average training acc is 0.9930466751918159.


100%|██████████| 79/79 [00:06<00:00, 12.81it/s]


In epoch 44, average validation loss is 0.5869935075693493, average validation acc is 0.8750988924050633.
saving best model with acc 0.8750988924050633 at 44 epoch!


100%|██████████| 391/391 [01:45<00:00,  3.70it/s]


Adjusting learning rate of group 0 to 2.8133e-04.
In epoch 45, average training loss is 0.01815817006738604, average training acc is 0.993985773657289.


100%|██████████| 79/79 [00:06<00:00, 12.80it/s]


In epoch 45, average validation loss is 0.6135237550810922, average validation acc is 0.8661985759493671.


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 2.7353e-04.
In epoch 46, average training loss is 0.020812634517566143, average training acc is 0.992746962915601.


100%|██████████| 79/79 [00:06<00:00, 12.89it/s]


In epoch 46, average validation loss is 0.6670222886001007, average validation acc is 0.8595727848101266.


100%|██████████| 391/391 [01:46<00:00,  3.68it/s]


Adjusting learning rate of group 0 to 2.6570e-04.
In epoch 47, average training loss is 0.01852897750100602, average training acc is 0.9939138427414858.


100%|██████████| 79/79 [00:06<00:00, 12.84it/s]


In epoch 47, average validation loss is 0.6099797973904428, average validation acc is 0.8643196202531646.


 59%|█████▉    | 231/391 [01:08<00:47,  3.36it/s]


RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.