In [1]:
%matplotlib notebook
import os, sys
import logging
import random
import h5py
import shutil
import time
import argparse
import numpy as np
import sigpy.plot as pl
import torch
import sigpy as sp
import torchvision
from torch import optim
from tensorboardX import SummaryWriter
from torch.nn import functional as F
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib
# import custom libraries
from utils import transforms as T
from utils import subsample as ss
from utils import complex_utils as cplx
from utils.resnet2p1d import generate_model
from utils.flare_utils import roll
# import custom classes
from utils.datasets import SliceData
from subsample_fastmri import MaskFunc
from MoDL_single import UnrolledModel
import argparse
from models.SAmodel import MyNetwork
from models.Unrolled import Unrolled
from models.UnrolledRef import UnrolledRef
from models.UnrolledTransformer import UnrolledTrans
import matplotlib.pyplot as plt
from ImageFusionBlock import ImageFusionBlock
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
%load_ext autoreload
%autoreload 0
from ImageFusion_Dualbranch_Fusion.densefuse_net import DenseFuseNet
from ImageFusion_Dualbranch_Fusion.channel_fusion import channel_f as channel_fusion
import itertools
from RCAN import CombinedNetwork
from models.FusionNet import FusionNet
from recon_net_wrap import ViTfuser
#from UnrolledViT import UnrolledViT
from UnrolledViTcomplex import UnrolledViT
from torchvision import models
from FSloss_wrap import VGGLoss,ResNet18Backbone,FeatureEmbedding,contrastive_loss,VGGPerceptualLoss
from fastmri.data import transforms, subsample
from fastmri.losses import SSIMLoss

In [2]:
class Namespace:
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

In [3]:
class DataTransform:
    """
    Data Transformer for training unrolled reconstruction models.
    """

    def __init__(self, mask_func, args, use_seed=False):
        self.mask_func = mask_func
        self.use_seed = use_seed
        self.rng = np.random.RandomState()
    def get_mask_func(self, factor):
        center_fractions = 0.08 * 4/factor# RandomMaskFuncEquiSpacedMaskFunc
        mask_func = subsample.EquiSpacedMaskFunc(
        center_fractions=[center_fractions],
        accelerations=[factor], 
        )
        return mask_func
    
    def __call__(self, kspace, target, reference_kspace, reference,slice):
        im_lowres = abs(sp.ifft(sp.resize(sp.resize(kspace,(256,24)),(256,160))))
        magnitude_vals = im_lowres.reshape(-1)
        k = int(round(0.05 * magnitude_vals.shape[0]))
        scale = magnitude_vals[magnitude_vals.argsort()[::-1][k]]
        kspace = kspace/scale
        target = target/scale
        # Convert everything from numpy arrays to tensors
        kspace_torch = cplx.to_tensor(kspace).float()   
        target_torch = cplx.to_tensor(target).float()  
        target_torch = T.ifft2(T.kspace_cut(T.fft2(target_torch),0.67,0.67)) 
        kspace_torch = T.awgn_torch(kspace_torch,30,L=1) # 10dB for simulations
        ## Masking
        mask_func = self.get_mask_func(3)
        kspace_torch = T.kspace_cut(kspace_torch,0.67,0.67)
        kspace_torch = transforms.apply_mask(kspace_torch, mask_func)[0]
        # kspace_torch = kspace_torch*mask_torch # For poisson
        
        mask = np.abs(cplx.to_numpy(kspace_torch))!=0
        mask_torch = torch.stack([torch.tensor(mask).float(),torch.tensor(mask).float()],dim=2)
        
        ### Reference addition ###
        im_lowres_ref = abs(sp.ifft(sp.resize(sp.resize(reference_kspace,(256,24)),(256,160))))
        magnitude_vals_ref = im_lowres_ref.reshape(-1)
        k_ref = int(round(0.05 * magnitude_vals_ref.shape[0]))
        scale_ref = magnitude_vals_ref[magnitude_vals_ref.argsort()[::-1][k_ref]]
        reference = reference / scale_ref
        reference_torch = cplx.to_tensor(reference).float()
        reference_torch_kspace = T.fft2(reference_torch)
        reference_torch_kspace = T.kspace_cut(reference_torch_kspace,0.67,0.67)
        reference_torch = T.ifft2(reference_torch_kspace)
        

        return kspace_torch,target_torch,mask_torch, reference_torch 

In [4]:
def create_datasets(args):
    # Generate k-t undersampling masks
    train_mask = MaskFunc([0.08],[4])
    train_data = SliceData(
        root=str(args.data_path),
        transform=DataTransform(train_mask, args),
        sample_rate=1
    )
    return train_data
def create_data_loaders(args):
    train_data = create_datasets(args)
#     print(train_data[0])

    train_loader = DataLoader(
        dataset=train_data,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
    )
    return train_loader
def build_optim(args, params):
    optimizer = torch.optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay)
    return optimizer

In [5]:
#Hyper parameters
params = Namespace()
params.data_path = "./registered_data/"
params.batch_size = 4 #4
params.num_grad_steps = 1 #4
params.num_cg_steps = 8 #8
params.share_weights = True
params.modl_lamda = 0.05
params.lr = 0.0001 
params.weight_decay = 0
params.lr_step_size = 5
params.lr_gamma = 0.3
params.epoch = 401
params.reference_mode = 1
params.reference_lambda = 0.1


In [6]:
train_loader = create_data_loaders(params)



In [7]:
VGGloss = VGGPerceptualLoss().to(device)

def extract_patches(images, patch_size=(20, 20), stride=(20, 20)):
    # images: Tensor of shape (batch_size, 1, 180, 110)
    patches = images.unfold(2, patch_size[0], stride[0]).unfold(3, patch_size[1], stride[1])
    patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
    patches = patches.view(images.size(0), -1, 1, patch_size[0], patch_size[1])
    return patches  # Returns patches of shape (batch_size, num_patches, 1, patch_size[0], patch_size[1])


# Example usage:
patch_size = (20, 20)
stride = (20, 20)  # Non-overlapping patches
def feature_space_loss(features1, features2):
    return F.mse_loss(features1, features2)
def pad_image(images):
    # images: Tensor of shape (batch_size, 1, 172, 108)
    padded_images = F.pad(images, (6, 6, 4, 4), mode='constant', value=0)
    return padded_images  # Shape will be (batch_size, 1, 180, 120)


from vision_transformer import VisionTransformer
from recon_net import ReconNet
net = VisionTransformer(
    avrg_img_size=320,
    patch_size=(10,10),
    in_chans=1, embed_dim=64, 
    depth=10, num_heads=16,
    )
model = ReconNet(net).to(device)

## For ViT only training
optimizer = optim.Adam(model.parameters(), lr=0.0)
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer=optimizer, 
    max_lr=0.0002,
    steps_per_epoch=len(train_loader),
    epochs=400,
    pct_start=0.01,
    anneal_strategy='linear',
    cycle_momentum=False,
    base_momentum=0., 
    max_momentum=0.,
    div_factor = 25.,
    final_div_factor=1.,
)




In [8]:

criterion = SSIMLoss().to(device)
## Fine tune
cp = torch.load('./L2_checkpoints_ViT_only/model_180.pt', map_location=device)
model.load_state_dict(cp['model'])

epochs_plot = []
losses_plot = []

for epoch in range(params.epoch):
    model.train()
    avg_loss = 0.
    running_loss = 0.0
    for iter, data in enumerate(train_loader):
        input,target,mask,reference = data
        input = input.to(device).float()
        target = target.to(device).float()
        mask = mask.to(device)
        reference = reference.to(device).float()
        image = T.ifft2(input)
        image = image.permute(0,3,1,2)

        target_image = target.permute(0,3,1,2) 

        real_part_tar = target_image[:,0,:,:].unsqueeze(1)
        imag_part_tar = target_image[:,1,:,:].unsqueeze(1)
        mag_tar = torch.sqrt(real_part_tar**2 + imag_part_tar**2).to(device)

        real_part = image[:,0,:,:].unsqueeze(1)
        imag_part = image[:,1,:,:].unsqueeze(1)
        input = torch.sqrt(real_part**2 + imag_part**2)

        in_pad, wpad, hpad = model.pad(input)
        input_norm,mean,std = model.norm(in_pad.float())
        features = model.net.forward_features(input_norm)
        head_out = model.net.head(features)
        head_out_img = model.net.seq2img(head_out, (180, 110))

        # un-norm
        merged = model.unnorm(head_out_img, mean, std) 

        # un-pad 
        im_out = model.unpad(merged,wpad,hpad)
        im_out = im_out.permute(0, 2, 3, 1)

        # SSIM calcs
        maxval = torch.max(torch.cat((im_out,mag_tar.permute(0,2,3,1)),dim=1))
        im_out = im_out.permute(0,3,1,2)
        
        #print(features_out.shape)
        data_range = torch.tensor([maxval], device=device).view(1, 1, 1, 1).expand(im_out.size(0), im_out.size(1), im_out.size(2)-6, im_out.size(3)-6)

        # pad:
        im_out_pad = torch.cat((im_out,im_out,im_out),dim =1)/maxval
        mag_tar_pad = torch.cat((mag_tar,mag_tar,mag_tar),dim =1)/maxval

        # SSIM loss 
        loss = criterion(im_out, mag_tar.to(device), data_range.to(device))

        running_loss = running_loss + loss.item()
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()

        avg_loss = 0.99 * avg_loss + 0.01 * loss.item() if iter > 0 else loss.item()
        if iter % 400 == 0:
            logging.info(
                f'Epoch = [{epoch:3d}/{params.epoch:3d}] '
                f'Iter = [{iter:4d}/{len(train_loader):4d}] '
                f'Loss = {loss.item():.4g} Avg Loss = {avg_loss:.4g}'
            )
    #Saving the model
    exp_dir = "L2_checkpoints_ViT_only/"#L2_checkpoints_myTrain
    if epoch % 50 == 0:
        torch.save(
            {
                'epoch': epoch,
                'params': params,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'exp_dir': exp_dir
            },
            f=os.path.join(exp_dir, 'model_%d.pt'%(epoch))
    )
    running_loss = running_loss / len(train_loader)
    scheduler.step()
    current_lr = scheduler.get_last_lr()[0]
    print(f'Epoch {epoch+1}, Learning rate: {current_lr}')

    # Append epoch and average loss to plot lists
    epochs_plot.append(epoch)
    losses_plot.append(running_loss)

# Plotting the loss curve
plt.figure()
plt.plot(epochs_plot, losses_plot, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('SA unrolled with Reference L2 train Loss')
plt.legend()
plt.grid(True)
plt.savefig(os.path.join(exp_dir, 'loss_plot_plato_down.png'))  # Save plot as an image

# Save all_losses to a file for later comparison
losses_file = os.path.join(exp_dir, 'all_losses.txt')
with open(losses_file, 'w') as f:
    for loss in losses_plot:
        f.write(f'{loss}\n')

INFO:root:Epoch = [  0/401] Iter = [   0/ 134] Loss = 0.1755 Avg Loss = 0.1755


Epoch 1, Learning rate: 8.358878504672896e-06


INFO:root:Epoch = [  1/401] Iter = [   0/ 134] Loss = 0.1285 Avg Loss = 0.1285


Epoch 2, Learning rate: 8.717757009345794e-06


INFO:root:Epoch = [  2/401] Iter = [   0/ 134] Loss = 0.1289 Avg Loss = 0.1289


Epoch 3, Learning rate: 9.076635514018691e-06


INFO:root:Epoch = [  3/401] Iter = [   0/ 134] Loss = 0.1076 Avg Loss = 0.1076


Epoch 4, Learning rate: 9.43551401869159e-06


INFO:root:Epoch = [  4/401] Iter = [   0/ 134] Loss = 0.1023 Avg Loss = 0.1023


Epoch 5, Learning rate: 9.794392523364486e-06


INFO:root:Epoch = [  5/401] Iter = [   0/ 134] Loss = 0.0907 Avg Loss = 0.0907


Epoch 6, Learning rate: 1.0153271028037382e-05


INFO:root:Epoch = [  6/401] Iter = [   0/ 134] Loss = 0.09845 Avg Loss = 0.09845


Epoch 7, Learning rate: 1.051214953271028e-05


INFO:root:Epoch = [  7/401] Iter = [   0/ 134] Loss = 0.09059 Avg Loss = 0.09059


Epoch 8, Learning rate: 1.0871028037383177e-05


INFO:root:Epoch = [  8/401] Iter = [   0/ 134] Loss = 0.1 Avg Loss = 0.1


Epoch 9, Learning rate: 1.1229906542056074e-05


INFO:root:Epoch = [  9/401] Iter = [   0/ 134] Loss = 0.09425 Avg Loss = 0.09425


Epoch 10, Learning rate: 1.1588785046728972e-05


INFO:root:Epoch = [ 10/401] Iter = [   0/ 134] Loss = 0.08729 Avg Loss = 0.08729


Epoch 11, Learning rate: 1.194766355140187e-05


INFO:root:Epoch = [ 11/401] Iter = [   0/ 134] Loss = 0.1093 Avg Loss = 0.1093


Epoch 12, Learning rate: 1.2306542056074767e-05


INFO:root:Epoch = [ 12/401] Iter = [   0/ 134] Loss = 0.1079 Avg Loss = 0.1079


Epoch 13, Learning rate: 1.2665420560747663e-05


INFO:root:Epoch = [ 13/401] Iter = [   0/ 134] Loss = 0.1005 Avg Loss = 0.1005


Epoch 14, Learning rate: 1.302429906542056e-05


INFO:root:Epoch = [ 14/401] Iter = [   0/ 134] Loss = 0.09015 Avg Loss = 0.09015


Epoch 15, Learning rate: 1.3383177570093458e-05


INFO:root:Epoch = [ 15/401] Iter = [   0/ 134] Loss = 0.094 Avg Loss = 0.094


Epoch 16, Learning rate: 1.3742056074766356e-05


INFO:root:Epoch = [ 16/401] Iter = [   0/ 134] Loss = 0.09019 Avg Loss = 0.09019


Epoch 17, Learning rate: 1.4100934579439253e-05


INFO:root:Epoch = [ 17/401] Iter = [   0/ 134] Loss = 0.08502 Avg Loss = 0.08502


Epoch 18, Learning rate: 1.445981308411215e-05


INFO:root:Epoch = [ 18/401] Iter = [   0/ 134] Loss = 0.08961 Avg Loss = 0.08961


Epoch 19, Learning rate: 1.4818691588785046e-05


INFO:root:Epoch = [ 19/401] Iter = [   0/ 134] Loss = 0.08406 Avg Loss = 0.08406


Epoch 20, Learning rate: 1.5177570093457942e-05


INFO:root:Epoch = [ 20/401] Iter = [   0/ 134] Loss = 0.09638 Avg Loss = 0.09638


Epoch 21, Learning rate: 1.553644859813084e-05


INFO:root:Epoch = [ 21/401] Iter = [   0/ 134] Loss = 0.08346 Avg Loss = 0.08346


Epoch 22, Learning rate: 1.589532710280374e-05


INFO:root:Epoch = [ 22/401] Iter = [   0/ 134] Loss = 0.09284 Avg Loss = 0.09284


Epoch 23, Learning rate: 1.6254205607476635e-05


INFO:root:Epoch = [ 23/401] Iter = [   0/ 134] Loss = 0.09051 Avg Loss = 0.09051


Epoch 24, Learning rate: 1.6613084112149532e-05


INFO:root:Epoch = [ 24/401] Iter = [   0/ 134] Loss = 0.08728 Avg Loss = 0.08728


Epoch 25, Learning rate: 1.6971962616822432e-05


INFO:root:Epoch = [ 25/401] Iter = [   0/ 134] Loss = 0.08983 Avg Loss = 0.08983


Epoch 26, Learning rate: 1.733084112149533e-05


INFO:root:Epoch = [ 26/401] Iter = [   0/ 134] Loss = 0.08029 Avg Loss = 0.08029


Epoch 27, Learning rate: 1.7689719626168225e-05


INFO:root:Epoch = [ 27/401] Iter = [   0/ 134] Loss = 0.08143 Avg Loss = 0.08143


Epoch 28, Learning rate: 1.804859813084112e-05


INFO:root:Epoch = [ 28/401] Iter = [   0/ 134] Loss = 0.08807 Avg Loss = 0.08807


Epoch 29, Learning rate: 1.8407476635514018e-05


INFO:root:Epoch = [ 29/401] Iter = [   0/ 134] Loss = 0.08271 Avg Loss = 0.08271


Epoch 30, Learning rate: 1.8766355140186915e-05


INFO:root:Epoch = [ 30/401] Iter = [   0/ 134] Loss = 0.09948 Avg Loss = 0.09948


Epoch 31, Learning rate: 1.912523364485981e-05


INFO:root:Epoch = [ 31/401] Iter = [   0/ 134] Loss = 0.09496 Avg Loss = 0.09496


Epoch 32, Learning rate: 1.948411214953271e-05


INFO:root:Epoch = [ 32/401] Iter = [   0/ 134] Loss = 0.07719 Avg Loss = 0.07719


Epoch 33, Learning rate: 1.9842990654205608e-05


INFO:root:Epoch = [ 33/401] Iter = [   0/ 134] Loss = 0.07289 Avg Loss = 0.07289


Epoch 34, Learning rate: 2.0201869158878504e-05


INFO:root:Epoch = [ 34/401] Iter = [   0/ 134] Loss = 0.08349 Avg Loss = 0.08349


Epoch 35, Learning rate: 2.05607476635514e-05


INFO:root:Epoch = [ 35/401] Iter = [   0/ 134] Loss = 0.1013 Avg Loss = 0.1013


Epoch 36, Learning rate: 2.09196261682243e-05


INFO:root:Epoch = [ 36/401] Iter = [   0/ 134] Loss = 0.08262 Avg Loss = 0.08262


Epoch 37, Learning rate: 2.1278504672897197e-05


INFO:root:Epoch = [ 37/401] Iter = [   0/ 134] Loss = 0.0794 Avg Loss = 0.0794


Epoch 38, Learning rate: 2.1637383177570094e-05


INFO:root:Epoch = [ 38/401] Iter = [   0/ 134] Loss = 0.07976 Avg Loss = 0.07976


Epoch 39, Learning rate: 2.199626168224299e-05


INFO:root:Epoch = [ 39/401] Iter = [   0/ 134] Loss = 0.07677 Avg Loss = 0.07677


Epoch 40, Learning rate: 2.2355140186915887e-05


INFO:root:Epoch = [ 40/401] Iter = [   0/ 134] Loss = 0.104 Avg Loss = 0.104


Epoch 41, Learning rate: 2.2714018691588783e-05


INFO:root:Epoch = [ 41/401] Iter = [   0/ 134] Loss = 0.0896 Avg Loss = 0.0896


Epoch 42, Learning rate: 2.307289719626168e-05


INFO:root:Epoch = [ 42/401] Iter = [   0/ 134] Loss = 0.07528 Avg Loss = 0.07528


Epoch 43, Learning rate: 2.3431775700934583e-05


INFO:root:Epoch = [ 43/401] Iter = [   0/ 134] Loss = 0.08381 Avg Loss = 0.08381


Epoch 44, Learning rate: 2.379065420560748e-05


INFO:root:Epoch = [ 44/401] Iter = [   0/ 134] Loss = 0.07319 Avg Loss = 0.07319


Epoch 45, Learning rate: 2.414953271028037e-05


INFO:root:Epoch = [ 45/401] Iter = [   0/ 134] Loss = 0.08171 Avg Loss = 0.08171


Epoch 46, Learning rate: 2.4508411214953273e-05


INFO:root:Epoch = [ 46/401] Iter = [   0/ 134] Loss = 0.07364 Avg Loss = 0.07364


Epoch 47, Learning rate: 2.486728971962617e-05


INFO:root:Epoch = [ 47/401] Iter = [   0/ 134] Loss = 0.08543 Avg Loss = 0.08543


Epoch 48, Learning rate: 2.5226168224299066e-05


INFO:root:Epoch = [ 48/401] Iter = [   0/ 134] Loss = 0.09678 Avg Loss = 0.09678


Epoch 49, Learning rate: 2.5585046728971963e-05


INFO:root:Epoch = [ 49/401] Iter = [   0/ 134] Loss = 0.07812 Avg Loss = 0.07812


Epoch 50, Learning rate: 2.594392523364486e-05


INFO:root:Epoch = [ 50/401] Iter = [   0/ 134] Loss = 0.06799 Avg Loss = 0.06799


Epoch 51, Learning rate: 2.6302803738317756e-05


INFO:root:Epoch = [ 51/401] Iter = [   0/ 134] Loss = 0.06951 Avg Loss = 0.06951


Epoch 52, Learning rate: 2.6661682242990652e-05


INFO:root:Epoch = [ 52/401] Iter = [   0/ 134] Loss = 0.07499 Avg Loss = 0.07499


Epoch 53, Learning rate: 2.702056074766355e-05


INFO:root:Epoch = [ 53/401] Iter = [   0/ 134] Loss = 0.08568 Avg Loss = 0.08568


Epoch 54, Learning rate: 2.7379439252336452e-05


INFO:root:Epoch = [ 54/401] Iter = [   0/ 134] Loss = 0.08893 Avg Loss = 0.08893


Epoch 55, Learning rate: 2.773831775700935e-05


INFO:root:Epoch = [ 55/401] Iter = [   0/ 134] Loss = 0.09328 Avg Loss = 0.09328


Epoch 56, Learning rate: 2.8097196261682245e-05


INFO:root:Epoch = [ 56/401] Iter = [   0/ 134] Loss = 0.07588 Avg Loss = 0.07588


Epoch 57, Learning rate: 2.8456074766355142e-05


INFO:root:Epoch = [ 57/401] Iter = [   0/ 134] Loss = 0.09021 Avg Loss = 0.09021


Epoch 58, Learning rate: 2.881495327102804e-05


INFO:root:Epoch = [ 58/401] Iter = [   0/ 134] Loss = 0.07301 Avg Loss = 0.07301


Epoch 59, Learning rate: 2.9173831775700935e-05


INFO:root:Epoch = [ 59/401] Iter = [   0/ 134] Loss = 0.05822 Avg Loss = 0.05822


Epoch 60, Learning rate: 2.953271028037383e-05


INFO:root:Epoch = [ 60/401] Iter = [   0/ 134] Loss = 0.08305 Avg Loss = 0.08305


Epoch 61, Learning rate: 2.9891588785046728e-05


INFO:root:Epoch = [ 61/401] Iter = [   0/ 134] Loss = 0.08715 Avg Loss = 0.08715


Epoch 62, Learning rate: 3.0250467289719624e-05


INFO:root:Epoch = [ 62/401] Iter = [   0/ 134] Loss = 0.08173 Avg Loss = 0.08173


Epoch 63, Learning rate: 3.060934579439252e-05


INFO:root:Epoch = [ 63/401] Iter = [   0/ 134] Loss = 0.08866 Avg Loss = 0.08866


Epoch 64, Learning rate: 3.0968224299065424e-05


INFO:root:Epoch = [ 64/401] Iter = [   0/ 134] Loss = 0.08194 Avg Loss = 0.08194


Epoch 65, Learning rate: 3.132710280373832e-05


INFO:root:Epoch = [ 65/401] Iter = [   0/ 134] Loss = 0.08705 Avg Loss = 0.08705


Epoch 66, Learning rate: 3.168598130841122e-05


INFO:root:Epoch = [ 66/401] Iter = [   0/ 134] Loss = 0.08256 Avg Loss = 0.08256


Epoch 67, Learning rate: 3.2044859813084114e-05


INFO:root:Epoch = [ 67/401] Iter = [   0/ 134] Loss = 0.08005 Avg Loss = 0.08005


Epoch 68, Learning rate: 3.240373831775701e-05


INFO:root:Epoch = [ 68/401] Iter = [   0/ 134] Loss = 0.0806 Avg Loss = 0.0806


Epoch 69, Learning rate: 3.276261682242991e-05


INFO:root:Epoch = [ 69/401] Iter = [   0/ 134] Loss = 0.07626 Avg Loss = 0.07626


Epoch 70, Learning rate: 3.3121495327102804e-05


INFO:root:Epoch = [ 70/401] Iter = [   0/ 134] Loss = 0.06709 Avg Loss = 0.06709


Epoch 71, Learning rate: 3.34803738317757e-05


INFO:root:Epoch = [ 71/401] Iter = [   0/ 134] Loss = 0.08349 Avg Loss = 0.08349


Epoch 72, Learning rate: 3.38392523364486e-05


INFO:root:Epoch = [ 72/401] Iter = [   0/ 134] Loss = 0.0825 Avg Loss = 0.0825


Epoch 73, Learning rate: 3.419813084112149e-05


INFO:root:Epoch = [ 73/401] Iter = [   0/ 134] Loss = 0.07784 Avg Loss = 0.07784


Epoch 74, Learning rate: 3.455700934579439e-05


INFO:root:Epoch = [ 74/401] Iter = [   0/ 134] Loss = 0.07549 Avg Loss = 0.07549


Epoch 75, Learning rate: 3.4915887850467286e-05


INFO:root:Epoch = [ 75/401] Iter = [   0/ 134] Loss = 0.08164 Avg Loss = 0.08164


Epoch 76, Learning rate: 3.527476635514019e-05


INFO:root:Epoch = [ 76/401] Iter = [   0/ 134] Loss = 0.08196 Avg Loss = 0.08196


Epoch 77, Learning rate: 3.5633644859813086e-05


INFO:root:Epoch = [ 77/401] Iter = [   0/ 134] Loss = 0.07017 Avg Loss = 0.07017


Epoch 78, Learning rate: 3.599252336448598e-05


INFO:root:Epoch = [ 78/401] Iter = [   0/ 134] Loss = 0.07823 Avg Loss = 0.07823


Epoch 79, Learning rate: 3.635140186915888e-05


INFO:root:Epoch = [ 79/401] Iter = [   0/ 134] Loss = 0.06378 Avg Loss = 0.06378


Epoch 80, Learning rate: 3.6710280373831776e-05


INFO:root:Epoch = [ 80/401] Iter = [   0/ 134] Loss = 0.07186 Avg Loss = 0.07186


Epoch 81, Learning rate: 3.706915887850467e-05


INFO:root:Epoch = [ 81/401] Iter = [   0/ 134] Loss = 0.06859 Avg Loss = 0.06859


Epoch 82, Learning rate: 3.742803738317757e-05


INFO:root:Epoch = [ 82/401] Iter = [   0/ 134] Loss = 0.07053 Avg Loss = 0.07053


Epoch 83, Learning rate: 3.7786915887850466e-05


INFO:root:Epoch = [ 83/401] Iter = [   0/ 134] Loss = 0.08841 Avg Loss = 0.08841


Epoch 84, Learning rate: 3.814579439252336e-05


INFO:root:Epoch = [ 84/401] Iter = [   0/ 134] Loss = 0.07303 Avg Loss = 0.07303


Epoch 85, Learning rate: 3.850467289719626e-05


INFO:root:Epoch = [ 85/401] Iter = [   0/ 134] Loss = 0.07339 Avg Loss = 0.07339


Epoch 86, Learning rate: 3.886355140186916e-05


INFO:root:Epoch = [ 86/401] Iter = [   0/ 134] Loss = 0.08116 Avg Loss = 0.08116


Epoch 87, Learning rate: 3.922242990654206e-05


INFO:root:Epoch = [ 87/401] Iter = [   0/ 134] Loss = 0.0635 Avg Loss = 0.0635


Epoch 88, Learning rate: 3.9581308411214955e-05


INFO:root:Epoch = [ 88/401] Iter = [   0/ 134] Loss = 0.08374 Avg Loss = 0.08374


Epoch 89, Learning rate: 3.994018691588785e-05


INFO:root:Epoch = [ 89/401] Iter = [   0/ 134] Loss = 0.07277 Avg Loss = 0.07277


Epoch 90, Learning rate: 4.029906542056074e-05


INFO:root:Epoch = [ 90/401] Iter = [   0/ 134] Loss = 0.07063 Avg Loss = 0.07063


Epoch 91, Learning rate: 4.0657943925233645e-05


INFO:root:Epoch = [ 91/401] Iter = [   0/ 134] Loss = 0.06874 Avg Loss = 0.06874


Epoch 92, Learning rate: 4.101682242990654e-05


INFO:root:Epoch = [ 92/401] Iter = [   0/ 134] Loss = 0.06609 Avg Loss = 0.06609


Epoch 93, Learning rate: 4.137570093457944e-05


INFO:root:Epoch = [ 93/401] Iter = [   0/ 134] Loss = 0.06617 Avg Loss = 0.06617


Epoch 94, Learning rate: 4.1734579439252334e-05


INFO:root:Epoch = [ 94/401] Iter = [   0/ 134] Loss = 0.06746 Avg Loss = 0.06746


Epoch 95, Learning rate: 4.209345794392523e-05


INFO:root:Epoch = [ 95/401] Iter = [   0/ 134] Loss = 0.06913 Avg Loss = 0.06913


Epoch 96, Learning rate: 4.245233644859813e-05


INFO:root:Epoch = [ 96/401] Iter = [   0/ 134] Loss = 0.08598 Avg Loss = 0.08598


Epoch 97, Learning rate: 4.281121495327103e-05


INFO:root:Epoch = [ 97/401] Iter = [   0/ 134] Loss = 0.0693 Avg Loss = 0.0693


Epoch 98, Learning rate: 4.317009345794393e-05


INFO:root:Epoch = [ 98/401] Iter = [   0/ 134] Loss = 0.06679 Avg Loss = 0.06679


Epoch 99, Learning rate: 4.3528971962616824e-05


INFO:root:Epoch = [ 99/401] Iter = [   0/ 134] Loss = 0.0682 Avg Loss = 0.0682


Epoch 100, Learning rate: 4.388785046728972e-05


INFO:root:Epoch = [100/401] Iter = [   0/ 134] Loss = 0.08815 Avg Loss = 0.08815


Epoch 101, Learning rate: 4.424672897196261e-05


INFO:root:Epoch = [101/401] Iter = [   0/ 134] Loss = 0.07992 Avg Loss = 0.07992


Epoch 102, Learning rate: 4.4605607476635513e-05


INFO:root:Epoch = [102/401] Iter = [   0/ 134] Loss = 0.06929 Avg Loss = 0.06929


Epoch 103, Learning rate: 4.496448598130841e-05


INFO:root:Epoch = [103/401] Iter = [   0/ 134] Loss = 0.07875 Avg Loss = 0.07875


Epoch 104, Learning rate: 4.5323364485981307e-05


INFO:root:Epoch = [104/401] Iter = [   0/ 134] Loss = 0.07045 Avg Loss = 0.07045


Epoch 105, Learning rate: 4.56822429906542e-05


INFO:root:Epoch = [105/401] Iter = [   0/ 134] Loss = 0.07884 Avg Loss = 0.07884


Epoch 106, Learning rate: 4.60411214953271e-05


INFO:root:Epoch = [106/401] Iter = [   0/ 134] Loss = 0.07479 Avg Loss = 0.07479


Epoch 107, Learning rate: 4.64e-05


INFO:root:Epoch = [107/401] Iter = [   0/ 134] Loss = 0.07305 Avg Loss = 0.07305


Epoch 108, Learning rate: 4.67588785046729e-05


INFO:root:Epoch = [108/401] Iter = [   0/ 134] Loss = 0.07178 Avg Loss = 0.07178


Epoch 109, Learning rate: 4.7117757009345796e-05


INFO:root:Epoch = [109/401] Iter = [   0/ 134] Loss = 0.07038 Avg Loss = 0.07038


Epoch 110, Learning rate: 4.747663551401869e-05


INFO:root:Epoch = [110/401] Iter = [   0/ 134] Loss = 0.06478 Avg Loss = 0.06478


Epoch 111, Learning rate: 4.783551401869158e-05


INFO:root:Epoch = [111/401] Iter = [   0/ 134] Loss = 0.08191 Avg Loss = 0.08191


Epoch 112, Learning rate: 4.8194392523364486e-05


INFO:root:Epoch = [112/401] Iter = [   0/ 134] Loss = 0.1076 Avg Loss = 0.1076


Epoch 113, Learning rate: 4.855327102803738e-05


INFO:root:Epoch = [113/401] Iter = [   0/ 134] Loss = 0.0642 Avg Loss = 0.0642


Epoch 114, Learning rate: 4.891214953271028e-05


INFO:root:Epoch = [114/401] Iter = [   0/ 134] Loss = 0.08114 Avg Loss = 0.08114


Epoch 115, Learning rate: 4.9271028037383175e-05


INFO:root:Epoch = [115/401] Iter = [   0/ 134] Loss = 0.07445 Avg Loss = 0.07445


Epoch 116, Learning rate: 4.962990654205607e-05


INFO:root:Epoch = [116/401] Iter = [   0/ 134] Loss = 0.07602 Avg Loss = 0.07602


Epoch 117, Learning rate: 4.9988785046728975e-05


INFO:root:Epoch = [117/401] Iter = [   0/ 134] Loss = 0.06899 Avg Loss = 0.06899


Epoch 118, Learning rate: 5.034766355140187e-05


INFO:root:Epoch = [118/401] Iter = [   0/ 134] Loss = 0.07408 Avg Loss = 0.07408


Epoch 119, Learning rate: 5.070654205607477e-05


INFO:root:Epoch = [119/401] Iter = [   0/ 134] Loss = 0.07102 Avg Loss = 0.07102


Epoch 120, Learning rate: 5.1065420560747665e-05


INFO:root:Epoch = [120/401] Iter = [   0/ 134] Loss = 0.06489 Avg Loss = 0.06489


Epoch 121, Learning rate: 5.142429906542056e-05


INFO:root:Epoch = [121/401] Iter = [   0/ 134] Loss = 0.07915 Avg Loss = 0.07915


Epoch 122, Learning rate: 5.178317757009345e-05


INFO:root:Epoch = [122/401] Iter = [   0/ 134] Loss = 0.08969 Avg Loss = 0.08969


Epoch 123, Learning rate: 5.2142056074766355e-05


INFO:root:Epoch = [123/401] Iter = [   0/ 134] Loss = 0.07741 Avg Loss = 0.07741


Epoch 124, Learning rate: 5.250093457943925e-05


INFO:root:Epoch = [124/401] Iter = [   0/ 134] Loss = 0.07828 Avg Loss = 0.07828


Epoch 125, Learning rate: 5.285981308411215e-05


INFO:root:Epoch = [125/401] Iter = [   0/ 134] Loss = 0.07734 Avg Loss = 0.07734


Epoch 126, Learning rate: 5.3218691588785044e-05


INFO:root:Epoch = [126/401] Iter = [   0/ 134] Loss = 0.06625 Avg Loss = 0.06625


Epoch 127, Learning rate: 5.357757009345794e-05


INFO:root:Epoch = [127/401] Iter = [   0/ 134] Loss = 0.07943 Avg Loss = 0.07943


Epoch 128, Learning rate: 5.3936448598130844e-05


INFO:root:Epoch = [128/401] Iter = [   0/ 134] Loss = 0.07789 Avg Loss = 0.07789


Epoch 129, Learning rate: 5.429532710280374e-05


INFO:root:Epoch = [129/401] Iter = [   0/ 134] Loss = 0.07831 Avg Loss = 0.07831


Epoch 130, Learning rate: 5.465420560747664e-05


INFO:root:Epoch = [130/401] Iter = [   0/ 134] Loss = 0.07003 Avg Loss = 0.07003


Epoch 131, Learning rate: 5.5013084112149534e-05


INFO:root:Epoch = [131/401] Iter = [   0/ 134] Loss = 0.08904 Avg Loss = 0.08904


Epoch 132, Learning rate: 5.537196261682243e-05


INFO:root:Epoch = [132/401] Iter = [   0/ 134] Loss = 0.08195 Avg Loss = 0.08195


Epoch 133, Learning rate: 5.573084112149533e-05


INFO:root:Epoch = [133/401] Iter = [   0/ 134] Loss = 0.0743 Avg Loss = 0.0743


Epoch 134, Learning rate: 5.608971962616822e-05


INFO:root:Epoch = [134/401] Iter = [   0/ 134] Loss = 0.06872 Avg Loss = 0.06872


Epoch 135, Learning rate: 5.644859813084111e-05


INFO:root:Epoch = [135/401] Iter = [   0/ 134] Loss = 0.06983 Avg Loss = 0.06983


Epoch 136, Learning rate: 5.6807476635514016e-05


INFO:root:Epoch = [136/401] Iter = [   0/ 134] Loss = 0.07522 Avg Loss = 0.07522


Epoch 137, Learning rate: 5.716635514018692e-05


INFO:root:Epoch = [137/401] Iter = [   0/ 134] Loss = 0.08134 Avg Loss = 0.08134


Epoch 138, Learning rate: 5.752523364485981e-05


INFO:root:Epoch = [138/401] Iter = [   0/ 134] Loss = 0.06685 Avg Loss = 0.06685


Epoch 139, Learning rate: 5.788411214953271e-05


INFO:root:Epoch = [139/401] Iter = [   0/ 134] Loss = 0.06763 Avg Loss = 0.06763


Epoch 140, Learning rate: 5.82429906542056e-05


INFO:root:Epoch = [140/401] Iter = [   0/ 134] Loss = 0.07785 Avg Loss = 0.07785


Epoch 141, Learning rate: 5.8601869158878506e-05


INFO:root:Epoch = [141/401] Iter = [   0/ 134] Loss = 0.08316 Avg Loss = 0.08316


Epoch 142, Learning rate: 5.89607476635514e-05


INFO:root:Epoch = [142/401] Iter = [   0/ 134] Loss = 0.06758 Avg Loss = 0.06758


Epoch 143, Learning rate: 5.931962616822429e-05


INFO:root:Epoch = [143/401] Iter = [   0/ 134] Loss = 0.05893 Avg Loss = 0.05893


Epoch 144, Learning rate: 5.9678504672897196e-05


INFO:root:Epoch = [144/401] Iter = [   0/ 134] Loss = 0.06746 Avg Loss = 0.06746


Epoch 145, Learning rate: 6.0037383177570085e-05


INFO:root:Epoch = [145/401] Iter = [   0/ 134] Loss = 0.07009 Avg Loss = 0.07009


Epoch 146, Learning rate: 6.039626168224299e-05


INFO:root:Epoch = [146/401] Iter = [   0/ 134] Loss = 0.08129 Avg Loss = 0.08129


Epoch 147, Learning rate: 6.075514018691589e-05


INFO:root:Epoch = [147/401] Iter = [   0/ 134] Loss = 0.07895 Avg Loss = 0.07895


Epoch 148, Learning rate: 6.111401869158879e-05


INFO:root:Epoch = [148/401] Iter = [   0/ 134] Loss = 0.06911 Avg Loss = 0.06911


Epoch 149, Learning rate: 6.147289719626169e-05


INFO:root:Epoch = [149/401] Iter = [   0/ 134] Loss = 0.0688 Avg Loss = 0.0688


Epoch 150, Learning rate: 6.183177570093458e-05


INFO:root:Epoch = [150/401] Iter = [   0/ 134] Loss = 0.07089 Avg Loss = 0.07089


Epoch 151, Learning rate: 6.219065420560748e-05


INFO:root:Epoch = [151/401] Iter = [   0/ 134] Loss = 0.08038 Avg Loss = 0.08038


Epoch 152, Learning rate: 6.254953271028037e-05


INFO:root:Epoch = [152/401] Iter = [   0/ 134] Loss = 0.058 Avg Loss = 0.058


Epoch 153, Learning rate: 6.290841121495327e-05


INFO:root:Epoch = [153/401] Iter = [   0/ 134] Loss = 0.06215 Avg Loss = 0.06215


Epoch 154, Learning rate: 6.326728971962617e-05


INFO:root:Epoch = [154/401] Iter = [   0/ 134] Loss = 0.0801 Avg Loss = 0.0801


Epoch 155, Learning rate: 6.362616822429906e-05


INFO:root:Epoch = [155/401] Iter = [   0/ 134] Loss = 0.0711 Avg Loss = 0.0711


Epoch 156, Learning rate: 6.398504672897196e-05


INFO:root:Epoch = [156/401] Iter = [   0/ 134] Loss = 0.07708 Avg Loss = 0.07708


Epoch 157, Learning rate: 6.434392523364487e-05


INFO:root:Epoch = [157/401] Iter = [   0/ 134] Loss = 0.08729 Avg Loss = 0.08729


Epoch 158, Learning rate: 6.470280373831775e-05


INFO:root:Epoch = [158/401] Iter = [   0/ 134] Loss = 0.07419 Avg Loss = 0.07419


Epoch 159, Learning rate: 6.506168224299066e-05


INFO:root:Epoch = [159/401] Iter = [   0/ 134] Loss = 0.06001 Avg Loss = 0.06001


Epoch 160, Learning rate: 6.542056074766355e-05


INFO:root:Epoch = [160/401] Iter = [   0/ 134] Loss = 0.07758 Avg Loss = 0.07758


Epoch 161, Learning rate: 6.577943925233646e-05


INFO:root:Epoch = [161/401] Iter = [   0/ 134] Loss = 0.06353 Avg Loss = 0.06353


Epoch 162, Learning rate: 6.613831775700935e-05


INFO:root:Epoch = [162/401] Iter = [   0/ 134] Loss = 0.07514 Avg Loss = 0.07514


Epoch 163, Learning rate: 6.649719626168225e-05


INFO:root:Epoch = [163/401] Iter = [   0/ 134] Loss = 0.05452 Avg Loss = 0.05452


Epoch 164, Learning rate: 6.685607476635515e-05


INFO:root:Epoch = [164/401] Iter = [   0/ 134] Loss = 0.06285 Avg Loss = 0.06285


Epoch 165, Learning rate: 6.721495327102803e-05


INFO:root:Epoch = [165/401] Iter = [   0/ 134] Loss = 0.0758 Avg Loss = 0.0758


Epoch 166, Learning rate: 6.757383177570094e-05


INFO:root:Epoch = [166/401] Iter = [   0/ 134] Loss = 0.08313 Avg Loss = 0.08313


Epoch 167, Learning rate: 6.793271028037384e-05


INFO:root:Epoch = [167/401] Iter = [   0/ 134] Loss = 0.07753 Avg Loss = 0.07753


Epoch 168, Learning rate: 6.829158878504673e-05


INFO:root:Epoch = [168/401] Iter = [   0/ 134] Loss = 0.07501 Avg Loss = 0.07501


Epoch 169, Learning rate: 6.865046728971963e-05


INFO:root:Epoch = [169/401] Iter = [   0/ 134] Loss = 0.06571 Avg Loss = 0.06571


Epoch 170, Learning rate: 6.900934579439253e-05


INFO:root:Epoch = [170/401] Iter = [   0/ 134] Loss = 0.06482 Avg Loss = 0.06482


Epoch 171, Learning rate: 6.936822429906542e-05


INFO:root:Epoch = [171/401] Iter = [   0/ 134] Loss = 0.06422 Avg Loss = 0.06422


Epoch 172, Learning rate: 6.972710280373833e-05


INFO:root:Epoch = [172/401] Iter = [   0/ 134] Loss = 0.06798 Avg Loss = 0.06798


Epoch 173, Learning rate: 7.008598130841122e-05


INFO:root:Epoch = [173/401] Iter = [   0/ 134] Loss = 0.1381 Avg Loss = 0.1381


Epoch 174, Learning rate: 7.044485981308413e-05


INFO:root:Epoch = [174/401] Iter = [   0/ 134] Loss = 0.07468 Avg Loss = 0.07468


Epoch 175, Learning rate: 7.080373831775701e-05


INFO:root:Epoch = [175/401] Iter = [   0/ 134] Loss = 0.06768 Avg Loss = 0.06768


Epoch 176, Learning rate: 7.116261682242992e-05


INFO:root:Epoch = [176/401] Iter = [   0/ 134] Loss = 0.08552 Avg Loss = 0.08552


Epoch 177, Learning rate: 7.152149532710282e-05


INFO:root:Epoch = [177/401] Iter = [   0/ 134] Loss = 0.07163 Avg Loss = 0.07163


Epoch 178, Learning rate: 7.188037383177571e-05


INFO:root:Epoch = [178/401] Iter = [   0/ 134] Loss = 0.06444 Avg Loss = 0.06444


Epoch 179, Learning rate: 7.223925233644861e-05


INFO:root:Epoch = [179/401] Iter = [   0/ 134] Loss = 0.0747 Avg Loss = 0.0747


Epoch 180, Learning rate: 7.259813084112149e-05


INFO:root:Epoch = [180/401] Iter = [   0/ 134] Loss = 0.07847 Avg Loss = 0.07847


Epoch 181, Learning rate: 7.29570093457944e-05


INFO:root:Epoch = [181/401] Iter = [   0/ 134] Loss = 0.06711 Avg Loss = 0.06711


Epoch 182, Learning rate: 7.33158878504673e-05


INFO:root:Epoch = [182/401] Iter = [   0/ 134] Loss = 0.06914 Avg Loss = 0.06914


Epoch 183, Learning rate: 7.36747663551402e-05


INFO:root:Epoch = [183/401] Iter = [   0/ 134] Loss = 0.06222 Avg Loss = 0.06222


Epoch 184, Learning rate: 7.403364485981309e-05


INFO:root:Epoch = [184/401] Iter = [   0/ 134] Loss = 0.07033 Avg Loss = 0.07033


Epoch 185, Learning rate: 7.439252336448599e-05


INFO:root:Epoch = [185/401] Iter = [   0/ 134] Loss = 0.09661 Avg Loss = 0.09661


Epoch 186, Learning rate: 7.475140186915888e-05


INFO:root:Epoch = [186/401] Iter = [   0/ 134] Loss = 0.08928 Avg Loss = 0.08928


Epoch 187, Learning rate: 7.511028037383178e-05


INFO:root:Epoch = [187/401] Iter = [   0/ 134] Loss = 0.07216 Avg Loss = 0.07216


Epoch 188, Learning rate: 7.546915887850468e-05


INFO:root:Epoch = [188/401] Iter = [   0/ 134] Loss = 0.06658 Avg Loss = 0.06658


Epoch 189, Learning rate: 7.582803738317757e-05


INFO:root:Epoch = [189/401] Iter = [   0/ 134] Loss = 0.07144 Avg Loss = 0.07144


Epoch 190, Learning rate: 7.618691588785047e-05


INFO:root:Epoch = [190/401] Iter = [   0/ 134] Loss = 0.07386 Avg Loss = 0.07386


Epoch 191, Learning rate: 7.654579439252337e-05


INFO:root:Epoch = [191/401] Iter = [   0/ 134] Loss = 0.05896 Avg Loss = 0.05896


Epoch 192, Learning rate: 7.690467289719626e-05


INFO:root:Epoch = [192/401] Iter = [   0/ 134] Loss = 0.06371 Avg Loss = 0.06371


Epoch 193, Learning rate: 7.726355140186916e-05


INFO:root:Epoch = [193/401] Iter = [   0/ 134] Loss = 0.074 Avg Loss = 0.074


Epoch 194, Learning rate: 7.762242990654207e-05


INFO:root:Epoch = [194/401] Iter = [   0/ 134] Loss = 0.06324 Avg Loss = 0.06324


Epoch 195, Learning rate: 7.798130841121495e-05


INFO:root:Epoch = [195/401] Iter = [   0/ 134] Loss = 0.09262 Avg Loss = 0.09262


Epoch 196, Learning rate: 7.834018691588786e-05


INFO:root:Epoch = [196/401] Iter = [   0/ 134] Loss = 0.06845 Avg Loss = 0.06845


Epoch 197, Learning rate: 7.869906542056075e-05


INFO:root:Epoch = [197/401] Iter = [   0/ 134] Loss = 0.06494 Avg Loss = 0.06494


Epoch 198, Learning rate: 7.905794392523366e-05


INFO:root:Epoch = [198/401] Iter = [   0/ 134] Loss = 0.07389 Avg Loss = 0.07389


Epoch 199, Learning rate: 7.941682242990655e-05


INFO:root:Epoch = [199/401] Iter = [   0/ 134] Loss = 0.06579 Avg Loss = 0.06579


Epoch 200, Learning rate: 7.977570093457945e-05


INFO:root:Epoch = [200/401] Iter = [   0/ 134] Loss = 0.1001 Avg Loss = 0.1001


Epoch 201, Learning rate: 8.013457943925235e-05


INFO:root:Epoch = [201/401] Iter = [   0/ 134] Loss = 0.06414 Avg Loss = 0.06414


Epoch 202, Learning rate: 8.049345794392523e-05


INFO:root:Epoch = [202/401] Iter = [   0/ 134] Loss = 0.0747 Avg Loss = 0.0747


Epoch 203, Learning rate: 8.085233644859814e-05


INFO:root:Epoch = [203/401] Iter = [   0/ 134] Loss = 0.07058 Avg Loss = 0.07058


Epoch 204, Learning rate: 8.121121495327104e-05


INFO:root:Epoch = [204/401] Iter = [   0/ 134] Loss = 0.08498 Avg Loss = 0.08498


Epoch 205, Learning rate: 8.157009345794393e-05


INFO:root:Epoch = [205/401] Iter = [   0/ 134] Loss = 0.06759 Avg Loss = 0.06759


Epoch 206, Learning rate: 8.192897196261683e-05


INFO:root:Epoch = [206/401] Iter = [   0/ 134] Loss = 0.06416 Avg Loss = 0.06416


Epoch 207, Learning rate: 8.228785046728973e-05


INFO:root:Epoch = [207/401] Iter = [   0/ 134] Loss = 0.06371 Avg Loss = 0.06371


Epoch 208, Learning rate: 8.264672897196262e-05


INFO:root:Epoch = [208/401] Iter = [   0/ 134] Loss = 0.08884 Avg Loss = 0.08884


Epoch 209, Learning rate: 8.300560747663552e-05


INFO:root:Epoch = [209/401] Iter = [   0/ 134] Loss = 0.06725 Avg Loss = 0.06725


Epoch 210, Learning rate: 8.336448598130842e-05


INFO:root:Epoch = [210/401] Iter = [   0/ 134] Loss = 0.1135 Avg Loss = 0.1135


Epoch 211, Learning rate: 8.372336448598131e-05


INFO:root:Epoch = [211/401] Iter = [   0/ 134] Loss = 0.08276 Avg Loss = 0.08276


Epoch 212, Learning rate: 8.408224299065421e-05


INFO:root:Epoch = [212/401] Iter = [   0/ 134] Loss = 0.07579 Avg Loss = 0.07579


Epoch 213, Learning rate: 8.44411214953271e-05


INFO:root:Epoch = [213/401] Iter = [   0/ 134] Loss = 0.07883 Avg Loss = 0.07883


Epoch 214, Learning rate: 8.480000000000001e-05


INFO:root:Epoch = [214/401] Iter = [   0/ 134] Loss = 0.08804 Avg Loss = 0.08804


Epoch 215, Learning rate: 8.51588785046729e-05


INFO:root:Epoch = [215/401] Iter = [   0/ 134] Loss = 0.06692 Avg Loss = 0.06692


Epoch 216, Learning rate: 8.551775700934581e-05


INFO:root:Epoch = [216/401] Iter = [   0/ 134] Loss = 0.07328 Avg Loss = 0.07328


Epoch 217, Learning rate: 8.587663551401869e-05


INFO:root:Epoch = [217/401] Iter = [   0/ 134] Loss = 0.06214 Avg Loss = 0.06214


Epoch 218, Learning rate: 8.62355140186916e-05


INFO:root:Epoch = [218/401] Iter = [   0/ 134] Loss = 0.08795 Avg Loss = 0.08795


Epoch 219, Learning rate: 8.65943925233645e-05


INFO:root:Epoch = [219/401] Iter = [   0/ 134] Loss = 0.06062 Avg Loss = 0.06062


Epoch 220, Learning rate: 8.69532710280374e-05


INFO:root:Epoch = [220/401] Iter = [   0/ 134] Loss = 0.0762 Avg Loss = 0.0762


Epoch 221, Learning rate: 8.731214953271029e-05


INFO:root:Epoch = [221/401] Iter = [   0/ 134] Loss = 0.08984 Avg Loss = 0.08984


Epoch 222, Learning rate: 8.767102803738317e-05


INFO:root:Epoch = [222/401] Iter = [   0/ 134] Loss = 0.07219 Avg Loss = 0.07219


Epoch 223, Learning rate: 8.802990654205608e-05


INFO:root:Epoch = [223/401] Iter = [   0/ 134] Loss = 0.06337 Avg Loss = 0.06337


Epoch 224, Learning rate: 8.838878504672898e-05


INFO:root:Epoch = [224/401] Iter = [   0/ 134] Loss = 0.07044 Avg Loss = 0.07044


Epoch 225, Learning rate: 8.874766355140188e-05


INFO:root:Epoch = [225/401] Iter = [   0/ 134] Loss = 0.06655 Avg Loss = 0.06655


Epoch 226, Learning rate: 8.910654205607477e-05


INFO:root:Epoch = [226/401] Iter = [   0/ 134] Loss = 0.06275 Avg Loss = 0.06275


Epoch 227, Learning rate: 8.946542056074767e-05


INFO:root:Epoch = [227/401] Iter = [   0/ 134] Loss = 0.06766 Avg Loss = 0.06766


Epoch 228, Learning rate: 8.982429906542057e-05


INFO:root:Epoch = [228/401] Iter = [   0/ 134] Loss = 0.07791 Avg Loss = 0.07791


Epoch 229, Learning rate: 9.018317757009346e-05


INFO:root:Epoch = [229/401] Iter = [   0/ 134] Loss = 0.06682 Avg Loss = 0.06682


Epoch 230, Learning rate: 9.054205607476636e-05


INFO:root:Epoch = [230/401] Iter = [   0/ 134] Loss = 0.06748 Avg Loss = 0.06748


Epoch 231, Learning rate: 9.090093457943926e-05


INFO:root:Epoch = [231/401] Iter = [   0/ 134] Loss = 0.06344 Avg Loss = 0.06344


Epoch 232, Learning rate: 9.125981308411215e-05


INFO:root:Epoch = [232/401] Iter = [   0/ 134] Loss = 0.07202 Avg Loss = 0.07202


Epoch 233, Learning rate: 9.161869158878505e-05


INFO:root:Epoch = [233/401] Iter = [   0/ 134] Loss = 0.05204 Avg Loss = 0.05204


Epoch 234, Learning rate: 9.197757009345796e-05


INFO:root:Epoch = [234/401] Iter = [   0/ 134] Loss = 0.06423 Avg Loss = 0.06423


Epoch 235, Learning rate: 9.233644859813084e-05


INFO:root:Epoch = [235/401] Iter = [   0/ 134] Loss = 0.07476 Avg Loss = 0.07476


Epoch 236, Learning rate: 9.269532710280375e-05


INFO:root:Epoch = [236/401] Iter = [   0/ 134] Loss = 0.0558 Avg Loss = 0.0558


Epoch 237, Learning rate: 9.305420560747664e-05


INFO:root:Epoch = [237/401] Iter = [   0/ 134] Loss = 0.1095 Avg Loss = 0.1095


Epoch 238, Learning rate: 9.341308411214955e-05


INFO:root:Epoch = [238/401] Iter = [   0/ 134] Loss = 0.05672 Avg Loss = 0.05672


Epoch 239, Learning rate: 9.377196261682244e-05


INFO:root:Epoch = [239/401] Iter = [   0/ 134] Loss = 0.1051 Avg Loss = 0.1051


Epoch 240, Learning rate: 9.413084112149534e-05


INFO:root:Epoch = [240/401] Iter = [   0/ 134] Loss = 0.08036 Avg Loss = 0.08036


Epoch 241, Learning rate: 9.448971962616824e-05


INFO:root:Epoch = [241/401] Iter = [   0/ 134] Loss = 0.06081 Avg Loss = 0.06081


Epoch 242, Learning rate: 9.484859813084113e-05


INFO:root:Epoch = [242/401] Iter = [   0/ 134] Loss = 0.06993 Avg Loss = 0.06993


Epoch 243, Learning rate: 9.520747663551403e-05


INFO:root:Epoch = [243/401] Iter = [   0/ 134] Loss = 0.05956 Avg Loss = 0.05956


Epoch 244, Learning rate: 9.556635514018691e-05


INFO:root:Epoch = [244/401] Iter = [   0/ 134] Loss = 0.06805 Avg Loss = 0.06805


Epoch 245, Learning rate: 9.592523364485982e-05


INFO:root:Epoch = [245/401] Iter = [   0/ 134] Loss = 0.07009 Avg Loss = 0.07009


Epoch 246, Learning rate: 9.628411214953272e-05


INFO:root:Epoch = [246/401] Iter = [   0/ 134] Loss = 0.1044 Avg Loss = 0.1044


Epoch 247, Learning rate: 9.664299065420561e-05


INFO:root:Epoch = [247/401] Iter = [   0/ 134] Loss = 0.06163 Avg Loss = 0.06163


Epoch 248, Learning rate: 9.700186915887851e-05


INFO:root:Epoch = [248/401] Iter = [   0/ 134] Loss = 0.07716 Avg Loss = 0.07716


Epoch 249, Learning rate: 9.736074766355141e-05


INFO:root:Epoch = [249/401] Iter = [   0/ 134] Loss = 0.06189 Avg Loss = 0.06189


Epoch 250, Learning rate: 9.77196261682243e-05


INFO:root:Epoch = [250/401] Iter = [   0/ 134] Loss = 0.06272 Avg Loss = 0.06272


Epoch 251, Learning rate: 9.80785046728972e-05


INFO:root:Epoch = [251/401] Iter = [   0/ 134] Loss = 0.06013 Avg Loss = 0.06013


Epoch 252, Learning rate: 9.84373831775701e-05


INFO:root:Epoch = [252/401] Iter = [   0/ 134] Loss = 0.05084 Avg Loss = 0.05084


Epoch 253, Learning rate: 9.8796261682243e-05


INFO:root:Epoch = [253/401] Iter = [   0/ 134] Loss = 0.07516 Avg Loss = 0.07516


Epoch 254, Learning rate: 9.915514018691589e-05


INFO:root:Epoch = [254/401] Iter = [   0/ 134] Loss = 0.08902 Avg Loss = 0.08902


Epoch 255, Learning rate: 9.951401869158879e-05


INFO:root:Epoch = [255/401] Iter = [   0/ 134] Loss = 0.05699 Avg Loss = 0.05699


Epoch 256, Learning rate: 9.98728971962617e-05


INFO:root:Epoch = [256/401] Iter = [   0/ 134] Loss = 0.08837 Avg Loss = 0.08837


Epoch 257, Learning rate: 0.00010023177570093458


INFO:root:Epoch = [257/401] Iter = [   0/ 134] Loss = 0.06397 Avg Loss = 0.06397


Epoch 258, Learning rate: 0.00010059065420560749


INFO:root:Epoch = [258/401] Iter = [   0/ 134] Loss = 0.06754 Avg Loss = 0.06754


Epoch 259, Learning rate: 0.00010094953271028037


INFO:root:Epoch = [259/401] Iter = [   0/ 134] Loss = 0.06098 Avg Loss = 0.06098


Epoch 260, Learning rate: 0.00010130841121495328


INFO:root:Epoch = [260/401] Iter = [   0/ 134] Loss = 0.09692 Avg Loss = 0.09692


Epoch 261, Learning rate: 0.00010166728971962618


INFO:root:Epoch = [261/401] Iter = [   0/ 134] Loss = 0.06896 Avg Loss = 0.06896


Epoch 262, Learning rate: 0.00010202616822429908


INFO:root:Epoch = [262/401] Iter = [   0/ 134] Loss = 0.05976 Avg Loss = 0.05976


Epoch 263, Learning rate: 0.00010238504672897197


INFO:root:Epoch = [263/401] Iter = [   0/ 134] Loss = 0.0582 Avg Loss = 0.0582


Epoch 264, Learning rate: 0.00010274392523364487


INFO:root:Epoch = [264/401] Iter = [   0/ 134] Loss = 0.05621 Avg Loss = 0.05621


Epoch 265, Learning rate: 0.00010310280373831777


INFO:root:Epoch = [265/401] Iter = [   0/ 134] Loss = 0.07199 Avg Loss = 0.07199


Epoch 266, Learning rate: 0.00010346168224299066


INFO:root:Epoch = [266/401] Iter = [   0/ 134] Loss = 0.07076 Avg Loss = 0.07076


Epoch 267, Learning rate: 0.00010382056074766356


INFO:root:Epoch = [267/401] Iter = [   0/ 134] Loss = 0.07618 Avg Loss = 0.07618


Epoch 268, Learning rate: 0.00010417943925233646


INFO:root:Epoch = [268/401] Iter = [   0/ 134] Loss = 0.05263 Avg Loss = 0.05263


Epoch 269, Learning rate: 0.00010453831775700937


INFO:root:Epoch = [269/401] Iter = [   0/ 134] Loss = 0.06617 Avg Loss = 0.06617


Epoch 270, Learning rate: 0.00010489719626168223


INFO:root:Epoch = [270/401] Iter = [   0/ 134] Loss = 0.08223 Avg Loss = 0.08223


Epoch 271, Learning rate: 0.00010525607476635515


INFO:root:Epoch = [271/401] Iter = [   0/ 134] Loss = 0.07353 Avg Loss = 0.07353


Epoch 272, Learning rate: 0.00010561495327102804


INFO:root:Epoch = [272/401] Iter = [   0/ 134] Loss = 0.06321 Avg Loss = 0.06321


Epoch 273, Learning rate: 0.00010597383177570094


INFO:root:Epoch = [273/401] Iter = [   0/ 134] Loss = 0.0922 Avg Loss = 0.0922


Epoch 274, Learning rate: 0.00010633271028037385


INFO:root:Epoch = [274/401] Iter = [   0/ 134] Loss = 0.1212 Avg Loss = 0.1212


Epoch 275, Learning rate: 0.00010669158878504673


INFO:root:Epoch = [275/401] Iter = [   0/ 134] Loss = 0.06612 Avg Loss = 0.06612


Epoch 276, Learning rate: 0.00010705046728971963


INFO:root:Epoch = [276/401] Iter = [   0/ 134] Loss = 0.08491 Avg Loss = 0.08491


Epoch 277, Learning rate: 0.00010740934579439252


INFO:root:Epoch = [277/401] Iter = [   0/ 134] Loss = 0.07372 Avg Loss = 0.07372


Epoch 278, Learning rate: 0.00010776822429906543


INFO:root:Epoch = [278/401] Iter = [   0/ 134] Loss = 0.06283 Avg Loss = 0.06283


Epoch 279, Learning rate: 0.00010812710280373833


INFO:root:Epoch = [279/401] Iter = [   0/ 134] Loss = 0.06644 Avg Loss = 0.06644


Epoch 280, Learning rate: 0.00010848598130841121


INFO:root:Epoch = [280/401] Iter = [   0/ 134] Loss = 0.06272 Avg Loss = 0.06272


Epoch 281, Learning rate: 0.00010884485981308411


INFO:root:Epoch = [281/401] Iter = [   0/ 134] Loss = 0.06388 Avg Loss = 0.06388


Epoch 282, Learning rate: 0.00010920373831775702


INFO:root:Epoch = [282/401] Iter = [   0/ 134] Loss = 0.04698 Avg Loss = 0.04698


Epoch 283, Learning rate: 0.00010956261682242992


INFO:root:Epoch = [283/401] Iter = [   0/ 134] Loss = 0.09908 Avg Loss = 0.09908


Epoch 284, Learning rate: 0.00010992149532710281


INFO:root:Epoch = [284/401] Iter = [   0/ 134] Loss = 0.0693 Avg Loss = 0.0693


Epoch 285, Learning rate: 0.0001102803738317757


INFO:root:Epoch = [285/401] Iter = [   0/ 134] Loss = 0.06722 Avg Loss = 0.06722


Epoch 286, Learning rate: 0.0001106392523364486


INFO:root:Epoch = [286/401] Iter = [   0/ 134] Loss = 0.06038 Avg Loss = 0.06038


Epoch 287, Learning rate: 0.0001109981308411215


INFO:root:Epoch = [287/401] Iter = [   0/ 134] Loss = 0.06274 Avg Loss = 0.06274


Epoch 288, Learning rate: 0.0001113570093457944


INFO:root:Epoch = [288/401] Iter = [   0/ 134] Loss = 0.06547 Avg Loss = 0.06547


Epoch 289, Learning rate: 0.00011171588785046731


INFO:root:Epoch = [289/401] Iter = [   0/ 134] Loss = 0.06562 Avg Loss = 0.06562


Epoch 290, Learning rate: 0.00011207476635514018


INFO:root:Epoch = [290/401] Iter = [   0/ 134] Loss = 0.07312 Avg Loss = 0.07312


Epoch 291, Learning rate: 0.00011243364485981309


INFO:root:Epoch = [291/401] Iter = [   0/ 134] Loss = 0.06117 Avg Loss = 0.06117


Epoch 292, Learning rate: 0.00011279252336448599


INFO:root:Epoch = [292/401] Iter = [   0/ 134] Loss = 0.08615 Avg Loss = 0.08615


Epoch 293, Learning rate: 0.00011315140186915888


INFO:root:Epoch = [293/401] Iter = [   0/ 134] Loss = 0.06445 Avg Loss = 0.06445


Epoch 294, Learning rate: 0.00011351028037383179


INFO:root:Epoch = [294/401] Iter = [   0/ 134] Loss = 0.06619 Avg Loss = 0.06619


Epoch 295, Learning rate: 0.00011386915887850468


INFO:root:Epoch = [295/401] Iter = [   0/ 134] Loss = 0.05658 Avg Loss = 0.05658


Epoch 296, Learning rate: 0.00011422803738317757


INFO:root:Epoch = [296/401] Iter = [   0/ 134] Loss = 0.05831 Avg Loss = 0.05831


Epoch 297, Learning rate: 0.00011458691588785047


INFO:root:Epoch = [297/401] Iter = [   0/ 134] Loss = 0.06242 Avg Loss = 0.06242


Epoch 298, Learning rate: 0.00011494579439252338


INFO:root:Epoch = [298/401] Iter = [   0/ 134] Loss = 0.09312 Avg Loss = 0.09312


Epoch 299, Learning rate: 0.00011530467289719628


INFO:root:Epoch = [299/401] Iter = [   0/ 134] Loss = 0.06179 Avg Loss = 0.06179


Epoch 300, Learning rate: 0.00011566355140186916


INFO:root:Epoch = [300/401] Iter = [   0/ 134] Loss = 0.0713 Avg Loss = 0.0713


Epoch 301, Learning rate: 0.00011602242990654206


INFO:root:Epoch = [301/401] Iter = [   0/ 134] Loss = 0.06311 Avg Loss = 0.06311


Epoch 302, Learning rate: 0.00011638130841121497


INFO:root:Epoch = [302/401] Iter = [   0/ 134] Loss = 0.06498 Avg Loss = 0.06498


Epoch 303, Learning rate: 0.00011674018691588786


INFO:root:Epoch = [303/401] Iter = [   0/ 134] Loss = 0.06456 Avg Loss = 0.06456


Epoch 304, Learning rate: 0.00011709906542056076


INFO:root:Epoch = [304/401] Iter = [   0/ 134] Loss = 0.04893 Avg Loss = 0.04893


Epoch 305, Learning rate: 0.00011745794392523364


INFO:root:Epoch = [305/401] Iter = [   0/ 134] Loss = 0.07365 Avg Loss = 0.07365


Epoch 306, Learning rate: 0.00011781682242990655


INFO:root:Epoch = [306/401] Iter = [   0/ 134] Loss = 0.06691 Avg Loss = 0.06691


Epoch 307, Learning rate: 0.00011817570093457945


INFO:root:Epoch = [307/401] Iter = [   0/ 134] Loss = 0.07147 Avg Loss = 0.07147


Epoch 308, Learning rate: 0.00011853457943925234


INFO:root:Epoch = [308/401] Iter = [   0/ 134] Loss = 0.0724 Avg Loss = 0.0724


Epoch 309, Learning rate: 0.00011889345794392525


INFO:root:Epoch = [309/401] Iter = [   0/ 134] Loss = 0.06624 Avg Loss = 0.06624


Epoch 310, Learning rate: 0.00011925233644859812


INFO:root:Epoch = [310/401] Iter = [   0/ 134] Loss = 0.06607 Avg Loss = 0.06607


Epoch 311, Learning rate: 0.00011961121495327103


INFO:root:Epoch = [311/401] Iter = [   0/ 134] Loss = 0.05506 Avg Loss = 0.05506


Epoch 312, Learning rate: 0.00011997009345794393


INFO:root:Epoch = [312/401] Iter = [   0/ 134] Loss = 0.0668 Avg Loss = 0.0668


Epoch 313, Learning rate: 0.00012032897196261684


INFO:root:Epoch = [313/401] Iter = [   0/ 134] Loss = 0.06234 Avg Loss = 0.06234


Epoch 314, Learning rate: 0.00012068785046728974


INFO:root:Epoch = [314/401] Iter = [   0/ 134] Loss = 0.06291 Avg Loss = 0.06291


Epoch 315, Learning rate: 0.00012104672897196262


INFO:root:Epoch = [315/401] Iter = [   0/ 134] Loss = 0.0558 Avg Loss = 0.0558


Epoch 316, Learning rate: 0.00012140560747663552


INFO:root:Epoch = [316/401] Iter = [   0/ 134] Loss = 0.05835 Avg Loss = 0.05835


Epoch 317, Learning rate: 0.00012176448598130841


INFO:root:Epoch = [317/401] Iter = [   0/ 134] Loss = 0.05962 Avg Loss = 0.05962


Epoch 318, Learning rate: 0.00012212336448598132


INFO:root:Epoch = [318/401] Iter = [   0/ 134] Loss = 0.06952 Avg Loss = 0.06952


Epoch 319, Learning rate: 0.00012248224299065422


INFO:root:Epoch = [319/401] Iter = [   0/ 134] Loss = 0.05901 Avg Loss = 0.05901


Epoch 320, Learning rate: 0.0001228411214953271


INFO:root:Epoch = [320/401] Iter = [   0/ 134] Loss = 0.06573 Avg Loss = 0.06573


Epoch 321, Learning rate: 0.00012319999999999999


INFO:root:Epoch = [321/401] Iter = [   0/ 134] Loss = 0.05433 Avg Loss = 0.05433


Epoch 322, Learning rate: 0.0001235588785046729


INFO:root:Epoch = [322/401] Iter = [   0/ 134] Loss = 0.05475 Avg Loss = 0.05475


Epoch 323, Learning rate: 0.0001239177570093458


INFO:root:Epoch = [323/401] Iter = [   0/ 134] Loss = 0.05697 Avg Loss = 0.05697


Epoch 324, Learning rate: 0.0001242766355140187


INFO:root:Epoch = [324/401] Iter = [   0/ 134] Loss = 0.06047 Avg Loss = 0.06047


Epoch 325, Learning rate: 0.00012463551401869157


INFO:root:Epoch = [325/401] Iter = [   0/ 134] Loss = 0.08882 Avg Loss = 0.08882


Epoch 326, Learning rate: 0.0001249943925233645


INFO:root:Epoch = [326/401] Iter = [   0/ 134] Loss = 0.07979 Avg Loss = 0.07979


Epoch 327, Learning rate: 0.0001253532710280374


INFO:root:Epoch = [327/401] Iter = [   0/ 134] Loss = 0.06512 Avg Loss = 0.06512


Epoch 328, Learning rate: 0.0001257121495327103


INFO:root:Epoch = [328/401] Iter = [   0/ 134] Loss = 0.1038 Avg Loss = 0.1038


Epoch 329, Learning rate: 0.00012607102803738319


INFO:root:Epoch = [329/401] Iter = [   0/ 134] Loss = 0.08706 Avg Loss = 0.08706


Epoch 330, Learning rate: 0.00012642990654205605


INFO:root:Epoch = [330/401] Iter = [   0/ 134] Loss = 0.08269 Avg Loss = 0.08269


Epoch 331, Learning rate: 0.00012678878504672898


INFO:root:Epoch = [331/401] Iter = [   0/ 134] Loss = 0.07811 Avg Loss = 0.07811


Epoch 332, Learning rate: 0.00012714766355140188


INFO:root:Epoch = [332/401] Iter = [   0/ 134] Loss = 0.06606 Avg Loss = 0.06606


Epoch 333, Learning rate: 0.00012750654205607477


INFO:root:Epoch = [333/401] Iter = [   0/ 134] Loss = 0.05663 Avg Loss = 0.05663


Epoch 334, Learning rate: 0.00012786542056074767


INFO:root:Epoch = [334/401] Iter = [   0/ 134] Loss = 0.06236 Avg Loss = 0.06236


Epoch 335, Learning rate: 0.00012822429906542056


INFO:root:Epoch = [335/401] Iter = [   0/ 134] Loss = 0.05535 Avg Loss = 0.05535


Epoch 336, Learning rate: 0.00012858317757009346


INFO:root:Epoch = [336/401] Iter = [   0/ 134] Loss = 0.06377 Avg Loss = 0.06377


Epoch 337, Learning rate: 0.00012894205607476636


INFO:root:Epoch = [337/401] Iter = [   0/ 134] Loss = 0.1579 Avg Loss = 0.1579


Epoch 338, Learning rate: 0.00012930093457943925


INFO:root:Epoch = [338/401] Iter = [   0/ 134] Loss = 0.05475 Avg Loss = 0.05475


Epoch 339, Learning rate: 0.00012965981308411215


INFO:root:Epoch = [339/401] Iter = [   0/ 134] Loss = 0.06399 Avg Loss = 0.06399


Epoch 340, Learning rate: 0.00013001869158878505


INFO:root:Epoch = [340/401] Iter = [   0/ 134] Loss = 0.04383 Avg Loss = 0.04383


Epoch 341, Learning rate: 0.00013037757009345794


INFO:root:Epoch = [341/401] Iter = [   0/ 134] Loss = 0.05715 Avg Loss = 0.05715


Epoch 342, Learning rate: 0.00013073644859813084


INFO:root:Epoch = [342/401] Iter = [   0/ 134] Loss = 0.0634 Avg Loss = 0.0634


Epoch 343, Learning rate: 0.00013109532710280376


INFO:root:Epoch = [343/401] Iter = [   0/ 134] Loss = 0.08067 Avg Loss = 0.08067


Epoch 344, Learning rate: 0.00013145420560747666


INFO:root:Epoch = [344/401] Iter = [   0/ 134] Loss = 0.06077 Avg Loss = 0.06077


Epoch 345, Learning rate: 0.00013181308411214953


INFO:root:Epoch = [345/401] Iter = [   0/ 134] Loss = 0.05236 Avg Loss = 0.05236


Epoch 346, Learning rate: 0.00013217196261682243


INFO:root:Epoch = [346/401] Iter = [   0/ 134] Loss = 0.06759 Avg Loss = 0.06759


Epoch 347, Learning rate: 0.00013253084112149532


INFO:root:Epoch = [347/401] Iter = [   0/ 134] Loss = 0.05415 Avg Loss = 0.05415


Epoch 348, Learning rate: 0.00013288971962616825


INFO:root:Epoch = [348/401] Iter = [   0/ 134] Loss = 0.06182 Avg Loss = 0.06182


Epoch 349, Learning rate: 0.00013324859813084114


INFO:root:Epoch = [349/401] Iter = [   0/ 134] Loss = 0.05356 Avg Loss = 0.05356


Epoch 350, Learning rate: 0.000133607476635514


INFO:root:Epoch = [350/401] Iter = [   0/ 134] Loss = 0.06817 Avg Loss = 0.06817


Epoch 351, Learning rate: 0.0001339663551401869


INFO:root:Epoch = [351/401] Iter = [   0/ 134] Loss = 0.0597 Avg Loss = 0.0597


Epoch 352, Learning rate: 0.00013432523364485983


INFO:root:Epoch = [352/401] Iter = [   0/ 134] Loss = 0.0834 Avg Loss = 0.0834


Epoch 353, Learning rate: 0.00013468411214953273


INFO:root:Epoch = [353/401] Iter = [   0/ 134] Loss = 0.06105 Avg Loss = 0.06105


Epoch 354, Learning rate: 0.00013504299065420563


INFO:root:Epoch = [354/401] Iter = [   0/ 134] Loss = 0.04814 Avg Loss = 0.04814


Epoch 355, Learning rate: 0.0001354018691588785


INFO:root:Epoch = [355/401] Iter = [   0/ 134] Loss = 0.05966 Avg Loss = 0.05966


Epoch 356, Learning rate: 0.00013576074766355142


INFO:root:Epoch = [356/401] Iter = [   0/ 134] Loss = 0.07245 Avg Loss = 0.07245


Epoch 357, Learning rate: 0.00013611962616822432


INFO:root:Epoch = [357/401] Iter = [   0/ 134] Loss = 0.04982 Avg Loss = 0.04982


Epoch 358, Learning rate: 0.0001364785046728972


INFO:root:Epoch = [358/401] Iter = [   0/ 134] Loss = 0.05919 Avg Loss = 0.05919


Epoch 359, Learning rate: 0.0001368373831775701


INFO:root:Epoch = [359/401] Iter = [   0/ 134] Loss = 0.0563 Avg Loss = 0.0563


Epoch 360, Learning rate: 0.00013719626168224298


INFO:root:Epoch = [360/401] Iter = [   0/ 134] Loss = 0.06853 Avg Loss = 0.06853


Epoch 361, Learning rate: 0.0001375551401869159


INFO:root:Epoch = [361/401] Iter = [   0/ 134] Loss = 0.05587 Avg Loss = 0.05587


Epoch 362, Learning rate: 0.0001379140186915888


INFO:root:Epoch = [362/401] Iter = [   0/ 134] Loss = 0.06131 Avg Loss = 0.06131


Epoch 363, Learning rate: 0.0001382728971962617


INFO:root:Epoch = [363/401] Iter = [   0/ 134] Loss = 0.05456 Avg Loss = 0.05456


Epoch 364, Learning rate: 0.0001386317757009346


INFO:root:Epoch = [364/401] Iter = [   0/ 134] Loss = 0.07276 Avg Loss = 0.07276


Epoch 365, Learning rate: 0.0001389906542056075


INFO:root:Epoch = [365/401] Iter = [   0/ 134] Loss = 0.05825 Avg Loss = 0.05825


Epoch 366, Learning rate: 0.00013934953271028038


INFO:root:Epoch = [366/401] Iter = [   0/ 134] Loss = 0.05053 Avg Loss = 0.05053


Epoch 367, Learning rate: 0.00013970841121495328


INFO:root:Epoch = [367/401] Iter = [   0/ 134] Loss = 0.04941 Avg Loss = 0.04941


Epoch 368, Learning rate: 0.00014006728971962618


INFO:root:Epoch = [368/401] Iter = [   0/ 134] Loss = 0.0609 Avg Loss = 0.0609


Epoch 369, Learning rate: 0.00014042616822429907


INFO:root:Epoch = [369/401] Iter = [   0/ 134] Loss = 0.06525 Avg Loss = 0.06525


Epoch 370, Learning rate: 0.00014078504672897197


INFO:root:Epoch = [370/401] Iter = [   0/ 134] Loss = 0.06867 Avg Loss = 0.06867


Epoch 371, Learning rate: 0.00014114392523364487


INFO:root:Epoch = [371/401] Iter = [   0/ 134] Loss = 0.05259 Avg Loss = 0.05259


Epoch 372, Learning rate: 0.00014150280373831776


INFO:root:Epoch = [372/401] Iter = [   0/ 134] Loss = 0.07628 Avg Loss = 0.07628


Epoch 373, Learning rate: 0.00014186168224299066


INFO:root:Epoch = [373/401] Iter = [   0/ 134] Loss = 0.08079 Avg Loss = 0.08079


Epoch 374, Learning rate: 0.00014222056074766356


INFO:root:Epoch = [374/401] Iter = [   0/ 134] Loss = 0.06766 Avg Loss = 0.06766


Epoch 375, Learning rate: 0.00014257943925233645


INFO:root:Epoch = [375/401] Iter = [   0/ 134] Loss = 0.05861 Avg Loss = 0.05861


Epoch 376, Learning rate: 0.00014293831775700935


INFO:root:Epoch = [376/401] Iter = [   0/ 134] Loss = 0.05122 Avg Loss = 0.05122


Epoch 377, Learning rate: 0.00014329719626168225


INFO:root:Epoch = [377/401] Iter = [   0/ 134] Loss = 0.05346 Avg Loss = 0.05346


Epoch 378, Learning rate: 0.00014365607476635514


INFO:root:Epoch = [378/401] Iter = [   0/ 134] Loss = 0.05659 Avg Loss = 0.05659


Epoch 379, Learning rate: 0.00014401495327102804


INFO:root:Epoch = [379/401] Iter = [   0/ 134] Loss = 0.07344 Avg Loss = 0.07344


Epoch 380, Learning rate: 0.00014437383177570094


INFO:root:Epoch = [380/401] Iter = [   0/ 134] Loss = 0.07271 Avg Loss = 0.07271


Epoch 381, Learning rate: 0.00014473271028037383


INFO:root:Epoch = [381/401] Iter = [   0/ 134] Loss = 0.06765 Avg Loss = 0.06765


Epoch 382, Learning rate: 0.00014509158878504673


INFO:root:Epoch = [382/401] Iter = [   0/ 134] Loss = 0.06455 Avg Loss = 0.06455


Epoch 383, Learning rate: 0.00014545046728971965


INFO:root:Epoch = [383/401] Iter = [   0/ 134] Loss = 0.04728 Avg Loss = 0.04728


Epoch 384, Learning rate: 0.00014580934579439252


INFO:root:Epoch = [384/401] Iter = [   0/ 134] Loss = 0.05485 Avg Loss = 0.05485


Epoch 385, Learning rate: 0.00014616822429906542


INFO:root:Epoch = [385/401] Iter = [   0/ 134] Loss = 0.05974 Avg Loss = 0.05974


Epoch 386, Learning rate: 0.00014652710280373832


INFO:root:Epoch = [386/401] Iter = [   0/ 134] Loss = 0.05068 Avg Loss = 0.05068


Epoch 387, Learning rate: 0.00014688598130841124


INFO:root:Epoch = [387/401] Iter = [   0/ 134] Loss = 0.05987 Avg Loss = 0.05987


Epoch 388, Learning rate: 0.00014724485981308414


INFO:root:Epoch = [388/401] Iter = [   0/ 134] Loss = 0.05232 Avg Loss = 0.05232


Epoch 389, Learning rate: 0.000147603738317757


INFO:root:Epoch = [389/401] Iter = [   0/ 134] Loss = 0.04879 Avg Loss = 0.04879


Epoch 390, Learning rate: 0.0001479626168224299


INFO:root:Epoch = [390/401] Iter = [   0/ 134] Loss = 0.05148 Avg Loss = 0.05148


Epoch 391, Learning rate: 0.0001483214953271028


INFO:root:Epoch = [391/401] Iter = [   0/ 134] Loss = 0.04885 Avg Loss = 0.04885


Epoch 392, Learning rate: 0.00014868037383177572


INFO:root:Epoch = [392/401] Iter = [   0/ 134] Loss = 0.07672 Avg Loss = 0.07672


Epoch 393, Learning rate: 0.00014903925233644862


INFO:root:Epoch = [393/401] Iter = [   0/ 134] Loss = 0.05285 Avg Loss = 0.05285


Epoch 394, Learning rate: 0.0001493981308411215


INFO:root:Epoch = [394/401] Iter = [   0/ 134] Loss = 0.07538 Avg Loss = 0.07538


Epoch 395, Learning rate: 0.00014975700934579438


INFO:root:Epoch = [395/401] Iter = [   0/ 134] Loss = 0.06236 Avg Loss = 0.06236


Epoch 396, Learning rate: 0.0001501158878504673


INFO:root:Epoch = [396/401] Iter = [   0/ 134] Loss = 0.06306 Avg Loss = 0.06306


Epoch 397, Learning rate: 0.0001504747663551402


INFO:root:Epoch = [397/401] Iter = [   0/ 134] Loss = 0.06629 Avg Loss = 0.06629


Epoch 398, Learning rate: 0.0001508336448598131


INFO:root:Epoch = [398/401] Iter = [   0/ 134] Loss = 0.05512 Avg Loss = 0.05512


Epoch 399, Learning rate: 0.00015119252336448597


INFO:root:Epoch = [399/401] Iter = [   0/ 134] Loss = 0.05478 Avg Loss = 0.05478


Epoch 400, Learning rate: 0.0001515514018691589


INFO:root:Epoch = [400/401] Iter = [   0/ 134] Loss = 0.06318 Avg Loss = 0.06318


Epoch 401, Learning rate: 0.0001519102803738318


<IPython.core.display.Javascript object>

: 