In [1]:
%matplotlib notebook
import os, sys
import logging
import random
import h5py
import shutil
import time
import argparse
import numpy as np
import sigpy.plot as pl
import torch
import sigpy as sp
import torchvision
from torch import optim
from tensorboardX import SummaryWriter
from torch.nn import functional as F
import torch.nn as nn
from torch.utils.data import DataLoader
import matplotlib
# import custom libraries
from utils import transforms as T
from utils import subsample as ss
from utils import complex_utils as cplx
from utils.resnet2p1d import generate_model
from utils.flare_utils import roll
# import custom classes
from utils.datasets_Sairam import SliceData
from subsample_fastmri import MaskFunc
from MoDL_single import UnrolledModel
import argparse
from models.SAmodel import MyNetwork
from models.Unrolled import Unrolled
from models.UnrolledRef import UnrolledRef
from models.UnrolledTransformer import UnrolledTrans
import matplotlib.pyplot as plt
from ImageFusionBlock import ImageFusionBlock
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
%load_ext autoreload
%autoreload 0
from ImageFusion_Dualbranch_Fusion.densefuse_net import DenseFuseNet
from ImageFusion_Dualbranch_Fusion.channel_fusion import channel_f as channel_fusion
import itertools
from RCAN import CombinedNetwork
from models.FusionNet import FusionNet
from recon_net_wrap import ViTfuser
#from UnrolledViT import UnrolledViT
from UnrolledViT_Sairam import UnrolledViT

from fastmri.data import transforms, subsample

In [2]:
class Namespace:
    """Minimal attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Attach each keyword as a plain attribute, in the order it was given.
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)

In [3]:
class DataTransform:
    """
    Data Transformer for training unrolled reconstruction models.

    Each call normalises one k-space/target pair and its reference image, perturbs and
    undersamples the k-space, and returns everything as torch tensors.

    Note: ``mask_func``, ``use_seed`` and ``self.rng`` are stored on the instance but are
    not consulted by ``__call__``; the sampling mask is rebuilt on every call through
    ``get_mask_func(ACCELERATION)``. ``args`` is accepted and ignored.
    """

    # Size k-space is resized to before the scale estimate, and the size it is resized
    # back to. (sigpy's resize presumably crops / zero-pads about the centre - confirm.)
    LOWRES_SHAPE = (155, 24)
    IMAGE_SHAPE = (155, 155)
    # The normalisation scale is the magnitude found this fraction of the way down the
    # descending-sorted low-resolution image.
    SCALE_FRACTION = 0.05
    # Second argument handed to T.awgn_torch. Presumably an SNR in dB; the original inline
    # note read "10dB for simulations" - TODO confirm the unit against utils.transforms.
    NOISE_LEVEL = 30
    # Acceleration factor used for the per-call equispaced mask.
    ACCELERATION = 3

    def __init__(self, mask_func, args, use_seed=False):
        """
        Args:
            mask_func: stored as ``self.mask_func``; not used by ``__call__``.
            args: unused.
            use_seed: stored as ``self.use_seed``; not used by ``__call__``.
        """
        self.mask_func = mask_func
        self.use_seed = use_seed
        self.rng = np.random.RandomState()

    def get_mask_func(self, factor):
        """
        Build a fastMRI ``EquiSpacedMaskFunc`` for acceleration ``factor``.

        The centre fraction is ``0.1 * 4 / factor``, i.e. 0.1 at 4x and proportionally
        larger at lower accelerations.
        """
        center_fraction = 0.1 * 4 / factor
        return subsample.EquiSpacedMaskFunc(
            center_fractions=[center_fraction],
            accelerations=[factor],
        )

    def _lowres_scale(self, kspace):
        """Return the normalisation scale estimated from a low-resolution image of ``kspace``."""
        lowres_kspace = sp.resize(sp.resize(kspace, self.LOWRES_SHAPE), self.IMAGE_SHAPE)
        magnitudes = abs(sp.ifft(lowres_kspace)).reshape(-1)
        rank = int(round(self.SCALE_FRACTION * magnitudes.shape[0]))
        # Value at position `rank` of the magnitudes sorted in descending order.
        return magnitudes[magnitudes.argsort()[::-1][rank]]

    def __call__(self, kspace, target, reference_kspace, reference, slice):
        """
        Args:
            kspace: complex k-space array of the slice to reconstruct.
            target: complex target image; divided by the same scale as ``kspace``.
            reference_kspace: k-space of the reference image; only used to derive the
                reference scale.
            reference: complex reference image.
            slice: unused (it also shadows the builtin; the name is kept so existing
                callers keep working).

        Returns:
            Tuple ``(kspace_torch, target_torch, mask_torch, reference_torch)``:
            the noisy, undersampled k-space; the scaled target after an fft2/ifft2 round
            trip; the sampling mask (non-zero k-space entries) stacked twice along dim 2;
            and the scaled reference after an fft2/ifft2 round trip.
        """
        # Normalise k-space and target by one shared scale.
        scale = self._lowres_scale(kspace)
        kspace = kspace / scale
        target = target / scale

        # Convert everything from numpy arrays to tensors
        kspace_torch = cplx.to_tensor(kspace).float()
        target_torch = cplx.to_tensor(target).float()
        target_torch = T.ifft2(T.fft2(target_torch))

        # Perturb the k-space before undersampling (additive noise, judging by the helper name).
        kspace_torch = T.awgn_torch(kspace_torch, self.NOISE_LEVEL, L=1)

        ## Masking
        mask_func = self.get_mask_func(self.ACCELERATION)
        kspace_torch = transforms.apply_mask(kspace_torch, mask_func)[0]

        # Recover the binary mask from the surviving samples and duplicate it along dim 2
        # (assumes the last axis of the complex tensors is real/imag - TODO confirm).
        mask = np.abs(cplx.to_numpy(kspace_torch)) != 0
        mask_tensor = torch.tensor(mask).float()
        mask_torch = torch.stack([mask_tensor, mask_tensor], dim=2)

        ### Reference addition ###
        # The reference gets its own scale, derived from its own k-space.
        reference = reference / self._lowres_scale(reference_kspace)
        reference_torch = cplx.to_tensor(reference).float()
        reference_torch = T.ifft2(T.fft2(reference_torch))

        return kspace_torch, target_torch, mask_torch, reference_torch

In [4]:
def create_datasets(args):
    """
    Build the training dataset rooted at ``args.data_path``.

    A ``MaskFunc([0.08], [4])`` is handed to ``DataTransform`` together with ``args``;
    every slice is kept (``sample_rate=1``).
    """
    slice_transform = DataTransform(MaskFunc([0.08], [4]), args)
    return SliceData(
        root=str(args.data_path),
        transform=slice_transform,
        sample_rate=1,
    )
def create_data_loaders(args):
    """
    Wrap the training dataset in a shuffling ``DataLoader``.

    The batch size comes from ``args.batch_size``; the loader uses 8 worker processes
    and pinned memory.
    """
    dataset = create_datasets(args)
    loader_options = dict(
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
    )
    return DataLoader(dataset=dataset, **loader_options)
def build_optim(args, params):
    """Return an Adam optimizer over ``params`` using ``args.lr`` and ``args.weight_decay``."""
    adam_settings = {"lr": args.lr, "weight_decay": args.weight_decay}
    return torch.optim.Adam(params, **adam_settings)

In [5]:
# Hyperparameters for this run. Trailing comments record previously tried values.
params = Namespace(
    # previous dataset: "./registered_data/patient23b/"
    data_path="./reg_data_sairam/",
    batch_size=2,  # 4
    num_grad_steps=1,  # 4
    num_cg_steps=8,  # 8
    share_weights=True,
    modl_lamda=0.05,
    lr=0.0005,  # also tried: 0.0001
    weight_decay=0,
    lr_step_size=20,
    lr_gamma=0.5,
    epoch=241,
    reference_mode=1,
    reference_lambda=0.1,
)


In [6]:

# Build the shuffled training DataLoader from the hyperparameters in `params`.
train_loader = create_data_loaders(params)

In [7]:
# Perceptual-loss setup.
# `VGGloss` is the style term of the training loss, applied between the network output
# and the reference magnitude image. The commented-out lines are earlier backbone
# choices kept for reference; in the visible cells `models` is only used by them.
from torchvision import models
#model_ft = models.resnet18(weights='DEFAULT').to(device).requires_grad_(False)
#model_ft.fc = nn.Identity()
#model_ft = models.vgg16(weights='DEFAULT').to(device)#.requires_grad_(False)
from FSloss_wrap import VGGLoss,ResNet18Backbone,FeatureEmbedding,contrastive_loss,VGGPerceptualLoss
#VGGloss = VGGLoss().to(device)
VGGloss = VGGPerceptualLoss().to(device)
#UFLoss = ResNet18Backbone().to(device)
#UFLoss = VGGLoss().to(device)
#UFLoss = models.vgg16(pretrained=True).features[:8+1].to(device)
#UFLoss.eval()

def extract_patches(images, patch_size=(20, 20), stride=(20, 20)):
    """
    Cut a batch of images into a flat list of 2-D patches.

    Args:
        images: tensor of shape (batch_size, 1, H, W), e.g. (batch_size, 1, 180, 120)
            as produced by ``pad_image``.
        patch_size: (height, width) of each patch.
        stride: (vertical, horizontal) step between patches; equal to ``patch_size``
            for non-overlapping patches.

    Returns:
        Tensor of shape (batch_size, num_patches, 1, patch_size[0], patch_size[1]),
        patches ordered row by row. Rows/columns that do not fill a complete window
        are dropped by ``unfold``. The singleton channel axis is hard-coded, so the
        function is written for single-channel input.
    """
    patch_h, patch_w = patch_size[0], patch_size[1]
    step_h, step_w = stride[0], stride[1]
    batch = images.size(0)

    # Slide along height (dim 2), then width (dim 3):
    # (B, C, nH, nW, patch_h, patch_w)
    windows = images.unfold(2, patch_h, step_h)
    windows = windows.unfold(3, patch_w, step_w)

    # Move the patch-grid axes ahead of the channel axis, then flatten the grid.
    windows = windows.permute(0, 2, 3, 1, 4, 5).contiguous()
    return windows.view(batch, -1, 1, patch_h, patch_w)


# Patch geometry for `extract_patches`. In the visible cells these two globals are only
# referenced by the disabled patch-wise loss experiment inside the training loop.
patch_size = (20, 20)
stride = (20, 20)  # Non-overlapping patches (stride equals patch size)
def feature_space_loss(features1, features2):
    """Mean squared error between two feature tensors (default 'mean' reduction)."""
    distance = F.mse_loss(input=features1, target=features2)
    return distance
def pad_image(images):
    """
    Zero-pad the two trailing axes of ``images``.

    The last axis gains 6 zeros on each side and the second-to-last axis 4 on each
    side, so a (batch_size, 1, 172, 108) batch becomes (batch_size, 1, 180, 120).
    """
    # F.pad lists the padding from the last axis backwards: (left, right, top, bottom).
    border = (6, 6, 4, 4)
    return F.pad(images, border, mode='constant', value=0)
# Disabled experiment: contrastive feature-embedding loss backed by a memory bank.
#modelLoss = ResNet18Backbone().to(device)
#embedding_model = FeatureEmbedding(modelLoss).to(device)
#memory_bank = torch.randn(16, 128)  # Assuming num_patches is the number of different patches stored.
#memory_bank = nn.functional.normalize(memory_bank, p=2, dim=1)  # Normalize the memory bank vectors


from vision_transformer import VisionTransformer
# Stand-alone ViT. In the visible cells it is only referenced by the disabled
# `model2 = ReconNet(net)` path below.
net = VisionTransformer(
    avrg_img_size=320,
    patch_size=(10, 10),
    in_chans=1,
    embed_dim=64,
    depth=10,
    num_heads=16,
)

from recon_net import ReconNet
# The network that is actually trained.
model = UnrolledViT(params).to(device)
# Disabled: pretrained ReconNet wrapper around `net`.
#model2 = ReconNet(net).to(device)#.requires_grad_(False)
#cp = torch.load('./lsdir-2x+hq50k_vit_epoch_60.pt', map_location=device)
#model2.load_state_dict(cp['model_state_dict'])

# Disabled: freeze the whole model and train only the similarity-net parameters.
# model.requires_grad_(False)
#
# for net in model.similaritynets:
#     net.param1.requires_grad_(True)
#     net.param2.requires_grad_(True)
#     #net.recon_net.net.head.requires_grad_(True)

# Disabled alternative: Adam driven by a OneCycleLR schedule.
# optimizer = optim.Adam(model.parameters(), lr=0.0)
# scheduler = optim.lr_scheduler.OneCycleLR(
#     optimizer=optimizer,
#     max_lr=0.0001,
#     steps_per_epoch=len(train_loader),
#     epochs=params.epoch,
#     pct_start=0.01,
#     anneal_strategy='linear',
#     cycle_momentum=False,
#     base_momentum=0.,
#     max_momentum=0.,
#     div_factor = 25.,
#     final_div_factor=1.,
# )

# Active configuration: Adam, with the learning rate multiplied by `lr_gamma`
# every `lr_step_size` scheduler steps.
optimizer = build_optim(params, model.parameters())
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=params.lr_step_size,
    gamma=params.lr_gamma,
)




Notes:

- Shared weights (cf. `params.share_weights = True` in the hyperparameter cell).
- Try new pretrained 11.12:
  `cp = torch.load('./checkpoints_trained_start/model_100.pt', map_location=self.device)`


In [8]:
### Load for fine-tuning
# Warm-start `model` from a pretrained checkpoint. Only the 'model' entry of the
# checkpoint is used here; no optimizer state is restored.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load checkpoint
# files from a trusted source.

checkpoint_file = "./checkpoints_fuser_sairam_pre/model_ViTFuserPre_100.pt"
checkpoint = torch.load(checkpoint_file,map_location=device)
model.load_state_dict(checkpoint['model'])

# Loss functions. `criterion` (SSIM) is the one used by the training loop;
# `criterionMSE` is only referenced by commented-out alternatives.
from fastmri.losses import SSIMLoss
criterion = SSIMLoss().to(device)
criterionMSE = nn.MSELoss()
#criterion = nn.L1Loss()

# Per-epoch history, filled by the training loop and plotted afterwards.
epochs_plot = []
losses_plot = []

# Output directory for checkpoints, the loss plot and the loss log. It is defined and
# created before the loop so the first torch.save cannot fail on a missing directory
# and so `exp_dir` exists even if the loop body never runs.
exp_dir = "L2_checkpoints_Sairam/"
os.makedirs(exp_dir, exist_ok=True)

CHECKPOINT_EVERY = 240  # epochs between checkpoints (epochs 0 and 240 of a 241-epoch run)
LOG_EVERY = 400         # iterations between progress log lines

# Loss weights: SSIM(output, target) + style(output, reference) [+ SSIM(output, reference)].
SSIM_WEIGHT = 1.0
STYLE_WEIGHT = 0.5
REF_SSIM_WEIGHT = 0.0   # "tests2" knob; the term is skipped entirely while it is 0


def _complex_magnitude(complex_last):
    """Magnitude of a batch whose last axis holds (real, imag); returns (B, 1, H, W)."""
    channels_first = complex_last.permute(0, 3, 1, 2)
    real_part = channels_first[:, 0, :, :].unsqueeze(1)
    imag_part = channels_first[:, 1, :, :].unsqueeze(1)
    return torch.sqrt(real_part ** 2 + imag_part ** 2)


for epoch in range(params.epoch):
    model.train()
    avg_loss = 0.
    running_loss = 0.0
    # `batch_idx` / `kspace_in` replace the former `iter` / `input`, which shadowed the
    # builtins for the rest of the kernel session.
    for batch_idx, data in enumerate(train_loader):
        # `mask` is part of the batch but is not consumed by the model call below.
        kspace_in, target, mask, reference = data
        kspace_in = kspace_in.to(device).float()
        target = target.to(device).float()
        reference = reference.to(device).float()

        # Magnitude images used by the losses.
        mag_tar = _complex_magnitude(target)
        mag_ref = _complex_magnitude(reference)

        im_out = model(kspace_in, reference)

        # Shared dynamic range for SSIM: the maximum over the output and the target.
        # Computed while im_out is still channels-last, then im_out goes channels-first.
        maxval = torch.max(torch.cat((im_out, mag_tar.permute(0, 2, 3, 1)), dim=1))
        im_out = im_out.permute(0, 3, 1, 2)

        # Constant map expanded to the spatial size minus 6 per axis (presumably the
        # output size of SSIMLoss's default 7x7 valid window - TODO confirm).
        data_range = torch.tensor([maxval], device=device).view(1, 1, 1, 1).expand(
            im_out.size(0), im_out.size(1), im_out.size(2) - 6, im_out.size(3) - 6
        )

        # SSIM to the target plus a perceptual/style term against the reference.
        # Alternatives that were tried here and are now disabled: SSIM only ("No Fuser"),
        # MSE on magnitudes (criterionMSE), MSE on VGG features, and a patch-wise VGG
        # loss over 20x20 patches (pad_image + extract_patches).
        loss = (
            SSIM_WEIGHT * criterion(im_out, mag_tar, data_range)
            + STYLE_WEIGHT * VGGloss(im_out, mag_ref)
        )
        if REF_SSIM_WEIGHT:
            loss = loss + REF_SSIM_WEIGHT * criterion(im_out, mag_ref, data_range)

        loss_value = loss.item()  # one host sync per step
        running_loss = running_loss + loss_value

        optimizer.zero_grad()
        # NOTE(review): retain_graph=True is normally unnecessary when the graph is
        # rebuilt every step and it keeps activations alive longer. It is left in place
        # because the model internals are not visible here - try plain backward().
        loss.backward(retain_graph=True)
        optimizer.step()

        # Exponential moving average of the loss, seeded with the first batch.
        avg_loss = 0.99 * avg_loss + 0.01 * loss_value if batch_idx > 0 else loss_value
        if batch_idx % LOG_EVERY == 0:
            logging.info(
                f'Epoch = [{epoch:3d}/{params.epoch:3d}] '
                f'Iter = [{batch_idx:4d}/{len(train_loader):4d}] '
                f'Loss = {loss_value:.4g} Avg Loss = {avg_loss:.4g}'
            )

    # Saving the model
    if epoch % CHECKPOINT_EVERY == 0:
        torch.save(
            {
                'epoch': epoch,
                'params': params,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'exp_dir': exp_dir
            },
            f=os.path.join(exp_dir, 'model_%d.pt' % (epoch))
        )

    # Mean loss over the epoch, then one scheduler step per epoch.
    running_loss = running_loss / len(train_loader)
    scheduler.step()
    current_lr = scheduler.get_last_lr()[0]
    print(f'Epoch {epoch+1}, Learning rate: {current_lr}')

    # Append epoch and average loss to plot lists
    epochs_plot.append(epoch)
    losses_plot.append(running_loss)

# Loss curve: one point per epoch, written next to the checkpoints.
fig, ax = plt.subplots()
ax.plot(epochs_plot, losses_plot, label='Training Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('SA unrolled with Reference L2 train Loss')
ax.legend()
ax.grid(True)
fig.savefig(os.path.join(exp_dir, 'loss_plot_plato_down.png'))  # Save plot as an image

# Dump the per-epoch losses, one value per line, for later comparison between runs.
losses_file = os.path.join(exp_dir, 'all_losses.txt')
with open(losses_file, 'w') as f:
    f.writelines(f'{epoch_loss}\n' for epoch_loss in losses_plot)

INFO:root:Epoch = [  0/241] Iter = [   0/   7] Loss = 0.1202 Avg Loss = 0.1202


Epoch 1, Learning rate: 0.0005


INFO:root:Epoch = [  1/241] Iter = [   0/   7] Loss = 0.09137 Avg Loss = 0.09137


Epoch 2, Learning rate: 0.0005


INFO:root:Epoch = [  2/241] Iter = [   0/   7] Loss = 0.06777 Avg Loss = 0.06777


Epoch 3, Learning rate: 0.0005


INFO:root:Epoch = [  3/241] Iter = [   0/   7] Loss = 0.07094 Avg Loss = 0.07094


Epoch 4, Learning rate: 0.0005


INFO:root:Epoch = [  4/241] Iter = [   0/   7] Loss = 0.05211 Avg Loss = 0.05211


Epoch 5, Learning rate: 0.0005


INFO:root:Epoch = [  5/241] Iter = [   0/   7] Loss = 0.04849 Avg Loss = 0.04849


Epoch 6, Learning rate: 0.0005


INFO:root:Epoch = [  6/241] Iter = [   0/   7] Loss = 0.04353 Avg Loss = 0.04353


Epoch 7, Learning rate: 0.0005


INFO:root:Epoch = [  7/241] Iter = [   0/   7] Loss = 0.04232 Avg Loss = 0.04232


Epoch 8, Learning rate: 0.0005


INFO:root:Epoch = [  8/241] Iter = [   0/   7] Loss = 0.04132 Avg Loss = 0.04132


Epoch 9, Learning rate: 0.0005


INFO:root:Epoch = [  9/241] Iter = [   0/   7] Loss = 0.03745 Avg Loss = 0.03745


Epoch 10, Learning rate: 0.0005


INFO:root:Epoch = [ 10/241] Iter = [   0/   7] Loss = 0.03507 Avg Loss = 0.03507


Epoch 11, Learning rate: 0.0005


INFO:root:Epoch = [ 11/241] Iter = [   0/   7] Loss = 0.03459 Avg Loss = 0.03459


Epoch 12, Learning rate: 0.0005


INFO:root:Epoch = [ 12/241] Iter = [   0/   7] Loss = 0.03789 Avg Loss = 0.03789


Epoch 13, Learning rate: 0.0005


INFO:root:Epoch = [ 13/241] Iter = [   0/   7] Loss = 0.03289 Avg Loss = 0.03289


Epoch 14, Learning rate: 0.0005


INFO:root:Epoch = [ 14/241] Iter = [   0/   7] Loss = 0.03464 Avg Loss = 0.03464


Epoch 15, Learning rate: 0.0005


INFO:root:Epoch = [ 15/241] Iter = [   0/   7] Loss = 0.03826 Avg Loss = 0.03826


Epoch 16, Learning rate: 0.0005


INFO:root:Epoch = [ 16/241] Iter = [   0/   7] Loss = 0.03794 Avg Loss = 0.03794


Epoch 17, Learning rate: 0.0005


INFO:root:Epoch = [ 17/241] Iter = [   0/   7] Loss = 0.05125 Avg Loss = 0.05125


Epoch 18, Learning rate: 0.0005


INFO:root:Epoch = [ 18/241] Iter = [   0/   7] Loss = 0.04413 Avg Loss = 0.04413


Epoch 19, Learning rate: 0.0005


INFO:root:Epoch = [ 19/241] Iter = [   0/   7] Loss = 0.04477 Avg Loss = 0.04477


Epoch 20, Learning rate: 0.00025


INFO:root:Epoch = [ 20/241] Iter = [   0/   7] Loss = 0.03172 Avg Loss = 0.03172


Epoch 21, Learning rate: 0.00025


INFO:root:Epoch = [ 21/241] Iter = [   0/   7] Loss = 0.02826 Avg Loss = 0.02826


Epoch 22, Learning rate: 0.00025


INFO:root:Epoch = [ 22/241] Iter = [   0/   7] Loss = 0.02843 Avg Loss = 0.02843


Epoch 23, Learning rate: 0.00025


INFO:root:Epoch = [ 23/241] Iter = [   0/   7] Loss = 0.03079 Avg Loss = 0.03079


Epoch 24, Learning rate: 0.00025


INFO:root:Epoch = [ 24/241] Iter = [   0/   7] Loss = 0.03963 Avg Loss = 0.03963


Epoch 25, Learning rate: 0.00025


INFO:root:Epoch = [ 25/241] Iter = [   0/   7] Loss = 0.03154 Avg Loss = 0.03154


Epoch 26, Learning rate: 0.00025


INFO:root:Epoch = [ 26/241] Iter = [   0/   7] Loss = 0.02838 Avg Loss = 0.02838


Epoch 27, Learning rate: 0.00025


INFO:root:Epoch = [ 27/241] Iter = [   0/   7] Loss = 0.03352 Avg Loss = 0.03352


Epoch 28, Learning rate: 0.00025


INFO:root:Epoch = [ 28/241] Iter = [   0/   7] Loss = 0.03033 Avg Loss = 0.03033


Epoch 29, Learning rate: 0.00025


INFO:root:Epoch = [ 29/241] Iter = [   0/   7] Loss = 0.03314 Avg Loss = 0.03314


Epoch 30, Learning rate: 0.00025


INFO:root:Epoch = [ 30/241] Iter = [   0/   7] Loss = 0.02951 Avg Loss = 0.02951


Epoch 31, Learning rate: 0.00025


INFO:root:Epoch = [ 31/241] Iter = [   0/   7] Loss = 0.03376 Avg Loss = 0.03376


Epoch 32, Learning rate: 0.00025


INFO:root:Epoch = [ 32/241] Iter = [   0/   7] Loss = 0.02737 Avg Loss = 0.02737


Epoch 33, Learning rate: 0.00025


INFO:root:Epoch = [ 33/241] Iter = [   0/   7] Loss = 0.05163 Avg Loss = 0.05163


Epoch 34, Learning rate: 0.00025


INFO:root:Epoch = [ 34/241] Iter = [   0/   7] Loss = 0.03208 Avg Loss = 0.03208


Epoch 35, Learning rate: 0.00025


INFO:root:Epoch = [ 35/241] Iter = [   0/   7] Loss = 0.02599 Avg Loss = 0.02599


Epoch 36, Learning rate: 0.00025


INFO:root:Epoch = [ 36/241] Iter = [   0/   7] Loss = 0.03107 Avg Loss = 0.03107


Epoch 37, Learning rate: 0.00025


INFO:root:Epoch = [ 37/241] Iter = [   0/   7] Loss = 0.02717 Avg Loss = 0.02717


Epoch 38, Learning rate: 0.00025


INFO:root:Epoch = [ 38/241] Iter = [   0/   7] Loss = 0.02578 Avg Loss = 0.02578


Epoch 39, Learning rate: 0.00025


INFO:root:Epoch = [ 39/241] Iter = [   0/   7] Loss = 0.0272 Avg Loss = 0.0272


Epoch 40, Learning rate: 0.000125


INFO:root:Epoch = [ 40/241] Iter = [   0/   7] Loss = 0.03991 Avg Loss = 0.03991


Epoch 41, Learning rate: 0.000125


INFO:root:Epoch = [ 41/241] Iter = [   0/   7] Loss = 0.02459 Avg Loss = 0.02459


Epoch 42, Learning rate: 0.000125


INFO:root:Epoch = [ 42/241] Iter = [   0/   7] Loss = 0.02535 Avg Loss = 0.02535


Epoch 43, Learning rate: 0.000125


INFO:root:Epoch = [ 43/241] Iter = [   0/   7] Loss = 0.0428 Avg Loss = 0.0428


Epoch 44, Learning rate: 0.000125


INFO:root:Epoch = [ 44/241] Iter = [   0/   7] Loss = 0.04847 Avg Loss = 0.04847


Epoch 45, Learning rate: 0.000125


INFO:root:Epoch = [ 45/241] Iter = [   0/   7] Loss = 0.02873 Avg Loss = 0.02873


Epoch 46, Learning rate: 0.000125


INFO:root:Epoch = [ 46/241] Iter = [   0/   7] Loss = 0.02737 Avg Loss = 0.02737


Epoch 47, Learning rate: 0.000125


INFO:root:Epoch = [ 47/241] Iter = [   0/   7] Loss = 0.0242 Avg Loss = 0.0242


Epoch 48, Learning rate: 0.000125


INFO:root:Epoch = [ 48/241] Iter = [   0/   7] Loss = 0.02643 Avg Loss = 0.02643


Epoch 49, Learning rate: 0.000125


INFO:root:Epoch = [ 49/241] Iter = [   0/   7] Loss = 0.02823 Avg Loss = 0.02823


Epoch 50, Learning rate: 0.000125


INFO:root:Epoch = [ 50/241] Iter = [   0/   7] Loss = 0.03008 Avg Loss = 0.03008


Epoch 51, Learning rate: 0.000125


INFO:root:Epoch = [ 51/241] Iter = [   0/   7] Loss = 0.0303 Avg Loss = 0.0303


Epoch 52, Learning rate: 0.000125


INFO:root:Epoch = [ 52/241] Iter = [   0/   7] Loss = 0.03111 Avg Loss = 0.03111


Epoch 53, Learning rate: 0.000125


INFO:root:Epoch = [ 53/241] Iter = [   0/   7] Loss = 0.0247 Avg Loss = 0.0247


Epoch 54, Learning rate: 0.000125


INFO:root:Epoch = [ 54/241] Iter = [   0/   7] Loss = 0.02728 Avg Loss = 0.02728


Epoch 55, Learning rate: 0.000125


INFO:root:Epoch = [ 55/241] Iter = [   0/   7] Loss = 0.02423 Avg Loss = 0.02423


Epoch 56, Learning rate: 0.000125


INFO:root:Epoch = [ 56/241] Iter = [   0/   7] Loss = 0.02741 Avg Loss = 0.02741


Epoch 57, Learning rate: 0.000125


INFO:root:Epoch = [ 57/241] Iter = [   0/   7] Loss = 0.02917 Avg Loss = 0.02917


Epoch 58, Learning rate: 0.000125


INFO:root:Epoch = [ 58/241] Iter = [   0/   7] Loss = 0.0297 Avg Loss = 0.0297


Epoch 59, Learning rate: 0.000125


INFO:root:Epoch = [ 59/241] Iter = [   0/   7] Loss = 0.02873 Avg Loss = 0.02873


Epoch 60, Learning rate: 6.25e-05


INFO:root:Epoch = [ 60/241] Iter = [   0/   7] Loss = 0.02754 Avg Loss = 0.02754


Epoch 61, Learning rate: 6.25e-05


INFO:root:Epoch = [ 61/241] Iter = [   0/   7] Loss = 0.03106 Avg Loss = 0.03106


Epoch 62, Learning rate: 6.25e-05


INFO:root:Epoch = [ 62/241] Iter = [   0/   7] Loss = 0.05683 Avg Loss = 0.05683


Epoch 63, Learning rate: 6.25e-05


INFO:root:Epoch = [ 63/241] Iter = [   0/   7] Loss = 0.03116 Avg Loss = 0.03116


Epoch 64, Learning rate: 6.25e-05


INFO:root:Epoch = [ 64/241] Iter = [   0/   7] Loss = 0.03074 Avg Loss = 0.03074


Epoch 65, Learning rate: 6.25e-05


INFO:root:Epoch = [ 65/241] Iter = [   0/   7] Loss = 0.03043 Avg Loss = 0.03043


Epoch 66, Learning rate: 6.25e-05


INFO:root:Epoch = [ 66/241] Iter = [   0/   7] Loss = 0.02802 Avg Loss = 0.02802


Epoch 67, Learning rate: 6.25e-05


INFO:root:Epoch = [ 67/241] Iter = [   0/   7] Loss = 0.02186 Avg Loss = 0.02186


Epoch 68, Learning rate: 6.25e-05


INFO:root:Epoch = [ 68/241] Iter = [   0/   7] Loss = 0.03322 Avg Loss = 0.03322


Epoch 69, Learning rate: 6.25e-05


INFO:root:Epoch = [ 69/241] Iter = [   0/   7] Loss = 0.0283 Avg Loss = 0.0283


Epoch 70, Learning rate: 6.25e-05


INFO:root:Epoch = [ 70/241] Iter = [   0/   7] Loss = 0.02264 Avg Loss = 0.02264


Epoch 71, Learning rate: 6.25e-05


INFO:root:Epoch = [ 71/241] Iter = [   0/   7] Loss = 0.02803 Avg Loss = 0.02803


Epoch 72, Learning rate: 6.25e-05


INFO:root:Epoch = [ 72/241] Iter = [   0/   7] Loss = 0.02746 Avg Loss = 0.02746


Epoch 73, Learning rate: 6.25e-05


INFO:root:Epoch = [ 73/241] Iter = [   0/   7] Loss = 0.03367 Avg Loss = 0.03367


Epoch 74, Learning rate: 6.25e-05


INFO:root:Epoch = [ 74/241] Iter = [   0/   7] Loss = 0.02831 Avg Loss = 0.02831


Epoch 75, Learning rate: 6.25e-05


INFO:root:Epoch = [ 75/241] Iter = [   0/   7] Loss = 0.03305 Avg Loss = 0.03305


Epoch 76, Learning rate: 6.25e-05


INFO:root:Epoch = [ 76/241] Iter = [   0/   7] Loss = 0.02542 Avg Loss = 0.02542


Epoch 77, Learning rate: 6.25e-05


INFO:root:Epoch = [ 77/241] Iter = [   0/   7] Loss = 0.03581 Avg Loss = 0.03581


Epoch 78, Learning rate: 6.25e-05


INFO:root:Epoch = [ 78/241] Iter = [   0/   7] Loss = 0.0249 Avg Loss = 0.0249


Epoch 79, Learning rate: 6.25e-05


INFO:root:Epoch = [ 79/241] Iter = [   0/   7] Loss = 0.02829 Avg Loss = 0.02829


Epoch 80, Learning rate: 3.125e-05


INFO:root:Epoch = [ 80/241] Iter = [   0/   7] Loss = 0.02571 Avg Loss = 0.02571


Epoch 81, Learning rate: 3.125e-05


INFO:root:Epoch = [ 81/241] Iter = [   0/   7] Loss = 0.02545 Avg Loss = 0.02545


Epoch 82, Learning rate: 3.125e-05


INFO:root:Epoch = [ 82/241] Iter = [   0/   7] Loss = 0.02336 Avg Loss = 0.02336


Epoch 83, Learning rate: 3.125e-05


INFO:root:Epoch = [ 83/241] Iter = [   0/   7] Loss = 0.02635 Avg Loss = 0.02635


Epoch 84, Learning rate: 3.125e-05


INFO:root:Epoch = [ 84/241] Iter = [   0/   7] Loss = 0.02766 Avg Loss = 0.02766


Epoch 85, Learning rate: 3.125e-05


INFO:root:Epoch = [ 85/241] Iter = [   0/   7] Loss = 0.02705 Avg Loss = 0.02705


Epoch 86, Learning rate: 3.125e-05


INFO:root:Epoch = [ 86/241] Iter = [   0/   7] Loss = 0.02751 Avg Loss = 0.02751


Epoch 87, Learning rate: 3.125e-05


INFO:root:Epoch = [ 87/241] Iter = [   0/   7] Loss = 0.0322 Avg Loss = 0.0322


Epoch 88, Learning rate: 3.125e-05


INFO:root:Epoch = [ 88/241] Iter = [   0/   7] Loss = 0.03023 Avg Loss = 0.03023


Epoch 89, Learning rate: 3.125e-05


INFO:root:Epoch = [ 89/241] Iter = [   0/   7] Loss = 0.02371 Avg Loss = 0.02371


Epoch 90, Learning rate: 3.125e-05


INFO:root:Epoch = [ 90/241] Iter = [   0/   7] Loss = 0.04342 Avg Loss = 0.04342


Epoch 91, Learning rate: 3.125e-05


INFO:root:Epoch = [ 91/241] Iter = [   0/   7] Loss = 0.02552 Avg Loss = 0.02552


Epoch 92, Learning rate: 3.125e-05


INFO:root:Epoch = [ 92/241] Iter = [   0/   7] Loss = 0.02708 Avg Loss = 0.02708


Epoch 93, Learning rate: 3.125e-05


INFO:root:Epoch = [ 93/241] Iter = [   0/   7] Loss = 0.03271 Avg Loss = 0.03271


Epoch 94, Learning rate: 3.125e-05


INFO:root:Epoch = [ 94/241] Iter = [   0/   7] Loss = 0.03527 Avg Loss = 0.03527


Epoch 95, Learning rate: 3.125e-05


INFO:root:Epoch = [ 95/241] Iter = [   0/   7] Loss = 0.03194 Avg Loss = 0.03194


Epoch 96, Learning rate: 3.125e-05


INFO:root:Epoch = [ 96/241] Iter = [   0/   7] Loss = 0.03282 Avg Loss = 0.03282


Epoch 97, Learning rate: 3.125e-05


INFO:root:Epoch = [ 97/241] Iter = [   0/   7] Loss = 0.02817 Avg Loss = 0.02817


Epoch 98, Learning rate: 3.125e-05


INFO:root:Epoch = [ 98/241] Iter = [   0/   7] Loss = 0.02609 Avg Loss = 0.02609


Epoch 99, Learning rate: 3.125e-05


INFO:root:Epoch = [ 99/241] Iter = [   0/   7] Loss = 0.02654 Avg Loss = 0.02654


Epoch 100, Learning rate: 1.5625e-05


INFO:root:Epoch = [100/241] Iter = [   0/   7] Loss = 0.02508 Avg Loss = 0.02508


Epoch 101, Learning rate: 1.5625e-05


INFO:root:Epoch = [101/241] Iter = [   0/   7] Loss = 0.03061 Avg Loss = 0.03061


Epoch 102, Learning rate: 1.5625e-05


INFO:root:Epoch = [102/241] Iter = [   0/   7] Loss = 0.03039 Avg Loss = 0.03039


Epoch 103, Learning rate: 1.5625e-05


INFO:root:Epoch = [103/241] Iter = [   0/   7] Loss = 0.04147 Avg Loss = 0.04147


Epoch 104, Learning rate: 1.5625e-05


INFO:root:Epoch = [104/241] Iter = [   0/   7] Loss = 0.03104 Avg Loss = 0.03104


Epoch 105, Learning rate: 1.5625e-05


INFO:root:Epoch = [105/241] Iter = [   0/   7] Loss = 0.03418 Avg Loss = 0.03418


Epoch 106, Learning rate: 1.5625e-05


INFO:root:Epoch = [106/241] Iter = [   0/   7] Loss = 0.032 Avg Loss = 0.032


Epoch 107, Learning rate: 1.5625e-05


INFO:root:Epoch = [107/241] Iter = [   0/   7] Loss = 0.03095 Avg Loss = 0.03095


Epoch 108, Learning rate: 1.5625e-05


INFO:root:Epoch = [108/241] Iter = [   0/   7] Loss = 0.02219 Avg Loss = 0.02219


Epoch 109, Learning rate: 1.5625e-05


INFO:root:Epoch = [109/241] Iter = [   0/   7] Loss = 0.02504 Avg Loss = 0.02504


Epoch 110, Learning rate: 1.5625e-05


INFO:root:Epoch = [110/241] Iter = [   0/   7] Loss = 0.02497 Avg Loss = 0.02497


Epoch 111, Learning rate: 1.5625e-05


INFO:root:Epoch = [111/241] Iter = [   0/   7] Loss = 0.02732 Avg Loss = 0.02732


Epoch 112, Learning rate: 1.5625e-05


INFO:root:Epoch = [112/241] Iter = [   0/   7] Loss = 0.02333 Avg Loss = 0.02333


Epoch 113, Learning rate: 1.5625e-05


INFO:root:Epoch = [113/241] Iter = [   0/   7] Loss = 0.02932 Avg Loss = 0.02932


Epoch 114, Learning rate: 1.5625e-05


INFO:root:Epoch = [114/241] Iter = [   0/   7] Loss = 0.02468 Avg Loss = 0.02468


Epoch 115, Learning rate: 1.5625e-05


INFO:root:Epoch = [115/241] Iter = [   0/   7] Loss = 0.02465 Avg Loss = 0.02465


Epoch 116, Learning rate: 1.5625e-05


INFO:root:Epoch = [116/241] Iter = [   0/   7] Loss = 0.02402 Avg Loss = 0.02402


Epoch 117, Learning rate: 1.5625e-05


INFO:root:Epoch = [117/241] Iter = [   0/   7] Loss = 0.02566 Avg Loss = 0.02566


Epoch 118, Learning rate: 1.5625e-05


INFO:root:Epoch = [118/241] Iter = [   0/   7] Loss = 0.026 Avg Loss = 0.026


Epoch 119, Learning rate: 1.5625e-05


INFO:root:Epoch = [119/241] Iter = [   0/   7] Loss = 0.04094 Avg Loss = 0.04094


Epoch 120, Learning rate: 7.8125e-06


INFO:root:Epoch = [120/241] Iter = [   0/   7] Loss = 0.02776 Avg Loss = 0.02776


Epoch 121, Learning rate: 7.8125e-06


INFO:root:Epoch = [121/241] Iter = [   0/   7] Loss = 0.02313 Avg Loss = 0.02313


Epoch 122, Learning rate: 7.8125e-06


INFO:root:Epoch = [122/241] Iter = [   0/   7] Loss = 0.02627 Avg Loss = 0.02627


Epoch 123, Learning rate: 7.8125e-06


INFO:root:Epoch = [123/241] Iter = [   0/   7] Loss = 0.02601 Avg Loss = 0.02601


Epoch 124, Learning rate: 7.8125e-06


INFO:root:Epoch = [124/241] Iter = [   0/   7] Loss = 0.03704 Avg Loss = 0.03704


Epoch 125, Learning rate: 7.8125e-06


INFO:root:Epoch = [125/241] Iter = [   0/   7] Loss = 0.02327 Avg Loss = 0.02327


Epoch 126, Learning rate: 7.8125e-06


INFO:root:Epoch = [126/241] Iter = [   0/   7] Loss = 0.02485 Avg Loss = 0.02485


Epoch 127, Learning rate: 7.8125e-06


INFO:root:Epoch = [127/241] Iter = [   0/   7] Loss = 0.0302 Avg Loss = 0.0302


Epoch 128, Learning rate: 7.8125e-06


INFO:root:Epoch = [128/241] Iter = [   0/   7] Loss = 0.0241 Avg Loss = 0.0241


Epoch 129, Learning rate: 7.8125e-06


INFO:root:Epoch = [129/241] Iter = [   0/   7] Loss = 0.0281 Avg Loss = 0.0281


Epoch 130, Learning rate: 7.8125e-06


INFO:root:Epoch = [130/241] Iter = [   0/   7] Loss = 0.02529 Avg Loss = 0.02529


Epoch 131, Learning rate: 7.8125e-06


INFO:root:Epoch = [131/241] Iter = [   0/   7] Loss = 0.0264 Avg Loss = 0.0264


Epoch 132, Learning rate: 7.8125e-06


INFO:root:Epoch = [132/241] Iter = [   0/   7] Loss = 0.0315 Avg Loss = 0.0315


Epoch 133, Learning rate: 7.8125e-06


INFO:root:Epoch = [133/241] Iter = [   0/   7] Loss = 0.02255 Avg Loss = 0.02255


Epoch 134, Learning rate: 7.8125e-06


INFO:root:Epoch = [134/241] Iter = [   0/   7] Loss = 0.02689 Avg Loss = 0.02689


Epoch 135, Learning rate: 7.8125e-06


INFO:root:Epoch = [135/241] Iter = [   0/   7] Loss = 0.02338 Avg Loss = 0.02338


Epoch 136, Learning rate: 7.8125e-06


INFO:root:Epoch = [136/241] Iter = [   0/   7] Loss = 0.02344 Avg Loss = 0.02344


Epoch 137, Learning rate: 7.8125e-06


INFO:root:Epoch = [137/241] Iter = [   0/   7] Loss = 0.02717 Avg Loss = 0.02717


Epoch 138, Learning rate: 7.8125e-06


INFO:root:Epoch = [138/241] Iter = [   0/   7] Loss = 0.02256 Avg Loss = 0.02256


Epoch 139, Learning rate: 7.8125e-06


INFO:root:Epoch = [139/241] Iter = [   0/   7] Loss = 0.02553 Avg Loss = 0.02553


Epoch 140, Learning rate: 3.90625e-06


INFO:root:Epoch = [140/241] Iter = [   0/   7] Loss = 0.02847 Avg Loss = 0.02847


Epoch 141, Learning rate: 3.90625e-06


INFO:root:Epoch = [141/241] Iter = [   0/   7] Loss = 0.0374 Avg Loss = 0.0374


Epoch 142, Learning rate: 3.90625e-06


INFO:root:Epoch = [142/241] Iter = [   0/   7] Loss = 0.02824 Avg Loss = 0.02824


Epoch 143, Learning rate: 3.90625e-06


INFO:root:Epoch = [143/241] Iter = [   0/   7] Loss = 0.02361 Avg Loss = 0.02361


Epoch 144, Learning rate: 3.90625e-06


INFO:root:Epoch = [144/241] Iter = [   0/   7] Loss = 0.02999 Avg Loss = 0.02999


Epoch 145, Learning rate: 3.90625e-06


INFO:root:Epoch = [145/241] Iter = [   0/   7] Loss = 0.02746 Avg Loss = 0.02746


Epoch 146, Learning rate: 3.90625e-06


INFO:root:Epoch = [146/241] Iter = [   0/   7] Loss = 0.03427 Avg Loss = 0.03427


Epoch 147, Learning rate: 3.90625e-06


INFO:root:Epoch = [147/241] Iter = [   0/   7] Loss = 0.02477 Avg Loss = 0.02477


Epoch 148, Learning rate: 3.90625e-06


INFO:root:Epoch = [148/241] Iter = [   0/   7] Loss = 0.02237 Avg Loss = 0.02237


Epoch 149, Learning rate: 3.90625e-06


INFO:root:Epoch = [149/241] Iter = [   0/   7] Loss = 0.0281 Avg Loss = 0.0281


Epoch 150, Learning rate: 3.90625e-06


INFO:root:Epoch = [150/241] Iter = [   0/   7] Loss = 0.0275 Avg Loss = 0.0275


Epoch 151, Learning rate: 3.90625e-06


INFO:root:Epoch = [151/241] Iter = [   0/   7] Loss = 0.02341 Avg Loss = 0.02341


Epoch 152, Learning rate: 3.90625e-06


INFO:root:Epoch = [152/241] Iter = [   0/   7] Loss = 0.02331 Avg Loss = 0.02331


Epoch 153, Learning rate: 3.90625e-06


INFO:root:Epoch = [153/241] Iter = [   0/   7] Loss = 0.02331 Avg Loss = 0.02331


Epoch 154, Learning rate: 3.90625e-06


INFO:root:Epoch = [154/241] Iter = [   0/   7] Loss = 0.03624 Avg Loss = 0.03624


Epoch 155, Learning rate: 3.90625e-06


INFO:root:Epoch = [155/241] Iter = [   0/   7] Loss = 0.03518 Avg Loss = 0.03518


Epoch 156, Learning rate: 3.90625e-06


INFO:root:Epoch = [156/241] Iter = [   0/   7] Loss = 0.02794 Avg Loss = 0.02794


Epoch 157, Learning rate: 3.90625e-06


INFO:root:Epoch = [157/241] Iter = [   0/   7] Loss = 0.02482 Avg Loss = 0.02482


Epoch 158, Learning rate: 3.90625e-06


INFO:root:Epoch = [158/241] Iter = [   0/   7] Loss = 0.02536 Avg Loss = 0.02536


Epoch 159, Learning rate: 3.90625e-06


INFO:root:Epoch = [159/241] Iter = [   0/   7] Loss = 0.02656 Avg Loss = 0.02656


Epoch 160, Learning rate: 1.953125e-06


INFO:root:Epoch = [160/241] Iter = [   0/   7] Loss = 0.02391 Avg Loss = 0.02391


Epoch 161, Learning rate: 1.953125e-06


INFO:root:Epoch = [161/241] Iter = [   0/   7] Loss = 0.026 Avg Loss = 0.026


Epoch 162, Learning rate: 1.953125e-06


INFO:root:Epoch = [162/241] Iter = [   0/   7] Loss = 0.03342 Avg Loss = 0.03342


Epoch 163, Learning rate: 1.953125e-06


INFO:root:Epoch = [163/241] Iter = [   0/   7] Loss = 0.03543 Avg Loss = 0.03543


Epoch 164, Learning rate: 1.953125e-06


INFO:root:Epoch = [164/241] Iter = [   0/   7] Loss = 0.02671 Avg Loss = 0.02671


Epoch 165, Learning rate: 1.953125e-06


INFO:root:Epoch = [165/241] Iter = [   0/   7] Loss = 0.02634 Avg Loss = 0.02634


Epoch 166, Learning rate: 1.953125e-06


INFO:root:Epoch = [166/241] Iter = [   0/   7] Loss = 0.03694 Avg Loss = 0.03694


Epoch 167, Learning rate: 1.953125e-06


INFO:root:Epoch = [167/241] Iter = [   0/   7] Loss = 0.02936 Avg Loss = 0.02936


Epoch 168, Learning rate: 1.953125e-06


INFO:root:Epoch = [168/241] Iter = [   0/   7] Loss = 0.02663 Avg Loss = 0.02663


Epoch 169, Learning rate: 1.953125e-06


INFO:root:Epoch = [169/241] Iter = [   0/   7] Loss = 0.02527 Avg Loss = 0.02527


Epoch 170, Learning rate: 1.953125e-06


INFO:root:Epoch = [170/241] Iter = [   0/   7] Loss = 0.02475 Avg Loss = 0.02475


Epoch 171, Learning rate: 1.953125e-06


INFO:root:Epoch = [171/241] Iter = [   0/   7] Loss = 0.029 Avg Loss = 0.029


Epoch 172, Learning rate: 1.953125e-06


INFO:root:Epoch = [172/241] Iter = [   0/   7] Loss = 0.02326 Avg Loss = 0.02326


Epoch 173, Learning rate: 1.953125e-06


INFO:root:Epoch = [173/241] Iter = [   0/   7] Loss = 0.02372 Avg Loss = 0.02372


Epoch 174, Learning rate: 1.953125e-06


INFO:root:Epoch = [174/241] Iter = [   0/   7] Loss = 0.034 Avg Loss = 0.034


Epoch 175, Learning rate: 1.953125e-06


INFO:root:Epoch = [175/241] Iter = [   0/   7] Loss = 0.03015 Avg Loss = 0.03015


Epoch 176, Learning rate: 1.953125e-06


INFO:root:Epoch = [176/241] Iter = [   0/   7] Loss = 0.02644 Avg Loss = 0.02644


Epoch 177, Learning rate: 1.953125e-06


INFO:root:Epoch = [177/241] Iter = [   0/   7] Loss = 0.02708 Avg Loss = 0.02708


Epoch 178, Learning rate: 1.953125e-06


INFO:root:Epoch = [178/241] Iter = [   0/   7] Loss = 0.03023 Avg Loss = 0.03023


Epoch 179, Learning rate: 1.953125e-06


INFO:root:Epoch = [179/241] Iter = [   0/   7] Loss = 0.02654 Avg Loss = 0.02654


Epoch 180, Learning rate: 9.765625e-07


INFO:root:Epoch = [180/241] Iter = [   0/   7] Loss = 0.03196 Avg Loss = 0.03196


Epoch 181, Learning rate: 9.765625e-07


INFO:root:Epoch = [181/241] Iter = [   0/   7] Loss = 0.02276 Avg Loss = 0.02276


Epoch 182, Learning rate: 9.765625e-07


INFO:root:Epoch = [182/241] Iter = [   0/   7] Loss = 0.02779 Avg Loss = 0.02779


Epoch 183, Learning rate: 9.765625e-07


INFO:root:Epoch = [183/241] Iter = [   0/   7] Loss = 0.02354 Avg Loss = 0.02354


Epoch 184, Learning rate: 9.765625e-07


INFO:root:Epoch = [184/241] Iter = [   0/   7] Loss = 0.03623 Avg Loss = 0.03623


Epoch 185, Learning rate: 9.765625e-07


INFO:root:Epoch = [185/241] Iter = [   0/   7] Loss = 0.02197 Avg Loss = 0.02197


Epoch 186, Learning rate: 9.765625e-07


INFO:root:Epoch = [186/241] Iter = [   0/   7] Loss = 0.02523 Avg Loss = 0.02523


Epoch 187, Learning rate: 9.765625e-07


INFO:root:Epoch = [187/241] Iter = [   0/   7] Loss = 0.03097 Avg Loss = 0.03097


Epoch 188, Learning rate: 9.765625e-07


INFO:root:Epoch = [188/241] Iter = [   0/   7] Loss = 0.02594 Avg Loss = 0.02594


Epoch 189, Learning rate: 9.765625e-07


INFO:root:Epoch = [189/241] Iter = [   0/   7] Loss = 0.02651 Avg Loss = 0.02651


Epoch 190, Learning rate: 9.765625e-07


INFO:root:Epoch = [190/241] Iter = [   0/   7] Loss = 0.02551 Avg Loss = 0.02551


Epoch 191, Learning rate: 9.765625e-07


INFO:root:Epoch = [191/241] Iter = [   0/   7] Loss = 0.03122 Avg Loss = 0.03122


Epoch 192, Learning rate: 9.765625e-07


INFO:root:Epoch = [192/241] Iter = [   0/   7] Loss = 0.02606 Avg Loss = 0.02606


Epoch 193, Learning rate: 9.765625e-07


INFO:root:Epoch = [193/241] Iter = [   0/   7] Loss = 0.02474 Avg Loss = 0.02474


Epoch 194, Learning rate: 9.765625e-07


INFO:root:Epoch = [194/241] Iter = [   0/   7] Loss = 0.02882 Avg Loss = 0.02882


Epoch 195, Learning rate: 9.765625e-07


INFO:root:Epoch = [195/241] Iter = [   0/   7] Loss = 0.02693 Avg Loss = 0.02693


Epoch 196, Learning rate: 9.765625e-07


INFO:root:Epoch = [196/241] Iter = [   0/   7] Loss = 0.02799 Avg Loss = 0.02799


Epoch 197, Learning rate: 9.765625e-07


INFO:root:Epoch = [197/241] Iter = [   0/   7] Loss = 0.03593 Avg Loss = 0.03593


Epoch 198, Learning rate: 9.765625e-07


INFO:root:Epoch = [198/241] Iter = [   0/   7] Loss = 0.02632 Avg Loss = 0.02632


Epoch 199, Learning rate: 9.765625e-07


INFO:root:Epoch = [199/241] Iter = [   0/   7] Loss = 0.029 Avg Loss = 0.029


Epoch 200, Learning rate: 4.8828125e-07


INFO:root:Epoch = [200/241] Iter = [   0/   7] Loss = 0.02554 Avg Loss = 0.02554


Epoch 201, Learning rate: 4.8828125e-07


INFO:root:Epoch = [201/241] Iter = [   0/   7] Loss = 0.02733 Avg Loss = 0.02733


Epoch 202, Learning rate: 4.8828125e-07


INFO:root:Epoch = [202/241] Iter = [   0/   7] Loss = 0.02913 Avg Loss = 0.02913


Epoch 203, Learning rate: 4.8828125e-07


INFO:root:Epoch = [203/241] Iter = [   0/   7] Loss = 0.03662 Avg Loss = 0.03662


Epoch 204, Learning rate: 4.8828125e-07


INFO:root:Epoch = [204/241] Iter = [   0/   7] Loss = 0.02664 Avg Loss = 0.02664


Epoch 205, Learning rate: 4.8828125e-07


INFO:root:Epoch = [205/241] Iter = [   0/   7] Loss = 0.0257 Avg Loss = 0.0257


Epoch 206, Learning rate: 4.8828125e-07


INFO:root:Epoch = [206/241] Iter = [   0/   7] Loss = 0.03277 Avg Loss = 0.03277


Epoch 207, Learning rate: 4.8828125e-07


INFO:root:Epoch = [207/241] Iter = [   0/   7] Loss = 0.02454 Avg Loss = 0.02454


Epoch 208, Learning rate: 4.8828125e-07


INFO:root:Epoch = [208/241] Iter = [   0/   7] Loss = 0.03419 Avg Loss = 0.03419


Epoch 209, Learning rate: 4.8828125e-07


INFO:root:Epoch = [209/241] Iter = [   0/   7] Loss = 0.02495 Avg Loss = 0.02495


Epoch 210, Learning rate: 4.8828125e-07


INFO:root:Epoch = [210/241] Iter = [   0/   7] Loss = 0.03229 Avg Loss = 0.03229


Epoch 211, Learning rate: 4.8828125e-07


INFO:root:Epoch = [211/241] Iter = [   0/   7] Loss = 0.03765 Avg Loss = 0.03765


Epoch 212, Learning rate: 4.8828125e-07


INFO:root:Epoch = [212/241] Iter = [   0/   7] Loss = 0.02552 Avg Loss = 0.02552


Epoch 213, Learning rate: 4.8828125e-07


INFO:root:Epoch = [213/241] Iter = [   0/   7] Loss = 0.03456 Avg Loss = 0.03456


Epoch 214, Learning rate: 4.8828125e-07


INFO:root:Epoch = [214/241] Iter = [   0/   7] Loss = 0.02608 Avg Loss = 0.02608


Epoch 215, Learning rate: 4.8828125e-07


INFO:root:Epoch = [215/241] Iter = [   0/   7] Loss = 0.02377 Avg Loss = 0.02377


Epoch 216, Learning rate: 4.8828125e-07


INFO:root:Epoch = [216/241] Iter = [   0/   7] Loss = 0.03295 Avg Loss = 0.03295


Epoch 217, Learning rate: 4.8828125e-07


INFO:root:Epoch = [217/241] Iter = [   0/   7] Loss = 0.02509 Avg Loss = 0.02509


Epoch 218, Learning rate: 4.8828125e-07


INFO:root:Epoch = [218/241] Iter = [   0/   7] Loss = 0.02342 Avg Loss = 0.02342


Epoch 219, Learning rate: 4.8828125e-07


INFO:root:Epoch = [219/241] Iter = [   0/   7] Loss = 0.02802 Avg Loss = 0.02802


Epoch 220, Learning rate: 2.44140625e-07


INFO:root:Epoch = [220/241] Iter = [   0/   7] Loss = 0.03014 Avg Loss = 0.03014


Epoch 221, Learning rate: 2.44140625e-07


INFO:root:Epoch = [221/241] Iter = [   0/   7] Loss = 0.02619 Avg Loss = 0.02619


Epoch 222, Learning rate: 2.44140625e-07


INFO:root:Epoch = [222/241] Iter = [   0/   7] Loss = 0.02186 Avg Loss = 0.02186


Epoch 223, Learning rate: 2.44140625e-07


INFO:root:Epoch = [223/241] Iter = [   0/   7] Loss = 0.02783 Avg Loss = 0.02783


Epoch 224, Learning rate: 2.44140625e-07


INFO:root:Epoch = [224/241] Iter = [   0/   7] Loss = 0.02674 Avg Loss = 0.02674


Epoch 225, Learning rate: 2.44140625e-07


INFO:root:Epoch = [225/241] Iter = [   0/   7] Loss = 0.03145 Avg Loss = 0.03145


Epoch 226, Learning rate: 2.44140625e-07


INFO:root:Epoch = [226/241] Iter = [   0/   7] Loss = 0.02402 Avg Loss = 0.02402


Epoch 227, Learning rate: 2.44140625e-07


INFO:root:Epoch = [227/241] Iter = [   0/   7] Loss = 0.02696 Avg Loss = 0.02696


Epoch 228, Learning rate: 2.44140625e-07


INFO:root:Epoch = [228/241] Iter = [   0/   7] Loss = 0.04348 Avg Loss = 0.04348


Epoch 229, Learning rate: 2.44140625e-07


INFO:root:Epoch = [229/241] Iter = [   0/   7] Loss = 0.02082 Avg Loss = 0.02082


Epoch 230, Learning rate: 2.44140625e-07


INFO:root:Epoch = [230/241] Iter = [   0/   7] Loss = 0.02893 Avg Loss = 0.02893


Epoch 231, Learning rate: 2.44140625e-07


INFO:root:Epoch = [231/241] Iter = [   0/   7] Loss = 0.02469 Avg Loss = 0.02469


Epoch 232, Learning rate: 2.44140625e-07


INFO:root:Epoch = [232/241] Iter = [   0/   7] Loss = 0.02425 Avg Loss = 0.02425


Epoch 233, Learning rate: 2.44140625e-07


INFO:root:Epoch = [233/241] Iter = [   0/   7] Loss = 0.02745 Avg Loss = 0.02745


Epoch 234, Learning rate: 2.44140625e-07


INFO:root:Epoch = [234/241] Iter = [   0/   7] Loss = 0.02607 Avg Loss = 0.02607


Epoch 235, Learning rate: 2.44140625e-07


INFO:root:Epoch = [235/241] Iter = [   0/   7] Loss = 0.0283 Avg Loss = 0.0283


Epoch 236, Learning rate: 2.44140625e-07


INFO:root:Epoch = [236/241] Iter = [   0/   7] Loss = 0.03652 Avg Loss = 0.03652


Epoch 237, Learning rate: 2.44140625e-07


INFO:root:Epoch = [237/241] Iter = [   0/   7] Loss = 0.02767 Avg Loss = 0.02767


Epoch 238, Learning rate: 2.44140625e-07


INFO:root:Epoch = [238/241] Iter = [   0/   7] Loss = 0.03336 Avg Loss = 0.03336


Epoch 239, Learning rate: 2.44140625e-07


INFO:root:Epoch = [239/241] Iter = [   0/   7] Loss = 0.02504 Avg Loss = 0.02504


Epoch 240, Learning rate: 1.220703125e-07


INFO:root:Epoch = [240/241] Iter = [   0/   7] Loss = 0.02504 Avg Loss = 0.02504


Epoch 241, Learning rate: 1.220703125e-07


<IPython.core.display.Javascript object>