# Models Testing on Heston data


In [1]:
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [2]:
# Set seeds
torch.manual_seed(0)
np.random.seed(0)

In [3]:
synthetic_calls_path = '../data/heston_mc_synthetic_calls_tot.csv'
synthetic_puts_path = '../data/heston_mc_synthetic_puts_tot.csv'

In [4]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')
    
    return df

In [5]:
synthetic_calls = pd.read_csv(synthetic_calls_path, index_col=0)
synthetic_puts = pd.read_csv(synthetic_puts_path, index_col=0)

synthetic_calls = reduce_mem_usage(synthetic_calls)
synthetic_puts = reduce_mem_usage(synthetic_puts)

In [6]:
synthetic_options = pd.concat([synthetic_calls, synthetic_puts], axis=0)
synthetic_options = shuffle(synthetic_options, random_state=0)
synthetic_options = synthetic_options.reset_index()
synthetic_options = synthetic_options.drop('index', axis=1)

In [7]:
synthetic_options

Unnamed: 0,Price,Strike,Type,Kappa,Rho,Theta,Xi,V_0,Interest Rate,Time to Expiration,Option Price
0,100,143.0,C,1.728516,0.158936,0.158936,0.158936,0.158936,0.058685,0.158936,0.000000
1,100,68.0,P,1.207031,0.547852,0.547852,0.547852,0.547852,0.073486,0.547852,0.957520
2,100,127.0,P,0.997070,0.672363,0.672363,0.672363,0.672363,0.014740,0.672363,26.546875
3,100,140.0,P,1.411133,0.494629,0.494629,0.494629,0.494629,0.052063,0.494629,38.750000
4,100,60.0,C,0.271729,0.102478,0.102478,0.102478,0.102478,0.035553,0.102478,38.593750
...,...,...,...,...,...,...,...,...,...,...,...
807995,100,71.0,C,0.553223,0.829590,0.829590,0.829590,0.829590,0.076904,0.829590,27.875000
807996,100,57.0,C,1.411133,0.825684,0.825684,0.825684,0.825684,0.084656,0.825684,42.437500
807997,100,135.0,C,1.419922,1.062500,1.062500,1.062500,1.062500,0.073792,1.062500,2.345703
807998,100,64.0,P,1.577148,1.038086,1.038086,1.038086,1.038086,0.091187,1.038086,0.202026


# Preprocessing

In [8]:
synthetic_options = pd.get_dummies(synthetic_options, prefix='', prefix_sep='')

In [9]:
input_sc = StandardScaler()
output_sc = StandardScaler()
input_data = input_sc.fit_transform(synthetic_options.drop('Option Price', axis=1))
output_data = output_sc.fit_transform(synthetic_options['Option Price'].values.reshape(-1, 1))

train_size = 0.8
val_size = 0.1

last_train_idx = int(np.round(len(input_data) * train_size))
last_val_idx = last_train_idx + int(np.round(len(input_data) * val_size))

X_train = input_data[0:last_train_idx]
X_val = input_data[last_train_idx:last_val_idx]
X_test = input_data[last_val_idx:]

y_train = output_data[0:last_train_idx]
y_val = output_data[last_train_idx:last_val_idx]
y_test = output_data[last_val_idx:]

In [10]:
X_train = Variable(torch.Tensor(X_train))
X_val = Variable(torch.Tensor(X_val))
X_test = Variable(torch.Tensor(X_test))

y_train = Variable(torch.Tensor(y_train))
y_val = Variable(torch.Tensor(y_val))
y_test = Variable(torch.Tensor(y_test))

# Model

In [11]:
CUDA = torch.cuda.is_available()
device = 'cuda:0' if CUDA else 'cpu'

In [12]:
class ResBlock(nn.Module):

  def __init__(self, module):
    super(ResBlock, self).__init__()
    self.module = module

  def forward(self, x):
    return self.module(x) + x

In [13]:
class HiddenLayer(nn.Module):

  def __init__(self, layer_size, act_fn):
      super(HiddenLayer, self).__init__()
      
      if act_fn == 'ReLU':
        self.layer = nn.Sequential(
          nn.Linear(layer_size, layer_size),
          nn.ReLU())
      elif act_fn == 'LeakyReLU':
        self.layer = nn.Sequential(
          nn.Linear(layer_size, layer_size),
          nn.LeakyReLU())
      elif act_fn == 'ELU':
        self.layer = nn.Sequential(
          nn.Linear(layer_size, layer_size),
          nn.ELU())
    
  def forward(self, x):
    return self.layer(x)

In [14]:
class Net(nn.Module):

  def __init__(self, input_size, output_size, hidden_size, num_layers, act_fn):
    super(Net, self).__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.hidden_size = hidden_size

    if act_fn == 'ReLU':
      self.initial_layer = nn.Sequential(
          nn.Linear(self.input_size, self.hidden_size),
          nn.ReLU())
    elif act_fn == 'LeakyReLU':
      self.initial_layer = nn.Sequential(
          nn.Linear(self.input_size, self.hidden_size),
          nn.LeakyReLU())
    elif act_fn == 'ELU':
      self.initial_layer = nn.Sequential(
          nn.Linear(self.input_size, self.hidden_size),
          nn.ELU())

    self.hidden_layers_list = []

    for i in range(num_layers // 2):
      self.hidden_layers_list.append(
          ResBlock(
            nn.Sequential(
                HiddenLayer(self.hidden_size, act_fn),
                HiddenLayer(self.hidden_size, act_fn)
            )
        )
      )

    self.hidden_layers = nn.Sequential(*self.hidden_layers_list)

    self.net = nn.Sequential(
        self.initial_layer,
        self.hidden_layers,
        nn.Linear(self.hidden_size, self.output_size)
    )
  
  def forward(self, x):
    return self.net(x)

In [15]:
def init_weights(m, init_m: str):

  @torch.no_grad()
  def init_uniform(m):
    if isinstance(m, nn.Linear):
      torch.nn.init.uniform_(m.weight)
      m.bias.data.fill_(0.01)

  @torch.no_grad()
  def init_normal(m):
    if isinstance(m, nn.Linear):
      torch.nn.init.normal_(m.weight)
      m.bias.data.fill_(0.01)

  @torch.no_grad()
  def init_xuniform(m):
    if isinstance(m, nn.Linear):
      torch.nn.init.xavier_uniform_(m.weight)
      m.bias.data.fill_(0.01)

  @torch.no_grad()
  def init_xnormal(m):
    if isinstance(m, nn.Linear):
      torch.nn.init.xavier_normal_(m.weight)
      m.bias.data.fill_(0.01)

  if init_m == 'uniform':
    m.apply(init_uniform)
  elif init_m == 'normal':
    m.apply(init_normal)
  elif init_m == 'xaiver uniform':
    m.apply(init_xuniform)
  elif init_m == 'xavier normal':
    m.apply(init_xnormal)

# Best models from cross validation

In [16]:
best_models = [
               {'n_hidden': 400, 'n_layers': 8, 'act_fun': 'LeakyReLU', 'init_method': 'xavier uniform'},
               {'n_hidden': 400, 'n_layers': 4, 'act_fun': 'ReLU', 'init_method': 'xavier uniform'},
               {'n_hidden': 400, 'n_layers': 4, 'act_fun': 'LeakyReLU', 'init_method': 'xavier uniform'},
               {'n_hidden': 400, 'n_layers': 6, 'act_fun': 'LeakyReLU', 'init_method': 'xavier uniform'},
               {'n_hidden': 600, 'n_layers': 4, 'act_fun': 'LeakyReLU', 'init_method': 'xavier uniform'}
]

# Training

In [17]:
input_size = 11
output_size = 1
batch_size = 1024
epochs = 125
lr = 1e-4

loss_fn = nn.MSELoss()

In [18]:
class OptDataset(Dataset):

  def __init__(self, X, y):
    self.X = X
    self.y = y

  def __getitem__(self, idx):
    return self.X[idx], self.y[idx]

  def __len__(self):
    return len(self.X)

In [19]:
def evaluate(model, loss_fn, X, y):
  model.eval()
  with torch.no_grad():
    out = model(X.to(device))
    loss = loss_fn(out, y.to(device))
    return loss.item()

In [20]:
def mape_loss(y_pred, y):
  return sum([np.abs(y_i - hy_i) / y_i for y_i, hy_i in zip(y_pred, y)]) / len(y_pred)

In [21]:
def get_mape_loss(model, X, y):
  model.eval()
  with torch.no_grad():
    out = model(X.to(device))
    loss = mape_loss(
        out.squeeze().cpu().detach().numpy(), 
        y.squeeze().cpu().detach().numpy())
    return loss

In [22]:
def test_models(
  models_dict,
  epochs,
  batch_size,
  X_train,
  y_train,
  X_val,
  y_val,
  X_test,
  y_test,
  loss_fn
):
  testing_results = pd.DataFrame(columns=
                               ['hidden_size',
                                'n_layers',
                                'act_fun',
                                'init_methods',
                                'mean_val_result',
                                'std_val_result',
                                'test_mse',
                                'test_mae',
                                'test_rmse',
                                'test_mape'])

  for model_dict in models_dict:
    model = Net(input_size,
                output_size, 
                model_dict['n_hidden'], 
                model_dict['n_layers'], 
                model_dict['act_fun']).to(device)
    init_weights(model, model_dict['init_method'])
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    validation_losses = []
    test_res = {
        'hidden_size': model_dict['n_hidden'],
        'n_layers': model_dict['n_layers'],
        'act_fun': model_dict['act_fun'],
        'init_methods': model_dict['init_method']
    }

    print('Model: ', test_res)

    for epoch in range(epochs):
      model.train()
      total_loss = 0
      start_time = time.time()
      i = 0

      for batch, batch_labels in DataLoader(OptDataset(X_train, y_train), batch_size):
        out = model(batch.to(device))
        optimizer.zero_grad()

        loss = loss_fn(out, batch_labels.to(device))
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

        if i > 0 and i % 25 == 0:
          avg_loss = total_loss / 50
          elapsed = time.time() - start_time
          print('| Epoch {:3d} | {:5d}/{:5d} batches | lr {:2.5f} | ms/batch {:5.2f} | '
                  'loss {:5.8f}'.format(
              epoch, i, len(X_train) // batch_size+1, lr, elapsed * 1000 / 50,
              avg_loss))
          start_time = time.time()
          total_loss = 0
        
        i += 1

      validation_losses.append(evaluate(model, loss_fn, X_val, y_val))
    
    mse_test = evaluate(model, loss_fn, X_test, y_test)
    mae_loss = nn.L1Loss()
    mae_test = evaluate(model, mae_loss, X_test, y_test)
    mape_test = get_mape_loss(model, X_test, y_test)
    validation_losses = np.array(validation_losses)
    test_res['mean_val_result'] = validation_losses.mean()
    test_res['std_val_result'] = validation_losses.std()
    test_res['test_mse'] = mse_test
    test_res['test_mae'] = mae_test
    test_res['test_rmse'] = np.sqrt(mse_test)
    test_res['test_mape'] = mape_test

    testing_results = testing_results.append(test_res, ignore_index=True)

  return testing_results

In [23]:
testing_results = test_models(best_models,
            epochs,
            batch_size,
             X_train,
            y_train,
            X_val,
            y_val,
            X_test,
            y_test,
            loss_fn)

Model:  {'hidden_size': 400, 'n_layers': 8, 'act_fun': 'LeakyReLU', 'init_methods': 'xavier uniform'}
| Epoch   0 |    25/  632 batches | lr 0.00010 | ms/batch 12.81 | loss 0.25558726
| Epoch   0 |    50/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.02357464
| Epoch   0 |    75/  632 batches | lr 0.00010 | ms/batch 10.18 | loss 0.00670563
| Epoch   0 |   100/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00426822
| Epoch   0 |   125/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00337733
| Epoch   0 |   150/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00292318
| Epoch   0 |   175/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00275504
| Epoch   0 |   200/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00267560
| Epoch   0 |   225/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00255837
| Epoch   0 |   250/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00264991
| Epoch   0 |   275/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00251

| Epoch   3 |   625/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00231830
| Epoch   4 |    25/  632 batches | lr 0.00010 | ms/batch 10.58 | loss 0.00238695
| Epoch   4 |    50/  632 batches | lr 0.00010 | ms/batch 10.44 | loss 0.00236620
| Epoch   4 |    75/  632 batches | lr 0.00010 | ms/batch  8.62 | loss 0.00240161
| Epoch   4 |   100/  632 batches | lr 0.00010 | ms/batch 10.60 | loss 0.00238348
| Epoch   4 |   125/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00236274
| Epoch   4 |   150/  632 batches | lr 0.00010 | ms/batch 10.10 | loss 0.00236627
| Epoch   4 |   175/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00242295
| Epoch   4 |   200/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00233900
| Epoch   4 |   225/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00227230
| Epoch   4 |   250/  632 batches | lr 0.00010 | ms/batch 10.52 | loss 0.00239753
| Epoch   4 |   275/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00228825
| Epoch   4 |   

| Epoch   7 |   625/  632 batches | lr 0.00010 | ms/batch 10.32 | loss 0.00229823
| Epoch   8 |    25/  632 batches | lr 0.00010 | ms/batch 10.66 | loss 0.00235128
| Epoch   8 |    50/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00231314
| Epoch   8 |    75/  632 batches | lr 0.00010 | ms/batch 10.49 | loss 0.00230481
| Epoch   8 |   100/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00235875
| Epoch   8 |   125/  632 batches | lr 0.00010 | ms/batch 12.19 | loss 0.00230100
| Epoch   8 |   150/  632 batches | lr 0.00010 | ms/batch  9.23 | loss 0.00238436
| Epoch   8 |   175/  632 batches | lr 0.00010 | ms/batch 10.59 | loss 0.00235504
| Epoch   8 |   200/  632 batches | lr 0.00010 | ms/batch 10.44 | loss 0.00228544
| Epoch   8 |   225/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00224537
| Epoch   8 |   250/  632 batches | lr 0.00010 | ms/batch 10.74 | loss 0.00235114
| Epoch   8 |   275/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00224724
| Epoch   8 |   

| Epoch  11 |   625/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00228982
| Epoch  12 |    25/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00230187
| Epoch  12 |    50/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00226631
| Epoch  12 |    75/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00228194
| Epoch  12 |   100/  632 batches | lr 0.00010 | ms/batch  8.61 | loss 0.00228851
| Epoch  12 |   125/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00228862
| Epoch  12 |   150/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00234553
| Epoch  12 |   175/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00223390
| Epoch  12 |   200/  632 batches | lr 0.00010 | ms/batch  8.59 | loss 0.00224016
| Epoch  12 |   225/  632 batches | lr 0.00010 | ms/batch 10.13 | loss 0.00222175
| Epoch  12 |   250/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00231531
| Epoch  12 |   275/  632 batches | lr 0.00010 | ms/batch 10.14 | loss 0.00220924
| Epoch  12 |   

| Epoch  15 |   625/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00227865
| Epoch  16 |    25/  632 batches | lr 0.00010 | ms/batch 10.46 | loss 0.00226291
| Epoch  16 |    50/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00225078
| Epoch  16 |    75/  632 batches | lr 0.00010 | ms/batch  8.61 | loss 0.00225262
| Epoch  16 |   100/  632 batches | lr 0.00010 | ms/batch 10.18 | loss 0.00223547
| Epoch  16 |   125/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00227339
| Epoch  16 |   150/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00227308
| Epoch  16 |   175/  632 batches | lr 0.00010 | ms/batch  8.67 | loss 0.00217889
| Epoch  16 |   200/  632 batches | lr 0.00010 | ms/batch 10.27 | loss 0.00220518
| Epoch  16 |   225/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00219236
| Epoch  16 |   250/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00227087
| Epoch  16 |   275/  632 batches | lr 0.00010 | ms/batch  8.63 | loss 0.00217299
| Epoch  16 |   

| Epoch  19 |   625/  632 batches | lr 0.00010 | ms/batch 10.25 | loss 0.00223302
| Epoch  20 |    25/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00222370
| Epoch  20 |    50/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00221994
| Epoch  20 |    75/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00221096
| Epoch  20 |   100/  632 batches | lr 0.00010 | ms/batch 10.37 | loss 0.00220792
| Epoch  20 |   125/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00224294
| Epoch  20 |   150/  632 batches | lr 0.00010 | ms/batch 10.61 | loss 0.00220366
| Epoch  20 |   175/  632 batches | lr 0.00010 | ms/batch 10.16 | loss 0.00215534
| Epoch  20 |   200/  632 batches | lr 0.00010 | ms/batch 10.16 | loss 0.00217080
| Epoch  20 |   225/  632 batches | lr 0.00010 | ms/batch 10.88 | loss 0.00216354
| Epoch  20 |   250/  632 batches | lr 0.00010 | ms/batch 13.50 | loss 0.00223642
| Epoch  20 |   275/  632 batches | lr 0.00010 | ms/batch 11.98 | loss 0.00213997
| Epoch  20 |   

| Epoch  23 |   625/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00218118
| Epoch  24 |    25/  632 batches | lr 0.00010 | ms/batch 10.72 | loss 0.00218574
| Epoch  24 |    50/  632 batches | lr 0.00010 | ms/batch 10.53 | loss 0.00217951
| Epoch  24 |    75/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00216718
| Epoch  24 |   100/  632 batches | lr 0.00010 | ms/batch 10.47 | loss 0.00216998
| Epoch  24 |   125/  632 batches | lr 0.00010 | ms/batch 10.34 | loss 0.00219322
| Epoch  24 |   150/  632 batches | lr 0.00010 | ms/batch 10.48 | loss 0.00214324
| Epoch  24 |   175/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00211650
| Epoch  24 |   200/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00213513
| Epoch  24 |   225/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00212027
| Epoch  24 |   250/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00220467
| Epoch  24 |   275/  632 batches | lr 0.00010 | ms/batch  8.61 | loss 0.00210402
| Epoch  24 |   

| Epoch  27 |   625/  632 batches | lr 0.00010 | ms/batch 11.16 | loss 0.00212121
| Epoch  28 |    25/  632 batches | lr 0.00010 | ms/batch 11.83 | loss 0.00214536
| Epoch  28 |    50/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00214200
| Epoch  28 |    75/  632 batches | lr 0.00010 | ms/batch 10.81 | loss 0.00212232
| Epoch  28 |   100/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00212113
| Epoch  28 |   125/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00213744
| Epoch  28 |   150/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00209032
| Epoch  28 |   175/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00207661
| Epoch  28 |   200/  632 batches | lr 0.00010 | ms/batch 10.85 | loss 0.00209540
| Epoch  28 |   225/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00207628
| Epoch  28 |   250/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00217134
| Epoch  28 |   275/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00206692
| Epoch  28 |   

| Epoch  31 |   625/  632 batches | lr 0.00010 | ms/batch 10.69 | loss 0.00207091
| Epoch  32 |    25/  632 batches | lr 0.00010 | ms/batch 11.11 | loss 0.00210075
| Epoch  32 |    50/  632 batches | lr 0.00010 | ms/batch 10.88 | loss 0.00209469
| Epoch  32 |    75/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00207695
| Epoch  32 |   100/  632 batches | lr 0.00010 | ms/batch 10.72 | loss 0.00207451
| Epoch  32 |   125/  632 batches | lr 0.00010 | ms/batch 10.83 | loss 0.00208998
| Epoch  32 |   150/  632 batches | lr 0.00010 | ms/batch 11.84 | loss 0.00204322
| Epoch  32 |   175/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00203301
| Epoch  32 |   200/  632 batches | lr 0.00010 | ms/batch 11.36 | loss 0.00205183
| Epoch  32 |   225/  632 batches | lr 0.00010 | ms/batch 11.22 | loss 0.00202833
| Epoch  32 |   250/  632 batches | lr 0.00010 | ms/batch 10.99 | loss 0.00213048
| Epoch  32 |   275/  632 batches | lr 0.00010 | ms/batch  9.52 | loss 0.00202894
| Epoch  32 |   

| Epoch  35 |   625/  632 batches | lr 0.00010 | ms/batch 12.07 | loss 0.00201889
| Epoch  36 |    25/  632 batches | lr 0.00010 | ms/batch 12.70 | loss 0.00205124
| Epoch  36 |    50/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00204226
| Epoch  36 |    75/  632 batches | lr 0.00010 | ms/batch 11.68 | loss 0.00202957
| Epoch  36 |   100/  632 batches | lr 0.00010 | ms/batch 12.40 | loss 0.00202731
| Epoch  36 |   125/  632 batches | lr 0.00010 | ms/batch 12.02 | loss 0.00204450
| Epoch  36 |   150/  632 batches | lr 0.00010 | ms/batch  9.65 | loss 0.00199458
| Epoch  36 |   175/  632 batches | lr 0.00010 | ms/batch 11.86 | loss 0.00199058
| Epoch  36 |   200/  632 batches | lr 0.00010 | ms/batch 12.03 | loss 0.00200662
| Epoch  36 |   225/  632 batches | lr 0.00010 | ms/batch 11.81 | loss 0.00197806
| Epoch  36 |   250/  632 batches | lr 0.00010 | ms/batch  9.70 | loss 0.00208583
| Epoch  36 |   275/  632 batches | lr 0.00010 | ms/batch 12.08 | loss 0.00198675
| Epoch  36 |   

| Epoch  39 |   625/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00196215
| Epoch  40 |    25/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00199286
| Epoch  40 |    50/  632 batches | lr 0.00010 | ms/batch 11.69 | loss 0.00198687
| Epoch  40 |    75/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00197897
| Epoch  40 |   100/  632 batches | lr 0.00010 | ms/batch 12.00 | loss 0.00198175
| Epoch  40 |   125/  632 batches | lr 0.00010 | ms/batch 11.29 | loss 0.00199555
| Epoch  40 |   150/  632 batches | lr 0.00010 | ms/batch 11.61 | loss 0.00194077
| Epoch  40 |   175/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00193795
| Epoch  40 |   200/  632 batches | lr 0.00010 | ms/batch 11.81 | loss 0.00195831
| Epoch  40 |   225/  632 batches | lr 0.00010 | ms/batch 12.30 | loss 0.00192585
| Epoch  40 |   250/  632 batches | lr 0.00010 | ms/batch 11.76 | loss 0.00203769
| Epoch  40 |   275/  632 batches | lr 0.00010 | ms/batch 11.97 | loss 0.00193873
| Epoch  40 |   

| Epoch  43 |   625/  632 batches | lr 0.00010 | ms/batch 15.52 | loss 0.00190465
| Epoch  44 |    25/  632 batches | lr 0.00010 | ms/batch 13.50 | loss 0.00193098
| Epoch  44 |    50/  632 batches | lr 0.00010 | ms/batch  9.48 | loss 0.00192513
| Epoch  44 |    75/  632 batches | lr 0.00010 | ms/batch 10.86 | loss 0.00192145
| Epoch  44 |   100/  632 batches | lr 0.00010 | ms/batch 10.93 | loss 0.00193582
| Epoch  44 |   125/  632 batches | lr 0.00010 | ms/batch 11.50 | loss 0.00194113
| Epoch  44 |   150/  632 batches | lr 0.00010 | ms/batch  9.18 | loss 0.00188244
| Epoch  44 |   175/  632 batches | lr 0.00010 | ms/batch 11.63 | loss 0.00187956
| Epoch  44 |   200/  632 batches | lr 0.00010 | ms/batch 12.44 | loss 0.00190431
| Epoch  44 |   225/  632 batches | lr 0.00010 | ms/batch 11.72 | loss 0.00187243
| Epoch  44 |   250/  632 batches | lr 0.00010 | ms/batch 11.61 | loss 0.00198040
| Epoch  44 |   275/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00188902
| Epoch  44 |   

| Epoch  47 |   625/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00184517
| Epoch  48 |    25/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00186009
| Epoch  48 |    50/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00186041
| Epoch  48 |    75/  632 batches | lr 0.00010 | ms/batch 10.13 | loss 0.00185997
| Epoch  48 |   100/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00189009
| Epoch  48 |   125/  632 batches | lr 0.00010 | ms/batch 10.57 | loss 0.00188861
| Epoch  48 |   150/  632 batches | lr 0.00010 | ms/batch 10.92 | loss 0.00181948
| Epoch  48 |   175/  632 batches | lr 0.00010 | ms/batch 10.88 | loss 0.00181720
| Epoch  48 |   200/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00184632
| Epoch  48 |   225/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00181797
| Epoch  48 |   250/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00191727
| Epoch  48 |   275/  632 batches | lr 0.00010 | ms/batch 10.41 | loss 0.00183578
| Epoch  48 |   

| Epoch  51 |   625/  632 batches | lr 0.00010 | ms/batch 10.64 | loss 0.00179148
| Epoch  52 |    25/  632 batches | lr 0.00010 | ms/batch 10.96 | loss 0.00179083
| Epoch  52 |    50/  632 batches | lr 0.00010 | ms/batch 10.65 | loss 0.00178990
| Epoch  52 |    75/  632 batches | lr 0.00010 | ms/batch  8.67 | loss 0.00179971
| Epoch  52 |   100/  632 batches | lr 0.00010 | ms/batch 10.79 | loss 0.00183862
| Epoch  52 |   125/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00183257
| Epoch  52 |   150/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00175496
| Epoch  52 |   175/  632 batches | lr 0.00010 | ms/batch  8.63 | loss 0.00175358
| Epoch  52 |   200/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00178711
| Epoch  52 |   225/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00175840
| Epoch  52 |   250/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00184673
| Epoch  52 |   275/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00177204
| Epoch  52 |   

| Epoch  55 |   625/  632 batches | lr 0.00010 | ms/batch 10.95 | loss 0.00173726
| Epoch  56 |    25/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00172090
| Epoch  56 |    50/  632 batches | lr 0.00010 | ms/batch 10.94 | loss 0.00171969
| Epoch  56 |    75/  632 batches | lr 0.00010 | ms/batch 11.33 | loss 0.00174084
| Epoch  56 |   100/  632 batches | lr 0.00010 | ms/batch 11.02 | loss 0.00177535
| Epoch  56 |   125/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00177966
| Epoch  56 |   150/  632 batches | lr 0.00010 | ms/batch 10.40 | loss 0.00169347
| Epoch  56 |   175/  632 batches | lr 0.00010 | ms/batch 10.87 | loss 0.00169237
| Epoch  56 |   200/  632 batches | lr 0.00010 | ms/batch 10.64 | loss 0.00172920
| Epoch  56 |   225/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00169328
| Epoch  56 |   250/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00177372
| Epoch  56 |   275/  632 batches | lr 0.00010 | ms/batch 10.40 | loss 0.00170223
| Epoch  56 |   

| Epoch  59 |   625/  632 batches | lr 0.00010 | ms/batch 11.48 | loss 0.00167467
| Epoch  60 |    25/  632 batches | lr 0.00010 | ms/batch 10.76 | loss 0.00165110
| Epoch  60 |    50/  632 batches | lr 0.00010 | ms/batch 10.73 | loss 0.00165378
| Epoch  60 |    75/  632 batches | lr 0.00010 | ms/batch  9.06 | loss 0.00167973
| Epoch  60 |   100/  632 batches | lr 0.00010 | ms/batch 10.55 | loss 0.00169791
| Epoch  60 |   125/  632 batches | lr 0.00010 | ms/batch 10.52 | loss 0.00172247
| Epoch  60 |   150/  632 batches | lr 0.00010 | ms/batch 10.38 | loss 0.00162812
| Epoch  60 |   175/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00162444
| Epoch  60 |   200/  632 batches | lr 0.00010 | ms/batch 10.79 | loss 0.00166805
| Epoch  60 |   225/  632 batches | lr 0.00010 | ms/batch 10.42 | loss 0.00162233
| Epoch  60 |   250/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00170105
| Epoch  60 |   275/  632 batches | lr 0.00010 | ms/batch  8.62 | loss 0.00163641
| Epoch  60 |   

| Epoch  63 |   625/  632 batches | lr 0.00010 | ms/batch 10.34 | loss 0.00160859
| Epoch  64 |    25/  632 batches | lr 0.00010 | ms/batch 10.62 | loss 0.00158341
| Epoch  64 |    50/  632 batches | lr 0.00010 | ms/batch  8.65 | loss 0.00158531
| Epoch  64 |    75/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00160697
| Epoch  64 |   100/  632 batches | lr 0.00010 | ms/batch 10.27 | loss 0.00162473
| Epoch  64 |   125/  632 batches | lr 0.00010 | ms/batch 10.18 | loss 0.00166685
| Epoch  64 |   150/  632 batches | lr 0.00010 | ms/batch  8.64 | loss 0.00157007
| Epoch  64 |   175/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00156052
| Epoch  64 |   200/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00160246
| Epoch  64 |   225/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00154735
| Epoch  64 |   250/  632 batches | lr 0.00010 | ms/batch  8.59 | loss 0.00163523
| Epoch  64 |   275/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00156994
| Epoch  64 |   

| Epoch  67 |   625/  632 batches | lr 0.00010 | ms/batch 10.41 | loss 0.00154213
| Epoch  68 |    25/  632 batches | lr 0.00010 | ms/batch 11.69 | loss 0.00151553
| Epoch  68 |    50/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00152058
| Epoch  68 |    75/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00153970
| Epoch  68 |   100/  632 batches | lr 0.00010 | ms/batch 10.84 | loss 0.00154473
| Epoch  68 |   125/  632 batches | lr 0.00010 | ms/batch 10.50 | loss 0.00161000
| Epoch  68 |   150/  632 batches | lr 0.00010 | ms/batch 11.18 | loss 0.00151917
| Epoch  68 |   175/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00149640
| Epoch  68 |   200/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00154558
| Epoch  68 |   225/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00147563
| Epoch  68 |   250/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00157784
| Epoch  68 |   275/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00150607
| Epoch  68 |   

| Epoch  71 |   625/  632 batches | lr 0.00010 | ms/batch 10.98 | loss 0.00147471
| Epoch  72 |    25/  632 batches | lr 0.00010 | ms/batch 11.24 | loss 0.00145252
| Epoch  72 |    50/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00145891
| Epoch  72 |    75/  632 batches | lr 0.00010 | ms/batch 10.72 | loss 0.00147541
| Epoch  72 |   100/  632 batches | lr 0.00010 | ms/batch 11.41 | loss 0.00147696
| Epoch  72 |   125/  632 batches | lr 0.00010 | ms/batch 10.52 | loss 0.00155138
| Epoch  72 |   150/  632 batches | lr 0.00010 | ms/batch  8.96 | loss 0.00146631
| Epoch  72 |   175/  632 batches | lr 0.00010 | ms/batch 10.48 | loss 0.00143290
| Epoch  72 |   200/  632 batches | lr 0.00010 | ms/batch 10.45 | loss 0.00147912
| Epoch  72 |   225/  632 batches | lr 0.00010 | ms/batch 10.74 | loss 0.00140265
| Epoch  72 |   250/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00153591
| Epoch  72 |   275/  632 batches | lr 0.00010 | ms/batch 11.78 | loss 0.00143808
| Epoch  72 |   

| Epoch  75 |   625/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00141332
| Epoch  76 |    25/  632 batches | lr 0.00010 | ms/batch  9.43 | loss 0.00141294
| Epoch  76 |    50/  632 batches | lr 0.00010 | ms/batch 10.66 | loss 0.00140330
| Epoch  76 |    75/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00142097
| Epoch  76 |   100/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00140973
| Epoch  76 |   125/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00150153
| Epoch  76 |   150/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00141611
| Epoch  76 |   175/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00137543
| Epoch  76 |   200/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00141619
| Epoch  76 |   225/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00133598
| Epoch  76 |   250/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00148296
| Epoch  76 |   275/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00137257
| Epoch  76 |   

| Epoch  79 |   625/  632 batches | lr 0.00010 | ms/batch 10.27 | loss 0.00135413
| Epoch  80 |    25/  632 batches | lr 0.00010 | ms/batch 10.68 | loss 0.00137118
| Epoch  80 |    50/  632 batches | lr 0.00010 | ms/batch  8.55 | loss 0.00134217
| Epoch  80 |    75/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00135942
| Epoch  80 |   100/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00134826
| Epoch  80 |   125/  632 batches | lr 0.00010 | ms/batch 10.16 | loss 0.00143300
| Epoch  80 |   150/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00136306
| Epoch  80 |   175/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00131087
| Epoch  80 |   200/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00134739
| Epoch  80 |   225/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00128133
| Epoch  80 |   250/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00143472
| Epoch  80 |   275/  632 batches | lr 0.00010 | ms/batch  8.64 | loss 0.00130568
| Epoch  80 |   

| Epoch  83 |   625/  632 batches | lr 0.00010 | ms/batch 10.51 | loss 0.00129699
| Epoch  84 |    25/  632 batches | lr 0.00010 | ms/batch  9.10 | loss 0.00133346
| Epoch  84 |    50/  632 batches | lr 0.00010 | ms/batch 10.47 | loss 0.00128928
| Epoch  84 |    75/  632 batches | lr 0.00010 | ms/batch 10.16 | loss 0.00129371
| Epoch  84 |   100/  632 batches | lr 0.00010 | ms/batch  8.57 | loss 0.00128415
| Epoch  84 |   125/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00136665
| Epoch  84 |   150/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00130964
| Epoch  84 |   175/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00124486
| Epoch  84 |   200/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00128157
| Epoch  84 |   225/  632 batches | lr 0.00010 | ms/batch 10.39 | loss 0.00123274
| Epoch  84 |   250/  632 batches | lr 0.00010 | ms/batch 10.35 | loss 0.00137798
| Epoch  84 |   275/  632 batches | lr 0.00010 | ms/batch 10.60 | loss 0.00124020
| Epoch  84 |   

| Epoch  87 |   625/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00122944
| Epoch  88 |    25/  632 batches | lr 0.00010 | ms/batch 10.59 | loss 0.00129028
| Epoch  88 |    50/  632 batches | lr 0.00010 | ms/batch 10.25 | loss 0.00124058
| Epoch  88 |    75/  632 batches | lr 0.00010 | ms/batch  8.58 | loss 0.00123804
| Epoch  88 |   100/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00122458
| Epoch  88 |   125/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00128818
| Epoch  88 |   150/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00125712
| Epoch  88 |   175/  632 batches | lr 0.00010 | ms/batch  9.03 | loss 0.00117425
| Epoch  88 |   200/  632 batches | lr 0.00010 | ms/batch 10.35 | loss 0.00123324
| Epoch  88 |   225/  632 batches | lr 0.00010 | ms/batch 10.52 | loss 0.00120130
| Epoch  88 |   250/  632 batches | lr 0.00010 | ms/batch 10.48 | loss 0.00131459
| Epoch  88 |   275/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00117775
| Epoch  88 |   

| Epoch  91 |   625/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00118009
| Epoch  92 |    25/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00124548
| Epoch  92 |    50/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00119115
| Epoch  92 |    75/  632 batches | lr 0.00010 | ms/batch 10.46 | loss 0.00118642
| Epoch  92 |   100/  632 batches | lr 0.00010 | ms/batch 11.26 | loss 0.00117020
| Epoch  92 |   125/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00122236
| Epoch  92 |   150/  632 batches | lr 0.00010 | ms/batch 10.38 | loss 0.00120826
| Epoch  92 |   175/  632 batches | lr 0.00010 | ms/batch 10.35 | loss 0.00112105
| Epoch  92 |   200/  632 batches | lr 0.00010 | ms/batch 10.43 | loss 0.00118008
| Epoch  92 |   225/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00115126
| Epoch  92 |   250/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00125052
| Epoch  92 |   275/  632 batches | lr 0.00010 | ms/batch 10.34 | loss 0.00112272
| Epoch  92 |   

| Epoch  95 |   625/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00112198
| Epoch  96 |    25/  632 batches | lr 0.00010 | ms/batch 10.65 | loss 0.00119267
| Epoch  96 |    50/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00114674
| Epoch  96 |    75/  632 batches | lr 0.00010 | ms/batch  8.62 | loss 0.00111740
| Epoch  96 |   100/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00110241
| Epoch  96 |   125/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00114497
| Epoch  96 |   150/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00114999
| Epoch  96 |   175/  632 batches | lr 0.00010 | ms/batch  8.56 | loss 0.00109747
| Epoch  96 |   200/  632 batches | lr 0.00010 | ms/batch 10.18 | loss 0.00112681
| Epoch  96 |   225/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00110743
| Epoch  96 |   250/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00116298
| Epoch  96 |   275/  632 batches | lr 0.00010 | ms/batch  8.62 | loss 0.00107965
| Epoch  96 |   

| Epoch  99 |   625/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00107266
| Epoch 100 |    25/  632 batches | lr 0.00010 | ms/batch 10.66 | loss 0.00114871
| Epoch 100 |    50/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00109857
| Epoch 100 |    75/  632 batches | lr 0.00010 | ms/batch 10.40 | loss 0.00106229
| Epoch 100 |   100/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00105181
| Epoch 100 |   125/  632 batches | lr 0.00010 | ms/batch 10.32 | loss 0.00109492
| Epoch 100 |   150/  632 batches | lr 0.00010 | ms/batch  8.67 | loss 0.00110472
| Epoch 100 |   175/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00106322
| Epoch 100 |   200/  632 batches | lr 0.00010 | ms/batch 10.39 | loss 0.00107523
| Epoch 100 |   225/  632 batches | lr 0.00010 | ms/batch 10.61 | loss 0.00106329
| Epoch 100 |   250/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00111152
| Epoch 100 |   275/  632 batches | lr 0.00010 | ms/batch 10.37 | loss 0.00103671
| Epoch 100 |   

| Epoch 103 |   625/  632 batches | lr 0.00010 | ms/batch 11.94 | loss 0.00102224
| Epoch 104 |    25/  632 batches | lr 0.00010 | ms/batch 14.30 | loss 0.00108050
| Epoch 104 |    50/  632 batches | lr 0.00010 | ms/batch 11.18 | loss 0.00105662
| Epoch 104 |    75/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00100962
| Epoch 104 |   100/  632 batches | lr 0.00010 | ms/batch 10.38 | loss 0.00100285
| Epoch 104 |   125/  632 batches | lr 0.00010 | ms/batch 10.73 | loss 0.00105955
| Epoch 104 |   150/  632 batches | lr 0.00010 | ms/batch 10.69 | loss 0.00104508
| Epoch 104 |   175/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00102915
| Epoch 104 |   200/  632 batches | lr 0.00010 | ms/batch 10.18 | loss 0.00102265
| Epoch 104 |   225/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00101223
| Epoch 104 |   250/  632 batches | lr 0.00010 | ms/batch 11.20 | loss 0.00108071
| Epoch 104 |   275/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00099511
| Epoch 104 |   

| Epoch 107 |   625/  632 batches | lr 0.00010 | ms/batch 10.27 | loss 0.00100807
| Epoch 108 |    25/  632 batches | lr 0.00010 | ms/batch 10.89 | loss 0.00102151
| Epoch 108 |    50/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00102387
| Epoch 108 |    75/  632 batches | lr 0.00010 | ms/batch 10.72 | loss 0.00100583
| Epoch 108 |   100/  632 batches | lr 0.00010 | ms/batch 10.51 | loss 0.00097508
| Epoch 108 |   125/  632 batches | lr 0.00010 | ms/batch 10.38 | loss 0.00102795
| Epoch 108 |   150/  632 batches | lr 0.00010 | ms/batch  8.64 | loss 0.00098903
| Epoch 108 |   175/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00098727
| Epoch 108 |   200/  632 batches | lr 0.00010 | ms/batch 10.35 | loss 0.00097784
| Epoch 108 |   225/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00096685
| Epoch 108 |   250/  632 batches | lr 0.00010 | ms/batch  8.64 | loss 0.00103151
| Epoch 108 |   275/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00093419
| Epoch 108 |   

| Epoch 111 |   625/  632 batches | lr 0.00010 | ms/batch  8.60 | loss 0.00097176
| Epoch 112 |    25/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00098711
| Epoch 112 |    50/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00098178
| Epoch 112 |    75/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00096586
| Epoch 112 |   100/  632 batches | lr 0.00010 | ms/batch 10.40 | loss 0.00092230
| Epoch 112 |   125/  632 batches | lr 0.00010 | ms/batch 10.36 | loss 0.00098548
| Epoch 112 |   150/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00094450
| Epoch 112 |   175/  632 batches | lr 0.00010 | ms/batch  8.55 | loss 0.00095480
| Epoch 112 |   200/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00093834
| Epoch 112 |   225/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00093319
| Epoch 112 |   250/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00099682
| Epoch 112 |   275/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00089896
| Epoch 112 |   

| Epoch 115 |   625/  632 batches | lr 0.00010 | ms/batch 10.27 | loss 0.00093864
| Epoch 116 |    25/  632 batches | lr 0.00010 | ms/batch 10.61 | loss 0.00094790
| Epoch 116 |    50/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00094838
| Epoch 116 |    75/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00091511
| Epoch 116 |   100/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00087029
| Epoch 116 |   125/  632 batches | lr 0.00010 | ms/batch 10.32 | loss 0.00091358
| Epoch 116 |   150/  632 batches | lr 0.00010 | ms/batch  8.63 | loss 0.00089867
| Epoch 116 |   175/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00088448
| Epoch 116 |   200/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00088443
| Epoch 116 |   225/  632 batches | lr 0.00010 | ms/batch 10.34 | loss 0.00088932
| Epoch 116 |   250/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00095177
| Epoch 116 |   275/  632 batches | lr 0.00010 | ms/batch  8.54 | loss 0.00085114
| Epoch 116 |   

| Epoch 119 |   625/  632 batches | lr 0.00010 | ms/batch 10.39 | loss 0.00091367
| Epoch 120 |    25/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00092199
| Epoch 120 |    50/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00091271
| Epoch 120 |    75/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00089027
| Epoch 120 |   100/  632 batches | lr 0.00010 | ms/batch  8.57 | loss 0.00084633
| Epoch 120 |   125/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00091187
| Epoch 120 |   150/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00088077
| Epoch 120 |   175/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00088166
| Epoch 120 |   200/  632 batches | lr 0.00010 | ms/batch  8.63 | loss 0.00085789
| Epoch 120 |   225/  632 batches | lr 0.00010 | ms/batch 10.10 | loss 0.00085043
| Epoch 120 |   250/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00090457
| Epoch 120 |   275/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00080996
| Epoch 120 |   

| Epoch 123 |   625/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00082051
| Epoch 124 |    25/  632 batches | lr 0.00010 | ms/batch 11.30 | loss 0.00093897
| Epoch 124 |    50/  632 batches | lr 0.00010 | ms/batch 10.87 | loss 0.00085829
| Epoch 124 |    75/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00079613
| Epoch 124 |   100/  632 batches | lr 0.00010 | ms/batch 10.64 | loss 0.00079323
| Epoch 124 |   125/  632 batches | lr 0.00010 | ms/batch 10.94 | loss 0.00088599
| Epoch 124 |   150/  632 batches | lr 0.00010 | ms/batch 11.36 | loss 0.00088660
| Epoch 124 |   175/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00086711
| Epoch 124 |   200/  632 batches | lr 0.00010 | ms/batch 10.40 | loss 0.00084664
| Epoch 124 |   225/  632 batches | lr 0.00010 | ms/batch 11.12 | loss 0.00084815
| Epoch 124 |   250/  632 batches | lr 0.00010 | ms/batch 11.05 | loss 0.00086944
| Epoch 124 |   275/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00079995
| Epoch 124 |   

| Epoch   2 |   600/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00234860
| Epoch   2 |   625/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00240569
| Epoch   3 |    25/  632 batches | lr 0.00010 | ms/batch  7.63 | loss 0.00243112
| Epoch   3 |    50/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00236579
| Epoch   3 |    75/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00240450
| Epoch   3 |   100/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00238751
| Epoch   3 |   125/  632 batches | lr 0.00010 | ms/batch  7.34 | loss 0.00237776
| Epoch   3 |   150/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00241538
| Epoch   3 |   175/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00235138
| Epoch   3 |   200/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00235360
| Epoch   3 |   225/  632 batches | lr 0.00010 | ms/batch  7.33 | loss 0.00232683
| Epoch   3 |   250/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00243561
| Epoch   3 |   

| Epoch   6 |   600/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00230958
| Epoch   6 |   625/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00238769
| Epoch   7 |    25/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00237603
| Epoch   7 |    50/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00233724
| Epoch   7 |    75/  632 batches | lr 0.00010 | ms/batch  7.67 | loss 0.00236808
| Epoch   7 |   100/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00233221
| Epoch   7 |   125/  632 batches | lr 0.00010 | ms/batch  8.82 | loss 0.00236294
| Epoch   7 |   150/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00235358
| Epoch   7 |   175/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00227168
| Epoch   7 |   200/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00230875
| Epoch   7 |   225/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00228494
| Epoch   7 |   250/  632 batches | lr 0.00010 | ms/batch  8.96 | loss 0.00238074
| Epoch   7 |   

| Epoch  10 |   600/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00227274
| Epoch  10 |   625/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00235811
| Epoch  11 |    25/  632 batches | lr 0.00010 | ms/batch 10.03 | loss 0.00234786
| Epoch  11 |    50/  632 batches | lr 0.00010 | ms/batch  7.89 | loss 0.00231090
| Epoch  11 |    75/  632 batches | lr 0.00010 | ms/batch  9.52 | loss 0.00233149
| Epoch  11 |   100/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00232189
| Epoch  11 |   125/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00233813
| Epoch  11 |   150/  632 batches | lr 0.00010 | ms/batch  7.44 | loss 0.00227909
| Epoch  11 |   175/  632 batches | lr 0.00010 | ms/batch  9.52 | loss 0.00225678
| Epoch  11 |   200/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00228157
| Epoch  11 |   225/  632 batches | lr 0.00010 | ms/batch  9.24 | loss 0.00225805
| Epoch  11 |   250/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00234445
| Epoch  11 |   

| Epoch  14 |   600/  632 batches | lr 0.00010 | ms/batch  7.17 | loss 0.00224894
| Epoch  14 |   625/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00231732
| Epoch  15 |    25/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00233020
| Epoch  15 |    50/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00229284
| Epoch  15 |    75/  632 batches | lr 0.00010 | ms/batch  7.24 | loss 0.00229998
| Epoch  15 |   100/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00229442
| Epoch  15 |   125/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00229884
| Epoch  15 |   150/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00223671
| Epoch  15 |   175/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00222835
| Epoch  15 |   200/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00226153
| Epoch  15 |   225/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00223578
| Epoch  15 |   250/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00232217
| Epoch  15 |   

| Epoch  18 |   600/  632 batches | lr 0.00010 | ms/batch  9.25 | loss 0.00222026
| Epoch  18 |   625/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00227046
| Epoch  19 |    25/  632 batches | lr 0.00010 | ms/batch 11.62 | loss 0.00231344
| Epoch  19 |    50/  632 batches | lr 0.00010 | ms/batch  7.62 | loss 0.00227806
| Epoch  19 |    75/  632 batches | lr 0.00010 | ms/batch  9.23 | loss 0.00227260
| Epoch  19 |   100/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00226632
| Epoch  19 |   125/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00226898
| Epoch  19 |   150/  632 batches | lr 0.00010 | ms/batch  7.28 | loss 0.00220830
| Epoch  19 |   175/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00220403
| Epoch  19 |   200/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00224057
| Epoch  19 |   225/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00221122
| Epoch  19 |   250/  632 batches | lr 0.00010 | ms/batch  7.34 | loss 0.00230212
| Epoch  19 |   

| Epoch  22 |   600/  632 batches | lr 0.00010 | ms/batch  9.19 | loss 0.00218871
| Epoch  22 |   625/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00223090
| Epoch  23 |    25/  632 batches | lr 0.00010 | ms/batch  7.95 | loss 0.00229464
| Epoch  23 |    50/  632 batches | lr 0.00010 | ms/batch  9.18 | loss 0.00225447
| Epoch  23 |    75/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00224125
| Epoch  23 |   100/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00223317
| Epoch  23 |   125/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00224553
| Epoch  23 |   150/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00218614
| Epoch  23 |   175/  632 batches | lr 0.00010 | ms/batch  7.36 | loss 0.00217816
| Epoch  23 |   200/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00221943
| Epoch  23 |   225/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00218849
| Epoch  23 |   250/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00228196
| Epoch  23 |   

| Epoch  26 |   600/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00216191
| Epoch  26 |   625/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00219960
| Epoch  27 |    25/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00227185
| Epoch  27 |    50/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00222816
| Epoch  27 |    75/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00221473
| Epoch  27 |   100/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00220524
| Epoch  27 |   125/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00222407
| Epoch  27 |   150/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00216320
| Epoch  27 |   175/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00215357
| Epoch  27 |   200/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00219800
| Epoch  27 |   225/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00216735
| Epoch  27 |   250/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00226128
| Epoch  27 |   

| Epoch  30 |   600/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00213853
| Epoch  30 |   625/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00216843
| Epoch  31 |    25/  632 batches | lr 0.00010 | ms/batch  7.56 | loss 0.00224784
| Epoch  31 |    50/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00220041
| Epoch  31 |    75/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00218852
| Epoch  31 |   100/  632 batches | lr 0.00010 | ms/batch  7.22 | loss 0.00217877
| Epoch  31 |   125/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00219961
| Epoch  31 |   150/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00214127
| Epoch  31 |   175/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00212898
| Epoch  31 |   200/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00217492
| Epoch  31 |   225/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00214703
| Epoch  31 |   250/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00224242
| Epoch  31 |   

| Epoch  34 |   600/  632 batches | lr 0.00010 | ms/batch  9.30 | loss 0.00211238
| Epoch  34 |   625/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00213851
| Epoch  35 |    25/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00222451
| Epoch  35 |    50/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00217295
| Epoch  35 |    75/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00216195
| Epoch  35 |   100/  632 batches | lr 0.00010 | ms/batch  9.27 | loss 0.00215201
| Epoch  35 |   125/  632 batches | lr 0.00010 | ms/batch  9.25 | loss 0.00217534
| Epoch  35 |   150/  632 batches | lr 0.00010 | ms/batch  9.24 | loss 0.00211775
| Epoch  35 |   175/  632 batches | lr 0.00010 | ms/batch  7.40 | loss 0.00210430
| Epoch  35 |   200/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00215099
| Epoch  35 |   225/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00212500
| Epoch  35 |   250/  632 batches | lr 0.00010 | ms/batch  9.22 | loss 0.00222410
| Epoch  35 |   

| Epoch  38 |   600/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00208180
| Epoch  38 |   625/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00211003
| Epoch  39 |    25/  632 batches | lr 0.00010 | ms/batch  7.47 | loss 0.00219823
| Epoch  39 |    50/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00214900
| Epoch  39 |    75/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00213628
| Epoch  39 |   100/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00212273
| Epoch  39 |   125/  632 batches | lr 0.00010 | ms/batch  7.15 | loss 0.00215035
| Epoch  39 |   150/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00209413
| Epoch  39 |   175/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00207911
| Epoch  39 |   200/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00212470
| Epoch  39 |   225/  632 batches | lr 0.00010 | ms/batch  7.23 | loss 0.00210275
| Epoch  39 |   250/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00220372
| Epoch  39 |   

| Epoch  42 |   600/  632 batches | lr 0.00010 | ms/batch  7.16 | loss 0.00204924
| Epoch  42 |   625/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00208040
| Epoch  43 |    25/  632 batches | lr 0.00010 | ms/batch  9.03 | loss 0.00217325
| Epoch  43 |    50/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00212320
| Epoch  43 |    75/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00210880
| Epoch  43 |   100/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00209209
| Epoch  43 |   125/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00212188
| Epoch  43 |   150/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00206650
| Epoch  43 |   175/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00205259
| Epoch  43 |   200/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00209703
| Epoch  43 |   225/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00207868
| Epoch  43 |   250/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00218228
| Epoch  43 |   

| Epoch  46 |   600/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00201616
| Epoch  46 |   625/  632 batches | lr 0.00010 | ms/batch  9.23 | loss 0.00205003
| Epoch  47 |    25/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00214380
| Epoch  47 |    50/  632 batches | lr 0.00010 | ms/batch  7.54 | loss 0.00209661
| Epoch  47 |    75/  632 batches | lr 0.00010 | ms/batch  9.07 | loss 0.00207824
| Epoch  47 |   100/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00206178
| Epoch  47 |   125/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00209326
| Epoch  47 |   150/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00203932
| Epoch  47 |   175/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00202472
| Epoch  47 |   200/  632 batches | lr 0.00010 | ms/batch  9.11 | loss 0.00206813
| Epoch  47 |   225/  632 batches | lr 0.00010 | ms/batch  9.10 | loss 0.00205175
| Epoch  47 |   250/  632 batches | lr 0.00010 | ms/batch  7.92 | loss 0.00216001
| Epoch  47 |   

| Epoch  50 |   600/  632 batches | lr 0.00010 | ms/batch  7.15 | loss 0.00198014
| Epoch  50 |   625/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00201773
| Epoch  51 |    25/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00211184
| Epoch  51 |    50/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00206849
| Epoch  51 |    75/  632 batches | lr 0.00010 | ms/batch  7.15 | loss 0.00204640
| Epoch  51 |   100/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00202960
| Epoch  51 |   125/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00206149
| Epoch  51 |   150/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00200815
| Epoch  51 |   175/  632 batches | lr 0.00010 | ms/batch  7.16 | loss 0.00199571
| Epoch  51 |   200/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00203898
| Epoch  51 |   225/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00202135
| Epoch  51 |   250/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00213396
| Epoch  51 |   

| Epoch  54 |   600/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00194657
| Epoch  54 |   625/  632 batches | lr 0.00010 | ms/batch  9.22 | loss 0.00198737
| Epoch  55 |    25/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00207854
| Epoch  55 |    50/  632 batches | lr 0.00010 | ms/batch  7.28 | loss 0.00203947
| Epoch  55 |    75/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00201529
| Epoch  55 |   100/  632 batches | lr 0.00010 | ms/batch  9.16 | loss 0.00199665
| Epoch  55 |   125/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00203038
| Epoch  55 |   150/  632 batches | lr 0.00010 | ms/batch  7.34 | loss 0.00197699
| Epoch  55 |   175/  632 batches | lr 0.00010 | ms/batch  9.15 | loss 0.00196450
| Epoch  55 |   200/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00200714
| Epoch  55 |   225/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00198996
| Epoch  55 |   250/  632 batches | lr 0.00010 | ms/batch  7.44 | loss 0.00210195
| Epoch  55 |   

| Epoch  58 |   600/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00191329
| Epoch  58 |   625/  632 batches | lr 0.00010 | ms/batch  7.25 | loss 0.00195629
| Epoch  59 |    25/  632 batches | lr 0.00010 | ms/batch  7.46 | loss 0.00204200
| Epoch  59 |    50/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00200995
| Epoch  59 |    75/  632 batches | lr 0.00010 | ms/batch  7.16 | loss 0.00198429
| Epoch  59 |   100/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00196832
| Epoch  59 |   125/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00200049
| Epoch  59 |   150/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00194593
| Epoch  59 |   175/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00193295
| Epoch  59 |   200/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00197345
| Epoch  59 |   225/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00195678
| Epoch  59 |   250/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00206511
| Epoch  59 |   

| Epoch  62 |   600/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00188229
| Epoch  62 |   625/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00192343
| Epoch  63 |    25/  632 batches | lr 0.00010 | ms/batch  9.06 | loss 0.00200644
| Epoch  63 |    50/  632 batches | lr 0.00010 | ms/batch  7.13 | loss 0.00197724
| Epoch  63 |    75/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00195155
| Epoch  63 |   100/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00194015
| Epoch  63 |   125/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00196828
| Epoch  63 |   150/  632 batches | lr 0.00010 | ms/batch  7.28 | loss 0.00191357
| Epoch  63 |   175/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00189860
| Epoch  63 |   200/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00194030
| Epoch  63 |   225/  632 batches | lr 0.00010 | ms/batch  8.82 | loss 0.00192289
| Epoch  63 |   250/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00203084
| Epoch  63 |   

| Epoch  66 |   600/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00184992
| Epoch  66 |   625/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00189034
| Epoch  67 |    25/  632 batches | lr 0.00010 | ms/batch  7.48 | loss 0.00196709
| Epoch  67 |    50/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00194308
| Epoch  67 |    75/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00191765
| Epoch  67 |   100/  632 batches | lr 0.00010 | ms/batch  7.24 | loss 0.00191151
| Epoch  67 |   125/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00193549
| Epoch  67 |   150/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00188110
| Epoch  67 |   175/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00186612
| Epoch  67 |   200/  632 batches | lr 0.00010 | ms/batch  7.16 | loss 0.00190821
| Epoch  67 |   225/  632 batches | lr 0.00010 | ms/batch  8.67 | loss 0.00189020
| Epoch  67 |   250/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00199674
| Epoch  67 |   

| Epoch  70 |   600/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00181917
| Epoch  70 |   625/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00185787
| Epoch  71 |    25/  632 batches | lr 0.00010 | ms/batch  9.15 | loss 0.00192907
| Epoch  71 |    50/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00190435
| Epoch  71 |    75/  632 batches | lr 0.00010 | ms/batch  7.13 | loss 0.00188211
| Epoch  71 |   100/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00188449
| Epoch  71 |   125/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00190453
| Epoch  71 |   150/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00184727
| Epoch  71 |   175/  632 batches | lr 0.00010 | ms/batch  7.25 | loss 0.00183188
| Epoch  71 |   200/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00187460
| Epoch  71 |   225/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00185395
| Epoch  71 |   250/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00195751
| Epoch  71 |   

| Epoch  74 |   600/  632 batches | lr 0.00010 | ms/batch  9.11 | loss 0.00178504
| Epoch  74 |   625/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00182165
| Epoch  75 |    25/  632 batches | lr 0.00010 | ms/batch  7.57 | loss 0.00189096
| Epoch  75 |    50/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00186668
| Epoch  75 |    75/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00184540
| Epoch  75 |   100/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00185129
| Epoch  75 |   125/  632 batches | lr 0.00010 | ms/batch  7.25 | loss 0.00187200
| Epoch  75 |   150/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00181161
| Epoch  75 |   175/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00180071
| Epoch  75 |   200/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00184167
| Epoch  75 |   225/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00182065
| Epoch  75 |   250/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00192498
| Epoch  75 |   

| Epoch  78 |   600/  632 batches | lr 0.00010 | ms/batch  7.26 | loss 0.00175026
| Epoch  78 |   625/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00178886
| Epoch  79 |    25/  632 batches | lr 0.00010 | ms/batch  9.06 | loss 0.00185293
| Epoch  79 |    50/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00182863
| Epoch  79 |    75/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00180822
| Epoch  79 |   100/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00182013
| Epoch  79 |   125/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00183549
| Epoch  79 |   150/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00177395
| Epoch  79 |   175/  632 batches | lr 0.00010 | ms/batch  7.19 | loss 0.00177122
| Epoch  79 |   200/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00180973
| Epoch  79 |   225/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00178575
| Epoch  79 |   250/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00188745
| Epoch  79 |   

| Epoch  82 |   600/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00171687
| Epoch  82 |   625/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00175572
| Epoch  83 |    25/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00181556
| Epoch  83 |    50/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00179064
| Epoch  83 |    75/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00177037
| Epoch  83 |   100/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00178879
| Epoch  83 |   125/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00180040
| Epoch  83 |   150/  632 batches | lr 0.00010 | ms/batch  7.23 | loss 0.00173643
| Epoch  83 |   175/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00173665
| Epoch  83 |   200/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00177568
| Epoch  83 |   225/  632 batches | lr 0.00010 | ms/batch  8.82 | loss 0.00174838
| Epoch  83 |   250/  632 batches | lr 0.00010 | ms/batch  7.26 | loss 0.00184731
| Epoch  83 |   

| Epoch  86 |   600/  632 batches | lr 0.00010 | ms/batch  7.76 | loss 0.00168045
| Epoch  86 |   625/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00172334
| Epoch  87 |    25/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00178244
| Epoch  87 |    50/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00175394
| Epoch  87 |    75/  632 batches | lr 0.00010 | ms/batch  7.28 | loss 0.00173276
| Epoch  87 |   100/  632 batches | lr 0.00010 | ms/batch  9.21 | loss 0.00175747
| Epoch  87 |   125/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00176744
| Epoch  87 |   150/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00169968
| Epoch  87 |   175/  632 batches | lr 0.00010 | ms/batch  7.34 | loss 0.00170881
| Epoch  87 |   200/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00174379
| Epoch  87 |   225/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00171247
| Epoch  87 |   250/  632 batches | lr 0.00010 | ms/batch  9.24 | loss 0.00180981
| Epoch  87 |   

| Epoch  90 |   600/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00164505
| Epoch  90 |   625/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00168919
| Epoch  91 |    25/  632 batches | lr 0.00010 | ms/batch  9.23 | loss 0.00174532
| Epoch  91 |    50/  632 batches | lr 0.00010 | ms/batch  7.24 | loss 0.00171296
| Epoch  91 |    75/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00169343
| Epoch  91 |   100/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00172416
| Epoch  91 |   125/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00173694
| Epoch  91 |   150/  632 batches | lr 0.00010 | ms/batch  7.22 | loss 0.00166334
| Epoch  91 |   175/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00167548
| Epoch  91 |   200/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00170626
| Epoch  91 |   225/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00167294
| Epoch  91 |   250/  632 batches | lr 0.00010 | ms/batch  7.23 | loss 0.00177157
| Epoch  91 |   

| Epoch  94 |   600/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00160750
| Epoch  94 |   625/  632 batches | lr 0.00010 | ms/batch  7.27 | loss 0.00165616
| Epoch  95 |    25/  632 batches | lr 0.00010 | ms/batch  7.51 | loss 0.00170711
| Epoch  95 |    50/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00167463
| Epoch  95 |    75/  632 batches | lr 0.00010 | ms/batch  7.23 | loss 0.00165477
| Epoch  95 |   100/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00168914
| Epoch  95 |   125/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00171062
| Epoch  95 |   150/  632 batches | lr 0.00010 | ms/batch  9.11 | loss 0.00162587
| Epoch  95 |   175/  632 batches | lr 0.00010 | ms/batch  7.33 | loss 0.00164443
| Epoch  95 |   200/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00167047
| Epoch  95 |   225/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00163543
| Epoch  95 |   250/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00173014
| Epoch  95 |   

| Epoch  98 |   600/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00157078
| Epoch  98 |   625/  632 batches | lr 0.00010 | ms/batch  8.96 | loss 0.00162286
| Epoch  99 |    25/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00166751
| Epoch  99 |    50/  632 batches | lr 0.00010 | ms/batch  7.29 | loss 0.00163647
| Epoch  99 |    75/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00161696
| Epoch  99 |   100/  632 batches | lr 0.00010 | ms/batch  9.30 | loss 0.00165617
| Epoch  99 |   125/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00167725
| Epoch  99 |   150/  632 batches | lr 0.00010 | ms/batch  7.26 | loss 0.00158799
| Epoch  99 |   175/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00161598
| Epoch  99 |   200/  632 batches | lr 0.00010 | ms/batch  8.96 | loss 0.00163293
| Epoch  99 |   225/  632 batches | lr 0.00010 | ms/batch  8.96 | loss 0.00159596
| Epoch  99 |   250/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00169019
| Epoch  99 |   

| Epoch 102 |   600/  632 batches | lr 0.00010 | ms/batch  8.89 | loss 0.00153722
| Epoch 102 |   625/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00158869
| Epoch 103 |    25/  632 batches | lr 0.00010 | ms/batch  7.50 | loss 0.00163005
| Epoch 103 |    50/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00159909
| Epoch 103 |    75/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00158006
| Epoch 103 |   100/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00162558
| Epoch 103 |   125/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00164388
| Epoch 103 |   150/  632 batches | lr 0.00010 | ms/batch  9.07 | loss 0.00155032
| Epoch 103 |   175/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00158595
| Epoch 103 |   200/  632 batches | lr 0.00010 | ms/batch  7.47 | loss 0.00159491
| Epoch 103 |   225/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00155595
| Epoch 103 |   250/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00165252
| Epoch 103 |   

| Epoch 106 |   600/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00150025
| Epoch 106 |   625/  632 batches | lr 0.00010 | ms/batch  8.67 | loss 0.00155485
| Epoch 107 |    25/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00158978
| Epoch 107 |    50/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00156257
| Epoch 107 |    75/  632 batches | lr 0.00010 | ms/batch  7.17 | loss 0.00154463
| Epoch 107 |   100/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00159252
| Epoch 107 |   125/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00161753
| Epoch 107 |   150/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00151207
| Epoch 107 |   175/  632 batches | lr 0.00010 | ms/batch  7.17 | loss 0.00154930
| Epoch 107 |   200/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00155497
| Epoch 107 |   225/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00151813
| Epoch 107 |   250/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00161618
| Epoch 107 |   

| Epoch 110 |   600/  632 batches | lr 0.00010 | ms/batch  8.66 | loss 0.00146481
| Epoch 110 |   625/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00152201
| Epoch 111 |    25/  632 batches | lr 0.00010 | ms/batch  7.52 | loss 0.00155524
| Epoch 111 |    50/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00152373
| Epoch 111 |    75/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00150721
| Epoch 111 |   100/  632 batches | lr 0.00010 | ms/batch  8.67 | loss 0.00155560
| Epoch 111 |   125/  632 batches | lr 0.00010 | ms/batch  7.12 | loss 0.00159017
| Epoch 111 |   150/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00147590
| Epoch 111 |   175/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00151822
| Epoch 111 |   200/  632 batches | lr 0.00010 | ms/batch  8.66 | loss 0.00151857
| Epoch 111 |   225/  632 batches | lr 0.00010 | ms/batch  7.14 | loss 0.00148129
| Epoch 111 |   250/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00158217
| Epoch 111 |   

| Epoch 114 |   600/  632 batches | lr 0.00010 | ms/batch  7.30 | loss 0.00142758
| Epoch 114 |   625/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00149104
| Epoch 115 |    25/  632 batches | lr 0.00010 | ms/batch  9.06 | loss 0.00151391
| Epoch 115 |    50/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00148774
| Epoch 115 |    75/  632 batches | lr 0.00010 | ms/batch  7.24 | loss 0.00147227
| Epoch 115 |   100/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00152089
| Epoch 115 |   125/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00156609
| Epoch 115 |   150/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00144047
| Epoch 115 |   175/  632 batches | lr 0.00010 | ms/batch  7.31 | loss 0.00148384
| Epoch 115 |   200/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00147722
| Epoch 115 |   225/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00144219
| Epoch 115 |   250/  632 batches | lr 0.00010 | ms/batch  9.13 | loss 0.00155113
| Epoch 115 |   

| Epoch 118 |   600/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00139239
| Epoch 118 |   625/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00145749
| Epoch 119 |    25/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00148015
| Epoch 119 |    50/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00144969
| Epoch 119 |    75/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00143671
| Epoch 119 |   100/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00148569
| Epoch 119 |   125/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00153463
| Epoch 119 |   150/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00140832
| Epoch 119 |   175/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00144949
| Epoch 119 |   200/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00144034
| Epoch 119 |   225/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00140097
| Epoch 119 |   250/  632 batches | lr 0.00010 | ms/batch  7.16 | loss 0.00151814
| Epoch 119 |   

| Epoch 122 |   600/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00135542
| Epoch 122 |   625/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00142188
| Epoch 123 |    25/  632 batches | lr 0.00010 | ms/batch  9.06 | loss 0.00144404
| Epoch 123 |    50/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00141144
| Epoch 123 |    75/  632 batches | lr 0.00010 | ms/batch  7.16 | loss 0.00140421
| Epoch 123 |   100/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00144905
| Epoch 123 |   125/  632 batches | lr 0.00010 | ms/batch  8.70 | loss 0.00150223
| Epoch 123 |   150/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00137916
| Epoch 123 |   175/  632 batches | lr 0.00010 | ms/batch  7.11 | loss 0.00141854
| Epoch 123 |   200/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00140317
| Epoch 123 |   225/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00136131
| Epoch 123 |   250/  632 batches | lr 0.00010 | ms/batch  8.66 | loss 0.00149153
| Epoch 123 |   

| Epoch   1 |   575/  632 batches | lr 0.00010 | ms/batch  7.37 | loss 0.00241498
| Epoch   1 |   600/  632 batches | lr 0.00010 | ms/batch  9.15 | loss 0.00238904
| Epoch   1 |   625/  632 batches | lr 0.00010 | ms/batch  9.13 | loss 0.00247049
| Epoch   2 |    25/  632 batches | lr 0.00010 | ms/batch  9.99 | loss 0.00246924
| Epoch   2 |    50/  632 batches | lr 0.00010 | ms/batch  7.91 | loss 0.00242457
| Epoch   2 |    75/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00242960
| Epoch   2 |   100/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00239527
| Epoch   2 |   125/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00243049
| Epoch   2 |   150/  632 batches | lr 0.00010 | ms/batch  7.25 | loss 0.00240192
| Epoch   2 |   175/  632 batches | lr 0.00010 | ms/batch  9.08 | loss 0.00233837
| Epoch   2 |   200/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00237983
| Epoch   2 |   225/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00234774
| Epoch   2 |   

| Epoch   5 |   575/  632 batches | lr 0.00010 | ms/batch  9.10 | loss 0.00235810
| Epoch   5 |   600/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00231546
| Epoch   5 |   625/  632 batches | lr 0.00010 | ms/batch  7.38 | loss 0.00239725
| Epoch   6 |    25/  632 batches | lr 0.00010 | ms/batch  7.64 | loss 0.00238433
| Epoch   6 |    50/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00235097
| Epoch   6 |    75/  632 batches | lr 0.00010 | ms/batch  7.18 | loss 0.00236199
| Epoch   6 |   100/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00234673
| Epoch   6 |   125/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00236478
| Epoch   6 |   150/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00233891
| Epoch   6 |   175/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00228557
| Epoch   6 |   200/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00231834
| Epoch   6 |   225/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00229115
| Epoch   6 |   

| Epoch   9 |   575/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00231984
| Epoch   9 |   600/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00227291
| Epoch   9 |   625/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00234601
| Epoch  10 |    25/  632 batches | lr 0.00010 | ms/batch  9.14 | loss 0.00234763
| Epoch  10 |    50/  632 batches | lr 0.00010 | ms/batch  7.20 | loss 0.00231352
| Epoch  10 |    75/  632 batches | lr 0.00010 | ms/batch  8.81 | loss 0.00232275
| Epoch  10 |   100/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00232757
| Epoch  10 |   125/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00233308
| Epoch  10 |   150/  632 batches | lr 0.00010 | ms/batch  7.22 | loss 0.00227739
| Epoch  10 |   175/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00226101
| Epoch  10 |   200/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00228550
| Epoch  10 |   225/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00226691
| Epoch  10 |   

| Epoch  13 |   575/  632 batches | lr 0.00010 | ms/batch  8.76 | loss 0.00229374
| Epoch  13 |   600/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00224331
| Epoch  13 |   625/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00229908
| Epoch  14 |    25/  632 batches | lr 0.00010 | ms/batch  7.42 | loss 0.00232447
| Epoch  14 |    50/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00229331
| Epoch  14 |    75/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00229457
| Epoch  14 |   100/  632 batches | lr 0.00010 | ms/batch  7.14 | loss 0.00229766
| Epoch  14 |   125/  632 batches | lr 0.00010 | ms/batch  8.73 | loss 0.00229157
| Epoch  14 |   150/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00223885
| Epoch  14 |   175/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00223540
| Epoch  14 |   200/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00225963
| Epoch  14 |   225/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00224355
| Epoch  14 |   

| Epoch  17 |   575/  632 batches | lr 0.00010 | ms/batch  7.69 | loss 0.00227040
| Epoch  17 |   600/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00221192
| Epoch  17 |   625/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00225275
| Epoch  18 |    25/  632 batches | lr 0.00010 | ms/batch 10.25 | loss 0.00230272
| Epoch  18 |    50/  632 batches | lr 0.00010 | ms/batch  9.11 | loss 0.00227685
| Epoch  18 |    75/  632 batches | lr 0.00010 | ms/batch  7.32 | loss 0.00226908
| Epoch  18 |   100/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00226719
| Epoch  18 |   125/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00226036
| Epoch  18 |   150/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00221368
| Epoch  18 |   175/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00221071
| Epoch  18 |   200/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00223947
| Epoch  18 |   225/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00222008
| Epoch  18 |   

| Epoch  21 |   575/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00225122
| Epoch  21 |   600/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00218833
| Epoch  21 |   625/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00222017
| Epoch  22 |    25/  632 batches | lr 0.00010 | ms/batch  8.09 | loss 0.00228272
| Epoch  22 |    50/  632 batches | lr 0.00010 | ms/batch  9.10 | loss 0.00225581
| Epoch  22 |    75/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00224276
| Epoch  22 |   100/  632 batches | lr 0.00010 | ms/batch  9.11 | loss 0.00223802
| Epoch  22 |   125/  632 batches | lr 0.00010 | ms/batch  7.66 | loss 0.00224042
| Epoch  22 |   150/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00219184
| Epoch  22 |   175/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00218854
| Epoch  22 |   200/  632 batches | lr 0.00010 | ms/batch  8.96 | loss 0.00221782
| Epoch  22 |   225/  632 batches | lr 0.00010 | ms/batch  7.84 | loss 0.00220188
| Epoch  22 |   

| Epoch  25 |   575/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00223261
| Epoch  25 |   600/  632 batches | lr 0.00010 | ms/batch  7.27 | loss 0.00216568
| Epoch  25 |   625/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00219234
| Epoch  26 |    25/  632 batches | lr 0.00010 | ms/batch  9.41 | loss 0.00226315
| Epoch  26 |    50/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00223232
| Epoch  26 |    75/  632 batches | lr 0.00010 | ms/batch  7.91 | loss 0.00221965
| Epoch  26 |   100/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00221644
| Epoch  26 |   125/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00222024
| Epoch  26 |   150/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00216720
| Epoch  26 |   175/  632 batches | lr 0.00010 | ms/batch  7.79 | loss 0.00216422
| Epoch  26 |   200/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00219537
| Epoch  26 |   225/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00218692
| Epoch  26 |   

| Epoch  29 |   575/  632 batches | lr 0.00010 | ms/batch  7.29 | loss 0.00221340
| Epoch  29 |   600/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00214229
| Epoch  29 |   625/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00216647
| Epoch  30 |    25/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00224442
| Epoch  30 |    50/  632 batches | lr 0.00010 | ms/batch  7.28 | loss 0.00220461
| Epoch  30 |    75/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00219787
| Epoch  30 |   100/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00219312
| Epoch  30 |   125/  632 batches | lr 0.00010 | ms/batch  9.19 | loss 0.00220432
| Epoch  30 |   150/  632 batches | lr 0.00010 | ms/batch  7.57 | loss 0.00214328
| Epoch  30 |   175/  632 batches | lr 0.00010 | ms/batch  9.14 | loss 0.00213914
| Epoch  30 |   200/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00217545
| Epoch  30 |   225/  632 batches | lr 0.00010 | ms/batch  9.18 | loss 0.00217399
| Epoch  30 |   

| Epoch  33 |   575/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00219419
| Epoch  33 |   600/  632 batches | lr 0.00010 | ms/batch  7.34 | loss 0.00211648
| Epoch  33 |   625/  632 batches | lr 0.00010 | ms/batch  9.48 | loss 0.00214170
| Epoch  34 |    25/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00222370
| Epoch  34 |    50/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00217503
| Epoch  34 |    75/  632 batches | lr 0.00010 | ms/batch  7.61 | loss 0.00217617
| Epoch  34 |   100/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00216630
| Epoch  34 |   125/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00218496
| Epoch  34 |   150/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00212022
| Epoch  34 |   175/  632 batches | lr 0.00010 | ms/batch  7.34 | loss 0.00211729
| Epoch  34 |   200/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00215674
| Epoch  34 |   225/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00215816
| Epoch  34 |   

| Epoch  37 |   575/  632 batches | lr 0.00010 | ms/batch  7.48 | loss 0.00216907
| Epoch  37 |   600/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00209014
| Epoch  37 |   625/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00211531
| Epoch  38 |    25/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00220182
| Epoch  38 |    50/  632 batches | lr 0.00010 | ms/batch  8.05 | loss 0.00215035
| Epoch  38 |    75/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00215576
| Epoch  38 |   100/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00213763
| Epoch  38 |   125/  632 batches | lr 0.00010 | ms/batch  9.03 | loss 0.00216497
| Epoch  38 |   150/  632 batches | lr 0.00010 | ms/batch  7.48 | loss 0.00209712
| Epoch  38 |   175/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00209552
| Epoch  38 |   200/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00213745
| Epoch  38 |   225/  632 batches | lr 0.00010 | ms/batch  9.18 | loss 0.00213088
| Epoch  38 |   

| Epoch  41 |   575/  632 batches | lr 0.00010 | ms/batch  9.29 | loss 0.00214039
| Epoch  41 |   600/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00206485
| Epoch  41 |   625/  632 batches | lr 0.00010 | ms/batch  7.47 | loss 0.00208806
| Epoch  42 |    25/  632 batches | lr 0.00010 | ms/batch  7.80 | loss 0.00217677
| Epoch  42 |    50/  632 batches | lr 0.00010 | ms/batch  9.18 | loss 0.00212794
| Epoch  42 |    75/  632 batches | lr 0.00010 | ms/batch  7.59 | loss 0.00213471
| Epoch  42 |   100/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00211044
| Epoch  42 |   125/  632 batches | lr 0.00010 | ms/batch  9.41 | loss 0.00213906
| Epoch  42 |   150/  632 batches | lr 0.00010 | ms/batch  9.27 | loss 0.00207479
| Epoch  42 |   175/  632 batches | lr 0.00010 | ms/batch  7.42 | loss 0.00206828
| Epoch  42 |   200/  632 batches | lr 0.00010 | ms/batch  9.02 | loss 0.00211450
| Epoch  42 |   225/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00209838
| Epoch  42 |   

| Epoch  45 |   575/  632 batches | lr 0.00010 | ms/batch  7.54 | loss 0.00210907
| Epoch  45 |   600/  632 batches | lr 0.00010 | ms/batch  9.30 | loss 0.00204470
| Epoch  45 |   625/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00205735
| Epoch  46 |    25/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00215303
| Epoch  46 |    50/  632 batches | lr 0.00010 | ms/batch  7.88 | loss 0.00210681
| Epoch  46 |    75/  632 batches | lr 0.00010 | ms/batch  9.70 | loss 0.00211021
| Epoch  46 |   100/  632 batches | lr 0.00010 | ms/batch  9.14 | loss 0.00207437
| Epoch  46 |   125/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00210544
| Epoch  46 |   150/  632 batches | lr 0.00010 | ms/batch  7.58 | loss 0.00205146
| Epoch  46 |   175/  632 batches | lr 0.00010 | ms/batch  8.88 | loss 0.00203633
| Epoch  46 |   200/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00208954
| Epoch  46 |   225/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00207689
| Epoch  46 |   

| Epoch  49 |   575/  632 batches | lr 0.00010 | ms/batch  8.94 | loss 0.00207189
| Epoch  49 |   600/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00201628
| Epoch  49 |   625/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00203462
| Epoch  50 |    25/  632 batches | lr 0.00010 | ms/batch  7.81 | loss 0.00213257
| Epoch  50 |    50/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00209461
| Epoch  50 |    75/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00208750
| Epoch  50 |   100/  632 batches | lr 0.00010 | ms/batch  7.26 | loss 0.00204796
| Epoch  50 |   125/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00207951
| Epoch  50 |   150/  632 batches | lr 0.00010 | ms/batch  8.86 | loss 0.00202741
| Epoch  50 |   175/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00201790
| Epoch  50 |   200/  632 batches | lr 0.00010 | ms/batch  7.32 | loss 0.00206640
| Epoch  50 |   225/  632 batches | lr 0.00010 | ms/batch  9.25 | loss 0.00205885
| Epoch  50 |   

| Epoch  53 |   575/  632 batches | lr 0.00010 | ms/batch  7.65 | loss 0.00204664
| Epoch  53 |   600/  632 batches | lr 0.00010 | ms/batch  9.25 | loss 0.00198692
| Epoch  53 |   625/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00200965
| Epoch  54 |    25/  632 batches | lr 0.00010 | ms/batch  9.03 | loss 0.00210529
| Epoch  54 |    50/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00207124
| Epoch  54 |    75/  632 batches | lr 0.00010 | ms/batch  7.23 | loss 0.00205850
| Epoch  54 |   100/  632 batches | lr 0.00010 | ms/batch  8.87 | loss 0.00202283
| Epoch  54 |   125/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00205271
| Epoch  54 |   150/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00200292
| Epoch  54 |   175/  632 batches | lr 0.00010 | ms/batch  7.77 | loss 0.00199357
| Epoch  54 |   200/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00203851
| Epoch  54 |   225/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00203267
| Epoch  54 |   

| Epoch  57 |   575/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00202112
| Epoch  57 |   600/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00195881
| Epoch  57 |   625/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00198191
| Epoch  58 |    25/  632 batches | lr 0.00010 | ms/batch  7.60 | loss 0.00207549
| Epoch  58 |    50/  632 batches | lr 0.00010 | ms/batch  9.05 | loss 0.00204323
| Epoch  58 |    75/  632 batches | lr 0.00010 | ms/batch  9.08 | loss 0.00202766
| Epoch  58 |   100/  632 batches | lr 0.00010 | ms/batch  8.74 | loss 0.00199280
| Epoch  58 |   125/  632 batches | lr 0.00010 | ms/batch  7.37 | loss 0.00202429
| Epoch  58 |   150/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00197647
| Epoch  58 |   175/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00196556
| Epoch  58 |   200/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00200869
| Epoch  58 |   225/  632 batches | lr 0.00010 | ms/batch  7.48 | loss 0.00200592
| Epoch  58 |   

| Epoch  61 |   575/  632 batches | lr 0.00010 | ms/batch  9.85 | loss 0.00199488
| Epoch  61 |   600/  632 batches | lr 0.00010 | ms/batch  7.37 | loss 0.00192912
| Epoch  61 |   625/  632 batches | lr 0.00010 | ms/batch  9.24 | loss 0.00195242
| Epoch  62 |    25/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00204503
| Epoch  62 |    50/  632 batches | lr 0.00010 | ms/batch  9.19 | loss 0.00201439
| Epoch  62 |    75/  632 batches | lr 0.00010 | ms/batch  7.39 | loss 0.00199607
| Epoch  62 |   100/  632 batches | lr 0.00010 | ms/batch  9.56 | loss 0.00196347
| Epoch  62 |   125/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00199568
| Epoch  62 |   150/  632 batches | lr 0.00010 | ms/batch  9.26 | loss 0.00194533
| Epoch  62 |   175/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00193935
| Epoch  62 |   200/  632 batches | lr 0.00010 | ms/batch  8.82 | loss 0.00197944
| Epoch  62 |   225/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00197942
| Epoch  62 |   

| Epoch  65 |   575/  632 batches | lr 0.00010 | ms/batch  7.56 | loss 0.00196615
| Epoch  65 |   600/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00189803
| Epoch  65 |   625/  632 batches | lr 0.00010 | ms/batch  9.16 | loss 0.00192346
| Epoch  66 |    25/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00201089
| Epoch  66 |    50/  632 batches | lr 0.00010 | ms/batch  7.69 | loss 0.00198351
| Epoch  66 |    75/  632 batches | lr 0.00010 | ms/batch  9.67 | loss 0.00196191
| Epoch  66 |   100/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00193143
| Epoch  66 |   125/  632 batches | lr 0.00010 | ms/batch  9.48 | loss 0.00196210
| Epoch  66 |   150/  632 batches | lr 0.00010 | ms/batch  7.61 | loss 0.00191533
| Epoch  66 |   175/  632 batches | lr 0.00010 | ms/batch  9.12 | loss 0.00190717
| Epoch  66 |   200/  632 batches | lr 0.00010 | ms/batch  9.10 | loss 0.00194886
| Epoch  66 |   225/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00195424
| Epoch  66 |   

| Epoch  69 |   575/  632 batches | lr 0.00010 | ms/batch  9.16 | loss 0.00193747
| Epoch  69 |   600/  632 batches | lr 0.00010 | ms/batch  7.95 | loss 0.00186609
| Epoch  69 |   625/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00189084
| Epoch  70 |    25/  632 batches | lr 0.00010 | ms/batch  9.17 | loss 0.00197520
| Epoch  70 |    50/  632 batches | lr 0.00010 | ms/batch  9.62 | loss 0.00194883
| Epoch  70 |    75/  632 batches | lr 0.00010 | ms/batch  7.60 | loss 0.00192573
| Epoch  70 |   100/  632 batches | lr 0.00010 | ms/batch  9.13 | loss 0.00189446
| Epoch  70 |   125/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00193001
| Epoch  70 |   150/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00188213
| Epoch  70 |   175/  632 batches | lr 0.00010 | ms/batch  7.32 | loss 0.00187231
| Epoch  70 |   200/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00191423
| Epoch  70 |   225/  632 batches | lr 0.00010 | ms/batch  9.08 | loss 0.00192536
| Epoch  70 |   

| Epoch  73 |   575/  632 batches | lr 0.00010 | ms/batch  7.55 | loss 0.00190617
| Epoch  73 |   600/  632 batches | lr 0.00010 | ms/batch  9.51 | loss 0.00183114
| Epoch  73 |   625/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00185877
| Epoch  74 |    25/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00193873
| Epoch  74 |    50/  632 batches | lr 0.00010 | ms/batch  7.49 | loss 0.00191023
| Epoch  74 |    75/  632 batches | lr 0.00010 | ms/batch  9.12 | loss 0.00188912
| Epoch  74 |   100/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00185982
| Epoch  74 |   125/  632 batches | lr 0.00010 | ms/batch  9.14 | loss 0.00189688
| Epoch  74 |   150/  632 batches | lr 0.00010 | ms/batch  7.37 | loss 0.00184903
| Epoch  74 |   175/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00183764
| Epoch  74 |   200/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00187994
| Epoch  74 |   225/  632 batches | lr 0.00010 | ms/batch  9.30 | loss 0.00189611
| Epoch  74 |   

| Epoch  77 |   575/  632 batches | lr 0.00010 | ms/batch  8.75 | loss 0.00187111
| Epoch  77 |   600/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00179651
| Epoch  77 |   625/  632 batches | lr 0.00010 | ms/batch  7.21 | loss 0.00182502
| Epoch  78 |    25/  632 batches | lr 0.00010 | ms/batch  8.15 | loss 0.00189922
| Epoch  78 |    50/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00187061
| Epoch  78 |    75/  632 batches | lr 0.00010 | ms/batch  7.44 | loss 0.00185214
| Epoch  78 |   100/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00182671
| Epoch  78 |   125/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00186446
| Epoch  78 |   150/  632 batches | lr 0.00010 | ms/batch  9.50 | loss 0.00181508
| Epoch  78 |   175/  632 batches | lr 0.00010 | ms/batch  8.13 | loss 0.00180079
| Epoch  78 |   200/  632 batches | lr 0.00010 | ms/batch  9.14 | loss 0.00184230
| Epoch  78 |   225/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00186099
| Epoch  78 |   

| Epoch  81 |   575/  632 batches | lr 0.00010 | ms/batch  7.73 | loss 0.00183495
| Epoch  81 |   600/  632 batches | lr 0.00010 | ms/batch  9.12 | loss 0.00176121
| Epoch  81 |   625/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00179125
| Epoch  82 |    25/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00185931
| Epoch  82 |    50/  632 batches | lr 0.00010 | ms/batch  7.45 | loss 0.00183072
| Epoch  82 |    75/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00181456
| Epoch  82 |   100/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00179412
| Epoch  82 |   125/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00182838
| Epoch  82 |   150/  632 batches | lr 0.00010 | ms/batch  7.94 | loss 0.00178162
| Epoch  82 |   175/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00176474
| Epoch  82 |   200/  632 batches | lr 0.00010 | ms/batch  8.92 | loss 0.00180486
| Epoch  82 |   225/  632 batches | lr 0.00010 | ms/batch  8.95 | loss 0.00182179
| Epoch  82 |   

| Epoch  85 |   575/  632 batches | lr 0.00010 | ms/batch  9.51 | loss 0.00179841
| Epoch  85 |   600/  632 batches | lr 0.00010 | ms/batch  9.54 | loss 0.00172544
| Epoch  85 |   625/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00175845
| Epoch  86 |    25/  632 batches | lr 0.00010 | ms/batch  8.39 | loss 0.00181439
| Epoch  86 |    50/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00178612
| Epoch  86 |    75/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00177717
| Epoch  86 |   100/  632 batches | lr 0.00010 | ms/batch  7.93 | loss 0.00176451
| Epoch  86 |   125/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00179519
| Epoch  86 |   150/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00174468
| Epoch  86 |   175/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00172764
| Epoch  86 |   200/  632 batches | lr 0.00010 | ms/batch  7.96 | loss 0.00176649
| Epoch  86 |   225/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00177501
| Epoch  86 |   

| Epoch  89 |   575/  632 batches | lr 0.00010 | ms/batch  8.26 | loss 0.00175949
| Epoch  89 |   600/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00169574
| Epoch  89 |   625/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00173359
| Epoch  90 |    25/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00177048
| Epoch  90 |    50/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00174150
| Epoch  90 |    75/  632 batches | lr 0.00010 | ms/batch  7.78 | loss 0.00174195
| Epoch  90 |   100/  632 batches | lr 0.00010 | ms/batch  9.37 | loss 0.00174200
| Epoch  90 |   125/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00176187
| Epoch  90 |   150/  632 batches | lr 0.00010 | ms/batch  9.64 | loss 0.00170463
| Epoch  90 |   175/  632 batches | lr 0.00010 | ms/batch  8.03 | loss 0.00170349
| Epoch  90 |   200/  632 batches | lr 0.00010 | ms/batch  9.12 | loss 0.00173900
| Epoch  90 |   225/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00174238
| Epoch  90 |   

| Epoch  93 |   575/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00171849
| Epoch  93 |   600/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00166104
| Epoch  93 |   625/  632 batches | lr 0.00010 | ms/batch  9.66 | loss 0.00169733
| Epoch  94 |    25/  632 batches | lr 0.00010 | ms/batch  8.15 | loss 0.00172922
| Epoch  94 |    50/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00170098
| Epoch  94 |    75/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00170409
| Epoch  94 |   100/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00170849
| Epoch  94 |   125/  632 batches | lr 0.00010 | ms/batch  8.28 | loss 0.00172121
| Epoch  94 |   150/  632 batches | lr 0.00010 | ms/batch  9.65 | loss 0.00166816
| Epoch  94 |   175/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00167205
| Epoch  94 |   200/  632 batches | lr 0.00010 | ms/batch 10.14 | loss 0.00170779
| Epoch  94 |   225/  632 batches | lr 0.00010 | ms/batch  7.60 | loss 0.00170813
| Epoch  94 |   

| Epoch  97 |   575/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00167776
| Epoch  97 |   600/  632 batches | lr 0.00010 | ms/batch  8.15 | loss 0.00162422
| Epoch  97 |   625/  632 batches | lr 0.00010 | ms/batch 10.34 | loss 0.00166279
| Epoch  98 |    25/  632 batches | lr 0.00010 | ms/batch 10.42 | loss 0.00168749
| Epoch  98 |    50/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00166281
| Epoch  98 |    75/  632 batches | lr 0.00010 | ms/batch  8.29 | loss 0.00166633
| Epoch  98 |   100/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00167042
| Epoch  98 |   125/  632 batches | lr 0.00010 | ms/batch 10.59 | loss 0.00168634
| Epoch  98 |   150/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00162986
| Epoch  98 |   175/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00163612
| Epoch  98 |   200/  632 batches | lr 0.00010 | ms/batch  9.39 | loss 0.00167503
| Epoch  98 |   225/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00167150
| Epoch  98 |   

| Epoch 101 |   575/  632 batches | lr 0.00010 | ms/batch  7.27 | loss 0.00163689
| Epoch 101 |   600/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00159043
| Epoch 101 |   625/  632 batches | lr 0.00010 | ms/batch  8.93 | loss 0.00163043
| Epoch 102 |    25/  632 batches | lr 0.00010 | ms/batch  9.26 | loss 0.00164795
| Epoch 102 |    50/  632 batches | lr 0.00010 | ms/batch  7.24 | loss 0.00162446
| Epoch 102 |    75/  632 batches | lr 0.00010 | ms/batch  8.83 | loss 0.00162687
| Epoch 102 |   100/  632 batches | lr 0.00010 | ms/batch  8.79 | loss 0.00163313
| Epoch 102 |   125/  632 batches | lr 0.00010 | ms/batch  8.77 | loss 0.00165565
| Epoch 102 |   150/  632 batches | lr 0.00010 | ms/batch  7.41 | loss 0.00158927
| Epoch 102 |   175/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00159593
| Epoch 102 |   200/  632 batches | lr 0.00010 | ms/batch  9.04 | loss 0.00163853
| Epoch 102 |   225/  632 batches | lr 0.00010 | ms/batch  9.11 | loss 0.00163057
| Epoch 102 |   

| Epoch 105 |   575/  632 batches | lr 0.00010 | ms/batch  9.25 | loss 0.00159672
| Epoch 105 |   600/  632 batches | lr 0.00010 | ms/batch  7.50 | loss 0.00155633
| Epoch 105 |   625/  632 batches | lr 0.00010 | ms/batch  9.43 | loss 0.00159885
| Epoch 106 |    25/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00160925
| Epoch 106 |    50/  632 batches | lr 0.00010 | ms/batch  9.27 | loss 0.00158731
| Epoch 106 |    75/  632 batches | lr 0.00010 | ms/batch  7.90 | loss 0.00158802
| Epoch 106 |   100/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00159399
| Epoch 106 |   125/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00162696
| Epoch 106 |   150/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00154877
| Epoch 106 |   175/  632 batches | lr 0.00010 | ms/batch  7.78 | loss 0.00155330
| Epoch 106 |   200/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00159928
| Epoch 106 |   225/  632 batches | lr 0.00010 | ms/batch  9.68 | loss 0.00158061
| Epoch 106 |   

| Epoch 109 |   575/  632 batches | lr 0.00010 | ms/batch  7.42 | loss 0.00155564
| Epoch 109 |   600/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00152113
| Epoch 109 |   625/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00157175
| Epoch 110 |    25/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00157184
| Epoch 110 |    50/  632 batches | lr 0.00010 | ms/batch  7.35 | loss 0.00155043
| Epoch 110 |    75/  632 batches | lr 0.00010 | ms/batch  8.78 | loss 0.00155050
| Epoch 110 |   100/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00155350
| Epoch 110 |   125/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00159914
| Epoch 110 |   150/  632 batches | lr 0.00010 | ms/batch  7.56 | loss 0.00150762
| Epoch 110 |   175/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00151546
| Epoch 110 |   200/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00156000
| Epoch 110 |   225/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00153394
| Epoch 110 |   

| Epoch 113 |   575/  632 batches | lr 0.00010 | ms/batch 10.94 | loss 0.00151584
| Epoch 113 |   600/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00148283
| Epoch 113 |   625/  632 batches | lr 0.00010 | ms/batch  7.63 | loss 0.00153928
| Epoch 114 |    25/  632 batches | lr 0.00010 | ms/batch  8.07 | loss 0.00153368
| Epoch 114 |    50/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00151268
| Epoch 114 |    75/  632 batches | lr 0.00010 | ms/batch  7.33 | loss 0.00151204
| Epoch 114 |   100/  632 batches | lr 0.00010 | ms/batch  9.20 | loss 0.00150918
| Epoch 114 |   125/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00157349
| Epoch 114 |   150/  632 batches | lr 0.00010 | ms/batch  8.97 | loss 0.00147332
| Epoch 114 |   175/  632 batches | lr 0.00010 | ms/batch  7.56 | loss 0.00147828
| Epoch 114 |   200/  632 batches | lr 0.00010 | ms/batch  9.00 | loss 0.00152271
| Epoch 114 |   225/  632 batches | lr 0.00010 | ms/batch  9.29 | loss 0.00148477
| Epoch 114 |   

| Epoch 117 |   575/  632 batches | lr 0.00010 | ms/batch  7.30 | loss 0.00147415
| Epoch 117 |   600/  632 batches | lr 0.00010 | ms/batch  8.98 | loss 0.00144586
| Epoch 117 |   625/  632 batches | lr 0.00010 | ms/batch  9.16 | loss 0.00150879
| Epoch 118 |    25/  632 batches | lr 0.00010 | ms/batch  9.21 | loss 0.00149629
| Epoch 118 |    50/  632 batches | lr 0.00010 | ms/batch  7.27 | loss 0.00147711
| Epoch 118 |    75/  632 batches | lr 0.00010 | ms/batch  8.84 | loss 0.00147294
| Epoch 118 |   100/  632 batches | lr 0.00010 | ms/batch  9.01 | loss 0.00146934
| Epoch 118 |   125/  632 batches | lr 0.00010 | ms/batch  9.22 | loss 0.00154542
| Epoch 118 |   150/  632 batches | lr 0.00010 | ms/batch  7.25 | loss 0.00144001
| Epoch 118 |   175/  632 batches | lr 0.00010 | ms/batch  8.91 | loss 0.00143932
| Epoch 118 |   200/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00148569
| Epoch 118 |   225/  632 batches | lr 0.00010 | ms/batch  9.19 | loss 0.00144485
| Epoch 118 |   

| Epoch 121 |   575/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00143450
| Epoch 121 |   600/  632 batches | lr 0.00010 | ms/batch  8.85 | loss 0.00140707
| Epoch 121 |   625/  632 batches | lr 0.00010 | ms/batch  9.26 | loss 0.00147340
| Epoch 122 |    25/  632 batches | lr 0.00010 | ms/batch  8.15 | loss 0.00146194
| Epoch 122 |    50/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00144249
| Epoch 122 |    75/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00143463
| Epoch 122 |   100/  632 batches | lr 0.00010 | ms/batch  8.13 | loss 0.00142882
| Epoch 122 |   125/  632 batches | lr 0.00010 | ms/batch  9.50 | loss 0.00152030
| Epoch 122 |   150/  632 batches | lr 0.00010 | ms/batch  9.26 | loss 0.00141250
| Epoch 122 |   175/  632 batches | lr 0.00010 | ms/batch  9.66 | loss 0.00140538
| Epoch 122 |   200/  632 batches | lr 0.00010 | ms/batch  7.75 | loss 0.00144718
| Epoch 122 |   225/  632 batches | lr 0.00010 | ms/batch  8.99 | loss 0.00140555
| Epoch 122 |   

| Epoch   0 |   550/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00252913
| Epoch   0 |   575/  632 batches | lr 0.00010 | ms/batch  8.06 | loss 0.00242511
| Epoch   0 |   600/  632 batches | lr 0.00010 | ms/batch  9.61 | loss 0.00239489
| Epoch   0 |   625/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00247121
| Epoch   1 |    25/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00249879
| Epoch   1 |    50/  632 batches | lr 0.00010 | ms/batch  9.59 | loss 0.00245059
| Epoch   1 |    75/  632 batches | lr 0.00010 | ms/batch  8.27 | loss 0.00245902
| Epoch   1 |   100/  632 batches | lr 0.00010 | ms/batch  9.61 | loss 0.00240797
| Epoch   1 |   125/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00243038
| Epoch   1 |   150/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00245540
| Epoch   1 |   175/  632 batches | lr 0.00010 | ms/batch  8.16 | loss 0.00235368
| Epoch   1 |   200/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00238912
| Epoch   1 |   

| Epoch   4 |   550/  632 batches | lr 0.00010 | ms/batch  8.12 | loss 0.00247463
| Epoch   4 |   575/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00236348
| Epoch   4 |   600/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00229539
| Epoch   4 |   625/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00235020
| Epoch   5 |    25/  632 batches | lr 0.00010 | ms/batch  8.35 | loss 0.00237109
| Epoch   5 |    50/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00233477
| Epoch   5 |    75/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00235844
| Epoch   5 |   100/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00235618
| Epoch   5 |   125/  632 batches | lr 0.00010 | ms/batch  8.20 | loss 0.00232590
| Epoch   5 |   150/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00240098
| Epoch   5 |   175/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00231348
| Epoch   5 |   200/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00230641
| Epoch   5 |   

| Epoch   8 |   550/  632 batches | lr 0.00010 | ms/batch 10.68 | loss 0.00241170
| Epoch   8 |   575/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00232048
| Epoch   8 |   600/  632 batches | lr 0.00010 | ms/batch  8.00 | loss 0.00226807
| Epoch   8 |   625/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00234341
| Epoch   9 |    25/  632 batches | lr 0.00010 | ms/batch 10.49 | loss 0.00233426
| Epoch   9 |    50/  632 batches | lr 0.00010 | ms/batch  9.97 | loss 0.00231746
| Epoch   9 |    75/  632 batches | lr 0.00010 | ms/batch  8.33 | loss 0.00233534
| Epoch   9 |   100/  632 batches | lr 0.00010 | ms/batch  9.76 | loss 0.00229414
| Epoch   9 |   125/  632 batches | lr 0.00010 | ms/batch 10.03 | loss 0.00231268
| Epoch   9 |   150/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00233020
| Epoch   9 |   175/  632 batches | lr 0.00010 | ms/batch  8.45 | loss 0.00224548
| Epoch   9 |   200/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00227249
| Epoch   9 |   

| Epoch  12 |   550/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00235935
| Epoch  12 |   575/  632 batches | lr 0.00010 | ms/batch  8.39 | loss 0.00228301
| Epoch  12 |   600/  632 batches | lr 0.00010 | ms/batch  9.68 | loss 0.00222121
| Epoch  12 |   625/  632 batches | lr 0.00010 | ms/batch  9.61 | loss 0.00229970
| Epoch  13 |    25/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00229827
| Epoch  13 |    50/  632 batches | lr 0.00010 | ms/batch  7.99 | loss 0.00228641
| Epoch  13 |    75/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00229356
| Epoch  13 |   100/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00227258
| Epoch  13 |   125/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00228365
| Epoch  13 |   150/  632 batches | lr 0.00010 | ms/batch  8.37 | loss 0.00226815
| Epoch  13 |   175/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00223235
| Epoch  13 |   200/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00224168
| Epoch  13 |   

| Epoch  16 |   550/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00229689
| Epoch  16 |   575/  632 batches | lr 0.00010 | ms/batch  9.52 | loss 0.00224259
| Epoch  16 |   600/  632 batches | lr 0.00010 | ms/batch  7.99 | loss 0.00218464
| Epoch  16 |   625/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00224905
| Epoch  17 |    25/  632 batches | lr 0.00010 | ms/batch  9.80 | loss 0.00227140
| Epoch  17 |    50/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00225502
| Epoch  17 |    75/  632 batches | lr 0.00010 | ms/batch  8.12 | loss 0.00225636
| Epoch  17 |   100/  632 batches | lr 0.00010 | ms/batch  9.69 | loss 0.00224187
| Epoch  17 |   125/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00224500
| Epoch  17 |   150/  632 batches | lr 0.00010 | ms/batch  9.57 | loss 0.00221918
| Epoch  17 |   175/  632 batches | lr 0.00010 | ms/batch  7.99 | loss 0.00219847
| Epoch  17 |   200/  632 batches | lr 0.00010 | ms/batch  9.64 | loss 0.00221191
| Epoch  17 |   

| Epoch  20 |   550/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00225565
| Epoch  20 |   575/  632 batches | lr 0.00010 | ms/batch  8.30 | loss 0.00221396
| Epoch  20 |   600/  632 batches | lr 0.00010 | ms/batch  9.99 | loss 0.00214904
| Epoch  20 |   625/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00219833
| Epoch  21 |    25/  632 batches | lr 0.00010 | ms/batch 10.47 | loss 0.00224512
| Epoch  21 |    50/  632 batches | lr 0.00010 | ms/batch  8.82 | loss 0.00222774
| Epoch  21 |    75/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00222050
| Epoch  21 |   100/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00220387
| Epoch  21 |   125/  632 batches | lr 0.00010 | ms/batch  9.97 | loss 0.00220477
| Epoch  21 |   150/  632 batches | lr 0.00010 | ms/batch  8.33 | loss 0.00217768
| Epoch  21 |   175/  632 batches | lr 0.00010 | ms/batch 10.36 | loss 0.00216128
| Epoch  21 |   200/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00218280
| Epoch  21 |   

| Epoch  24 |   550/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00222294
| Epoch  24 |   575/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00218641
| Epoch  24 |   600/  632 batches | lr 0.00010 | ms/batch  9.47 | loss 0.00211467
| Epoch  24 |   625/  632 batches | lr 0.00010 | ms/batch  7.88 | loss 0.00216127
| Epoch  25 |    25/  632 batches | lr 0.00010 | ms/batch  8.25 | loss 0.00221748
| Epoch  25 |    50/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00219848
| Epoch  25 |    75/  632 batches | lr 0.00010 | ms/batch  7.91 | loss 0.00219057
| Epoch  25 |   100/  632 batches | lr 0.00010 | ms/batch  9.56 | loss 0.00216887
| Epoch  25 |   125/  632 batches | lr 0.00010 | ms/batch  9.50 | loss 0.00217228
| Epoch  25 |   150/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00214190
| Epoch  25 |   175/  632 batches | lr 0.00010 | ms/batch  7.90 | loss 0.00212195
| Epoch  25 |   200/  632 batches | lr 0.00010 | ms/batch  9.41 | loss 0.00215519
| Epoch  25 |   

| Epoch  28 |   550/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00218742
| Epoch  28 |   575/  632 batches | lr 0.00010 | ms/batch  8.04 | loss 0.00215871
| Epoch  28 |   600/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00208175
| Epoch  28 |   625/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00212635
| Epoch  29 |    25/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00218296
| Epoch  29 |    50/  632 batches | lr 0.00010 | ms/batch  8.16 | loss 0.00216498
| Epoch  29 |    75/  632 batches | lr 0.00010 | ms/batch  9.67 | loss 0.00215979
| Epoch  29 |   100/  632 batches | lr 0.00010 | ms/batch  9.43 | loss 0.00213270
| Epoch  29 |   125/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00213825
| Epoch  29 |   150/  632 batches | lr 0.00010 | ms/batch  8.03 | loss 0.00210695
| Epoch  29 |   175/  632 batches | lr 0.00010 | ms/batch  9.58 | loss 0.00208576
| Epoch  29 |   200/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00212524
| Epoch  29 |   

| Epoch  32 |   550/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00215484
| Epoch  32 |   575/  632 batches | lr 0.00010 | ms/batch 11.23 | loss 0.00212825
| Epoch  32 |   600/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00204666
| Epoch  32 |   625/  632 batches | lr 0.00010 | ms/batch 10.11 | loss 0.00209147
| Epoch  33 |    25/  632 batches | lr 0.00010 | ms/batch  8.72 | loss 0.00214664
| Epoch  33 |    50/  632 batches | lr 0.00010 | ms/batch 10.38 | loss 0.00213078
| Epoch  33 |    75/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00212818
| Epoch  33 |   100/  632 batches | lr 0.00010 | ms/batch  8.43 | loss 0.00209760
| Epoch  33 |   125/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00210592
| Epoch  33 |   150/  632 batches | lr 0.00010 | ms/batch  9.97 | loss 0.00206877
| Epoch  33 |   175/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00205001
| Epoch  33 |   200/  632 batches | lr 0.00010 | ms/batch  8.13 | loss 0.00209365
| Epoch  33 |   

| Epoch  36 |   550/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00211600
| Epoch  36 |   575/  632 batches | lr 0.00010 | ms/batch  8.09 | loss 0.00209527
| Epoch  36 |   600/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00200768
| Epoch  36 |   625/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00205733
| Epoch  37 |    25/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00210738
| Epoch  37 |    50/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00209083
| Epoch  37 |    75/  632 batches | lr 0.00010 | ms/batch  8.20 | loss 0.00209150
| Epoch  37 |   100/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00206043
| Epoch  37 |   125/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00207438
| Epoch  37 |   150/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00202929
| Epoch  37 |   175/  632 batches | lr 0.00010 | ms/batch  8.22 | loss 0.00201400
| Epoch  37 |   200/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00205961
| Epoch  37 |   

| Epoch  40 |   550/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00207478
| Epoch  40 |   575/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00205818
| Epoch  40 |   600/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00196903
| Epoch  40 |   625/  632 batches | lr 0.00010 | ms/batch 10.07 | loss 0.00202144
| Epoch  41 |    25/  632 batches | lr 0.00010 | ms/batch  8.62 | loss 0.00206492
| Epoch  41 |    50/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00204769
| Epoch  41 |    75/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00205369
| Epoch  41 |   100/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00202184
| Epoch  41 |   125/  632 batches | lr 0.00010 | ms/batch  8.22 | loss 0.00203668
| Epoch  41 |   150/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00198641
| Epoch  41 |   175/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00197677
| Epoch  41 |   200/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00201988
| Epoch  41 |   

| Epoch  44 |   550/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00202880
| Epoch  44 |   575/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00201772
| Epoch  44 |   600/  632 batches | lr 0.00010 | ms/batch  8.27 | loss 0.00192635
| Epoch  44 |   625/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00198274
| Epoch  45 |    25/  632 batches | lr 0.00010 | ms/batch 10.14 | loss 0.00202044
| Epoch  45 |    50/  632 batches | lr 0.00010 | ms/batch 10.20 | loss 0.00200257
| Epoch  45 |    75/  632 batches | lr 0.00010 | ms/batch  8.30 | loss 0.00201040
| Epoch  45 |   100/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00198007
| Epoch  45 |   125/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00200028
| Epoch  45 |   150/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00194343
| Epoch  45 |   175/  632 batches | lr 0.00010 | ms/batch  8.25 | loss 0.00193632
| Epoch  45 |   200/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00197744
| Epoch  45 |   

| Epoch  48 |   550/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00198216
| Epoch  48 |   575/  632 batches | lr 0.00010 | ms/batch  8.37 | loss 0.00197343
| Epoch  48 |   600/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00188266
| Epoch  48 |   625/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00194103
| Epoch  49 |    25/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00197469
| Epoch  49 |    50/  632 batches | lr 0.00010 | ms/batch  8.30 | loss 0.00195364
| Epoch  49 |    75/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00196503
| Epoch  49 |   100/  632 batches | lr 0.00010 | ms/batch 10.03 | loss 0.00193956
| Epoch  49 |   125/  632 batches | lr 0.00010 | ms/batch  9.99 | loss 0.00196276
| Epoch  49 |   150/  632 batches | lr 0.00010 | ms/batch  8.29 | loss 0.00189818
| Epoch  49 |   175/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00189470
| Epoch  49 |   200/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00193292
| Epoch  49 |   

| Epoch  52 |   550/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00193335
| Epoch  52 |   575/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00192296
| Epoch  52 |   600/  632 batches | lr 0.00010 | ms/batch  8.25 | loss 0.00183808
| Epoch  52 |   625/  632 batches | lr 0.00010 | ms/batch 10.01 | loss 0.00189564
| Epoch  53 |    25/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00192760
| Epoch  53 |    50/  632 batches | lr 0.00010 | ms/batch 10.07 | loss 0.00190317
| Epoch  53 |    75/  632 batches | lr 0.00010 | ms/batch  8.53 | loss 0.00191494
| Epoch  53 |   100/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00189417
| Epoch  53 |   125/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00192511
| Epoch  53 |   150/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00185058
| Epoch  53 |   175/  632 batches | lr 0.00010 | ms/batch  8.39 | loss 0.00184916
| Epoch  53 |   200/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00188517
| Epoch  53 |   

| Epoch  56 |   550/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00188523
| Epoch  56 |   575/  632 batches | lr 0.00010 | ms/batch  8.35 | loss 0.00187162
| Epoch  56 |   600/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00179147
| Epoch  56 |   625/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00184865
| Epoch  57 |    25/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00187885
| Epoch  57 |    50/  632 batches | lr 0.00010 | ms/batch  8.33 | loss 0.00185268
| Epoch  57 |    75/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00186489
| Epoch  57 |   100/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00184765
| Epoch  57 |   125/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00188441
| Epoch  57 |   150/  632 batches | lr 0.00010 | ms/batch  8.29 | loss 0.00180228
| Epoch  57 |   175/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00180306
| Epoch  57 |   200/  632 batches | lr 0.00010 | ms/batch 10.01 | loss 0.00183708
| Epoch  57 |   

| Epoch  60 |   550/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00183663
| Epoch  60 |   575/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00182129
| Epoch  60 |   600/  632 batches | lr 0.00010 | ms/batch  9.67 | loss 0.00174335
| Epoch  60 |   625/  632 batches | lr 0.00010 | ms/batch  8.10 | loss 0.00179781
| Epoch  61 |    25/  632 batches | lr 0.00010 | ms/batch  8.35 | loss 0.00183001
| Epoch  61 |    50/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00180113
| Epoch  61 |    75/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00181232
| Epoch  61 |   100/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00179803
| Epoch  61 |   125/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00184099
| Epoch  61 |   150/  632 batches | lr 0.00010 | ms/batch  9.76 | loss 0.00175297
| Epoch  61 |   175/  632 batches | lr 0.00010 | ms/batch  8.09 | loss 0.00175250
| Epoch  61 |   200/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00178620
| Epoch  61 |   

| Epoch  64 |   550/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00179075
| Epoch  64 |   575/  632 batches | lr 0.00010 | ms/batch  8.32 | loss 0.00176965
| Epoch  64 |   600/  632 batches | lr 0.00010 | ms/batch  9.77 | loss 0.00168958
| Epoch  64 |   625/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00175233
| Epoch  65 |    25/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00177616
| Epoch  65 |    50/  632 batches | lr 0.00010 | ms/batch  8.39 | loss 0.00175099
| Epoch  65 |    75/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00176236
| Epoch  65 |   100/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00174544
| Epoch  65 |   125/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00179655
| Epoch  65 |   150/  632 batches | lr 0.00010 | ms/batch  8.14 | loss 0.00170127
| Epoch  65 |   175/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00170344
| Epoch  65 |   200/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00173646
| Epoch  65 |   

| Epoch  68 |   550/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00174256
| Epoch  68 |   575/  632 batches | lr 0.00010 | ms/batch  9.80 | loss 0.00171205
| Epoch  68 |   600/  632 batches | lr 0.00010 | ms/batch  9.85 | loss 0.00163728
| Epoch  68 |   625/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00170779
| Epoch  69 |    25/  632 batches | lr 0.00010 | ms/batch  8.48 | loss 0.00172218
| Epoch  69 |    50/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00169646
| Epoch  69 |    75/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00170626
| Epoch  69 |   100/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00169812
| Epoch  69 |   125/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00175126
| Epoch  69 |   150/  632 batches | lr 0.00010 | ms/batch  9.68 | loss 0.00164822
| Epoch  69 |   175/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00165078
| Epoch  69 |   200/  632 batches | lr 0.00010 | ms/batch  8.21 | loss 0.00168210
| Epoch  69 |   

| Epoch  72 |   550/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00169932
| Epoch  72 |   575/  632 batches | lr 0.00010 | ms/batch  8.07 | loss 0.00165188
| Epoch  72 |   600/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00158114
| Epoch  72 |   625/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00166343
| Epoch  73 |    25/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00166741
| Epoch  73 |    50/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00164241
| Epoch  73 |    75/  632 batches | lr 0.00010 | ms/batch  8.14 | loss 0.00165379
| Epoch  73 |   100/  632 batches | lr 0.00010 | ms/batch  9.80 | loss 0.00164306
| Epoch  73 |   125/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00170524
| Epoch  73 |   150/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00159451
| Epoch  73 |   175/  632 batches | lr 0.00010 | ms/batch  8.17 | loss 0.00160005
| Epoch  73 |   200/  632 batches | lr 0.00010 | ms/batch  9.69 | loss 0.00162303
| Epoch  73 |   

| Epoch  76 |   550/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00165420
| Epoch  76 |   575/  632 batches | lr 0.00010 | ms/batch  9.80 | loss 0.00159206
| Epoch  76 |   600/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00152597
| Epoch  76 |   625/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00161345
| Epoch  77 |    25/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00160895
| Epoch  77 |    50/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00158647
| Epoch  77 |    75/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00160222
| Epoch  77 |   100/  632 batches | lr 0.00010 | ms/batch  9.69 | loss 0.00158620
| Epoch  77 |   125/  632 batches | lr 0.00010 | ms/batch  8.23 | loss 0.00166320
| Epoch  77 |   150/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00154561
| Epoch  77 |   175/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00154541
| Epoch  77 |   200/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00156754
| Epoch  77 |   

| Epoch  80 |   550/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00160813
| Epoch  80 |   575/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00153395
| Epoch  80 |   600/  632 batches | lr 0.00010 | ms/batch  8.13 | loss 0.00147103
| Epoch  80 |   625/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00155931
| Epoch  81 |    25/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00154892
| Epoch  81 |    50/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00152829
| Epoch  81 |    75/  632 batches | lr 0.00010 | ms/batch  8.24 | loss 0.00155193
| Epoch  81 |   100/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00153231
| Epoch  81 |   125/  632 batches | lr 0.00010 | ms/batch  9.97 | loss 0.00162429
| Epoch  81 |   150/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00149952
| Epoch  81 |   175/  632 batches | lr 0.00010 | ms/batch  8.55 | loss 0.00149355
| Epoch  81 |   200/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00151082
| Epoch  81 |   

| Epoch  84 |   550/  632 batches | lr 0.00010 | ms/batch 10.56 | loss 0.00156510
| Epoch  84 |   575/  632 batches | lr 0.00010 | ms/batch  9.09 | loss 0.00147694
| Epoch  84 |   600/  632 batches | lr 0.00010 | ms/batch 10.70 | loss 0.00142043
| Epoch  84 |   625/  632 batches | lr 0.00010 | ms/batch 10.56 | loss 0.00151007
| Epoch  85 |    25/  632 batches | lr 0.00010 | ms/batch 10.60 | loss 0.00148829
| Epoch  85 |    50/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00147070
| Epoch  85 |    75/  632 batches | lr 0.00010 | ms/batch 11.17 | loss 0.00149705
| Epoch  85 |   100/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00147335
| Epoch  85 |   125/  632 batches | lr 0.00010 | ms/batch 10.50 | loss 0.00158341
| Epoch  85 |   150/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00145304
| Epoch  85 |   175/  632 batches | lr 0.00010 | ms/batch 10.80 | loss 0.00144333
| Epoch  85 |   200/  632 batches | lr 0.00010 | ms/batch 10.70 | loss 0.00145491
| Epoch  85 |   

| Epoch  88 |   550/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00151486
| Epoch  88 |   575/  632 batches | lr 0.00010 | ms/batch  9.79 | loss 0.00142661
| Epoch  88 |   600/  632 batches | lr 0.00010 | ms/batch  8.27 | loss 0.00136937
| Epoch  88 |   625/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00145587
| Epoch  89 |    25/  632 batches | lr 0.00010 | ms/batch 10.05 | loss 0.00142694
| Epoch  89 |    50/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00141396
| Epoch  89 |    75/  632 batches | lr 0.00010 | ms/batch  8.08 | loss 0.00144478
| Epoch  89 |   100/  632 batches | lr 0.00010 | ms/batch  9.65 | loss 0.00142007
| Epoch  89 |   125/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00154083
| Epoch  89 |   150/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00141550
| Epoch  89 |   175/  632 batches | lr 0.00010 | ms/batch  8.37 | loss 0.00139413
| Epoch  89 |   200/  632 batches | lr 0.00010 | ms/batch  9.67 | loss 0.00140051
| Epoch  89 |   

| Epoch  92 |   550/  632 batches | lr 0.00010 | ms/batch  9.99 | loss 0.00147070
| Epoch  92 |   575/  632 batches | lr 0.00010 | ms/batch  8.51 | loss 0.00137803
| Epoch  92 |   600/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00132053
| Epoch  92 |   625/  632 batches | lr 0.00010 | ms/batch 10.45 | loss 0.00141169
| Epoch  93 |    25/  632 batches | lr 0.00010 | ms/batch 10.55 | loss 0.00137479
| Epoch  93 |    50/  632 batches | lr 0.00010 | ms/batch  8.26 | loss 0.00135739
| Epoch  93 |    75/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00138610
| Epoch  93 |   100/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00136368
| Epoch  93 |   125/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00149095
| Epoch  93 |   150/  632 batches | lr 0.00010 | ms/batch  8.50 | loss 0.00138635
| Epoch  93 |   175/  632 batches | lr 0.00010 | ms/batch 10.41 | loss 0.00134467
| Epoch  93 |   200/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00134950
| Epoch  93 |   

| Epoch  96 |   550/  632 batches | lr 0.00010 | ms/batch  9.84 | loss 0.00142958
| Epoch  96 |   575/  632 batches | lr 0.00010 | ms/batch  9.69 | loss 0.00133786
| Epoch  96 |   600/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00127070
| Epoch  96 |   625/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00136565
| Epoch  97 |    25/  632 batches | lr 0.00010 | ms/batch  8.47 | loss 0.00132899
| Epoch  97 |    50/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00131017
| Epoch  97 |    75/  632 batches | lr 0.00010 | ms/batch  8.15 | loss 0.00132997
| Epoch  97 |   100/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00130990
| Epoch  97 |   125/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00143207
| Epoch  97 |   150/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00134969
| Epoch  97 |   175/  632 batches | lr 0.00010 | ms/batch  8.21 | loss 0.00129233
| Epoch  97 |   200/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00130237
| Epoch  97 |   

| Epoch 100 |   550/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00139134
| Epoch 100 |   575/  632 batches | lr 0.00010 | ms/batch  8.26 | loss 0.00129433
| Epoch 100 |   600/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00121955
| Epoch 100 |   625/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00131155
| Epoch 101 |    25/  632 batches | lr 0.00010 | ms/batch 10.92 | loss 0.00129617
| Epoch 101 |    50/  632 batches | lr 0.00010 | ms/batch  8.20 | loss 0.00126690
| Epoch 101 |    75/  632 batches | lr 0.00010 | ms/batch 10.05 | loss 0.00127729
| Epoch 101 |   100/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00125989
| Epoch 101 |   125/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00138104
| Epoch 101 |   150/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00131256
| Epoch 101 |   175/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00123535
| Epoch 101 |   200/  632 batches | lr 0.00010 | ms/batch  9.85 | loss 0.00125686
| Epoch 101 |   

| Epoch 104 |   550/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00135780
| Epoch 104 |   575/  632 batches | lr 0.00010 | ms/batch 10.03 | loss 0.00125352
| Epoch 104 |   600/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00117535
| Epoch 104 |   625/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00125666
| Epoch 105 |    25/  632 batches | lr 0.00010 | ms/batch  8.68 | loss 0.00126017
| Epoch 105 |    50/  632 batches | lr 0.00010 | ms/batch  9.85 | loss 0.00122073
| Epoch 105 |    75/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00122735
| Epoch 105 |   100/  632 batches | lr 0.00010 | ms/batch  8.27 | loss 0.00121019
| Epoch 105 |   125/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00131791
| Epoch 105 |   150/  632 batches | lr 0.00010 | ms/batch 10.03 | loss 0.00127485
| Epoch 105 |   175/  632 batches | lr 0.00010 | ms/batch  9.99 | loss 0.00118635
| Epoch 105 |   200/  632 batches | lr 0.00010 | ms/batch  8.32 | loss 0.00121668
| Epoch 105 |   

| Epoch 108 |   550/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00132568
| Epoch 108 |   575/  632 batches | lr 0.00010 | ms/batch  8.21 | loss 0.00121755
| Epoch 108 |   600/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00113477
| Epoch 108 |   625/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00120936
| Epoch 109 |    25/  632 batches | lr 0.00010 | ms/batch 10.50 | loss 0.00121975
| Epoch 109 |    50/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00117638
| Epoch 109 |    75/  632 batches | lr 0.00010 | ms/batch  8.10 | loss 0.00117901
| Epoch 109 |   100/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00116661
| Epoch 109 |   125/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00126005
| Epoch 109 |   150/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00122644
| Epoch 109 |   175/  632 batches | lr 0.00010 | ms/batch  8.33 | loss 0.00114246
| Epoch 109 |   200/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00117463
| Epoch 109 |   

| Epoch 112 |   550/  632 batches | lr 0.00010 | ms/batch  8.40 | loss 0.00129198
| Epoch 112 |   575/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00118070
| Epoch 112 |   600/  632 batches | lr 0.00010 | ms/batch  9.99 | loss 0.00109258
| Epoch 112 |   625/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00116486
| Epoch 113 |    25/  632 batches | lr 0.00010 | ms/batch  8.56 | loss 0.00118697
| Epoch 113 |    50/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00113443
| Epoch 113 |    75/  632 batches | lr 0.00010 | ms/batch 10.10 | loss 0.00113204
| Epoch 113 |   100/  632 batches | lr 0.00010 | ms/batch 10.15 | loss 0.00111676
| Epoch 113 |   125/  632 batches | lr 0.00010 | ms/batch  9.27 | loss 0.00120035
| Epoch 113 |   150/  632 batches | lr 0.00010 | ms/batch 10.52 | loss 0.00118306
| Epoch 113 |   175/  632 batches | lr 0.00010 | ms/batch 10.48 | loss 0.00110460
| Epoch 113 |   200/  632 batches | lr 0.00010 | ms/batch 10.25 | loss 0.00113092
| Epoch 113 |   

| Epoch 116 |   550/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00126950
| Epoch 116 |   575/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00114634
| Epoch 116 |   600/  632 batches | lr 0.00010 | ms/batch  8.54 | loss 0.00105984
| Epoch 116 |   625/  632 batches | lr 0.00010 | ms/batch 10.05 | loss 0.00112181
| Epoch 117 |    25/  632 batches | lr 0.00010 | ms/batch 10.34 | loss 0.00115751
| Epoch 117 |    50/  632 batches | lr 0.00010 | ms/batch 10.14 | loss 0.00109493
| Epoch 117 |    75/  632 batches | lr 0.00010 | ms/batch  8.30 | loss 0.00108320
| Epoch 117 |   100/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00107023
| Epoch 117 |   125/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00114895
| Epoch 117 |   150/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00113608
| Epoch 117 |   175/  632 batches | lr 0.00010 | ms/batch  8.42 | loss 0.00106648
| Epoch 117 |   200/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00109055
| Epoch 117 |   

| Epoch 120 |   550/  632 batches | lr 0.00010 | ms/batch 10.26 | loss 0.00124064
| Epoch 120 |   575/  632 batches | lr 0.00010 | ms/batch  8.38 | loss 0.00110741
| Epoch 120 |   600/  632 batches | lr 0.00010 | ms/batch 10.07 | loss 0.00101695
| Epoch 120 |   625/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00107718
| Epoch 121 |    25/  632 batches | lr 0.00010 | ms/batch 10.48 | loss 0.00112060
| Epoch 121 |    50/  632 batches | lr 0.00010 | ms/batch  8.36 | loss 0.00105414
| Epoch 121 |    75/  632 batches | lr 0.00010 | ms/batch 10.33 | loss 0.00104320
| Epoch 121 |   100/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00102983
| Epoch 121 |   125/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00110150
| Epoch 121 |   150/  632 batches | lr 0.00010 | ms/batch  8.40 | loss 0.00108474
| Epoch 121 |   175/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00102871
| Epoch 121 |   200/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00105046
| Epoch 121 |   

| Epoch 124 |   550/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00121904
| Epoch 124 |   575/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00107302
| Epoch 124 |   600/  632 batches | lr 0.00010 | ms/batch  8.40 | loss 0.00098412
| Epoch 124 |   625/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00103767
Model:  {'hidden_size': 600, 'n_layers': 4, 'act_fun': 'LeakyReLU', 'init_methods': 'xavier uniform'}
| Epoch   0 |    25/  632 batches | lr 0.00010 | ms/batch 10.35 | loss 0.28509077
| Epoch   0 |    50/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.02203669
| Epoch   0 |    75/  632 batches | lr 0.00010 | ms/batch  7.98 | loss 0.00681778
| Epoch   0 |   100/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00420978
| Epoch   0 |   125/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00343796
| Epoch   0 |   150/  632 batches | lr 0.00010 | ms/batch  9.76 | loss 0.00297749
| Epoch   0 |   175/  632 batches | lr 0.00010 | ms/batch  8.21 | loss 0.00271

| Epoch   3 |   525/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00235729
| Epoch   3 |   550/  632 batches | lr 0.00010 | ms/batch  9.97 | loss 0.00245388
| Epoch   3 |   575/  632 batches | lr 0.00010 | ms/batch  8.10 | loss 0.00238372
| Epoch   3 |   600/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00229868
| Epoch   3 |   625/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00235062
| Epoch   4 |    25/  632 batches | lr 0.00010 | ms/batch 10.16 | loss 0.00240785
| Epoch   4 |    50/  632 batches | lr 0.00010 | ms/batch  8.38 | loss 0.00237506
| Epoch   4 |    75/  632 batches | lr 0.00010 | ms/batch 10.09 | loss 0.00237955
| Epoch   4 |   100/  632 batches | lr 0.00010 | ms/batch  9.76 | loss 0.00242023
| Epoch   4 |   125/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00236205
| Epoch   4 |   150/  632 batches | lr 0.00010 | ms/batch  8.26 | loss 0.00243294
| Epoch   4 |   175/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00241474
| Epoch   4 |   

| Epoch   7 |   525/  632 batches | lr 0.00010 | ms/batch  8.11 | loss 0.00230475
| Epoch   7 |   550/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00239482
| Epoch   7 |   575/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00234309
| Epoch   7 |   600/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00227613
| Epoch   7 |   625/  632 batches | lr 0.00010 | ms/batch  8.10 | loss 0.00234792
| Epoch   8 |    25/  632 batches | lr 0.00010 | ms/batch  8.47 | loss 0.00235347
| Epoch   8 |    50/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00232929
| Epoch   8 |    75/  632 batches | lr 0.00010 | ms/batch  8.18 | loss 0.00234677
| Epoch   8 |   100/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00233051
| Epoch   8 |   125/  632 batches | lr 0.00010 | ms/batch  9.85 | loss 0.00235379
| Epoch   8 |   150/  632 batches | lr 0.00010 | ms/batch 10.16 | loss 0.00238619
| Epoch   8 |   175/  632 batches | lr 0.00010 | ms/batch  8.18 | loss 0.00226575
| Epoch   8 |   

| Epoch  11 |   525/  632 batches | lr 0.00010 | ms/batch 10.50 | loss 0.00227982
| Epoch  11 |   550/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00234273
| Epoch  11 |   575/  632 batches | lr 0.00010 | ms/batch  8.37 | loss 0.00229301
| Epoch  11 |   600/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00223280
| Epoch  11 |   625/  632 batches | lr 0.00010 | ms/batch 10.59 | loss 0.00231696
| Epoch  12 |    25/  632 batches | lr 0.00010 | ms/batch 10.39 | loss 0.00231797
| Epoch  12 |    50/  632 batches | lr 0.00010 | ms/batch  8.33 | loss 0.00231331
| Epoch  12 |    75/  632 batches | lr 0.00010 | ms/batch 10.47 | loss 0.00230923
| Epoch  12 |   100/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00229426
| Epoch  12 |   125/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00233618
| Epoch  12 |   150/  632 batches | lr 0.00010 | ms/batch  8.90 | loss 0.00229027
| Epoch  12 |   175/  632 batches | lr 0.00010 | ms/batch 10.37 | loss 0.00223898
| Epoch  12 |   

| Epoch  15 |   525/  632 batches | lr 0.00010 | ms/batch  8.47 | loss 0.00224326
| Epoch  15 |   550/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00228057
| Epoch  15 |   575/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00226174
| Epoch  15 |   600/  632 batches | lr 0.00010 | ms/batch 10.29 | loss 0.00219440
| Epoch  15 |   625/  632 batches | lr 0.00010 | ms/batch 10.05 | loss 0.00225308
| Epoch  16 |    25/  632 batches | lr 0.00010 | ms/batch  8.46 | loss 0.00228511
| Epoch  16 |    50/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00227923
| Epoch  16 |    75/  632 batches | lr 0.00010 | ms/batch 10.07 | loss 0.00226212
| Epoch  16 |   100/  632 batches | lr 0.00010 | ms/batch  8.44 | loss 0.00225478
| Epoch  16 |   125/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00228187
| Epoch  16 |   150/  632 batches | lr 0.00010 | ms/batch 10.00 | loss 0.00223070
| Epoch  16 |   175/  632 batches | lr 0.00010 | ms/batch  9.90 | loss 0.00219483
| Epoch  16 |   

| Epoch  19 |   525/  632 batches | lr 0.00010 | ms/batch 10.05 | loss 0.00221257
| Epoch  19 |   550/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00223800
| Epoch  19 |   575/  632 batches | lr 0.00010 | ms/batch  8.30 | loss 0.00222734
| Epoch  19 |   600/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00215247
| Epoch  19 |   625/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00219429
| Epoch  20 |    25/  632 batches | lr 0.00010 | ms/batch 10.44 | loss 0.00225236
| Epoch  20 |    50/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00224772
| Epoch  20 |    75/  632 batches | lr 0.00010 | ms/batch  8.32 | loss 0.00221640
| Epoch  20 |   100/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00220998
| Epoch  20 |   125/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00222903
| Epoch  20 |   150/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00217931
| Epoch  20 |   175/  632 batches | lr 0.00010 | ms/batch  8.26 | loss 0.00215576
| Epoch  20 |   

| Epoch  23 |   525/  632 batches | lr 0.00010 | ms/batch 10.03 | loss 0.00217286
| Epoch  23 |   550/  632 batches | lr 0.00010 | ms/batch  8.26 | loss 0.00219809
| Epoch  23 |   575/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00219089
| Epoch  23 |   600/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00211412
| Epoch  23 |   625/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00214908
| Epoch  24 |    25/  632 batches | lr 0.00010 | ms/batch  8.48 | loss 0.00221707
| Epoch  24 |    50/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00221075
| Epoch  24 |    75/  632 batches | lr 0.00010 | ms/batch 10.25 | loss 0.00217371
| Epoch  24 |   100/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00216604
| Epoch  24 |   125/  632 batches | lr 0.00010 | ms/batch  8.46 | loss 0.00218644
| Epoch  24 |   150/  632 batches | lr 0.00010 | ms/batch 10.17 | loss 0.00212809
| Epoch  24 |   175/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00211277
| Epoch  24 |   

| Epoch  27 |   525/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00213170
| Epoch  27 |   550/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00216030
| Epoch  27 |   575/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00215354
| Epoch  27 |   600/  632 batches | lr 0.00010 | ms/batch  8.15 | loss 0.00207409
| Epoch  27 |   625/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00210729
| Epoch  28 |    25/  632 batches | lr 0.00010 | ms/batch 10.25 | loss 0.00217843
| Epoch  28 |    50/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00216600
| Epoch  28 |    75/  632 batches | lr 0.00010 | ms/batch  8.22 | loss 0.00213275
| Epoch  28 |   100/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00212338
| Epoch  28 |   125/  632 batches | lr 0.00010 | ms/batch  9.91 | loss 0.00213883
| Epoch  28 |   150/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00207848
| Epoch  28 |   175/  632 batches | lr 0.00010 | ms/batch  8.20 | loss 0.00207043
| Epoch  28 |   

| Epoch  31 |   525/  632 batches | lr 0.00010 | ms/batch 10.22 | loss 0.00209308
| Epoch  31 |   550/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00211950
| Epoch  31 |   575/  632 batches | lr 0.00010 | ms/batch  8.18 | loss 0.00211398
| Epoch  31 |   600/  632 batches | lr 0.00010 | ms/batch 10.01 | loss 0.00202675
| Epoch  31 |   625/  632 batches | lr 0.00010 | ms/batch 10.01 | loss 0.00206568
| Epoch  32 |    25/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00213137
| Epoch  32 |    50/  632 batches | lr 0.00010 | ms/batch  8.27 | loss 0.00211102
| Epoch  32 |    75/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00208888
| Epoch  32 |   100/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00207315
| Epoch  32 |   125/  632 batches | lr 0.00010 | ms/batch  9.75 | loss 0.00209150
| Epoch  32 |   150/  632 batches | lr 0.00010 | ms/batch  8.19 | loss 0.00203167
| Epoch  32 |   175/  632 batches | lr 0.00010 | ms/batch 10.52 | loss 0.00202711
| Epoch  32 |   

| Epoch  35 |   525/  632 batches | lr 0.00010 | ms/batch  9.82 | loss 0.00205135
| Epoch  35 |   550/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00207189
| Epoch  35 |   575/  632 batches | lr 0.00010 | ms/batch  9.37 | loss 0.00206495
| Epoch  35 |   600/  632 batches | lr 0.00010 | ms/batch  7.76 | loss 0.00197847
| Epoch  35 |   625/  632 batches | lr 0.00010 | ms/batch  9.42 | loss 0.00202000
| Epoch  36 |    25/  632 batches | lr 0.00010 | ms/batch  9.64 | loss 0.00208005
| Epoch  36 |    50/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00205415
| Epoch  36 |    75/  632 batches | lr 0.00010 | ms/batch  7.82 | loss 0.00203988
| Epoch  36 |   100/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00202517
| Epoch  36 |   125/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00203907
| Epoch  36 |   150/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00198458
| Epoch  36 |   175/  632 batches | lr 0.00010 | ms/batch  7.73 | loss 0.00198456
| Epoch  36 |   

| Epoch  39 |   525/  632 batches | lr 0.00010 | ms/batch 10.55 | loss 0.00200808
| Epoch  39 |   550/  632 batches | lr 0.00010 | ms/batch 10.64 | loss 0.00202111
| Epoch  39 |   575/  632 batches | lr 0.00010 | ms/batch  8.48 | loss 0.00200964
| Epoch  39 |   600/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00192482
| Epoch  39 |   625/  632 batches | lr 0.00010 | ms/batch 10.21 | loss 0.00196963
| Epoch  40 |    25/  632 batches | lr 0.00010 | ms/batch 10.86 | loss 0.00202434
| Epoch  40 |    50/  632 batches | lr 0.00010 | ms/batch  8.71 | loss 0.00199725
| Epoch  40 |    75/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00198386
| Epoch  40 |   100/  632 batches | lr 0.00010 | ms/batch  9.52 | loss 0.00197102
| Epoch  40 |   125/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00198319
| Epoch  40 |   150/  632 batches | lr 0.00010 | ms/batch  8.64 | loss 0.00193241
| Epoch  40 |   175/  632 batches | lr 0.00010 | ms/batch 10.73 | loss 0.00193891
| Epoch  40 |   

| Epoch  43 |   525/  632 batches | lr 0.00010 | ms/batch  7.93 | loss 0.00195696
| Epoch  43 |   550/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00196474
| Epoch  43 |   575/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00195278
| Epoch  43 |   600/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00186625
| Epoch  43 |   625/  632 batches | lr 0.00010 | ms/batch  7.71 | loss 0.00191507
| Epoch  44 |    25/  632 batches | lr 0.00010 | ms/batch  8.13 | loss 0.00196607
| Epoch  44 |    50/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00193404
| Epoch  44 |    75/  632 batches | lr 0.00010 | ms/batch  7.85 | loss 0.00192117
| Epoch  44 |   100/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00191447
| Epoch  44 |   125/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00191944
| Epoch  44 |   150/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00187795
| Epoch  44 |   175/  632 batches | lr 0.00010 | ms/batch  7.81 | loss 0.00188567
| Epoch  44 |   

| Epoch  47 |   525/  632 batches | lr 0.00010 | ms/batch  9.54 | loss 0.00189616
| Epoch  47 |   550/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00190184
| Epoch  47 |   575/  632 batches | lr 0.00010 | ms/batch  7.69 | loss 0.00188718
| Epoch  47 |   600/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00180450
| Epoch  47 |   625/  632 batches | lr 0.00010 | ms/batch  9.42 | loss 0.00185587
| Epoch  48 |    25/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00190098
| Epoch  48 |    50/  632 batches | lr 0.00010 | ms/batch  7.80 | loss 0.00186834
| Epoch  48 |    75/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00185510
| Epoch  48 |   100/  632 batches | lr 0.00010 | ms/batch  9.42 | loss 0.00185748
| Epoch  48 |   125/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00185175
| Epoch  48 |   150/  632 batches | lr 0.00010 | ms/batch  7.76 | loss 0.00181693
| Epoch  48 |   175/  632 batches | lr 0.00010 | ms/batch  9.39 | loss 0.00182670
| Epoch  48 |   

| Epoch  51 |   525/  632 batches | lr 0.00010 | ms/batch  7.90 | loss 0.00181640
| Epoch  51 |   550/  632 batches | lr 0.00010 | ms/batch  9.58 | loss 0.00183216
| Epoch  51 |   575/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00181198
| Epoch  51 |   600/  632 batches | lr 0.00010 | ms/batch  9.58 | loss 0.00173832
| Epoch  51 |   625/  632 batches | lr 0.00010 | ms/batch  9.42 | loss 0.00179624
| Epoch  52 |    25/  632 batches | lr 0.00010 | ms/batch  8.35 | loss 0.00183419
| Epoch  52 |    50/  632 batches | lr 0.00010 | ms/batch  9.65 | loss 0.00179935
| Epoch  52 |    75/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00178673
| Epoch  52 |   100/  632 batches | lr 0.00010 | ms/batch  8.03 | loss 0.00180101
| Epoch  52 |   125/  632 batches | lr 0.00010 | ms/batch  9.55 | loss 0.00178190
| Epoch  52 |   150/  632 batches | lr 0.00010 | ms/batch  9.61 | loss 0.00175429
| Epoch  52 |   175/  632 batches | lr 0.00010 | ms/batch  9.37 | loss 0.00176419
| Epoch  52 |   

| Epoch  55 |   525/  632 batches | lr 0.00010 | ms/batch 11.30 | loss 0.00172894
| Epoch  55 |   550/  632 batches | lr 0.00010 | ms/batch 10.42 | loss 0.00175848
| Epoch  55 |   575/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00173070
| Epoch  55 |   600/  632 batches | lr 0.00010 | ms/batch 10.35 | loss 0.00166835
| Epoch  55 |   625/  632 batches | lr 0.00010 | ms/batch 10.76 | loss 0.00172666
| Epoch  56 |    25/  632 batches | lr 0.00010 | ms/batch 10.45 | loss 0.00176331
| Epoch  56 |    50/  632 batches | lr 0.00010 | ms/batch 13.09 | loss 0.00172768
| Epoch  56 |    75/  632 batches | lr 0.00010 | ms/batch 10.59 | loss 0.00171755
| Epoch  56 |   100/  632 batches | lr 0.00010 | ms/batch 11.02 | loss 0.00174544
| Epoch  56 |   125/  632 batches | lr 0.00010 | ms/batch 10.19 | loss 0.00170836
| Epoch  56 |   150/  632 batches | lr 0.00010 | ms/batch 12.01 | loss 0.00168603
| Epoch  56 |   175/  632 batches | lr 0.00010 | ms/batch 10.97 | loss 0.00170475
| Epoch  56 |   

| Epoch  59 |   525/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00164258
| Epoch  59 |   550/  632 batches | lr 0.00010 | ms/batch  7.96 | loss 0.00168361
| Epoch  59 |   575/  632 batches | lr 0.00010 | ms/batch  9.64 | loss 0.00165549
| Epoch  59 |   600/  632 batches | lr 0.00010 | ms/batch  9.48 | loss 0.00159844
| Epoch  59 |   625/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00164904
| Epoch  60 |    25/  632 batches | lr 0.00010 | ms/batch  8.05 | loss 0.00168829
| Epoch  60 |    50/  632 batches | lr 0.00010 | ms/batch  9.41 | loss 0.00165423
| Epoch  60 |    75/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00164956
| Epoch  60 |   100/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00168219
| Epoch  60 |   125/  632 batches | lr 0.00010 | ms/batch  8.28 | loss 0.00163795
| Epoch  60 |   150/  632 batches | lr 0.00010 | ms/batch 10.23 | loss 0.00161102
| Epoch  60 |   175/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00164277
| Epoch  60 |   

| Epoch  63 |   525/  632 batches | lr 0.00010 | ms/batch  9.63 | loss 0.00155963
| Epoch  63 |   550/  632 batches | lr 0.00010 | ms/batch  9.69 | loss 0.00161233
| Epoch  63 |   575/  632 batches | lr 0.00010 | ms/batch 10.05 | loss 0.00158343
| Epoch  63 |   600/  632 batches | lr 0.00010 | ms/batch  7.99 | loss 0.00152428
| Epoch  63 |   625/  632 batches | lr 0.00010 | ms/batch  9.48 | loss 0.00157052
| Epoch  64 |    25/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00161808
| Epoch  64 |    50/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00158349
| Epoch  64 |    75/  632 batches | lr 0.00010 | ms/batch  8.32 | loss 0.00157769
| Epoch  64 |   100/  632 batches | lr 0.00010 | ms/batch 10.30 | loss 0.00160657
| Epoch  64 |   125/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00157267
| Epoch  64 |   150/  632 batches | lr 0.00010 | ms/batch  9.87 | loss 0.00153425
| Epoch  64 |   175/  632 batches | lr 0.00010 | ms/batch  8.28 | loss 0.00157692
| Epoch  64 |   

| Epoch  67 |   525/  632 batches | lr 0.00010 | ms/batch  9.65 | loss 0.00147795
| Epoch  67 |   550/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00154360
| Epoch  67 |   575/  632 batches | lr 0.00010 | ms/batch  8.16 | loss 0.00151779
| Epoch  67 |   600/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00145384
| Epoch  67 |   625/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00149859
| Epoch  68 |    25/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00154493
| Epoch  68 |    50/  632 batches | lr 0.00010 | ms/batch  8.07 | loss 0.00151189
| Epoch  68 |    75/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00150642
| Epoch  68 |   100/  632 batches | lr 0.00010 | ms/batch  9.69 | loss 0.00151590
| Epoch  68 |   125/  632 batches | lr 0.00010 | ms/batch  9.71 | loss 0.00152463
| Epoch  68 |   150/  632 batches | lr 0.00010 | ms/batch  7.84 | loss 0.00146043
| Epoch  68 |   175/  632 batches | lr 0.00010 | ms/batch  9.47 | loss 0.00149670
| Epoch  68 |   

| Epoch  71 |   525/  632 batches | lr 0.00010 | ms/batch  9.93 | loss 0.00140082
| Epoch  71 |   550/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00147809
| Epoch  71 |   575/  632 batches | lr 0.00010 | ms/batch  9.74 | loss 0.00144342
| Epoch  71 |   600/  632 batches | lr 0.00010 | ms/batch  8.29 | loss 0.00138461
| Epoch  71 |   625/  632 batches | lr 0.00010 | ms/batch  9.57 | loss 0.00142641
| Epoch  72 |    25/  632 batches | lr 0.00010 | ms/batch  9.78 | loss 0.00146453
| Epoch  72 |    50/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00143373
| Epoch  72 |    75/  632 batches | lr 0.00010 | ms/batch  8.66 | loss 0.00143092
| Epoch  72 |   100/  632 batches | lr 0.00010 | ms/batch  9.77 | loss 0.00142523
| Epoch  72 |   125/  632 batches | lr 0.00010 | ms/batch  9.54 | loss 0.00147686
| Epoch  72 |   150/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00139759
| Epoch  72 |   175/  632 batches | lr 0.00010 | ms/batch  7.86 | loss 0.00142123
| Epoch  72 |   

| Epoch  75 |   525/  632 batches | lr 0.00010 | ms/batch  9.66 | loss 0.00132438
| Epoch  75 |   550/  632 batches | lr 0.00010 | ms/batch  9.56 | loss 0.00141612
| Epoch  75 |   575/  632 batches | lr 0.00010 | ms/batch  7.90 | loss 0.00137017
| Epoch  75 |   600/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00132039
| Epoch  75 |   625/  632 batches | lr 0.00010 | ms/batch  9.51 | loss 0.00135426
| Epoch  76 |    25/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00138757
| Epoch  76 |    50/  632 batches | lr 0.00010 | ms/batch  8.17 | loss 0.00135909
| Epoch  76 |    75/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00135666
| Epoch  76 |   100/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00134094
| Epoch  76 |   125/  632 batches | lr 0.00010 | ms/batch  9.59 | loss 0.00142333
| Epoch  76 |   150/  632 batches | lr 0.00010 | ms/batch  8.06 | loss 0.00133937
| Epoch  76 |   175/  632 batches | lr 0.00010 | ms/batch  9.54 | loss 0.00135384
| Epoch  76 |   

| Epoch  79 |   525/  632 batches | lr 0.00010 | ms/batch  8.41 | loss 0.00125434
| Epoch  79 |   550/  632 batches | lr 0.00010 | ms/batch  9.66 | loss 0.00135313
| Epoch  79 |   575/  632 batches | lr 0.00010 | ms/batch  9.92 | loss 0.00130459
| Epoch  79 |   600/  632 batches | lr 0.00010 | ms/batch 10.37 | loss 0.00126384
| Epoch  79 |   625/  632 batches | lr 0.00010 | ms/batch  8.27 | loss 0.00128448
| Epoch  80 |    25/  632 batches | lr 0.00010 | ms/batch  8.66 | loss 0.00131679
| Epoch  80 |    50/  632 batches | lr 0.00010 | ms/batch 10.36 | loss 0.00129308
| Epoch  80 |    75/  632 batches | lr 0.00010 | ms/batch  8.40 | loss 0.00128387
| Epoch  80 |   100/  632 batches | lr 0.00010 | ms/batch 10.12 | loss 0.00126573
| Epoch  80 |   125/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00136076
| Epoch  80 |   150/  632 batches | lr 0.00010 | ms/batch  9.98 | loss 0.00128345
| Epoch  80 |   175/  632 batches | lr 0.00010 | ms/batch  8.10 | loss 0.00128154
| Epoch  80 |   

| Epoch  83 |   525/  632 batches | lr 0.00010 | ms/batch 10.24 | loss 0.00119135
| Epoch  83 |   550/  632 batches | lr 0.00010 | ms/batch 10.04 | loss 0.00129461
| Epoch  83 |   575/  632 batches | lr 0.00010 | ms/batch  8.02 | loss 0.00124281
| Epoch  83 |   600/  632 batches | lr 0.00010 | ms/batch 10.53 | loss 0.00121348
| Epoch  83 |   625/  632 batches | lr 0.00010 | ms/batch  9.95 | loss 0.00122334
| Epoch  84 |    25/  632 batches | lr 0.00010 | ms/batch 10.76 | loss 0.00125192
| Epoch  84 |    50/  632 batches | lr 0.00010 | ms/batch  7.90 | loss 0.00122851
| Epoch  84 |    75/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00121262
| Epoch  84 |   100/  632 batches | lr 0.00010 | ms/batch  9.96 | loss 0.00119654
| Epoch  84 |   125/  632 batches | lr 0.00010 | ms/batch 10.38 | loss 0.00129578
| Epoch  84 |   150/  632 batches | lr 0.00010 | ms/batch  8.80 | loss 0.00123068
| Epoch  84 |   175/  632 batches | lr 0.00010 | ms/batch 10.06 | loss 0.00120853
| Epoch  84 |   

| Epoch  87 |   525/  632 batches | lr 0.00010 | ms/batch  7.90 | loss 0.00113079
| Epoch  87 |   550/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00124280
| Epoch  87 |   575/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00118461
| Epoch  87 |   600/  632 batches | lr 0.00010 | ms/batch  9.62 | loss 0.00115611
| Epoch  87 |   625/  632 batches | lr 0.00010 | ms/batch  9.88 | loss 0.00115839
| Epoch  88 |    25/  632 batches | lr 0.00010 | ms/batch  8.21 | loss 0.00119219
| Epoch  88 |    50/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00116908
| Epoch  88 |    75/  632 batches | lr 0.00010 | ms/batch 10.31 | loss 0.00114718
| Epoch  88 |   100/  632 batches | lr 0.00010 | ms/batch  8.58 | loss 0.00113435
| Epoch  88 |   125/  632 batches | lr 0.00010 | ms/batch  9.81 | loss 0.00122307
| Epoch  88 |   150/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00117937
| Epoch  88 |   175/  632 batches | lr 0.00010 | ms/batch  9.45 | loss 0.00113738
| Epoch  88 |   

| Epoch  91 |   525/  632 batches | lr 0.00010 | ms/batch  9.46 | loss 0.00107632
| Epoch  91 |   550/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00119837
| Epoch  91 |   575/  632 batches | lr 0.00010 | ms/batch  7.77 | loss 0.00113184
| Epoch  91 |   600/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00110346
| Epoch  91 |   625/  632 batches | lr 0.00010 | ms/batch  9.39 | loss 0.00109645
| Epoch  92 |    25/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00114202
| Epoch  92 |    50/  632 batches | lr 0.00010 | ms/batch  9.39 | loss 0.00111560
| Epoch  92 |    75/  632 batches | lr 0.00010 | ms/batch  7.75 | loss 0.00108774
| Epoch  92 |   100/  632 batches | lr 0.00010 | ms/batch  9.41 | loss 0.00107510
| Epoch  92 |   125/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00116341
| Epoch  92 |   150/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00113366
| Epoch  92 |   175/  632 batches | lr 0.00010 | ms/batch  7.92 | loss 0.00107872
| Epoch  92 |   

| Epoch  95 |   525/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00102701
| Epoch  95 |   550/  632 batches | lr 0.00010 | ms/batch  7.85 | loss 0.00115396
| Epoch  95 |   575/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00107570
| Epoch  95 |   600/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00104476
| Epoch  95 |   625/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00104208
| Epoch  96 |    25/  632 batches | lr 0.00010 | ms/batch  8.12 | loss 0.00109941
| Epoch  96 |    50/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00106875
| Epoch  96 |    75/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00102800
| Epoch  96 |   100/  632 batches | lr 0.00010 | ms/batch  9.47 | loss 0.00101995
| Epoch  96 |   125/  632 batches | lr 0.00010 | ms/batch  7.99 | loss 0.00110377
| Epoch  96 |   150/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00109197
| Epoch  96 |   175/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00102504
| Epoch  96 |   

| Epoch  99 |   525/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00098184
| Epoch  99 |   550/  632 batches | lr 0.00010 | ms/batch  9.53 | loss 0.00110921
| Epoch  99 |   575/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00102820
| Epoch  99 |   600/  632 batches | lr 0.00010 | ms/batch  7.84 | loss 0.00099545
| Epoch  99 |   625/  632 batches | lr 0.00010 | ms/batch  9.52 | loss 0.00098913
| Epoch 100 |    25/  632 batches | lr 0.00010 | ms/batch  9.77 | loss 0.00106018
| Epoch 100 |    50/  632 batches | lr 0.00010 | ms/batch  9.44 | loss 0.00101364
| Epoch 100 |    75/  632 batches | lr 0.00010 | ms/batch  7.91 | loss 0.00097214
| Epoch 100 |   100/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00097036
| Epoch 100 |   125/  632 batches | lr 0.00010 | ms/batch  9.72 | loss 0.00104919
| Epoch 100 |   150/  632 batches | lr 0.00010 | ms/batch  9.43 | loss 0.00105217
| Epoch 100 |   175/  632 batches | lr 0.00010 | ms/batch  7.76 | loss 0.00097994
| Epoch 100 |   

| Epoch 103 |   525/  632 batches | lr 0.00010 | ms/batch  9.30 | loss 0.00093945
| Epoch 103 |   550/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00106730
| Epoch 103 |   575/  632 batches | lr 0.00010 | ms/batch  7.69 | loss 0.00098373
| Epoch 103 |   600/  632 batches | lr 0.00010 | ms/batch  9.29 | loss 0.00094681
| Epoch 103 |   625/  632 batches | lr 0.00010 | ms/batch  9.28 | loss 0.00094660
| Epoch 104 |    25/  632 batches | lr 0.00010 | ms/batch  9.66 | loss 0.00101803
| Epoch 104 |    50/  632 batches | lr 0.00010 | ms/batch  7.77 | loss 0.00096462
| Epoch 104 |    75/  632 batches | lr 0.00010 | ms/batch  9.30 | loss 0.00091787
| Epoch 104 |   100/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00092347
| Epoch 104 |   125/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00100023
| Epoch 104 |   150/  632 batches | lr 0.00010 | ms/batch  7.74 | loss 0.00100940
| Epoch 104 |   175/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00093558
| Epoch 104 |   

| Epoch 107 |   525/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00089970
| Epoch 107 |   550/  632 batches | lr 0.00010 | ms/batch  9.47 | loss 0.00103115
| Epoch 107 |   575/  632 batches | lr 0.00010 | ms/batch  9.29 | loss 0.00094297
| Epoch 107 |   600/  632 batches | lr 0.00010 | ms/batch  7.78 | loss 0.00090165
| Epoch 107 |   625/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00090282
| Epoch 108 |    25/  632 batches | lr 0.00010 | ms/batch  9.67 | loss 0.00097781
| Epoch 108 |    50/  632 batches | lr 0.00010 | ms/batch  9.25 | loss 0.00092744
| Epoch 108 |    75/  632 batches | lr 0.00010 | ms/batch  7.68 | loss 0.00087385
| Epoch 108 |   100/  632 batches | lr 0.00010 | ms/batch  9.23 | loss 0.00087811
| Epoch 108 |   125/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00094878
| Epoch 108 |   150/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00095392
| Epoch 108 |   175/  632 batches | lr 0.00010 | ms/batch  7.65 | loss 0.00089687
| Epoch 108 |   

| Epoch 111 |   525/  632 batches | lr 0.00010 | ms/batch  9.35 | loss 0.00086913
| Epoch 111 |   550/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00099814
| Epoch 111 |   575/  632 batches | lr 0.00010 | ms/batch  7.81 | loss 0.00090352
| Epoch 111 |   600/  632 batches | lr 0.00010 | ms/batch  9.49 | loss 0.00085714
| Epoch 111 |   625/  632 batches | lr 0.00010 | ms/batch  9.43 | loss 0.00086026
| Epoch 112 |    25/  632 batches | lr 0.00010 | ms/batch 10.02 | loss 0.00093358
| Epoch 112 |    50/  632 batches | lr 0.00010 | ms/batch  8.09 | loss 0.00088208
| Epoch 112 |    75/  632 batches | lr 0.00010 | ms/batch  9.40 | loss 0.00082989
| Epoch 112 |   100/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00083884
| Epoch 112 |   125/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00089951
| Epoch 112 |   150/  632 batches | lr 0.00010 | ms/batch  7.88 | loss 0.00088895
| Epoch 112 |   175/  632 batches | lr 0.00010 | ms/batch  9.32 | loss 0.00086811
| Epoch 112 |   

| Epoch 115 |   525/  632 batches | lr 0.00010 | ms/batch  7.69 | loss 0.00083443
| Epoch 115 |   550/  632 batches | lr 0.00010 | ms/batch  9.33 | loss 0.00095853
| Epoch 115 |   575/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00086266
| Epoch 115 |   600/  632 batches | lr 0.00010 | ms/batch  9.36 | loss 0.00081949
| Epoch 115 |   625/  632 batches | lr 0.00010 | ms/batch  7.71 | loss 0.00082728
| Epoch 116 |    25/  632 batches | lr 0.00010 | ms/batch  8.10 | loss 0.00089549
| Epoch 116 |    50/  632 batches | lr 0.00010 | ms/batch  9.37 | loss 0.00084786
| Epoch 116 |    75/  632 batches | lr 0.00010 | ms/batch  7.85 | loss 0.00079398
| Epoch 116 |   100/  632 batches | lr 0.00010 | ms/batch  9.39 | loss 0.00080322
| Epoch 116 |   125/  632 batches | lr 0.00010 | ms/batch  9.54 | loss 0.00085743
| Epoch 116 |   150/  632 batches | lr 0.00010 | ms/batch  9.39 | loss 0.00084359
| Epoch 116 |   175/  632 batches | lr 0.00010 | ms/batch  7.86 | loss 0.00083860
| Epoch 116 |   

| Epoch 119 |   525/  632 batches | lr 0.00010 | ms/batch  9.38 | loss 0.00080400
| Epoch 119 |   550/  632 batches | lr 0.00010 | ms/batch  9.34 | loss 0.00093122
| Epoch 119 |   575/  632 batches | lr 0.00010 | ms/batch  7.72 | loss 0.00082710
| Epoch 119 |   600/  632 batches | lr 0.00010 | ms/batch  9.31 | loss 0.00078475
| Epoch 119 |   625/  632 batches | lr 0.00010 | ms/batch  9.37 | loss 0.00079437
| Epoch 120 |    25/  632 batches | lr 0.00010 | ms/batch  9.83 | loss 0.00086374
| Epoch 120 |    50/  632 batches | lr 0.00010 | ms/batch  7.84 | loss 0.00081831
| Epoch 120 |    75/  632 batches | lr 0.00010 | ms/batch 10.28 | loss 0.00076025
| Epoch 120 |   100/  632 batches | lr 0.00010 | ms/batch 10.08 | loss 0.00077076
| Epoch 120 |   125/  632 batches | lr 0.00010 | ms/batch  9.61 | loss 0.00082426
| Epoch 120 |   150/  632 batches | lr 0.00010 | ms/batch  7.79 | loss 0.00080822
| Epoch 120 |   175/  632 batches | lr 0.00010 | ms/batch  9.73 | loss 0.00081051
| Epoch 120 |   

| Epoch 123 |   525/  632 batches | lr 0.00010 | ms/batch  8.08 | loss 0.00073966
| Epoch 123 |   550/  632 batches | lr 0.00010 | ms/batch  9.86 | loss 0.00087868
| Epoch 123 |   575/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00081240
| Epoch 123 |   600/  632 batches | lr 0.00010 | ms/batch  9.89 | loss 0.00074685
| Epoch 123 |   625/  632 batches | lr 0.00010 | ms/batch  9.58 | loss 0.00076248
| Epoch 124 |    25/  632 batches | lr 0.00010 | ms/batch  8.69 | loss 0.00083765
| Epoch 124 |    50/  632 batches | lr 0.00010 | ms/batch  9.66 | loss 0.00077898
| Epoch 124 |    75/  632 batches | lr 0.00010 | ms/batch  9.94 | loss 0.00073020
| Epoch 124 |   100/  632 batches | lr 0.00010 | ms/batch  8.11 | loss 0.00073476
| Epoch 124 |   125/  632 batches | lr 0.00010 | ms/batch  9.62 | loss 0.00078848
| Epoch 124 |   150/  632 batches | lr 0.00010 | ms/batch  9.54 | loss 0.00081294
| Epoch 124 |   175/  632 batches | lr 0.00010 | ms/batch  9.60 | loss 0.00079539
| Epoch 124 |   

In [24]:
testing_results

Unnamed: 0,hidden_size,n_layers,act_fun,init_methods,mean_val_result,std_val_result,test_mse,test_mae,test_rmse,test_mape
0,400,8,LeakyReLU,xavier uniform,0.003321,0.000882,0.002097,0.033638,0.045793,-0.165342
1,400,4,ReLU,xavier uniform,0.00397,0.00054,0.002925,0.039573,0.054085,-0.053486
2,400,4,LeakyReLU,xavier uniform,0.003975,0.000493,0.003117,0.040846,0.055828,-0.000966
3,400,6,LeakyReLU,xavier uniform,0.003573,0.000707,0.002501,0.036594,0.050011,0.007922
4,600,4,LeakyReLU,xavier uniform,0.003256,0.000956,0.001807,0.031455,0.042512,0.10401


In [25]:
testing_results.to_csv('../results/testing_results_heston.csv')