In [1]:
import os
import sys
import numpy as np
import argparse
import h5py
import math
import random
import time
import logging
import matplotlib.pyplot as plt
import sklearn
import csv

import torch
import torchaudio
#torch.backends.cudnn.benchmark=True
#torch.manual_seed(0)
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
 
sys.path.append("utils")
from utilities import get_filename
sys.path.append("pytorch/")
from models import *
import config

from scipy.stats import pearsonr
from torchmetrics import R2Score, ConcordanceCorrCoef


In [2]:

class Transfer_Cnn14(nn.Module):
    """Regression head stacked on a Cnn14 backbone.

    The backbone is built with 527 output classes (``audioset_classes_num``)
    so that an AudioSet-pretrained checkpoint can be loaded into it. Its
    ``'embedding'`` output is passed through ``ff_mid`` (2048 -> 10), a GELU,
    and ``fc_transfer`` (10 -> ``classes_num``).

    ``Cnn14`` and ``init_layer`` are provided by the star import of ``models``.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num, freeze_base):
        """Build the backbone and the two-layer transfer head.

        Args:
            sample_rate, window_size, hop_size, mel_bins, fmin, fmax:
                forwarded unchanged to ``Cnn14``.
            classes_num: output width of ``fc_transfer``.
            freeze_base: when truthy, every backbone parameter gets
                ``requires_grad = False`` so only the head is trained.
        """
        super(Transfer_Cnn14, self).__init__()
        audioset_classes_num = 527

        self.base = Cnn14(sample_rate, window_size, hop_size, mel_bins, fmin,
            fmax, audioset_classes_num)

        # Transfer head: bottleneck layer followed by the output layer.
        self.ff_mid = nn.Linear(2048, 10, bias=True)
        self.fc_transfer = nn.Linear(10, classes_num, bias=True)
        self.gelu = nn.GELU()

        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

        self.init_weights()

    def init_weights(self):
        """Initialise ``fc_transfer`` with ``init_layer``.

        ``ff_mid`` keeps the default ``nn.Linear`` initialisation.
        """
        init_layer(self.fc_transfer)

    def load_from_pretrain(self, pretrained_checkpoint_path):
        """Load the backbone weights from ``checkpoint['model']``.

        The checkpoint is mapped to CPU while loading so that a file saved
        from a GPU run can also be read on a machine without CUDA;
        ``load_state_dict`` then copies the values into the parameters on
        whatever device they currently live.
        """
        # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(pretrained_checkpoint_path, map_location='cpu')
        self.base.load_state_dict(checkpoint['model'])

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, data_length)

        Returns the backbone's output dict with an extra ``'regressor'`` entry
        holding the head output (presumably (batch_size, classes_num) - this
        assumes the backbone embedding is (batch_size, 2048)).
        """
        output_dict = self.base(input, mixup_lambda)
        embedding = output_dict['embedding']

        embedding = self.gelu(self.ff_mid(embedding))
        output = self.fc_transfer(embedding)
        output_dict['regressor'] = output

        return output_dict

In [3]:
class Transfer_Cnn10(nn.Module):
    """Regression head stacked on a Cnn10 backbone.

    The backbone is built with 527 output classes (``audioset_classes_num``)
    so that an AudioSet-pretrained checkpoint can be loaded into it. Its
    ``'embedding'`` output is passed through ``ff_mid`` (512 -> 10), a GELU,
    and ``fc_transfer`` (10 -> ``classes_num``).

    ``Cnn10`` and ``init_layer`` are provided by the star import of ``models``.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num, freeze_base):
        """Build the backbone and the two-layer transfer head.

        Args:
            sample_rate, window_size, hop_size, mel_bins, fmin, fmax:
                forwarded unchanged to ``Cnn10``.
            classes_num: output width of ``fc_transfer``.
            freeze_base: when truthy, every backbone parameter gets
                ``requires_grad = False`` so only the head is trained.
        """
        super(Transfer_Cnn10, self).__init__()
        audioset_classes_num = 527

        self.base = Cnn10(sample_rate, window_size, hop_size, mel_bins, fmin,
            fmax, audioset_classes_num)

        # Transfer head: bottleneck layer followed by the output layer.
        self.ff_mid = nn.Linear(512, 10, bias=True)
        self.fc_transfer = nn.Linear(10, classes_num, bias=True)
        self.gelu = nn.GELU()

        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

        self.init_weights()

    def init_weights(self):
        """Initialise ``fc_transfer`` with ``init_layer``.

        ``ff_mid`` keeps the default ``nn.Linear`` initialisation.
        """
        init_layer(self.fc_transfer)

    def load_from_pretrain(self, pretrained_checkpoint_path):
        """Load the backbone weights from ``checkpoint['model']``.

        The checkpoint is mapped to CPU while loading so that a file saved
        from a GPU run can also be read on a machine without CUDA;
        ``load_state_dict`` then copies the values into the parameters on
        whatever device they currently live.
        """
        # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(pretrained_checkpoint_path, map_location='cpu')
        self.base.load_state_dict(checkpoint['model'])

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, data_length)

        Returns the backbone's output dict with an extra ``'regressor'`` entry
        holding the head output (presumably (batch_size, classes_num) - this
        assumes the backbone embedding is (batch_size, 512)).
        """
        output_dict = self.base(input, mixup_lambda)
        embedding = output_dict['embedding']

        embedding = self.gelu(self.ff_mid(embedding))
        output = self.fc_transfer(embedding)
        output_dict['regressor'] = output

        return output_dict

In [4]:
class Transfer_Cnn6(nn.Module):
    """Regression head stacked on a Cnn6 backbone.

    The backbone is built with 527 output classes (``audioset_classes_num``)
    so that an AudioSet-pretrained checkpoint can be loaded into it. Its
    ``'embedding'`` output is passed through ``ff_mid`` (512 -> 10), a GELU,
    and ``fc_transfer`` (10 -> ``classes_num``).

    ``Cnn6`` and ``init_layer`` are provided by the star import of ``models``.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num, freeze_base):
        """Build the backbone and the two-layer transfer head.

        Args:
            sample_rate, window_size, hop_size, mel_bins, fmin, fmax:
                forwarded unchanged to ``Cnn6``.
            classes_num: output width of ``fc_transfer``.
            freeze_base: when truthy, every backbone parameter gets
                ``requires_grad = False`` so only the head is trained.
        """
        super(Transfer_Cnn6, self).__init__()
        audioset_classes_num = 527

        self.base = Cnn6(sample_rate, window_size, hop_size, mel_bins, fmin,
            fmax, audioset_classes_num)

        # Transfer head: bottleneck layer followed by the output layer.
        self.ff_mid = nn.Linear(512, 10, bias=True)
        self.fc_transfer = nn.Linear(10, classes_num, bias=True)
        self.gelu = nn.GELU()

        if freeze_base:
            # Freeze AudioSet pretrained layers
            for param in self.base.parameters():
                param.requires_grad = False

        self.init_weights()

    def init_weights(self):
        """Initialise ``fc_transfer`` with ``init_layer``.

        ``ff_mid`` keeps the default ``nn.Linear`` initialisation.
        """
        init_layer(self.fc_transfer)

    def load_from_pretrain(self, pretrained_checkpoint_path):
        """Load the backbone weights from ``checkpoint['model']``.

        The checkpoint is mapped to CPU while loading so that a file saved
        from a GPU run can also be read on a machine without CUDA;
        ``load_state_dict`` then copies the values into the parameters on
        whatever device they currently live.
        """
        # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
        checkpoint = torch.load(pretrained_checkpoint_path, map_location='cpu')
        self.base.load_state_dict(checkpoint['model'])

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, data_length)

        Returns the backbone's output dict with an extra ``'regressor'`` entry
        holding the head output (presumably (batch_size, classes_num) - this
        assumes the backbone embedding is (batch_size, 512)).
        """
        output_dict = self.base(input, mixup_lambda)
        embedding = output_dict['embedding']

        embedding = self.gelu(self.ff_mid(embedding))
        output = self.fc_transfer(embedding)
        output_dict['regressor'] = output

        return output_dict

In [5]:
def extract_index_from_path(data_path_with_index):
    """Split ``'<file path>_<index>'`` at its last underscore.

    Args:
        data_path_with_index: string whose text after the final ``'_'`` is an
            integer.

    Returns:
        ``(file_path, index)`` where ``file_path`` is everything before the
        final underscore and ``index`` is the integer that follows it.

    Raises:
        ValueError: if the string contains no underscore, or the suffix after
            the last underscore is not an integer.
    """
    file_path, separator, index_text = data_path_with_index.rpartition('_')
    if not separator:
        # Without this guard a missing '_' would silently drop the last
        # character of the path instead of failing.
        raise ValueError(
            "expected '<path>_<index>', got %r" % (data_path_with_index,))
    return file_path, int(index_text)

In [6]:
import pandas

# Root directory prepended to every file name read from the metadata CSV.
folder = '../alunos_finger_Tinder/SPIRA_Dataset_V2/'

def create_data_paths_list(csv_file, folder=folder, seed=None):
    """Read the metadata CSV and return shuffled (path, oxygen saturation) pairs.

    Args:
        csv_file: path of a CSV file with the columns ``'arquivo'`` (file name
            to append to ``folder``) and ``'oxigenacao'`` (target value).
        folder: prefix concatenated in front of every ``'arquivo'`` entry.
        seed: optional seed. When given, the shuffle uses a private
            ``random.Random(seed)`` so the order is reproducible; when ``None``
            the module-level ``random.shuffle`` is used.

    Returns:
        A list of ``(folder + arquivo, oxigenacao)`` tuples in shuffled order.
    """
    # Named `metadata` rather than `csv` so the imported csv module is not shadowed.
    metadata = pandas.read_csv(csv_file)

    data_paths_list = [
        (folder + file_path, oxygen_saturation_val)
        for file_path, oxygen_saturation_val
        in zip(metadata['arquivo'], metadata['oxigenacao'])
    ]

    shuffler = random if seed is None else random.Random(seed)
    shuffler.shuffle(data_paths_list)

    return data_paths_list
        
# Build the shuffled list of (path-with-index, oxygen saturation) pairs used by
# the cross-validation cell. NOTE(review): no random seed is set in the visible
# cells, so the order (and therefore the fold split) changes between runs.
csv_file = '../alunos_finger_Tinder/SPIRA_Dataset_V2/metadata_cv_index.csv'
data_paths = create_data_paths_list(csv_file)


In [7]:
#build function to process data in batches
def process_batches(data_paths, number_coeffs, min_frequency, max_frequency, batch_size, pretrain, path_index,
                    device=None, audio_length=4, new_sample_rate=16000):
    """Load, resample and pad the next batch of audio excerpts.

    Starting at ``data_paths[path_index]``, up to ``batch_size`` entries are
    read. Each entry is ``(path_with_index, oxygen_saturation)``; the path is
    split by ``extract_index_from_path`` and the integer suffix is used as the
    start offset in seconds (``frame_offset = index * sample_rate``).

    Args:
        data_paths: list of ``(path_with_index, target)`` pairs.
        number_coeffs, min_frequency, max_frequency, pretrain: accepted for
            call compatibility; not used by this function.
        batch_size: maximum number of excerpts to load (must be >= 1).
        path_index: position in ``data_paths`` of the first excerpt to load.
        device: device for the returned tensors. Defaults to ``'cuda'`` when
            available, otherwise ``'cpu'``.
        audio_length: maximum excerpt length in seconds.
        new_sample_rate: sample rate every excerpt is resampled to.

    Returns:
        ``(data_batch, data_target_list, path_index)``: the zero-padded batch
        tensor, a float32 tensor of targets, and the index of the first entry
        that was not consumed. If ``path_index`` is already past the end of
        ``data_paths`` the two tensors are empty.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got %r" % (batch_size,))
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # One resampler per source sample rate, instead of a new one for every file.
    resamplers = {}
    data_batch = []
    data_target_list = []

    while len(data_batch) < batch_size and path_index < len(data_paths):
        data_path_with_index, oxygen_saturation_val = data_paths[path_index][0], data_paths[path_index][1]

        data_path, index = extract_index_from_path(data_path_with_index)
        # The file's own sample rate is needed to express the offset and the
        # excerpt length in frames.
        sample_rate = torchaudio.info(data_path).sample_rate

        data_elem, sample_rate = torchaudio.load(
            data_path,
            frame_offset=index * sample_rate,
            num_frames=int(audio_length * sample_rate))

        # downsampling to fit gpu memory
        if sample_rate not in resamplers:
            resamplers[sample_rate] = torchaudio.transforms.Resample(sample_rate, new_sample_rate)
        data_elem = resamplers[sample_rate](data_elem)

        # Keep only the first row of the loaded tensor (presumably the first
        # audio channel - confirm against the dataset).
        data_batch.append(data_elem[0])

        # Regression target: the oxygen saturation value paired with this file.
        data_target_list.append(oxygen_saturation_val)

        path_index += 1

    if not data_batch:
        # Nothing left to read: pad_sequence cannot handle an empty list.
        return (torch.empty((0, 0), device=device),
                torch.empty(0, dtype=torch.float32, device=device),
                path_index)

    # convert list to torch tensor (pads different audio lengths to same size)
    data_batch = nn.utils.rnn.pad_sequence(data_batch, batch_first=True)
    data_batch = data_batch.to(device)

    data_target_list = torch.tensor(data_target_list, dtype=torch.float32, device=device)

    return data_batch, data_target_list, path_index

In [8]:
#function to train model
def run_epoch(model, loss_compute, data_paths, avg_loss=0, pretrain='pretrain', training=True, batch_size=16, extract_coeffs='both', min_frequency = 0.0, max_frequency=None, number_coeffs=128, mask_proportion=0., mask_consecutive_frames=7, mask_frequency_proportion=0., random_noise_proportion=0.0):
    """Run one pass over ``data_paths`` and report regression metrics.

    Batches are produced by ``process_batches``; each batch is fed to
    ``model`` and the result to ``loss_compute``, which is expected to return
    ``(loss_value, predictions, targets)`` and to perform the optimisation
    step itself when ``training`` is true.

    Args:
        model: callable mapping a batch tensor to the dict consumed by
            ``loss_compute``.
        loss_compute: callable ``(model_output, reference, training)``.
        data_paths: list of ``(path_with_index, target)`` pairs.
        avg_loss: starting value of the exponential moving average of the loss.
        pretrain: when equal to the string ``'pretrain'`` the input batch is
            used as the reference; any other value uses the target tensor.
        training: forwarded to ``loss_compute``; when false every
            prediction/target pair is printed.
        batch_size, min_frequency, max_frequency, number_coeffs: forwarded to
            ``process_batches``.
        extract_coeffs, mask_proportion, mask_consecutive_frames,
        mask_frequency_proportion, random_noise_proportion: accepted for call
            compatibility; not used here.

    Returns:
        ``(total_loss / n_samples, avg_loss, true_avg_loss, diff_mean,
        diff_std, R_sq, pearson)`` where ``true_avg_loss`` is the mean of the
        per-batch loss values, ``diff_mean``/``diff_std`` summarise the
        absolute error, and ``R_sq`` is the squared Pearson correlation between
        targets and outputs (not the coefficient of determination).

    Raises:
        ValueError: if ``data_paths`` is empty.
    """
    number_elements = len(data_paths)
    if number_elements == 0:
        raise ValueError("run_epoch needs at least one element in data_paths")

    start = time.time()
    total_tokens = 0
    total_loss = 0
    tokens = 0

    outputs = []
    targets = []

    # path index is the index of the audio file in the filenames list
    path_index = 0
    # step index stores the amount of steps taken by the algorithm so far
    step_index = 0
    while path_index < number_elements:
        step_index += 1
        data_batch, data_target_list, path_index = process_batches(data_paths, number_coeffs, min_frequency, max_frequency, batch_size, pretrain, path_index)
        b_size = data_batch.shape[0]

        # Call the module itself rather than .forward() so hooks are honoured.
        out = model(data_batch)

        # Both modes unpack the same three values, so `output` and `target`
        # are always bound before being collected below.
        reference = data_batch if pretrain == 'pretrain' else data_target_list
        loss, output, target = loss_compute(out, reference, training)

        outputs.append(output)
        targets.append(target)

        total_loss += loss
        avg_loss = avg_loss*0.99 + loss*0.01
        total_tokens += b_size
        tokens += b_size
        true_avg_loss = total_loss/step_index

        if step_index % 5 == 1:
            elapsed = time.time() - start
            print("Epoch Step: %d Loss: %f Tokens per Sec: %f True Avg Loss: %f" %
                    (step_index, avg_loss, tokens / elapsed, true_avg_loss))
            start = time.time()
            tokens = 0

    outputs = np.concatenate(outputs)
    targets = np.concatenate(targets)

    abs_diff = np.abs(outputs-targets)
    diff_mean = np.mean(abs_diff)
    diff_std = np.std(abs_diff)

    # Only the first output column is correlated with the targets.
    corr_matrix = np.corrcoef(targets[:,0], outputs[:,0])
    corr = corr_matrix[0,1]
    R_sq = corr**2
    # Pearson correlation: https://www.geeksforgeeks.org/python-pearson-correlation-test-between-two-variables/
    pearson, _ = pearsonr(outputs[:,0], targets[:,0])

    print("Loss:", true_avg_loss)
    print("R2:", R_sq)
    print("Pearson:", pearson)

    if not training:
        for predicted, expected in zip(outputs, targets):
            print("Saida: {}  Alvo: {}".format(predicted, expected))

    print("Diff mean:", diff_mean)
    print("Diff std:", diff_std)

    return total_loss / (total_tokens), avg_loss, true_avg_loss, diff_mean, diff_std, R_sq, pearson

In [9]:
class NoamOpt:
    """Optimizer wrapper that sets the learning rate before every update.

    The warmup schedule this wrapper was written for is currently disabled:
    ``rate`` returns the constant ``1e-4`` and ``model_size``, ``factor`` and
    ``warmup`` are only stored.
    """

    def __init__(self, model_size, factor, warmup, optimizer):
        self.model_size = model_size
        self.factor = factor
        self.warmup = warmup
        self.optimizer = optimizer
        # Number of updates performed so far and the rate used for the last one.
        self._step = 0
        self._rate = 0

    def step(self):
        """Advance the step counter, write the rate into every param group, then step."""
        self._step += 1
        current_rate = self.rate()
        for group in self.optimizer.param_groups:
            group['lr'] = current_rate
        self._rate = current_rate
        self.optimizer.step()

    def rate(self, step = None):
        """Return the learning rate for ``step`` (defaults to the current step)."""
        if step is None:
            step = self._step
        # Disabled schedule, kept for reference:
        #   self.factor * (self.model_size ** (-0.5) *
        #       min(step ** (-0.5), step * self.warmup ** (-1.5)))
        return 1e-4
        
def get_std_opt(model):
    """Wrap an Adam optimizer for ``model`` in a ``NoamOpt`` (factor 2, warmup 4000).

    The model size is read from ``model.src_embed[0].d_model``.
    NOTE(review): the Transfer_Cnn* classes defined in this notebook do not set
    a ``src_embed`` attribute, so this helper looks intended for a different
    model family - confirm before using it here.
    """
    d_model = model.src_embed[0].d_model
    adam = torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
    return NoamOpt(d_model, 2, 4000, adam)

In [10]:
class LossCompute:
    """Compute the loss for one batch and, when training, apply one update.

    Attributes:
        model: stored reference to the model (not used by ``__call__``).
        opt: optional optimizer wrapper exposing ``step()`` and an
            ``optimizer`` attribute with ``zero_grad()``.
        pretrain: the string ``'pretrain'`` selects an L1 loss on
            ``output_dict['clipwise_output']``; any other value selects the
            regression loss on ``output_dict['regressor']``.
        loss_function: ``'MAE'`` for L1 loss, anything else for MSE
            (regression mode only).
    """

    def __init__(self, model, opt=None, pretrain='pretrain', loss_function = 'MAE'):
        self.model = model
        self.opt = opt
        self.pretrain = pretrain
        self.loss_function = loss_function

    def __call__(self, output_dict, y, training):
        """Return ``(loss_value, predictions, targets)`` for one batch.

        Args:
            output_dict: model output; must hold ``'clipwise_output'`` in
                pretrain mode or ``'regressor'`` otherwise.
            y: reference tensor. In regression mode it is unsqueezed on
                dim 1 before the loss is computed.
            training: when truthy, back-propagates and, if ``opt`` is set,
                steps it and clears its gradients.

        Returns:
            The loss as a Python float, plus detached CPU copies of the
            predictions and of the reference the loss was computed against.
        """
        if self.pretrain == 'pretrain':
            prediction = output_dict['clipwise_output']
            loss_fct = nn.L1Loss()
        else:
            prediction = output_dict['regressor']
            if self.loss_function == 'MAE':  # mae loss
                loss_fct = nn.L1Loss()
            else:  # MSE loss
                loss_fct = nn.MSELoss()
            # Add a trailing dimension so the targets line up with the
            # regressor output.
            y = y.unsqueeze(1)

        loss = loss_fct(prediction, y)

        # Bound in both modes so the return below never hits an unbound name.
        preds = prediction.detach().cpu().clone()
        y_true = y.detach().cpu().clone()

        if training:
            loss.backward()
            if self.opt is not None:
                self.opt.step()
                self.opt.optimizer.zero_grad()
        return loss.item(), preds, y_true

In [11]:
# Experiment configuration consumed by the cross-validation cell.
# NOTE(review): fmax equals sample_rate, i.e. it lies above sample_rate / 2 - confirm.
# NOTE(review): process_batches resamples audio to 16000 Hz while sample_rate
# here is 32000 - confirm this mismatch is intended.
args = {
    'sample_rate': 32000,
    'window_size': 1024,
    'hop_size': 320,
    'mel_bins': 64,
    'fmin': 0,
    'fmax': 32000,
    'model_type': "Transfer_Cnn6",
    'pretrained_checkpoint_path': "../../spira_segunda_fase_coleta/Pretrained_audio_neural_networks/Cnn6.pth",
    'freeze_base': False,
    'cuda': True,
}

In [13]:
# Mode tag handed to run_epoch / LossCompute. Both compare it with the string
# 'pretrain'; any other value selects the supervised regression path.
training_mode = 'ri'
loss_function = 'MSE'
d_model = 2048
num_epochs = 20

# Arguments & parameters
sample_rate = args['sample_rate']
window_size = args['window_size']
hop_size = args['hop_size']
mel_bins = args['mel_bins']
fmin = args['fmin']
fmax = args['fmax']
model_type = args['model_type']
pretrained_checkpoint_path = args['pretrained_checkpoint_path']
freeze_base = args['freeze_base']
device = 'cuda' if (args['cuda'] and torch.cuda.is_available()) else 'cpu'
classes_num = 1#config.classes_num
# Whether the backbone is initialised from the checkpoint (kept separate from
# the mode tag above).
pretrain = bool(pretrained_checkpoint_path)

# Resolve the model class by name from the notebook namespace instead of
# eval(), which would execute an arbitrary expression.
Model = globals().get(model_type)
if Model is None:
    raise ValueError("unknown model_type: %r" % (model_type,))

cv_folds = 10
data_points = len(data_paths)
# Samples beyond fold_size * cv_folds are never used for testing; they stay in
# the training split of every fold.
fold_size = data_points // cv_folds
if fold_size == 0:
    raise ValueError("need at least %d data points for %d folds, got %d"
                     % (cv_folds, cv_folds, data_points))

pearson_fold = []
R2_fold = []
MSE_fold = []
MAE_fold = []
MAE_std_fold = []

for i in range(cv_folds):
    slice_start = fold_size*i
    slice_end = fold_size*(i+1)
    data_paths_test = data_paths[slice_start:slice_end]
    data_paths_train = data_paths[:slice_start]+data_paths[slice_end:]

    # A fresh model and optimizer for every fold.
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, classes_num, freeze_base)
    model_opt = NoamOpt(d_model, 1, 10,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    # Load pretrained model
    if pretrain:
        logging.info('Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)
        print('Load pretrained model successfully!')

    model.to(device)

    avg_loss=0
    model_path = 'model_test_mel.ckpt'
    min_frequency = 0.0
    max_frequency = None
    model.train()
    for epoch in range(num_epochs):
        loss, avg_loss, _, _, _, _, _ = run_epoch(model,
                  LossCompute(model, model_opt, training_mode, loss_function),
                  data_paths_train, pretrain=training_mode, training=True, avg_loss=avg_loss,
                  min_frequency=min_frequency, max_frequency=max_frequency, number_coeffs=64)
    model.eval()
    with torch.no_grad():
        loss, avg_loss, true_avg_loss, diff_mean, diff_std, r2, pearson = run_epoch(model,
                    LossCompute(model, None, training_mode, loss_function),
                    data_paths_test, pretrain=training_mode, training=False,
                    min_frequency=min_frequency, max_frequency=max_frequency, number_coeffs=64)
        # Print the values just returned for this fold (the previous names
        # val_* were never assigned in this notebook).
        print('val_true_avg_loss=', true_avg_loss)
        print('val_diff_mean=', diff_mean)
        print('val_diff_std=', diff_std)
        print('r2=', r2)
        print('pearson=', pearson)
        print('loss=', loss)

    pearson_fold.append(pearson)
    R2_fold.append(r2)
    # With loss_function == 'MSE' the mean per-batch loss is the fold's MSE.
    MSE_fold.append(true_avg_loss)
    MAE_fold.append(diff_mean)
    MAE_std_fold.append(diff_std)


MSE_fold = np.array(MSE_fold)
MAE_fold = np.array(MAE_fold)
MAE_std_fold = np.array(MAE_std_fold)
R2_fold = np.array(R2_fold)
pearson_fold = np.array(pearson_fold)

print('========================================================================')
print("TEST SUMMARY OF THE CROSS VALIDATION")
print('========================================================================')

print("Avg MAE_std:", np.mean(MAE_std_fold))
print("Avg. abs error:",  np.mean(MAE_fold))
print("Avg r2:", np.mean(R2_fold))
print("Avg pearson:", np.mean(pearson_fold))
print("Avg MSE:", np.mean(MSE_fold))


Epoch Step: 1 Loss: 88.158203 Tokens per Sec: 62.632810 True Avg Loss: 8815.820312
Epoch Step: 6 Loss: 507.204885 Tokens per Sec: 67.658426 True Avg Loss: 8667.562988
Epoch Step: 11 Loss: 903.073726 Tokens per Sec: 70.414102 True Avg Loss: 8630.642933
Epoch Step: 16 Loss: 1278.366222 Tokens per Sec: 69.569481 True Avg Loss: 8608.656128
Epoch Step: 21 Loss: 1634.267384 Tokens per Sec: 68.619326 True Avg Loss: 8592.388625
Epoch Step: 26 Loss: 1968.891507 Tokens per Sec: 68.150794 True Avg Loss: 8567.192195
Epoch Step: 31 Loss: 2292.761861 Tokens per Sec: 69.371628 True Avg Loss: 8568.389428
Epoch Step: 36 Loss: 2594.064434 Tokens per Sec: 68.881912 True Avg Loss: 8550.759793
Epoch Step: 41 Loss: 2883.187930 Tokens per Sec: 69.021903 True Avg Loss: 8543.829102
Epoch Step: 46 Loss: 3158.405236 Tokens per Sec: 67.616583 True Avg Loss: 8538.941258
Epoch Step: 51 Loss: 3414.988547 Tokens per Sec: 66.556076 True Avg Loss: 8524.602156
Epoch Step: 56 Loss: 3658.164880 Tokens per Sec: 65.756670 T

Epoch Step: 6 Loss: 1181.945546 Tokens per Sec: 65.344345 True Avg Loss: 210.227295
Epoch Step: 11 Loss: 1133.407034 Tokens per Sec: 67.135651 True Avg Loss: 201.977671
Epoch Step: 16 Loss: 1084.551570 Tokens per Sec: 65.697189 True Avg Loss: 181.600003
Epoch Step: 21 Loss: 1041.827010 Tokens per Sec: 64.938603 True Avg Loss: 188.900811
Epoch Step: 26 Loss: 1000.426018 Tokens per Sec: 63.411109 True Avg Loss: 190.526001
Epoch Step: 31 Loss: 963.552164 Tokens per Sec: 63.903941 True Avg Loss: 199.937322
Epoch Step: 36 Loss: 925.577840 Tokens per Sec: 64.399549 True Avg Loss: 198.390635
Epoch Step: 41 Loss: 888.676444 Tokens per Sec: 64.206266 True Avg Loss: 195.236894
Epoch Step: 46 Loss: 855.016574 Tokens per Sec: 64.508724 True Avg Loss: 195.935355
Epoch Step: 51 Loss: 824.009839 Tokens per Sec: 63.912400 True Avg Loss: 198.473609
Epoch Step: 56 Loss: 792.776816 Tokens per Sec: 65.330604 True Avg Loss: 197.381935
Epoch Step: 61 Loss: 761.533976 Tokens per Sec: 64.778622 True Avg Loss:

Epoch Step: 21 Loss: 157.332768 Tokens per Sec: 65.531469 True Avg Loss: 137.723950
Epoch Step: 26 Loss: 155.781106 Tokens per Sec: 66.126697 True Avg Loss: 135.398168
Epoch Step: 31 Loss: 155.808564 Tokens per Sec: 65.983684 True Avg Loss: 138.795470
Epoch Step: 36 Loss: 154.915996 Tokens per Sec: 66.133344 True Avg Loss: 138.680608
Epoch Step: 41 Loss: 154.244102 Tokens per Sec: 65.716142 True Avg Loss: 139.041825
Epoch Step: 46 Loss: 151.813896 Tokens per Sec: 66.675367 True Avg Loss: 135.277335
Epoch Step: 51 Loss: 150.844632 Tokens per Sec: 65.495743 True Avg Loss: 134.960506
Epoch Step: 56 Loss: 149.276830 Tokens per Sec: 64.932470 True Avg Loss: 133.535144
Epoch Step: 61 Loss: 147.470075 Tokens per Sec: 66.960921 True Avg Loss: 131.793092
Epoch Step: 66 Loss: 147.484210 Tokens per Sec: 65.758925 True Avg Loss: 133.013529
Epoch Step: 71 Loss: 145.484672 Tokens per Sec: 66.562215 True Avg Loss: 131.146154
Epoch Step: 76 Loss: 144.672162 Tokens per Sec: 65.974252 True Avg Loss: 130

Epoch Step: 36 Loss: 119.100699 Tokens per Sec: 66.371306 True Avg Loss: 108.598127
Epoch Step: 41 Loss: 118.611884 Tokens per Sec: 66.055100 True Avg Loss: 108.612450
Epoch Step: 46 Loss: 118.159354 Tokens per Sec: 65.815639 True Avg Loss: 108.668033
Epoch Step: 51 Loss: 117.360838 Tokens per Sec: 65.466071 True Avg Loss: 108.029682
Epoch Step: 56 Loss: 116.425561 Tokens per Sec: 65.773285 True Avg Loss: 107.154824
Epoch Step: 61 Loss: 116.015543 Tokens per Sec: 66.689666 True Avg Loss: 107.248513
Epoch Step: 66 Loss: 118.825577 Tokens per Sec: 66.236189 True Avg Loss: 112.245492
Epoch Step: 71 Loss: 118.622055 Tokens per Sec: 67.278770 True Avg Loss: 112.402960
Epoch Step: 76 Loss: 117.967111 Tokens per Sec: 66.394906 True Avg Loss: 111.936021
Epoch Step: 81 Loss: 117.208868 Tokens per Sec: 66.128978 True Avg Loss: 111.351618
Epoch Step: 86 Loss: 117.385696 Tokens per Sec: 66.915865 True Avg Loss: 111.911878
Epoch Step: 91 Loss: 116.024626 Tokens per Sec: 66.632971 True Avg Loss: 110

Epoch Step: 51 Loss: 109.148585 Tokens per Sec: 64.996907 True Avg Loss: 104.835916
Epoch Step: 56 Loss: 109.044564 Tokens per Sec: 65.119300 True Avg Loss: 105.005079
Epoch Step: 61 Loss: 108.644483 Tokens per Sec: 66.099563 True Avg Loss: 104.634250
Epoch Step: 66 Loss: 108.278170 Tokens per Sec: 65.179841 True Avg Loss: 104.386702
Epoch Step: 71 Loss: 107.619811 Tokens per Sec: 65.701820 True Avg Loss: 103.707373
Epoch Step: 76 Loss: 107.542343 Tokens per Sec: 65.045491 True Avg Loss: 103.838207
Epoch Step: 81 Loss: 107.525310 Tokens per Sec: 65.172460 True Avg Loss: 104.035787
Epoch Step: 86 Loss: 108.461758 Tokens per Sec: 65.305098 True Avg Loss: 105.350816
Epoch Step: 91 Loss: 109.050320 Tokens per Sec: 64.892186 True Avg Loss: 106.195414
Epoch Step: 96 Loss: 109.430194 Tokens per Sec: 64.573850 True Avg Loss: 106.734740
Epoch Step: 1 Loss: 2.780659 Tokens per Sec: 118.652847 True Avg Loss: 278.065887
Epoch Step: 6 Loss: 14.902082 Tokens per Sec: 116.072849 True Avg Loss: 254.69

Epoch Step: 66 Loss: 102.959686 Tokens per Sec: 66.481268 True Avg Loss: 105.101719
Epoch Step: 71 Loss: 102.346273 Tokens per Sec: 67.254092 True Avg Loss: 104.072014
Epoch Step: 76 Loss: 101.021862 Tokens per Sec: 65.923973 True Avg Loss: 102.185321
Epoch Step: 81 Loss: 101.604593 Tokens per Sec: 65.697665 True Avg Loss: 102.849405
Epoch Step: 86 Loss: 101.285089 Tokens per Sec: 66.857600 True Avg Loss: 102.399919
Epoch Step: 91 Loss: 103.354682 Tokens per Sec: 66.499595 True Avg Loss: 104.659369
Epoch Step: 96 Loss: 103.787344 Tokens per Sec: 65.861668 True Avg Loss: 105.077453
Epoch Step: 1 Loss: 3.265310 Tokens per Sec: 123.080878 True Avg Loss: 326.530975
Epoch Step: 6 Loss: 18.078655 Tokens per Sec: 122.298738 True Avg Loss: 308.905838
val_avg_loss= 26.50518199502305
val_true_avg_loss= 306.62537638346356
val_diff_mean= 17.352024
val_diff_std= 3.1615732
loss= 21.06586555306238
Epoch= 20
Epoch Step: 1 Loss: 103.502034 Tokens per Sec: 67.982576 True Avg Loss: 114.535004
Epoch Step:

Epoch Step: 86 Loss: 96.550189 Tokens per Sec: 62.297283 True Avg Loss: 98.004820
Epoch Step: 91 Loss: 97.559846 Tokens per Sec: 61.565845 True Avg Loss: 99.052999
Epoch Step: 96 Loss: 97.755437 Tokens per Sec: 61.939104 True Avg Loss: 99.187768
Epoch Step: 1 Loss: 2.616309 Tokens per Sec: 116.825455 True Avg Loss: 261.630859
Epoch Step: 6 Loss: 14.567256 Tokens per Sec: 113.498373 True Avg Loss: 248.942159
val_avg_loss= 21.698422680266688
val_true_avg_loss= 250.9440171983507
val_diff_mean= 15.60538
val_diff_std= 3.3522725
loss= 17.24042866248211
Epoch= 24
Epoch Step: 1 Loss: 97.099228 Tokens per Sec: 60.899436 True Avg Loss: 58.749657
Epoch Step: 6 Loss: 97.811093 Tokens per Sec: 61.299543 True Avg Loss: 103.057294
Epoch Step: 11 Loss: 96.863449 Tokens per Sec: 61.454846 True Avg Loss: 91.956608
Epoch Step: 16 Loss: 96.903397 Tokens per Sec: 62.769414 True Avg Loss: 93.586572
Epoch Step: 21 Loss: 96.408926 Tokens per Sec: 65.009965 True Avg Loss: 92.002534
Epoch Step: 26 Loss: 96.5292

val_avg_loss= 20.120690648208743
val_true_avg_loss= 232.7348141140408
val_diff_mean= 15.045583
val_diff_std= 3.2086809
loss= 15.98941471012494
Epoch= 28
Epoch Step: 1 Loss: 91.066427 Tokens per Sec: 65.038431 True Avg Loss: 87.598129
Epoch Step: 6 Loss: 91.143756 Tokens per Sec: 65.159184 True Avg Loss: 91.906377
Epoch Step: 11 Loss: 90.069473 Tokens per Sec: 64.930209 True Avg Loss: 81.775242
Epoch Step: 16 Loss: 91.804977 Tokens per Sec: 64.586465 True Avg Loss: 95.397986
Epoch Step: 21 Loss: 90.586842 Tokens per Sec: 59.399624 True Avg Loss: 88.617621
Epoch Step: 26 Loss: 91.248974 Tokens per Sec: 63.366239 True Avg Loss: 91.583031
Epoch Step: 31 Loss: 91.376564 Tokens per Sec: 64.480200 True Avg Loss: 91.985425
Epoch Step: 36 Loss: 92.197416 Tokens per Sec: 65.077130 True Avg Loss: 94.256329
Epoch Step: 41 Loss: 91.787058 Tokens per Sec: 64.762255 True Avg Loss: 93.006130
Epoch Step: 46 Loss: 91.624488 Tokens per Sec: 62.551803 True Avg Loss: 92.521286
Epoch Step: 51 Loss: 91.38147

Epoch Step: 16 Loss: 90.353077 Tokens per Sec: 60.234883 True Avg Loss: 93.347437
Epoch Step: 21 Loss: 90.931862 Tokens per Sec: 62.287118 True Avg Loss: 95.357970
Epoch Step: 26 Loss: 91.130012 Tokens per Sec: 63.490454 True Avg Loss: 95.258662
Epoch Step: 31 Loss: 90.456978 Tokens per Sec: 61.675545 True Avg Loss: 92.380458
Epoch Step: 36 Loss: 90.230930 Tokens per Sec: 63.632904 True Avg Loss: 91.469337
Epoch Step: 41 Loss: 90.268394 Tokens per Sec: 63.544512 True Avg Loss: 91.400226
Epoch Step: 46 Loss: 90.391559 Tokens per Sec: 64.628662 True Avg Loss: 91.540115
Epoch Step: 51 Loss: 89.082181 Tokens per Sec: 62.871112 True Avg Loss: 88.822959
Epoch Step: 56 Loss: 88.700510 Tokens per Sec: 64.447579 True Avg Loss: 88.162943
Epoch Step: 61 Loss: 88.297969 Tokens per Sec: 65.317365 True Avg Loss: 87.498488
Epoch Step: 66 Loss: 90.072297 Tokens per Sec: 63.982512 True Avg Loss: 90.291973
Epoch Step: 71 Loss: 88.808779 Tokens per Sec: 64.239664 True Avg Loss: 88.473010
Epoch Step: 76 L

Epoch Step: 41 Loss: 85.355053 Tokens per Sec: 63.785608 True Avg Loss: 90.340752
Epoch Step: 46 Loss: 86.327316 Tokens per Sec: 63.834147 True Avg Loss: 91.946638
Epoch Step: 51 Loss: 86.534059 Tokens per Sec: 63.528307 True Avg Loss: 91.778668
Epoch Step: 56 Loss: 85.999726 Tokens per Sec: 62.763120 True Avg Loss: 90.335147
Epoch Step: 61 Loss: 86.311224 Tokens per Sec: 61.017189 True Avg Loss: 90.531055
Epoch Step: 66 Loss: 86.232298 Tokens per Sec: 64.676298 True Avg Loss: 90.079174
Epoch Step: 71 Loss: 85.702478 Tokens per Sec: 65.008315 True Avg Loss: 89.057905
Epoch Step: 76 Loss: 85.684662 Tokens per Sec: 63.252033 True Avg Loss: 88.817941
Epoch Step: 81 Loss: 85.275019 Tokens per Sec: 62.347637 True Avg Loss: 88.103593
Epoch Step: 86 Loss: 85.395890 Tokens per Sec: 63.890398 True Avg Loss: 88.084760
Epoch Step: 91 Loss: 85.628366 Tokens per Sec: 59.443568 True Avg Loss: 88.200124
Epoch Step: 96 Loss: 85.456005 Tokens per Sec: 58.755130 True Avg Loss: 87.897880
Epoch Step: 1 Lo

Epoch Step: 66 Loss: 79.253120 Tokens per Sec: 54.715649 True Avg Loss: 74.943182
Epoch Step: 71 Loss: 78.992021 Tokens per Sec: 59.759200 True Avg Loss: 74.890924
Epoch Step: 76 Loss: 78.711381 Tokens per Sec: 61.708449 True Avg Loss: 74.775862
Epoch Step: 81 Loss: 78.928504 Tokens per Sec: 60.559663 True Avg Loss: 75.294654
Epoch Step: 86 Loss: 77.981259 Tokens per Sec: 62.200729 True Avg Loss: 74.383403
Epoch Step: 91 Loss: 78.775515 Tokens per Sec: 61.025722 True Avg Loss: 75.467860
Epoch Step: 96 Loss: 80.174312 Tokens per Sec: 61.240684 True Avg Loss: 77.143717
Epoch Step: 1 Loss: 1.817546 Tokens per Sec: 112.590075 True Avg Loss: 181.754639
Epoch Step: 6 Loss: 10.275016 Tokens per Sec: 111.597347 True Avg Loss: 175.626300
val_avg_loss= 15.567644762095764
val_true_avg_loss= 179.9415808783637
val_diff_mean= 13.204327
val_diff_std= 2.4568949
loss= 12.362398686299798
Epoch= 41
Epoch Step: 1 Loss: 80.325055 Tokens per Sec: 63.864363 True Avg Loss: 82.835205
Epoch Step: 6 Loss: 80.557

Epoch Step: 91 Loss: 79.277342 Tokens per Sec: 61.875394 True Avg Loss: 77.679268
Epoch Step: 96 Loss: 79.892077 Tokens per Sec: 61.707303 True Avg Loss: 78.430170
Epoch Step: 1 Loss: 2.148038 Tokens per Sec: 109.349115 True Avg Loss: 214.803833
Epoch Step: 6 Loss: 12.102171 Tokens per Sec: 106.110366 True Avg Loss: 206.868507
val_avg_loss= 17.744049038790315
val_true_avg_loss= 205.3158484564887
val_diff_mean= 14.209736
val_diff_std= 2.42385
loss= 14.105668977926706
Epoch= 45
Epoch Step: 1 Loss: 79.734519 Tokens per Sec: 62.868214 True Avg Loss: 72.823242
Epoch Step: 6 Loss: 80.117124 Tokens per Sec: 61.330534 True Avg Loss: 85.091877
Epoch Step: 11 Loss: 80.332642 Tokens per Sec: 61.541105 True Avg Loss: 84.796288
Epoch Step: 16 Loss: 79.711269 Tokens per Sec: 59.826314 True Avg Loss: 79.405838
Epoch Step: 21 Loss: 79.824118 Tokens per Sec: 56.852252 True Avg Loss: 80.001884
Epoch Step: 26 Loss: 79.469446 Tokens per Sec: 58.427415 True Avg Loss: 78.568407
Epoch Step: 31 Loss: 78.94761

Epoch Step: 1 Loss: 79.156083 Tokens per Sec: 62.340617 True Avg Loss: 53.558022
Epoch Step: 6 Loss: 79.874867 Tokens per Sec: 62.161873 True Avg Loss: 87.174967
Epoch Step: 11 Loss: 79.764282 Tokens per Sec: 57.605807 True Avg Loss: 82.768162
Epoch Step: 16 Loss: 80.231866 Tokens per Sec: 56.147831 True Avg Loss: 84.760233
Epoch Step: 21 Loss: 80.949299 Tokens per Sec: 56.296228 True Avg Loss: 87.072415
Epoch Step: 26 Loss: 81.107989 Tokens per Sec: 53.112600 True Avg Loss: 86.427802
Epoch Step: 31 Loss: 80.897394 Tokens per Sec: 55.278474 True Avg Loss: 84.828760
Epoch Step: 36 Loss: 79.791594 Tokens per Sec: 56.693421 True Avg Loss: 81.144491
Epoch Step: 41 Loss: 80.451964 Tokens per Sec: 55.784740 True Avg Loss: 82.612127
Epoch Step: 46 Loss: 80.272858 Tokens per Sec: 60.862387 True Avg Loss: 81.991584
Epoch Step: 51 Loss: 80.021464 Tokens per Sec: 59.874706 True Avg Loss: 81.328778
Epoch Step: 56 Loss: 80.341670 Tokens per Sec: 58.435087 True Avg Loss: 81.813986
Epoch Step: 61 Los

Epoch Step: 26 Loss: 75.567710 Tokens per Sec: 54.088026 True Avg Loss: 77.487741
Epoch Step: 31 Loss: 74.708884 Tokens per Sec: 58.233568 True Avg Loss: 74.357750
Epoch Step: 36 Loss: 75.864310 Tokens per Sec: 55.052770 True Avg Loss: 77.699080
Epoch Step: 41 Loss: 75.482620 Tokens per Sec: 55.333196 True Avg Loss: 76.522764
Epoch Step: 46 Loss: 75.419507 Tokens per Sec: 60.608166 True Avg Loss: 76.247182
Epoch Step: 51 Loss: 75.503965 Tokens per Sec: 59.339486 True Avg Loss: 76.328086
Epoch Step: 56 Loss: 76.212841 Tokens per Sec: 58.639697 True Avg Loss: 77.545098
Epoch Step: 61 Loss: 76.277835 Tokens per Sec: 59.516890 True Avg Loss: 77.560666
Epoch Step: 66 Loss: 76.355117 Tokens per Sec: 60.635075 True Avg Loss: 77.583855
Epoch Step: 71 Loss: 75.168486 Tokens per Sec: 59.232285 True Avg Loss: 75.788980
Epoch Step: 76 Loss: 75.078534 Tokens per Sec: 59.354884 True Avg Loss: 75.616939
Epoch Step: 81 Loss: 75.468906 Tokens per Sec: 59.891175 True Avg Loss: 76.086152
Epoch Step: 86 L

Epoch Step: 51 Loss: 71.896917 Tokens per Sec: 64.351159 True Avg Loss: 68.000523
Epoch Step: 56 Loss: 71.460256 Tokens per Sec: 59.198469 True Avg Loss: 67.539367
Epoch Step: 61 Loss: 72.024761 Tokens per Sec: 65.376644 True Avg Loss: 68.785599
Epoch Step: 66 Loss: 72.436976 Tokens per Sec: 64.951487 True Avg Loss: 69.673036
Epoch Step: 71 Loss: 72.199106 Tokens per Sec: 66.135638 True Avg Loss: 69.518627
Epoch Step: 76 Loss: 72.300253 Tokens per Sec: 65.507251 True Avg Loss: 69.846705
Epoch Step: 81 Loss: 71.522440 Tokens per Sec: 64.822522 True Avg Loss: 69.024376
Epoch Step: 86 Loss: 72.612646 Tokens per Sec: 64.927231 True Avg Loss: 70.481729
Epoch Step: 91 Loss: 72.833282 Tokens per Sec: 65.060196 True Avg Loss: 70.862079
Epoch Step: 96 Loss: 72.072468 Tokens per Sec: 62.767688 True Avg Loss: 70.162774
Epoch Step: 1 Loss: 1.738452 Tokens per Sec: 120.762383 True Avg Loss: 173.845245
Epoch Step: 6 Loss: 10.404218 Tokens per Sec: 119.032217 True Avg Loss: 177.854251
val_avg_loss= 1

Epoch Step: 76 Loss: 75.926366 Tokens per Sec: 60.606436 True Avg Loss: 80.535923
Epoch Step: 81 Loss: 75.130032 Tokens per Sec: 56.766564 True Avg Loss: 79.260702
Epoch Step: 86 Loss: 75.416151 Tokens per Sec: 57.040100 True Avg Loss: 79.371488
Epoch Step: 91 Loss: 76.028472 Tokens per Sec: 55.460001 True Avg Loss: 79.841560
Epoch Step: 96 Loss: 74.941352 Tokens per Sec: 58.701967 True Avg Loss: 78.497288
Epoch Step: 1 Loss: 1.477726 Tokens per Sec: 106.627449 True Avg Loss: 147.772598
Epoch Step: 6 Loss: 8.632744 Tokens per Sec: 105.232722 True Avg Loss: 147.585871
val_avg_loss= 12.815747387713898
val_true_avg_loss= 148.25391133626303
val_diff_mean= 11.985831
val_diff_std= 2.418837
loss= 10.185383221575321
Epoch= 62
Saving model
Epoch Step: 1 Loss: 75.620527 Tokens per Sec: 62.347336 True Avg Loss: 72.856079
Epoch Step: 6 Loss: 75.819355 Tokens per Sec: 61.122992 True Avg Loss: 78.685628
Epoch Step: 11 Loss: 75.648404 Tokens per Sec: 61.339930 True Avg Loss: 75.746988
Epoch Step: 16 

Epoch Step: 1 Loss: 1.758587 Tokens per Sec: 104.966831 True Avg Loss: 175.858704
Epoch Step: 6 Loss: 10.281076 Tokens per Sec: 105.141482 True Avg Loss: 175.762309
val_avg_loss= 15.372184346499814
val_true_avg_loss= 177.7934112548828
val_diff_mean= 13.187889
val_diff_std= 2.359397
loss= 12.21481451369424
Epoch= 66
Epoch Step: 1 Loss: 69.422412 Tokens per Sec: 62.237879 True Avg Loss: 53.739044
Epoch Step: 6 Loss: 69.468785 Tokens per Sec: 56.856895 True Avg Loss: 67.386666
Epoch Step: 11 Loss: 69.663648 Tokens per Sec: 56.138615 True Avg Loss: 70.110535
Epoch Step: 16 Loss: 69.701979 Tokens per Sec: 56.872690 True Avg Loss: 70.292616
Epoch Step: 21 Loss: 70.572345 Tokens per Sec: 59.840302 True Avg Loss: 74.370026
Epoch Step: 26 Loss: 70.905800 Tokens per Sec: 60.811504 True Avg Loss: 74.959571
Epoch Step: 31 Loss: 70.753492 Tokens per Sec: 61.112505 True Avg Loss: 73.801343
Epoch Step: 36 Loss: 70.486051 Tokens per Sec: 55.893899 True Avg Loss: 72.623833
Epoch Step: 41 Loss: 69.94836

Epoch Step: 6 Loss: 69.593296 Tokens per Sec: 60.594079 True Avg Loss: 75.845911
Epoch Step: 11 Loss: 68.918618 Tokens per Sec: 57.009175 True Avg Loss: 66.769697
Epoch Step: 16 Loss: 68.351648 Tokens per Sec: 56.370735 True Avg Loss: 63.805513
Epoch Step: 21 Loss: 68.189153 Tokens per Sec: 55.890715 True Avg Loss: 64.079131
Epoch Step: 26 Loss: 67.968258 Tokens per Sec: 58.794849 True Avg Loss: 64.020976
Epoch Step: 31 Loss: 67.629449 Tokens per Sec: 61.340165 True Avg Loss: 63.546269
Epoch Step: 36 Loss: 68.030009 Tokens per Sec: 56.648340 True Avg Loss: 65.236675
Epoch Step: 41 Loss: 68.752967 Tokens per Sec: 56.817489 True Avg Loss: 67.376956
Epoch Step: 46 Loss: 68.506909 Tokens per Sec: 59.173319 True Avg Loss: 66.997062
Epoch Step: 51 Loss: 69.399546 Tokens per Sec: 59.270715 True Avg Loss: 68.938579
Epoch Step: 56 Loss: 70.392361 Tokens per Sec: 59.239699 True Avg Loss: 70.781444
Epoch Step: 61 Loss: 70.244640 Tokens per Sec: 61.437979 True Avg Loss: 70.497782
Epoch Step: 66 Lo

Epoch Step: 31 Loss: 70.889668 Tokens per Sec: 58.252210 True Avg Loss: 70.934628
Epoch Step: 36 Loss: 70.520405 Tokens per Sec: 56.949234 True Avg Loss: 69.848819
Epoch Step: 41 Loss: 70.680704 Tokens per Sec: 57.787498 True Avg Loss: 70.328703
Epoch Step: 46 Loss: 70.172456 Tokens per Sec: 59.555996 True Avg Loss: 69.213068
Epoch Step: 51 Loss: 69.143539 Tokens per Sec: 56.462917 True Avg Loss: 67.256360
Epoch Step: 56 Loss: 69.173978 Tokens per Sec: 60.595535 True Avg Loss: 67.464356
Epoch Step: 61 Loss: 68.923070 Tokens per Sec: 60.199880 True Avg Loss: 67.183552
Epoch Step: 66 Loss: 69.200548 Tokens per Sec: 58.270500 True Avg Loss: 67.741999
Epoch Step: 71 Loss: 68.230703 Tokens per Sec: 60.016756 True Avg Loss: 66.460799
Epoch Step: 76 Loss: 68.297442 Tokens per Sec: 58.253514 True Avg Loss: 66.674309
Epoch Step: 81 Loss: 68.400670 Tokens per Sec: 56.954068 True Avg Loss: 66.920253
Epoch Step: 86 Loss: 69.024660 Tokens per Sec: 59.337219 True Avg Loss: 67.748719
Epoch Step: 91 L

Epoch Step: 56 Loss: 71.588322 Tokens per Sec: 58.047458 True Avg Loss: 76.397485
Epoch Step: 61 Loss: 72.744228 Tokens per Sec: 58.767931 True Avg Loss: 77.954982
Epoch Step: 66 Loss: 72.345914 Tokens per Sec: 58.783189 True Avg Loss: 76.955491
Epoch Step: 71 Loss: 71.975385 Tokens per Sec: 58.534384 True Avg Loss: 76.096052
Epoch Step: 76 Loss: 72.022389 Tokens per Sec: 58.156175 True Avg Loss: 75.895991
Epoch Step: 81 Loss: 71.882755 Tokens per Sec: 58.734777 True Avg Loss: 75.484934
Epoch Step: 86 Loss: 71.272783 Tokens per Sec: 61.049539 True Avg Loss: 74.532588
Epoch Step: 91 Loss: 70.656060 Tokens per Sec: 59.690770 True Avg Loss: 73.666276
Epoch Step: 96 Loss: 70.908539 Tokens per Sec: 57.219083 True Avg Loss: 73.778880
Epoch Step: 1 Loss: 2.036328 Tokens per Sec: 108.451233 True Avg Loss: 203.632812
Epoch Step: 6 Loss: 11.493705 Tokens per Sec: 99.837817 True Avg Loss: 196.532756
val_avg_loss= 17.094845164526728
val_true_avg_loss= 197.77383422851562
val_diff_mean= 13.947728
va

Epoch Step: 81 Loss: 68.652802 Tokens per Sec: 59.393190 True Avg Loss: 71.819740
Epoch Step: 86 Loss: 68.608652 Tokens per Sec: 60.800122 True Avg Loss: 71.591427
Epoch Step: 91 Loss: 68.080512 Tokens per Sec: 57.578297 True Avg Loss: 70.840078
Epoch Step: 96 Loss: 68.296298 Tokens per Sec: 59.431345 True Avg Loss: 70.927462
Epoch Step: 1 Loss: 2.101763 Tokens per Sec: 99.010708 True Avg Loss: 210.176285
Epoch Step: 6 Loss: 11.912483 Tokens per Sec: 97.225180 True Avg Loss: 203.714137
val_avg_loss= 17.92107624919049
val_true_avg_loss= 207.25383843315973
val_diff_mean= 14.222505
val_diff_std= 2.364757
loss= 14.23881332746899
Epoch= 83
Epoch Step: 1 Loss: 68.118477 Tokens per Sec: 62.549855 True Avg Loss: 52.203682
Epoch Step: 6 Loss: 67.126955 Tokens per Sec: 59.122960 True Avg Loss: 48.500434
Epoch Step: 11 Loss: 66.573687 Tokens per Sec: 59.499782 True Avg Loss: 51.801642
Epoch Step: 16 Loss: 66.522295 Tokens per Sec: 59.577579 True Avg Loss: 56.118794
Epoch Step: 21 Loss: 67.031745 

Epoch Step: 6 Loss: 11.314059 Tokens per Sec: 101.448978 True Avg Loss: 193.484322
val_avg_loss= 16.888656216028057
val_true_avg_loss= 195.368408203125
val_diff_mean= 13.799435
val_diff_std= 2.4999459
loss= 13.422257052123092
Epoch= 87
Epoch Step: 1 Loss: 66.775486 Tokens per Sec: 59.042844 True Avg Loss: 50.031296
Epoch Step: 6 Loss: 66.126725 Tokens per Sec: 60.446967 True Avg Loss: 52.931356
Epoch Step: 11 Loss: 66.098076 Tokens per Sec: 58.474099 True Avg Loss: 58.733708
Epoch Step: 16 Loss: 65.321991 Tokens per Sec: 59.606779 True Avg Loss: 56.080392
Epoch Step: 21 Loss: 64.971163 Tokens per Sec: 58.296548 True Avg Loss: 56.584411
Epoch Step: 26 Loss: 65.159421 Tokens per Sec: 59.085304 True Avg Loss: 58.945938
Epoch Step: 31 Loss: 65.530034 Tokens per Sec: 59.922866 True Avg Loss: 61.173364
Epoch Step: 36 Loss: 65.467178 Tokens per Sec: 58.773902 True Avg Loss: 61.627696
Epoch Step: 41 Loss: 65.409402 Tokens per Sec: 59.143354 True Avg Loss: 61.959095
Epoch Step: 46 Loss: 66.6565

Epoch Step: 11 Loss: 66.810480 Tokens per Sec: 59.179540 True Avg Loss: 75.758396
Epoch Step: 16 Loss: 66.528140 Tokens per Sec: 60.226299 True Avg Loss: 71.199364
Epoch Step: 21 Loss: 66.189719 Tokens per Sec: 58.507797 True Avg Loss: 68.448127
Epoch Step: 26 Loss: 66.672819 Tokens per Sec: 59.093566 True Avg Loss: 69.876823
Epoch Step: 31 Loss: 65.766060 Tokens per Sec: 57.427053 True Avg Loss: 66.390620
Epoch Step: 36 Loss: 64.786673 Tokens per Sec: 59.350979 True Avg Loss: 63.506979
Epoch Step: 41 Loss: 64.371829 Tokens per Sec: 58.824576 True Avg Loss: 62.618045
Epoch Step: 46 Loss: 64.487578 Tokens per Sec: 60.928541 True Avg Loss: 63.072422
Epoch Step: 51 Loss: 64.047969 Tokens per Sec: 59.404924 True Avg Loss: 62.340204
Epoch Step: 56 Loss: 63.832367 Tokens per Sec: 59.070388 True Avg Loss: 62.102955
Epoch Step: 61 Loss: 63.681196 Tokens per Sec: 58.771689 True Avg Loss: 62.006462
Epoch Step: 66 Loss: 63.694836 Tokens per Sec: 58.880079 True Avg Loss: 62.153464
Epoch Step: 71 L

Epoch Step: 36 Loss: 64.877600 Tokens per Sec: 63.294485 True Avg Loss: 63.947882
Epoch Step: 41 Loss: 64.791469 Tokens per Sec: 61.555420 True Avg Loss: 63.842313
Epoch Step: 46 Loss: 64.943311 Tokens per Sec: 63.192461 True Avg Loss: 64.291456
Epoch Step: 51 Loss: 64.293815 Tokens per Sec: 62.410850 True Avg Loss: 63.064160
Epoch Step: 56 Loss: 64.276795 Tokens per Sec: 60.232234 True Avg Loss: 63.151435
Epoch Step: 61 Loss: 64.622650 Tokens per Sec: 63.559330 True Avg Loss: 63.809292
Epoch Step: 66 Loss: 64.258645 Tokens per Sec: 63.376567 True Avg Loss: 63.310933
Epoch Step: 71 Loss: 63.759188 Tokens per Sec: 64.692833 True Avg Loss: 62.657165
Epoch Step: 76 Loss: 65.098507 Tokens per Sec: 62.782673 True Avg Loss: 64.504579
Epoch Step: 81 Loss: 65.411080 Tokens per Sec: 62.432135 True Avg Loss: 64.938451
Epoch Step: 86 Loss: 65.877276 Tokens per Sec: 62.489305 True Avg Loss: 65.518447
Epoch Step: 91 Loss: 66.440944 Tokens per Sec: 59.439504 True Avg Loss: 66.173476
Epoch Step: 96 L