In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import pathlib
pathlib.Path.ls = lambda x: list(x.iterdir())

In [3]:
import os
from datetime import datetime

import numpy as np
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [4]:
from evaluate import evaluate
from onsets_and_frames import *

In [5]:
logdir = 'runs/model'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
iterations = 500000 + 100
resume_iteration = 500000
checkpoint_interval = 1000
train_on = 'MAESTRO'

batch_size = 8
sequence_length = 327680
model_complexity = 48

if torch.cuda.is_available() and torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory < 10e9:
    batch_size //= 2
    sequence_length //= 2
    print(f'Reducing batch size to {batch_size} and sequence_length to {sequence_length} to save memory')

learning_rate = 0.0006
learning_rate_decay_steps = 10000
learning_rate_decay_rate = 0.98

leave_one_out = None

clip_gradient_norm = 3

validation_length = sequence_length
validation_interval = 500


In [6]:
writer = SummaryWriter(logdir)

train_groups, validation_groups = ['train'], ['validation']

if leave_one_out is not None:
    all_years = {'2004', '2006', '2008', '2009', '2011', '2013', '2014', '2015', '2017'}
    train_groups = list(all_years - {str(leave_one_out)})
    validation_groups = [str(leave_one_out)]

if train_on == 'MAESTRO':
    dataset = MAESTRO(groups=train_groups, sequence_length=sequence_length)
    validation_dataset = MAESTRO(groups=validation_groups, sequence_length=sequence_length)
else:
    dataset = MAPS(groups=['AkPnBcht', 'AkPnBsdf', 'AkPnCGdD', 'AkPnStgb', 'SptkBGAm', 'SptkBGCl', 'StbgTGd2'], sequence_length=sequence_length)
    validation_dataset = MAPS(groups=['ENSTDkAm', 'ENSTDkCl'], sequence_length=validation_length)

Loading group train:   0%|          | 3/954 [00:00<00:51, 18.49it/s]

Loading 1 group of MAESTRO at data/MAESTRO


Loading group train: 100%|██████████| 954/954 [15:01<00:00,  1.06it/s]
Loading group validation:   0%|          | 0/105 [00:00<?, ?it/s]

Loading 1 group of MAESTRO at data/MAESTRO


Loading group validation: 100%|██████████| 105/105 [01:41<00:00,  1.04it/s]


In [7]:
import torch
import torch.nn.functional as F
from torch import nn

from onsets_and_frames.lstm import BiLSTM
from onsets_and_frames.mel import melspectrogram
from onsets_and_frames.transcriber import ConvStack

In [8]:
class OnsetsAndFramesV2(nn.Module):
    def __init__(self, input_features, output_features, model_complexity=48):
        super().__init__()

        model_size = model_complexity * 16
        sequence_model = lambda input_size, output_size: BiLSTM(input_size, output_size // 2)

        self.onset_stack = nn.Sequential(
            ConvStack(input_features, model_size),
            sequence_model(model_size, model_size),
            nn.Linear(model_size, output_features),
            nn.Sigmoid()
        )
        self.offset_stack = nn.Sequential(
            ConvStack(input_features, model_size),
            sequence_model(model_size, model_size),
            nn.Linear(model_size, output_features),
            nn.Sigmoid()
        )
        self.frame_stack = nn.Sequential(
            ConvStack(input_features, model_size),
            nn.Linear(model_size, output_features),
            nn.Sigmoid()
        )
        #self.combined_stack = nn.Sequential(
        #    sequence_model(output_features * 3, model_size),
        #    nn.Linear(model_size, output_features),
        #    nn.Sigmoid()
        #)
        self.combined_stack1 = nn.Sequential(
            sequence_model(output_features * 3, model_size),
        )
        self.combined_stack2= nn.Sequential(
            nn.Linear(model_size, output_features),
            nn.Sigmoid()
        )
        self.shift_param_stack = nn.Sequential(
            nn.Linear(model_size * 2, 1)
        )
        self.velocity_stack = nn.Sequential(
            ConvStack(input_features, model_size),
            nn.Linear(model_size, output_features)
        )
        
        
    def forward(self, mel):
        onset_pred = self.onset_stack(mel)
        offset_pred = self.offset_stack(mel)
        activation_pred = self.frame_stack(mel)
        combined_pred = torch.cat([onset_pred.detach(), offset_pred.detach(), activation_pred], dim=-1)

        #frame_pred = self.combined_stack(combined_pred)
        co_used_pred = self.combined_stack1(combined_pred)
        frame_pred = self.combined_stack2(co_used_pred)
        
        velocity_pred = self.velocity_stack(mel)
        return onset_pred, offset_pred, activation_pred, frame_pred, velocity_pred, co_used_pred

    def SetFreeze(self, is_freeze, is_co_use_freeze, is_frame_freeze, is_shift_freeze):
        for p in self.parameters():
            p.requires_grad = not is_freeze
            
        for p in self.combined_stack1.parameters():
            p.requires_grad = not is_co_use_freeze

        for p in self.combined_stack2.parameters():
            p.requires_grad = not is_frame_freeze
            
        for p in self.shift_param_stack.parameters():
            p.requires_grad = not is_shift_freeze
        
    def run_shift_param_on_batch(self, batch):
        audio_label = batch['audio']
        onset_label = batch['onset']
        offset_label = batch['offset']
        frame_label = batch['frame']
        velocity_label = batch['velocity']
        
        shift_audio_label = batch['shift_audio']
        shift_label = batch['shift']

        mel = melspectrogram(audio_label.reshape(-1, audio_label.shape[-1])[:, :-1]).transpose(-1, -2)
        onset_pred, offset_pred, _, frame_pred, velocity_pred, co_used_pred = self(mel)

        mel_shift = melspectrogram(shift_audio_label.reshape(-1, shift_audio_label.shape[-1])[:, :-1]).transpose(-1, -2)
        onset_shift_pred, offset_shift_pred, _, frame_shift_pred, velocity_shift_pred, co_used_shift_pred = self(mel_shift)

        two_co_used_pred = torch.cat([co_used_pred, co_used_shift_pred], dim=-1)
        shift_pred = self.shift_param_stack(two_co_used_pred).squeeze(-1)
        
        predictions = {
            'onset': onset_pred.reshape(*onset_label.shape),
            'offset': offset_pred.reshape(*offset_label.shape),
            'frame': frame_pred.reshape(*frame_label.shape),
            'velocity': velocity_pred.reshape(*velocity_label.shape),
            'shift': shift_pred.reshape(*shift_label.shape)
        }

        shift_loss = nn.MSELoss()
        losses = {
            'loss/onset': F.binary_cross_entropy(predictions['onset'], onset_label),
            'loss/offset': F.binary_cross_entropy(predictions['offset'], offset_label),
            'loss/frame': F.binary_cross_entropy(predictions['frame'], frame_label),
            'loss/velocity': self.velocity_loss(predictions['velocity'], velocity_label, onset_label),
            'loss/shift': F.mse_loss(predictions['shift'], shift_label)
        }

        return predictions, losses

    def run_on_batch(self, batch):
        audio_label = batch['audio']
        onset_label = batch['onset']
        offset_label = batch['offset']
        frame_label = batch['frame']
        velocity_label = batch['velocity']

        mel = melspectrogram(audio_label.reshape(-1, audio_label.shape[-1])[:, :-1]).transpose(-1, -2)
        onset_pred, offset_pred, _, frame_pred, velocity_pred, _ = self(mel)

        predictions = {
            'onset': onset_pred.reshape(*onset_label.shape),
            'offset': offset_pred.reshape(*offset_label.shape),
            'frame': frame_pred.reshape(*frame_label.shape),
            'velocity': velocity_pred.reshape(*velocity_label.shape)
        }

        losses = {
            'loss/onset': F.binary_cross_entropy(predictions['onset'], onset_label),
            'loss/offset': F.binary_cross_entropy(predictions['offset'], offset_label),
            'loss/frame': F.binary_cross_entropy(predictions['frame'], frame_label),
            'loss/velocity': self.velocity_loss(predictions['velocity'], velocity_label, onset_label)
        }

        return predictions, losses
    
    def velocity_loss(self, velocity_pred, velocity_label, onset_label):
        denominator = onset_label.sum()
        if denominator.item() == 0:
            return denominator
        else:
            return (onset_label * (velocity_label - velocity_pred) ** 2).sum() / denominator


In [9]:
from collections import OrderedDict
def rename_key(old_dict, old_key, new_key):
    return OrderedDict((new_key if k == old_key else k, v) for k, v in old_dict.items())    

def load_and_adapt_model(model_path, device):
    old_model = torch.load(model_path)

    model_state = old_model.state_dict()

    model_state = rename_key(model_state,'combined_stack.0.rnn.weight_ih_l0', 'combined_stack1.0.rnn.weight_ih_l0')
    model_state = rename_key(model_state,'combined_stack.0.rnn.weight_hh_l0', 'combined_stack1.0.rnn.weight_hh_l0')
    model_state = rename_key(model_state,'combined_stack.0.rnn.bias_ih_l0', 'combined_stack1.0.rnn.bias_ih_l0')
    model_state = rename_key(model_state,'combined_stack.0.rnn.bias_hh_l0', 'combined_stack1.0.rnn.bias_hh_l0')

    model_state = rename_key(model_state,'combined_stack.0.rnn.weight_ih_l0_reverse', 'combined_stack1.0.rnn.weight_ih_l0_reverse')
    model_state = rename_key(model_state,'combined_stack.0.rnn.weight_hh_l0_reverse', 'combined_stack1.0.rnn.weight_hh_l0_reverse')
    model_state = rename_key(model_state,'combined_stack.0.rnn.bias_ih_l0_reverse', 'combined_stack1.0.rnn.bias_ih_l0_reverse')
    model_state = rename_key(model_state,'combined_stack.0.rnn.bias_hh_l0_reverse', 'combined_stack1.0.rnn.bias_hh_l0_reverse')

    model_state = rename_key(model_state,'combined_stack.1.weight', 'combined_stack2.0.weight')
    model_state = rename_key(model_state,'combined_stack.1.bias', 'combined_stack2.0.bias')
    
    model = OnsetsAndFramesV2(N_MELS, MAX_MIDI - MIN_MIDI + 1, model_complexity)
    model.load_state_dict(model_state, strict=False)

    model = model.to(device)
    
    return model

In [10]:
model_path = "./runs/model/model-500000.pt"
model = load_and_adapt_model(model_path, device)



In [11]:
def init_default(m):
    "Initialize `m` weights with `func` and set `bias` to 0."
    if hasattr(m, 'weight'):
        nn.init.uniform_(m.weight)
    if hasattr(m, 'bias') and hasattr(m.bias, 'data'):
        m.bias.data.fill_(0.)
    return m


In [None]:
##### NOTE: be careful to init

In [12]:
#init_default(model.shift_param_stack[0])

Linear(in_features=1536, out_features=1, bias=True)

In [13]:
batch_size=16
loader = DataLoader(dataset, batch_size, shuffle=True, drop_last=True)

In [14]:
model.SetFreeze(True, False, False, False)
model.train()

OnsetsAndFramesV2(
  (onset_stack): Sequential(
    (0): ConvStack(
      (cnn): Sequential(
        (0): Conv2d(1, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU()
        (6): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
        (7): Dropout(p=0.25, inplace=False)
        (8): Conv2d(48, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (9): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (10): ReLU()
        (11): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
        (12): Dropout(p=0.25, inplace=False)
      )
      (fc): Sequential(
        (0): 

In [15]:
optimizer = torch.optim.Adam(model.parameters(), learning_rate)
scheduler = StepLR(optimizer, step_size=learning_rate_decay_steps, gamma=learning_rate_decay_rate)

In [16]:
resume_iteration=500000
iterations=500000+500000
validation_interval = 500   #orig: 500
ouptut_interval=500

In [17]:
train_shift_loss = []
train_frame_loss = []

loop = tqdm(range(resume_iteration + 1, iterations + 1))
for i, batch in zip(loop, cycle(loader)):
    #predictions, losses = model.run_on_batch(batch)
    #loss = sum(losses.values())

    predictions_with_shift, losses_with_shift = model.run_shift_param_on_batch(batch)
    loss_shift = losses_with_shift['loss/shift']
    train_shift_loss.append(loss_shift.item())
    
    loss_frame = losses_with_shift['loss/frame']
    train_frame_loss.append(loss_frame.item())
    
    loss = loss_shift+loss_frame

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()
    
    if (len(train_shift_loss)>=ouptut_interval):
        mean_train_shift_loss = np.mean(np.array(train_shift_loss))
        mean_train_frame_loss = np.mean(np.array(train_frame_loss))
        print (f"mean train loss: shift = {mean_train_shift_loss}, frame = {mean_train_frame_loss}")    
        train_shift_loss=[]
        train_frame_loss=[]

    if clip_gradient_norm:
        clip_grad_norm_(model.parameters(), clip_gradient_norm)

    for key, value in {'loss': loss, **losses_with_shift}.items():
        writer.add_scalar(key, value.item(), global_step=i)

    if i % validation_interval == 0:
        model.eval()
        with torch.no_grad():
            for key, value in evaluate(validation_dataset, model).items():
                writer.add_scalar('validation/' + key.replace(' ', '_'), np.mean(value), global_step=i)
        model.train()

    if i % checkpoint_interval == 0:
        torch.save(model, os.path.join(logdir, f'model-{i}.pt'))
        torch.save(optimizer.state_dict(), os.path.join(logdir, 'last-optimizer-state.pt'))


  0%|          | 499/500000 [05:03<84:48:46,  1.64it/s]

mean train loss: shift = 5.8234214532375335, frame = 0.04142925556376576


  ref_matched_velocities)[0]
  0%|          | 999/500000 [10:34<84:49:14,  1.63it/s]  

mean train loss: shift = 1.8265601613521576, frame = 0.06011466918885708


  "type " + obj.__name__ + ". It won't be checked "
  0%|          | 1499/500000 [16:03<84:40:27,  1.64it/s]  

mean train loss: shift = 1.3237709020376205, frame = 0.06474947452545166


  0%|          | 1999/500000 [21:34<84:29:27,  1.64it/s]  

mean train loss: shift = 1.0866074886322021, frame = 0.06526735086739063


  0%|          | 2499/500000 [27:04<84:17:45,  1.64it/s]  

mean train loss: shift = 0.934998901963234, frame = 0.06582510279119015


  1%|          | 2999/500000 [32:37<84:31:55,  1.63it/s]  

mean train loss: shift = 0.8616830588579177, frame = 0.0656897299811244


  1%|          | 3499/500000 [38:07<84:07:16,  1.64it/s]  

mean train loss: shift = 0.7721041839718819, frame = 0.06512725572288036


  1%|          | 3999/500000 [43:37<84:03:39,  1.64it/s]  

mean train loss: shift = 0.7125851649045944, frame = 0.06461627815663815


  1%|          | 4499/500000 [49:08<83:57:04,  1.64it/s]  

mean train loss: shift = 0.641015353858471, frame = 0.062198071360588074


  1%|          | 4999/500000 [54:40<84:02:30,  1.64it/s]  

mean train loss: shift = 0.5917122908234597, frame = 0.06193289812654257


  1%|          | 5499/500000 [1:00:10<83:30:51,  1.64it/s]

mean train loss: shift = 0.5379030435085297, frame = 0.060808566331863406


  1%|          | 5999/500000 [1:05:41<83:39:17,  1.64it/s]  

mean train loss: shift = 0.5186276940107346, frame = 0.05954261914640665


  1%|▏         | 6499/500000 [1:11:12<83:47:07,  1.64it/s]  

mean train loss: shift = 0.5003340648412704, frame = 0.05839237114787102


  1%|▏         | 6999/500000 [1:16:43<83:41:06,  1.64it/s]  

mean train loss: shift = 0.4593266118764877, frame = 0.057367236383259294


  1%|▏         | 7499/500000 [1:22:13<83:44:56,  1.63it/s]  

mean train loss: shift = 0.45486972150206567, frame = 0.05687677825987339


  2%|▏         | 7999/500000 [1:27:45<83:28:18,  1.64it/s]  

mean train loss: shift = 0.43028635427355766, frame = 0.05556971889734268


  2%|▏         | 8499/500000 [1:33:15<83:18:15,  1.64it/s]  

mean train loss: shift = 0.4086780924797058, frame = 0.05448058827966452


  2%|▏         | 8999/500000 [1:38:46<83:15:40,  1.64it/s]  

mean train loss: shift = 0.3830278124213219, frame = 0.052434595368802545


  2%|▏         | 9499/500000 [1:44:17<83:17:11,  1.64it/s]  

mean train loss: shift = 0.40084879395365713, frame = 0.0522880579046905


  2%|▏         | 9999/500000 [1:49:48<82:53:14,  1.64it/s]  

mean train loss: shift = 0.3598799087405205, frame = 0.051763702392578125


  2%|▏         | 10499/500000 [1:55:20<82:54:01,  1.64it/s]  

mean train loss: shift = 0.3490604270398617, frame = 0.050659461714327336


  2%|▏         | 10999/500000 [2:00:52<83:12:37,  1.63it/s]  

mean train loss: shift = 0.34493608155846595, frame = 0.04992383835837245


  2%|▏         | 11499/500000 [2:06:24<82:36:57,  1.64it/s]  

mean train loss: shift = 0.32938992461562155, frame = 0.04816902482509613


  2%|▏         | 11999/500000 [2:11:55<82:48:18,  1.64it/s]  

mean train loss: shift = 0.3263119949698448, frame = 0.04798135925084353


  2%|▏         | 12499/500000 [2:17:27<82:45:12,  1.64it/s]  

mean train loss: shift = 0.31037797075510026, frame = 0.047323979243636134


  3%|▎         | 12999/500000 [2:22:58<82:33:04,  1.64it/s]  

mean train loss: shift = 0.2939382630884647, frame = 0.04682250913232565


  3%|▎         | 13499/500000 [2:28:29<82:22:44,  1.64it/s]  

mean train loss: shift = 0.3031712635308504, frame = 0.0463706064671278


  3%|▎         | 13999/500000 [2:34:01<82:41:57,  1.63it/s]  

mean train loss: shift = 0.28706192184984686, frame = 0.04628621856123209


  3%|▎         | 14499/500000 [2:39:32<82:30:40,  1.63it/s]  

mean train loss: shift = 0.28641357401013373, frame = 0.044794210743159056


  3%|▎         | 14999/500000 [2:45:04<82:20:39,  1.64it/s]  

mean train loss: shift = 0.25787674364447594, frame = 0.044166021425276994


  3%|▎         | 15499/500000 [2:50:37<82:11:08,  1.64it/s]  

mean train loss: shift = 0.26298879471421244, frame = 0.043923850167542695


  3%|▎         | 15999/500000 [2:56:08<81:59:29,  1.64it/s]  

mean train loss: shift = 0.25926354333758356, frame = 0.04343678516149521


  3%|▎         | 16499/500000 [3:01:39<81:57:24,  1.64it/s]  

mean train loss: shift = 0.2518068915605545, frame = 0.042252697311341764


  3%|▎         | 16999/500000 [3:07:12<81:59:47,  1.64it/s]  

mean train loss: shift = 0.25280552147328855, frame = 0.04271795016899705


  3%|▎         | 17499/500000 [3:12:43<81:36:18,  1.64it/s]  

mean train loss: shift = 0.24417528346180917, frame = 0.04212874499708414


  4%|▎         | 17999/500000 [3:18:14<81:36:17,  1.64it/s]  

mean train loss: shift = 0.23304390048980714, frame = 0.04179463527724147


  4%|▎         | 18499/500000 [3:23:46<81:52:11,  1.63it/s]  

mean train loss: shift = 0.23160029788315295, frame = 0.04154566534608602


  4%|▍         | 18999/500000 [3:29:18<81:34:54,  1.64it/s]  

mean train loss: shift = 0.22582497331500054, frame = 0.04131676312536001


  4%|▍         | 19499/500000 [3:34:50<81:30:21,  1.64it/s]  

mean train loss: shift = 0.22629845781624316, frame = 0.04102486070245504


  4%|▍         | 19999/500000 [3:40:22<81:41:21,  1.63it/s]  

mean train loss: shift = 0.23037313413619995, frame = 0.04041443770751357


  4%|▍         | 20499/500000 [3:45:54<81:13:54,  1.64it/s]  

mean train loss: shift = 0.2138305111080408, frame = 0.0401933905147016


  4%|▍         | 20999/500000 [3:51:27<81:08:32,  1.64it/s]  

mean train loss: shift = 0.20329978665709494, frame = 0.039454294234514235


  4%|▍         | 21499/500000 [3:56:59<81:40:02,  1.63it/s]  

mean train loss: shift = 0.21212053123116492, frame = 0.04000307369977236


  4%|▍         | 21999/500000 [4:02:30<80:53:50,  1.64it/s]  

mean train loss: shift = 0.20668661201000213, frame = 0.03957400242611766


  4%|▍         | 22499/500000 [4:08:02<80:46:24,  1.64it/s]  

mean train loss: shift = 0.19765110239386557, frame = 0.039161949925124645


  5%|▍         | 22999/500000 [4:13:34<80:57:17,  1.64it/s]  

mean train loss: shift = 0.19602620346844196, frame = 0.038950429704040286


  5%|▍         | 23499/500000 [4:19:05<80:39:04,  1.64it/s]  

mean train loss: shift = 0.19047910979390145, frame = 0.038662388566881416


  5%|▍         | 23999/500000 [4:24:37<80:39:56,  1.64it/s]  

mean train loss: shift = 0.19108799761533737, frame = 0.03868531798571348


  5%|▍         | 24499/500000 [4:30:10<80:40:34,  1.64it/s]  

mean train loss: shift = 0.19378392608463765, frame = 0.03823970928415656


  5%|▍         | 24999/500000 [4:35:41<80:18:37,  1.64it/s]  

mean train loss: shift = 0.18961722348630428, frame = 0.03788812347128987


  5%|▌         | 25499/500000 [4:41:13<80:13:38,  1.64it/s]  

mean train loss: shift = 0.17943595385551453, frame = 0.0378604109659791


  5%|▌         | 25999/500000 [4:46:44<80:23:30,  1.64it/s]  

mean train loss: shift = 0.17457770484685897, frame = 0.03718062274903059


  5%|▌         | 26499/500000 [4:52:16<79:55:10,  1.65it/s]  

mean train loss: shift = 0.17044592916965484, frame = 0.037288830876350404


  5%|▌         | 26999/500000 [4:57:48<80:03:41,  1.64it/s]  

mean train loss: shift = 0.17874341948330402, frame = 0.037767122019082305


  5%|▌         | 27499/500000 [5:03:20<80:14:50,  1.64it/s]  

mean train loss: shift = 0.17201036788523197, frame = 0.036955962099134924


  6%|▌         | 27999/500000 [5:08:52<80:09:42,  1.64it/s]  

mean train loss: shift = 0.16314383114874362, frame = 0.03724479017034173


  6%|▌         | 28499/500000 [5:14:24<80:03:55,  1.64it/s]  

mean train loss: shift = 0.16777371008694172, frame = 0.03673366450145841


  6%|▌         | 28999/500000 [5:19:55<79:48:41,  1.64it/s]  

mean train loss: shift = 0.1621264905259013, frame = 0.03622495118901133


  6%|▌         | 29499/500000 [5:25:26<79:32:34,  1.64it/s]  

mean train loss: shift = 0.1616590919792652, frame = 0.036054068993777035


  6%|▌         | 29999/500000 [5:30:57<79:48:12,  1.64it/s]  

mean train loss: shift = 0.16372263611853122, frame = 0.03631910867616534


  6%|▌         | 30499/500000 [5:36:30<79:44:12,  1.64it/s]  

mean train loss: shift = 0.15265596989542246, frame = 0.03589033138379455


  6%|▌         | 30999/500000 [5:42:01<79:19:46,  1.64it/s]  

mean train loss: shift = 0.16172153829038144, frame = 0.03595490765944123


  6%|▋         | 31499/500000 [5:47:33<79:13:49,  1.64it/s]  

mean train loss: shift = 0.16075967244803904, frame = 0.03573497063294053


  6%|▋         | 31999/500000 [5:53:05<79:17:44,  1.64it/s]  

mean train loss: shift = 0.14773303928226234, frame = 0.03548809112235904


  6%|▋         | 32499/500000 [5:58:35<79:03:08,  1.64it/s]  

mean train loss: shift = 0.15380463236570358, frame = 0.035946449160575863


  7%|▋         | 32999/500000 [6:04:06<78:59:33,  1.64it/s]  

mean train loss: shift = 0.1511735604405403, frame = 0.035655403297394514


  7%|▋         | 33499/500000 [6:09:38<79:07:32,  1.64it/s]  

mean train loss: shift = 0.14394304306060077, frame = 0.03498227755352855


  7%|▋         | 33999/500000 [6:15:10<78:57:36,  1.64it/s]  

mean train loss: shift = 0.14253226098418237, frame = 0.035169350907206534


  7%|▋         | 34499/500000 [6:20:42<78:53:36,  1.64it/s]  

mean train loss: shift = 0.14452224887907505, frame = 0.03474840323999524


  7%|▋         | 34999/500000 [6:26:14<78:44:58,  1.64it/s]  

mean train loss: shift = 0.14558147432655097, frame = 0.03496504924073816


  7%|▋         | 35499/500000 [6:31:45<78:40:38,  1.64it/s]  

mean train loss: shift = 0.14104385607689618, frame = 0.034747162770479914


  7%|▋         | 35999/500000 [6:37:17<78:24:32,  1.64it/s]  

mean train loss: shift = 0.14007022607326508, frame = 0.03462322220951319


  7%|▋         | 36499/500000 [6:42:49<78:25:35,  1.64it/s]  

mean train loss: shift = 0.13729036000370978, frame = 0.03388783521205187


  7%|▋         | 36999/500000 [6:48:20<78:27:56,  1.64it/s]  

mean train loss: shift = 0.1376205581203103, frame = 0.03458384348079562


  7%|▋         | 37499/500000 [6:53:52<78:15:51,  1.64it/s]  

mean train loss: shift = 0.13146127549558878, frame = 0.03392687042057514


  8%|▊         | 37999/500000 [6:59:24<78:20:02,  1.64it/s]  

mean train loss: shift = 0.13425189132988452, frame = 0.034549381531774996


  8%|▊         | 38499/500000 [7:04:54<78:22:53,  1.64it/s]  

mean train loss: shift = 0.13514056341350078, frame = 0.03385636585578322


  8%|▊         | 38999/500000 [7:10:27<78:16:22,  1.64it/s]  

mean train loss: shift = 0.12782831539958717, frame = 0.03408488880470395


  8%|▊         | 39499/500000 [7:15:59<78:11:25,  1.64it/s]  

mean train loss: shift = 0.13105222856998444, frame = 0.03359167022630572


  8%|▊         | 39999/500000 [7:21:30<77:53:56,  1.64it/s]  

mean train loss: shift = 0.12978319577872754, frame = 0.03364482113718986


  8%|▊         | 40499/500000 [7:27:03<77:56:45,  1.64it/s]  

mean train loss: shift = 0.12517260018736123, frame = 0.03328530458919704


  8%|▊         | 40999/500000 [7:32:35<77:52:00,  1.64it/s]  

mean train loss: shift = 0.1311476568877697, frame = 0.033339207202196124


  8%|▊         | 41499/500000 [7:38:06<77:37:09,  1.64it/s]  

mean train loss: shift = 0.12567225355654954, frame = 0.03387888889759779


  8%|▊         | 41999/500000 [7:43:38<77:37:44,  1.64it/s]  

mean train loss: shift = 0.12437038596719503, frame = 0.03327156401053071


  8%|▊         | 42499/500000 [7:49:11<77:47:19,  1.63it/s]  

mean train loss: shift = 0.1332547753378749, frame = 0.03330544361844659


  9%|▊         | 42999/500000 [7:54:42<77:24:15,  1.64it/s]  

mean train loss: shift = 0.11394303224980831, frame = 0.03339275734499097


  9%|▊         | 43499/500000 [8:00:13<77:26:43,  1.64it/s]  

mean train loss: shift = 0.12039560573548079, frame = 0.03262256341427565


  9%|▉         | 43999/500000 [8:05:45<77:18:07,  1.64it/s]  

mean train loss: shift = 0.11845338549464941, frame = 0.03328004217520356


  9%|▉         | 44499/500000 [8:11:17<77:11:46,  1.64it/s]  

mean train loss: shift = 0.1201815039217472, frame = 0.03290616977214813


  9%|▉         | 44999/500000 [8:16:49<76:58:33,  1.64it/s]  

mean train loss: shift = 0.11714161175489425, frame = 0.03303363130241632


  9%|▉         | 45499/500000 [8:22:21<77:22:50,  1.63it/s]  

mean train loss: shift = 0.11375362227112054, frame = 0.032702697921544316


  9%|▉         | 45999/500000 [8:27:53<76:39:45,  1.65it/s]  

mean train loss: shift = 0.12156618491560221, frame = 0.03279976006224752


  9%|▉         | 46499/500000 [8:33:25<76:48:39,  1.64it/s]  

mean train loss: shift = 0.11855341974645853, frame = 0.03265567101165652


  9%|▉         | 46999/500000 [8:38:58<76:48:05,  1.64it/s]  

mean train loss: shift = 0.11111802492290736, frame = 0.03236287435144186


  9%|▉         | 47499/500000 [8:44:30<76:31:10,  1.64it/s]  

mean train loss: shift = 0.11202006752043962, frame = 0.032768842939287425


 10%|▉         | 47999/500000 [8:50:03<76:34:27,  1.64it/s]  

mean train loss: shift = 0.10479224018007517, frame = 0.03195526197552681


 10%|▉         | 48499/500000 [8:55:36<76:40:54,  1.64it/s]  

mean train loss: shift = 0.11621906781941652, frame = 0.031688162371516226


 10%|▉         | 48999/500000 [9:01:07<76:15:06,  1.64it/s]  

mean train loss: shift = 0.10887002332508564, frame = 0.031931536290794614


 10%|▉         | 49499/500000 [9:06:39<76:29:12,  1.64it/s]  

mean train loss: shift = 0.10850020153075457, frame = 0.0323258688300848


 10%|▉         | 49999/500000 [9:12:12<76:27:36,  1.63it/s]  

mean train loss: shift = 0.11199284491688014, frame = 0.03174013121426106


 10%|█         | 50499/500000 [9:17:45<76:09:09,  1.64it/s]  

mean train loss: shift = 0.10297755434364081, frame = 0.032189742974936965


 10%|█         | 50999/500000 [9:23:17<76:09:12,  1.64it/s]  

mean train loss: shift = 0.11457377856224775, frame = 0.031878282200545074


 10%|█         | 51499/500000 [9:28:49<76:16:01,  1.63it/s]  

mean train loss: shift = 0.1036936299353838, frame = 0.03187666921690106


 10%|█         | 51999/500000 [9:34:21<75:55:38,  1.64it/s]  

mean train loss: shift = 0.10035558141767979, frame = 0.03193568582087755


 10%|█         | 52499/500000 [9:39:54<75:49:59,  1.64it/s]  

mean train loss: shift = 0.10269245186448098, frame = 0.03203860305249691


 11%|█         | 52999/500000 [9:45:27<75:56:18,  1.64it/s]  

mean train loss: shift = 0.10392330722510815, frame = 0.03164204316213727


 11%|█         | 53499/500000 [9:50:58<75:39:44,  1.64it/s]  

mean train loss: shift = 0.10206455504149199, frame = 0.03133316658437252


 11%|█         | 53999/500000 [9:56:30<75:46:18,  1.64it/s]  

mean train loss: shift = 0.10222274204343558, frame = 0.03106824339181185


 11%|█         | 54499/500000 [10:02:04<75:38:09,  1.64it/s] 

mean train loss: shift = 0.10433067739754916, frame = 0.03146913876757026


 11%|█         | 54999/500000 [10:07:34<75:32:19,  1.64it/s]  

mean train loss: shift = 0.09822537121176719, frame = 0.03138877880573273


 11%|█         | 55499/500000 [10:13:07<75:35:18,  1.63it/s]  

mean train loss: shift = 0.09896400780975818, frame = 0.031159293986856938


 11%|█         | 55999/500000 [10:18:40<75:33:53,  1.63it/s]  

mean train loss: shift = 0.09369878322631121, frame = 0.030585501536726952


 11%|█▏        | 56499/500000 [10:24:12<75:11:37,  1.64it/s]  

mean train loss: shift = 0.09676276380568742, frame = 0.031160832941532136


 11%|█▏        | 56999/500000 [10:29:43<75:08:06,  1.64it/s]  

mean train loss: shift = 0.0966719416230917, frame = 0.031108490047976375


 11%|█▏        | 57499/500000 [10:35:16<75:08:44,  1.64it/s]  

mean train loss: shift = 0.09323688443750143, frame = 0.030781601637601852


 12%|█▏        | 57999/500000 [10:40:48<74:49:51,  1.64it/s]  

mean train loss: shift = 0.09469785701856018, frame = 0.030918651331216098


 12%|█▏        | 58499/500000 [10:46:20<74:51:05,  1.64it/s]  

mean train loss: shift = 0.09378195264190435, frame = 0.03079218472354114


 12%|█▏        | 58999/500000 [10:51:53<74:49:41,  1.64it/s]  

mean train loss: shift = 0.10118016235530376, frame = 0.031154908552765846


 12%|█▏        | 59499/500000 [10:57:25<74:40:16,  1.64it/s]  

mean train loss: shift = 0.090943324547261, frame = 0.03167135195061564


 12%|█▏        | 59999/500000 [11:02:56<74:35:42,  1.64it/s]  

mean train loss: shift = 0.09475682878494263, frame = 0.031041701335459946


 12%|█▏        | 60499/500000 [11:08:29<74:33:42,  1.64it/s]  

mean train loss: shift = 0.09298672300949692, frame = 0.030510816019028424


 12%|█▏        | 60999/500000 [11:14:01<74:26:11,  1.64it/s]  

mean train loss: shift = 0.09264314809441566, frame = 0.031034865237772463


 12%|█▏        | 61499/500000 [11:19:33<74:14:43,  1.64it/s]  

mean train loss: shift = 0.09384715339541436, frame = 0.030501681935042144


 12%|█▏        | 61999/500000 [11:25:05<74:20:02,  1.64it/s]  

mean train loss: shift = 0.08870437011867761, frame = 0.030635522559285165


 12%|█▏        | 62499/500000 [11:30:38<74:10:07,  1.64it/s]  

mean train loss: shift = 0.09208116734027863, frame = 0.030037106815725564


 13%|█▎        | 62999/500000 [11:36:10<74:02:46,  1.64it/s]  

mean train loss: shift = 0.08989132248610258, frame = 0.030480013988912105


 13%|█▎        | 63499/500000 [11:41:43<74:05:44,  1.64it/s]  

mean train loss: shift = 0.09073744697868824, frame = 0.030210945703089236


 13%|█▎        | 63999/500000 [11:47:14<73:50:21,  1.64it/s]  

mean train loss: shift = 0.08672593358159066, frame = 0.03072471746057272


 13%|█▎        | 64499/500000 [11:52:48<73:40:52,  1.64it/s]  

mean train loss: shift = 0.08836186124756933, frame = 0.030555166706442833


 13%|█▎        | 64999/500000 [11:58:21<73:49:30,  1.64it/s]  

mean train loss: shift = 0.08842133466154337, frame = 0.030108977112919093


 13%|█▎        | 65499/500000 [12:03:52<73:34:00,  1.64it/s]  

mean train loss: shift = 0.08463162541389466, frame = 0.030064069136977196


 13%|█▎        | 65999/500000 [12:09:25<73:35:02,  1.64it/s]  

mean train loss: shift = 0.0860372220017016, frame = 0.029971139177680014


 13%|█▎        | 66499/500000 [12:14:58<73:29:40,  1.64it/s]  

mean train loss: shift = 0.08785668134689331, frame = 0.030174775686115027


 13%|█▎        | 66999/500000 [12:20:30<73:16:44,  1.64it/s]  

mean train loss: shift = 0.08662564016878604, frame = 0.03023121204599738


 13%|█▎        | 67499/500000 [12:26:02<73:20:40,  1.64it/s]  

mean train loss: shift = 0.08460000322759151, frame = 0.029603930074721573


 14%|█▎        | 67999/500000 [12:31:35<73:25:01,  1.63it/s]  

mean train loss: shift = 0.08386792050302029, frame = 0.029504548475146292


 14%|█▎        | 68499/500000 [12:37:06<73:02:21,  1.64it/s]  

mean train loss: shift = 0.08023635805770755, frame = 0.030028160605579616


 14%|█▍        | 68999/500000 [12:42:39<73:06:30,  1.64it/s]  

mean train loss: shift = 0.08733593710884452, frame = 0.03017540317773819


 14%|█▍        | 69499/500000 [12:48:12<73:06:58,  1.64it/s]  

mean train loss: shift = 0.08317992082983255, frame = 0.029717119202017785


 14%|█▍        | 69999/500000 [12:53:44<72:49:52,  1.64it/s]  

mean train loss: shift = 0.07934901345521211, frame = 0.029679082484915853


 14%|█▍        | 70499/500000 [12:59:16<72:56:48,  1.64it/s]  

mean train loss: shift = 0.08311633188277483, frame = 0.029510288298130037


 14%|█▍        | 70999/500000 [13:04:49<72:47:55,  1.64it/s]  

mean train loss: shift = 0.08416457855328917, frame = 0.02956770134344697


 14%|█▍        | 71499/500000 [13:10:21<72:35:19,  1.64it/s]  

mean train loss: shift = 0.07834994316846132, frame = 0.029413291577249766


 14%|█▍        | 71999/500000 [13:15:53<72:46:34,  1.63it/s]  

mean train loss: shift = 0.08070893271267414, frame = 0.02894981269352138


 14%|█▍        | 72499/500000 [13:21:26<72:46:24,  1.63it/s]  

mean train loss: shift = 0.08276603826880455, frame = 0.02945726753398776


 15%|█▍        | 72999/500000 [13:26:57<72:30:22,  1.64it/s]  

mean train loss: shift = 0.07823243112117052, frame = 0.02977153915539384


 15%|█▍        | 73499/500000 [13:32:30<72:17:30,  1.64it/s]  

mean train loss: shift = 0.08020183081552386, frame = 0.029238260619342326


 15%|█▍        | 73999/500000 [13:38:02<72:15:53,  1.64it/s]  

mean train loss: shift = 0.07777588149160147, frame = 0.02941070567071438


 15%|█▍        | 74499/500000 [13:43:35<72:04:13,  1.64it/s]  

mean train loss: shift = 0.07890013438090682, frame = 0.029128923628479242


 15%|█▍        | 74999/500000 [13:49:07<72:02:13,  1.64it/s]  

mean train loss: shift = 0.07824841570481658, frame = 0.029436815079301595


 15%|█▌        | 75499/500000 [13:54:40<72:05:40,  1.64it/s]  

mean train loss: shift = 0.07697880225628614, frame = 0.029536689415574073


 15%|█▌        | 75999/500000 [14:00:12<71:49:10,  1.64it/s]  

mean train loss: shift = 0.07510544030740857, frame = 0.028755392525345088


 15%|█▌        | 76499/500000 [14:05:43<71:52:36,  1.64it/s]  

mean train loss: shift = 0.07583256576955319, frame = 0.02870385855063796


 15%|█▌        | 76999/500000 [14:11:17<71:52:38,  1.63it/s]  

mean train loss: shift = 0.07774038162827492, frame = 0.029112729135900735


 15%|█▌        | 77499/500000 [14:16:49<71:41:08,  1.64it/s]  

mean train loss: shift = 0.07598072503879666, frame = 0.029002808667719364


 16%|█▌        | 77999/500000 [14:22:21<71:38:51,  1.64it/s] 

mean train loss: shift = 0.07272585973516106, frame = 0.029141477717086674


 16%|█▌        | 78499/500000 [14:27:54<71:28:00,  1.64it/s]  

mean train loss: shift = 0.07281640719249845, frame = 0.029140055764466523


 16%|█▌        | 78999/500000 [14:33:25<71:15:27,  1.64it/s]  

mean train loss: shift = 0.074134755987674, frame = 0.028684751093387605


 16%|█▌        | 79499/500000 [14:38:57<71:23:06,  1.64it/s]  

mean train loss: shift = 0.07370460936799646, frame = 0.02908246722444892


 16%|█▌        | 79999/500000 [14:44:30<71:17:40,  1.64it/s]  

mean train loss: shift = 0.0751190653629601, frame = 0.028883575353771448


 16%|█▌        | 80499/500000 [14:50:01<70:58:25,  1.64it/s] 

mean train loss: shift = 0.07230795017257333, frame = 0.02854420228302479


 16%|█▌        | 80999/500000 [14:55:33<71:08:39,  1.64it/s]  

mean train loss: shift = 0.07590940904989839, frame = 0.028687481194734573


 16%|█▋        | 81499/500000 [15:01:06<71:14:18,  1.63it/s]  

mean train loss: shift = 0.07250109305232763, frame = 0.02832539259456098


 16%|█▋        | 81999/500000 [15:06:39<70:48:17,  1.64it/s]  

mean train loss: shift = 0.07264351635053753, frame = 0.02872704812884331


 16%|█▋        | 82499/500000 [15:12:12<70:40:04,  1.64it/s]  

mean train loss: shift = 0.07192836010083556, frame = 0.028820526268333195


 17%|█▋        | 82999/500000 [15:17:45<70:44:38,  1.64it/s]  

mean train loss: shift = 0.07387397795915604, frame = 0.028899744110181928


 17%|█▋        | 83499/500000 [15:23:16<70:32:37,  1.64it/s] 

mean train loss: shift = 0.07596941678225994, frame = 0.028856472350656987


 17%|█▋        | 83999/500000 [15:28:49<70:37:19,  1.64it/s]  

mean train loss: shift = 0.07239117385074496, frame = 0.028593247413635253


 17%|█▋        | 84499/500000 [15:34:22<70:41:46,  1.63it/s]  

mean train loss: shift = 0.06980258098989725, frame = 0.02882853043824434


 17%|█▋        | 84999/500000 [15:39:54<70:13:51,  1.64it/s]  

mean train loss: shift = 0.06808027397468686, frame = 0.02832392533123493


 17%|█▋        | 85499/500000 [15:45:26<70:10:31,  1.64it/s]  

mean train loss: shift = 0.07080871950462461, frame = 0.028423721849918364


 17%|█▋        | 85999/500000 [15:50:59<70:22:18,  1.63it/s] 

mean train loss: shift = 0.07086164674535393, frame = 0.028424583531916143


 17%|█▋        | 86499/500000 [15:56:30<70:00:58,  1.64it/s] 

mean train loss: shift = 0.07132427056133747, frame = 0.028442092971876262


 17%|█▋        | 86999/500000 [16:02:04<69:56:07,  1.64it/s]  

mean train loss: shift = 0.0733000597320497, frame = 0.028736271403729915


 17%|█▋        | 87499/500000 [16:07:36<70:02:38,  1.64it/s] 

mean train loss: shift = 0.06746625345945358, frame = 0.028453859850764273


 18%|█▊        | 87999/500000 [16:13:08<69:44:20,  1.64it/s]  

mean train loss: shift = 0.07024665248021483, frame = 0.028233955189585686


 18%|█▊        | 88499/500000 [16:18:40<69:37:21,  1.64it/s]  

mean train loss: shift = 0.07127645112946629, frame = 0.028344938203692435


 18%|█▊        | 88999/500000 [16:24:12<69:59:19,  1.63it/s] 

mean train loss: shift = 0.06823190677165986, frame = 0.028237471802160144


 18%|█▊        | 89499/500000 [16:29:45<69:28:56,  1.64it/s]  

mean train loss: shift = 0.06522720989212394, frame = 0.02813725305721164


 18%|█▊        | 89999/500000 [16:35:16<69:25:19,  1.64it/s] 

mean train loss: shift = 0.06913857030123473, frame = 0.028392263486981394


 18%|█▊        | 90499/500000 [16:40:49<69:27:16,  1.64it/s]  

mean train loss: shift = 0.06480708995461464, frame = 0.02829648024216294


 18%|█▊        | 90999/500000 [16:46:21<69:20:50,  1.64it/s] 

mean train loss: shift = 0.07232056441903115, frame = 0.027919841643422844


 18%|█▊        | 91499/500000 [16:51:54<69:18:15,  1.64it/s]  

mean train loss: shift = 0.0683862697146833, frame = 0.028121729984879492


 18%|█▊        | 91999/500000 [16:57:27<69:10:07,  1.64it/s] 

mean train loss: shift = 0.06606984143704175, frame = 0.027951922150328757


 18%|█▊        | 92499/500000 [17:02:59<68:55:35,  1.64it/s]  

mean train loss: shift = 0.06591916644573212, frame = 0.027972142450511456


 19%|█▊        | 92999/500000 [17:08:31<68:54:48,  1.64it/s] 

mean train loss: shift = 0.06567863155901432, frame = 0.02842561334557831


 19%|█▊        | 93499/500000 [17:14:04<69:09:06,  1.63it/s] 

mean train loss: shift = 0.06436148191988468, frame = 0.02806091541238129


 19%|█▉        | 93999/500000 [17:19:35<68:37:50,  1.64it/s] 

mean train loss: shift = 0.06463019008561968, frame = 0.027752569701522588


 19%|█▉        | 94499/500000 [17:25:08<68:39:15,  1.64it/s]  

mean train loss: shift = 0.06735328038409352, frame = 0.027764246586710215


 19%|█▉        | 94999/500000 [17:30:41<68:45:30,  1.64it/s] 

mean train loss: shift = 0.06487519393861294, frame = 0.027917302083224058


 19%|█▉        | 95499/500000 [17:36:12<68:22:44,  1.64it/s] 

mean train loss: shift = 0.06425914296880364, frame = 0.027391252823174


 19%|█▉        | 95999/500000 [17:41:44<68:44:16,  1.63it/s] 

mean train loss: shift = 0.06415994238853455, frame = 0.027644203644245864


 19%|█▉        | 96499/500000 [17:47:17<68:31:58,  1.64it/s] 

mean train loss: shift = 0.0632454727254808, frame = 0.028137339014559982


 19%|█▉        | 96999/500000 [17:52:49<68:25:00,  1.64it/s] 

mean train loss: shift = 0.06455303179845213, frame = 0.02783034051209688


 19%|█▉        | 97499/500000 [17:58:22<68:16:20,  1.64it/s] 

mean train loss: shift = 0.06283947169035674, frame = 0.027621159641072153


 20%|█▉        | 97999/500000 [18:03:55<68:11:52,  1.64it/s]  

mean train loss: shift = 0.06536969486996531, frame = 0.027673639185726642


 20%|█▉        | 98499/500000 [18:09:27<68:06:51,  1.64it/s] 

mean train loss: shift = 0.06368268328905105, frame = 0.02770159089937806


 20%|█▉        | 98999/500000 [18:14:59<67:54:35,  1.64it/s] 

mean train loss: shift = 0.06419716513156891, frame = 0.02723770681209862


 20%|█▉        | 99499/500000 [18:20:32<68:00:59,  1.64it/s] 

mean train loss: shift = 0.06475978372618556, frame = 0.02744431504793465


 20%|█▉        | 99999/500000 [18:26:03<67:35:37,  1.64it/s] 

mean train loss: shift = 0.06219786125048995, frame = 0.027926480442285538


 20%|██        | 100499/500000 [18:31:35<67:41:53,  1.64it/s] 

mean train loss: shift = 0.05956231619045138, frame = 0.02725894094072282


 20%|██        | 100999/500000 [18:37:07<67:45:35,  1.64it/s] 

mean train loss: shift = 0.06220299008861184, frame = 0.02750801342912018


 20%|██        | 101499/500000 [18:42:39<67:33:10,  1.64it/s] 

mean train loss: shift = 0.0628122851587832, frame = 0.027338945880532265


 20%|██        | 101999/500000 [18:48:12<67:23:32,  1.64it/s] 

mean train loss: shift = 0.06046954406425357, frame = 0.0270899059176445


 20%|██        | 102499/500000 [18:53:44<67:26:18,  1.64it/s] 

mean train loss: shift = 0.060748084906488656, frame = 0.027419491298496725


 21%|██        | 102999/500000 [18:59:16<67:11:18,  1.64it/s] 

mean train loss: shift = 0.061811998941004274, frame = 0.027192170213907955


 21%|██        | 103499/500000 [19:04:48<67:11:31,  1.64it/s] 

mean train loss: shift = 0.060177371561527256, frame = 0.027448023399338127


 21%|██        | 103999/500000 [19:10:20<67:26:33,  1.63it/s] 

mean train loss: shift = 0.06030536198243499, frame = 0.027431686654686926


 21%|██        | 104499/500000 [19:15:52<67:06:35,  1.64it/s] 

mean train loss: shift = 0.06040539081767202, frame = 0.027562492828816176


 21%|██        | 104999/500000 [19:21:26<66:58:42,  1.64it/s]  

mean train loss: shift = 0.06052493098378182, frame = 0.027225204648450015


 21%|██        | 105499/500000 [19:26:59<67:04:15,  1.63it/s] 

mean train loss: shift = 0.05956823034211993, frame = 0.026992524879053233


 21%|██        | 105999/500000 [19:32:31<66:54:17,  1.64it/s] 

mean train loss: shift = 0.06379797413945199, frame = 0.02746585239097476


 21%|██▏       | 106499/500000 [19:38:02<66:36:27,  1.64it/s] 

mean train loss: shift = 0.05870979260653257, frame = 0.027067354399710895


 21%|██▏       | 106999/500000 [19:43:35<66:47:54,  1.63it/s] 

mean train loss: shift = 0.05970759778097272, frame = 0.027296046098694206


 21%|██▏       | 107499/500000 [19:49:06<66:25:28,  1.64it/s] 

mean train loss: shift = 0.06046373733133078, frame = 0.027333334578201175


 22%|██▏       | 107999/500000 [19:54:38<66:28:48,  1.64it/s] 

mean train loss: shift = 0.05649604466743767, frame = 0.027709730986505747


 22%|██▏       | 108499/500000 [20:00:11<66:27:13,  1.64it/s] 

mean train loss: shift = 0.05662963826954365, frame = 0.027360567662864925


 22%|██▏       | 108999/500000 [20:05:46<67:31:34,  1.61it/s] 

mean train loss: shift = 0.05922130487859249, frame = 0.027191096076741816


 22%|██▏       | 109499/500000 [20:11:29<68:54:43,  1.57it/s] 

mean train loss: shift = 0.05681928640604019, frame = 0.02696103285253048


 22%|██▏       | 109999/500000 [20:17:25<67:09:53,  1.61it/s] 

mean train loss: shift = 0.05969580678641796, frame = 0.026912508049979807


 22%|██▏       | 110499/500000 [20:23:01<65:56:42,  1.64it/s] 

mean train loss: shift = 0.05778588870167732, frame = 0.027275813084095716


 22%|██▏       | 110999/500000 [20:28:34<65:58:37,  1.64it/s] 

mean train loss: shift = 0.05602176730334759, frame = 0.02660746749676764


 22%|██▏       | 111499/500000 [20:34:06<66:03:49,  1.63it/s] 

mean train loss: shift = 0.05780860690400005, frame = 0.02705702923797071


 22%|██▏       | 111999/500000 [20:39:38<65:37:55,  1.64it/s] 

mean train loss: shift = 0.055241012528538705, frame = 0.026794829457998277


 22%|██▏       | 112499/500000 [20:45:11<65:49:03,  1.64it/s] 

mean train loss: shift = 0.05558773909136653, frame = 0.02728357145190239


 23%|██▎       | 112999/500000 [20:50:43<65:49:31,  1.63it/s] 

mean train loss: shift = 0.056808466322720054, frame = 0.026817722026258706


 23%|██▎       | 113499/500000 [20:56:16<65:23:27,  1.64it/s] 

mean train loss: shift = 0.053681569643318654, frame = 0.027265321532264353


 23%|██▎       | 113999/500000 [21:01:48<65:23:57,  1.64it/s] 

mean train loss: shift = 0.05795541578903794, frame = 0.027153035467490552


 23%|██▎       | 114499/500000 [21:07:21<65:25:01,  1.64it/s] 

mean train loss: shift = 0.05493317519687116, frame = 0.026915087591856717


 23%|██▎       | 114999/500000 [21:12:53<65:06:25,  1.64it/s] 

mean train loss: shift = 0.05503951434791088, frame = 0.02679905566945672


 23%|██▎       | 115499/500000 [21:18:25<64:51:46,  1.65it/s] 

mean train loss: shift = 0.0550977027490735, frame = 0.02669000830501318


 23%|██▎       | 115999/500000 [21:23:57<65:05:08,  1.64it/s] 

mean train loss: shift = 0.05444608899950981, frame = 0.026763552067801358


 23%|██▎       | 116499/500000 [21:29:29<64:51:07,  1.64it/s] 

mean train loss: shift = 0.0540400525983423, frame = 0.026938871061429382


 23%|██▎       | 116999/500000 [21:35:01<64:43:44,  1.64it/s] 

mean train loss: shift = 0.05812103157863021, frame = 0.02701823156140745


 23%|██▎       | 117499/500000 [21:40:34<64:50:40,  1.64it/s] 

mean train loss: shift = 0.05410309436917305, frame = 0.026683128353208303


 24%|██▎       | 117999/500000 [21:46:05<64:29:55,  1.65it/s] 

mean train loss: shift = 0.05427879436314106, frame = 0.02639060833491385


 24%|██▎       | 118499/500000 [21:51:37<64:36:08,  1.64it/s] 

mean train loss: shift = 0.05588169861584902, frame = 0.026781415371224283


 24%|██▍       | 118999/500000 [21:57:10<66:16:40,  1.60it/s] 

mean train loss: shift = 0.05302576442062855, frame = 0.026408618073910473


 24%|██▍       | 119499/500000 [22:02:41<64:23:53,  1.64it/s] 

mean train loss: shift = 0.0544814657792449, frame = 0.02631064088456333


 24%|██▍       | 119999/500000 [22:08:13<64:12:40,  1.64it/s] 

mean train loss: shift = 0.05376917011290789, frame = 0.02679752797074616


 24%|██▍       | 120499/500000 [22:13:45<64:13:54,  1.64it/s] 

mean train loss: shift = 0.053509096056222914, frame = 0.026598014436662196


 24%|██▍       | 120999/500000 [22:19:17<63:57:59,  1.65it/s] 

mean train loss: shift = 0.053759852927178144, frame = 0.026421861469745636


 24%|██▍       | 121499/500000 [22:24:49<64:02:25,  1.64it/s] 

mean train loss: shift = 0.054340640604496004, frame = 0.026565835896879435


 24%|██▍       | 121999/500000 [22:30:21<63:53:54,  1.64it/s] 

mean train loss: shift = 0.0529298528842628, frame = 0.02660967236571014


 24%|██▍       | 122499/500000 [22:35:53<63:46:06,  1.64it/s] 

mean train loss: shift = 0.05338065394386649, frame = 0.026467492386698724


 25%|██▍       | 122999/500000 [22:41:25<63:41:21,  1.64it/s] 

mean train loss: shift = 0.05192806815356016, frame = 0.026614383436739445


 25%|██▍       | 123499/500000 [22:46:57<63:44:16,  1.64it/s] 

mean train loss: shift = 0.05324174451828003, frame = 0.026645239528268576


 25%|██▍       | 123999/500000 [22:52:29<63:26:48,  1.65it/s] 

mean train loss: shift = 0.05190636952221393, frame = 0.026277367105707525


 25%|██▍       | 124499/500000 [22:58:01<63:30:05,  1.64it/s] 

mean train loss: shift = 0.05298206003755331, frame = 0.026434700343757868


 25%|██▍       | 124999/500000 [23:03:34<63:30:41,  1.64it/s] 

mean train loss: shift = 0.054044429447501895, frame = 0.026038198322057723


 25%|██▌       | 125499/500000 [23:09:06<63:11:03,  1.65it/s] 

mean train loss: shift = 0.05174426897987723, frame = 0.02605844870954752


 25%|██▌       | 125999/500000 [23:14:37<63:16:08,  1.64it/s] 

mean train loss: shift = 0.0543244843184948, frame = 0.026405160842463374


 25%|██▌       | 126499/500000 [23:20:09<63:18:38,  1.64it/s] 

mean train loss: shift = 0.05221591667830944, frame = 0.026415640654042363


 25%|██▌       | 126999/500000 [23:25:40<63:02:15,  1.64it/s] 

mean train loss: shift = 0.051923554223030806, frame = 0.026559354078024625


 25%|██▌       | 127499/500000 [23:31:13<63:01:34,  1.64it/s] 

mean train loss: shift = 0.05371777119673789, frame = 0.0262967992965132


 26%|██▌       | 127999/500000 [23:36:45<62:59:43,  1.64it/s] 

mean train loss: shift = 0.052038575246930126, frame = 0.026297893891111018


 26%|██▌       | 128499/500000 [23:42:16<62:49:19,  1.64it/s] 

mean train loss: shift = 0.05147385812550783, frame = 0.026066064793616534


 26%|██▌       | 128999/500000 [23:47:48<62:46:51,  1.64it/s] 

mean train loss: shift = 0.05170738279819489, frame = 0.026287277450785042


 26%|██▌       | 129499/500000 [23:53:21<62:49:23,  1.64it/s] 

mean train loss: shift = 0.05171092940866947, frame = 0.026601926947012543


 26%|██▌       | 129999/500000 [23:58:54<62:26:35,  1.65it/s] 

mean train loss: shift = 0.05123413059860468, frame = 0.026444889532402156


 26%|██▌       | 130499/500000 [24:04:26<62:36:32,  1.64it/s] 

mean train loss: shift = 0.051650095455348495, frame = 0.02624655064381659


 26%|██▌       | 130999/500000 [24:09:59<62:32:33,  1.64it/s] 

mean train loss: shift = 0.04983943955600262, frame = 0.026117269510403277


 26%|██▋       | 131499/500000 [24:15:32<62:09:28,  1.65it/s] 

mean train loss: shift = 0.05101866465434432, frame = 0.025931261613965036


 26%|██▋       | 131999/500000 [24:21:03<62:13:34,  1.64it/s] 

mean train loss: shift = 0.053800551937893036, frame = 0.026137968743219973


 26%|██▋       | 132499/500000 [24:26:35<62:15:01,  1.64it/s] 

mean train loss: shift = 0.049587048824876544, frame = 0.026125373231247067


 27%|██▋       | 132999/500000 [24:32:07<62:03:35,  1.64it/s] 

mean train loss: shift = 0.048562665581703184, frame = 0.02607323482632637


 27%|██▋       | 133499/500000 [24:37:40<62:04:11,  1.64it/s] 

mean train loss: shift = 0.048186231572180986, frame = 0.025964835967868565


 27%|██▋       | 133999/500000 [24:43:12<62:02:50,  1.64it/s] 

mean train loss: shift = 0.048547753989696504, frame = 0.026422600431367755


 27%|██▋       | 134499/500000 [24:48:43<61:45:07,  1.64it/s] 

mean train loss: shift = 0.04795001379773021, frame = 0.026148289086297155


 27%|██▋       | 134999/500000 [24:54:14<61:45:38,  1.64it/s] 

mean train loss: shift = 0.051157251842319965, frame = 0.026388126023113728


 27%|██▋       | 135499/500000 [24:59:47<61:48:29,  1.64it/s] 

mean train loss: shift = 0.04788311011530459, frame = 0.025830853186547757


 27%|██▋       | 135999/500000 [25:05:18<61:30:12,  1.64it/s] 

mean train loss: shift = 0.04898493628948927, frame = 0.025929055197164416


 27%|██▋       | 136499/500000 [25:10:50<61:30:55,  1.64it/s] 

mean train loss: shift = 0.04928000594675541, frame = 0.025868150738999247


 27%|██▋       | 136999/500000 [25:16:22<61:30:06,  1.64it/s] 

mean train loss: shift = 0.04811088238283992, frame = 0.026053664945065974


 27%|██▋       | 137499/500000 [25:21:53<61:10:38,  1.65it/s] 

mean train loss: shift = 0.05024829897657037, frame = 0.026032929321750997


 28%|██▊       | 137999/500000 [25:27:25<61:15:34,  1.64it/s] 

mean train loss: shift = 0.05056539101526141, frame = 0.025784970760345458


 28%|██▊       | 138499/500000 [25:32:59<61:18:56,  1.64it/s] 

mean train loss: shift = 0.04922366066649556, frame = 0.02580438833683729


 28%|██▊       | 138999/500000 [25:38:38<62:26:26,  1.61it/s] 

mean train loss: shift = 0.04828488517180085, frame = 0.025865848913788795


 28%|██▊       | 139499/500000 [25:44:24<62:37:47,  1.60it/s] 

mean train loss: shift = 0.048700774267315866, frame = 0.025988067070022224


 28%|██▊       | 139999/500000 [25:50:04<61:52:08,  1.62it/s] 

mean train loss: shift = 0.04953694066777825, frame = 0.026118905063718556


 28%|██▊       | 140499/500000 [25:55:39<60:47:18,  1.64it/s] 

mean train loss: shift = 0.04661620142497122, frame = 0.02578678168170154


 28%|██▊       | 140999/500000 [26:01:10<60:40:13,  1.64it/s] 

mean train loss: shift = 0.04722469552233815, frame = 0.026018329238519072


 28%|██▊       | 141499/500000 [26:06:44<60:43:28,  1.64it/s] 

mean train loss: shift = 0.04641013520210981, frame = 0.025633640022948385


 28%|██▊       | 141999/500000 [26:12:16<60:31:46,  1.64it/s] 

mean train loss: shift = 0.049044293861836195, frame = 0.02601977057568729


 28%|██▊       | 142499/500000 [26:17:48<60:36:39,  1.64it/s] 

mean train loss: shift = 0.04735333585180342, frame = 0.02572083231806755


 29%|██▊       | 142999/500000 [26:23:20<60:28:10,  1.64it/s] 

mean train loss: shift = 0.04837642405182123, frame = 0.02618590907379985


 29%|██▊       | 143499/500000 [26:28:52<60:13:26,  1.64it/s] 

mean train loss: shift = 0.0462361977212131, frame = 0.025857624374330045


 29%|██▉       | 143999/500000 [26:34:24<60:08:50,  1.64it/s] 

mean train loss: shift = 0.049411259956657884, frame = 0.025829384833574297


 29%|██▉       | 144499/500000 [26:39:56<60:05:38,  1.64it/s] 

mean train loss: shift = 0.04592138484865427, frame = 0.025790666559711097


 29%|██▉       | 144999/500000 [26:45:28<59:58:18,  1.64it/s] 

mean train loss: shift = 0.04615808181464672, frame = 0.025985129995271562


 29%|██▉       | 145499/500000 [26:50:59<59:53:29,  1.64it/s] 

mean train loss: shift = 0.0473926496617496, frame = 0.025743433332070707


 29%|██▉       | 145999/500000 [26:56:32<59:59:15,  1.64it/s] 

mean train loss: shift = 0.045993259029462934, frame = 0.025575048640370367


 29%|██▉       | 146499/500000 [27:02:04<59:42:53,  1.64it/s] 

mean train loss: shift = 0.04711663901805878, frame = 0.02549308496899903


 29%|██▉       | 146999/500000 [27:07:36<59:44:45,  1.64it/s] 

mean train loss: shift = 0.04497783513739705, frame = 0.02574369372986257


 29%|██▉       | 147499/500000 [27:13:08<59:44:41,  1.64it/s] 

mean train loss: shift = 0.0482514415346086, frame = 0.02565777203440666


 30%|██▉       | 147999/500000 [27:18:39<59:34:05,  1.64it/s] 

mean train loss: shift = 0.04583804559148848, frame = 0.025216907493770122


 30%|██▉       | 148499/500000 [27:24:12<59:27:36,  1.64it/s] 

mean train loss: shift = 0.04591340812295675, frame = 0.02565609378553927


 30%|██▉       | 148999/500000 [27:29:44<59:28:37,  1.64it/s] 

mean train loss: shift = 0.04632522075995803, frame = 0.02567244771309197


 30%|██▉       | 149499/500000 [27:35:15<59:18:49,  1.64it/s] 

mean train loss: shift = 0.047403543453663585, frame = 0.02568136971257627


 30%|██▉       | 149999/500000 [27:40:47<59:13:59,  1.64it/s] 

mean train loss: shift = 0.04574248044937849, frame = 0.025568726640194654


 30%|███       | 150499/500000 [27:46:20<59:18:37,  1.64it/s] 

mean train loss: shift = 0.04618633677624166, frame = 0.02572059108875692


 30%|███       | 150999/500000 [27:51:52<59:06:54,  1.64it/s] 

mean train loss: shift = 0.04692318120226264, frame = 0.025288174867630006


 30%|███       | 151499/500000 [27:57:24<58:47:18,  1.65it/s] 

mean train loss: shift = 0.043069972563534974, frame = 0.025657167103141545


 30%|███       | 151999/500000 [28:02:56<59:05:57,  1.64it/s] 

mean train loss: shift = 0.0458872203566134, frame = 0.02573907102458179


 30%|███       | 152499/500000 [28:08:28<58:32:44,  1.65it/s] 

mean train loss: shift = 0.04490475346520543, frame = 0.025496939182281495


 31%|███       | 152999/500000 [28:13:59<58:41:03,  1.64it/s] 

mean train loss: shift = 0.04411446502245962, frame = 0.025071422861889004


 31%|███       | 153499/500000 [28:19:31<58:42:49,  1.64it/s] 

mean train loss: shift = 0.04554232937283814, frame = 0.025623336654156448


 31%|███       | 153999/500000 [28:25:03<58:34:50,  1.64it/s] 

mean train loss: shift = 0.04609082427807152, frame = 0.02541783676482737


 31%|███       | 154499/500000 [28:30:35<58:32:44,  1.64it/s] 

mean train loss: shift = 0.04434971928596496, frame = 0.025364337846636772


 31%|███       | 154999/500000 [28:36:06<58:28:19,  1.64it/s] 

mean train loss: shift = 0.04431551548466087, frame = 0.025339713828638196


 31%|███       | 155499/500000 [28:41:37<58:07:17,  1.65it/s] 

mean train loss: shift = 0.04451340214535594, frame = 0.025106762798503043


 31%|███       | 155999/500000 [28:47:10<58:17:57,  1.64it/s] 

mean train loss: shift = 0.04439072943106294, frame = 0.025565723242238166


 31%|███▏      | 156499/500000 [28:52:43<58:08:47,  1.64it/s] 

mean train loss: shift = 0.04497659632749856, frame = 0.025180195543915034


 31%|███▏      | 156999/500000 [28:58:14<57:57:57,  1.64it/s] 

mean train loss: shift = 0.044655167188495395, frame = 0.025332155825570225


 31%|███▏      | 157499/500000 [29:03:45<57:52:31,  1.64it/s] 

mean train loss: shift = 0.0434101628717035, frame = 0.02553841779381037


 32%|███▏      | 157999/500000 [29:09:18<58:01:12,  1.64it/s] 

mean train loss: shift = 0.043432689806446434, frame = 0.025145010685548187


 32%|███▏      | 158499/500000 [29:14:50<57:39:19,  1.65it/s] 

mean train loss: shift = 0.043443114768713716, frame = 0.025317004650831223


 32%|███▏      | 158999/500000 [29:20:22<57:39:44,  1.64it/s] 

mean train loss: shift = 0.043970382617786526, frame = 0.025362897649407388


 32%|███▏      | 159499/500000 [29:25:55<57:29:10,  1.65it/s] 

mean train loss: shift = 0.04291398312151432, frame = 0.025617768470197914


 32%|███▏      | 159999/500000 [29:31:28<57:33:28,  1.64it/s] 

mean train loss: shift = 0.04354515044018626, frame = 0.025299415182322264


 32%|███▏      | 160499/500000 [29:36:59<57:27:24,  1.64it/s] 

mean train loss: shift = 0.04301792975142598, frame = 0.025588210854679345


 32%|███▏      | 160999/500000 [29:42:31<57:24:29,  1.64it/s] 

mean train loss: shift = 0.04210900918394327, frame = 0.025260691752657296


 32%|███▏      | 161499/500000 [29:48:03<57:17:24,  1.64it/s] 

mean train loss: shift = 0.043231271093711256, frame = 0.025030833249911667


 32%|███▏      | 161999/500000 [29:53:34<57:02:45,  1.65it/s] 

mean train loss: shift = 0.043009881798177954, frame = 0.025110306333750488


 32%|███▏      | 162499/500000 [29:59:07<57:07:27,  1.64it/s] 

mean train loss: shift = 0.04272647243924439, frame = 0.025402055375277996


 33%|███▎      | 162999/500000 [30:04:39<56:54:48,  1.64it/s] 

mean train loss: shift = 0.042945147408172485, frame = 0.025107725447043778


 33%|███▎      | 163499/500000 [30:10:11<56:55:57,  1.64it/s] 

mean train loss: shift = 0.04329593900591135, frame = 0.02478159629367292


 33%|███▎      | 163999/500000 [30:15:43<56:57:15,  1.64it/s] 

mean train loss: shift = 0.041253314146772024, frame = 0.02511877666413784


 33%|███▎      | 164499/500000 [30:21:14<56:38:10,  1.65it/s] 

mean train loss: shift = 0.04265999592095614, frame = 0.025460966842249036


 33%|███▎      | 164999/500000 [30:26:46<56:41:05,  1.64it/s] 

mean train loss: shift = 0.04290164951235056, frame = 0.025106814041733743


 33%|███▎      | 165499/500000 [30:32:19<56:43:11,  1.64it/s] 

mean train loss: shift = 0.04103218412026763, frame = 0.025136973729357123


 33%|███▎      | 165999/500000 [30:37:50<56:26:26,  1.64it/s] 

mean train loss: shift = 0.043256849439814685, frame = 0.02531782985664904


 33%|███▎      | 166499/500000 [30:43:22<56:26:47,  1.64it/s] 

mean train loss: shift = 0.04135849089547992, frame = 0.024835843132808805


 33%|███▎      | 166999/500000 [30:48:54<56:24:41,  1.64it/s] 

mean train loss: shift = 0.04209974080696702, frame = 0.025160587046295403


 33%|███▎      | 167499/500000 [30:54:25<56:11:04,  1.64it/s] 

mean train loss: shift = 0.04159702839516103, frame = 0.02508495729789138


 34%|███▎      | 167999/500000 [30:59:57<56:07:53,  1.64it/s] 

mean train loss: shift = 0.042274787589907645, frame = 0.025304787922650576


 34%|███▎      | 168499/500000 [31:05:29<56:13:38,  1.64it/s] 

mean train loss: shift = 0.04224440729804337, frame = 0.024904884703457357


 34%|███▍      | 168999/500000 [31:11:00<55:53:24,  1.65it/s] 

mean train loss: shift = 0.04087510310485959, frame = 0.02528062569722533


 34%|███▍      | 169499/500000 [31:16:32<55:52:28,  1.64it/s] 

mean train loss: shift = 0.04866664723679423, frame = 0.025273257955908776


 34%|███▍      | 169999/500000 [31:22:05<55:58:26,  1.64it/s] 

mean train loss: shift = 0.04094748847931624, frame = 0.02473089218698442


 34%|███▍      | 170499/500000 [31:27:35<55:44:06,  1.64it/s] 

mean train loss: shift = 0.04052737394534051, frame = 0.02531294962018728


 34%|███▍      | 170999/500000 [31:33:08<55:38:11,  1.64it/s] 

mean train loss: shift = 0.04347860968671739, frame = 0.025160497110337018


 34%|███▍      | 171499/500000 [31:38:40<55:41:36,  1.64it/s] 

mean train loss: shift = 0.040452975172549484, frame = 0.02458917136490345


 34%|███▍      | 171999/500000 [31:44:12<55:25:54,  1.64it/s] 

mean train loss: shift = 0.03977954899519682, frame = 0.02493809955008328


 34%|███▍      | 172499/500000 [31:49:43<55:19:38,  1.64it/s] 

mean train loss: shift = 0.04017982474528253, frame = 0.024773866694420575


 35%|███▍      | 172999/500000 [31:55:16<55:25:14,  1.64it/s] 

mean train loss: shift = 0.041024565048515794, frame = 0.024610990012064578


 35%|███▍      | 173499/500000 [32:00:49<55:09:53,  1.64it/s] 

mean train loss: shift = 0.04001790823787451, frame = 0.024607284393161535


 35%|███▍      | 173999/500000 [32:06:20<55:10:43,  1.64it/s] 

mean train loss: shift = 0.04174727380461991, frame = 0.02492617621086538


 35%|███▍      | 174499/500000 [32:11:51<55:06:38,  1.64it/s] 

mean train loss: shift = 0.0404943007864058, frame = 0.024831510035321115


 35%|███▍      | 174999/500000 [32:17:23<54:56:34,  1.64it/s] 

mean train loss: shift = 0.04009543865546584, frame = 0.024709158707410098


 35%|███▌      | 175499/500000 [32:22:55<54:52:46,  1.64it/s] 

mean train loss: shift = 0.04328085671551526, frame = 0.025027356661856175


 35%|███▌      | 175999/500000 [32:28:27<54:48:47,  1.64it/s] 

mean train loss: shift = 0.040183622669428584, frame = 0.025008225619792938


 35%|███▌      | 176499/500000 [32:33:58<54:41:36,  1.64it/s] 

mean train loss: shift = 0.040807298745959994, frame = 0.024822331031784415


 35%|███▌      | 176999/500000 [32:39:30<54:39:07,  1.64it/s] 

mean train loss: shift = 0.04135204064287245, frame = 0.024634404277428985


 35%|███▌      | 177499/500000 [32:45:03<54:41:18,  1.64it/s] 

mean train loss: shift = 0.03995188845321536, frame = 0.024861559553071857


 36%|███▌      | 177999/500000 [32:50:35<54:22:26,  1.64it/s] 

mean train loss: shift = 0.04089483786746859, frame = 0.024735939301550387


 36%|███▌      | 178499/500000 [32:56:06<54:19:54,  1.64it/s] 

mean train loss: shift = 0.03999772688932717, frame = 0.024489123575389384


 36%|███▌      | 178999/500000 [33:01:38<54:22:47,  1.64it/s] 

mean train loss: shift = 0.04028009854070842, frame = 0.024618676306679844


 36%|███▌      | 179499/500000 [33:07:10<54:06:58,  1.65it/s] 

mean train loss: shift = 0.04098878026753664, frame = 0.024686027649790047


 36%|███▌      | 179999/500000 [33:12:43<54:08:24,  1.64it/s] 

mean train loss: shift = 0.04084464773908258, frame = 0.02509244411997497


 36%|███▌      | 180499/500000 [33:18:14<54:06:35,  1.64it/s] 

mean train loss: shift = 0.03922385450266302, frame = 0.024764451848343014


 36%|███▌      | 180999/500000 [33:23:46<53:53:25,  1.64it/s] 

mean train loss: shift = 0.0390591667573899, frame = 0.0246352523509413


 36%|███▋      | 181499/500000 [33:29:18<53:51:13,  1.64it/s] 

mean train loss: shift = 0.03747715032100678, frame = 0.02493709601648152


 36%|███▋      | 181999/500000 [33:34:49<53:57:44,  1.64it/s] 

mean train loss: shift = 0.03959736901894212, frame = 0.024587030181661248


 36%|███▋      | 182499/500000 [33:40:21<53:45:23,  1.64it/s] 

mean train loss: shift = 0.03932302528619766, frame = 0.024637818947434425


 37%|███▋      | 182999/500000 [33:45:52<53:37:47,  1.64it/s] 

mean train loss: shift = 0.03883462752215564, frame = 0.02451796365343034


 37%|███▋      | 183499/500000 [33:51:25<53:40:21,  1.64it/s] 

mean train loss: shift = 0.038453434849157926, frame = 0.02445233286730945


 37%|███▋      | 183999/500000 [33:56:57<53:27:48,  1.64it/s] 

mean train loss: shift = 0.03987668531946838, frame = 0.02439397844299674


 37%|███▋      | 184499/500000 [34:02:28<53:25:07,  1.64it/s] 

mean train loss: shift = 0.03900940456055105, frame = 0.02458372465893626


 37%|███▋      | 184999/500000 [34:08:01<53:24:31,  1.64it/s] 

mean train loss: shift = 0.03905946142040193, frame = 0.02435211797617376


 37%|███▋      | 185499/500000 [34:13:33<53:06:43,  1.64it/s] 

mean train loss: shift = 0.037815305162221195, frame = 0.02456829129345715


 37%|███▋      | 185999/500000 [34:19:05<53:09:46,  1.64it/s] 

mean train loss: shift = 0.036916180495172736, frame = 0.024692442808300255


 37%|███▋      | 186499/500000 [34:24:37<53:07:58,  1.64it/s] 

mean train loss: shift = 0.038729374293237924, frame = 0.0248215389624238


 37%|███▋      | 186999/500000 [34:30:10<52:55:08,  1.64it/s] 

mean train loss: shift = 0.03866209749877453, frame = 0.024420280430465936


 37%|███▋      | 187499/500000 [34:35:42<52:56:23,  1.64it/s] 

mean train loss: shift = 0.03977964232675731, frame = 0.02423217214271426


 38%|███▊      | 187999/500000 [34:41:15<52:56:22,  1.64it/s] 

mean train loss: shift = 0.03817102587781847, frame = 0.024246314054355025


 38%|███▊      | 188499/500000 [34:46:46<52:38:15,  1.64it/s] 

mean train loss: shift = 0.03858530466817319, frame = 0.02439014484733343


 38%|███▊      | 188999/500000 [34:52:18<52:38:55,  1.64it/s] 

mean train loss: shift = 0.039499632181599735, frame = 0.024533000387251376


 38%|███▊      | 189499/500000 [34:57:51<52:42:41,  1.64it/s] 

mean train loss: shift = 0.03837821212783456, frame = 0.02463028093241155


 38%|███▊      | 189999/500000 [35:03:24<52:31:47,  1.64it/s] 

mean train loss: shift = 0.039316363727673886, frame = 0.024755735650658608


 38%|███▊      | 190499/500000 [35:08:56<52:22:38,  1.64it/s] 

mean train loss: shift = 0.03832615540549159, frame = 0.02451061618514359


 38%|███▊      | 190999/500000 [35:14:28<52:25:05,  1.64it/s] 

mean train loss: shift = 0.03703875777684152, frame = 0.024257122533395886


 38%|███▊      | 191499/500000 [35:19:59<52:15:21,  1.64it/s] 

mean train loss: shift = 0.036367845186963677, frame = 0.024133789854124187


 38%|███▊      | 191999/500000 [35:25:32<52:10:11,  1.64it/s] 

mean train loss: shift = 0.03757337430678308, frame = 0.024127478023990987


 38%|███▊      | 192499/500000 [35:31:05<52:05:45,  1.64it/s] 

mean train loss: shift = 0.03749590262025595, frame = 0.02406334087625146


 39%|███▊      | 192999/500000 [35:36:36<51:51:53,  1.64it/s] 

mean train loss: shift = 0.037242980035021904, frame = 0.024633152386173605


 39%|███▊      | 193499/500000 [35:42:09<51:49:51,  1.64it/s] 

mean train loss: shift = 0.03861079691164195, frame = 0.02431425861828029


 39%|███▉      | 193999/500000 [35:47:41<51:53:17,  1.64it/s] 

mean train loss: shift = 0.03828825450129807, frame = 0.024460648419335485


 39%|███▉      | 194499/500000 [35:53:14<51:41:45,  1.64it/s] 

mean train loss: shift = 0.03844898157566786, frame = 0.02430847444012761


 39%|███▉      | 194999/500000 [35:58:46<51:39:20,  1.64it/s] 

mean train loss: shift = 0.03693460607901215, frame = 0.024187986033037304


 39%|███▉      | 195499/500000 [36:04:19<51:39:45,  1.64it/s] 

mean train loss: shift = 0.03808421164937317, frame = 0.02442108348198235


 39%|███▉      | 195999/500000 [36:09:50<51:30:58,  1.64it/s] 

mean train loss: shift = 0.03630344712547958, frame = 0.024112944182008506


 39%|███▉      | 196499/500000 [36:15:22<51:25:54,  1.64it/s] 

mean train loss: shift = 0.035074246374890206, frame = 0.0239897716678679


 39%|███▉      | 196999/500000 [36:20:56<51:20:48,  1.64it/s] 

mean train loss: shift = 0.038109761487692594, frame = 0.02437156851030886


 39%|███▉      | 197499/500000 [36:26:27<51:06:39,  1.64it/s] 

mean train loss: shift = 0.037573947256430984, frame = 0.024386416830122472


 40%|███▉      | 197999/500000 [36:32:00<51:09:50,  1.64it/s] 

mean train loss: shift = 0.03619007058441639, frame = 0.02454857767187059


 40%|███▉      | 198499/500000 [36:37:32<51:03:57,  1.64it/s] 

mean train loss: shift = 0.03669754802063108, frame = 0.024104378992691636


 40%|███▉      | 198999/500000 [36:43:04<50:55:45,  1.64it/s] 

mean train loss: shift = 0.03656401659362018, frame = 0.023891299292445184


 40%|███▉      | 199499/500000 [36:48:36<50:46:14,  1.64it/s] 

mean train loss: shift = 0.036348974127322436, frame = 0.02431916018202901


 40%|███▉      | 199999/500000 [36:54:09<50:48:34,  1.64it/s] 

mean train loss: shift = 0.03786790283024311, frame = 0.024163705157116057


 40%|████      | 200499/500000 [36:59:40<50:32:54,  1.65it/s] 

mean train loss: shift = 0.03638366418331861, frame = 0.024499026466161013


 40%|████      | 200999/500000 [37:05:12<50:39:22,  1.64it/s] 

mean train loss: shift = 0.03648804095387459, frame = 0.02461091537028551


 40%|████      | 201499/500000 [37:10:44<50:32:39,  1.64it/s] 

mean train loss: shift = 0.03591627500578761, frame = 0.024243791783228517


 40%|████      | 201999/500000 [37:16:16<50:25:47,  1.64it/s] 

mean train loss: shift = 0.0369056257866323, frame = 0.02446951922774315


 40%|████      | 202499/500000 [37:21:48<50:20:33,  1.64it/s] 

mean train loss: shift = 0.035756590101867915, frame = 0.02438462283462286


 41%|████      | 202999/500000 [37:27:21<50:26:06,  1.64it/s] 

mean train loss: shift = 0.036015323711559175, frame = 0.024117570094764233


 41%|████      | 203499/500000 [37:32:54<50:12:42,  1.64it/s] 

mean train loss: shift = 0.03629137957282364, frame = 0.024288482312113047


 41%|████      | 203999/500000 [37:38:27<50:05:08,  1.64it/s] 

mean train loss: shift = 0.036270671047270296, frame = 0.024232494780793788


 41%|████      | 204499/500000 [37:44:00<50:02:53,  1.64it/s] 

mean train loss: shift = 0.03600817330554128, frame = 0.024102781657129525


 41%|████      | 204999/500000 [37:49:31<49:53:31,  1.64it/s] 

mean train loss: shift = 0.03545839492790401, frame = 0.023612187292426826


 41%|████      | 205499/500000 [37:55:03<49:55:03,  1.64it/s] 

mean train loss: shift = 0.03561759314313531, frame = 0.023871703308075667


 41%|████      | 205999/500000 [38:00:35<49:54:34,  1.64it/s] 

mean train loss: shift = 0.03524260194972158, frame = 0.024404621755704285


 41%|████▏     | 206499/500000 [38:06:08<49:39:17,  1.64it/s] 

mean train loss: shift = 0.036070321746170524, frame = 0.024244316440075638


 41%|████▏     | 206999/500000 [38:11:39<49:32:40,  1.64it/s] 

mean train loss: shift = 0.03677068953216076, frame = 0.023969403317198157


 41%|████▏     | 207499/500000 [38:17:11<49:29:22,  1.64it/s] 

mean train loss: shift = 0.035841582166031005, frame = 0.02413961724936962


 42%|████▏     | 207999/500000 [38:22:43<49:21:45,  1.64it/s] 

mean train loss: shift = 0.03575714350864291, frame = 0.02417749755829573


 42%|████▏     | 208499/500000 [38:28:16<49:20:13,  1.64it/s] 

mean train loss: shift = 0.036662325007840994, frame = 0.02420174409635365


 42%|████▏     | 208999/500000 [38:33:48<49:13:53,  1.64it/s] 

mean train loss: shift = 0.037785825580358506, frame = 0.024334190590307118


 42%|████▏     | 209499/500000 [38:39:21<49:06:23,  1.64it/s] 

mean train loss: shift = 0.03645688280463219, frame = 0.023917128544300795


 42%|████▏     | 209999/500000 [38:44:52<49:01:33,  1.64it/s] 

mean train loss: shift = 0.03536285941489041, frame = 0.024077569456771017


 42%|████▏     | 210499/500000 [38:50:24<49:10:06,  1.64it/s] 

mean train loss: shift = 0.03418316741660237, frame = 0.024196889400482178


 42%|████▏     | 210999/500000 [38:55:55<48:50:44,  1.64it/s] 

mean train loss: shift = 0.035738743087276814, frame = 0.02406985688768327


 42%|████▏     | 211499/500000 [39:01:27<48:51:15,  1.64it/s] 

mean train loss: shift = 0.03539372957870364, frame = 0.023668617345392703


 42%|████▏     | 211999/500000 [39:07:00<48:47:10,  1.64it/s] 

mean train loss: shift = 0.03542908178828657, frame = 0.024080979242920877


 42%|████▏     | 212499/500000 [39:12:32<48:36:41,  1.64it/s] 

mean train loss: shift = 0.03592114597558975, frame = 0.02380288098193705


 43%|████▎     | 212999/500000 [39:18:04<48:35:29,  1.64it/s] 

mean train loss: shift = 0.03638646756671369, frame = 0.02414275760576129


 43%|████▎     | 213499/500000 [39:23:37<48:31:43,  1.64it/s] 

mean train loss: shift = 0.03606031247973442, frame = 0.023921975404024125


 43%|████▎     | 213999/500000 [39:29:09<48:16:03,  1.65it/s] 

mean train loss: shift = 0.03487548702396452, frame = 0.024078004460781812


 43%|████▎     | 214499/500000 [39:34:41<48:18:42,  1.64it/s] 

mean train loss: shift = 0.0361063036005944, frame = 0.02435868887975812


 43%|████▎     | 214999/500000 [39:40:14<48:20:35,  1.64it/s] 

mean train loss: shift = 0.034460869502276184, frame = 0.024047995954751968


 43%|████▎     | 215499/500000 [39:45:46<48:00:57,  1.65it/s] 

mean train loss: shift = 0.034749590165913107, frame = 0.02370527708530426


 43%|████▎     | 215999/500000 [39:51:18<48:02:44,  1.64it/s] 

mean train loss: shift = 0.033987331181764606, frame = 0.023688166156411172


 43%|████▎     | 216499/500000 [39:56:51<48:07:53,  1.64it/s] 

mean train loss: shift = 0.035142396101728084, frame = 0.024104049956426024


 43%|████▎     | 216999/500000 [40:02:23<47:50:49,  1.64it/s] 

mean train loss: shift = 0.03524022511579096, frame = 0.024009557630866766


 43%|████▎     | 217499/500000 [40:07:55<47:54:00,  1.64it/s] 

mean train loss: shift = 0.03369740683399141, frame = 0.02399335515126586


 44%|████▎     | 217999/500000 [40:13:28<47:46:12,  1.64it/s] 

mean train loss: shift = 0.03589405994676054, frame = 0.023929532399401068


 44%|████▎     | 218499/500000 [40:18:59<47:35:32,  1.64it/s] 

mean train loss: shift = 0.034371931381523606, frame = 0.023852853994816542


 44%|████▍     | 218999/500000 [40:24:32<47:34:11,  1.64it/s] 

mean train loss: shift = 0.034353228591382505, frame = 0.024022504348307848


 44%|████▍     | 219499/500000 [40:30:04<47:32:27,  1.64it/s] 

mean train loss: shift = 0.03352691674232483, frame = 0.023997687328606843


 44%|████▍     | 219999/500000 [40:35:35<47:18:20,  1.64it/s] 

mean train loss: shift = 0.035094152627512816, frame = 0.023607426887378095


 44%|████▍     | 220499/500000 [40:41:07<47:15:36,  1.64it/s] 

mean train loss: shift = 0.03409736135043204, frame = 0.02354870463721454


 44%|████▍     | 220999/500000 [40:46:39<47:17:05,  1.64it/s] 

mean train loss: shift = 0.03589053403958678, frame = 0.024202094480395318


 44%|████▍     | 221499/500000 [40:52:10<47:04:45,  1.64it/s] 

mean train loss: shift = 0.03386234107613564, frame = 0.023968855367973448


 44%|████▍     | 221999/500000 [40:57:41<47:00:48,  1.64it/s] 

mean train loss: shift = 0.03490437523089349, frame = 0.023451512908563017


 44%|████▍     | 222499/500000 [41:03:13<46:58:40,  1.64it/s] 

mean train loss: shift = 0.03247586230933666, frame = 0.023889569934457542


 45%|████▍     | 222999/500000 [41:08:45<46:49:50,  1.64it/s] 

mean train loss: shift = 0.03253188749961555, frame = 0.023992409996688367


 45%|████▍     | 223499/500000 [41:14:17<46:52:01,  1.64it/s] 

mean train loss: shift = 0.03324279307946563, frame = 0.023892603201791644


 45%|████▍     | 223999/500000 [41:19:49<46:45:15,  1.64it/s] 

mean train loss: shift = 0.03343045493774116, frame = 0.02375137376599014


 45%|████▍     | 224499/500000 [41:25:21<46:38:43,  1.64it/s] 

mean train loss: shift = 0.035364876940846446, frame = 0.023655369497835636


 45%|████▍     | 224999/500000 [41:30:53<46:35:12,  1.64it/s] 

mean train loss: shift = 0.03448242781125009, frame = 0.023994322983548044


 45%|████▌     | 225499/500000 [41:36:25<46:32:23,  1.64it/s] 

mean train loss: shift = 0.034491326346993444, frame = 0.0236154880002141


 45%|████▌     | 225999/500000 [41:41:56<46:16:05,  1.65it/s] 

mean train loss: shift = 0.03387739445269108, frame = 0.023898695001378655


 45%|████▌     | 226499/500000 [41:47:28<46:18:33,  1.64it/s] 

mean train loss: shift = 0.03378638017643243, frame = 0.023625932607799768


 45%|████▌     | 226999/500000 [41:53:00<46:15:54,  1.64it/s] 

mean train loss: shift = 0.03329150631465018, frame = 0.023532394813373686


 45%|████▌     | 227499/500000 [41:58:33<46:04:08,  1.64it/s] 

mean train loss: shift = 0.03393948310613632, frame = 0.023597408302128316


 46%|████▌     | 227999/500000 [42:04:03<45:57:41,  1.64it/s] 

mean train loss: shift = 0.03400496604666114, frame = 0.023797819605097175


 46%|████▌     | 228499/500000 [42:09:37<45:59:53,  1.64it/s] 

mean train loss: shift = 0.03421950078755617, frame = 0.02346152040734887


 46%|████▌     | 228999/500000 [42:15:08<45:49:12,  1.64it/s] 

mean train loss: shift = 0.034235536968335506, frame = 0.023948528749868275


 46%|████▌     | 229499/500000 [42:20:40<45:43:20,  1.64it/s] 

mean train loss: shift = 0.03511575212143361, frame = 0.02387007235363126


 46%|████▌     | 229999/500000 [42:26:13<45:46:07,  1.64it/s] 

mean train loss: shift = 0.03383367902971804, frame = 0.02367876886203885


 46%|████▌     | 230499/500000 [42:31:45<45:37:17,  1.64it/s] 

mean train loss: shift = 0.03281421588920057, frame = 0.023694510543718933


 46%|████▌     | 230999/500000 [42:37:16<45:30:32,  1.64it/s] 

mean train loss: shift = 0.03313879625312984, frame = 0.02381224291585386


 46%|████▋     | 231499/500000 [42:42:49<45:37:48,  1.63it/s] 

mean train loss: shift = 0.03355268357694149, frame = 0.023977943046018482


 46%|████▋     | 231999/500000 [42:48:21<45:18:17,  1.64it/s] 

mean train loss: shift = 0.03328666889294982, frame = 0.023648211181163788


 46%|████▋     | 232499/500000 [42:53:52<45:13:56,  1.64it/s] 

mean train loss: shift = 0.032911349959671495, frame = 0.023794888503849507


 47%|████▋     | 232999/500000 [42:59:24<45:07:14,  1.64it/s] 

mean train loss: shift = 0.032965582709759474, frame = 0.023678081769496203


 47%|████▋     | 233499/500000 [43:04:57<45:07:35,  1.64it/s] 

mean train loss: shift = 0.03226462146639824, frame = 0.02359978005848825


 47%|████▋     | 233999/500000 [43:10:29<44:55:11,  1.64it/s] 

mean train loss: shift = 0.032844070127233865, frame = 0.023698960358276962


 47%|████▋     | 234499/500000 [43:16:01<44:58:50,  1.64it/s] 

mean train loss: shift = 0.033791970448568465, frame = 0.023682208249345423


 47%|████▋     | 234999/500000 [43:21:32<44:44:00,  1.65it/s] 

mean train loss: shift = 0.03251552103646099, frame = 0.02345057173073292


 47%|████▋     | 235499/500000 [43:27:04<44:43:17,  1.64it/s] 

mean train loss: shift = 0.03242763161286712, frame = 0.023454800184816122


 47%|████▋     | 235999/500000 [43:32:37<44:50:00,  1.64it/s] 

mean train loss: shift = 0.032984186178073284, frame = 0.0230802106205374


 47%|████▋     | 236499/500000 [43:38:10<44:33:50,  1.64it/s] 

mean train loss: shift = 0.03355420583486557, frame = 0.023333484154194593


 47%|████▋     | 236999/500000 [43:43:42<44:29:56,  1.64it/s] 

mean train loss: shift = 0.03186043162830174, frame = 0.023732922287657856


 47%|████▋     | 237499/500000 [43:49:15<44:31:44,  1.64it/s] 

mean train loss: shift = 0.03182665614970028, frame = 0.02412277137115598


 48%|████▊     | 237999/500000 [43:54:47<44:18:18,  1.64it/s] 

mean train loss: shift = 0.03224205698631704, frame = 0.023683399993926286


 48%|████▊     | 238499/500000 [44:00:19<44:23:49,  1.64it/s] 

mean train loss: shift = 0.03357681613601744, frame = 0.023546674348413944


 48%|████▊     | 238999/500000 [44:05:51<44:14:35,  1.64it/s] 

mean train loss: shift = 0.03195439160242677, frame = 0.02322006821632385


 48%|████▊     | 239499/500000 [44:11:23<44:05:50,  1.64it/s] 

mean train loss: shift = 0.03263666282594204, frame = 0.023351965257897973


 48%|████▊     | 239999/500000 [44:16:55<44:01:30,  1.64it/s] 

mean train loss: shift = 0.03197804742492735, frame = 0.023386393662542106


 48%|████▊     | 240499/500000 [44:22:27<44:02:59,  1.64it/s] 

mean train loss: shift = 0.03241679628007114, frame = 0.023553385147824884


 48%|████▊     | 240999/500000 [44:27:58<43:47:27,  1.64it/s] 

mean train loss: shift = 0.032093833610415456, frame = 0.02329014022834599


 48%|████▊     | 241499/500000 [44:33:32<43:42:31,  1.64it/s] 

mean train loss: shift = 0.03104413736052811, frame = 0.02332271644845605


 48%|████▊     | 241999/500000 [44:39:04<43:48:46,  1.64it/s] 

mean train loss: shift = 0.03242710890434682, frame = 0.022988970803096892


 48%|████▊     | 242499/500000 [44:44:36<43:29:34,  1.64it/s] 

mean train loss: shift = 0.03181812230870128, frame = 0.023394326796755193


 49%|████▊     | 242999/500000 [44:50:08<43:31:02,  1.64it/s] 

mean train loss: shift = 0.031580819047987464, frame = 0.02347497562132776


 49%|████▊     | 243499/500000 [44:55:40<43:25:12,  1.64it/s] 

mean train loss: shift = 0.030532824255526066, frame = 0.023145253947004677


 49%|████▉     | 243999/500000 [45:01:11<43:20:11,  1.64it/s] 

mean train loss: shift = 0.03225451386347413, frame = 0.0235902168918401


 49%|████▉     | 244499/500000 [45:06:43<43:15:34,  1.64it/s] 

mean train loss: shift = 0.03201250811852515, frame = 0.023291683146730066


 49%|████▉     | 244999/500000 [45:12:15<43:18:38,  1.64it/s] 

mean train loss: shift = 0.033195582933723926, frame = 0.023248562067747117


 49%|████▉     | 245499/500000 [45:17:48<43:03:18,  1.64it/s] 

mean train loss: shift = 0.030895801780745386, frame = 0.022986854314804076


 49%|████▉     | 245999/500000 [45:23:20<42:58:05,  1.64it/s] 

mean train loss: shift = 0.03150536379218102, frame = 0.023248893229290843


 49%|████▉     | 246499/500000 [45:28:52<42:56:14,  1.64it/s] 

mean train loss: shift = 0.03290542488358915, frame = 0.023277360271662474


 49%|████▉     | 246999/500000 [45:34:23<42:46:13,  1.64it/s] 

mean train loss: shift = 0.03158843637444079, frame = 0.023180027076974512


 49%|████▉     | 247499/500000 [45:39:55<42:43:09,  1.64it/s] 

mean train loss: shift = 0.034972825687378646, frame = 0.023418352449312806


 50%|████▉     | 247999/500000 [45:45:27<42:39:26,  1.64it/s] 

mean train loss: shift = 0.03187525138631463, frame = 0.023299652175977827


 50%|████▉     | 248499/500000 [45:50:59<42:30:33,  1.64it/s] 

mean train loss: shift = 0.03220964506454766, frame = 0.023143009250983594


 50%|████▉     | 248999/500000 [45:56:31<42:26:36,  1.64it/s] 

mean train loss: shift = 0.031564299976453186, frame = 0.023438961248844863


 50%|████▉     | 249499/500000 [46:02:03<42:22:37,  1.64it/s] 

mean train loss: shift = 0.031015458453446627, frame = 0.023155283564701677


 50%|████▉     | 249999/500000 [46:07:36<42:17:23,  1.64it/s] 

mean train loss: shift = 0.03258733454719186, frame = 0.023427887121215463


 50%|█████     | 250499/500000 [46:13:07<42:09:02,  1.64it/s] 

mean train loss: shift = 0.031796566197648646, frame = 0.023205565398558975


 50%|█████     | 250999/500000 [46:18:39<42:10:15,  1.64it/s] 

mean train loss: shift = 0.031162909038364887, frame = 0.02323647464439273


 50%|█████     | 251499/500000 [46:24:11<42:03:37,  1.64it/s] 

mean train loss: shift = 0.03108066231943667, frame = 0.023168495610356332


 50%|█████     | 251999/500000 [46:29:42<42:03:21,  1.64it/s] 

mean train loss: shift = 0.031193708822131155, frame = 0.023175459157675504


 50%|█████     | 252499/500000 [46:35:15<41:58:28,  1.64it/s] 

mean train loss: shift = 0.031246909096837045, frame = 0.02294330176897347


 51%|█████     | 252999/500000 [46:40:47<41:47:48,  1.64it/s] 

mean train loss: shift = 0.03097071670740843, frame = 0.023412926832213996


 51%|█████     | 253499/500000 [46:46:19<41:41:13,  1.64it/s] 

mean train loss: shift = 0.0333937364127487, frame = 0.023266396703198553


 51%|█████     | 253999/500000 [46:51:51<41:42:22,  1.64it/s] 

mean train loss: shift = 0.03008311809413135, frame = 0.023441802186891438


 51%|█████     | 254499/500000 [46:57:23<41:36:32,  1.64it/s] 

mean train loss: shift = 0.030228507621213794, frame = 0.023419794147834183


 51%|█████     | 254999/500000 [47:02:54<41:27:17,  1.64it/s] 

mean train loss: shift = 0.03060210349969566, frame = 0.02333638596534729


 51%|█████     | 255499/500000 [47:08:26<41:26:27,  1.64it/s] 

mean train loss: shift = 0.03111358068138361, frame = 0.023012429317459462


 51%|█████     | 255999/500000 [47:13:57<41:14:23,  1.64it/s] 

mean train loss: shift = 0.03153891101665795, frame = 0.023280573777854443


 51%|█████▏    | 256499/500000 [47:19:29<41:10:09,  1.64it/s] 

mean train loss: shift = 0.031095975555479525, frame = 0.023214406384155156


 51%|█████▏    | 256999/500000 [47:25:01<41:12:36,  1.64it/s] 

mean train loss: shift = 0.030094091825187207, frame = 0.023101114697754385


 51%|█████▏    | 257499/500000 [47:30:32<41:01:08,  1.64it/s] 

mean train loss: shift = 0.030787811160087584, frame = 0.023458048913627863


 52%|█████▏    | 257999/500000 [47:36:04<41:02:22,  1.64it/s] 

mean train loss: shift = 0.030532120265066624, frame = 0.023559355588629843


 52%|█████▏    | 258499/500000 [47:41:37<40:59:39,  1.64it/s] 

mean train loss: shift = 0.030040947623550893, frame = 0.023253004800528287


 52%|█████▏    | 258999/500000 [47:47:09<40:48:13,  1.64it/s] 

mean train loss: shift = 0.03202007161080837, frame = 0.02326474886201322


 52%|█████▏    | 259499/500000 [47:52:41<40:44:51,  1.64it/s] 

mean train loss: shift = 0.030597638487815857, frame = 0.02328047089278698


 52%|█████▏    | 259999/500000 [47:58:13<40:42:03,  1.64it/s] 

mean train loss: shift = 0.031312235394492745, frame = 0.023358131175860764


 52%|█████▏    | 260499/500000 [48:03:44<40:26:12,  1.65it/s] 

mean train loss: shift = 0.03076193435676396, frame = 0.02300751346349716


 52%|█████▏    | 260999/500000 [48:09:15<40:29:37,  1.64it/s] 

mean train loss: shift = 0.031000632751733065, frame = 0.02295015882514417


 52%|█████▏    | 261499/500000 [48:14:48<40:27:19,  1.64it/s] 

mean train loss: shift = 0.029903384756296874, frame = 0.02320040885359049


 52%|█████▏    | 261999/500000 [48:20:19<40:12:15,  1.64it/s] 

mean train loss: shift = 0.03165852009505034, frame = 0.022999576192349196


 52%|█████▏    | 262499/500000 [48:25:51<40:12:19,  1.64it/s] 

mean train loss: shift = 0.030432971311733125, frame = 0.02331617094576359


 53%|█████▎    | 262999/500000 [48:31:25<40:09:22,  1.64it/s] 

mean train loss: shift = 0.029925524799153207, frame = 0.023122787496075034


 53%|█████▎    | 263499/500000 [48:36:57<40:00:46,  1.64it/s] 

mean train loss: shift = 0.030266794815659522, frame = 0.023061885649338365


 53%|█████▎    | 263999/500000 [48:42:30<39:57:25,  1.64it/s] 

mean train loss: shift = 0.03304382462799549, frame = 0.02314677573367953


 53%|█████▎    | 264499/500000 [48:48:02<39:54:58,  1.64it/s] 

mean train loss: shift = 0.031110569309443236, frame = 0.02265073888935149


 53%|█████▎    | 264999/500000 [48:53:33<39:45:43,  1.64it/s] 

mean train loss: shift = 0.029634638803079726, frame = 0.023178765777498483


 53%|█████▎    | 265499/500000 [48:59:05<39:44:03,  1.64it/s] 

mean train loss: shift = 0.028887386532500385, frame = 0.02284423517808318


 53%|█████▎    | 265999/500000 [49:04:39<39:42:46,  1.64it/s] 

mean train loss: shift = 0.02987627499550581, frame = 0.023109725642949344


 53%|█████▎    | 266499/500000 [49:10:10<39:35:23,  1.64it/s] 

mean train loss: shift = 0.02952754455432296, frame = 0.02312237128801644


 53%|█████▎    | 266999/500000 [49:15:41<39:26:17,  1.64it/s] 

mean train loss: shift = 0.029648991961032153, frame = 0.022925245547667145


 53%|█████▎    | 267499/500000 [49:21:15<39:25:17,  1.64it/s] 

mean train loss: shift = 0.029310877857729793, frame = 0.022810209231451155


 54%|█████▎    | 267999/500000 [49:26:47<39:18:07,  1.64it/s] 

mean train loss: shift = 0.029796359779313206, frame = 0.022878417858853937


 54%|█████▎    | 268499/500000 [49:32:20<39:16:35,  1.64it/s] 

mean train loss: shift = 0.029753320878371597, frame = 0.023346728570759298


 54%|█████▍    | 268999/500000 [49:37:53<39:07:04,  1.64it/s] 

mean train loss: shift = 0.030408810650929808, frame = 0.02313439205661416


 54%|█████▍    | 269499/500000 [49:43:26<38:57:39,  1.64it/s] 

mean train loss: shift = 0.030014084139838814, frame = 0.023063173903152347


 54%|█████▍    | 269999/500000 [49:48:58<38:58:51,  1.64it/s] 

mean train loss: shift = 0.02973123768903315, frame = 0.02276653409563005


 54%|█████▍    | 270499/500000 [49:54:31<38:57:09,  1.64it/s] 

mean train loss: shift = 0.0294168237503618, frame = 0.023090081494301556


 54%|█████▍    | 270999/500000 [50:00:02<38:37:44,  1.65it/s] 

mean train loss: shift = 0.029556657882407307, frame = 0.022771778820082546


 54%|█████▍    | 271499/500000 [50:05:34<38:44:04,  1.64it/s] 

mean train loss: shift = 0.029784388260915874, frame = 0.02303866987116635


 54%|█████▍    | 271999/500000 [50:11:06<38:36:06,  1.64it/s] 

mean train loss: shift = 0.029055991215631367, frame = 0.022827883752062916


 54%|█████▍    | 272499/500000 [50:16:37<38:27:57,  1.64it/s] 

mean train loss: shift = 0.02855363711901009, frame = 0.023203960381448268


 55%|█████▍    | 272999/500000 [50:22:09<38:23:34,  1.64it/s] 

mean train loss: shift = 0.029197258960455656, frame = 0.022953904738649725


 55%|█████▍    | 273499/500000 [50:27:42<38:28:03,  1.64it/s] 

mean train loss: shift = 0.029770718213170767, frame = 0.02270795678906143


 55%|█████▍    | 273999/500000 [50:33:13<38:11:32,  1.64it/s] 

mean train loss: shift = 0.028838620217517017, frame = 0.023124507013708353


 55%|█████▍    | 274499/500000 [50:38:45<38:07:43,  1.64it/s] 

mean train loss: shift = 0.029906468652188777, frame = 0.02274016421660781


 55%|█████▍    | 274999/500000 [50:44:17<38:10:15,  1.64it/s] 

mean train loss: shift = 0.030067070829682054, frame = 0.022920377844944596


 55%|█████▌    | 275499/500000 [50:49:49<38:02:54,  1.64it/s] 

mean train loss: shift = 0.029197108272463083, frame = 0.022995256232097746


 55%|█████▌    | 275999/500000 [50:55:20<37:54:52,  1.64it/s] 

mean train loss: shift = 0.028817693496122957, frame = 0.023124806456267834


 55%|█████▌    | 276499/500000 [51:00:53<37:56:30,  1.64it/s] 

mean train loss: shift = 0.029278115537017584, frame = 0.022740195740014315


 55%|█████▌    | 276999/500000 [51:06:24<37:45:25,  1.64it/s] 

mean train loss: shift = 0.029893279142677785, frame = 0.022915609192103146


 55%|█████▌    | 277499/500000 [51:11:57<37:42:49,  1.64it/s] 

mean train loss: shift = 0.02967440938204527, frame = 0.0230533825494349


 56%|█████▌    | 277999/500000 [51:17:30<37:40:07,  1.64it/s] 

mean train loss: shift = 0.029840607941150666, frame = 0.022595011593773962


 56%|█████▌    | 278499/500000 [51:23:01<37:27:27,  1.64it/s] 

mean train loss: shift = 0.030036488300189377, frame = 0.023203458366915584


 56%|█████▌    | 278999/500000 [51:28:33<37:29:40,  1.64it/s] 

mean train loss: shift = 0.02918066069483757, frame = 0.023075282404199243


 56%|█████▌    | 279499/500000 [51:34:06<37:24:29,  1.64it/s] 

mean train loss: shift = 0.028986790189519523, frame = 0.022810942865908145


 56%|█████▌    | 279999/500000 [51:39:37<37:10:48,  1.64it/s] 

mean train loss: shift = 0.02836937282048166, frame = 0.02302942438982427


 56%|█████▌    | 280499/500000 [51:45:10<37:14:51,  1.64it/s] 

mean train loss: shift = 0.028092948889359832, frame = 0.022845271980389954


 56%|█████▌    | 280999/500000 [51:50:43<37:08:22,  1.64it/s] 

mean train loss: shift = 0.029052921202033757, frame = 0.022436481082811953


 56%|█████▋    | 281499/500000 [51:56:15<37:00:12,  1.64it/s] 

mean train loss: shift = 0.02798957408219576, frame = 0.022920245034620167


 56%|█████▋    | 281999/500000 [52:01:47<36:56:42,  1.64it/s] 

mean train loss: shift = 0.02848022186383605, frame = 0.022819676471874117


 56%|█████▋    | 282499/500000 [52:07:20<36:57:05,  1.64it/s] 

mean train loss: shift = 0.028034422593191267, frame = 0.023032813340425493


 57%|█████▋    | 282999/500000 [52:12:51<36:40:04,  1.64it/s] 

mean train loss: shift = 0.02887533533014357, frame = 0.022766192458570002


 57%|█████▋    | 283499/500000 [52:18:24<36:37:48,  1.64it/s] 

mean train loss: shift = 0.028122287940233946, frame = 0.022543917790055275


 57%|█████▋    | 283999/500000 [52:23:56<36:36:04,  1.64it/s] 

mean train loss: shift = 0.029904479488730432, frame = 0.02242884113267064


 57%|█████▋    | 284499/500000 [52:29:29<36:26:35,  1.64it/s] 

mean train loss: shift = 0.027640300385653974, frame = 0.023252671239897608


 57%|█████▋    | 284999/500000 [52:35:00<36:19:21,  1.64it/s] 

mean train loss: shift = 0.028892021952196957, frame = 0.02308825509622693


 57%|█████▋    | 285499/500000 [52:40:32<36:24:00,  1.64it/s] 

mean train loss: shift = 0.02801909774541855, frame = 0.022511869419366122


 57%|█████▋    | 285999/500000 [52:46:03<36:10:34,  1.64it/s] 

mean train loss: shift = 0.029494128409773113, frame = 0.022629853719845414


 57%|█████▋    | 286499/500000 [52:51:36<36:12:10,  1.64it/s] 

mean train loss: shift = 0.0282486872933805, frame = 0.022742999356240034


 57%|█████▋    | 286999/500000 [52:57:07<36:01:26,  1.64it/s] 

mean train loss: shift = 0.029226127872243525, frame = 0.02246836178563535


 57%|█████▋    | 287499/500000 [53:02:39<36:00:01,  1.64it/s] 

mean train loss: shift = 0.028891096553765237, frame = 0.02276651201210916


 58%|█████▊    | 287999/500000 [53:08:11<35:52:18,  1.64it/s] 

mean train loss: shift = 0.02904783698543906, frame = 0.022384384343400596


 58%|█████▊    | 288499/500000 [53:13:44<35:49:49,  1.64it/s] 

mean train loss: shift = 0.028518452398478986, frame = 0.022818758497014643


 58%|█████▊    | 288999/500000 [53:19:15<35:41:02,  1.64it/s] 

mean train loss: shift = 0.029880793966352938, frame = 0.02248071545176208


 58%|█████▊    | 289499/500000 [53:24:47<35:38:38,  1.64it/s] 

mean train loss: shift = 0.028246149085462094, frame = 0.022949849896132947


 58%|█████▊    | 289999/500000 [53:30:18<35:34:59,  1.64it/s] 

mean train loss: shift = 0.028826500618830322, frame = 0.022938575197011233


 58%|█████▊    | 290499/500000 [53:35:51<35:22:28,  1.65it/s] 

mean train loss: shift = 0.027909456791356205, frame = 0.02279935244843364


 58%|█████▊    | 290999/500000 [53:41:23<35:20:41,  1.64it/s] 

mean train loss: shift = 0.028767422154545783, frame = 0.022644986636936665


 58%|█████▊    | 291499/500000 [53:46:56<35:19:31,  1.64it/s] 

mean train loss: shift = 0.02908658109419048, frame = 0.022749524954706432


 58%|█████▊    | 291999/500000 [53:52:28<35:10:53,  1.64it/s] 

mean train loss: shift = 0.02694060179591179, frame = 0.02286499801278114


 58%|█████▊    | 292499/500000 [53:57:59<35:08:59,  1.64it/s] 

mean train loss: shift = 0.027766609586775303, frame = 0.02278419756889343


 59%|█████▊    | 292999/500000 [54:03:32<35:04:40,  1.64it/s] 

mean train loss: shift = 0.029076880414038896, frame = 0.022664591755717993


 59%|█████▊    | 293499/500000 [54:09:03<34:54:51,  1.64it/s] 

mean train loss: shift = 0.028772198893129826, frame = 0.022841791989281775


 59%|█████▉    | 293999/500000 [54:14:35<34:53:45,  1.64it/s] 

mean train loss: shift = 0.028162706360220908, frame = 0.022805692506954075


 59%|█████▉    | 294499/500000 [54:20:07<34:50:06,  1.64it/s] 

mean train loss: shift = 0.027679171930998565, frame = 0.02253931195475161


 59%|█████▉    | 294999/500000 [54:25:39<34:40:08,  1.64it/s] 

mean train loss: shift = 0.02826207117922604, frame = 0.02262774855643511


 59%|█████▉    | 295499/500000 [54:31:11<34:36:57,  1.64it/s] 

mean train loss: shift = 0.027862042762339116, frame = 0.02247441666573286


 59%|█████▉    | 295999/500000 [54:36:45<34:36:52,  1.64it/s] 

mean train loss: shift = 0.029079707382246852, frame = 0.022852749081328512


 59%|█████▉    | 296499/500000 [54:42:17<34:23:24,  1.64it/s] 

mean train loss: shift = 0.027711993720382452, frame = 0.022745978025719524


 59%|█████▉    | 296999/500000 [54:47:49<34:18:27,  1.64it/s] 

mean train loss: shift = 0.028339323664084076, frame = 0.022578788481652737


 59%|█████▉    | 297499/500000 [54:53:21<34:18:26,  1.64it/s] 

mean train loss: shift = 0.027193118378520013, frame = 0.02243036855943501


 60%|█████▉    | 297999/500000 [54:58:54<34:07:09,  1.64it/s] 

mean train loss: shift = 0.027977744018658997, frame = 0.022704871585592628


 60%|█████▉    | 298499/500000 [55:04:25<34:00:51,  1.65it/s] 

mean train loss: shift = 0.027988366732373832, frame = 0.022730609476566313


 60%|█████▉    | 298999/500000 [55:09:57<34:04:10,  1.64it/s] 

mean train loss: shift = 0.027608387269079684, frame = 0.022593078730627894


 60%|█████▉    | 299499/500000 [55:15:29<33:51:17,  1.65it/s] 

mean train loss: shift = 0.027825483920052647, frame = 0.0225318599473685


 60%|█████▉    | 299999/500000 [55:21:01<33:49:45,  1.64it/s] 

mean train loss: shift = 0.027412161795422434, frame = 0.022708376504480838


 60%|██████    | 300499/500000 [55:26:33<33:50:23,  1.64it/s] 

mean train loss: shift = 0.02824128611013293, frame = 0.022223117211833596


 60%|██████    | 300999/500000 [55:32:05<33:39:00,  1.64it/s] 

mean train loss: shift = 0.027629956604912877, frame = 0.022572648610919715


 60%|██████    | 301499/500000 [55:37:38<33:36:27,  1.64it/s] 

mean train loss: shift = 0.027423103757202624, frame = 0.022640492487698793


 60%|██████    | 301999/500000 [55:43:10<33:32:50,  1.64it/s] 

mean train loss: shift = 0.026766232680529356, frame = 0.022551461344584824


 60%|██████    | 302499/500000 [55:48:42<33:25:37,  1.64it/s] 

mean train loss: shift = 0.027697851536795496, frame = 0.02271638347953558


 61%|██████    | 302999/500000 [55:54:13<33:20:24,  1.64it/s] 

mean train loss: shift = 0.027965726278722285, frame = 0.0225771151073277


 61%|██████    | 303499/500000 [55:59:46<33:17:20,  1.64it/s] 

mean train loss: shift = 0.0284270760435611, frame = 0.022504445720463992


 61%|██████    | 303999/500000 [56:05:18<33:08:22,  1.64it/s] 

mean train loss: shift = 0.02778799227066338, frame = 0.022080378537997603


 61%|██████    | 304499/500000 [56:10:50<33:01:47,  1.64it/s] 

mean train loss: shift = 0.02833289348334074, frame = 0.022576659604907037


 61%|██████    | 304999/500000 [56:16:23<33:02:50,  1.64it/s] 

mean train loss: shift = 0.02728378733061254, frame = 0.022713107585906982


 61%|██████    | 305499/500000 [56:21:56<32:55:38,  1.64it/s] 

mean train loss: shift = 0.027107394563034178, frame = 0.02243128568306565


 61%|██████    | 305999/500000 [56:27:29<32:51:19,  1.64it/s] 

mean train loss: shift = 0.027108767047524452, frame = 0.02258440334908664


 61%|██████▏   | 306499/500000 [56:33:02<32:47:19,  1.64it/s] 

mean train loss: shift = 0.02667741097509861, frame = 0.022520194524899124


 61%|██████▏   | 306999/500000 [56:38:33<32:35:42,  1.64it/s] 

mean train loss: shift = 0.02626264087483287, frame = 0.022463373890146614


 61%|██████▏   | 307499/500000 [56:44:05<32:36:59,  1.64it/s] 

mean train loss: shift = 0.027276071585714816, frame = 0.022580526780337094


 62%|██████▏   | 307999/500000 [56:49:38<32:37:58,  1.63it/s] 

mean train loss: shift = 0.02619907610863447, frame = 0.022404331590980292


 62%|██████▏   | 308499/500000 [56:55:11<32:23:32,  1.64it/s] 

mean train loss: shift = 0.027108623772859573, frame = 0.022322656219825147


 62%|██████▏   | 308999/500000 [57:00:43<32:16:57,  1.64it/s] 

mean train loss: shift = 0.026661437071859837, frame = 0.022247831730172037


 62%|██████▏   | 309499/500000 [57:06:15<32:17:15,  1.64it/s] 

mean train loss: shift = 0.027409700501710176, frame = 0.022415039766579867


 62%|██████▏   | 309999/500000 [57:11:47<32:08:26,  1.64it/s] 

mean train loss: shift = 0.027219572987407445, frame = 0.022597123751416804


 62%|██████▏   | 310499/500000 [57:17:18<32:04:22,  1.64it/s] 

mean train loss: shift = 0.026294635012745857, frame = 0.022414848666638135


 62%|██████▏   | 310999/500000 [57:22:50<32:00:45,  1.64it/s] 

mean train loss: shift = 0.027333039838820697, frame = 0.022611631266772746


 62%|██████▏   | 311499/500000 [57:28:22<31:49:33,  1.65it/s] 

mean train loss: shift = 0.026977899795398115, frame = 0.02235513916797936


 62%|██████▏   | 311999/500000 [57:33:55<31:44:17,  1.65it/s] 

mean train loss: shift = 0.026698618192225694, frame = 0.022726320162415506


 62%|██████▏   | 312499/500000 [57:39:28<31:43:23,  1.64it/s] 

mean train loss: shift = 0.026608167914673686, frame = 0.02234641274623573


 63%|██████▎   | 312999/500000 [57:45:00<31:32:20,  1.65it/s] 

mean train loss: shift = 0.02732458875887096, frame = 0.02228793361224234


 63%|██████▎   | 313499/500000 [57:50:32<31:32:42,  1.64it/s] 

mean train loss: shift = 0.026000851355493067, frame = 0.022531135885044934


 63%|██████▎   | 313999/500000 [57:56:04<31:33:12,  1.64it/s] 

mean train loss: shift = 0.027075230926275253, frame = 0.02247690194286406


 63%|██████▎   | 314499/500000 [58:01:37<31:21:54,  1.64it/s] 

mean train loss: shift = 0.026678914302960037, frame = 0.022168383687734605


 63%|██████▎   | 314999/500000 [58:07:09<31:18:07,  1.64it/s] 

mean train loss: shift = 0.02601137907616794, frame = 0.022513273032382132


 63%|██████▎   | 315499/500000 [58:12:41<31:20:06,  1.64it/s] 

mean train loss: shift = 0.026726843232288956, frame = 0.022399434434249996


 63%|██████▎   | 315999/500000 [58:18:13<31:05:46,  1.64it/s] 

mean train loss: shift = 0.02630630782805383, frame = 0.022725222596898676


 63%|██████▎   | 316499/500000 [58:23:44<30:59:31,  1.64it/s] 

mean train loss: shift = 0.026251197023317217, frame = 0.022324633900076152


 63%|██████▎   | 316999/500000 [58:29:17<31:00:54,  1.64it/s] 

mean train loss: shift = 0.02931815496645868, frame = 0.022294655246660113


 63%|██████▎   | 317499/500000 [58:34:50<30:51:18,  1.64it/s] 

mean train loss: shift = 0.027473290221765638, frame = 0.02267167375423014


 64%|██████▎   | 317999/500000 [58:40:21<30:45:57,  1.64it/s] 

mean train loss: shift = 0.026113433236256242, frame = 0.022356590745970607


 64%|██████▎   | 318499/500000 [58:45:54<30:46:29,  1.64it/s] 

mean train loss: shift = 0.02664098584279418, frame = 0.022391149301081895


 64%|██████▍   | 318999/500000 [58:51:27<30:38:33,  1.64it/s] 

mean train loss: shift = 0.025456809835508465, frame = 0.02192772307433188


 64%|██████▍   | 319499/500000 [58:56:59<30:33:14,  1.64it/s] 

mean train loss: shift = 0.026552307691425085, frame = 0.02236402494646609


 64%|██████▍   | 319999/500000 [59:02:31<30:33:17,  1.64it/s] 

mean train loss: shift = 0.025610845999792218, frame = 0.02241953727044165


 64%|██████▍   | 320499/500000 [59:08:03<30:22:17,  1.64it/s] 

mean train loss: shift = 0.0267396076368168, frame = 0.022206859923899175


 64%|██████▍   | 320999/500000 [59:13:37<30:18:08,  1.64it/s] 

mean train loss: shift = 0.026006975669413804, frame = 0.022258265824988483


 64%|██████▍   | 321499/500000 [59:19:10<30:20:01,  1.63it/s] 

mean train loss: shift = 0.026179964596405626, frame = 0.02241736170463264


 64%|██████▍   | 321999/500000 [59:24:42<30:11:52,  1.64it/s] 

mean train loss: shift = 0.026241554679349063, frame = 0.022040183529257774


 64%|██████▍   | 322499/500000 [59:30:15<30:11:04,  1.63it/s] 

mean train loss: shift = 0.02777579814195633, frame = 0.022211949210613965


 65%|██████▍   | 322999/500000 [59:35:48<30:00:42,  1.64it/s] 

mean train loss: shift = 0.02630865970440209, frame = 0.022369731042534112


 65%|██████▍   | 323499/500000 [59:41:21<29:53:44,  1.64it/s] 

mean train loss: shift = 0.026573179833590983, frame = 0.022367221618071198


 65%|██████▍   | 323999/500000 [59:46:53<29:47:57,  1.64it/s] 

mean train loss: shift = 0.026352614073082804, frame = 0.022312447428703308


 65%|██████▍   | 324499/500000 [59:52:26<29:42:38,  1.64it/s] 

mean train loss: shift = 0.02662611944414675, frame = 0.02239447338692844


 65%|██████▍   | 324999/500000 [59:57:56<29:35:38,  1.64it/s] 

mean train loss: shift = 0.025997238425537944, frame = 0.022234493367373945


 65%|██████▌   | 325499/500000 [60:03:29<29:30:06,  1.64it/s] 

mean train loss: shift = 0.02626471706852317, frame = 0.022103279296308755


 65%|██████▌   | 325999/500000 [60:09:03<29:29:08,  1.64it/s] 

mean train loss: shift = 0.02648318558745086, frame = 0.022182099653407932


 65%|██████▌   | 326499/500000 [60:14:34<29:18:23,  1.64it/s] 

mean train loss: shift = 0.025820665176957847, frame = 0.02256457249633968


 65%|██████▌   | 326999/500000 [60:20:06<29:18:25,  1.64it/s] 

mean train loss: shift = 0.02616123579442501, frame = 0.02240746853686869


 65%|██████▌   | 327499/500000 [60:25:39<29:13:08,  1.64it/s] 

mean train loss: shift = 0.026574685929343103, frame = 0.02237211025878787


 66%|██████▌   | 327999/500000 [60:31:10<29:07:16,  1.64it/s] 

mean train loss: shift = 0.02528243281133473, frame = 0.022265483871102332


 66%|██████▌   | 328499/500000 [60:36:42<29:05:07,  1.64it/s] 

mean train loss: shift = 0.025587536338716747, frame = 0.021973629448562862


 66%|██████▌   | 328999/500000 [60:42:14<28:59:31,  1.64it/s] 

mean train loss: shift = 0.02674130460433662, frame = 0.022199406335130335


 66%|██████▌   | 329499/500000 [60:47:46<28:52:13,  1.64it/s] 

mean train loss: shift = 0.026269155472517014, frame = 0.02236331479251385


 66%|██████▌   | 329999/500000 [60:53:17<28:47:26,  1.64it/s] 

mean train loss: shift = 0.02561120733805001, frame = 0.022094824885949493


 66%|██████▌   | 330499/500000 [60:58:50<28:44:08,  1.64it/s] 

mean train loss: shift = 0.025355757320299745, frame = 0.02234555860608816


 66%|██████▌   | 330999/500000 [61:04:22<28:36:36,  1.64it/s] 

mean train loss: shift = 0.025419281020760538, frame = 0.022420338355004787


 66%|██████▋   | 331499/500000 [61:09:54<28:29:21,  1.64it/s] 

mean train loss: shift = 0.025834053914994, frame = 0.021813501069322228


 66%|██████▋   | 331999/500000 [61:15:27<28:28:44,  1.64it/s] 

mean train loss: shift = 0.025845492009073497, frame = 0.022068150348961355


 66%|██████▋   | 332499/500000 [61:20:58<28:19:19,  1.64it/s] 

mean train loss: shift = 0.025740878799930214, frame = 0.022456055510789157


 67%|██████▋   | 332999/500000 [61:26:31<28:14:17,  1.64it/s] 

mean train loss: shift = 0.025743128634989263, frame = 0.022084471257403492


 67%|██████▋   | 333499/500000 [61:32:03<28:13:40,  1.64it/s] 

mean train loss: shift = 0.024578721677884458, frame = 0.022216639621183275


 67%|██████▋   | 333999/500000 [61:37:34<28:04:51,  1.64it/s] 

mean train loss: shift = 0.026435566512867808, frame = 0.02228375164605677


 67%|██████▋   | 334499/500000 [61:43:06<27:59:46,  1.64it/s] 

mean train loss: shift = 0.025046254305168988, frame = 0.022268618242815136


 67%|██████▋   | 334999/500000 [61:48:39<27:56:24,  1.64it/s] 

mean train loss: shift = 0.02587769715487957, frame = 0.022058746615424753


 67%|██████▋   | 335499/500000 [61:54:12<27:48:48,  1.64it/s] 

mean train loss: shift = 0.026553147330880166, frame = 0.022013768929988145


 67%|██████▋   | 335999/500000 [61:59:44<27:47:09,  1.64it/s] 

mean train loss: shift = 0.026150475000962615, frame = 0.022347089981660247


 67%|██████▋   | 336499/500000 [62:05:17<27:46:38,  1.64it/s] 

mean train loss: shift = 0.0252665152233094, frame = 0.02231283907033503


 67%|██████▋   | 336999/500000 [62:10:48<27:33:49,  1.64it/s] 

mean train loss: shift = 0.025767469657585026, frame = 0.02253092668391764


 67%|██████▋   | 337499/500000 [62:16:20<27:29:01,  1.64it/s] 

mean train loss: shift = 0.026086862538009882, frame = 0.0223171017318964


 68%|██████▊   | 337999/500000 [62:21:52<27:29:11,  1.64it/s] 

mean train loss: shift = 0.02573941192217171, frame = 0.022050860684365035


 68%|██████▊   | 338499/500000 [62:27:25<27:15:37,  1.65it/s] 

mean train loss: shift = 0.025190909292548896, frame = 0.0221991065479815


 68%|██████▊   | 338999/500000 [62:32:57<27:15:45,  1.64it/s] 

mean train loss: shift = 0.02618100817874074, frame = 0.022406986325979233


 68%|██████▊   | 339499/500000 [62:38:29<27:14:03,  1.64it/s] 

mean train loss: shift = 0.025504810716956854, frame = 0.02239457402937114


 68%|██████▊   | 339999/500000 [62:44:01<27:08:44,  1.64it/s] 

mean train loss: shift = 0.024931732267141342, frame = 0.022263022623956203


 68%|██████▊   | 340499/500000 [62:49:34<26:57:29,  1.64it/s] 

mean train loss: shift = 0.025461904590949415, frame = 0.022183112390339373


 68%|██████▊   | 340999/500000 [62:55:06<26:57:51,  1.64it/s] 

mean train loss: shift = 0.024929916486144066, frame = 0.022300027882680297


 68%|██████▊   | 341499/500000 [63:00:37<26:46:40,  1.64it/s] 

mean train loss: shift = 0.026031580690294504, frame = 0.022011543715372683


 68%|██████▊   | 341999/500000 [63:06:10<26:45:40,  1.64it/s] 

mean train loss: shift = 0.02584612750634551, frame = 0.022382645143195986


 68%|██████▊   | 342499/500000 [63:11:42<26:44:52,  1.64it/s] 

mean train loss: shift = 0.025731119196861982, frame = 0.022109348330646754


 69%|██████▊   | 342999/500000 [63:17:15<26:33:50,  1.64it/s] 

mean train loss: shift = 0.024681545319035648, frame = 0.022058027951046826


 69%|██████▊   | 343499/500000 [63:22:46<26:30:04,  1.64it/s] 

mean train loss: shift = 0.025298735968768596, frame = 0.022363465305417777


 69%|██████▉   | 343999/500000 [63:28:19<26:26:23,  1.64it/s] 

mean train loss: shift = 0.025590874252840876, frame = 0.022216898191720246


 69%|██████▉   | 344499/500000 [63:33:51<26:20:31,  1.64it/s] 

mean train loss: shift = 0.02537716523837298, frame = 0.02196325430832803


 69%|██████▉   | 344999/500000 [63:39:23<26:16:18,  1.64it/s] 

mean train loss: shift = 0.025124812167137862, frame = 0.021886835020035505


 69%|██████▉   | 345499/500000 [63:44:55<26:12:32,  1.64it/s] 

mean train loss: shift = 0.025029407665133476, frame = 0.022179506616666914


 69%|██████▉   | 345999/500000 [63:50:27<26:04:59,  1.64it/s] 

mean train loss: shift = 0.026267220741137864, frame = 0.021961770800873637


 69%|██████▉   | 346499/500000 [63:55:59<25:57:29,  1.64it/s] 

mean train loss: shift = 0.025531435998156667, frame = 0.02215910243615508


 69%|██████▉   | 346999/500000 [64:01:32<25:54:58,  1.64it/s] 

mean train loss: shift = 0.02505274255014956, frame = 0.02177151928655803


 69%|██████▉   | 347499/500000 [64:07:04<25:46:41,  1.64it/s] 

mean train loss: shift = 0.025812506936490537, frame = 0.022257617738097906


 70%|██████▉   | 347999/500000 [64:12:37<25:43:31,  1.64it/s] 

mean train loss: shift = 0.02400135355629027, frame = 0.02192370842024684


 70%|██████▉   | 348499/500000 [64:18:09<25:40:43,  1.64it/s] 

mean train loss: shift = 0.024842206478118897, frame = 0.021771560540422796


 70%|██████▉   | 348999/500000 [64:23:41<25:31:59,  1.64it/s] 

mean train loss: shift = 0.025785377975553275, frame = 0.021938649263232947


 70%|██████▉   | 349499/500000 [64:29:13<25:27:27,  1.64it/s] 

mean train loss: shift = 0.024827067255973816, frame = 0.02197826669923961


 70%|██████▉   | 349999/500000 [64:34:46<25:22:49,  1.64it/s] 

mean train loss: shift = 0.025910315377637744, frame = 0.02213033589720726


 70%|███████   | 350499/500000 [64:40:18<25:18:50,  1.64it/s] 

mean train loss: shift = 0.025019672116264702, frame = 0.02205022521689534


 70%|███████   | 350999/500000 [64:45:51<25:11:38,  1.64it/s] 

mean train loss: shift = 0.024490374660119413, frame = 0.021756694681942464


 70%|███████   | 351499/500000 [64:51:23<25:06:54,  1.64it/s] 

mean train loss: shift = 0.023631917733699083, frame = 0.021962287789210676


 70%|███████   | 351999/500000 [64:56:54<25:01:16,  1.64it/s] 

mean train loss: shift = 0.024346604725345968, frame = 0.021964760649949313


 70%|███████   | 352499/500000 [65:02:26<24:56:19,  1.64it/s] 

mean train loss: shift = 0.02448596348054707, frame = 0.021981823163107036


 71%|███████   | 352999/500000 [65:07:59<24:56:05,  1.64it/s] 

mean train loss: shift = 0.024219485905021428, frame = 0.022153702681884168


 71%|███████   | 353499/500000 [65:13:30<24:51:21,  1.64it/s] 

mean train loss: shift = 0.024507158283144235, frame = 0.022113263176754116


 71%|███████   | 353999/500000 [65:19:02<24:42:23,  1.64it/s] 

mean train loss: shift = 0.024659656248055398, frame = 0.021838230116292833


 71%|███████   | 354499/500000 [65:24:35<24:37:33,  1.64it/s] 

mean train loss: shift = 0.02480972758680582, frame = 0.021880652904510498


 71%|███████   | 354999/500000 [65:30:06<24:31:42,  1.64it/s] 

mean train loss: shift = 0.02420095768943429, frame = 0.021989878775551915


 71%|███████   | 355499/500000 [65:35:38<24:27:49,  1.64it/s] 

mean train loss: shift = 0.024498489920049906, frame = 0.021718706764280796


 71%|███████   | 355999/500000 [65:41:10<24:24:01,  1.64it/s] 

mean train loss: shift = 0.024948517322540283, frame = 0.021950767083093525


 71%|███████▏  | 356499/500000 [65:46:41<24:14:59,  1.64it/s] 

mean train loss: shift = 0.023879899775609375, frame = 0.022182334311306475


 71%|███████▏  | 356999/500000 [65:52:12<24:14:03,  1.64it/s] 

mean train loss: shift = 0.024332708230242132, frame = 0.021504114728420974


 71%|███████▏  | 357499/500000 [65:57:45<24:14:01,  1.63it/s] 

mean train loss: shift = 0.02450684566050768, frame = 0.022133821645751596


 72%|███████▏  | 357999/500000 [66:03:17<23:57:31,  1.65it/s] 

mean train loss: shift = 0.02413331227749586, frame = 0.021808048458769918


 72%|███████▏  | 358499/500000 [66:08:50<23:55:16,  1.64it/s] 

mean train loss: shift = 0.025158419778570532, frame = 0.02171185534261167


 72%|███████▏  | 358999/500000 [66:14:22<23:52:18,  1.64it/s] 

mean train loss: shift = 0.02501069212704897, frame = 0.022026507621631025


 72%|███████▏  | 359499/500000 [66:19:55<23:44:52,  1.64it/s] 

mean train loss: shift = 0.02412957532797009, frame = 0.02237211866863072


 72%|███████▏  | 359999/500000 [66:25:26<23:40:50,  1.64it/s] 

mean train loss: shift = 0.024328830847516657, frame = 0.02215913244150579


 72%|███████▏  | 360499/500000 [66:30:58<23:38:04,  1.64it/s] 

mean train loss: shift = 0.024148178627714515, frame = 0.022024821728467942


 72%|███████▏  | 360999/500000 [66:36:30<23:29:04,  1.64it/s] 

mean train loss: shift = 0.024045258387923242, frame = 0.02173101993650198


 72%|███████▏  | 361499/500000 [66:42:01<23:25:46,  1.64it/s] 

mean train loss: shift = 0.024237544037401675, frame = 0.021864755302667616


 72%|███████▏  | 361999/500000 [66:47:33<23:24:12,  1.64it/s] 

mean train loss: shift = 0.02448322665505111, frame = 0.021857831751927733


 72%|███████▏  | 362499/500000 [66:53:05<23:17:42,  1.64it/s] 

mean train loss: shift = 0.02454424897581339, frame = 0.021883151588961484


 73%|███████▎  | 362999/500000 [66:58:37<23:10:40,  1.64it/s] 

mean train loss: shift = 0.024071547642350196, frame = 0.021839241322129964


 73%|███████▎  | 363499/500000 [67:04:09<23:10:16,  1.64it/s] 

mean train loss: shift = 0.02442745436914265, frame = 0.02208597289212048


 73%|███████▎  | 363999/500000 [67:09:40<22:59:16,  1.64it/s] 

mean train loss: shift = 0.024058201149106027, frame = 0.021764863681048154


 73%|███████▎  | 364499/500000 [67:15:12<22:59:23,  1.64it/s] 

mean train loss: shift = 0.02439361872524023, frame = 0.022015188535675405


 73%|███████▎  | 364999/500000 [67:20:44<22:51:15,  1.64it/s] 

mean train loss: shift = 0.024006933784112335, frame = 0.021588889438658953


 73%|███████▎  | 365499/500000 [67:26:17<22:42:43,  1.64it/s] 

mean train loss: shift = 0.02385519547201693, frame = 0.021745974451303482


 73%|███████▎  | 365999/500000 [67:31:50<22:41:21,  1.64it/s] 

mean train loss: shift = 0.02464208096638322, frame = 0.021963940285146238


 73%|███████▎  | 366499/500000 [67:37:22<22:36:49,  1.64it/s] 

mean train loss: shift = 0.02375633368920535, frame = 0.02193270457908511


 73%|███████▎  | 366999/500000 [67:42:53<22:29:58,  1.64it/s] 

mean train loss: shift = 0.023875298891216517, frame = 0.02207070640102029


 73%|███████▎  | 367499/500000 [67:48:25<22:23:35,  1.64it/s] 

mean train loss: shift = 0.02437586896494031, frame = 0.022359881732612847


 74%|███████▎  | 367999/500000 [67:53:57<22:21:29,  1.64it/s] 

mean train loss: shift = 0.024850023899227382, frame = 0.02173793963342905


 74%|███████▎  | 368499/500000 [67:59:28<22:13:16,  1.64it/s] 

mean train loss: shift = 0.02421243021450937, frame = 0.021760727832093835


 74%|███████▍  | 368999/500000 [68:05:00<22:07:20,  1.64it/s] 

mean train loss: shift = 0.022890552897006273, frame = 0.02194067801348865


 74%|███████▍  | 369499/500000 [68:10:33<22:09:27,  1.64it/s] 

mean train loss: shift = 0.02452537763118744, frame = 0.02209151235409081


 74%|███████▍  | 369999/500000 [68:16:05<21:59:17,  1.64it/s] 

mean train loss: shift = 0.024435763884335757, frame = 0.021946547254920006


 74%|███████▍  | 370499/500000 [68:21:37<21:56:51,  1.64it/s] 

mean train loss: shift = 0.023367809243500234, frame = 0.022086685091257094


 74%|███████▍  | 370999/500000 [68:27:09<21:53:47,  1.64it/s] 

mean train loss: shift = 0.023919075060635807, frame = 0.02213675046339631


 74%|███████▍  | 371499/500000 [68:32:41<21:46:53,  1.64it/s] 

mean train loss: shift = 0.02433163161948323, frame = 0.021751660257577896


 74%|███████▍  | 371999/500000 [68:38:12<21:38:03,  1.64it/s] 

mean train loss: shift = 0.023868264682590963, frame = 0.022008638767525554


 74%|███████▍  | 372499/500000 [68:43:46<21:36:55,  1.64it/s] 

mean train loss: shift = 0.024042787104845045, frame = 0.02166559785977006


 75%|███████▍  | 372999/500000 [68:49:17<21:27:13,  1.64it/s] 

mean train loss: shift = 0.023459722358733417, frame = 0.022209253078326583


 75%|███████▍  | 373499/500000 [68:54:49<21:25:25,  1.64it/s] 

mean train loss: shift = 0.02402910060249269, frame = 0.021649549482390285


 75%|███████▍  | 373999/500000 [69:00:21<21:21:00,  1.64it/s] 

mean train loss: shift = 0.023743094054982067, frame = 0.02178799291700125


 75%|███████▍  | 374499/500000 [69:05:53<21:15:28,  1.64it/s] 

mean train loss: shift = 0.023999983854591848, frame = 0.021935439066961406


 75%|███████▍  | 374999/500000 [69:11:24<21:10:02,  1.64it/s] 

mean train loss: shift = 0.024114572016522288, frame = 0.02187551756761968


 75%|███████▌  | 375499/500000 [69:16:57<21:08:18,  1.64it/s] 

mean train loss: shift = 0.023324058772064747, frame = 0.021540668744593858


 75%|███████▌  | 375999/500000 [69:22:28<20:59:54,  1.64it/s] 

mean train loss: shift = 0.02295147871784866, frame = 0.021718930250033738


 75%|███████▌  | 376499/500000 [69:28:02<20:55:15,  1.64it/s] 

mean train loss: shift = 0.024063402405008674, frame = 0.02194358729571104


 75%|███████▌  | 376999/500000 [69:33:35<20:49:49,  1.64it/s] 

mean train loss: shift = 0.02305430599115789, frame = 0.02204438591003418


 75%|███████▌  | 377499/500000 [69:39:07<20:45:14,  1.64it/s] 

mean train loss: shift = 0.023700877582654357, frame = 0.021719796182587743


 76%|███████▌  | 377999/500000 [69:44:39<20:39:26,  1.64it/s] 

mean train loss: shift = 0.022898698871955274, frame = 0.02160802473127842


 76%|███████▌  | 378499/500000 [69:50:12<20:38:10,  1.64it/s] 

mean train loss: shift = 0.0235957171972841, frame = 0.02126124407723546


 76%|███████▌  | 378999/500000 [69:55:45<20:27:11,  1.64it/s] 

mean train loss: shift = 0.023872262853197754, frame = 0.021671216383576393


 76%|███████▌  | 379499/500000 [70:01:17<20:24:55,  1.64it/s] 

mean train loss: shift = 0.02400824470818043, frame = 0.02176147008500993


 76%|███████▌  | 379999/500000 [70:06:50<20:19:17,  1.64it/s] 

mean train loss: shift = 0.023539053028449417, frame = 0.02181610218062997


 76%|███████▌  | 380499/500000 [70:12:21<20:13:25,  1.64it/s] 

mean train loss: shift = 0.023360558219254018, frame = 0.021597750829532743


 76%|███████▌  | 380999/500000 [70:17:52<20:09:26,  1.64it/s] 

mean train loss: shift = 0.02272216928936541, frame = 0.021758836843073368


 76%|███████▋  | 381499/500000 [70:23:26<20:07:33,  1.64it/s] 

mean train loss: shift = 0.022668113935738803, frame = 0.02178280320763588


 76%|███████▋  | 381999/500000 [70:28:58<19:57:58,  1.64it/s] 

mean train loss: shift = 0.023603408940136433, frame = 0.02187865698710084


 76%|███████▋  | 382499/500000 [70:34:30<19:53:55,  1.64it/s] 

mean train loss: shift = 0.02294498270004988, frame = 0.021914071781560778


 77%|███████▋  | 382999/500000 [70:40:02<19:49:15,  1.64it/s] 

mean train loss: shift = 0.022889818523079156, frame = 0.02161013164743781


 77%|███████▋  | 383499/500000 [70:45:33<19:41:59,  1.64it/s] 

mean train loss: shift = 0.023005089929327368, frame = 0.021704058799892664


 77%|███████▋  | 383999/500000 [70:51:05<19:35:54,  1.64it/s] 

mean train loss: shift = 0.023843813568353654, frame = 0.021925939371809363


 77%|███████▋  | 384499/500000 [70:56:38<19:36:40,  1.64it/s] 

mean train loss: shift = 0.023271927073597907, frame = 0.02171448640525341


 77%|███████▋  | 384999/500000 [71:02:10<19:26:01,  1.64it/s] 

mean train loss: shift = 0.023565035296604038, frame = 0.021591097179800272


 77%|███████▋  | 385499/500000 [71:07:42<19:25:57,  1.64it/s] 

mean train loss: shift = 0.023111740284599364, frame = 0.02164527389407158


 77%|███████▋  | 385999/500000 [71:13:14<19:16:55,  1.64it/s] 

mean train loss: shift = 0.023440017679706215, frame = 0.021230380525812508


 77%|███████▋  | 386499/500000 [71:18:46<19:12:07,  1.64it/s] 

mean train loss: shift = 0.02304628398269415, frame = 0.021268760507926344


 77%|███████▋  | 386999/500000 [71:24:18<19:05:14,  1.64it/s] 

mean train loss: shift = 0.02478190515190363, frame = 0.021583583779633046


 77%|███████▋  | 387499/500000 [71:29:51<19:06:49,  1.63it/s] 

mean train loss: shift = 0.023939825780689716, frame = 0.02155348588898778


 78%|███████▊  | 387999/500000 [71:35:22<18:53:36,  1.65it/s] 

mean train loss: shift = 0.023355565994977952, frame = 0.02146890286915004


 78%|███████▊  | 388499/500000 [71:40:55<18:52:02,  1.64it/s] 

mean train loss: shift = 0.022662068855017422, frame = 0.021665624398738145


 78%|███████▊  | 388999/500000 [71:46:27<18:47:13,  1.64it/s] 

mean train loss: shift = 0.023245432103984057, frame = 0.021583020506426694


 78%|███████▊  | 389499/500000 [71:52:00<18:40:17,  1.64it/s] 

mean train loss: shift = 0.023215131694450976, frame = 0.02182082900032401


 78%|███████▊  | 389999/500000 [71:57:33<18:37:18,  1.64it/s] 

mean train loss: shift = 0.0225489314366132, frame = 0.021756731674075127


 78%|███████▊  | 390499/500000 [72:03:06<18:35:26,  1.64it/s] 

mean train loss: shift = 0.02239225988090038, frame = 0.0217374185025692


 78%|███████▊  | 390999/500000 [72:08:37<18:25:42,  1.64it/s] 

mean train loss: shift = 0.02220584278739989, frame = 0.021630765875801445


 78%|███████▊  | 391499/500000 [72:14:11<18:23:51,  1.64it/s] 

mean train loss: shift = 0.022286491304636002, frame = 0.021427049551159143


 78%|███████▊  | 391999/500000 [72:19:44<18:19:48,  1.64it/s] 

mean train loss: shift = 0.02289542883168906, frame = 0.02139357778429985


 78%|███████▊  | 392499/500000 [72:25:16<18:12:18,  1.64it/s] 

mean train loss: shift = 0.02362589182704687, frame = 0.021876186730340124


 79%|███████▊  | 392999/500000 [72:30:48<18:05:53,  1.64it/s] 

mean train loss: shift = 0.023487571502104402, frame = 0.02127240077778697


 79%|███████▊  | 393499/500000 [72:36:21<18:02:33,  1.64it/s] 

mean train loss: shift = 0.02284243266098201, frame = 0.0218182695414871


 79%|███████▉  | 393999/500000 [72:41:52<17:53:59,  1.64it/s] 

mean train loss: shift = 0.02268096575140953, frame = 0.021314015168696642


 79%|███████▉  | 394499/500000 [72:47:24<17:52:42,  1.64it/s] 

mean train loss: shift = 0.023561891259625555, frame = 0.021691909449175


 79%|███████▉  | 394999/500000 [72:52:56<17:47:19,  1.64it/s] 

mean train loss: shift = 0.02342625137232244, frame = 0.021475396130234


 79%|███████▉  | 395499/500000 [72:58:29<17:40:37,  1.64it/s] 

mean train loss: shift = 0.022250145079568028, frame = 0.021288102906197308


 79%|███████▉  | 395999/500000 [73:04:00<17:35:22,  1.64it/s] 

mean train loss: shift = 0.02263781782146543, frame = 0.02154228661581874


 79%|███████▉  | 396499/500000 [73:09:33<17:32:38,  1.64it/s] 

mean train loss: shift = 0.02274499085359275, frame = 0.02145481431670487


 79%|███████▉  | 396999/500000 [73:15:05<17:22:51,  1.65it/s] 

mean train loss: shift = 0.023190100248903035, frame = 0.021999447055161


 79%|███████▉  | 397499/500000 [73:20:37<17:18:54,  1.64it/s] 

mean train loss: shift = 0.023352921217679977, frame = 0.021434197090566157


 80%|███████▉  | 397999/500000 [73:26:10<17:17:19,  1.64it/s] 

mean train loss: shift = 0.023758114176802337, frame = 0.021638570690527558


 80%|███████▉  | 398499/500000 [73:31:42<17:10:49,  1.64it/s] 

mean train loss: shift = 0.022711465045809745, frame = 0.02159498470276594


 80%|███████▉  | 398999/500000 [73:37:13<17:04:49,  1.64it/s] 

mean train loss: shift = 0.0231268839687109, frame = 0.02154699765704572


 80%|███████▉  | 399499/500000 [73:42:46<17:02:29,  1.64it/s] 

mean train loss: shift = 0.022906828989274798, frame = 0.021613886116072536


 80%|███████▉  | 399999/500000 [73:48:18<16:53:50,  1.64it/s] 

mean train loss: shift = 0.02260008130967617, frame = 0.02202128474228084


 80%|████████  | 400499/500000 [73:53:51<16:48:59,  1.64it/s] 

mean train loss: shift = 0.022043144257739185, frame = 0.021443496227264405


 80%|████████  | 400999/500000 [73:59:23<16:45:33,  1.64it/s] 

mean train loss: shift = 0.02291933967266232, frame = 0.0218838163446635


 80%|████████  | 401499/500000 [74:04:55<16:39:36,  1.64it/s] 

mean train loss: shift = 0.022682278311811386, frame = 0.02181511918641627


 80%|████████  | 401999/500000 [74:10:26<16:34:24,  1.64it/s] 

mean train loss: shift = 0.021864682333543897, frame = 0.021461108280345798


 80%|████████  | 402499/500000 [74:16:00<16:33:07,  1.64it/s] 

mean train loss: shift = 0.02190003150794655, frame = 0.021785014072433113


 81%|████████  | 402999/500000 [74:21:32<16:23:06,  1.64it/s] 

mean train loss: shift = 0.02253086942061782, frame = 0.021574197083711624


 81%|████████  | 403499/500000 [74:27:04<16:20:08,  1.64it/s] 

mean train loss: shift = 0.02216525630094111, frame = 0.021477530039846898


 81%|████████  | 403999/500000 [74:32:37<16:16:44,  1.64it/s] 

mean train loss: shift = 0.022454634128138425, frame = 0.021395788792520763


 81%|████████  | 404499/500000 [74:38:09<16:08:47,  1.64it/s] 

mean train loss: shift = 0.022100777316838502, frame = 0.021430936941877008


 81%|████████  | 404999/500000 [74:43:41<16:05:49,  1.64it/s] 

mean train loss: shift = 0.02217989231273532, frame = 0.02152907499484718


 81%|████████  | 405499/500000 [74:49:14<16:02:39,  1.64it/s] 

mean train loss: shift = 0.0226978111397475, frame = 0.0215367996096611


 81%|████████  | 405999/500000 [74:54:45<15:53:04,  1.64it/s] 

mean train loss: shift = 0.0221920647919178, frame = 0.021489389976486563


 81%|████████▏ | 406499/500000 [75:00:17<15:52:11,  1.64it/s] 

mean train loss: shift = 0.022924642249941826, frame = 0.02173690500855446


 81%|████████▏ | 406999/500000 [75:05:50<15:46:01,  1.64it/s] 

mean train loss: shift = 0.02242562030442059, frame = 0.021527236673980953


 81%|████████▏ | 407499/500000 [75:11:23<15:37:59,  1.64it/s] 

mean train loss: shift = 0.021794351149350404, frame = 0.021216237206012012


 82%|████████▏ | 407999/500000 [75:16:55<15:34:45,  1.64it/s] 

mean train loss: shift = 0.02276514989323914, frame = 0.021591338427737354


 82%|████████▏ | 408499/500000 [75:22:27<15:31:07,  1.64it/s] 

mean train loss: shift = 0.02286323368921876, frame = 0.02154616788588464


 82%|████████▏ | 408999/500000 [75:27:59<15:23:25,  1.64it/s] 

mean train loss: shift = 0.022410464257001876, frame = 0.02165239432454109


 82%|████████▏ | 409499/500000 [75:33:32<15:18:22,  1.64it/s] 

mean train loss: shift = 0.022103258222341538, frame = 0.021476568397134542


 82%|████████▏ | 409999/500000 [75:39:06<15:17:49,  1.63it/s] 

mean train loss: shift = 0.023097395110875368, frame = 0.021208215240389107


 82%|████████▏ | 410499/500000 [75:44:38<15:07:59,  1.64it/s] 

mean train loss: shift = 0.02138061172142625, frame = 0.021464905636385083


 82%|████████▏ | 410999/500000 [75:50:10<15:04:24,  1.64it/s] 

mean train loss: shift = 0.02254435532540083, frame = 0.021372414438053964


 82%|████████▏ | 411499/500000 [75:55:42<15:01:38,  1.64it/s] 

mean train loss: shift = 0.02573636094108224, frame = 0.02145522090420127


 82%|████████▏ | 411999/500000 [76:01:14<14:53:29,  1.64it/s] 

mean train loss: shift = 0.021816718670539557, frame = 0.021483383130282165


 82%|████████▏ | 412499/500000 [76:06:46<14:49:20,  1.64it/s] 

mean train loss: shift = 0.021332096511498092, frame = 0.02146513237990439


 83%|████████▎ | 412999/500000 [76:12:19<14:46:01,  1.64it/s] 

mean train loss: shift = 0.022104726824909448, frame = 0.021126058541238307


 83%|████████▎ | 413499/500000 [76:17:51<14:39:09,  1.64it/s] 

mean train loss: shift = 0.021794684674590826, frame = 0.021752551233395933


 83%|████████▎ | 413999/500000 [76:23:23<14:34:25,  1.64it/s] 

mean train loss: shift = 0.022122220013290644, frame = 0.02141849067993462


 83%|████████▎ | 414499/500000 [76:28:56<14:29:53,  1.64it/s] 

mean train loss: shift = 0.02143370706960559, frame = 0.021291185652837157


 83%|████████▎ | 414999/500000 [76:34:28<14:23:11,  1.64it/s] 

mean train loss: shift = 0.02216962431371212, frame = 0.021372052013874054


 83%|████████▎ | 415499/500000 [76:40:00<14:20:15,  1.64it/s] 

mean train loss: shift = 0.021889941677451133, frame = 0.02163123308122158


 83%|████████▎ | 415999/500000 [76:45:33<14:15:09,  1.64it/s] 

mean train loss: shift = 0.021984550915658475, frame = 0.021575544910505413


 83%|████████▎ | 416499/500000 [76:51:06<14:07:39,  1.64it/s] 

mean train loss: shift = 0.02245890219323337, frame = 0.021611071372404693


 83%|████████▎ | 416999/500000 [76:56:37<14:02:46,  1.64it/s] 

mean train loss: shift = 0.022075152795761825, frame = 0.02149402299337089


 83%|████████▎ | 417499/500000 [77:02:10<13:59:26,  1.64it/s] 

mean train loss: shift = 0.02174836352467537, frame = 0.021494941683486105


 84%|████████▎ | 417999/500000 [77:07:42<13:54:07,  1.64it/s] 

mean train loss: shift = 0.022625474579632283, frame = 0.02148166373372078


 84%|████████▎ | 418499/500000 [77:13:14<13:48:28,  1.64it/s] 

mean train loss: shift = 0.021815467811189593, frame = 0.02114805817976594


 84%|████████▍ | 418999/500000 [77:18:48<13:45:34,  1.64it/s] 

mean train loss: shift = 0.02090307313762605, frame = 0.021286122430115937


 84%|████████▍ | 419499/500000 [77:24:19<13:36:39,  1.64it/s] 

mean train loss: shift = 0.021810736970975995, frame = 0.021554867496713995


 84%|████████▍ | 419999/500000 [77:29:52<13:31:07,  1.64it/s] 

mean train loss: shift = 0.022017607276327907, frame = 0.02137679772078991


 84%|████████▍ | 420499/500000 [77:35:25<13:30:37,  1.63it/s] 

mean train loss: shift = 0.022308974895626308, frame = 0.021304613759741187


 84%|████████▍ | 420999/500000 [77:40:56<13:20:22,  1.65it/s] 

mean train loss: shift = 0.021710027797147632, frame = 0.021576347906142473


 84%|████████▍ | 421499/500000 [77:46:28<13:16:51,  1.64it/s] 

mean train loss: shift = 0.021551093006506563, frame = 0.021319976143538952


 84%|████████▍ | 421999/500000 [77:52:00<13:13:29,  1.64it/s] 

mean train loss: shift = 0.021277141908183694, frame = 0.021330295778810977


 84%|████████▍ | 422499/500000 [77:57:32<13:06:26,  1.64it/s] 

mean train loss: shift = 0.021868255281820893, frame = 0.021309304043650626


 85%|████████▍ | 422999/500000 [78:03:04<13:00:49,  1.64it/s] 

mean train loss: shift = 0.021959471620619297, frame = 0.021320935908704996


 85%|████████▍ | 423499/500000 [78:08:37<12:57:54,  1.64it/s] 

mean train loss: shift = 0.02213022196572274, frame = 0.0212775996401906


 85%|████████▍ | 423999/500000 [78:14:09<12:50:06,  1.64it/s] 

mean train loss: shift = 0.021987223813310267, frame = 0.02139057088457048


 85%|████████▍ | 424499/500000 [78:19:41<12:46:42,  1.64it/s] 

mean train loss: shift = 0.021988679285161197, frame = 0.021328783858567478


 85%|████████▍ | 424999/500000 [78:25:14<12:44:13,  1.64it/s] 

mean train loss: shift = 0.021591919145546853, frame = 0.0214697486422956


 85%|████████▌ | 425499/500000 [78:30:46<12:37:14,  1.64it/s] 

mean train loss: shift = 0.021787025119177995, frame = 0.021173807812854648


 85%|████████▌ | 425999/500000 [78:36:19<12:31:48,  1.64it/s] 

mean train loss: shift = 0.021738948022946715, frame = 0.021338233672082423


 85%|████████▌ | 426499/500000 [78:41:52<12:27:57,  1.64it/s] 

mean train loss: shift = 0.021761289938353003, frame = 0.021086425617337225


 85%|████████▌ | 426999/500000 [78:47:23<12:21:38,  1.64it/s] 

mean train loss: shift = 0.021521444333717228, frame = 0.021338361632078886


 85%|████████▌ | 427499/500000 [78:52:56<12:18:15,  1.64it/s] 

mean train loss: shift = 0.021645309679210186, frame = 0.021280903309583665


 86%|████████▌ | 427999/500000 [78:58:28<12:12:58,  1.64it/s] 

mean train loss: shift = 0.021708684640005232, frame = 0.02107609634846449


 86%|████████▌ | 428499/500000 [79:04:00<12:05:53,  1.64it/s] 

mean train loss: shift = 0.0217089521009475, frame = 0.02116352858953178


 86%|████████▌ | 428999/500000 [79:09:32<12:01:54,  1.64it/s] 

mean train loss: shift = 0.021455435370095075, frame = 0.021062862792983653


 86%|████████▌ | 429499/500000 [79:15:05<11:58:06,  1.64it/s] 

mean train loss: shift = 0.022253662743605674, frame = 0.02138031979650259


 86%|████████▌ | 429999/500000 [79:20:36<11:49:53,  1.64it/s] 

mean train loss: shift = 0.02063038014061749, frame = 0.021213000616058707


 86%|████████▌ | 430499/500000 [79:26:08<11:46:06,  1.64it/s] 

mean train loss: shift = 0.022007159700617195, frame = 0.021392930375412105


 86%|████████▌ | 430999/500000 [79:31:40<11:43:11,  1.64it/s] 

mean train loss: shift = 0.02107812630198896, frame = 0.02158286648057401


 86%|████████▋ | 431499/500000 [79:37:11<11:35:08,  1.64it/s] 

mean train loss: shift = 0.022713718959130345, frame = 0.02108070306107402


 86%|████████▋ | 431999/500000 [79:42:43<11:31:44,  1.64it/s] 

mean train loss: shift = 0.021062626938335598, frame = 0.021640873413532973


 86%|████████▋ | 432499/500000 [79:48:16<11:26:22,  1.64it/s] 

mean train loss: shift = 0.021383495531976222, frame = 0.021265741642564534


 87%|████████▋ | 432999/500000 [79:53:47<11:19:17,  1.64it/s] 

mean train loss: shift = 0.020902241504751146, frame = 0.021377702686935663


 87%|████████▋ | 433499/500000 [79:59:20<11:16:48,  1.64it/s] 

mean train loss: shift = 0.021205219630151987, frame = 0.02129162490181625


 87%|████████▋ | 433999/500000 [80:04:53<11:12:18,  1.64it/s] 

mean train loss: shift = 0.021097900062799452, frame = 0.02114059187285602


 87%|████████▋ | 434499/500000 [80:10:25<11:06:18,  1.64it/s] 

mean train loss: shift = 0.021555747780017554, frame = 0.021578878467902542


 87%|████████▋ | 434999/500000 [80:15:56<10:59:25,  1.64it/s] 

mean train loss: shift = 0.0216366227529943, frame = 0.020894954269751905


 87%|████████▋ | 435499/500000 [80:21:29<10:56:57,  1.64it/s] 

mean train loss: shift = 0.021182107373140753, frame = 0.021066826824098824


 87%|████████▋ | 435999/500000 [80:27:01<10:48:11,  1.65it/s] 

mean train loss: shift = 0.020624919240362943, frame = 0.021083770416676997


 87%|████████▋ | 436499/500000 [80:32:33<10:45:29,  1.64it/s] 

mean train loss: shift = 0.021053898632526397, frame = 0.021155138801783324


 87%|████████▋ | 436999/500000 [80:38:05<10:39:31,  1.64it/s] 

mean train loss: shift = 0.02343500194977969, frame = 0.02109725177101791


 87%|████████▋ | 437499/500000 [80:43:37<10:34:01,  1.64it/s] 

mean train loss: shift = 0.021068802028894426, frame = 0.02131513650342822


 88%|████████▊ | 437999/500000 [80:49:09<10:29:08,  1.64it/s] 

mean train loss: shift = 0.021320671213790773, frame = 0.0213460880368948


 88%|████████▊ | 438499/500000 [80:54:43<10:26:01,  1.64it/s] 

mean train loss: shift = 0.021304318860173227, frame = 0.020988336179405452


 88%|████████▊ | 438999/500000 [81:00:14<10:18:58,  1.64it/s] 

mean train loss: shift = 0.022128032823093235, frame = 0.021292807692661883


 88%|████████▊ | 439499/500000 [81:05:46<10:14:01,  1.64it/s] 

mean train loss: shift = 0.020792843582108617, frame = 0.021393705813214182


 88%|████████▊ | 439999/500000 [81:11:18<10:10:14,  1.64it/s] 

mean train loss: shift = 0.020857328961603343, frame = 0.021387934114784003


 88%|████████▊ | 440499/500000 [81:16:50<10:04:00,  1.64it/s] 

mean train loss: shift = 0.021030271034687756, frame = 0.021272221606224776


 88%|████████▊ | 440999/500000 [81:22:22<9:59:40,  1.64it/s]  

mean train loss: shift = 0.021238312248140573, frame = 0.021188930306583642


 88%|████████▊ | 441499/500000 [81:27:55<9:56:10,  1.64it/s]  

mean train loss: shift = 0.021930968327447772, frame = 0.021383663093671203


 88%|████████▊ | 441999/500000 [81:33:26<9:47:21,  1.65it/s]  

mean train loss: shift = 0.020716754980385305, frame = 0.021052352976053953


 88%|████████▊ | 442499/500000 [81:38:58<9:44:42,  1.64it/s]  

mean train loss: shift = 0.02110820906702429, frame = 0.021486208841204644


 89%|████████▊ | 442999/500000 [81:44:30<9:39:17,  1.64it/s]  

mean train loss: shift = 0.020979564153589307, frame = 0.021441247316077352


 89%|████████▊ | 443499/500000 [81:50:01<9:33:16,  1.64it/s]  

mean train loss: shift = 0.024410773182287814, frame = 0.021527442244812848


 89%|████████▉ | 443999/500000 [81:55:34<9:28:13,  1.64it/s]  

mean train loss: shift = 0.021379150727763772, frame = 0.021070216601714493


 89%|████████▉ | 444499/500000 [82:01:06<9:24:54,  1.64it/s]  

mean train loss: shift = 0.021193583814427257, frame = 0.021308290177956223


 89%|████████▉ | 444999/500000 [82:06:37<9:17:42,  1.64it/s]  

mean train loss: shift = 0.020466428110376002, frame = 0.021191165713593363


 89%|████████▉ | 445499/500000 [82:12:10<9:13:34,  1.64it/s]  

mean train loss: shift = 0.02150726825464517, frame = 0.02125011297687888


 89%|████████▉ | 445999/500000 [82:17:42<9:08:59,  1.64it/s]  

mean train loss: shift = 0.021099093049764633, frame = 0.02123302844725549


 89%|████████▉ | 446499/500000 [82:23:14<9:03:05,  1.64it/s]  

mean train loss: shift = 0.020199307327158748, frame = 0.021360437721014022


 89%|████████▉ | 446999/500000 [82:28:46<8:59:02,  1.64it/s]  

mean train loss: shift = 0.02103541933745146, frame = 0.021252087719738483


 89%|████████▉ | 447499/500000 [82:34:20<8:54:55,  1.64it/s]  

mean train loss: shift = 0.020583676350302994, frame = 0.021085085067898034


 90%|████████▉ | 447999/500000 [82:39:52<8:47:53,  1.64it/s]  

mean train loss: shift = 0.02200459106359631, frame = 0.021184075752273202


 90%|████████▉ | 448499/500000 [82:45:24<8:44:38,  1.64it/s]  

mean train loss: shift = 0.020171945514157415, frame = 0.020801698049530387


 90%|████████▉ | 448999/500000 [82:50:58<8:38:45,  1.64it/s]  

mean train loss: shift = 0.02129143164679408, frame = 0.021126721749082207


 90%|████████▉ | 449499/500000 [82:56:29<8:32:30,  1.64it/s]  

mean train loss: shift = 0.02093508065212518, frame = 0.02118902163207531


 90%|████████▉ | 449999/500000 [83:02:01<8:28:12,  1.64it/s]  

mean train loss: shift = 0.021422117264941334, frame = 0.021119570277631283


 90%|█████████ | 450499/500000 [83:07:34<8:25:09,  1.63it/s]  

mean train loss: shift = 0.020755844255909324, frame = 0.021290963614359497


 90%|█████████ | 450999/500000 [83:13:05<8:17:24,  1.64it/s]  

mean train loss: shift = 0.020694940755143762, frame = 0.020952156139537693


 90%|█████████ | 451499/500000 [83:18:38<8:12:10,  1.64it/s]  

mean train loss: shift = 0.020339940530247985, frame = 0.02112843686528504


 90%|█████████ | 451999/500000 [83:24:11<8:07:46,  1.64it/s]  

mean train loss: shift = 0.020871164079755544, frame = 0.020847612719982863


 90%|█████████ | 452499/500000 [83:29:43<8:02:50,  1.64it/s]  

mean train loss: shift = 0.02039833942707628, frame = 0.02101757776737213


 91%|█████████ | 452999/500000 [83:35:14<7:57:21,  1.64it/s]  

mean train loss: shift = 0.02029847623966634, frame = 0.021424342725425958


 91%|█████████ | 453499/500000 [83:40:47<7:53:14,  1.64it/s]  

mean train loss: shift = 0.020351749463006854, frame = 0.02134817585349083


 91%|█████████ | 453999/500000 [83:46:19<7:46:32,  1.64it/s]  

mean train loss: shift = 0.02101884714141488, frame = 0.02076878508552909


 91%|█████████ | 454499/500000 [83:51:53<7:43:18,  1.64it/s]  

mean train loss: shift = 0.02075088189356029, frame = 0.02104849099740386


 91%|█████████ | 454999/500000 [83:57:26<7:38:06,  1.64it/s]  

mean train loss: shift = 0.020688668489456177, frame = 0.020906809078529476


 91%|█████████ | 455499/500000 [84:02:57<7:32:05,  1.64it/s]  

mean train loss: shift = 0.0212178455311805, frame = 0.02121447374112904


 91%|█████████ | 455999/500000 [84:08:29<7:26:28,  1.64it/s]  

mean train loss: shift = 0.01986409655585885, frame = 0.021105620801448823


 91%|█████████▏| 456499/500000 [84:14:02<7:22:27,  1.64it/s]  

mean train loss: shift = 0.020299764539115132, frame = 0.02112090565264225


 91%|█████████▏| 456999/500000 [84:19:34<7:16:33,  1.64it/s]  

mean train loss: shift = 0.02167497184872627, frame = 0.02104987551830709


 91%|█████████▏| 457499/500000 [84:25:06<7:11:57,  1.64it/s]  

mean train loss: shift = 0.021290127678774296, frame = 0.021334215182811023


 92%|█████████▏| 457999/500000 [84:30:39<7:07:03,  1.64it/s]  

mean train loss: shift = 0.02034822602570057, frame = 0.02090763489343226


 92%|█████████▏| 458499/500000 [84:36:11<7:01:30,  1.64it/s]  

mean train loss: shift = 0.020973857687786222, frame = 0.021036472572013735


 92%|█████████▏| 458999/500000 [84:41:42<6:57:11,  1.64it/s] 

mean train loss: shift = 0.020121291373856365, frame = 0.021146094281226397


 92%|█████████▏| 459499/500000 [84:47:15<6:51:52,  1.64it/s]  

mean train loss: shift = 0.02061019704863429, frame = 0.021120121037587522


 92%|█████████▏| 459999/500000 [84:52:46<6:46:14,  1.64it/s] 

mean train loss: shift = 0.02125223504472524, frame = 0.021198136974126102


 92%|█████████▏| 460499/500000 [84:58:19<6:41:00,  1.64it/s] 

mean train loss: shift = 0.020295997467823325, frame = 0.02106457063741982


 92%|█████████▏| 460999/500000 [85:03:51<6:37:32,  1.64it/s] 

mean train loss: shift = 0.020156754476949573, frame = 0.020821263330057262


 92%|█████████▏| 461499/500000 [85:09:24<6:30:42,  1.64it/s] 

mean train loss: shift = 0.02032231172081083, frame = 0.021061241207644344


 92%|█████████▏| 461999/500000 [85:14:57<6:26:29,  1.64it/s] 

mean train loss: shift = 0.020780289916321634, frame = 0.021095152337104083


 92%|█████████▏| 462499/500000 [85:20:30<6:22:42,  1.63it/s] 

mean train loss: shift = 0.01989970310404897, frame = 0.021162360539659857


 93%|█████████▎| 462999/500000 [85:26:02<6:15:32,  1.64it/s] 

mean train loss: shift = 0.02046460913494229, frame = 0.021037619147449733


 93%|█████████▎| 463499/500000 [85:31:35<6:10:51,  1.64it/s] 

mean train loss: shift = 0.020221766298636795, frame = 0.020911397367715835


 93%|█████████▎| 463999/500000 [85:37:08<6:06:21,  1.64it/s] 

mean train loss: shift = 0.020094932164996863, frame = 0.021449755672365428


 93%|█████████▎| 464499/500000 [85:42:40<6:00:39,  1.64it/s] 

mean train loss: shift = 0.02036327802669257, frame = 0.020859743945300577


 93%|█████████▎| 464999/500000 [85:48:13<5:56:11,  1.64it/s] 

mean train loss: shift = 0.021016574491746723, frame = 0.02118377266637981


 93%|█████████▎| 465499/500000 [85:53:46<5:51:41,  1.64it/s] 

mean train loss: shift = 0.02006724229082465, frame = 0.021296061731874944


 93%|█████████▎| 465999/500000 [85:59:19<5:45:21,  1.64it/s] 

mean train loss: shift = 0.02012203591596335, frame = 0.02107887087389827


 93%|█████████▎| 466499/500000 [86:04:51<5:40:09,  1.64it/s] 

mean train loss: shift = 0.020037398243322967, frame = 0.020732778267934918


 93%|█████████▎| 466999/500000 [86:10:25<5:36:22,  1.64it/s] 

mean train loss: shift = 0.020016344561241566, frame = 0.02104956646449864


 93%|█████████▎| 467499/500000 [86:15:57<5:29:52,  1.64it/s] 

mean train loss: shift = 0.02092658688686788, frame = 0.021176419647410512


 94%|█████████▎| 467999/500000 [86:21:29<5:25:50,  1.64it/s] 

mean train loss: shift = 0.020149127485230563, frame = 0.021003130538389085


 94%|█████████▎| 468499/500000 [86:27:01<5:20:49,  1.64it/s] 

mean train loss: shift = 0.02021589889843017, frame = 0.02116242723725736


 94%|█████████▍| 468999/500000 [86:32:33<5:15:14,  1.64it/s] 

mean train loss: shift = 0.020096186792477964, frame = 0.0212069210447371


 94%|█████████▍| 469499/500000 [86:38:05<5:10:47,  1.64it/s] 

mean train loss: shift = 0.019747078130953014, frame = 0.021108299454674125


 94%|█████████▍| 469999/500000 [86:43:39<5:05:32,  1.64it/s] 

mean train loss: shift = 0.019695540417917073, frame = 0.020836848352104427


 94%|█████████▍| 470499/500000 [86:49:11<4:58:57,  1.64it/s] 

mean train loss: shift = 0.019916985194198787, frame = 0.020928888402879237


 94%|█████████▍| 470999/500000 [86:54:43<4:55:08,  1.64it/s] 

mean train loss: shift = 0.021422337455675004, frame = 0.02111253693886101


 94%|█████████▍| 471499/500000 [87:00:17<4:50:26,  1.64it/s] 

mean train loss: shift = 0.020207162344828247, frame = 0.021377168303355575


 94%|█████████▍| 471999/500000 [87:05:49<4:44:02,  1.64it/s] 

mean train loss: shift = 0.02072969746682793, frame = 0.020862231401726602


 94%|█████████▍| 472499/500000 [87:11:22<4:38:46,  1.64it/s] 

mean train loss: shift = 0.020145163343288003, frame = 0.020767197603359818


 95%|█████████▍| 472999/500000 [87:16:55<4:34:44,  1.64it/s] 

mean train loss: shift = 0.020291568013839422, frame = 0.020694027507677674


 95%|█████████▍| 473499/500000 [87:22:26<4:28:53,  1.64it/s] 

mean train loss: shift = 0.019831812378950417, frame = 0.020708833245560526


 95%|█████████▍| 473999/500000 [87:27:59<4:23:42,  1.64it/s] 

mean train loss: shift = 0.020693660590797663, frame = 0.020942952094599603


 95%|█████████▍| 474499/500000 [87:33:32<4:19:12,  1.64it/s] 

mean train loss: shift = 0.020203233427368104, frame = 0.021131610583513975


 95%|█████████▍| 474999/500000 [87:39:03<4:14:05,  1.64it/s] 

mean train loss: shift = 0.019756326406262816, frame = 0.02071534800156951


 95%|█████████▌| 475499/500000 [87:44:35<4:08:59,  1.64it/s] 

mean train loss: shift = 0.020044137680903077, frame = 0.02093863192014396


 95%|█████████▌| 475999/500000 [87:50:07<4:04:32,  1.64it/s] 

mean train loss: shift = 0.020945474414154886, frame = 0.020675645411014555


 95%|█████████▌| 476499/500000 [87:55:39<3:59:08,  1.64it/s] 

mean train loss: shift = 0.019945788130164146, frame = 0.02089481850527227


 95%|█████████▌| 476999/500000 [88:01:12<3:53:51,  1.64it/s] 

mean train loss: shift = 0.020024022757075727, frame = 0.02116670534014702


 95%|█████████▌| 477499/500000 [88:06:45<3:48:43,  1.64it/s] 

mean train loss: shift = 0.020261511895805596, frame = 0.02068677801080048


 96%|█████████▌| 477999/500000 [88:12:16<3:43:32,  1.64it/s] 

mean train loss: shift = 0.020897474546916782, frame = 0.020851758940145373


 96%|█████████▌| 478499/500000 [88:17:49<3:38:31,  1.64it/s] 

mean train loss: shift = 0.02002286713104695, frame = 0.021120663195848464


 96%|█████████▌| 478999/500000 [88:23:21<3:34:01,  1.64it/s] 

mean train loss: shift = 0.020570505699142812, frame = 0.02114300621300936


 96%|█████████▌| 479499/500000 [88:28:53<3:28:01,  1.64it/s] 

mean train loss: shift = 0.020206745352596044, frame = 0.021189393900334835


 96%|█████████▌| 479999/500000 [88:34:25<3:24:05,  1.63it/s] 

mean train loss: shift = 0.019898258864879607, frame = 0.021176702208817004


 96%|█████████▌| 480499/500000 [88:39:58<3:18:14,  1.64it/s] 

mean train loss: shift = 0.019891168134287, frame = 0.020977626722306013


 96%|█████████▌| 480999/500000 [88:45:30<3:12:39,  1.64it/s] 

mean train loss: shift = 0.01993533404171467, frame = 0.020911619225516917


 96%|█████████▋| 481499/500000 [88:51:03<3:08:03,  1.64it/s] 

mean train loss: shift = 0.02040544135775417, frame = 0.021086958013474943


 96%|█████████▋| 481999/500000 [88:56:36<3:03:19,  1.64it/s] 

mean train loss: shift = 0.020798918536864223, frame = 0.02093006834760308


 96%|█████████▋| 482499/500000 [89:02:08<2:57:39,  1.64it/s] 

mean train loss: shift = 0.01899562347214669, frame = 0.021054148143157363


 97%|█████████▋| 482999/500000 [89:07:40<2:52:42,  1.64it/s] 

mean train loss: shift = 0.01999404496513307, frame = 0.02070262248814106


 97%|█████████▋| 483499/500000 [89:13:13<2:48:17,  1.63it/s] 

mean train loss: shift = 0.019489741074852646, frame = 0.020779035637155174


 97%|█████████▋| 483999/500000 [89:18:45<2:42:25,  1.64it/s] 

mean train loss: shift = 0.019896316796541216, frame = 0.02075529265962541


 97%|█████████▋| 484499/500000 [89:24:17<2:37:26,  1.64it/s] 

mean train loss: shift = 0.01967384508345276, frame = 0.02079085131175816


 97%|█████████▋| 484999/500000 [89:29:50<2:32:54,  1.64it/s] 

mean train loss: shift = 0.019609391318634154, frame = 0.02099208138883114


 97%|█████████▋| 485499/500000 [89:35:22<2:27:25,  1.64it/s] 

mean train loss: shift = 0.019701952547766268, frame = 0.021193585216999055


 97%|█████████▋| 485999/500000 [89:40:53<2:21:58,  1.64it/s] 

mean train loss: shift = 0.01950352147128433, frame = 0.020961359241977333


 97%|█████████▋| 486499/500000 [89:46:27<2:17:20,  1.64it/s] 

mean train loss: shift = 0.020120094674639405, frame = 0.020813716953620314


 97%|█████████▋| 486999/500000 [89:52:00<2:12:06,  1.64it/s] 

mean train loss: shift = 0.01975753537937999, frame = 0.021245403185486792


 97%|█████████▋| 487499/500000 [89:57:31<2:06:58,  1.64it/s] 

mean train loss: shift = 0.01996734260302037, frame = 0.020849143475294114


 98%|█████████▊| 487999/500000 [90:03:04<2:02:06,  1.64it/s] 

mean train loss: shift = 0.020512119649909438, frame = 0.021080827955156565


 98%|█████████▊| 488499/500000 [90:08:37<1:56:50,  1.64it/s] 

mean train loss: shift = 0.019712624380365015, frame = 0.020844521110877395


 98%|█████████▊| 488999/500000 [90:14:09<1:51:52,  1.64it/s] 

mean train loss: shift = 0.01985397143289447, frame = 0.020798552891239525


 98%|█████████▊| 489499/500000 [90:19:43<1:46:50,  1.64it/s] 

mean train loss: shift = 0.019533532919362186, frame = 0.020957741659134627


 98%|█████████▊| 489999/500000 [90:25:15<1:41:34,  1.64it/s] 

mean train loss: shift = 0.0194699050989002, frame = 0.020800301203504203


 98%|█████████▊| 490499/500000 [90:30:49<1:36:38,  1.64it/s] 

mean train loss: shift = 0.019556167998351158, frame = 0.020945985732600092


 98%|█████████▊| 490999/500000 [90:36:23<1:31:38,  1.64it/s] 

mean train loss: shift = 0.01981599617563188, frame = 0.020598586324602364


 98%|█████████▊| 491499/500000 [90:41:55<1:26:22,  1.64it/s] 

mean train loss: shift = 0.0196562317898497, frame = 0.02079091997630894


 98%|█████████▊| 491999/500000 [90:47:27<1:21:18,  1.64it/s] 

mean train loss: shift = 0.01961315848957747, frame = 0.02070998740568757


 98%|█████████▊| 492499/500000 [90:52:59<1:16:30,  1.63it/s] 

mean train loss: shift = 0.019945850187912582, frame = 0.02083439108915627


 99%|█████████▊| 492999/500000 [90:58:31<1:11:01,  1.64it/s] 

mean train loss: shift = 0.019266859732568264, frame = 0.0209348670784384


 99%|█████████▊| 493499/500000 [91:04:04<1:05:50,  1.65it/s] 

mean train loss: shift = 0.019719456855207682, frame = 0.02083227683790028


 99%|█████████▉| 493999/500000 [91:09:36<1:01:03,  1.64it/s] 

mean train loss: shift = 0.019667438851669432, frame = 0.02094526074640453


 99%|█████████▉| 494499/500000 [91:15:08<55:51,  1.64it/s]   

mean train loss: shift = 0.019195873142220078, frame = 0.02085207371786237


 99%|█████████▉| 494999/500000 [91:20:40<50:44,  1.64it/s]   

mean train loss: shift = 0.019509151371195913, frame = 0.020899551467970012


 99%|█████████▉| 495499/500000 [91:26:12<45:48,  1.64it/s]   

mean train loss: shift = 0.01923128076735884, frame = 0.02085743243433535


 99%|█████████▉| 495999/500000 [91:31:44<40:35,  1.64it/s]   

mean train loss: shift = 0.018907892651855947, frame = 0.02078669587522745


 99%|█████████▉| 496499/500000 [91:37:16<35:38,  1.64it/s]  

mean train loss: shift = 0.019082630227319895, frame = 0.02085220465436578


 99%|█████████▉| 496999/500000 [91:42:48<30:32,  1.64it/s]  

mean train loss: shift = 0.019711372780613603, frame = 0.020755012283101677


 99%|█████████▉| 497499/500000 [91:48:20<25:28,  1.64it/s]  

mean train loss: shift = 0.02007071331422776, frame = 0.020894128466024994


100%|█████████▉| 497999/500000 [91:53:53<20:19,  1.64it/s]  

mean train loss: shift = 0.019237517298199235, frame = 0.020625066179782152


100%|█████████▉| 498499/500000 [91:59:26<15:17,  1.64it/s]  

mean train loss: shift = 0.019354546518996357, frame = 0.020992579029873015


100%|█████████▉| 498999/500000 [92:04:58<10:09,  1.64it/s]  

mean train loss: shift = 0.019411134372465312, frame = 0.020822239886969327


100%|█████████▉| 499499/500000 [92:10:31<05:05,  1.64it/s]  

mean train loss: shift = 0.021778937647119165, frame = 0.02065453510172665


100%|█████████▉| 499999/500000 [92:16:04<00:00,  1.64it/s]  

mean train loss: shift = 0.01949497579038143, frame = 0.020870717661455273


100%|██████████| 500000/500000 [92:16:32<00:00,  1.51it/s]


In [None]:
    if resume_iteration is None:
        model = OnsetsAndFrames(N_MELS, MAX_MIDI - MIN_MIDI + 1, model_complexity).to(device)
        optimizer = torch.optim.Adam(model.parameters(), learning_rate)
        resume_iteration = 0
    else:
        model_path = os.path.join(logdir, f'model-{resume_iteration}.pt')
        model = torch.load(model_path)
        optimizer = torch.optim.Adam(model.parameters(), learning_rate)
        optimizer.load_state_dict(torch.load(os.path.join(logdir, 'last-optimizer-state.pt')))



In [33]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

leave_one_out=None
clip_gradient_norm = 3
batch_size = 2
sequence_length = 327680
model_complexity = 48


if torch.cuda.is_available() and torch.cuda.get_device_properties(torch.cuda.current_device()).total_memory < 10e9:
    batch_size //= 2
    sequence_length //= 2
    print(f'Reducing batch size to {batch_size} and sequence_length to {sequence_length} to save memory')

train_groups, validation_groups = ['train'], ['validation']

if leave_one_out is not None:
    all_years = {'2004', '2006', '2008', '2009', '2011', '2013', '2014', '2015', '2017'}
    train_groups = list(all_years - {str(leave_one_out)})
    validation_groups = [str(leave_one_out)]
    

In [6]:
dataset = MAESTRO(groups=train_groups, sequence_length=sequence_length)

Loading group train:   0%|          | 0/954 [00:00<?, ?it/s]

Loading 1 group of MAESTRO at data/MAESTRO


Loading group train: 100%|██████████| 954/954 [01:12<00:00, 13.22it/s]

load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for debug purpose
load 8 data. early return for de




### try to debug run_shift_param_on_batch

In [58]:
logdir="runs/model"
writer = SummaryWriter(logdir)
train_groups, validation_groups = ['train'], ['validation']

if leave_one_out is not None:
    all_years = {'2004', '2006', '2008', '2009', '2011', '2013', '2014', '2015', '2017'}
    train_groups = list(all_years - {str(leave_one_out)})
    validation_groups = [str(leave_one_out)]

if train_on == 'MAESTRO':
    dataset = MAESTRO(groups=train_groups, sequence_length=sequence_length)
    validation_dataset = MAESTRO(groups=validation_groups, sequence_length=sequence_length)
else:
    dataset = MAPS(groups=['AkPnBcht', 'AkPnBsdf', 'AkPnCGdD', 'AkPnStgb', 'SptkBGAm', 'SptkBGCl', 'StbgTGd2'], sequence_length=sequence_length)
    validation_dataset = MAPS(groups=['ENSTDkAm', 'ENSTDkCl'], sequence_length=validation_length)
    

In [20]:
onebatch = next(iter(loader))

In [21]:
with torch.no_grad():
    pred, lls = model.run_shift_param_on_batch(onebatch)

In [30]:
print (lls['loss/onset'].item())

0.0033941396977752447


In [23]:
print (pred["shift"])

tensor([[ 0.0197, -0.0122,  0.1601,  ..., -2.7126, -0.8132, -0.6011],
        [ 1.6764,  2.0073,  2.0166,  ..., -0.6118,  0.0727,  0.1484],
        [ 1.8172,  1.7645,  1.5668,  ...,  0.6216,  0.6121,  0.3840],
        ...,
        [-0.6990, -0.2199, -0.5687,  ..., -0.3635, -0.2629,  0.2758],
        [-0.4857,  0.4936, -1.2555,  ...,  0.3769,  0.1807,  0.2952],
        [ 1.2530,  1.6878,  1.8124,  ...,  1.4559,  1.7927,  2.1411]],
       device='cuda:0')


In [22]:
print (onebatch['shift'])
#print (pred['shift'][0,:])

tensor([[-1., -1., -1.,  ..., -1., -1., -1.],
        [ 1.,  1.,  1.,  ...,  1.,  1.,  1.],
        [ 3.,  3.,  3.,  ...,  3.,  3.,  3.],
        ...,
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [-1., -1., -1.,  ..., -1., -1., -1.],
        [ 4.,  4.,  4.,  ...,  4.,  4.,  4.]], device='cuda:0')


In [15]:
predictions, losses = new_model.run_on_batch(batch)


In [46]:
aaa = torch.randn(2,10)
bbb = torch.randn(2,10)
aaa *= 0
bbb[0,:] = 0.5
bbb[1,:] = 0.0
print (aaa)
print (bbb)
F.mse_loss(aaa, bbb)

tensor([[-0., 0., 0., 0., 0., 0., 0., 0., -0., 0.],
        [-0., -0., -0., 0., 0., -0., 0., -0., 0., -0.]])
tensor([[0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000,
         0.5000],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000]])


tensor(0.1250)

In [47]:
0.5*0.5*10/20

0.125

In [16]:
print (losses[])

{'loss/onset': tensor(0.0034, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>), 'loss/offset': tensor(0.0022, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>), 'loss/frame': tensor(0.0282, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>), 'loss/velocity': tensor(0.0048, device='cuda:0', grad_fn=<DivBackward0>)}


In [37]:
one = dataset[0]

In [40]:
print (one['audio'].shape, one['audio'].dtype)
print (one['shift_audio'].shape, one['shift_audio'].dtype)
print (one['label'].shape)
print (one['shift'].shape, one['shift'].dtype)
a = one['shift'].float()
print (a.shape, a.dtype)

torch.Size([327680]) torch.float32
torch.Size([327680]) torch.float32
torch.Size([640, 88])
torch.Size([640]) torch.uint8
torch.Size([640]) torch.float32


In [39]:
print (batch['audio'].shape)
print (batch['shift_audio'].shape)
print (batch['shift'].shape)

torch.Size([8, 327680])
torch.Size([8, 327680])
torch.Size([8, 640])


In [None]:

def bn_drop_lin(n_in, n_out, bn=True, p:float=0., actn=False):
    "Sequence of batchnorm (if `bn`), dropout (with `p`) and linear (`n_in`,`n_out`) layers followed by `actn`."
    layers = [nn.BatchNorm1d(n_in)] if bn else []
    if p != 0: layers.append(nn.Dropout(p))
    layers.append(nn.Linear(n_in, n_out))
    if actn: layers.append(nn.ReLU(inplace=True))
    return layers


In [None]:
class RegressionModel(nn.Module):
    def __init__(self, input_features):
        #input_features = model_size = 768
        super().__init__()

        layers = []
        layers += bn_drop_lin(input_features, 256, bn=True, p=0, actn=True)
        layers += bn_drop_lin(256, 64, bn=True, p=0, actn=True)
        layers += bn_drop_lin(64, 1, bn=False, p=0, actn=False)
        self.layers = nn.Sequential(*layers)
        
    def forward(self, x):
        return self.layers(x)
    

In [70]:
reg = RegressionModel(768)

In [71]:
x = torch.randn((24, 768))
y=reg(x)
print (y.shape)

torch.Size([24, 1])


In [74]:
y_gt = torch.randn(24)
print (y_gt.shape)
print (y_gt)

torch.Size([24])
tensor([ 0.0791,  0.1717, -1.5212, -2.2675,  1.8543, -1.9075,  1.5579, -0.1735,
        -0.2960,  1.3799, -0.6627, -1.0970, -0.7396,  0.3816, -1.2872, -1.1826,
        -1.0749,  0.3559, -0.7001,  1.8105,  1.1595,  0.8026, -0.8735, -0.7194])


In [17]:
train_path = pathlib.Path('./data/MAESTRO')
train_path.ls()

[PosixPath('data/MAESTRO/2004'),
 PosixPath('data/MAESTRO/2009'),
 PosixPath('data/MAESTRO/LICENSE'),
 PosixPath('data/MAESTRO/2008'),
 PosixPath('data/MAESTRO/2017'),
 PosixPath('data/MAESTRO/2013'),
 PosixPath('data/MAESTRO/2006'),
 PosixPath('data/MAESTRO/maestro-v1.0.0.csv'),
 PosixPath('data/MAESTRO/2011'),
 PosixPath('data/MAESTRO/2014'),
 PosixPath('data/MAESTRO/2015'),
 PosixPath('data/MAESTRO/README'),
 PosixPath('data/MAESTRO/maestro-v1.0.0.json')]

In [None]:
check_load = torch.load("./data/MAESTRO/2004/MIDI-Unprocessed_SMF_05_R1_2004_02-03_ORIG_MID--AUDIO_05_R1_2004_06_Track06_wav.pt")
for k in check_load.keys():
    print (k)

In [None]:
model