In [1]:
# this notebook will use a basic GPT based decision transformer in offline reinforcement learning setting to create bot for trading stock
# get cuda device
# import libraries
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

from cust_transf import DecisionTransformer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import numpy as np

# utility function to compute the discounted cumulative sum of a vector
def discount_cumsum(x, gamma):
    disc_cumsum = np.zeros_like(x)
    disc_cumsum[-1] = x[-1]
    for t in reversed(range(x.shape[0]-1)):
        disc_cumsum[t] = x[t] + gamma * disc_cumsum[t+1]
    return disc_cumsum

# utility function to evaluate the performance of the agent on a given environment
# TODO: change it to work with custom environment
def evaluate(model, device, context_len, env, rtg_target, rtg_scale, 
            num_eval_ep=10, max_test_ep_len=28, state_mean=None, state_std=None, render=False):
    eval_batch_size = 1

    results = {}
    total_reward = 0
    total_length = 0

    state_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    state_mean = torch.zeros((state_dim,)).to(device) if state_mean is None else torch.from_numpy(state_mean).to(device)
    state_std = torch.ones((state_dim,)).to(device) if state_std is None else torch.from_numpy(state_std).to(device)
    
    # create timestep for transformer
    timesteps = torch.arange(start=0, end=max_test_ep_len, step=1)
    timesteps = timesteps.unsqueeze(0).repeat(eval_batch_size, 1).to(device)

    # evaluate the agent
    model.eval()
    with torch.no_grad():

        for _ in range(num_eval_ep):

            # zeros place holders
            actions = torch.zeros((eval_batch_size, max_test_ep_len, act_dim),
                                dtype=torch.float32, device=device)

            states = torch.zeros((eval_batch_size, max_test_ep_len, state_dim),
                                dtype=torch.float32, device=device)
            
            rewards_to_go = torch.zeros((eval_batch_size, max_test_ep_len, 1),
                                dtype=torch.float32, device=device)
            
            # init episode
            running_state = env.reset()
            running_reward = 0
            running_rtg = rtg_target / rtg_scale

            for t in range(max_test_ep_len):

                total_timesteps += 1

                # add state in placeholder and normalize
                states[0, t] = torch.from_numpy(running_state).to(device)
                states[0, t] = (states[0, t] - state_mean) / state_std

                # calcualate running rtg and add in placeholder
                running_rtg = running_rtg - (running_reward / rtg_scale)
                rewards_to_go[0, t] = running_rtg

                if t < context_len:
                    _, act_preds, _ = model.forward(timesteps[:,:context_len],
                                                states[:,:context_len],
                                                actions[:,:context_len],
                                                rewards_to_go[:,:context_len])
                    act = act_preds[0, t].detach()
                else:
                    _, act_preds, _ = model.forward(timesteps[:,t-context_len+1:t+1],
                                                states[:,t-context_len+1:t+1],
                                                actions[:,t-context_len+1:t+1],
                                                rewards_to_go[:,t-context_len+1:t+1])
                    act = act_preds[0, -1].detach()


                running_state, running_reward, done, _ = env.step(act.cpu().numpy())

                # add action in placeholder
                actions[0, t] = act

                total_reward += running_reward

                if render:
                    env.render()
                if done:
                    break

    results['eval/avg_reward'] = total_reward / num_eval_ep
    results['eval/avg_ep_len'] = total_timesteps / num_eval_ep
    
    return results




In [3]:
from datasets.load import load_dataset
from torch.utils.data import Dataset, DataLoader
# define a custom dataset class which loads the data, modifies the reward to be the discounted cumulative sum and apply trajectory masking
class CustomTrajDataset(Dataset):
    def __init__(self, file_name, context_len, gamma, rtg_scale):
        self.gamma = gamma
        self.context_len = context_len

        # load the data
        data = load_dataset("json", data_files = file_name, field = 'data')
        self.data_state = np.array(data['train']['state'], dtype=np.float32)
        self.data_action = np.array(data['train']['action'], dtype=np.float32)
        self.rtg = np.array(data['train']['reward'], dtype=np.float32)


        # calculate min len, the mean and std of the state and rtg for all data
        self.stateshape = self.data_state.shape
        # calculate mean of state and rtg with numpy
        self.state_mean = np.mean(self.data_state, axis=(-2,-1), keepdims=True)
        self.state_std = np.std(np.abs(self.data_state), axis=(-2,-1), keepdims=True)
        #self.state_mean = torch.mean(data['train']['state'], dim=(-2,-1), keepdim=True)
        #self.state_std = torch.std(data['train']['state'], dim=(-2,-1), keepdim=True)
        self.norm_state = (self.data_state - self.state_mean) / self.state_std

        self.rtg = np.apply_along_axis(discount_cumsum, 1, data['train']['reward'], self.gamma) # type: ignore
        self.rtg = self.rtg / rtg_scale

    def get_state_stats(self):
        return self.state_mean, self.state_std        

    def __len__(self):
        return self.stateshape[0]

    def __getitem__(self, idx):
        state = self.norm_state[idx]
        action = self.data_action[idx]
        rtg = self.rtg[idx]

        data_len = state.shape[0]
        
        if data_len > self.context_len:
            # sample random start index
            start_idx = np.random.randint(0, data_len - self.context_len)
            # slice the data and convert to torch
            state = torch.from_numpy(state[start_idx:start_idx+self.context_len])
            action = torch.from_numpy(action[start_idx:start_idx+self.context_len])
            rtg = torch.from_numpy(rtg[start_idx:start_idx+self.context_len])
            timesteps = torch.arange(start=start_idx, end=start_idx + self.context_len, step=1)
            # trajectory mask
            mask = torch.ones(self.context_len, dtype=torch.long)
        else:
            padding_len = self.context_len - data_len

            # pad the data with zeros
            state = torch.from_numpy(state)
            state = torch.cat([state, torch.zeros((padding_len, *state.shape[1:]))], dim=0)

            action = torch.from_numpy(action)
            action = torch.cat([action, torch.zeros((padding_len, *action.shape[1:]))], dim=0)

            rtg = torch.from_numpy(rtg)
            rtg = torch.cat([rtg, torch.zeros((padding_len, *rtg.shape[1:]))], dim=0)

            timesteps = torch.arange(start=0, end=self.context_len, step=1)

            # trajectory mask
            mask = torch.cat([torch.ones(data_len, dtype=torch.long), torch.zeros(padding_len, dtype=torch.long)], dim=0)
        
        return state, action, rtg, timesteps, mask


In [5]:
# load huggingface dataset from json file
filename = 'AAPL_2190_2016-01-01_1d_random_replaybuffer.json'
context_len = 20
Max_balance = 2147483647

dataset = CustomTrajDataset(filename, context_len = context_len ,gamma = 0.99, rtg_scale = Max_balance)

Using custom data configuration default-3a69897addd28b22
Found cached dataset json (/home/victoru/.cache/huggingface/datasets/json/default-3a69897addd28b22/0.0.0/e6070c77f18f01a5ad4551a8b7edfba20b8438b7cad4d94e6ad9378022ce4aab)
100%|██████████| 1/1 [00:00<00:00, 1209.43it/s]


In [6]:
# define training parameters
batch_size = 2
# small learning rate to try to avoid mixed precision caused NaNs
lr = 3e-5
wt_decay = 1e-4
warmup_steps = 10000
n_epochs = 500

In [7]:
# create dataloader from dataset
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [8]:
# define model parameters
# sample 1 batch from dataloader
norm_state, actions, rtg, timestep, traj_mask = next(iter(dataloader))
# use batch shape to determine state dimension
state_dim = norm_state.shape[-1]
act_dim = actions.shape[-1] # discrete action space
# use batch shape to determine context length


n_blocks = 4 # number of transformer blocks
h_dim = 96 # hidden dimension
n_heads = 6 # number of heads in multi-head attention
drop_p = 0.1 # dropout probability


In [9]:
# create the model
model = DecisionTransformer(state_dim, act_dim, n_blocks, h_dim, context_len, n_heads, drop_p).to(device)

# create optimizer
# use larger eps to try to avoid mixed precision overflow caused NaNs
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wt_decay, eps=1e-6)

# create scheduler
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda step: min(1.0, (step + 1) / warmup_steps))

# create a GradScaler for mixed precision training
scaler = torch.cuda.amp.GradScaler(growth_interval=150)
min_scale = 128

In [12]:
# test run the model
with torch.no_grad():
    norm_state, actions, rtg, timestep , traj_mask= next(iter(dataloader))
    norm_state = norm_state.to(device)
    actions = actions.to(device)
    # convert rtg to float
    rtg = rtg.to(device).float()
    timestep = timestep.to(device)
    traj_mask = traj_mask.to(device)
    action_targets = torch.clone(actions).detach().to(device)
    return_preds, state_preds, act_preds = model.forward(norm_state, rtg, timestep, actions)

    # check shape of norm_state
    print(norm_state.shape)
    # check shape of rtg
    print(rtg.shape)
    # check shape of timestep
    print(timestep.shape)
    # check shape of actions
    print(actions.shape)
    print(act_preds.shape)
    print(action_targets.shape)
    
    # consider only the action that are padded
    act_preds = act_preds.view(-1, act_dim)[traj_mask.view(-1) > 0]
    action_targets = action_targets.view(-1, act_dim)[traj_mask.view(-1) > 0]

    # check shape of action targets
    print(action_targets.shape)
    # check shape of action predictions
    print(act_preds.shape)

# check for nan values and inf values in the input and the output of the model
print(torch.isnan(norm_state).any())
print(torch.isnan(rtg).any())
print(torch.isnan(timestep).any())
print(torch.isnan(actions).any())
print(torch.isnan(act_preds).any())
print(torch.isnan(action_targets).any())



torch.Size([2, 20, 13])
torch.Size([2, 20, 1])
torch.Size([2, 20])
torch.Size([2, 20, 2])
torch.Size([2, 20, 2])
torch.Size([2, 20, 2])
torch.Size([40, 2])
torch.Size([40, 2])
tensor(False, device='cuda:0')
tensor(False, device='cuda:0')
tensor(False, device='cuda:0')
tensor(False, device='cuda:0')
tensor(False, device='cuda:0')
tensor(False, device='cuda:0')


In [13]:
print(act_preds)
print(action_targets)
print(norm_state)

tensor([[ 0.5414,  0.4863],
        [ 0.0879,  0.4650],
        [ 0.3458,  0.5598],
        [ 0.0757, -0.3416],
        [-0.0152,  0.6710],
        [ 0.5293,  0.5162],
        [ 0.2007,  0.6124],
        [ 0.3616,  0.5947],
        [ 0.5799,  0.3156],
        [ 0.1769,  0.7526],
        [-0.2917,  0.8149],
        [ 0.1653,  0.3332],
        [ 0.2824,  0.5594],
        [-0.6271,  0.3245],
        [-0.2168,  0.7031],
        [ 0.1997,  0.8121],
        [ 0.6366,  0.5355],
        [ 0.7419,  0.3198],
        [-0.2095,  0.5367],
        [ 0.5437,  0.5196],
        [-0.5138,  0.8217],
        [-0.2318,  0.4292],
        [-0.6535,  0.8390],
        [ 0.0945,  0.4401],
        [ 0.5770,  0.2203],
        [ 0.1478, -0.0228],
        [ 0.2972, -0.0091],
        [-0.2541,  0.3805],
        [ 0.2322,  0.7643],
        [ 0.8399, -0.2268],
        [ 0.2076, -0.4670],
        [ 0.3524,  0.7700],
        [ 0.3907,  0.6256],
        [ 0.8774,  0.4663],
        [ 0.7522,  0.5707],
        [ 0.6727,  0

In [10]:
# create training loop
from tqdm import tqdm

# get the start time to calculate training time
import datetime
start_time = datetime.datetime.now()
#training_log = {'epoch':[], 'loss':[], 'eval/avg_reward':[], 'eval/avg_ep_len':[]} # training log

for i in range(n_epochs):
    model.train()
    log_action_losses = []

    for norm_state, actions, rtg, timestep, traj_mask in tqdm(dataloader):
        # get batch data to device
        norm_state = norm_state.to(device)
        actions = actions.to(device)
        rtg = rtg.to(device).float()
        timestep = timestep.to(device)
        traj_mask = traj_mask.to(device)

        action_targets = torch.clone(actions).detach().to(device)

        # Zeroes out the gradients
        optimizer.zero_grad()

        # run forward pass with autocasting
        # disable autocasting for now to avoid mixed precision caused NaNs
        with torch.cuda.amp.autocast(enabled=False):
            return_preds, state_preds, act_preds = model.forward(norm_state, rtg, timestep, actions)

            # consider only the action that are padded
            act_preds = act_preds.view(-1, act_dim)[traj_mask.view(-1) > 0]
            action_targets = action_targets.view(-1, act_dim)[traj_mask.view(-1) > 0]

            # calculate losses just for actions
            loss = F.mse_loss(act_preds, action_targets, reduction='mean')

        # Scales loss.  Calls backward() on scaled loss to create scaled gradients.
        scaler.scale(loss).backward()

        # unscale the gradients
        scaler.unscale_(optimizer)
        # Clips the gradients by norm
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

        # scaler.step() first unscales the gradients of the optimizer's assigned params.
        # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
        # otherwise, optimizer.step() is skipped.
        scaler.step(optimizer)

        # Updates the learning rate according to the scheduler
        scheduler.step()
        # Updates the scale for next iteration.
        scaler.update()
        # enforce min scale to avoid mixed precision caused NaNs
        if scaler.get_scale() < min_scale:
            #print('current scale is ', scaler.get_scale())
            scaler._scale = torch.tensor(min_scale).to(scaler._scale)
            #print('new scale is ', scaler.get_scale())
        
        # append action loss to log
        log_action_losses.append(loss.detach().cpu().item())
    # print every 100 loss log
    if len(log_action_losses) % 10 == 0 or len(log_action_losses) == 1:
        print('Loss: ', log_action_losses[-1])
    

# record training time
end_time = datetime.datetime.now()
print('Training time: ', end_time - start_time)

# create environment to evaluate the model


100%|██████████| 500/500 [00:07<00:00, 69.28it/s]


Loss:  0.44762077927589417


100%|██████████| 500/500 [00:05<00:00, 88.63it/s]


Loss:  0.2813029885292053


100%|██████████| 500/500 [00:05<00:00, 87.62it/s]


Loss:  0.2847488522529602


100%|██████████| 500/500 [00:05<00:00, 86.94it/s]


Loss:  0.24672436714172363


100%|██████████| 500/500 [00:05<00:00, 86.96it/s]


Loss:  0.23156046867370605


100%|██████████| 500/500 [00:05<00:00, 90.99it/s]


Loss:  0.24840831756591797


100%|██████████| 500/500 [00:05<00:00, 88.91it/s]


Loss:  0.2046828269958496


100%|██████████| 500/500 [00:05<00:00, 87.52it/s]


Loss:  0.26353907585144043


100%|██████████| 500/500 [00:05<00:00, 87.86it/s]


Loss:  0.23492352664470673


100%|██████████| 500/500 [00:05<00:00, 86.65it/s]


Loss:  0.1733994483947754


100%|██████████| 500/500 [00:05<00:00, 84.34it/s]


Loss:  0.2349710911512375


100%|██████████| 500/500 [00:06<00:00, 82.72it/s]


Loss:  0.19635772705078125


100%|██████████| 500/500 [00:05<00:00, 83.52it/s]


Loss:  0.1427597552537918


100%|██████████| 500/500 [00:05<00:00, 88.15it/s]


Loss:  0.20441170036792755


100%|██████████| 500/500 [00:05<00:00, 86.23it/s]


Loss:  0.22004838287830353


100%|██████████| 500/500 [00:05<00:00, 87.17it/s]


Loss:  0.21005025506019592


100%|██████████| 500/500 [00:05<00:00, 86.76it/s]


Loss:  0.19965222477912903


100%|██████████| 500/500 [00:05<00:00, 86.78it/s]


Loss:  0.19974474608898163


100%|██████████| 500/500 [00:05<00:00, 85.62it/s]


Loss:  0.16759121417999268


100%|██████████| 500/500 [00:05<00:00, 87.82it/s]


Loss:  0.23926185071468353


100%|██████████| 500/500 [00:05<00:00, 84.09it/s]


Loss:  0.20161238312721252


100%|██████████| 500/500 [00:05<00:00, 85.59it/s]


Loss:  0.23647136986255646


100%|██████████| 500/500 [00:05<00:00, 87.58it/s]


Loss:  0.20314303040504456


100%|██████████| 500/500 [00:05<00:00, 88.75it/s]


Loss:  0.2196720689535141


100%|██████████| 500/500 [00:05<00:00, 84.69it/s]


Loss:  0.22482071816921234


100%|██████████| 500/500 [00:06<00:00, 83.05it/s]


Loss:  0.23011556267738342


100%|██████████| 500/500 [00:05<00:00, 83.62it/s]


Loss:  0.21213269233703613


100%|██████████| 500/500 [00:05<00:00, 85.74it/s]


Loss:  0.21860432624816895


100%|██████████| 500/500 [00:06<00:00, 81.74it/s]


Loss:  0.21944360435009003


100%|██████████| 500/500 [00:05<00:00, 84.66it/s]


Loss:  0.16066503524780273


100%|██████████| 500/500 [00:06<00:00, 82.68it/s]


Loss:  0.16959579288959503


100%|██████████| 500/500 [00:05<00:00, 83.90it/s]


Loss:  0.22919774055480957


100%|██████████| 500/500 [00:05<00:00, 86.36it/s]


Loss:  0.25287675857543945


100%|██████████| 500/500 [00:05<00:00, 87.25it/s]


Loss:  0.20196381211280823


100%|██████████| 500/500 [00:04<00:00, 118.74it/s]


Loss:  0.204213485121727


100%|██████████| 500/500 [00:04<00:00, 118.57it/s]


Loss:  0.21529920399188995


100%|██████████| 500/500 [00:04<00:00, 119.80it/s]


Loss:  0.21103596687316895


100%|██████████| 500/500 [00:04<00:00, 118.19it/s]


Loss:  0.23504666984081268


100%|██████████| 500/500 [00:04<00:00, 112.65it/s]


Loss:  0.21441331505775452


100%|██████████| 500/500 [00:04<00:00, 114.66it/s]


Loss:  0.18889153003692627


100%|██████████| 500/500 [00:04<00:00, 119.44it/s]


Loss:  0.2000274658203125


100%|██████████| 500/500 [00:04<00:00, 115.55it/s]


Loss:  0.22043228149414062


100%|██████████| 500/500 [00:04<00:00, 119.38it/s]


Loss:  0.16390475630760193


100%|██████████| 500/500 [00:04<00:00, 119.63it/s]


Loss:  0.21200518310070038


100%|██████████| 500/500 [00:04<00:00, 118.95it/s]


Loss:  0.21031728386878967


100%|██████████| 500/500 [00:04<00:00, 119.42it/s]


Loss:  0.21698637306690216


100%|██████████| 500/500 [00:04<00:00, 120.43it/s]


Loss:  0.20093785226345062


100%|██████████| 500/500 [00:04<00:00, 118.62it/s]


Loss:  0.22504399716854095


100%|██████████| 500/500 [00:03<00:00, 126.35it/s]


Loss:  0.24769194424152374


100%|██████████| 500/500 [00:04<00:00, 122.74it/s]


Loss:  0.21017317473888397


100%|██████████| 500/500 [00:04<00:00, 118.49it/s]


Loss:  0.22247214615345


100%|██████████| 500/500 [00:04<00:00, 121.22it/s]


Loss:  0.21996913850307465


100%|██████████| 500/500 [00:03<00:00, 130.14it/s]


Loss:  0.18903808295726776


100%|██████████| 500/500 [00:04<00:00, 120.73it/s]


Loss:  0.22761164605617523


100%|██████████| 500/500 [00:04<00:00, 119.96it/s]


Loss:  0.18391339480876923


100%|██████████| 500/500 [00:04<00:00, 120.23it/s]


Loss:  0.1973189413547516


100%|██████████| 500/500 [00:04<00:00, 124.58it/s]


Loss:  0.21555157005786896


100%|██████████| 500/500 [00:04<00:00, 119.67it/s]


Loss:  0.2921702563762665


100%|██████████| 500/500 [00:04<00:00, 117.26it/s]


Loss:  0.18958842754364014


100%|██████████| 500/500 [00:04<00:00, 114.40it/s]


Loss:  0.2642349898815155


100%|██████████| 500/500 [00:04<00:00, 114.46it/s]


Loss:  0.21936288475990295


100%|██████████| 500/500 [00:04<00:00, 116.63it/s]


Loss:  0.19267939031124115


100%|██████████| 500/500 [00:04<00:00, 115.49it/s]


Loss:  0.230363130569458


100%|██████████| 500/500 [00:04<00:00, 116.31it/s]


Loss:  0.25871357321739197


100%|██████████| 500/500 [00:04<00:00, 117.77it/s]


Loss:  0.21830616891384125


100%|██████████| 500/500 [00:04<00:00, 113.13it/s]


Loss:  0.22719894349575043


100%|██████████| 500/500 [00:04<00:00, 113.69it/s]


Loss:  0.18511009216308594


100%|██████████| 500/500 [00:04<00:00, 120.05it/s]


Loss:  0.2365347445011139


100%|██████████| 500/500 [00:04<00:00, 119.66it/s]


Loss:  0.23834191262722015


100%|██████████| 500/500 [00:04<00:00, 116.99it/s]


Loss:  0.1633765548467636


100%|██████████| 500/500 [00:04<00:00, 118.24it/s]


Loss:  0.19686034321784973


100%|██████████| 500/500 [00:04<00:00, 119.14it/s]


Loss:  0.1863093227148056


100%|██████████| 500/500 [00:04<00:00, 117.44it/s]


Loss:  0.2056809961795807


100%|██████████| 500/500 [00:04<00:00, 118.24it/s]


Loss:  0.21677832305431366


100%|██████████| 500/500 [00:04<00:00, 110.75it/s]


Loss:  0.20924916863441467


100%|██████████| 500/500 [00:04<00:00, 117.33it/s]


Loss:  0.1921752542257309


100%|██████████| 500/500 [00:04<00:00, 114.89it/s]


Loss:  0.1880958527326584


100%|██████████| 500/500 [00:04<00:00, 116.32it/s]


Loss:  0.19165204465389252


100%|██████████| 500/500 [00:04<00:00, 117.21it/s]


Loss:  0.2465190440416336


100%|██████████| 500/500 [00:04<00:00, 120.07it/s]


Loss:  0.2043120414018631


100%|██████████| 500/500 [00:04<00:00, 114.19it/s]


Loss:  0.2320447415113449


100%|██████████| 500/500 [00:04<00:00, 119.06it/s]


Loss:  0.1764604151248932


100%|██████████| 500/500 [00:04<00:00, 114.39it/s]


Loss:  0.2239331752061844


100%|██████████| 500/500 [00:04<00:00, 112.01it/s]


Loss:  0.19248822331428528


100%|██████████| 500/500 [00:04<00:00, 110.23it/s]


Loss:  0.2098599225282669


100%|██████████| 500/500 [00:04<00:00, 114.11it/s]


Loss:  0.18028312921524048


100%|██████████| 500/500 [00:04<00:00, 122.30it/s]


Loss:  0.2006310075521469


100%|██████████| 500/500 [00:04<00:00, 111.65it/s]


Loss:  0.20567981898784637


100%|██████████| 500/500 [00:04<00:00, 111.96it/s]


Loss:  0.22117061913013458


100%|██████████| 500/500 [00:04<00:00, 118.04it/s]


Loss:  0.16662737727165222


100%|██████████| 500/500 [00:04<00:00, 119.26it/s]


Loss:  0.17948973178863525


100%|██████████| 500/500 [00:04<00:00, 119.13it/s]


Loss:  0.16826555132865906


100%|██████████| 500/500 [00:04<00:00, 117.27it/s]


Loss:  0.17719411849975586


100%|██████████| 500/500 [00:04<00:00, 117.83it/s]


Loss:  0.22513537108898163


100%|██████████| 500/500 [00:04<00:00, 118.60it/s]


Loss:  0.1958017498254776


100%|██████████| 500/500 [00:04<00:00, 117.68it/s]


Loss:  0.22577106952667236


100%|██████████| 500/500 [00:04<00:00, 119.89it/s]


Loss:  0.2231007069349289


100%|██████████| 500/500 [00:04<00:00, 113.34it/s]


Loss:  0.22671714425086975


100%|██████████| 500/500 [00:04<00:00, 118.41it/s]


Loss:  0.18804679811000824


100%|██████████| 500/500 [00:04<00:00, 120.29it/s]


Loss:  0.17973823845386505


100%|██████████| 500/500 [00:04<00:00, 119.14it/s]


Loss:  0.21025429666042328


100%|██████████| 500/500 [00:04<00:00, 119.64it/s]


Loss:  0.1593845635652542


100%|██████████| 500/500 [00:04<00:00, 119.70it/s]


Loss:  0.2370883971452713


100%|██████████| 500/500 [00:04<00:00, 119.42it/s]


Loss:  0.18218253552913666


100%|██████████| 500/500 [00:04<00:00, 119.55it/s]


Loss:  0.19385933876037598


100%|██████████| 500/500 [00:04<00:00, 119.70it/s]


Loss:  0.1962352991104126


100%|██████████| 500/500 [00:04<00:00, 120.04it/s]


Loss:  0.22082999348640442


100%|██████████| 500/500 [00:04<00:00, 119.56it/s]


Loss:  0.19120971858501434


100%|██████████| 500/500 [00:04<00:00, 120.30it/s]


Loss:  0.26952868700027466


100%|██████████| 500/500 [00:04<00:00, 120.01it/s]


Loss:  0.1956062614917755


100%|██████████| 500/500 [00:04<00:00, 119.77it/s]


Loss:  0.2071293592453003


100%|██████████| 500/500 [00:03<00:00, 127.47it/s]


Loss:  0.20634594559669495


100%|██████████| 500/500 [00:04<00:00, 119.79it/s]


Loss:  0.2204890251159668


100%|██████████| 500/500 [00:04<00:00, 122.49it/s]


Loss:  0.17854009568691254


100%|██████████| 500/500 [00:04<00:00, 120.23it/s]


Loss:  0.22940610349178314


100%|██████████| 500/500 [00:04<00:00, 119.36it/s]


Loss:  0.15278299152851105


100%|██████████| 500/500 [00:04<00:00, 120.00it/s]


Loss:  0.17023272812366486


100%|██████████| 500/500 [00:04<00:00, 119.80it/s]


Loss:  0.19367817044258118


100%|██████████| 500/500 [00:04<00:00, 119.59it/s]


Loss:  0.2014637440443039


100%|██████████| 500/500 [00:04<00:00, 119.32it/s]


Loss:  0.2014094591140747


100%|██████████| 500/500 [00:04<00:00, 119.96it/s]


Loss:  0.21823732554912567


100%|██████████| 500/500 [00:04<00:00, 119.33it/s]


Loss:  0.2203671783208847


100%|██████████| 500/500 [00:04<00:00, 119.34it/s]


Loss:  0.22112885117530823


100%|██████████| 500/500 [00:04<00:00, 119.56it/s]


Loss:  0.20331232249736786


100%|██████████| 500/500 [00:04<00:00, 120.00it/s]


Loss:  0.24002595245838165


100%|██████████| 500/500 [00:04<00:00, 120.05it/s]


Loss:  0.23767852783203125


100%|██████████| 500/500 [00:04<00:00, 120.28it/s]


Loss:  0.21318228542804718


100%|██████████| 500/500 [00:04<00:00, 123.14it/s]


Loss:  0.183013916015625


100%|██████████| 500/500 [00:04<00:00, 120.02it/s]


Loss:  0.20579032599925995


100%|██████████| 500/500 [00:04<00:00, 119.70it/s]


Loss:  0.2200401872396469


100%|██████████| 500/500 [00:04<00:00, 119.56it/s]


Loss:  0.1933223307132721


100%|██████████| 500/500 [00:04<00:00, 119.54it/s]


Loss:  0.1825474500656128


100%|██████████| 500/500 [00:04<00:00, 121.55it/s]


Loss:  0.21685905754566193


100%|██████████| 500/500 [00:04<00:00, 120.69it/s]


Loss:  0.22952909767627716


100%|██████████| 500/500 [00:04<00:00, 119.57it/s]


Loss:  0.2205292284488678


100%|██████████| 500/500 [00:04<00:00, 119.78it/s]


Loss:  0.16412726044654846


100%|██████████| 500/500 [00:04<00:00, 120.05it/s]


Loss:  0.21994450688362122


100%|██████████| 500/500 [00:04<00:00, 120.98it/s]


Loss:  0.22118143737316132


100%|██████████| 500/500 [00:04<00:00, 119.84it/s]


Loss:  0.22297726571559906


100%|██████████| 500/500 [00:04<00:00, 118.96it/s]


Loss:  0.20063696801662445


100%|██████████| 500/500 [00:04<00:00, 119.72it/s]


Loss:  0.22348575294017792


100%|██████████| 500/500 [00:04<00:00, 123.77it/s]


Loss:  0.17102034389972687


100%|██████████| 500/500 [00:04<00:00, 119.72it/s]


Loss:  0.2108737975358963


100%|██████████| 500/500 [00:04<00:00, 120.31it/s]


Loss:  0.2580624222755432


100%|██████████| 500/500 [00:04<00:00, 122.50it/s]


Loss:  0.2040671408176422


100%|██████████| 500/500 [00:04<00:00, 119.97it/s]


Loss:  0.20284529030323029


100%|██████████| 500/500 [00:04<00:00, 119.61it/s]


Loss:  0.2161247581243515


100%|██████████| 500/500 [00:04<00:00, 119.66it/s]


Loss:  0.1988251656293869


100%|██████████| 500/500 [00:04<00:00, 122.59it/s]


Loss:  0.21794500946998596


100%|██████████| 500/500 [00:04<00:00, 119.84it/s]


Loss:  0.22308678925037384


100%|██████████| 500/500 [00:04<00:00, 120.03it/s]


Loss:  0.20408721268177032


100%|██████████| 500/500 [00:04<00:00, 119.31it/s]


Loss:  0.18253110349178314


100%|██████████| 500/500 [00:04<00:00, 119.42it/s]


Loss:  0.24757306277751923


100%|██████████| 500/500 [00:04<00:00, 119.68it/s]


Loss:  0.15458352863788605


100%|██████████| 500/500 [00:04<00:00, 118.65it/s]


Loss:  0.2187660187482834


100%|██████████| 500/500 [00:04<00:00, 119.74it/s]


Loss:  0.19537585973739624


100%|██████████| 500/500 [00:04<00:00, 119.82it/s]


Loss:  0.21226544678211212


100%|██████████| 500/500 [00:04<00:00, 119.80it/s]


Loss:  0.19238106906414032


100%|██████████| 500/500 [00:04<00:00, 119.55it/s]


Loss:  0.1523614078760147


100%|██████████| 500/500 [00:04<00:00, 120.03it/s]


Loss:  0.19551992416381836


100%|██████████| 500/500 [00:04<00:00, 119.59it/s]


Loss:  0.19027705490589142


100%|██████████| 500/500 [00:04<00:00, 120.58it/s]


Loss:  0.22469298541545868


100%|██████████| 500/500 [00:04<00:00, 119.97it/s]


Loss:  0.24994294345378876


100%|██████████| 500/500 [00:04<00:00, 119.50it/s]


Loss:  0.15460915863513947


100%|██████████| 500/500 [00:04<00:00, 119.55it/s]


Loss:  0.23170660436153412


100%|██████████| 500/500 [00:04<00:00, 119.76it/s]


Loss:  0.16821685433387756


100%|██████████| 500/500 [00:03<00:00, 129.11it/s]


Loss:  0.21851961314678192


100%|██████████| 500/500 [00:04<00:00, 118.66it/s]


Loss:  0.21653907001018524


100%|██████████| 500/500 [00:04<00:00, 119.53it/s]


Loss:  0.2500600218772888


100%|██████████| 500/500 [00:04<00:00, 120.71it/s]


Loss:  0.2050011157989502


100%|██████████| 500/500 [00:04<00:00, 119.63it/s]


Loss:  0.18917374312877655


100%|██████████| 500/500 [00:04<00:00, 119.52it/s]


Loss:  0.22466182708740234


100%|██████████| 500/500 [00:04<00:00, 118.25it/s]


Loss:  0.15399417281150818


100%|██████████| 500/500 [00:04<00:00, 119.26it/s]


Loss:  0.1757735013961792


100%|██████████| 500/500 [00:04<00:00, 120.53it/s]


Loss:  0.2285814732313156


100%|██████████| 500/500 [00:04<00:00, 122.45it/s]


Loss:  0.19846437871456146


100%|██████████| 500/500 [00:04<00:00, 123.20it/s]


Loss:  0.1764616072177887


100%|██████████| 500/500 [00:04<00:00, 123.30it/s]


Loss:  0.22823862731456757


100%|██████████| 500/500 [00:04<00:00, 123.24it/s]


Loss:  0.2521606385707855


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.19880598783493042


100%|██████████| 500/500 [00:03<00:00, 125.48it/s]


Loss:  0.24255381524562836


100%|██████████| 500/500 [00:03<00:00, 135.36it/s]


Loss:  0.18186180293560028


100%|██████████| 500/500 [00:04<00:00, 124.28it/s]


Loss:  0.24194470047950745


100%|██████████| 500/500 [00:04<00:00, 122.85it/s]


Loss:  0.20003178715705872


100%|██████████| 500/500 [00:04<00:00, 123.23it/s]


Loss:  0.25256142020225525


100%|██████████| 500/500 [00:04<00:00, 123.43it/s]


Loss:  0.22011780738830566


100%|██████████| 500/500 [00:04<00:00, 122.83it/s]


Loss:  0.19038031995296478


100%|██████████| 500/500 [00:04<00:00, 122.98it/s]


Loss:  0.22758392989635468


100%|██████████| 500/500 [00:04<00:00, 123.02it/s]


Loss:  0.2007126361131668


100%|██████████| 500/500 [00:04<00:00, 122.93it/s]


Loss:  0.2046504020690918


100%|██████████| 500/500 [00:04<00:00, 122.97it/s]


Loss:  0.22351904213428497


100%|██████████| 500/500 [00:04<00:00, 123.11it/s]


Loss:  0.19399398565292358


100%|██████████| 500/500 [00:04<00:00, 122.46it/s]


Loss:  0.24401243031024933


100%|██████████| 500/500 [00:03<00:00, 128.17it/s]


Loss:  0.20844264328479767


100%|██████████| 500/500 [00:03<00:00, 134.72it/s]


Loss:  0.198368102312088


100%|██████████| 500/500 [00:03<00:00, 132.31it/s]


Loss:  0.22007706761360168


100%|██████████| 500/500 [00:04<00:00, 123.01it/s]


Loss:  0.20939277112483978


100%|██████████| 500/500 [00:04<00:00, 123.10it/s]


Loss:  0.20511551201343536


100%|██████████| 500/500 [00:04<00:00, 122.40it/s]


Loss:  0.19493882358074188


100%|██████████| 500/500 [00:04<00:00, 123.39it/s]


Loss:  0.20318801701068878


100%|██████████| 500/500 [00:04<00:00, 122.50it/s]


Loss:  0.1900104135274887


100%|██████████| 500/500 [00:04<00:00, 122.40it/s]


Loss:  0.21949949860572815


100%|██████████| 500/500 [00:04<00:00, 123.34it/s]


Loss:  0.18833385407924652


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.2308632880449295


100%|██████████| 500/500 [00:04<00:00, 122.88it/s]


Loss:  0.18429946899414062


100%|██████████| 500/500 [00:04<00:00, 123.32it/s]


Loss:  0.18195730447769165


100%|██████████| 500/500 [00:04<00:00, 123.35it/s]


Loss:  0.2259541004896164


100%|██████████| 500/500 [00:04<00:00, 122.95it/s]


Loss:  0.2143358290195465


100%|██████████| 500/500 [00:04<00:00, 123.58it/s]


Loss:  0.18482740223407745


100%|██████████| 500/500 [00:04<00:00, 121.76it/s]


Loss:  0.20334553718566895


100%|██████████| 500/500 [00:04<00:00, 123.17it/s]


Loss:  0.17117488384246826


100%|██████████| 500/500 [00:03<00:00, 133.62it/s]


Loss:  0.23135529458522797


100%|██████████| 500/500 [00:04<00:00, 123.86it/s]


Loss:  0.20074987411499023


100%|██████████| 500/500 [00:04<00:00, 123.25it/s]


Loss:  0.17234192788600922


100%|██████████| 500/500 [00:04<00:00, 123.10it/s]


Loss:  0.17285840213298798


100%|██████████| 500/500 [00:04<00:00, 123.22it/s]


Loss:  0.21131721138954163


100%|██████████| 500/500 [00:04<00:00, 122.83it/s]


Loss:  0.17222066223621368


100%|██████████| 500/500 [00:04<00:00, 123.26it/s]


Loss:  0.15226925909519196


100%|██████████| 500/500 [00:04<00:00, 123.19it/s]


Loss:  0.24897153675556183


100%|██████████| 500/500 [00:04<00:00, 122.74it/s]


Loss:  0.19226567447185516


100%|██████████| 500/500 [00:04<00:00, 122.29it/s]


Loss:  0.19376583397388458


100%|██████████| 500/500 [00:04<00:00, 123.10it/s]


Loss:  0.2025837004184723


100%|██████████| 500/500 [00:04<00:00, 122.14it/s]


Loss:  0.18365263938903809


100%|██████████| 500/500 [00:04<00:00, 122.20it/s]


Loss:  0.19904185831546783


100%|██████████| 500/500 [00:04<00:00, 122.11it/s]


Loss:  0.23184779286384583


100%|██████████| 500/500 [00:04<00:00, 122.47it/s]


Loss:  0.18902836740016937


100%|██████████| 500/500 [00:04<00:00, 123.56it/s]


Loss:  0.17895929515361786


100%|██████████| 500/500 [00:04<00:00, 123.24it/s]


Loss:  0.2048218697309494


100%|██████████| 500/500 [00:04<00:00, 123.38it/s]


Loss:  0.20547614991664886


100%|██████████| 500/500 [00:04<00:00, 123.56it/s]


Loss:  0.23939143121242523


100%|██████████| 500/500 [00:04<00:00, 123.13it/s]


Loss:  0.19395864009857178


100%|██████████| 500/500 [00:04<00:00, 122.89it/s]


Loss:  0.22681879997253418


100%|██████████| 500/500 [00:04<00:00, 123.02it/s]


Loss:  0.20899875462055206


100%|██████████| 500/500 [00:04<00:00, 122.93it/s]


Loss:  0.19914399087429047


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.2014511674642563


100%|██████████| 500/500 [00:04<00:00, 123.37it/s]


Loss:  0.22130964696407318


100%|██████████| 500/500 [00:04<00:00, 123.07it/s]


Loss:  0.18762043118476868


100%|██████████| 500/500 [00:04<00:00, 122.98it/s]


Loss:  0.25454455614089966


100%|██████████| 500/500 [00:04<00:00, 123.47it/s]


Loss:  0.22082392871379852


100%|██████████| 500/500 [00:04<00:00, 123.53it/s]


Loss:  0.1857568770647049


100%|██████████| 500/500 [00:04<00:00, 123.25it/s]


Loss:  0.23315000534057617


100%|██████████| 500/500 [00:04<00:00, 123.25it/s]


Loss:  0.22977471351623535


100%|██████████| 500/500 [00:04<00:00, 122.86it/s]


Loss:  0.22578750550746918


100%|██████████| 500/500 [00:04<00:00, 122.53it/s]


Loss:  0.16355419158935547


100%|██████████| 500/500 [00:03<00:00, 126.67it/s]


Loss:  0.1940177083015442


100%|██████████| 500/500 [00:03<00:00, 128.56it/s]


Loss:  0.19348980486392975


100%|██████████| 500/500 [00:04<00:00, 121.49it/s]


Loss:  0.19911518692970276


100%|██████████| 500/500 [00:04<00:00, 121.89it/s]


Loss:  0.20085345208644867


100%|██████████| 500/500 [00:04<00:00, 122.05it/s]


Loss:  0.2167981117963791


100%|██████████| 500/500 [00:04<00:00, 122.07it/s]


Loss:  0.21262870728969574


100%|██████████| 500/500 [00:04<00:00, 122.68it/s]


Loss:  0.18539288640022278


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.2234940528869629


100%|██████████| 500/500 [00:04<00:00, 123.84it/s]


Loss:  0.1815975159406662


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.16941463947296143


100%|██████████| 500/500 [00:04<00:00, 123.34it/s]


Loss:  0.21855907142162323


100%|██████████| 500/500 [00:04<00:00, 123.56it/s]


Loss:  0.190866619348526


100%|██████████| 500/500 [00:04<00:00, 123.44it/s]


Loss:  0.1878208965063095


100%|██████████| 500/500 [00:04<00:00, 123.61it/s]


Loss:  0.21639452874660492


100%|██████████| 500/500 [00:04<00:00, 123.49it/s]


Loss:  0.1830332726240158


100%|██████████| 500/500 [00:04<00:00, 123.71it/s]


Loss:  0.19279281795024872


100%|██████████| 500/500 [00:04<00:00, 123.53it/s]


Loss:  0.22818303108215332


100%|██████████| 500/500 [00:04<00:00, 123.38it/s]


Loss:  0.19870762526988983


100%|██████████| 500/500 [00:04<00:00, 124.20it/s]


Loss:  0.17281848192214966


100%|██████████| 500/500 [00:04<00:00, 123.04it/s]


Loss:  0.16554532945156097


100%|██████████| 500/500 [00:04<00:00, 123.22it/s]


Loss:  0.2053685486316681


100%|██████████| 500/500 [00:04<00:00, 123.13it/s]


Loss:  0.18752330541610718


100%|██████████| 500/500 [00:04<00:00, 123.81it/s]


Loss:  0.24471822381019592


100%|██████████| 500/500 [00:04<00:00, 123.69it/s]


Loss:  0.20900984108448029


100%|██████████| 500/500 [00:04<00:00, 123.34it/s]


Loss:  0.17296628654003143


100%|██████████| 500/500 [00:04<00:00, 123.34it/s]


Loss:  0.25131964683532715


100%|██████████| 500/500 [00:04<00:00, 123.35it/s]


Loss:  0.25767895579338074


100%|██████████| 500/500 [00:04<00:00, 123.50it/s]


Loss:  0.18701037764549255


100%|██████████| 500/500 [00:04<00:00, 123.33it/s]


Loss:  0.20741455256938934


100%|██████████| 500/500 [00:04<00:00, 123.52it/s]


Loss:  0.2318057119846344


100%|██████████| 500/500 [00:04<00:00, 123.35it/s]


Loss:  0.2661566436290741


100%|██████████| 500/500 [00:04<00:00, 123.65it/s]


Loss:  0.15969376266002655


100%|██████████| 500/500 [00:03<00:00, 136.57it/s]


Loss:  0.19048182666301727


100%|██████████| 500/500 [00:03<00:00, 136.56it/s]


Loss:  0.2177366465330124


100%|██████████| 500/500 [00:03<00:00, 136.54it/s]


Loss:  0.21529801189899445


100%|██████████| 500/500 [00:04<00:00, 122.98it/s]


Loss:  0.19076962769031525


100%|██████████| 500/500 [00:03<00:00, 125.36it/s]


Loss:  0.20059703290462494


100%|██████████| 500/500 [00:04<00:00, 123.43it/s]


Loss:  0.19830231368541718


100%|██████████| 500/500 [00:04<00:00, 123.86it/s]


Loss:  0.20612077414989471


100%|██████████| 500/500 [00:04<00:00, 123.53it/s]


Loss:  0.20183077454566956


100%|██████████| 500/500 [00:04<00:00, 124.06it/s]


Loss:  0.2072569578886032


100%|██████████| 500/500 [00:04<00:00, 124.73it/s]


Loss:  0.18035316467285156


100%|██████████| 500/500 [00:04<00:00, 123.58it/s]


Loss:  0.1998944729566574


100%|██████████| 500/500 [00:04<00:00, 123.56it/s]


Loss:  0.17991964519023895


100%|██████████| 500/500 [00:04<00:00, 123.77it/s]


Loss:  0.21989230811595917


100%|██████████| 500/500 [00:04<00:00, 123.68it/s]


Loss:  0.21269528567790985


100%|██████████| 500/500 [00:03<00:00, 137.32it/s]


Loss:  0.17403937876224518


100%|██████████| 500/500 [00:04<00:00, 124.75it/s]


Loss:  0.1974502056837082


100%|██████████| 500/500 [00:04<00:00, 124.37it/s]


Loss:  0.22367672622203827


100%|██████████| 500/500 [00:04<00:00, 123.73it/s]


Loss:  0.15663054585456848


100%|██████████| 500/500 [00:04<00:00, 122.76it/s]


Loss:  0.20947694778442383


100%|██████████| 500/500 [00:04<00:00, 123.71it/s]


Loss:  0.21466396749019623


100%|██████████| 500/500 [00:04<00:00, 123.93it/s]


Loss:  0.19354699552059174


100%|██████████| 500/500 [00:04<00:00, 123.92it/s]


Loss:  0.20687150955200195


100%|██████████| 500/500 [00:04<00:00, 124.19it/s]


Loss:  0.24825505912303925


100%|██████████| 500/500 [00:04<00:00, 123.79it/s]


Loss:  0.24435047805309296


100%|██████████| 500/500 [00:04<00:00, 123.19it/s]


Loss:  0.21210074424743652


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.20960693061351776


100%|██████████| 500/500 [00:04<00:00, 123.63it/s]


Loss:  0.2260669767856598


100%|██████████| 500/500 [00:04<00:00, 123.54it/s]


Loss:  0.23952598869800568


100%|██████████| 500/500 [00:04<00:00, 123.49it/s]


Loss:  0.2516389787197113


100%|██████████| 500/500 [00:04<00:00, 123.45it/s]


Loss:  0.23153260350227356


100%|██████████| 500/500 [00:04<00:00, 123.62it/s]


Loss:  0.18150556087493896


100%|██████████| 500/500 [00:04<00:00, 123.08it/s]


Loss:  0.1826287806034088


100%|██████████| 500/500 [00:04<00:00, 123.36it/s]


Loss:  0.15888333320617676


100%|██████████| 500/500 [00:04<00:00, 123.36it/s]


Loss:  0.27400892972946167


100%|██████████| 500/500 [00:04<00:00, 123.33it/s]


Loss:  0.23399117588996887


100%|██████████| 500/500 [00:04<00:00, 122.92it/s]


Loss:  0.22232113778591156


100%|██████████| 500/500 [00:04<00:00, 124.26it/s]


Loss:  0.22364400327205658


100%|██████████| 500/500 [00:04<00:00, 123.30it/s]


Loss:  0.2448231726884842


100%|██████████| 500/500 [00:03<00:00, 126.51it/s]


Loss:  0.221021369099617


100%|██████████| 500/500 [00:03<00:00, 132.63it/s]


Loss:  0.17423060536384583


100%|██████████| 500/500 [00:03<00:00, 136.64it/s]


Loss:  0.18342146277427673


100%|██████████| 500/500 [00:03<00:00, 136.52it/s]


Loss:  0.16672103106975555


100%|██████████| 500/500 [00:03<00:00, 136.06it/s]


Loss:  0.20695297420024872


100%|██████████| 500/500 [00:03<00:00, 136.58it/s]


Loss:  0.2310313731431961


100%|██████████| 500/500 [00:03<00:00, 131.51it/s]


Loss:  0.21326188743114471


100%|██████████| 500/500 [00:03<00:00, 130.22it/s]


Loss:  0.21924316883087158


100%|██████████| 500/500 [00:04<00:00, 123.13it/s]


Loss:  0.23048977553844452


100%|██████████| 500/500 [00:04<00:00, 123.28it/s]


Loss:  0.19969116151332855


100%|██████████| 500/500 [00:04<00:00, 123.52it/s]


Loss:  0.15683497488498688


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.18547002971172333


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.2221173793077469


100%|██████████| 500/500 [00:04<00:00, 123.28it/s]


Loss:  0.24676476418972015


100%|██████████| 500/500 [00:03<00:00, 127.62it/s]


Loss:  0.16729354858398438


100%|██████████| 500/500 [00:03<00:00, 136.75it/s]


Loss:  0.18556182086467743


100%|██████████| 500/500 [00:03<00:00, 136.77it/s]


Loss:  0.18781150877475739


100%|██████████| 500/500 [00:04<00:00, 123.22it/s]


Loss:  0.2134169340133667


100%|██████████| 500/500 [00:03<00:00, 136.75it/s]


Loss:  0.17925556004047394


100%|██████████| 500/500 [00:04<00:00, 124.68it/s]


Loss:  0.20652012526988983


100%|██████████| 500/500 [00:04<00:00, 123.41it/s]


Loss:  0.19689832627773285


100%|██████████| 500/500 [00:04<00:00, 123.93it/s]


Loss:  0.23087768256664276


100%|██████████| 500/500 [00:04<00:00, 123.49it/s]


Loss:  0.16048400104045868


100%|██████████| 500/500 [00:04<00:00, 123.37it/s]


Loss:  0.20265822112560272


100%|██████████| 500/500 [00:03<00:00, 137.23it/s]


Loss:  0.18262477219104767


100%|██████████| 500/500 [00:03<00:00, 137.91it/s]


Loss:  0.23759494721889496


100%|██████████| 500/500 [00:03<00:00, 125.04it/s]


Loss:  0.19019047915935516


100%|██████████| 500/500 [00:04<00:00, 123.97it/s]


Loss:  0.21367311477661133


100%|██████████| 500/500 [00:04<00:00, 123.99it/s]


Loss:  0.21332648396492004


100%|██████████| 500/500 [00:04<00:00, 124.58it/s]


Loss:  0.18988482654094696


100%|██████████| 500/500 [00:04<00:00, 124.29it/s]


Loss:  0.2369205802679062


100%|██████████| 500/500 [00:03<00:00, 138.76it/s]


Loss:  0.23486857116222382


100%|██████████| 500/500 [00:03<00:00, 140.83it/s]


Loss:  0.21637628972530365


100%|██████████| 500/500 [00:03<00:00, 140.06it/s]


Loss:  0.1987254023551941


100%|██████████| 500/500 [00:04<00:00, 123.54it/s]


Loss:  0.18512235581874847


100%|██████████| 500/500 [00:04<00:00, 123.55it/s]


Loss:  0.23577462136745453


100%|██████████| 500/500 [00:04<00:00, 123.84it/s]


Loss:  0.23114614188671112


100%|██████████| 500/500 [00:04<00:00, 123.86it/s]


Loss:  0.21528883278369904


100%|██████████| 500/500 [00:03<00:00, 127.21it/s]


Loss:  0.21989011764526367


100%|██████████| 500/500 [00:04<00:00, 123.43it/s]


Loss:  0.19034534692764282


100%|██████████| 500/500 [00:04<00:00, 123.81it/s]


Loss:  0.19747976958751678


100%|██████████| 500/500 [00:04<00:00, 123.25it/s]


Loss:  0.18351009488105774


100%|██████████| 500/500 [00:04<00:00, 123.53it/s]


Loss:  0.20353785157203674


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.23806443810462952


100%|██████████| 500/500 [00:04<00:00, 123.19it/s]


Loss:  0.23274342715740204


100%|██████████| 500/500 [00:04<00:00, 123.74it/s]


Loss:  0.2143181562423706


100%|██████████| 500/500 [00:04<00:00, 123.41it/s]


Loss:  0.20696775615215302


100%|██████████| 500/500 [00:04<00:00, 123.71it/s]


Loss:  0.20182771980762482


100%|██████████| 500/500 [00:04<00:00, 123.37it/s]


Loss:  0.16058124601840973


100%|██████████| 500/500 [00:04<00:00, 123.55it/s]


Loss:  0.17896336317062378


100%|██████████| 500/500 [00:04<00:00, 123.46it/s]


Loss:  0.17958390712738037


100%|██████████| 500/500 [00:04<00:00, 123.19it/s]


Loss:  0.22318235039710999


100%|██████████| 500/500 [00:04<00:00, 123.56it/s]


Loss:  0.19464200735092163


100%|██████████| 500/500 [00:04<00:00, 123.49it/s]


Loss:  0.20465560257434845


100%|██████████| 500/500 [00:04<00:00, 123.22it/s]


Loss:  0.23400378227233887


100%|██████████| 500/500 [00:04<00:00, 123.55it/s]


Loss:  0.22973774373531342


100%|██████████| 500/500 [00:04<00:00, 123.56it/s]


Loss:  0.18015597760677338


100%|██████████| 500/500 [00:04<00:00, 123.65it/s]


Loss:  0.18572863936424255


100%|██████████| 500/500 [00:04<00:00, 122.29it/s]


Loss:  0.2057783156633377


100%|██████████| 500/500 [00:04<00:00, 122.72it/s]


Loss:  0.21861796081066132


100%|██████████| 500/500 [00:04<00:00, 121.98it/s]


Loss:  0.21931783854961395


100%|██████████| 500/500 [00:04<00:00, 123.68it/s]


Loss:  0.1981457769870758


100%|██████████| 500/500 [00:04<00:00, 124.88it/s]


Loss:  0.2730201184749603


100%|██████████| 500/500 [00:04<00:00, 123.80it/s]


Loss:  0.16204284131526947


100%|██████████| 500/500 [00:04<00:00, 123.06it/s]


Loss:  0.14874966442584991


100%|██████████| 500/500 [00:04<00:00, 122.00it/s]


Loss:  0.21930888295173645


100%|██████████| 500/500 [00:04<00:00, 122.02it/s]


Loss:  0.18394571542739868


100%|██████████| 500/500 [00:04<00:00, 122.38it/s]


Loss:  0.2081315517425537


100%|██████████| 500/500 [00:04<00:00, 123.28it/s]


Loss:  0.26744070649147034


100%|██████████| 500/500 [00:04<00:00, 123.10it/s]


Loss:  0.21029675006866455


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.185111865401268


100%|██████████| 500/500 [00:04<00:00, 122.80it/s]


Loss:  0.1899379938840866


100%|██████████| 500/500 [00:04<00:00, 122.28it/s]


Loss:  0.205002099275589


100%|██████████| 500/500 [00:04<00:00, 121.99it/s]


Loss:  0.21824271976947784


100%|██████████| 500/500 [00:04<00:00, 122.18it/s]


Loss:  0.2282143086194992


100%|██████████| 500/500 [00:04<00:00, 123.40it/s]


Loss:  0.1975972056388855


100%|██████████| 500/500 [00:04<00:00, 123.85it/s]


Loss:  0.2050960510969162


100%|██████████| 500/500 [00:03<00:00, 127.04it/s]


Loss:  0.199118971824646


100%|██████████| 500/500 [00:04<00:00, 123.58it/s]


Loss:  0.2364005148410797


100%|██████████| 500/500 [00:04<00:00, 123.16it/s]


Loss:  0.21401230990886688


100%|██████████| 500/500 [00:04<00:00, 123.44it/s]


Loss:  0.22815799713134766


100%|██████████| 500/500 [00:03<00:00, 126.29it/s]


Loss:  0.22034083306789398


100%|██████████| 500/500 [00:03<00:00, 139.69it/s]


Loss:  0.2023569643497467


100%|██████████| 500/500 [00:03<00:00, 137.26it/s]


Loss:  0.18745069205760956


100%|██████████| 500/500 [00:03<00:00, 134.05it/s]


Loss:  0.2142268717288971


100%|██████████| 500/500 [00:03<00:00, 139.69it/s]


Loss:  0.20804746448993683


100%|██████████| 500/500 [00:03<00:00, 138.68it/s]


Loss:  0.18683816492557526


100%|██████████| 500/500 [00:03<00:00, 132.75it/s]


Loss:  0.207187220454216


100%|██████████| 500/500 [00:04<00:00, 123.77it/s]


Loss:  0.2704623341560364


100%|██████████| 500/500 [00:04<00:00, 123.71it/s]


Loss:  0.19835738837718964


100%|██████████| 500/500 [00:04<00:00, 121.70it/s]


Loss:  0.24021589756011963


100%|██████████| 500/500 [00:04<00:00, 123.12it/s]


Loss:  0.22346608340740204


100%|██████████| 500/500 [00:04<00:00, 123.52it/s]


Loss:  0.2098371982574463


100%|██████████| 500/500 [00:04<00:00, 122.58it/s]


Loss:  0.19908103346824646


100%|██████████| 500/500 [00:04<00:00, 122.66it/s]


Loss:  0.21602030098438263


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.16431593894958496


100%|██████████| 500/500 [00:04<00:00, 123.74it/s]


Loss:  0.2570256292819977


100%|██████████| 500/500 [00:04<00:00, 123.62it/s]


Loss:  0.18873576819896698


100%|██████████| 500/500 [00:04<00:00, 124.19it/s]


Loss:  0.20670345425605774


100%|██████████| 500/500 [00:04<00:00, 124.13it/s]


Loss:  0.19149990379810333


100%|██████████| 500/500 [00:04<00:00, 124.04it/s]


Loss:  0.21160851418972015


100%|██████████| 500/500 [00:04<00:00, 123.81it/s]


Loss:  0.20295687019824982


100%|██████████| 500/500 [00:04<00:00, 124.26it/s]


Loss:  0.19772444665431976


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.2085406333208084


100%|██████████| 500/500 [00:04<00:00, 123.62it/s]


Loss:  0.18989388644695282


100%|██████████| 500/500 [00:04<00:00, 123.48it/s]


Loss:  0.2103213369846344


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.20054979622364044


100%|██████████| 500/500 [00:04<00:00, 123.19it/s]


Loss:  0.18666474521160126


100%|██████████| 500/500 [00:04<00:00, 123.52it/s]


Loss:  0.24647966027259827


100%|██████████| 500/500 [00:04<00:00, 123.62it/s]


Loss:  0.20955076813697815


100%|██████████| 500/500 [00:04<00:00, 123.25it/s]


Loss:  0.14409655332565308


100%|██████████| 500/500 [00:04<00:00, 123.40it/s]


Loss:  0.1710377037525177


100%|██████████| 500/500 [00:04<00:00, 122.98it/s]


Loss:  0.20480994880199432


100%|██████████| 500/500 [00:04<00:00, 122.13it/s]


Loss:  0.2186068743467331


100%|██████████| 500/500 [00:04<00:00, 123.68it/s]


Loss:  0.21661250293254852


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.20538365840911865


100%|██████████| 500/500 [00:04<00:00, 123.79it/s]


Loss:  0.21780209243297577


100%|██████████| 500/500 [00:03<00:00, 136.54it/s]


Loss:  0.2062615603208542


100%|██████████| 500/500 [00:03<00:00, 136.69it/s]


Loss:  0.22564175724983215


100%|██████████| 500/500 [00:03<00:00, 136.48it/s]


Loss:  0.2499886006116867


100%|██████████| 500/500 [00:03<00:00, 131.52it/s]


Loss:  0.15098097920417786


100%|██████████| 500/500 [00:04<00:00, 123.55it/s]


Loss:  0.18622371554374695


100%|██████████| 500/500 [00:04<00:00, 123.50it/s]


Loss:  0.24631989002227783


100%|██████████| 500/500 [00:04<00:00, 123.37it/s]


Loss:  0.2096271961927414


100%|██████████| 500/500 [00:04<00:00, 121.94it/s]


Loss:  0.2071371078491211


100%|██████████| 500/500 [00:04<00:00, 123.11it/s]


Loss:  0.24219398200511932


100%|██████████| 500/500 [00:03<00:00, 125.86it/s]


Loss:  0.2588253617286682


100%|██████████| 500/500 [00:04<00:00, 122.68it/s]


Loss:  0.2009112685918808


100%|██████████| 500/500 [00:04<00:00, 122.71it/s]


Loss:  0.22452814877033234


100%|██████████| 500/500 [00:04<00:00, 123.18it/s]


Loss:  0.23496432602405548


100%|██████████| 500/500 [00:04<00:00, 123.35it/s]


Loss:  0.22388342022895813


100%|██████████| 500/500 [00:04<00:00, 123.40it/s]


Loss:  0.22932754456996918


100%|██████████| 500/500 [00:04<00:00, 123.35it/s]


Loss:  0.2551541030406952


100%|██████████| 500/500 [00:04<00:00, 123.34it/s]


Loss:  0.23576703667640686


100%|██████████| 500/500 [00:04<00:00, 123.89it/s]


Loss:  0.21462085843086243


100%|██████████| 500/500 [00:04<00:00, 123.74it/s]


Loss:  0.22133760154247284


100%|██████████| 500/500 [00:04<00:00, 123.74it/s]


Loss:  0.1841064840555191


100%|██████████| 500/500 [00:04<00:00, 123.90it/s]


Loss:  0.203579381108284


100%|██████████| 500/500 [00:04<00:00, 123.47it/s]


Loss:  0.22537218034267426


100%|██████████| 500/500 [00:04<00:00, 123.64it/s]


Loss:  0.20679795742034912


100%|██████████| 500/500 [00:03<00:00, 125.43it/s]


Loss:  0.20770123600959778


100%|██████████| 500/500 [00:03<00:00, 128.53it/s]


Loss:  0.17323648929595947


100%|██████████| 500/500 [00:04<00:00, 123.97it/s]


Loss:  0.15793685615062714


100%|██████████| 500/500 [00:04<00:00, 122.78it/s]


Loss:  0.23601126670837402


100%|██████████| 500/500 [00:04<00:00, 122.44it/s]


Loss:  0.18363557755947113


100%|██████████| 500/500 [00:04<00:00, 123.47it/s]


Loss:  0.2198697179555893


100%|██████████| 500/500 [00:04<00:00, 124.77it/s]


Loss:  0.20305831730365753


100%|██████████| 500/500 [00:04<00:00, 123.80it/s]


Loss:  0.19250474870204926


100%|██████████| 500/500 [00:04<00:00, 124.78it/s]


Loss:  0.19471199810504913


100%|██████████| 500/500 [00:04<00:00, 123.57it/s]


Loss:  0.2533465325832367


100%|██████████| 500/500 [00:04<00:00, 123.21it/s]


Loss:  0.2317359894514084


100%|██████████| 500/500 [00:04<00:00, 123.49it/s]


Loss:  0.22511528432369232


100%|██████████| 500/500 [00:04<00:00, 123.75it/s]


Loss:  0.2300187647342682


100%|██████████| 500/500 [00:04<00:00, 123.44it/s]


Loss:  0.23718690872192383


100%|██████████| 500/500 [00:04<00:00, 123.26it/s]


Loss:  0.17743079364299774


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.1588590443134308


100%|██████████| 500/500 [00:04<00:00, 123.28it/s]


Loss:  0.18774275481700897


100%|██████████| 500/500 [00:04<00:00, 123.16it/s]


Loss:  0.18087683618068695


100%|██████████| 500/500 [00:04<00:00, 123.53it/s]


Loss:  0.18242806196212769


100%|██████████| 500/500 [00:04<00:00, 123.66it/s]


Loss:  0.17897039651870728


100%|██████████| 500/500 [00:04<00:00, 123.64it/s]


Loss:  0.21409998834133148


100%|██████████| 500/500 [00:04<00:00, 123.50it/s]


Loss:  0.19175031781196594


100%|██████████| 500/500 [00:04<00:00, 123.53it/s]


Loss:  0.15405075252056122


100%|██████████| 500/500 [00:04<00:00, 123.31it/s]


Loss:  0.18265168368816376


100%|██████████| 500/500 [00:04<00:00, 123.66it/s]


Loss:  0.189165398478508


100%|██████████| 500/500 [00:04<00:00, 123.55it/s]


Loss:  0.2042711079120636


100%|██████████| 500/500 [00:03<00:00, 129.05it/s]


Loss:  0.20868805050849915


100%|██████████| 500/500 [00:03<00:00, 137.63it/s]


Loss:  0.23926663398742676


100%|██████████| 500/500 [00:03<00:00, 137.99it/s]


Loss:  0.2353510856628418


100%|██████████| 500/500 [00:03<00:00, 133.84it/s]


Loss:  0.20116601884365082


100%|██████████| 500/500 [00:04<00:00, 123.79it/s]


Loss:  0.21061144769191742


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]


Loss:  0.17234471440315247


100%|██████████| 500/500 [00:04<00:00, 123.80it/s]


Loss:  0.21823492646217346


100%|██████████| 500/500 [00:04<00:00, 123.55it/s]


Loss:  0.20013907551765442


100%|██████████| 500/500 [00:04<00:00, 123.40it/s]


Loss:  0.21429231762886047


100%|██████████| 500/500 [00:04<00:00, 122.11it/s]


Loss:  0.22006312012672424


100%|██████████| 500/500 [00:04<00:00, 123.06it/s]


Loss:  0.2350851595401764


100%|██████████| 500/500 [00:04<00:00, 123.43it/s]


Loss:  0.21033965051174164


100%|██████████| 500/500 [00:04<00:00, 123.46it/s]


Loss:  0.1696123331785202


100%|██████████| 500/500 [00:04<00:00, 123.60it/s]


Loss:  0.25007399916648865


100%|██████████| 500/500 [00:04<00:00, 123.06it/s]


Loss:  0.18472306430339813


100%|██████████| 500/500 [00:04<00:00, 123.66it/s]


Loss:  0.18438343703746796


100%|██████████| 500/500 [00:04<00:00, 123.77it/s]


Loss:  0.21696357429027557


100%|██████████| 500/500 [00:04<00:00, 123.21it/s]


Loss:  0.19685880839824677


100%|██████████| 500/500 [00:04<00:00, 123.47it/s]


Loss:  0.17367610335350037


100%|██████████| 500/500 [00:04<00:00, 123.59it/s]

Loss:  0.21472211182117462
Training time:  0:34:54.296323



