# VRNN
Original paper: A Recurrent Latent Variable Model for Sequential Data (https://arxiv.org/pdf/1506.02216.pdf)

In [74]:
from tqdm import tqdm

import torch
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from tensorboardX import SummaryWriter

# Training hyperparameters.
batch_size, epochs = 256, 30

# Seed PyTorch's global RNG.
seed = 1
torch.manual_seed(seed)

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [75]:
# Experiment setting
# Generate MNIST images by stacking row images (each row is treated as one time step)
def init_dataset(f_batch_size):
    """Create the MNIST train and test data loaders.

    Args:
        f_batch_size: batch size used for both loaders.

    Returns:
        Tuple ``(train_loader, test_loader, fixed_t_size)`` where
        ``fixed_t_size`` is the constant 28.
    """
    root = '../data'
    loader_opts = dict(num_workers=1, pin_memory=True)
    # ToTensor() followed by taking element 0 of the resulting tensor.
    # NOTE(review): presumably this drops the channel axis so every sample is
    # a 2-D image — confirm.
    to_rows = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda data: data[0]),
    ])

    def build_loader(dataset):
        # Both splits use the same batch size, shuffling and worker options.
        return torch.utils.data.DataLoader(
            dataset, batch_size=f_batch_size, shuffle=True, **loader_opts)

    # Only the training split requests a download.
    train_loader = build_loader(
        datasets.MNIST(root, train=True, download=True, transform=to_rows))
    test_loader = build_loader(
        datasets.MNIST(root, train=False, transform=to_rows))

    return train_loader, test_loader, 28

train_loader, test_loader, t_max = init_dataset(batch_size)

In [76]:
from pixyz.models import Model
from pixyz.losses import KullbackLeibler, StochasticReconstructionLoss
from pixyz.losses import IterativeLoss
from pixyz.distributions import Bernoulli, Normal, Deterministic
from pixyz.utils import print_latex

In [77]:
# Sizes of the observation (x), hidden-state/feature (h) and latent (z) vectors.
x_dim, h_dim, z_dim = 28, 100, 64
# Number of time steps; set equal to x_dim.
t_max = x_dim

# feature extraction for x
class Phi_x(nn.Module):
    """Feature extractor for x: one linear layer (x_dim -> h_dim) followed by ReLU."""

    def __init__(self):
        super().__init__()
        self.fc0 = nn.Linear(x_dim, h_dim)

    def forward(self, x):
        """Return ReLU(fc0(x))."""
        projected = self.fc0(x)
        return F.relu(projected)


# feature extraction for z
class Phi_z(nn.Module):
    """Feature extractor for z: one linear layer (z_dim -> h_dim) followed by ReLU."""

    def __init__(self):
        super().__init__()
        self.fc0 = nn.Linear(z_dim, h_dim)

    def forward(self, z):
        """Return ReLU(fc0(z))."""
        projected = self.fc0(z)
        return F.relu(projected)

# One instance of each feature extractor, moved to the selected device.
f_phi_x, f_phi_z = Phi_x().to(device), Phi_z().to(device)

In [78]:
class Generator(Bernoulli):
    """Bernoulli (for MNIST) observation likelihood p(x_t | z_t, h_{t-1})."""

    def __init__(self):
        super().__init__(cond_var=["z", "h_prev"], var=["x"])
        # Three linear layers applied to the concatenation of phi_z(z) and h_prev.
        self.fc1 = nn.Linear(2 * h_dim, h_dim)
        self.fc2 = nn.Linear(h_dim, h_dim)
        self.fc3 = nn.Linear(h_dim, x_dim)
        # Module-level feature extractor for z.
        self.f_phi_z = f_phi_z

    def forward(self, z, h_prev):
        """Compute the Bernoulli parameters of p(x_t | z_t, h_{t-1}).

        Args:
            z: latent variable at the current time step.
            h_prev: hidden state from the previous time step.

        Returns:
            Dict with key "probs" holding the sigmoid of the last layer's output.
        """
        joint = torch.cat((self.f_phi_z(z), h_prev), dim=-1)
        hidden = F.relu(self.fc2(F.relu(self.fc1(joint))))
        logits = self.fc3(hidden)
        return {"probs": torch.sigmoid(logits)}

class Prior(Normal):
    """Normal prior p(z_t | h_{t-1}).

    Unlike a plain VAE, the prior over the latent z is parameterized by the
    previous hidden state: z ~ N(loc(h_{t-1}), scale(h_{t-1})).
    """

    def __init__(self):
        super().__init__(cond_var=["h_prev"], var=["z"])
        self.fc1 = nn.Linear(h_dim, h_dim)
        # Separate heads for the location and the (pre-softplus) scale.
        self.fc21 = nn.Linear(h_dim, z_dim)
        self.fc22 = nn.Linear(h_dim, z_dim)

    def forward(self, h_prev):
        """Return {"loc", "scale"} computed from h_prev; scale goes through softplus."""
        hidden = F.relu(self.fc1(h_prev))
        loc = self.fc21(hidden)
        scale = F.softplus(self.fc22(hidden))
        return {"loc": loc, "scale": scale}

class Inference(Normal):
    """Approximate posterior q(z_t | h_{t-1}, x_t).

    Inferred z ~ N(loc(h_{t-1}, x_t), scale(h_{t-1}, x_t)).
    """

    def __init__(self):
        super().__init__(cond_var=["x", "h_prev"], var=["z"], name="q")
        # Input is the concatenation of phi_x(x) and h_prev.
        self.fc1 = nn.Linear(2 * h_dim, h_dim)
        # Separate heads for the location and the (pre-softplus) scale.
        self.fc21 = nn.Linear(h_dim, z_dim)
        self.fc22 = nn.Linear(h_dim, z_dim)
        # Module-level feature extractor for x.
        self.f_phi_x = f_phi_x

    def forward(self, x, h_prev):
        """Return {"loc", "scale"} computed from x and h_prev; scale goes through softplus."""
        joint = torch.cat((self.f_phi_x(x), h_prev), dim=-1)
        hidden = F.relu(self.fc1(joint))
        loc = self.fc21(hidden)
        scale = F.softplus(self.fc22(hidden))
        return {"loc": loc, "scale": scale}

class Recurrence(Deterministic):
    """Deterministic hidden-state update: a GRU cell producing h from (x, z, h_prev)."""

    def __init__(self):
        super().__init__(cond_var=["x", "z", "h_prev"], var=["h"])
        # The GRU input is the concatenation of phi_z(z) and phi_x(x).
        self.rnncell = nn.GRUCell(2 * h_dim, h_dim).to(device)
        # Module-level feature extractors for x and z.
        self.f_phi_x = f_phi_x
        self.f_phi_z = f_phi_z
        # Exposed so callers can size the initial hidden state.
        self.hidden_size = self.rnncell.hidden_size

    def forward(self, x, z, h_prev):
        """Return {"h": next hidden state} from the GRU cell."""
        rnn_input = torch.cat((self.f_phi_z(z), self.f_phi_x(x)), dim=-1)
        return {"h": self.rnncell(rnn_input, h_prev)}

# Instantiate the four model components (in this order) on the selected device.
prior, decoder, encoder, recurrence = (
    Prior().to(device),
    Generator().to(device),
    Inference().to(device),
    Recurrence().to(device),
)

In [79]:
# Product of the inference distribution and the recurrence; used below as the
# sampling distribution of the reconstruction loss.
encoder_with_recurrence = encoder * recurrence
# Product of prior, decoder and recurrence; sampled from (given only h_prev)
# by the image-generation helpers.
generate_from_prior = prior * decoder * recurrence

In [80]:
# Display the encoder/recurrence product as a rendered formula.
print_latex(encoder_with_recurrence)

<IPython.core.display.Math object>

In [81]:
# Display the prior/decoder/recurrence product as a rendered formula.
print_latex(generate_from_prior)

<IPython.core.display.Math object>

In [82]:
# Reconstruction term: log-likelihood of x under the decoder, with the latent
# variables drawn from the encoder/recurrence product.
reconst = StochasticReconstructionLoss(encoder_with_recurrence, decoder)
# KL divergence between the inference distribution q(z|x,h_prev) and the
# prior p(z|h_prev).
kl = KullbackLeibler(encoder, prior)

# Loss for a single time step, averaged with .mean()
step_loss = (reconst + kl).mean()
# Accumulate the per-step loss over t_max time steps.
# NOTE(review): presumably series_var=['x'] feeds one slice of x per step and
# update_value carries each step's "h" into the next step's "h_prev" —
# confirm against the pixyz IterativeLoss documentation.
loss = IterativeLoss(step_loss, max_iter=t_max,
                     series_var=['x'],
                     update_value={"h": "h_prev"})

# Bundle the loss with all four distributions and an Adam optimizer (lr = 1e-3).
vrnn = Model(loss, distributions=[encoder, decoder, prior, recurrence],
             optimizer=optim.Adam, optimizer_params={'lr': 1e-3})

# Show the model summary as text and as a rendered formula.
print(vrnn)
print_latex(vrnn)

Distributions (for training): 
  q(z|x,h_{prev}), p(x|z,h_{prev}), p(z|h_{prev}), p(h|x,z,h_{prev}) 
Loss function: 
  \sum_{t=1}^{28} mean \left(D_{KL} \left[q(z|x,h_{prev})||p(z|h_{prev}) \right] - \mathbb{E}_{p(h,z|x,h_{prev})} \left[\log p(x|z,h_{prev}) \right] \right) 
Optimizer: 
  Adam (
  Parameter Group 0
      amsgrad: False
      betas: (0.9, 0.999)
      eps: 1e-08
      lr: 0.001
      weight_decay: 0
  )


<IPython.core.display.Math object>

In [83]:
def data_loop(epoch, loader, model, device, train_mode=False):
    """Run one pass over ``loader`` and return the dataset-averaged loss.

    Args:
        epoch: epoch number; only used in the training log line.
        loader: iterable of ``(data, label)`` batches that exposes ``.dataset``.
        model: object whose ``train``/``test`` methods take a dict with keys
            'x' and 'h_prev' and return a value supporting ``.item()``.
        device: device the batch and the initial hidden state are moved to.
        train_mode: when True call ``model.train``, otherwise ``model.test``.

    Returns:
        Sum over batches of (batch loss * batch size), divided by
        ``len(loader.dataset)``.
    """
    run_step = model.train if train_mode else model.test
    weighted_total = 0
    for images, _ in tqdm(loader):
        images = images.to(device)
        n_samples = images.size(0)
        # Swap the first two axes so the series axis comes first.
        series = images.transpose(0, 1)
        # The hidden state starts at zero for every batch.
        h_init = torch.zeros(n_samples, recurrence.hidden_size).to(device)
        batch_loss = run_step({'x': series, 'h_prev': h_init})
        # Weight by batch size so a smaller last batch is averaged correctly.
        weighted_total += batch_loss.item() * n_samples

    mean_loss = weighted_total / len(loader.dataset)
    if not train_mode:
        print('Test loss: {:.4f}'.format(mean_loss))
    else:
        print('Epoch: {} Train loss: {:.4f}'.format(epoch, mean_loss))
    return mean_loss

In [84]:
def plot_image_from_latent(batch_size):
    """Generate a batch of images by sampling the model from the prior.

    Starting from an all-zero hidden state, each of the ``t_max`` steps samples
    from ``generate_from_prior``, takes the decoder's ``sample_mean`` for the
    sampled z as that step's output, and carries the sampled ``h`` forward.

    Args:
        batch_size: number of images to generate.

    Returns:
        Tensor built by concatenating the per-step outputs along a new leading
        axis and then swapping the first two axes.
        NOTE(review): presumably (batch_size, t_max, x_dim) — confirm.
    """
    rows = []
    h_prev = torch.zeros(batch_size, recurrence.hidden_size).to(device)
    # Sampling here is for visualisation only, so no autograd graph is needed
    # across the time steps.
    with torch.no_grad():
        for _ in range(t_max):
            samples = generate_from_prior.sample({'h_prev': h_prev})
            x_t = decoder.sample_mean({"z": samples["z"], "h_prev": samples["h_prev"]})
            h_prev = samples["h"]
            rows.append(x_t[None, :])
    return torch.cat(rows, dim=0).transpose(0, 1)

In [85]:
def generate_image_after_nsteps(n_step_num, original_data):
    """Reconstruct the first rows of each image, then generate the rest.

    Steps with index ``t < n_step_num - 1`` are reconstructed from the input
    row via the encoder; all later steps are sampled from
    ``generate_from_prior``. The hidden state is carried across both phases.

    NOTE(review): with ``n_step_num = 7`` only rows 0..5 (six rows) are
    reconstructed and generation starts at row index 6 — confirm that this is
    the intended meaning of "after n steps".

    Args:
        n_step_num: step number from which rows are generated instead of
            reconstructed (see the note above for the exact boundary).
        original_data: batch of images; the first axis is the batch and the
            second axis is iterated as time.

    Returns:
        Tensor built by concatenating the per-step outputs along a new leading
        axis and then swapping the first two axes.
    """
    x = original_data.transpose(0, 1)
    # One time step per row of the input instead of a hard-coded 28.
    n_steps = x.size(0)
    batch_size = original_data.size(0)
    h_prev = torch.zeros(batch_size, recurrence.hidden_size).to(device)
    xs = []
    # Visualisation only: skip building an autograd graph across the steps.
    with torch.no_grad():
        for t in range(n_steps):
            if t < n_step_num - 1:
                # Reconstruction phase: infer z from the observed row.
                x_t = x[t]
                z_t = encoder.sample_mean({'x': x_t, 'h_prev': h_prev})
                h = recurrence.sample_mean({'x': x_t, 'h_prev': h_prev, 'z': z_t})
                dec_x = decoder.sample_mean({'h_prev': h_prev, 'z': z_t})
                h_prev = h
                xs.append(dec_x[None, :])
            else:
                # Generation phase: sample z, x and h from the prior chain.
                samples = generate_from_prior.sample({'h_prev': h_prev})
                x_t = decoder.sample_mean({"z": samples["z"], "h_prev": samples["h_prev"]})
                h_prev = samples["h"]
                xs.append(x_t[None, :])
    generated_img = torch.cat(xs, dim=0).transpose(0, 1)
    return generated_img

In [86]:
def reconst_image(original_data):
    """Reconstruct every row of each image through the encoder and decoder.

    Starting from an all-zero hidden state, each step infers z from the
    observed row, decodes it, and advances the hidden state with the
    recurrence.

    Args:
        original_data: batch of images; the first axis is the batch and the
            second axis is iterated as time.

    Returns:
        Tensor built by concatenating the per-step decoder outputs along a new
        leading axis and then swapping the first two axes.
    """
    x = original_data.transpose(0, 1)
    # One time step per row of the input instead of a hard-coded 28.
    n_steps = x.size(0)
    batch_size = original_data.size(0)
    h_prev = torch.zeros(batch_size, recurrence.hidden_size).to(device)
    xs = []
    # Visualisation only: skip building an autograd graph across the steps.
    with torch.no_grad():
        for t in range(n_steps):
            x_t = x[t]
            z_t = encoder.sample_mean({'x': x_t, 'h_prev': h_prev})
            h = recurrence.sample_mean({'x': x_t, 'h_prev': h_prev, 'z': z_t})
            # The decoder is conditioned on the hidden state from before this update.
            dec_x = decoder.sample_mean({'h_prev': h_prev, 'z': z_t})
            h_prev = h
            xs.append(dec_x[None, :])
    recon_img = torch.cat(xs, dim=0).transpose(0, 1)
    return recon_img

In [None]:
writer = SummaryWriter()
# Fixed test batch, reused every epoch so reconstruction quality can be
# compared across epochs. The built-in next() is used because iterators are
# not guaranteed to expose a `.next()` method.
_x, _ = next(iter(test_loader))
_x = _x.to(device)

try:
    for epoch in range(1, epochs + 1):
        train_loss = data_loop(epoch, train_loader, vrnn, device, train_mode=True)
        test_loss = data_loop(epoch, test_loader, vrnn, device)

        writer.add_scalar('train_loss', train_loss, epoch)
        writer.add_scalar('test_loss', test_loss, epoch)

        # `[:, None]` inserts a new axis at position 1 before logging.
        # NOTE(review): presumably the channel axis add_images expects — confirm.
        sample = plot_image_from_latent(batch_size)[:, None]
        writer.add_images('Image_from_latent', sample, epoch)
        generated_img_7 = generate_image_after_nsteps(7, _x)
        writer.add_images('Generate_after_7steps', generated_img_7[:, None], epoch)

        generated_img_14 = generate_image_after_nsteps(14, _x)
        writer.add_images('Generate_after_14steps', generated_img_14[:, None], epoch)

        recon_img = reconst_image(_x)
        writer.add_images('Reconstructed',  recon_img[:, None], epoch)

        # The tag spelling is kept as-is so existing logs keep lining up.
        writer.add_images('orignal', _x[:, None], epoch)
finally:
    # Close the writer even if training is interrupted.
    writer.close()


  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:00,  3.86it/s][A
  1%|          | 2/235 [00:00<00:54,  4.30it/s][A
  1%|▏         | 3/235 [00:00<00:49,  4.67it/s][A
  2%|▏         | 4/235 [00:00<00:46,  4.97it/s][A
  2%|▏         | 5/235 [00:00<00:44,  5.22it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.39it/s][A
  3%|▎         | 7/235 [00:01<00:42,  5.35it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.50it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.44it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.55it/s][A
  5%|▍         | 11/235 [00:02<00:40,  5.59it/s][A
  5%|▌         | 12/235 [00:02<00:39,  5.66it/s][A
  6%|▌         | 13/235 [00:02<00:40,  5.52it/s][A
  6%|▌         | 14/235 [00:02<00:39,  5.62it/s][A
  6%|▋         | 15/235 [00:02<00:39,  5.51it/s][A
  7%|▋         | 16/235 [00:02<00:39,  5.60it/s][A
  7%|▋         | 17/235 [00:03<00:39,  5.50it/s][A
  8%|▊         | 18/235 [00:03<00:38,  5.60it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:15,  5.27it/s][A
 67%|██████▋   | 157/235 [00:27<00:14,  5.26it/s][A
 67%|██████▋   | 158/235 [00:28<00:14,  5.26it/s][A
 68%|██████▊   | 159/235 [00:28<00:14,  5.25it/s][A
 68%|██████▊   | 160/235 [00:28<00:14,  5.25it/s][A
 69%|██████▊   | 161/235 [00:28<00:14,  5.24it/s][A
 69%|██████▉   | 162/235 [00:28<00:13,  5.24it/s][A
 69%|██████▉   | 163/235 [00:29<00:13,  5.25it/s][A
 70%|██████▉   | 164/235 [00:29<00:13,  5.25it/s][A
 70%|███████   | 165/235 [00:29<00:13,  5.25it/s][A
 71%|███████   | 166/235 [00:29<00:12,  5.34it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.46it/s][A
 71%|███████▏  | 168/235 [00:30<00:12,  5.54it/s][A
 72%|███████▏  | 169/235 [00:30<00:12,  5.44it/s][A
 72%|███████▏  | 170/235 [00:30<00:12,  5.39it/s][A
 73%|███████▎  | 171/235 [00:30<00:11,  5.50it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.41it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.51it/s][A
 74%|███████▍  | 174/235 [00:31<00:10,  5.62it

Epoch: 1 Train loss: 255.8023



  2%|▎         | 1/40 [00:00<00:06,  6.35it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.47it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.54it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.50it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.29it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.94it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.45it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.83it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.11it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.31it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.47it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.57it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.65it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.71it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.75it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.78it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.80it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.81it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.81it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 223.2199



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:08,  3.43it/s][A
  1%|          | 2/235 [00:00<00:59,  3.92it/s][A
  1%|▏         | 3/235 [00:00<00:53,  4.35it/s][A
  2%|▏         | 4/235 [00:00<00:48,  4.73it/s][A
  2%|▏         | 5/235 [00:00<00:45,  5.03it/s][A
  3%|▎         | 6/235 [00:01<00:43,  5.27it/s][A
  3%|▎         | 7/235 [00:01<00:42,  5.34it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.50it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.62it/s][A
  4%|▍         | 10/235 [00:01<00:39,  5.71it/s][A
  5%|▍         | 11/235 [00:01<00:38,  5.77it/s][A
  5%|▌         | 12/235 [00:02<00:38,  5.81it/s][A
  6%|▌         | 13/235 [00:02<00:37,  5.85it/s][A
  6%|▌         | 14/235 [00:02<00:37,  5.90it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.92it/s][A
  7%|▋         | 16/235 [00:02<00:36,  5.93it/s][A
  7%|▋         | 17/235 [00:03<00:36,  5.94it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.95it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.97it/s][A
 67%|██████▋   | 157/235 [00:26<00:13,  5.97it/s][A
 67%|██████▋   | 158/235 [00:26<00:12,  5.96it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.96it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.96it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.96it/s][A
 69%|██████▉   | 162/235 [00:27<00:12,  5.96it/s][A
 69%|██████▉   | 163/235 [00:27<00:12,  5.97it/s][A
 70%|██████▉   | 164/235 [00:27<00:11,  5.97it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.97it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.97it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.95it/s][A
 71%|███████▏  | 168/235 [00:28<00:11,  5.96it/s][A
 72%|███████▏  | 169/235 [00:28<00:11,  5.96it/s][A
 72%|███████▏  | 170/235 [00:28<00:10,  5.96it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.95it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.94it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.95it/s][A
 74%|███████▍  | 174/235 [00:29<00:10,  5.95it

Epoch: 2 Train loss: 218.7091



  2%|▎         | 1/40 [00:00<00:06,  6.41it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.53it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.59it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.52it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.29it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.93it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.41it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.78it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.05it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.26it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.41it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.48it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.54it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.62it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.67it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.70it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.73it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.74it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.72it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 207.2899



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.95it/s][A
  1%|          | 2/235 [00:00<00:53,  4.38it/s][A
  1%|▏         | 3/235 [00:00<00:49,  4.73it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.03it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.25it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.44it/s][A
  3%|▎         | 7/235 [00:01<00:41,  5.55it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.47it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.41it/s][A
  4%|▍         | 10/235 [00:01<00:41,  5.38it/s][A
  5%|▍         | 11/235 [00:02<00:41,  5.34it/s][A
  5%|▌         | 12/235 [00:02<00:40,  5.49it/s][A
  6%|▌         | 13/235 [00:02<00:39,  5.60it/s][A
  6%|▌         | 14/235 [00:02<00:40,  5.50it/s][A
  6%|▋         | 15/235 [00:02<00:40,  5.44it/s][A
  7%|▋         | 16/235 [00:02<00:39,  5.58it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.68it/s][A
  8%|▊         | 18/235 [00:03<00:37,  5.75it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:14,  5.55it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.66it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.56it/s][A
 68%|██████▊   | 159/235 [00:27<00:13,  5.67it/s][A
 68%|██████▊   | 160/235 [00:27<00:13,  5.56it/s][A
 69%|██████▊   | 161/235 [00:27<00:13,  5.68it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.74it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.62it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.73it/s][A
 70%|███████   | 165/235 [00:28<00:12,  5.80it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.86it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.71it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.79it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.85it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.88it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.90it/s][A
 73%|███████▎  | 172/235 [00:29<00:11,  5.71it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.77it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.82it

Epoch: 3 Train loss: 205.6511



  2%|▎         | 1/40 [00:00<00:06,  5.60it/s][A
  8%|▊         | 3/40 [00:00<00:05,  6.73it/s][A
 10%|█         | 4/40 [00:00<00:04,  7.45it/s][A
 15%|█▌        | 6/40 [00:00<00:04,  8.32it/s][A
 20%|██        | 8/40 [00:00<00:03,  9.28it/s][A
 25%|██▌       | 10/40 [00:00<00:03,  9.83it/s][A
 30%|███       | 12/40 [00:01<00:02, 10.59it/s][A
 35%|███▌      | 14/40 [00:01<00:02, 11.20it/s][A
 40%|████      | 16/40 [00:01<00:02, 11.68it/s][A
 45%|████▌     | 18/40 [00:01<00:01, 12.02it/s][A
 50%|█████     | 20/40 [00:01<00:01, 12.28it/s][A
 55%|█████▌    | 22/40 [00:01<00:01, 12.47it/s][A
 60%|██████    | 24/40 [00:02<00:01, 12.59it/s][A
 65%|██████▌   | 26/40 [00:02<00:01, 12.69it/s][A
 70%|███████   | 28/40 [00:02<00:00, 12.76it/s][A
 75%|███████▌  | 30/40 [00:02<00:00, 12.80it/s][A
 80%|████████  | 32/40 [00:02<00:00, 12.83it/s][A
 85%|████████▌ | 34/40 [00:02<00:00, 12.84it/s][A
 90%|█████████ | 36/40 [00:02<00:00, 12.87it/s][A
 95%|█████████▌| 38/40 [00:03<00:00

Test loss: 203.0328



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:58,  3.98it/s][A
  1%|          | 2/235 [00:00<00:52,  4.42it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.75it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.06it/s][A
  2%|▏         | 5/235 [00:00<00:44,  5.15it/s][A
  3%|▎         | 6/235 [00:01<00:44,  5.16it/s][A
  3%|▎         | 7/235 [00:01<00:44,  5.17it/s][A
  3%|▎         | 8/235 [00:01<00:42,  5.36it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.52it/s][A
  4%|▍         | 10/235 [00:01<00:39,  5.65it/s][A
  5%|▍         | 11/235 [00:02<00:39,  5.61it/s][A
  5%|▌         | 12/235 [00:02<00:39,  5.71it/s][A
  6%|▌         | 13/235 [00:02<00:38,  5.78it/s][A
  6%|▌         | 14/235 [00:02<00:39,  5.64it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.70it/s][A
  7%|▋         | 16/235 [00:02<00:38,  5.75it/s][A
  7%|▋         | 17/235 [00:03<00:37,  5.76it/s][A
  8%|▊         | 18/235 [00:03<00:37,  5.81it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.84it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.88it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.90it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.91it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.93it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.91it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.93it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.93it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.89it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.90it/s][A
 71%|███████   | 166/235 [00:28<00:12,  5.70it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.58it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.49it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.60it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.51it/s][A
 73%|███████▎  | 171/235 [00:29<00:11,  5.61it/s][A
 73%|███████▎  | 172/235 [00:29<00:11,  5.53it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.62it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.52it

Epoch: 4 Train loss: 198.7287



  2%|▎         | 1/40 [00:00<00:06,  6.47it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.58it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.65it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.58it/s][A
 22%|██▎       | 9/40 [00:00<00:02, 10.38it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 11.03it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.53it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.92it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.20it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.39it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.55it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.65it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.73it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.76it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.76it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.81it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.83it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.84it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.84it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 196.2658



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.96it/s][A
  1%|          | 2/235 [00:00<00:53,  4.39it/s][A
  1%|▏         | 3/235 [00:00<00:50,  4.63it/s][A
  2%|▏         | 4/235 [00:00<00:46,  4.93it/s][A
  2%|▏         | 5/235 [00:00<00:45,  5.04it/s][A
  3%|▎         | 6/235 [00:01<00:44,  5.12it/s][A
  3%|▎         | 7/235 [00:01<00:43,  5.23it/s][A
  3%|▎         | 8/235 [00:01<00:42,  5.28it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.45it/s][A
  4%|▍         | 10/235 [00:01<00:41,  5.41it/s][A
  5%|▍         | 11/235 [00:02<00:40,  5.54it/s][A
  5%|▌         | 12/235 [00:02<00:40,  5.46it/s][A
  6%|▌         | 13/235 [00:02<00:39,  5.58it/s][A
  6%|▌         | 14/235 [00:02<00:40,  5.49it/s][A
  6%|▋         | 15/235 [00:02<00:40,  5.43it/s][A
  7%|▋         | 16/235 [00:02<00:39,  5.57it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.67it/s][A
  8%|▊         | 18/235 [00:03<00:39,  5.55it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:14,  5.62it/s][A
 67%|██████▋   | 157/235 [00:27<00:14,  5.53it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.61it/s][A
 68%|██████▊   | 159/235 [00:28<00:13,  5.52it/s][A
 68%|██████▊   | 160/235 [00:28<00:13,  5.45it/s][A
 69%|██████▊   | 161/235 [00:28<00:13,  5.57it/s][A
 69%|██████▉   | 162/235 [00:28<00:13,  5.49it/s][A
 69%|██████▉   | 163/235 [00:28<00:13,  5.43it/s][A
 70%|██████▉   | 164/235 [00:29<00:13,  5.39it/s][A
 70%|███████   | 165/235 [00:29<00:13,  5.35it/s][A
 71%|███████   | 166/235 [00:29<00:12,  5.33it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.32it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.30it/s][A
 72%|███████▏  | 169/235 [00:30<00:12,  5.29it/s][A
 72%|███████▏  | 170/235 [00:30<00:12,  5.28it/s][A
 73%|███████▎  | 171/235 [00:30<00:12,  5.29it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.45it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.41it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.37it

Epoch: 5 Train loss: 193.3568



  2%|▎         | 1/40 [00:00<00:06,  6.40it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.52it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.60it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.55it/s][A
 22%|██▎       | 9/40 [00:00<00:02, 10.35it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 11.00it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.51it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.90it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.18it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.38it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.56it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.66it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.74it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.77it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.80it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.82it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.85it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.85it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.87it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 189.4892



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.95it/s][A
  1%|          | 2/235 [00:00<00:53,  4.38it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.74it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.03it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.23it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.42it/s][A
  3%|▎         | 7/235 [00:01<00:40,  5.57it/s][A
  3%|▎         | 8/235 [00:01<00:39,  5.68it/s][A
  4%|▍         | 9/235 [00:01<00:39,  5.75it/s][A
  4%|▍         | 10/235 [00:01<00:38,  5.79it/s][A
  5%|▍         | 11/235 [00:01<00:38,  5.82it/s][A
  5%|▌         | 12/235 [00:02<00:39,  5.66it/s][A
  6%|▌         | 13/235 [00:02<00:38,  5.73it/s][A
  6%|▌         | 14/235 [00:02<00:39,  5.60it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.67it/s][A
  7%|▋         | 16/235 [00:02<00:39,  5.55it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.64it/s][A
  8%|▊         | 18/235 [00:03<00:39,  5.54it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:14,  5.47it/s][A
 67%|██████▋   | 157/235 [00:27<00:14,  5.43it/s][A
 67%|██████▋   | 158/235 [00:27<00:14,  5.40it/s][A
 68%|██████▊   | 159/235 [00:27<00:14,  5.36it/s][A
 68%|██████▊   | 160/235 [00:28<00:14,  5.35it/s][A
 69%|██████▊   | 161/235 [00:28<00:13,  5.51it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.63it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.71it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.75it/s][A
 70%|███████   | 165/235 [00:28<00:12,  5.80it/s][A
 71%|███████   | 166/235 [00:29<00:11,  5.83it/s][A
 71%|███████   | 167/235 [00:29<00:11,  5.86it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.89it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.90it/s][A
 72%|███████▏  | 170/235 [00:29<00:10,  5.91it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.92it/s][A
 73%|███████▎  | 172/235 [00:30<00:10,  5.91it/s][A
 74%|███████▎  | 173/235 [00:30<00:10,  5.92it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.93it

Epoch: 6 Train loss: 189.2848



  2%|▎         | 1/40 [00:00<00:06,  6.31it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.43it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.50it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.45it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.26it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.90it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.41it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.79it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.07it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.29it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.44it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.54it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.63it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.64it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.69it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.74it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.76it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.78it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.79it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 187.5977



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.92it/s][A
  1%|          | 2/235 [00:00<00:53,  4.37it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.74it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.06it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.30it/s][A
  3%|▎         | 6/235 [00:01<00:41,  5.47it/s][A
  3%|▎         | 7/235 [00:01<00:40,  5.61it/s][A
  3%|▎         | 8/235 [00:01<00:39,  5.71it/s][A
  4%|▍         | 9/235 [00:01<00:39,  5.77it/s][A
  4%|▍         | 10/235 [00:01<00:38,  5.82it/s][A
  5%|▍         | 11/235 [00:01<00:38,  5.86it/s][A
  5%|▌         | 12/235 [00:02<00:37,  5.90it/s][A
  6%|▌         | 13/235 [00:02<00:37,  5.91it/s][A
  6%|▌         | 14/235 [00:02<00:37,  5.91it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.93it/s][A
  7%|▋         | 16/235 [00:02<00:36,  5.93it/s][A
  7%|▋         | 17/235 [00:02<00:36,  5.94it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.95it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.90it/s][A
 67%|██████▋   | 157/235 [00:26<00:13,  5.92it/s][A
 67%|██████▋   | 158/235 [00:26<00:12,  5.94it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.94it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.95it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.93it/s][A
 69%|██████▉   | 162/235 [00:27<00:12,  5.91it/s][A
 69%|██████▉   | 163/235 [00:27<00:12,  5.93it/s][A
 70%|██████▉   | 164/235 [00:27<00:12,  5.91it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.92it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.93it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.95it/s][A
 71%|███████▏  | 168/235 [00:28<00:11,  5.95it/s][A
 72%|███████▏  | 169/235 [00:28<00:11,  5.96it/s][A
 72%|███████▏  | 170/235 [00:28<00:10,  5.95it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.95it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.96it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.95it/s][A
 74%|███████▍  | 174/235 [00:29<00:10,  5.95it

Epoch: 7 Train loss: 186.3502



  2%|▎         | 1/40 [00:00<00:06,  6.31it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.43it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.51it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.46it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.27it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.92it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.42it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.80it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.09it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.30it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.46it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.56it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.64it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.69it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.73it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.77it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.79it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.80it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.82it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 184.4267



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:58,  3.99it/s][A
  1%|          | 2/235 [00:00<00:52,  4.42it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.77it/s][A
  2%|▏         | 4/235 [00:00<00:46,  4.92it/s][A
  2%|▏         | 5/235 [00:00<00:44,  5.17it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.38it/s][A
  3%|▎         | 7/235 [00:01<00:41,  5.54it/s][A
  3%|▎         | 8/235 [00:01<00:40,  5.66it/s][A
  4%|▍         | 9/235 [00:01<00:39,  5.74it/s][A
  4%|▍         | 10/235 [00:01<00:38,  5.81it/s][A
  5%|▍         | 11/235 [00:01<00:38,  5.85it/s][A
  5%|▌         | 12/235 [00:02<00:37,  5.87it/s][A
  6%|▌         | 13/235 [00:02<00:37,  5.90it/s][A
  6%|▌         | 14/235 [00:02<00:37,  5.89it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.91it/s][A
  7%|▋         | 16/235 [00:02<00:36,  5.93it/s][A
  7%|▋         | 17/235 [00:02<00:36,  5.94it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.93it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.96it/s][A
 67%|██████▋   | 157/235 [00:26<00:13,  5.97it/s][A
 67%|██████▋   | 158/235 [00:26<00:12,  5.94it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.94it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.96it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.94it/s][A
 69%|██████▉   | 162/235 [00:27<00:12,  5.93it/s][A
 69%|██████▉   | 163/235 [00:27<00:12,  5.94it/s][A
 70%|██████▉   | 164/235 [00:27<00:11,  5.95it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.96it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.96it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.97it/s][A
 71%|███████▏  | 168/235 [00:28<00:11,  5.97it/s][A
 72%|███████▏  | 169/235 [00:28<00:11,  5.97it/s][A
 72%|███████▏  | 170/235 [00:28<00:10,  5.97it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.97it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.97it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.96it/s][A
 74%|███████▍  | 174/235 [00:29<00:10,  5.96it

Epoch: 8 Train loss: 184.7236



  2%|▎         | 1/40 [00:00<00:06,  6.25it/s][A
  8%|▊         | 3/40 [00:00<00:05,  7.37it/s][A
 10%|█         | 4/40 [00:00<00:04,  8.00it/s][A
 15%|█▌        | 6/40 [00:00<00:03,  9.01it/s][A
 20%|██        | 8/40 [00:00<00:03,  9.61it/s][A
 25%|██▌       | 10/40 [00:00<00:02, 10.39it/s][A
 30%|███       | 12/40 [00:01<00:02, 10.99it/s][A
 35%|███▌      | 14/40 [00:01<00:02, 11.43it/s][A
 40%|████      | 16/40 [00:01<00:02, 11.79it/s][A
 45%|████▌     | 18/40 [00:01<00:01, 12.05it/s][A
 50%|█████     | 20/40 [00:01<00:01, 12.24it/s][A
 55%|█████▌    | 22/40 [00:01<00:01, 12.37it/s][A
 60%|██████    | 24/40 [00:02<00:01, 12.47it/s][A
 65%|██████▌   | 26/40 [00:02<00:01, 12.54it/s][A
 70%|███████   | 28/40 [00:02<00:00, 12.59it/s][A
 75%|███████▌  | 30/40 [00:02<00:00, 12.61it/s][A
 80%|████████  | 32/40 [00:02<00:00, 12.63it/s][A
 85%|████████▌ | 34/40 [00:02<00:00, 12.65it/s][A
 90%|█████████ | 36/40 [00:02<00:00, 12.68it/s][A
 95%|█████████▌| 38/40 [00:03<00:00

Test loss: 183.3806



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:00,  3.88it/s][A
  1%|          | 2/235 [00:00<00:53,  4.33it/s][A
  1%|▏         | 3/235 [00:00<00:49,  4.72it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.02it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.27it/s][A
  3%|▎         | 6/235 [00:01<00:41,  5.46it/s][A
  3%|▎         | 7/235 [00:01<00:40,  5.58it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.49it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.62it/s][A
  4%|▍         | 10/235 [00:01<00:39,  5.72it/s][A
  5%|▍         | 11/235 [00:01<00:38,  5.80it/s][A
  5%|▌         | 12/235 [00:02<00:38,  5.83it/s][A
  6%|▌         | 13/235 [00:02<00:38,  5.79it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.68it/s][A
  6%|▋         | 15/235 [00:02<00:39,  5.56it/s][A
  7%|▋         | 16/235 [00:02<00:38,  5.65it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.73it/s][A
  8%|▊         | 18/235 [00:03<00:38,  5.60it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:14,  5.48it/s][A
 67%|██████▋   | 157/235 [00:27<00:14,  5.39it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.53it/s][A
 68%|██████▊   | 159/235 [00:27<00:13,  5.63it/s][A
 68%|██████▊   | 160/235 [00:28<00:13,  5.51it/s][A
 69%|██████▊   | 161/235 [00:28<00:13,  5.33it/s][A
 69%|██████▉   | 162/235 [00:28<00:13,  5.29it/s][A
 69%|██████▉   | 163/235 [00:28<00:13,  5.29it/s][A
 70%|██████▉   | 164/235 [00:28<00:13,  5.29it/s][A
 70%|███████   | 165/235 [00:29<00:13,  5.30it/s][A
 71%|███████   | 166/235 [00:29<00:12,  5.47it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.42it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.55it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.66it/s][A
 72%|███████▏  | 170/235 [00:30<00:11,  5.73it/s][A
 73%|███████▎  | 171/235 [00:30<00:11,  5.60it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.50it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.45it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.41it

Epoch: 9 Train loss: 184.1277



  2%|▎         | 1/40 [00:00<00:06,  6.32it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.44it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.51it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.45it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.25it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.92it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.44it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.83it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.12it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.32it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.48it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.59it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.67it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.73it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.76it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.77it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.69it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.73it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.78it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 183.5424



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:58,  3.97it/s][A
  1%|          | 2/235 [00:00<00:53,  4.38it/s][A
  1%|▏         | 3/235 [00:00<00:50,  4.63it/s][A
  2%|▏         | 4/235 [00:00<00:46,  4.94it/s][A
  2%|▏         | 5/235 [00:00<00:45,  5.04it/s][A
  3%|▎         | 6/235 [00:01<00:43,  5.26it/s][A
  3%|▎         | 7/235 [00:01<00:42,  5.43it/s][A
  3%|▎         | 8/235 [00:01<00:42,  5.39it/s][A
  4%|▍         | 9/235 [00:01<00:42,  5.35it/s][A
  4%|▍         | 10/235 [00:01<00:42,  5.33it/s][A
  5%|▍         | 11/235 [00:02<00:42,  5.32it/s][A
  5%|▌         | 12/235 [00:02<00:42,  5.30it/s][A
  6%|▌         | 13/235 [00:02<00:41,  5.29it/s][A
  6%|▌         | 14/235 [00:02<00:41,  5.29it/s][A
  6%|▋         | 15/235 [00:02<00:41,  5.29it/s][A
  7%|▋         | 16/235 [00:02<00:40,  5.45it/s][A
  7%|▋         | 17/235 [00:03<00:40,  5.39it/s][A
  8%|▊         | 18/235 [00:03<00:39,  5.51it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.91it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.90it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.88it/s][A
 68%|██████▊   | 159/235 [00:28<00:12,  5.88it/s][A
 68%|██████▊   | 160/235 [00:28<00:12,  5.87it/s][A
 69%|██████▊   | 161/235 [00:28<00:12,  5.87it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.87it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.85it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.65it/s][A
 70%|███████   | 165/235 [00:29<00:12,  5.52it/s][A
 71%|███████   | 166/235 [00:29<00:12,  5.44it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.37it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.33it/s][A
 72%|███████▏  | 169/235 [00:29<00:12,  5.31it/s][A
 72%|███████▏  | 170/235 [00:30<00:12,  5.29it/s][A
 73%|███████▎  | 171/235 [00:30<00:12,  5.28it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.25it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.25it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.25it

Epoch: 10 Train loss: 183.6279



  2%|▎         | 1/40 [00:00<00:06,  6.37it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.48it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.52it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.45it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.24it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.86it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 10.99it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.44it/s][A
 42%|████▎     | 17/40 [00:01<00:02, 11.40it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 11.73it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 11.61it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 11.90it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.14it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.20it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.36it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.47it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.56it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.60it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.65it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 182.4944



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.94it/s][A
  1%|          | 2/235 [00:00<00:55,  4.17it/s][A
  1%|▏         | 3/235 [00:00<00:52,  4.44it/s][A
  2%|▏         | 4/235 [00:00<00:49,  4.66it/s][A
  2%|▏         | 5/235 [00:01<00:46,  4.97it/s][A
  3%|▎         | 6/235 [00:01<00:44,  5.20it/s][A
  3%|▎         | 7/235 [00:01<00:43,  5.22it/s][A
  3%|▎         | 8/235 [00:01<00:42,  5.38it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.52it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.62it/s][A
  5%|▍         | 11/235 [00:02<00:39,  5.68it/s][A
  5%|▌         | 12/235 [00:02<00:38,  5.73it/s][A
  6%|▌         | 13/235 [00:02<00:38,  5.77it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.79it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.81it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.80it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.64it/s][A
  8%|▊         | 18/235 [00:03<00:38,  5.69it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.87it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.86it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.86it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.86it/s][A
 68%|██████▊   | 160/235 [00:27<00:13,  5.68it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.76it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.79it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.83it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.87it/s][A
 70%|███████   | 165/235 [00:28<00:12,  5.68it/s][A
 71%|███████   | 166/235 [00:28<00:12,  5.56it/s][A
 71%|███████   | 167/235 [00:28<00:12,  5.66it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.72it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.75it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.78it/s][A
 73%|███████▎  | 171/235 [00:29<00:11,  5.80it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.82it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.83it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.86it

Epoch: 11 Train loss: 183.2425



  2%|▎         | 1/40 [00:00<00:06,  6.18it/s][A
  5%|▌         | 2/40 [00:00<00:05,  6.97it/s][A
 10%|█         | 4/40 [00:00<00:04,  7.89it/s][A
 15%|█▌        | 6/40 [00:00<00:03,  8.69it/s][A
 20%|██        | 8/40 [00:00<00:03,  9.63it/s][A
 25%|██▌       | 10/40 [00:00<00:02, 10.36it/s][A
 30%|███       | 12/40 [00:01<00:02, 10.97it/s][A
 35%|███▌      | 14/40 [00:01<00:02, 11.44it/s][A
 40%|████      | 16/40 [00:01<00:02, 11.81it/s][A
 45%|████▌     | 18/40 [00:01<00:01, 12.08it/s][A
 50%|█████     | 20/40 [00:01<00:01, 12.27it/s][A
 55%|█████▌    | 22/40 [00:01<00:01, 12.42it/s][A
 60%|██████    | 24/40 [00:02<00:01, 12.52it/s][A
 65%|██████▌   | 26/40 [00:02<00:01, 12.60it/s][A
 70%|███████   | 28/40 [00:02<00:00, 12.65it/s][A
 75%|███████▌  | 30/40 [00:02<00:00, 12.67it/s][A
 80%|████████  | 32/40 [00:02<00:00, 12.69it/s][A
 85%|████████▌ | 34/40 [00:02<00:00, 12.71it/s][A
 90%|█████████ | 36/40 [00:02<00:00, 12.72it/s][A
 95%|█████████▌| 38/40 [00:03<00:00

Test loss: 182.5409



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:05,  3.55it/s][A
  1%|          | 2/235 [00:00<00:57,  4.03it/s][A
  1%|▏         | 3/235 [00:00<00:52,  4.45it/s][A
  2%|▏         | 4/235 [00:00<00:48,  4.80it/s][A
  2%|▏         | 5/235 [00:00<00:45,  5.07it/s][A
  3%|▎         | 6/235 [00:01<00:43,  5.29it/s][A
  3%|▎         | 7/235 [00:01<00:41,  5.45it/s][A
  3%|▎         | 8/235 [00:01<00:40,  5.56it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.48it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.59it/s][A
  5%|▍         | 11/235 [00:02<00:39,  5.67it/s][A
  5%|▌         | 12/235 [00:02<00:38,  5.73it/s][A
  6%|▌         | 13/235 [00:02<00:38,  5.77it/s][A
  6%|▌         | 14/235 [00:02<00:37,  5.82it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.84it/s][A
  7%|▋         | 16/235 [00:02<00:38,  5.68it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.70it/s][A
  8%|▊         | 18/235 [00:03<00:37,  5.76it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.92it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.93it/s][A
 67%|██████▋   | 158/235 [00:27<00:12,  5.94it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.94it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.93it/s][A
 69%|██████▊   | 161/235 [00:28<00:12,  5.92it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.91it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.91it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.90it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.90it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.90it/s][A
 71%|███████   | 167/235 [00:29<00:11,  5.89it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.89it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.89it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.87it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.88it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.88it/s][A
 74%|███████▎  | 173/235 [00:30<00:10,  5.89it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.90it

Epoch: 12 Train loss: 182.8749



  2%|▎         | 1/40 [00:00<00:06,  6.36it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.48it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.53it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.47it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.27it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.90it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.40it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.77it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.05it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.25it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.40it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.50it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.57it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.63it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.66it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.69it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.71it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.72it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.72it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 181.7049



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:05,  3.57it/s][A
  1%|          | 2/235 [00:00<00:57,  4.05it/s][A
  1%|▏         | 3/235 [00:00<00:52,  4.46it/s][A
  2%|▏         | 4/235 [00:00<00:49,  4.68it/s][A
  2%|▏         | 5/235 [00:01<00:47,  4.84it/s][A
  3%|▎         | 6/235 [00:01<00:46,  4.94it/s][A
  3%|▎         | 7/235 [00:01<00:45,  5.03it/s][A
  3%|▎         | 8/235 [00:01<00:44,  5.09it/s][A
  4%|▍         | 9/235 [00:01<00:43,  5.14it/s][A
  4%|▍         | 10/235 [00:01<00:43,  5.16it/s][A
  5%|▍         | 11/235 [00:02<00:41,  5.36it/s][A
  5%|▌         | 12/235 [00:02<00:41,  5.33it/s][A
  6%|▌         | 13/235 [00:02<00:40,  5.46it/s][A
  6%|▌         | 14/235 [00:02<00:39,  5.58it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.66it/s][A
  7%|▋         | 16/235 [00:03<00:39,  5.55it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.64it/s][A
  8%|▊         | 18/235 [00:03<00:38,  5.70it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.85it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.85it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.84it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.85it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.86it/s][A
 69%|██████▊   | 161/235 [00:28<00:12,  5.87it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.88it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.90it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.89it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.89it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.90it/s][A
 71%|███████   | 167/235 [00:29<00:11,  5.91it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.93it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.74it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.61it/s][A
 73%|███████▎  | 171/235 [00:29<00:11,  5.50it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.42it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.54it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.63it

Epoch: 13 Train loss: 182.5064



  2%|▎         | 1/40 [00:00<00:06,  6.43it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.54it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.59it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.53it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.33it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.97it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.46it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.81it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.10it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.28it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.40it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.50it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.57it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.61it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.64it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.67it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.69it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.70it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.70it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 181.5097



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:58,  3.99it/s][A
  1%|          | 2/235 [00:00<00:52,  4.42it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.78it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.03it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.25it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.43it/s][A
  3%|▎         | 7/235 [00:01<00:40,  5.56it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.48it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.42it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.55it/s][A
  5%|▍         | 11/235 [00:01<00:39,  5.65it/s][A
  5%|▌         | 12/235 [00:02<00:38,  5.72it/s][A
  6%|▌         | 13/235 [00:02<00:38,  5.78it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.82it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.83it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.86it/s][A
  7%|▋         | 17/235 [00:03<00:37,  5.86it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.87it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.92it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.92it/s][A
 67%|██████▋   | 158/235 [00:27<00:12,  5.92it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.92it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.93it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.92it/s][A
 69%|██████▉   | 162/235 [00:27<00:12,  5.93it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.93it/s][A
 70%|██████▉   | 164/235 [00:28<00:11,  5.93it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.93it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.90it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.69it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.56it/s][A
 72%|███████▏  | 169/235 [00:29<00:12,  5.47it/s][A
 72%|███████▏  | 170/235 [00:29<00:12,  5.42it/s][A
 73%|███████▎  | 171/235 [00:29<00:11,  5.38it/s][A
 73%|███████▎  | 172/235 [00:29<00:11,  5.50it/s][A
 74%|███████▎  | 173/235 [00:29<00:11,  5.60it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.50it

Epoch: 14 Train loss: 182.2228



  2%|▎         | 1/40 [00:00<00:06,  6.19it/s][A
  8%|▊         | 3/40 [00:00<00:05,  7.31it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.38it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.34it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.15it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.80it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.32it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.70it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 11.99it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.19it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.36it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.05it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.28it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 11.98it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.19it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.34it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.45it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.53it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.61it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 181.3245



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.95it/s][A
  1%|          | 2/235 [00:00<00:53,  4.38it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.75it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.04it/s][A
  2%|▏         | 5/235 [00:00<00:45,  5.10it/s][A
  3%|▎         | 6/235 [00:01<00:43,  5.30it/s][A
  3%|▎         | 7/235 [00:01<00:42,  5.30it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.46it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.41it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.55it/s][A
  5%|▍         | 11/235 [00:02<00:40,  5.46it/s][A
  5%|▌         | 12/235 [00:02<00:39,  5.58it/s][A
  6%|▌         | 13/235 [00:02<00:40,  5.49it/s][A
  6%|▌         | 14/235 [00:02<00:39,  5.59it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.69it/s][A
  7%|▋         | 16/235 [00:02<00:38,  5.76it/s][A
  7%|▋         | 17/235 [00:03<00:37,  5.80it/s][A
  8%|▊         | 18/235 [00:03<00:37,  5.84it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.92it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.94it/s][A
 67%|██████▋   | 158/235 [00:27<00:12,  5.95it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.95it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.95it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.95it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.75it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.61it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.52it/s][A
 70%|███████   | 165/235 [00:28<00:12,  5.45it/s][A
 71%|███████   | 166/235 [00:28<00:12,  5.41it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.38it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.37it/s][A
 72%|███████▏  | 169/235 [00:29<00:12,  5.49it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.61it/s][A
 73%|███████▎  | 171/235 [00:29<00:11,  5.70it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.78it/s][A
 74%|███████▎  | 173/235 [00:30<00:10,  5.83it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.86it

Epoch: 15 Train loss: 181.8142



  2%|▎         | 1/40 [00:00<00:06,  6.17it/s][A
  8%|▊         | 3/40 [00:00<00:05,  7.30it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.38it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.35it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.18it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.86it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 10.67it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.19it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 11.64it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 11.97it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.23it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.41it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.54it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.60it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.67it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.72it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.76it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.79it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.82it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 180.6552



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.93it/s][A
  1%|          | 2/235 [00:00<00:53,  4.36it/s][A
  1%|▏         | 3/235 [00:00<00:49,  4.72it/s][A
  2%|▏         | 4/235 [00:00<00:46,  5.02it/s][A
  2%|▏         | 5/235 [00:00<00:45,  5.09it/s][A
  3%|▎         | 6/235 [00:01<00:43,  5.31it/s][A
  3%|▎         | 7/235 [00:01<00:42,  5.31it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.47it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.40it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.52it/s][A
  5%|▍         | 11/235 [00:02<00:41,  5.44it/s][A
  5%|▌         | 12/235 [00:02<00:40,  5.56it/s][A
  6%|▌         | 13/235 [00:02<00:39,  5.65it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.71it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.77it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.81it/s][A
  7%|▋         | 17/235 [00:03<00:37,  5.84it/s][A
  8%|▊         | 18/235 [00:03<00:37,  5.86it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.92it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.93it/s][A
 67%|██████▋   | 158/235 [00:27<00:12,  5.93it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.92it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.91it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.91it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.90it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.90it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.87it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.89it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.90it/s][A
 71%|███████   | 167/235 [00:29<00:11,  5.91it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.92it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.91it/s][A
 72%|███████▏  | 170/235 [00:29<00:10,  5.92it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.93it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.93it/s][A
 74%|███████▎  | 173/235 [00:30<00:10,  5.93it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.94it

Epoch: 16 Train loss: 181.2845



  2%|▎         | 1/40 [00:00<00:06,  5.91it/s][A
  8%|▊         | 3/40 [00:00<00:05,  7.03it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.12it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  8.89it/s][A
 22%|██▎       | 9/40 [00:00<00:03,  9.75it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.49it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.07it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.52it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 11.86it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.10it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.27it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.41it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.51it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.57it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.63it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.66it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.68it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.70it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.71it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 180.5903



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.91it/s][A
  1%|          | 2/235 [00:00<00:53,  4.34it/s][A
  1%|▏         | 3/235 [00:00<00:49,  4.71it/s][A
  2%|▏         | 4/235 [00:00<00:46,  5.01it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.25it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.43it/s][A
  3%|▎         | 7/235 [00:01<00:40,  5.57it/s][A
  3%|▎         | 8/235 [00:01<00:40,  5.66it/s][A
  4%|▍         | 9/235 [00:01<00:39,  5.73it/s][A
  4%|▍         | 10/235 [00:01<00:38,  5.78it/s][A
  5%|▍         | 11/235 [00:01<00:38,  5.83it/s][A
  5%|▌         | 12/235 [00:02<00:37,  5.87it/s][A
  6%|▌         | 13/235 [00:02<00:37,  5.88it/s][A
  6%|▌         | 14/235 [00:02<00:37,  5.90it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.91it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.90it/s][A
  7%|▋         | 17/235 [00:02<00:36,  5.92it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.93it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.85it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.85it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.85it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.85it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.87it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.88it/s][A
 69%|██████▉   | 162/235 [00:27<00:12,  5.89it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.90it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.89it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.89it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.87it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.87it/s][A
 71%|███████▏  | 168/235 [00:28<00:11,  5.86it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.86it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.86it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.86it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.86it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.86it/s][A
 74%|███████▍  | 174/235 [00:29<00:10,  5.87it

Epoch: 17 Train loss: 180.2587



  2%|▎         | 1/40 [00:00<00:06,  6.41it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.53it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.38it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.11it/s][A
 22%|██▎       | 9/40 [00:00<00:03,  9.42it/s][A
 28%|██▊       | 11/40 [00:01<00:02,  9.82it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 10.55it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.13it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 11.58it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 11.91it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.12it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.29it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.40it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.48it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.54it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.59it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.62it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.58it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.60it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 178.5254



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:00,  3.89it/s][A
  1%|          | 2/235 [00:00<00:53,  4.33it/s][A
  1%|▏         | 3/235 [00:00<00:49,  4.68it/s][A
  2%|▏         | 4/235 [00:00<00:46,  4.97it/s][A
  2%|▏         | 5/235 [00:00<00:44,  5.20it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.38it/s][A
  3%|▎         | 7/235 [00:01<00:41,  5.52it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.44it/s][A
  4%|▍         | 9/235 [00:01<00:41,  5.38it/s][A
  4%|▍         | 10/235 [00:01<00:42,  5.35it/s][A
  5%|▍         | 11/235 [00:02<00:42,  5.33it/s][A
  5%|▌         | 12/235 [00:02<00:40,  5.48it/s][A
  6%|▌         | 13/235 [00:02<00:39,  5.59it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.68it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.74it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.78it/s][A
  7%|▋         | 17/235 [00:03<00:37,  5.82it/s][A
  8%|▊         | 18/235 [00:03<00:37,  5.86it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:13,  5.70it/s][A
 67%|██████▋   | 157/235 [00:27<00:14,  5.57it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.66it/s][A
 68%|██████▊   | 159/235 [00:27<00:13,  5.73it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.78it/s][A
 69%|██████▊   | 161/235 [00:28<00:13,  5.62it/s][A
 69%|██████▉   | 162/235 [00:28<00:13,  5.53it/s][A
 69%|██████▉   | 163/235 [00:28<00:13,  5.46it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.58it/s][A
 70%|███████   | 165/235 [00:28<00:12,  5.49it/s][A
 71%|███████   | 166/235 [00:29<00:12,  5.44it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.58it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.67it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.56it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.47it/s][A
 73%|███████▎  | 171/235 [00:29<00:11,  5.42it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.38it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.36it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.34it

Epoch: 18 Train loss: 179.0203



  2%|▎         | 1/40 [00:00<00:06,  6.31it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.42it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.48it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.43it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.23it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.87it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.37it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.76it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.05it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.28it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.44it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.55it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.64it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.70it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.74it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.76it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.78it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.79it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.80it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 177.8565



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:59,  3.90it/s][A
  1%|          | 2/235 [00:00<00:55,  4.22it/s][A
  1%|▏         | 3/235 [00:00<00:51,  4.49it/s][A
  2%|▏         | 4/235 [00:00<00:49,  4.69it/s][A
  2%|▏         | 5/235 [00:01<00:47,  4.84it/s][A
  3%|▎         | 6/235 [00:01<00:45,  5.04it/s][A
  3%|▎         | 7/235 [00:01<00:43,  5.26it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.43it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.56it/s][A
  4%|▍         | 10/235 [00:01<00:39,  5.64it/s][A
  5%|▍         | 11/235 [00:02<00:40,  5.51it/s][A
  5%|▌         | 12/235 [00:02<00:39,  5.61it/s][A
  6%|▌         | 13/235 [00:02<00:40,  5.51it/s][A
  6%|▌         | 14/235 [00:02<00:39,  5.58it/s][A
  6%|▋         | 15/235 [00:02<00:40,  5.49it/s][A
  7%|▋         | 16/235 [00:02<00:39,  5.60it/s][A
  7%|▋         | 17/235 [00:03<00:39,  5.50it/s][A
  8%|▊         | 18/235 [00:03<00:38,  5.60it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:14,  5.53it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.63it/s][A
 67%|██████▋   | 158/235 [00:27<00:14,  5.43it/s][A
 68%|██████▊   | 159/235 [00:28<00:13,  5.56it/s][A
 68%|██████▊   | 160/235 [00:28<00:13,  5.66it/s][A
 69%|██████▊   | 161/235 [00:28<00:13,  5.54it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.64it/s][A
 69%|██████▉   | 163/235 [00:28<00:13,  5.53it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.63it/s][A
 70%|███████   | 165/235 [00:29<00:12,  5.52it/s][A
 71%|███████   | 166/235 [00:29<00:12,  5.45it/s][A
 71%|███████   | 167/235 [00:29<00:12,  5.40it/s][A
 71%|███████▏  | 168/235 [00:29<00:12,  5.37it/s][A
 72%|███████▏  | 169/235 [00:29<00:12,  5.33it/s][A
 72%|███████▏  | 170/235 [00:30<00:11,  5.44it/s][A
 73%|███████▎  | 171/235 [00:30<00:11,  5.36it/s][A
 73%|███████▎  | 172/235 [00:30<00:11,  5.30it/s][A
 74%|███████▎  | 173/235 [00:30<00:11,  5.29it/s][A
 74%|███████▍  | 174/235 [00:30<00:11,  5.29it

Epoch: 19 Train loss: 178.3480



  2%|▎         | 1/40 [00:00<00:06,  5.92it/s][A
  8%|▊         | 3/40 [00:00<00:05,  7.04it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.11it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.11it/s][A
 22%|██▎       | 9/40 [00:00<00:03,  9.96it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.29it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 10.92it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.03it/s][A
 42%|████▎     | 17/40 [00:01<00:02, 11.13it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 11.59it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 11.91it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.15it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 11.91it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 11.34it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 11.71it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 11.98it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.19it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.34it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.45it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 176.9466



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:04,  3.62it/s][A
  1%|          | 2/235 [00:00<00:56,  4.09it/s][A
  1%|▏         | 3/235 [00:00<00:51,  4.50it/s][A
  2%|▏         | 4/235 [00:00<00:47,  4.85it/s][A
  2%|▏         | 5/235 [00:00<00:44,  5.12it/s][A
  3%|▎         | 6/235 [00:01<00:42,  5.33it/s][A
  3%|▎         | 7/235 [00:01<00:41,  5.49it/s][A
  3%|▎         | 8/235 [00:01<00:40,  5.60it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.65it/s][A
  4%|▍         | 10/235 [00:01<00:40,  5.62it/s][A
  5%|▍         | 11/235 [00:02<00:40,  5.52it/s][A
  5%|▌         | 12/235 [00:02<00:40,  5.57it/s][A
  6%|▌         | 13/235 [00:02<00:39,  5.68it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.75it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.80it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.83it/s][A
  7%|▋         | 17/235 [00:03<00:37,  5.86it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.88it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:26<00:13,  5.97it/s][A
 67%|██████▋   | 157/235 [00:26<00:13,  5.96it/s][A
 67%|██████▋   | 158/235 [00:27<00:12,  5.97it/s][A
 68%|██████▊   | 159/235 [00:27<00:12,  5.95it/s][A
 68%|██████▊   | 160/235 [00:27<00:12,  5.95it/s][A
 69%|██████▊   | 161/235 [00:27<00:12,  5.95it/s][A
 69%|██████▉   | 162/235 [00:27<00:12,  5.95it/s][A
 69%|██████▉   | 163/235 [00:27<00:12,  5.94it/s][A
 70%|██████▉   | 164/235 [00:28<00:11,  5.94it/s][A
 70%|███████   | 165/235 [00:28<00:11,  5.94it/s][A
 71%|███████   | 166/235 [00:28<00:11,  5.94it/s][A
 71%|███████   | 167/235 [00:28<00:11,  5.94it/s][A
 71%|███████▏  | 168/235 [00:28<00:11,  5.93it/s][A
 72%|███████▏  | 169/235 [00:28<00:11,  5.95it/s][A
 72%|███████▏  | 170/235 [00:29<00:10,  5.94it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.95it/s][A
 73%|███████▎  | 172/235 [00:29<00:10,  5.94it/s][A
 74%|███████▎  | 173/235 [00:29<00:10,  5.93it/s][A
 74%|███████▍  | 174/235 [00:29<00:10,  5.94it

Epoch: 20 Train loss: 177.7379



  2%|▎         | 1/40 [00:00<00:06,  6.33it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.46it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.51it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.44it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.19it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.19it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 10.20it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 10.21it/s][A
 42%|████▎     | 17/40 [00:01<00:02, 10.87it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 11.37it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 11.76it/s][A
 57%|█████▊    | 23/40 [00:02<00:01, 11.95it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.18it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.35it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.48it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.56it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.63it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.66it/s][A
 92%|█████████▎| 37/40 [00:03<00:00, 12.70it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 176.4861



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<01:06,  3.52it/s][A
  1%|          | 2/235 [00:00<00:59,  3.91it/s][A
  1%|▏         | 3/235 [00:00<00:54,  4.25it/s][A
  2%|▏         | 4/235 [00:00<00:49,  4.64it/s][A
  2%|▏         | 5/235 [00:00<00:46,  4.96it/s][A
  3%|▎         | 6/235 [00:01<00:45,  5.06it/s][A
  3%|▎         | 7/235 [00:01<00:43,  5.29it/s][A
  3%|▎         | 8/235 [00:01<00:41,  5.46it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.59it/s][A
  4%|▍         | 10/235 [00:01<00:39,  5.68it/s][A
  5%|▍         | 11/235 [00:02<00:38,  5.75it/s][A
  5%|▌         | 12/235 [00:02<00:38,  5.81it/s][A
  6%|▌         | 13/235 [00:02<00:37,  5.84it/s][A
  6%|▌         | 14/235 [00:02<00:37,  5.87it/s][A
  6%|▋         | 15/235 [00:02<00:37,  5.86it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.88it/s][A
  7%|▋         | 17/235 [00:03<00:36,  5.90it/s][A
  8%|▊         | 18/235 [00:03<00:36,  5.87it/s][A
  8%|▊         | 19/235 [00:0

 66%|██████▋   | 156/235 [00:27<00:14,  5.60it/s][A
 67%|██████▋   | 157/235 [00:27<00:13,  5.67it/s][A
 67%|██████▋   | 158/235 [00:27<00:13,  5.68it/s][A
 68%|██████▊   | 159/235 [00:27<00:13,  5.71it/s][A
 68%|██████▊   | 160/235 [00:28<00:13,  5.70it/s][A
 69%|██████▊   | 161/235 [00:28<00:12,  5.74it/s][A
 69%|██████▉   | 162/235 [00:28<00:12,  5.77it/s][A
 69%|██████▉   | 163/235 [00:28<00:12,  5.77it/s][A
 70%|██████▉   | 164/235 [00:28<00:12,  5.76it/s][A
 70%|███████   | 165/235 [00:28<00:12,  5.81it/s][A
 71%|███████   | 166/235 [00:29<00:11,  5.85it/s][A
 71%|███████   | 167/235 [00:29<00:11,  5.88it/s][A
 71%|███████▏  | 168/235 [00:29<00:11,  5.91it/s][A
 72%|███████▏  | 169/235 [00:29<00:11,  5.71it/s][A
 72%|███████▏  | 170/235 [00:29<00:11,  5.76it/s][A
 73%|███████▎  | 171/235 [00:29<00:10,  5.82it/s][A
 73%|███████▎  | 172/235 [00:30<00:10,  5.86it/s][A
 74%|███████▎  | 173/235 [00:30<00:10,  5.88it/s][A
 74%|███████▍  | 174/235 [00:30<00:10,  5.88it

Epoch: 21 Train loss: 177.2875



  2%|▎         | 1/40 [00:00<00:06,  6.33it/s][A
  8%|▊         | 3/40 [00:00<00:04,  7.46it/s][A
 12%|█▎        | 5/40 [00:00<00:04,  8.53it/s][A
 18%|█▊        | 7/40 [00:00<00:03,  9.49it/s][A
 22%|██▎       | 9/40 [00:00<00:03, 10.30it/s][A
 28%|██▊       | 11/40 [00:00<00:02, 10.96it/s][A
 32%|███▎      | 13/40 [00:01<00:02, 11.47it/s][A
 38%|███▊      | 15/40 [00:01<00:02, 11.86it/s][A
 42%|████▎     | 17/40 [00:01<00:01, 12.12it/s][A
 48%|████▊     | 19/40 [00:01<00:01, 12.35it/s][A
 52%|█████▎    | 21/40 [00:01<00:01, 12.51it/s][A
 57%|█████▊    | 23/40 [00:01<00:01, 12.63it/s][A
 62%|██████▎   | 25/40 [00:02<00:01, 12.71it/s][A
 68%|██████▊   | 27/40 [00:02<00:01, 12.76it/s][A
 72%|███████▎  | 29/40 [00:02<00:00, 12.69it/s][A
 78%|███████▊  | 31/40 [00:02<00:00, 12.68it/s][A
 82%|████████▎ | 33/40 [00:02<00:00, 12.69it/s][A
 88%|████████▊ | 35/40 [00:02<00:00, 12.71it/s][A
 92%|█████████▎| 37/40 [00:02<00:00, 12.71it/s][A
100%|██████████| 40/40 [00:03<00:00

Test loss: 176.1702



  0%|          | 0/235 [00:00<?, ?it/s][A
  0%|          | 1/235 [00:00<00:58,  3.99it/s][A
  1%|          | 2/235 [00:00<00:52,  4.42it/s][A
  1%|▏         | 3/235 [00:00<00:48,  4.77it/s][A
  2%|▏         | 4/235 [00:00<00:45,  5.06it/s][A
  2%|▏         | 5/235 [00:00<00:43,  5.28it/s][A
  3%|▎         | 6/235 [00:01<00:43,  5.29it/s][A
  3%|▎         | 7/235 [00:01<00:41,  5.45it/s][A
  3%|▎         | 8/235 [00:01<00:42,  5.40it/s][A
  4%|▍         | 9/235 [00:01<00:40,  5.53it/s][A
  4%|▍         | 10/235 [00:01<00:41,  5.44it/s][A
  5%|▍         | 11/235 [00:02<00:41,  5.40it/s][A
  5%|▌         | 12/235 [00:02<00:40,  5.54it/s][A
  6%|▌         | 13/235 [00:02<00:39,  5.64it/s][A
  6%|▌         | 14/235 [00:02<00:38,  5.73it/s][A
  6%|▋         | 15/235 [00:02<00:38,  5.78it/s][A
  7%|▋         | 16/235 [00:02<00:37,  5.81it/s][A
  7%|▋         | 17/235 [00:03<00:38,  5.64it/s][A
  8%|▊         | 18/235 [00:03<00:39,  5.53it/s][A
  8%|▊         | 19/235 [00:0