# Imports

In [1]:
import pickle
import torch
import random
import math
import h5py
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchvision import models, transforms
from torchsummary import summary
from torchvision.transforms import v2
from google.colab import drive
from google.colab import runtime
from PIL import Image
!pip install torchinfo
from torchinfo import summary
drive.mount('/content/drive')

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0
Mounted at /content/drive


# Data

File Loading

In [2]:
# Load training data from HDF5 file
with h5py.File('/content/drive/My Drive/AV Research/train_data.h5', 'r') as train_file:
    train_data = np.array(train_file['data'])
    train_labels = np.array(train_file['labels'])

# Load validation data from HDF5 file
with h5py.File('/content/drive/My Drive/AV Research/val_data.h5', 'r') as val_file:
    val_data = np.array(val_file['data'])
    val_labels = np.array(val_file['labels'])

# print(train_data.shape)
# print(train_labels.shape)
# print(val_data.shape)
# print(val_labels.shape)

In [3]:
def get_mean_std(data):
  mean = np.mean(data, axis=(0, 1, 2))
  std = np.std(data, axis=(0, 1, 2))
  return mean, std

Data Transformations

In [4]:
class TrainDataAugmentation(nn.Module):
    def __init__(self):
        super(TrainDataAugmentation, self).__init__()
        self.transforms = v2.Compose([
            v2.ToImage(),
            v2.Grayscale(num_output_channels=1),
            v2.RandomAffine(degrees=(0, 30), translate=(0.2, 0.2)),
            v2.RandomRotation(degrees=(0, 180)),
            v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=(0.4683,), std=(0.1137,))
        ])

    def forward(self, image):
        augmented_image = self.transforms(image)
        return augmented_image

class ValDataAugmentation(nn.Module):
    def __init__(self):
        super(ValDataAugmentation, self).__init__()
        self.transforms = v2.Compose([
            v2.ToImage(),
            v2.Grayscale(num_output_channels=1),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=(0.4683,), std=(0.1137,))
        ])

    def forward(self, image):
        augmented_image = self.transforms(image)
        return augmented_image

Data Preprocessing

In [5]:
class RandomHorizontalFlipWithSteeringAngle(object):
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, input, output):
        if random.random() < self.p:
            v2.functional.hflip(input)
            output[0] = -output[0]
        return input, output


class AVDataset(Dataset):
    def __init__(self, input_images, output_values, transform):
        self.input_images = input_images
        self.output_values = output_values
        self.transform = transform

    def __len__(self):
        return len(self.output_values)

    def __getitem__(self, idx):
        input_image = self.input_images[idx]
        output_value = self.output_values[idx]

        if self.transform:
            input_image = self.transform(input_image)
            input_image, output_value = RandomHorizontalFlipWithSteeringAngle(0.5)(input_image, output_value)

        return input_image, output_value

# train_dataset_path = '/content/drive/My Drive/AV Research/train_data.h5'
# val_dataset_path = '/content/drive/My Drive/AV Research/val_data.h5'

train_data_augmentation = TrainDataAugmentation()
val_data_augmentation = ValDataAugmentation()

train_dataset = AVDataset(input_images=train_data, output_values=train_labels, transform=train_data_augmentation)
val_dataset = AVDataset(input_images=val_data, output_values=val_labels, transform=val_data_augmentation)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, pin_memory=True)
del train_dataset, val_dataset, train_data, val_data, train_labels, val_labels

# Model

Neural Network

In [6]:
# From: https://github.com/wzlxjtu/PositionalEncoding2D
class PositionalEncoding2d(nn.Module):
    def __init__(self, d_model, height, width):
        super(PositionalEncoding2d, self).__init__()
        if d_model % 4 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                            "odd dimension (got dim={:d})".format(d_model))
        pe = torch.zeros(d_model, height, width)
        # Each dimension use half of d_model
        d_model = int(d_model / 2)
        div_term = torch.exp(torch.arange(0., d_model, 2) *
                            -(math.log(10000.0) / d_model))
        pos_w = torch.arange(0., width).unsqueeze(1)
        pos_h = torch.arange(0., height).unsqueeze(1)
        pe[0:d_model:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
        pe[1:d_model:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
        pe[d_model::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
        pe[d_model + 1::2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe

In [7]:
# Inspired by: https://github.com/reshalfahsi/separableconv-torch
class SeparableConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
        super(SeparableConv2d, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=in_channels, bias=bias)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, num_layers: int, pool: bool, short: bool):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.pooling = pool
        self.short = short

        self.inconv = nn.Sequential(
            SeparableConv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=1, bias=False),
            nn.SELU()
        )

        layers = []
        for _ in range(num_layers - 1):
            layers.append(SeparableConv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=1, bias=False))
            layers.append(nn.SELU())
        self.convlayers = nn.Sequential(*layers)

        if self.pooling:
            self.pool = nn.MaxPool2d(kernel_size=kernel_size, stride=2, padding=1)
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False)
        else:
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)

        self.sact = nn.SELU()

    def forward(self, x):
        out = self.inconv(x)
        out = self.convlayers(out)

        if self.pooling:
            out = self.pool(out)

        if self.short:
            shortcut = self.shortcut(x)
            out = out + shortcut
            out = self.sact(out)

        return out

In [8]:
class AVModel(nn.Module):
    def __init__(self):
        super(AVModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.act = nn.SELU()
        self.positional_encoding = PositionalEncoding2d(64, 119, 256)

        self.conv_layers = nn.Sequential(
            ResidualBlock(in_channels=64, out_channels=64, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=64, out_channels=128, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=128, out_channels=256, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=256, out_channels=512, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=512, out_channels=1024, kernel_size=3, num_layers=4, pool=True, short=True),
        )

        self.attention = nn.MultiheadAttention(embed_dim=32, num_heads=4, dropout=0.5, batch_first=True)

        self.dense_layers = nn.Sequential(
            nn.Linear(1024, 2048, bias=False),
            nn.SELU(),
            nn.Linear(2048, 1024, bias=False),
            nn.SELU(),
            nn.Linear(1024, 1024, bias=False),
            nn.SELU(),
            nn.Dropout(0.5)
        )

        self.output_layer = nn.Linear(1024, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act(x)
        #x = F.layer_norm(self.positional_encoding(x), x.shape)
        x = self.conv_layers(x)

        # batch_size, channels, height, width = x.size()
        # x = x.view(batch_size, channels, height * width)
        # attention_output, _ = self.attention(x, x, x)
        # x = F.layer_norm(x + attention_output, x.shape)

        x = torch.mean(x.view(x.size(0), x.size(1), -1), dim=2) # GlobalAveragePooling2D
        x = self.dense_layers(x)
        x = self.output_layer(x)

        steering_output = F.hardtanh(x[:, 0:1])
        throttle_brake_output = F.hardtanh(x[:, 1:], min_val=0)
        out = torch.cat((steering_output, throttle_brake_output), dim=1)
        return out

In [14]:
class SimpleAVModel(nn.Module):
    def __init__(self):
        super(SimpleAVModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.act = nn.SELU()

        self.conv_layers = nn.Sequential(
            SeparableConv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            SeparableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            SeparableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            SeparableConv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        self.flatten = nn.Flatten()

        self.dense_layers = nn.Sequential(
            nn.Linear(65536, 256, bias=False),
            nn.SELU(),
            nn.Linear(256, 128, bias=False),
            nn.SELU(),
            nn.Linear(128, 64, bias=False),
            nn.SELU(),
            nn.Dropout(0.5)
        )

        self.output_layer = nn.Linear(64, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act(x)
        x = self.conv_layers(x)

        x = self.flatten(x)
        x = self.dense_layers(x)
        x = self.output_layer(x)

        steering_output = F.hardtanh(x[:, 0:1])
        throttle_brake_output = F.hardtanh(x[:, 1:], min_val=0)
        out = torch.cat((steering_output, throttle_brake_output), dim=1)
        return out

# Util

MFPE Loss

In [10]:
class MFPELoss(nn.Module):
    def __init__(self):
        super(MFPELoss, self).__init__()

    def forward(self, input, target):
        error = input - target
        fourth_power_error = error ** 4
        mean_fourth_power_error = torch.mean(fourth_power_error)
        return mean_fourth_power_error

Early Stopping

In [11]:
class EarlyStopping:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

Autoclip

In [12]:
# From: https://github.com/pseeth/autoclip/blob/master/autoclip.py
class AutoClip:
    def __init__(self, percentile):
        self.grad_history = []
        self.percentile = percentile

    def compute_grad_norm(self, model):
        total_norm = 0
        for p in model.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)

        return total_norm

    def __call__(self, model):
        grad_norm = self.compute_grad_norm(model)
        self.grad_history.append(grad_norm)
        clip_value = np.percentile(self.grad_history, self.percentile)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

# Training Loop

In [16]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = SimpleAVModel().to(device)
num_epochs = 30
learning_rate = 1e-5
criterion = MFPELoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, patience=5)
early_stopping = EarlyStopping(patience=5)

torch.autograd.set_detect_anomaly(False)
torch.backends.cudnn.benchmark = True
scaler = torch.cuda.amp.GradScaler()
autoclipper = AutoClip(percentile=10)

print(summary(model, input_size=(batch_size, 1, 119, 256)))

def train_loop(train_loader, model, criterion, optimizer, device):
    size = len(train_loader.dataset)
    model.train()
    train_loss = 0.0
    num_batches = len(train_loader)

    # Training loop
    for batch, (inputs, targets) in enumerate(tqdm(train_loader)):
        optimizer.zero_grad()
        inputs = inputs.float().to(device)
        targets = targets.float().to(device)
        with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
          outputs = model(inputs)
          loss = criterion(outputs, targets)

        train_loss += loss.item()
        scaler.scale(loss).backward()
        autoclipper(model)

        scaler.step(optimizer)
        scale = scaler.get_scale()
        scaler.update()
        skip_lr_sched = (scale != scaler.get_scale())

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(inputs)
            print(f"Training loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    train_loss = train_loss / num_batches
    return train_loss, skip_lr_sched

def val_loop(val_loader, model, criterion, device):
    model.eval()
    val_loss = 0.0
    num_batches = len(val_loader)

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.float().to(device, non_blocking=True)
            targets = targets.float().to(device, non_blocking=True)
            outputs = model(inputs)
            val_loss += criterion(outputs, targets).item()

    val_loss = val_loss / num_batches
    return val_loss

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loss, skip_lr_sched = train_loop(train_loader, model, criterion, optimizer, device)
    val_loss = val_loop(val_loader, model, criterion, device)
    print(f"Train loss: {train_loss:>8f} - Val loss: {val_loss:>8f} \n")

    if not skip_lr_sched:
        scheduler.step(val_loss)

    # if early_stopping.early_stop(val_loss):
    #     print(f"Early stopping after {epoch+1} epochs \n")
    #     print(f"Best val loss: {early_stopping.min_validation_loss} \n")
    #     break

Layer (type:depth-idx)                   Output Shape              Param #
SimpleAVModel                            [32, 3]                   --
├─Conv2d: 1-1                            [32, 64, 119, 256]        576
├─SELU: 1-2                              [32, 64, 119, 256]        --
├─Sequential: 1-3                        [32, 512, 8, 16]          --
│    └─SeparableConv2d: 2-1              [32, 64, 119, 256]        --
│    │    └─Conv2d: 3-1                  [32, 64, 119, 256]        576
│    │    └─Conv2d: 3-2                  [32, 64, 119, 256]        4,096
│    └─SELU: 2-2                         [32, 64, 119, 256]        --
│    └─MaxPool2d: 2-3                    [32, 64, 60, 128]         --
│    └─SeparableConv2d: 2-4              [32, 128, 60, 128]        --
│    │    └─Conv2d: 3-3                  [32, 64, 60, 128]         576
│    │    └─Conv2d: 3-4                  [32, 128, 60, 128]        8,192
│    └─SELU: 2-5                         [32, 128, 60, 128]        --
│    └

  0%|          | 3/4350 [00:00<09:35,  7.56it/s]

Training loss: 0.120277  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:20,  9.64it/s]

Training loss: 0.029586  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<06:58,  9.90it/s]

Training loss: 0.026389  [ 6432/139199]


  7%|▋         | 303/4350 [00:30<06:46,  9.96it/s]

Training loss: 0.032061  [ 9632/139199]


  9%|▉         | 402/4350 [00:40<06:35,  9.97it/s]

Training loss: 0.023050  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:27,  9.94it/s]

Training loss: 0.032526  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:15,  9.97it/s]

Training loss: 0.027414  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:08,  9.91it/s]

Training loss: 0.031266  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:52, 10.06it/s]

Training loss: 0.038555  [25632/139199]


 21%|██        | 902/4350 [01:31<05:48,  9.88it/s]

Training loss: 0.030062  [28832/139199]


 23%|██▎       | 1002/4350 [01:41<05:34, 10.00it/s]

Training loss: 0.024914  [32032/139199]


 25%|██▌       | 1102/4350 [01:51<05:30,  9.83it/s]

Training loss: 0.028686  [35232/139199]


 28%|██▊       | 1202/4350 [02:01<05:18,  9.87it/s]

Training loss: 0.029280  [38432/139199]


 30%|██▉       | 1301/4350 [02:11<05:14,  9.68it/s]

Training loss: 0.024576  [41632/139199]


 32%|███▏      | 1403/4350 [02:21<04:55,  9.97it/s]

Training loss: 0.029236  [44832/139199]


 35%|███▍      | 1502/4350 [02:31<04:47,  9.92it/s]

Training loss: 0.024359  [48032/139199]


 37%|███▋      | 1603/4350 [02:41<04:36,  9.92it/s]

Training loss: 0.028211  [51232/139199]


 39%|███▉      | 1703/4350 [02:51<04:26,  9.93it/s]

Training loss: 0.024195  [54432/139199]


 41%|████▏     | 1802/4350 [03:01<04:38,  9.16it/s]

Training loss: 0.025077  [57632/139199]


 44%|████▎     | 1902/4350 [03:11<04:04, 10.02it/s]

Training loss: 0.026689  [60832/139199]


 46%|████▌     | 2002/4350 [03:22<04:04,  9.61it/s]

Training loss: 0.026870  [64032/139199]


 48%|████▊     | 2102/4350 [03:32<03:49,  9.78it/s]

Training loss: 0.028786  [67232/139199]


 51%|█████     | 2203/4350 [03:42<03:38,  9.85it/s]

Training loss: 0.029086  [70432/139199]


 53%|█████▎    | 2303/4350 [03:53<03:27,  9.87it/s]

Training loss: 0.034624  [73632/139199]


 55%|█████▌    | 2402/4350 [04:03<03:17,  9.86it/s]

Training loss: 0.021121  [76832/139199]


 58%|█████▊    | 2502/4350 [04:13<03:06,  9.89it/s]

Training loss: 0.022149  [80032/139199]


 60%|█████▉    | 2602/4350 [04:23<02:56,  9.93it/s]

Training loss: 0.023854  [83232/139199]


 62%|██████▏   | 2702/4350 [04:33<02:48,  9.79it/s]

Training loss: 0.020899  [86432/139199]


 64%|██████▍   | 2802/4350 [04:43<02:37,  9.83it/s]

Training loss: 0.021794  [89632/139199]


 67%|██████▋   | 2902/4350 [04:54<02:28,  9.74it/s]

Training loss: 0.031102  [92832/139199]


 69%|██████▉   | 3002/4350 [05:04<02:18,  9.76it/s]

Training loss: 0.021002  [96032/139199]


 71%|███████▏  | 3103/4350 [05:14<02:05,  9.94it/s]

Training loss: 0.035604  [99232/139199]


 74%|███████▎  | 3202/4350 [05:24<01:55,  9.97it/s]

Training loss: 0.026732  [102432/139199]


 76%|███████▌  | 3302/4350 [05:34<01:49,  9.59it/s]

Training loss: 0.035385  [105632/139199]


 78%|███████▊  | 3402/4350 [05:44<01:36,  9.87it/s]

Training loss: 0.026999  [108832/139199]


 80%|████████  | 3501/4350 [05:54<01:27,  9.76it/s]

Training loss: 0.027754  [112032/139199]


 83%|████████▎ | 3602/4350 [06:05<01:16,  9.75it/s]

Training loss: 0.024408  [115232/139199]


 85%|████████▌ | 3703/4350 [06:15<01:05,  9.94it/s]

Training loss: 0.022812  [118432/139199]


 87%|████████▋ | 3802/4350 [06:25<00:55,  9.85it/s]

Training loss: 0.032892  [121632/139199]


 90%|████████▉ | 3902/4350 [06:35<00:45,  9.85it/s]

Training loss: 0.024619  [124832/139199]


 92%|█████████▏| 4003/4350 [06:45<00:34, 10.02it/s]

Training loss: 0.027474  [128032/139199]


 94%|█████████▍| 4102/4350 [06:55<00:25,  9.83it/s]

Training loss: 0.024196  [131232/139199]


 97%|█████████▋| 4202/4350 [07:05<00:14,  9.89it/s]

Training loss: 0.025972  [134432/139199]


 99%|█████████▉| 4303/4350 [07:15<00:04,  9.99it/s]

Training loss: 0.023492  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.83it/s]


Train loss: 0.028807 - Val loss: 0.043424 

Epoch 2
-------------------------------


  0%|          | 1/4350 [00:00<16:07,  4.49it/s]

Training loss: 0.027805  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:09,  9.90it/s]

Training loss: 0.034751  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:02,  9.81it/s]

Training loss: 0.030555  [ 6432/139199]


  7%|▋         | 303/4350 [00:30<06:45,  9.99it/s]

Training loss: 0.021879  [ 9632/139199]


  9%|▉         | 403/4350 [00:40<06:38,  9.91it/s]

Training loss: 0.033085  [12832/139199]


 12%|█▏        | 502/4350 [00:50<06:28,  9.91it/s]

Training loss: 0.036940  [16032/139199]


 14%|█▍        | 603/4350 [01:01<06:17,  9.93it/s]

Training loss: 0.023743  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:09,  9.88it/s]

Training loss: 0.024128  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:51, 10.09it/s]

Training loss: 0.028571  [25632/139199]


 21%|██        | 902/4350 [01:31<05:45,  9.97it/s]

Training loss: 0.030261  [28832/139199]


 23%|██▎       | 1002/4350 [01:41<05:42,  9.78it/s]

Training loss: 0.028704  [32032/139199]


 25%|██▌       | 1102/4350 [01:51<05:31,  9.81it/s]

Training loss: 0.032905  [35232/139199]


 28%|██▊       | 1203/4350 [02:01<05:16,  9.94it/s]

Training loss: 0.027903  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:10,  9.82it/s]

Training loss: 0.020392  [41632/139199]


 32%|███▏      | 1402/4350 [02:22<04:59,  9.83it/s]

Training loss: 0.033038  [44832/139199]


 35%|███▍      | 1503/4350 [02:32<04:46,  9.92it/s]

Training loss: 0.026159  [48032/139199]


 37%|███▋      | 1602/4350 [02:42<04:44,  9.66it/s]

Training loss: 0.030040  [51232/139199]


 39%|███▉      | 1702/4350 [02:52<04:32,  9.72it/s]

Training loss: 0.025414  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:19,  9.81it/s]

Training loss: 0.027928  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:10,  9.77it/s]

Training loss: 0.028449  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<04:01,  9.74it/s]

Training loss: 0.027844  [64032/139199]


 48%|████▊     | 2102/4350 [03:33<03:51,  9.71it/s]

Training loss: 0.034285  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:41,  9.70it/s]

Training loss: 0.021747  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:29,  9.79it/s]

Training loss: 0.025373  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:17,  9.85it/s]

Training loss: 0.026952  [76832/139199]


 58%|█████▊    | 2503/4350 [04:14<03:03, 10.08it/s]

Training loss: 0.022426  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<02:55,  9.96it/s]

Training loss: 0.027730  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:52,  9.53it/s]

Training loss: 0.025751  [86432/139199]


 64%|██████▍   | 2803/4350 [04:45<02:35,  9.96it/s]

Training loss: 0.021023  [89632/139199]


 67%|██████▋   | 2903/4350 [04:55<02:24,  9.99it/s]

Training loss: 0.018677  [92832/139199]


 69%|██████▉   | 3001/4350 [05:05<02:17,  9.78it/s]

Training loss: 0.022648  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:07,  9.82it/s]

Training loss: 0.037071  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:56,  9.81it/s]

Training loss: 0.021142  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:46,  9.83it/s]

Training loss: 0.020554  [105632/139199]


 78%|███████▊  | 3403/4350 [05:46<01:33, 10.08it/s]

Training loss: 0.021488  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:24,  9.98it/s]

Training loss: 0.037615  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:15,  9.90it/s]

Training loss: 0.036258  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:05,  9.92it/s]

Training loss: 0.029824  [118432/139199]


 87%|████████▋ | 3802/4350 [06:26<00:55,  9.81it/s]

Training loss: 0.028361  [121632/139199]


 90%|████████▉ | 3902/4350 [06:36<00:45,  9.87it/s]

Training loss: 0.024057  [124832/139199]


 92%|█████████▏| 4003/4350 [06:46<00:34,  9.92it/s]

Training loss: 0.023387  [128032/139199]


 94%|█████████▍| 4102/4350 [06:56<00:25,  9.81it/s]

Training loss: 0.026052  [131232/139199]


 97%|█████████▋| 4202/4350 [07:06<00:15,  9.86it/s]

Training loss: 0.029434  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04, 10.02it/s]

Training loss: 0.028212  [137632/139199]


100%|██████████| 4350/4350 [07:21<00:00,  9.84it/s]


Train loss: 0.026950 - Val loss: 0.041260 

Epoch 3
-------------------------------


  0%|          | 3/4350 [00:00<09:31,  7.60it/s]

Training loss: 0.024643  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:18,  9.68it/s]

Training loss: 0.027411  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:05,  9.74it/s]

Training loss: 0.028790  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:50,  9.86it/s]

Training loss: 0.023391  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:41,  9.82it/s]

Training loss: 0.023612  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:33,  9.78it/s]

Training loss: 0.026035  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:20,  9.86it/s]

Training loss: 0.022138  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:17,  9.66it/s]

Training loss: 0.024985  [22432/139199]


 18%|█▊        | 803/4350 [01:22<05:54, 10.02it/s]

Training loss: 0.023580  [25632/139199]


 21%|██        | 902/4350 [01:32<05:50,  9.85it/s]

Training loss: 0.021731  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:41,  9.80it/s]

Training loss: 0.024896  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:26,  9.95it/s]

Training loss: 0.018249  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:30,  9.54it/s]

Training loss: 0.025819  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:16,  9.63it/s]

Training loss: 0.022567  [41632/139199]


 32%|███▏      | 1403/4350 [02:23<04:55,  9.97it/s]

Training loss: 0.029954  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:46,  9.93it/s]

Training loss: 0.023738  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:39,  9.82it/s]

Training loss: 0.026894  [51232/139199]


 39%|███▉      | 1703/4350 [02:53<04:26,  9.94it/s]

Training loss: 0.025978  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:16,  9.93it/s]

Training loss: 0.029634  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:06,  9.91it/s]

Training loss: 0.023704  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:56,  9.94it/s]

Training loss: 0.021323  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:51,  9.71it/s]

Training loss: 0.022042  [67232/139199]


 51%|█████     | 2203/4350 [03:44<03:36,  9.90it/s]

Training loss: 0.029625  [70432/139199]


 53%|█████▎    | 2303/4350 [03:54<03:26,  9.93it/s]

Training loss: 0.031499  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:17,  9.86it/s]

Training loss: 0.028130  [76832/139199]


 58%|█████▊    | 2503/4350 [04:14<03:05,  9.95it/s]

Training loss: 0.025402  [80032/139199]


 60%|█████▉    | 2603/4350 [04:24<02:56,  9.89it/s]

Training loss: 0.021184  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:47,  9.81it/s]

Training loss: 0.026836  [86432/139199]


 64%|██████▍   | 2803/4350 [04:45<02:35,  9.94it/s]

Training loss: 0.029168  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:25,  9.95it/s]

Training loss: 0.021404  [92832/139199]


 69%|██████▉   | 3003/4350 [05:05<02:15,  9.95it/s]

Training loss: 0.022203  [96032/139199]


 71%|███████▏  | 3101/4350 [05:15<02:05,  9.93it/s]

Training loss: 0.019113  [99232/139199]


 74%|███████▎  | 3203/4350 [05:25<01:55,  9.92it/s]

Training loss: 0.026360  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:45,  9.93it/s]

Training loss: 0.025394  [105632/139199]


 78%|███████▊  | 3402/4350 [05:45<01:38,  9.60it/s]

Training loss: 0.021212  [108832/139199]


 81%|████████  | 3502/4350 [05:55<01:25,  9.94it/s]

Training loss: 0.024874  [112032/139199]


 83%|████████▎ | 3602/4350 [06:05<01:17,  9.70it/s]

Training loss: 0.021036  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:05,  9.88it/s]

Training loss: 0.016681  [118432/139199]


 87%|████████▋ | 3803/4350 [06:26<00:54,  9.98it/s]

Training loss: 0.024713  [121632/139199]


 90%|████████▉ | 3901/4350 [06:36<00:46,  9.75it/s]

Training loss: 0.025221  [124832/139199]


 92%|█████████▏| 4002/4350 [06:46<00:35,  9.83it/s]

Training loss: 0.022169  [128032/139199]


 94%|█████████▍| 4102/4350 [06:56<00:25,  9.78it/s]

Training loss: 0.025069  [131232/139199]


 97%|█████████▋| 4203/4350 [07:07<00:14,  9.95it/s]

Training loss: 0.024837  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04,  9.86it/s]

Training loss: 0.026458  [137632/139199]


100%|██████████| 4350/4350 [07:21<00:00,  9.84it/s]


Train loss: 0.025318 - Val loss: 0.038863 

Epoch 4
-------------------------------


  0%|          | 3/4350 [00:00<09:30,  7.62it/s]

Training loss: 0.021502  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:15,  9.76it/s]

Training loss: 0.025556  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<06:59,  9.90it/s]

Training loss: 0.021311  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<07:03,  9.56it/s]

Training loss: 0.018968  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:49,  9.64it/s]

Training loss: 0.024027  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:28,  9.91it/s]

Training loss: 0.026161  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:24,  9.75it/s]

Training loss: 0.029106  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:11,  9.82it/s]

Training loss: 0.022876  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:04,  9.74it/s]

Training loss: 0.024604  [25632/139199]


 21%|██        | 902/4350 [01:32<05:46,  9.94it/s]

Training loss: 0.023964  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:37,  9.91it/s]

Training loss: 0.033742  [32032/139199]


 25%|██▌       | 1103/4350 [01:52<05:27,  9.93it/s]

Training loss: 0.025813  [35232/139199]


 28%|██▊       | 1203/4350 [02:02<05:14,  9.99it/s]

Training loss: 0.021912  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:09,  9.85it/s]

Training loss: 0.029160  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:00,  9.80it/s]

Training loss: 0.023041  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:50,  9.82it/s]

Training loss: 0.023229  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:41,  9.76it/s]

Training loss: 0.022825  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:28,  9.84it/s]

Training loss: 0.033570  [54432/139199]


 41%|████▏     | 1803/4350 [03:03<04:15,  9.97it/s]

Training loss: 0.019793  [57632/139199]


 44%|████▎     | 1903/4350 [03:13<04:05,  9.97it/s]

Training loss: 0.023227  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:57,  9.87it/s]

Training loss: 0.024061  [64032/139199]


 48%|████▊     | 2103/4350 [03:34<03:46,  9.92it/s]

Training loss: 0.022536  [67232/139199]


 51%|█████     | 2203/4350 [03:44<03:35,  9.95it/s]

Training loss: 0.018960  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:28,  9.81it/s]

Training loss: 0.023267  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:18,  9.79it/s]

Training loss: 0.025687  [76832/139199]


 58%|█████▊    | 2502/4350 [04:14<03:08,  9.82it/s]

Training loss: 0.020435  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<02:58,  9.78it/s]

Training loss: 0.024519  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:51,  9.58it/s]

Training loss: 0.018402  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:39,  9.69it/s]

Training loss: 0.030809  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:29,  9.68it/s]

Training loss: 0.020516  [92832/139199]


 69%|██████▉   | 3003/4350 [05:05<02:16,  9.88it/s]

Training loss: 0.025345  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:08,  9.68it/s]

Training loss: 0.027127  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:59,  9.63it/s]

Training loss: 0.026473  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:47,  9.73it/s]

Training loss: 0.020026  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:34, 10.08it/s]

Training loss: 0.022974  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:24, 10.05it/s]

Training loss: 0.027770  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:15,  9.85it/s]

Training loss: 0.021316  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:05,  9.90it/s]

Training loss: 0.025951  [118432/139199]


 87%|████████▋ | 3802/4350 [06:26<00:55,  9.87it/s]

Training loss: 0.024577  [121632/139199]


 90%|████████▉ | 3903/4350 [06:37<00:44,  9.94it/s]

Training loss: 0.023099  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:35,  9.80it/s]

Training loss: 0.028143  [128032/139199]


 94%|█████████▍| 4103/4350 [06:57<00:24,  9.89it/s]

Training loss: 0.024098  [131232/139199]


 97%|█████████▋| 4203/4350 [07:07<00:14,  9.96it/s]

Training loss: 0.027647  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04,  9.87it/s]

Training loss: 0.016963  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.83it/s]


Train loss: 0.024041 - Val loss: 0.038314 

Epoch 5
-------------------------------


  0%|          | 3/4350 [00:00<09:35,  7.55it/s]

Training loss: 0.024011  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:15,  9.76it/s]

Training loss: 0.019904  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:18,  9.47it/s]

Training loss: 0.020699  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:59,  9.64it/s]

Training loss: 0.021491  [ 9632/139199]


  9%|▉         | 403/4350 [00:41<06:40,  9.86it/s]

Training loss: 0.025883  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:31,  9.84it/s]

Training loss: 0.023702  [16032/139199]


 14%|█▍        | 602/4350 [01:02<06:25,  9.72it/s]

Training loss: 0.024015  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:27,  9.40it/s]

Training loss: 0.020504  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:04,  9.73it/s]

Training loss: 0.019263  [25632/139199]


 21%|██        | 902/4350 [01:32<05:49,  9.88it/s]

Training loss: 0.025090  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:43,  9.74it/s]

Training loss: 0.022158  [32032/139199]


 25%|██▌       | 1102/4350 [01:53<05:30,  9.84it/s]

Training loss: 0.023490  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:29,  9.56it/s]

Training loss: 0.017816  [38432/139199]


 30%|██▉       | 1303/4350 [02:13<05:02, 10.06it/s]

Training loss: 0.022786  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<04:59,  9.85it/s]

Training loss: 0.024262  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:46,  9.93it/s]

Training loss: 0.023015  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:38,  9.88it/s]

Training loss: 0.019329  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:26,  9.95it/s]

Training loss: 0.018439  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:22,  9.70it/s]

Training loss: 0.020133  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:06,  9.93it/s]

Training loss: 0.019596  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<04:00,  9.76it/s]

Training loss: 0.025609  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:50,  9.75it/s]

Training loss: 0.027730  [67232/139199]


 51%|█████     | 2203/4350 [03:44<03:35,  9.95it/s]

Training loss: 0.022324  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:32,  9.65it/s]

Training loss: 0.019497  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:18,  9.80it/s]

Training loss: 0.028959  [76832/139199]


 58%|█████▊    | 2503/4350 [04:15<03:05,  9.95it/s]

Training loss: 0.026359  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<02:59,  9.74it/s]

Training loss: 0.027089  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:46,  9.90it/s]

Training loss: 0.025114  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:36,  9.88it/s]

Training loss: 0.036240  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:26,  9.89it/s]

Training loss: 0.021640  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:21,  9.51it/s]

Training loss: 0.018021  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:08,  9.70it/s]

Training loss: 0.026062  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:58,  9.71it/s]

Training loss: 0.016331  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:47,  9.76it/s]

Training loss: 0.024827  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:36,  9.86it/s]

Training loss: 0.018709  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:26,  9.81it/s]

Training loss: 0.024958  [112032/139199]


 83%|████████▎ | 3601/4350 [06:07<01:15,  9.86it/s]

Training loss: 0.026550  [115232/139199]


 85%|████████▌ | 3703/4350 [06:17<01:07,  9.62it/s]

Training loss: 0.013160  [118432/139199]


 87%|████████▋ | 3803/4350 [06:27<00:55,  9.90it/s]

Training loss: 0.028969  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:45,  9.80it/s]

Training loss: 0.022773  [124832/139199]


 92%|█████████▏| 4003/4350 [06:48<00:34,  9.95it/s]

Training loss: 0.021695  [128032/139199]


 94%|█████████▍| 4102/4350 [06:58<00:25,  9.88it/s]

Training loss: 0.021780  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:14,  9.90it/s]

Training loss: 0.020704  [134432/139199]


 99%|█████████▉| 4302/4350 [07:18<00:04,  9.91it/s]

Training loss: 0.024249  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.81it/s]


Train loss: 0.023421 - Val loss: 0.038901 

Epoch 6
-------------------------------


  0%|          | 3/4350 [00:00<09:29,  7.63it/s]

Training loss: 0.023067  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:12,  9.81it/s]

Training loss: 0.023639  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:05,  9.76it/s]

Training loss: 0.024159  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:53,  9.79it/s]

Training loss: 0.020928  [ 9632/139199]


  9%|▉         | 403/4350 [00:41<06:37,  9.92it/s]

Training loss: 0.023804  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:26,  9.95it/s]

Training loss: 0.020766  [16032/139199]


 14%|█▍        | 603/4350 [01:01<06:15,  9.99it/s]

Training loss: 0.019228  [19232/139199]


 16%|█▌        | 703/4350 [01:11<06:06,  9.95it/s]

Training loss: 0.020344  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:57,  9.92it/s]

Training loss: 0.018721  [25632/139199]


 21%|██        | 902/4350 [01:31<06:09,  9.32it/s]

Training loss: 0.021159  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:39,  9.86it/s]

Training loss: 0.021260  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:31,  9.79it/s]

Training loss: 0.025384  [35232/139199]


 28%|██▊       | 1203/4350 [02:02<05:23,  9.74it/s]

Training loss: 0.028459  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:10,  9.83it/s]

Training loss: 0.023493  [41632/139199]


 32%|███▏      | 1402/4350 [02:22<04:57,  9.92it/s]

Training loss: 0.028028  [44832/139199]


 35%|███▍      | 1501/4350 [02:32<04:49,  9.84it/s]

Training loss: 0.023048  [48032/139199]


 37%|███▋      | 1601/4350 [02:42<04:35,  9.96it/s]

Training loss: 0.017978  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:27,  9.88it/s]

Training loss: 0.019525  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:17,  9.90it/s]

Training loss: 0.024754  [57632/139199]


 44%|████▎     | 1903/4350 [03:13<04:03, 10.04it/s]

Training loss: 0.021798  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:56,  9.93it/s]

Training loss: 0.029798  [64032/139199]


 48%|████▊     | 2102/4350 [03:33<03:45,  9.97it/s]

Training loss: 0.025544  [67232/139199]


 51%|█████     | 2203/4350 [03:43<03:36,  9.94it/s]

Training loss: 0.018450  [70432/139199]


 53%|█████▎    | 2302/4350 [03:53<03:27,  9.85it/s]

Training loss: 0.025165  [73632/139199]


 55%|█████▌    | 2403/4350 [04:03<03:14,  9.99it/s]

Training loss: 0.026775  [76832/139199]


 58%|█████▊    | 2502/4350 [04:13<03:05,  9.95it/s]

Training loss: 0.023044  [80032/139199]


 60%|█████▉    | 2602/4350 [04:23<02:56,  9.89it/s]

Training loss: 0.038404  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:53,  9.49it/s]

Training loss: 0.026894  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:36,  9.89it/s]

Training loss: 0.019244  [89632/139199]


 67%|██████▋   | 2902/4350 [04:54<02:27,  9.83it/s]

Training loss: 0.019513  [92832/139199]


 69%|██████▉   | 3002/4350 [05:04<02:17,  9.83it/s]

Training loss: 0.019732  [96032/139199]


 71%|███████▏  | 3103/4350 [05:14<02:06,  9.85it/s]

Training loss: 0.018487  [99232/139199]


 74%|███████▎  | 3202/4350 [05:24<01:59,  9.58it/s]

Training loss: 0.019547  [102432/139199]


 76%|███████▌  | 3303/4350 [05:35<01:45,  9.94it/s]

Training loss: 0.021589  [105632/139199]


 78%|███████▊  | 3402/4350 [05:45<01:36,  9.81it/s]

Training loss: 0.017129  [108832/139199]


 81%|████████  | 3503/4350 [05:55<01:24,  9.97it/s]

Training loss: 0.018806  [112032/139199]


 83%|████████▎ | 3602/4350 [06:05<01:15,  9.90it/s]

Training loss: 0.014324  [115232/139199]


 85%|████████▌ | 3702/4350 [06:15<01:06,  9.72it/s]

Training loss: 0.020195  [118432/139199]


 87%|████████▋ | 3802/4350 [06:25<00:55,  9.91it/s]

Training loss: 0.022651  [121632/139199]


 90%|████████▉ | 3902/4350 [06:36<00:45,  9.86it/s]

Training loss: 0.029059  [124832/139199]


 92%|█████████▏| 4002/4350 [06:46<00:35,  9.84it/s]

Training loss: 0.019644  [128032/139199]


 94%|█████████▍| 4102/4350 [06:56<00:25,  9.84it/s]

Training loss: 0.020359  [131232/139199]


 97%|█████████▋| 4202/4350 [07:06<00:15,  9.83it/s]

Training loss: 0.020164  [134432/139199]


 99%|█████████▉| 4302/4350 [07:16<00:04,  9.85it/s]

Training loss: 0.021209  [137632/139199]


100%|██████████| 4350/4350 [07:21<00:00,  9.85it/s]


Train loss: 0.022625 - Val loss: 0.038122 

Epoch 7
-------------------------------


  0%|          | 3/4350 [00:00<09:44,  7.44it/s]

Training loss: 0.022801  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:10,  9.87it/s]

Training loss: 0.018588  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:03,  9.80it/s]

Training loss: 0.020811  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:53,  9.79it/s]

Training loss: 0.031281  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:40,  9.86it/s]

Training loss: 0.025429  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:21, 10.09it/s]

Training loss: 0.025421  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:26,  9.69it/s]

Training loss: 0.024343  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:15,  9.72it/s]

Training loss: 0.018417  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:59,  9.86it/s]

Training loss: 0.019925  [25632/139199]


 21%|██        | 903/4350 [01:32<05:45,  9.96it/s]

Training loss: 0.023921  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:44,  9.73it/s]

Training loss: 0.022194  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:26,  9.94it/s]

Training loss: 0.025902  [35232/139199]


 28%|██▊       | 1201/4350 [02:02<05:17,  9.92it/s]

Training loss: 0.018262  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:12,  9.75it/s]

Training loss: 0.017741  [41632/139199]


 32%|███▏      | 1403/4350 [02:22<04:56,  9.95it/s]

Training loss: 0.021952  [44832/139199]


 35%|███▍      | 1501/4350 [02:32<04:43, 10.04it/s]

Training loss: 0.020763  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:33, 10.04it/s]

Training loss: 0.028255  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:28,  9.87it/s]

Training loss: 0.025407  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:20,  9.78it/s]

Training loss: 0.016946  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:10,  9.76it/s]

Training loss: 0.017973  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:58,  9.82it/s]

Training loss: 0.020268  [64032/139199]


 48%|████▊     | 2102/4350 [03:33<03:47,  9.88it/s]

Training loss: 0.017232  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:37,  9.87it/s]

Training loss: 0.025313  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:27,  9.88it/s]

Training loss: 0.030686  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:17,  9.86it/s]

Training loss: 0.020839  [76832/139199]


 58%|█████▊    | 2502/4350 [04:14<03:09,  9.75it/s]

Training loss: 0.017842  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<02:59,  9.76it/s]

Training loss: 0.020999  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:48,  9.77it/s]

Training loss: 0.025415  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:36,  9.91it/s]

Training loss: 0.022717  [89632/139199]


 67%|██████▋   | 2903/4350 [04:55<02:25,  9.93it/s]

Training loss: 0.021024  [92832/139199]


 69%|██████▉   | 3002/4350 [05:05<02:16,  9.85it/s]

Training loss: 0.018473  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:06,  9.86it/s]

Training loss: 0.015480  [99232/139199]


 74%|███████▎  | 3203/4350 [05:25<01:55,  9.93it/s]

Training loss: 0.022238  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:45,  9.93it/s]

Training loss: 0.024204  [105632/139199]


 78%|███████▊  | 3402/4350 [05:45<01:36,  9.85it/s]

Training loss: 0.019646  [108832/139199]


 81%|████████  | 3502/4350 [05:55<01:26,  9.86it/s]

Training loss: 0.016593  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:15,  9.94it/s]

Training loss: 0.020189  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:05,  9.93it/s]

Training loss: 0.030201  [118432/139199]


 87%|████████▋ | 3802/4350 [06:26<00:55,  9.90it/s]

Training loss: 0.021020  [121632/139199]


 90%|████████▉ | 3902/4350 [06:36<00:45,  9.87it/s]

Training loss: 0.033815  [124832/139199]


 92%|█████████▏| 4002/4350 [06:46<00:36,  9.44it/s]

Training loss: 0.018495  [128032/139199]


 94%|█████████▍| 4102/4350 [06:56<00:25,  9.79it/s]

Training loss: 0.020733  [131232/139199]


 97%|█████████▋| 4201/4350 [07:07<00:15,  9.60it/s]

Training loss: 0.017872  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04,  9.87it/s]

Training loss: 0.028385  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.84it/s]


Train loss: 0.022106 - Val loss: 0.040690 

Epoch 8
-------------------------------


  0%|          | 3/4350 [00:00<09:32,  7.59it/s]

Training loss: 0.020570  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:22,  9.59it/s]

Training loss: 0.034620  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:08,  9.67it/s]

Training loss: 0.023289  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:52,  9.81it/s]

Training loss: 0.026307  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:48,  9.67it/s]

Training loss: 0.022208  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:33,  9.78it/s]

Training loss: 0.024950  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:21,  9.84it/s]

Training loss: 0.024853  [19232/139199]


 16%|█▌        | 703/4350 [01:12<06:05,  9.97it/s]

Training loss: 0.014621  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:05,  9.71it/s]

Training loss: 0.015336  [25632/139199]


 21%|██        | 901/4350 [01:32<05:48,  9.91it/s]

Training loss: 0.017248  [28832/139199]


 23%|██▎       | 1003/4350 [01:42<05:36,  9.95it/s]

Training loss: 0.018471  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:27,  9.92it/s]

Training loss: 0.026773  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:18,  9.88it/s]

Training loss: 0.018666  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:06,  9.94it/s]

Training loss: 0.019735  [41632/139199]


 32%|███▏      | 1402/4350 [02:22<04:59,  9.86it/s]

Training loss: 0.020164  [44832/139199]


 35%|███▍      | 1503/4350 [02:33<04:46,  9.95it/s]

Training loss: 0.024473  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:41,  9.75it/s]

Training loss: 0.016519  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:28,  9.87it/s]

Training loss: 0.024049  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:25,  9.61it/s]

Training loss: 0.019823  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:09,  9.81it/s]

Training loss: 0.020027  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:53, 10.04it/s]

Training loss: 0.021844  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:47,  9.86it/s]

Training loss: 0.024133  [67232/139199]


 51%|█████     | 2203/4350 [03:44<03:35,  9.95it/s]

Training loss: 0.017689  [70432/139199]


 53%|█████▎    | 2303/4350 [03:54<03:25,  9.97it/s]

Training loss: 0.026391  [73632/139199]


 55%|█████▌    | 2403/4350 [04:04<03:15,  9.95it/s]

Training loss: 0.020568  [76832/139199]


 57%|█████▋    | 2501/4350 [04:14<03:07,  9.84it/s]

Training loss: 0.022185  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<02:56,  9.90it/s]

Training loss: 0.021753  [83232/139199]


 62%|██████▏   | 2703/4350 [04:34<02:44,  9.99it/s]

Training loss: 0.018586  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:36,  9.92it/s]

Training loss: 0.017307  [89632/139199]


 67%|██████▋   | 2903/4350 [04:54<02:25,  9.92it/s]

Training loss: 0.021906  [92832/139199]


 69%|██████▉   | 3001/4350 [05:04<02:17,  9.81it/s]

Training loss: 0.022511  [96032/139199]


 71%|███████▏  | 3103/4350 [05:15<02:05,  9.91it/s]

Training loss: 0.018882  [99232/139199]


 74%|███████▎  | 3203/4350 [05:25<01:55,  9.93it/s]

Training loss: 0.024606  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:46,  9.84it/s]

Training loss: 0.022146  [105632/139199]


 78%|███████▊  | 3402/4350 [05:45<01:37,  9.73it/s]

Training loss: 0.028466  [108832/139199]


 81%|████████  | 3502/4350 [05:55<01:25,  9.95it/s]

Training loss: 0.025619  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:16,  9.74it/s]

Training loss: 0.016905  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:08,  9.46it/s]

Training loss: 0.028791  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:56,  9.76it/s]

Training loss: 0.027443  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:45,  9.88it/s]

Training loss: 0.031925  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:34,  9.96it/s]

Training loss: 0.021849  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:25,  9.85it/s]

Training loss: 0.023384  [131232/139199]


 97%|█████████▋| 4202/4350 [07:07<00:15,  9.64it/s]

Training loss: 0.022309  [134432/139199]


 99%|█████████▉| 4302/4350 [07:18<00:04,  9.84it/s]

Training loss: 0.022603  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.82it/s]


Train loss: 0.021576 - Val loss: 0.038531 

Epoch 9
-------------------------------


  0%|          | 3/4350 [00:00<09:32,  7.59it/s]

Training loss: 0.024438  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:35,  9.33it/s]

Training loss: 0.024005  [ 3232/139199]


  5%|▍         | 202/4350 [00:21<07:01,  9.83it/s]

Training loss: 0.025345  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:52,  9.82it/s]

Training loss: 0.017442  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:46,  9.71it/s]

Training loss: 0.021861  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:38,  9.66it/s]

Training loss: 0.017184  [16032/139199]


 14%|█▍        | 602/4350 [01:02<06:26,  9.70it/s]

Training loss: 0.022129  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:23,  9.51it/s]

Training loss: 0.020939  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:09,  9.60it/s]

Training loss: 0.020310  [25632/139199]


 21%|██        | 902/4350 [01:33<05:49,  9.88it/s]

Training loss: 0.018220  [28832/139199]


 23%|██▎       | 1002/4350 [01:43<05:38,  9.88it/s]

Training loss: 0.018985  [32032/139199]


 25%|██▌       | 1102/4350 [01:53<05:23, 10.04it/s]

Training loss: 0.017313  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:35,  9.38it/s]

Training loss: 0.020865  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:12,  9.76it/s]

Training loss: 0.017808  [41632/139199]


 32%|███▏      | 1402/4350 [02:24<04:59,  9.83it/s]

Training loss: 0.028164  [44832/139199]


 35%|███▍      | 1502/4350 [02:34<04:49,  9.83it/s]

Training loss: 0.020769  [48032/139199]


 37%|███▋      | 1602/4350 [02:44<04:43,  9.71it/s]

Training loss: 0.020412  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:28,  9.87it/s]

Training loss: 0.022843  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:21,  9.74it/s]

Training loss: 0.021819  [57632/139199]


 44%|████▎     | 1903/4350 [03:14<04:03, 10.05it/s]

Training loss: 0.024502  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:56,  9.91it/s]

Training loss: 0.022174  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:47,  9.87it/s]

Training loss: 0.019378  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:38,  9.84it/s]

Training loss: 0.037647  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:26,  9.93it/s]

Training loss: 0.018284  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:23,  9.57it/s]

Training loss: 0.019700  [76832/139199]


 58%|█████▊    | 2503/4350 [04:15<03:06,  9.90it/s]

Training loss: 0.020490  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<02:55,  9.98it/s]

Training loss: 0.027148  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:48,  9.77it/s]

Training loss: 0.019007  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:38,  9.79it/s]

Training loss: 0.021164  [89632/139199]


 67%|██████▋   | 2902/4350 [04:56<02:27,  9.83it/s]

Training loss: 0.021678  [92832/139199]


 69%|██████▉   | 3001/4350 [05:06<02:16,  9.86it/s]

Training loss: 0.031592  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:05,  9.92it/s]

Training loss: 0.017860  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:58,  9.67it/s]

Training loss: 0.020354  [102432/139199]


 76%|███████▌  | 3302/4350 [05:37<01:45,  9.91it/s]

Training loss: 0.020730  [105632/139199]


 78%|███████▊  | 3403/4350 [05:47<01:35,  9.89it/s]

Training loss: 0.018527  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:27,  9.67it/s]

Training loss: 0.014464  [112032/139199]


 83%|████████▎ | 3602/4350 [06:07<01:15,  9.87it/s]

Training loss: 0.020601  [115232/139199]


 85%|████████▌ | 3703/4350 [06:17<01:05,  9.91it/s]

Training loss: 0.021732  [118432/139199]


 87%|████████▋ | 3802/4350 [06:28<00:57,  9.46it/s]

Training loss: 0.016969  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:45,  9.80it/s]

Training loss: 0.020727  [124832/139199]


 92%|█████████▏| 4002/4350 [06:48<00:34,  9.97it/s]

Training loss: 0.021496  [128032/139199]


 94%|█████████▍| 4102/4350 [06:58<00:25,  9.83it/s]

Training loss: 0.017314  [131232/139199]


 97%|█████████▋| 4202/4350 [07:09<00:15,  9.70it/s]

Training loss: 0.016424  [134432/139199]


 99%|█████████▉| 4302/4350 [07:19<00:04,  9.80it/s]

Training loss: 0.018464  [137632/139199]


100%|██████████| 4350/4350 [07:24<00:00,  9.79it/s]


Train loss: 0.021346 - Val loss: 0.046324 

Epoch 10
-------------------------------


  0%|          | 3/4350 [00:00<09:27,  7.66it/s]

Training loss: 0.018103  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:12,  9.82it/s]

Training loss: 0.016154  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:10,  9.64it/s]

Training loss: 0.021732  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:48,  9.92it/s]

Training loss: 0.020492  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:48,  9.67it/s]

Training loss: 0.019395  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:31,  9.83it/s]

Training loss: 0.018822  [16032/139199]


 14%|█▍        | 603/4350 [01:01<06:17,  9.93it/s]

Training loss: 0.015127  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:11,  9.82it/s]

Training loss: 0.016423  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:58,  9.89it/s]

Training loss: 0.025499  [25632/139199]


 21%|██        | 902/4350 [01:31<05:49,  9.86it/s]

Training loss: 0.019739  [28832/139199]


 23%|██▎       | 1002/4350 [01:41<05:43,  9.76it/s]

Training loss: 0.019152  [32032/139199]


 25%|██▌       | 1103/4350 [01:52<05:26,  9.94it/s]

Training loss: 0.021407  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:17,  9.91it/s]

Training loss: 0.023013  [38432/139199]


 30%|██▉       | 1303/4350 [02:12<05:05,  9.99it/s]

Training loss: 0.018138  [41632/139199]


 32%|███▏      | 1402/4350 [02:22<05:01,  9.79it/s]

Training loss: 0.038112  [44832/139199]


 35%|███▍      | 1502/4350 [02:32<04:48,  9.89it/s]

Training loss: 0.014021  [48032/139199]


 37%|███▋      | 1603/4350 [02:42<04:35,  9.97it/s]

Training loss: 0.018522  [51232/139199]


 39%|███▉      | 1702/4350 [02:52<04:25,  9.98it/s]

Training loss: 0.019221  [54432/139199]


 41%|████▏     | 1802/4350 [03:02<04:18,  9.87it/s]

Training loss: 0.024237  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:09,  9.82it/s]

Training loss: 0.023547  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:59,  9.80it/s]

Training loss: 0.016456  [64032/139199]


 48%|████▊     | 2103/4350 [03:33<03:50,  9.76it/s]

Training loss: 0.019311  [67232/139199]


 51%|█████     | 2202/4350 [03:43<03:36,  9.92it/s]

Training loss: 0.019971  [70432/139199]


 53%|█████▎    | 2302/4350 [03:53<03:27,  9.85it/s]

Training loss: 0.027134  [73632/139199]


 55%|█████▌    | 2402/4350 [04:03<03:18,  9.81it/s]

Training loss: 0.021428  [76832/139199]


 58%|█████▊    | 2502/4350 [04:13<03:05,  9.96it/s]

Training loss: 0.023675  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<02:56,  9.92it/s]

Training loss: 0.027079  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:49,  9.75it/s]

Training loss: 0.015390  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:37,  9.85it/s]

Training loss: 0.017298  [89632/139199]


 67%|██████▋   | 2903/4350 [04:54<02:26,  9.89it/s]

Training loss: 0.019320  [92832/139199]


 69%|██████▉   | 3002/4350 [05:04<02:16,  9.87it/s]

Training loss: 0.017439  [96032/139199]


 71%|███████▏  | 3103/4350 [05:14<02:05,  9.91it/s]

Training loss: 0.025794  [99232/139199]


 74%|███████▎  | 3202/4350 [05:24<01:56,  9.88it/s]

Training loss: 0.027693  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:47,  9.78it/s]

Training loss: 0.018918  [105632/139199]


 78%|███████▊  | 3403/4350 [05:45<01:34, 10.03it/s]

Training loss: 0.018441  [108832/139199]


 81%|████████  | 3502/4350 [05:55<01:25,  9.87it/s]

Training loss: 0.028808  [112032/139199]


 83%|████████▎ | 3602/4350 [06:05<01:16,  9.79it/s]

Training loss: 0.019353  [115232/139199]


 85%|████████▌ | 3702/4350 [06:15<01:05,  9.89it/s]

Training loss: 0.022634  [118432/139199]


 87%|████████▋ | 3802/4350 [06:25<00:56,  9.77it/s]

Training loss: 0.025762  [121632/139199]


 90%|████████▉ | 3903/4350 [06:36<00:45,  9.93it/s]

Training loss: 0.017098  [124832/139199]


 92%|█████████▏| 4002/4350 [06:46<00:35,  9.89it/s]

Training loss: 0.025813  [128032/139199]


 94%|█████████▍| 4103/4350 [06:56<00:25,  9.84it/s]

Training loss: 0.019859  [131232/139199]


 97%|█████████▋| 4203/4350 [07:06<00:14,  9.97it/s]

Training loss: 0.031335  [134432/139199]


 99%|█████████▉| 4302/4350 [07:16<00:04,  9.93it/s]

Training loss: 0.028845  [137632/139199]


100%|██████████| 4350/4350 [07:21<00:00,  9.85it/s]


Train loss: 0.020925 - Val loss: 0.048762 

Epoch 11
-------------------------------


  0%|          | 3/4350 [00:00<09:27,  7.65it/s]

Training loss: 0.020118  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:08,  9.90it/s]

Training loss: 0.017762  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:00,  9.88it/s]

Training loss: 0.017723  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:54,  9.77it/s]

Training loss: 0.024089  [ 9632/139199]


  9%|▉         | 402/4350 [00:40<06:42,  9.81it/s]

Training loss: 0.018788  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:22, 10.05it/s]

Training loss: 0.015567  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:21,  9.81it/s]

Training loss: 0.020327  [19232/139199]


 16%|█▌        | 703/4350 [01:11<06:07,  9.93it/s]

Training loss: 0.016534  [22432/139199]


 18%|█▊        | 802/4350 [01:21<06:01,  9.82it/s]

Training loss: 0.021786  [25632/139199]


 21%|██        | 903/4350 [01:31<05:50,  9.84it/s]

Training loss: 0.014277  [28832/139199]


 23%|██▎       | 1002/4350 [01:41<05:36,  9.96it/s]

Training loss: 0.022668  [32032/139199]


 25%|██▌       | 1102/4350 [01:51<05:33,  9.74it/s]

Training loss: 0.023279  [35232/139199]


 28%|██▊       | 1202/4350 [02:01<05:19,  9.86it/s]

Training loss: 0.014062  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:05,  9.97it/s]

Training loss: 0.016043  [41632/139199]


 32%|███▏      | 1402/4350 [02:22<05:01,  9.79it/s]

Training loss: 0.026321  [44832/139199]


 35%|███▍      | 1501/4350 [02:32<04:55,  9.64it/s]

Training loss: 0.019800  [48032/139199]


 37%|███▋      | 1601/4350 [02:42<04:38,  9.87it/s]

Training loss: 0.020775  [51232/139199]


 39%|███▉      | 1703/4350 [02:52<04:27,  9.91it/s]

Training loss: 0.018919  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:26,  9.54it/s]

Training loss: 0.015680  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:08,  9.85it/s]

Training loss: 0.019696  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<04:01,  9.70it/s]

Training loss: 0.014389  [64032/139199]


 48%|████▊     | 2101/4350 [03:33<03:43, 10.04it/s]

Training loss: 0.024502  [67232/139199]


 51%|█████     | 2202/4350 [03:43<03:36,  9.93it/s]

Training loss: 0.024244  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:29,  9.79it/s]

Training loss: 0.016088  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:18,  9.83it/s]

Training loss: 0.017073  [76832/139199]


 58%|█████▊    | 2503/4350 [04:14<03:06,  9.89it/s]

Training loss: 0.012043  [80032/139199]


 60%|█████▉    | 2603/4350 [04:24<02:55,  9.98it/s]

Training loss: 0.022644  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:48,  9.77it/s]

Training loss: 0.017075  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:36,  9.87it/s]

Training loss: 0.018887  [89632/139199]


 67%|██████▋   | 2903/4350 [04:54<02:26,  9.88it/s]

Training loss: 0.021405  [92832/139199]


 69%|██████▉   | 3002/4350 [05:05<02:19,  9.65it/s]

Training loss: 0.019647  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:08,  9.70it/s]

Training loss: 0.018770  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:57,  9.75it/s]

Training loss: 0.019923  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:46,  9.83it/s]

Training loss: 0.014844  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:36,  9.84it/s]

Training loss: 0.023605  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:26,  9.79it/s]

Training loss: 0.021354  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:17,  9.66it/s]

Training loss: 0.019342  [115232/139199]


 85%|████████▌ | 3702/4350 [06:17<01:06,  9.78it/s]

Training loss: 0.022534  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:55,  9.87it/s]

Training loss: 0.024813  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:45,  9.85it/s]

Training loss: 0.020859  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:34,  9.95it/s]

Training loss: 0.023698  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:25,  9.75it/s]

Training loss: 0.021530  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:15,  9.60it/s]

Training loss: 0.015048  [134432/139199]


 99%|█████████▉| 4303/4350 [07:18<00:04, 10.00it/s]

Training loss: 0.020521  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.82it/s]


Train loss: 0.020592 - Val loss: 0.036303 

Epoch 12
-------------------------------


  0%|          | 2/4350 [00:00<11:18,  6.41it/s]

Training loss: 0.022796  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:21,  9.63it/s]

Training loss: 0.016930  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:06,  9.73it/s]

Training loss: 0.020181  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:49,  9.88it/s]

Training loss: 0.020441  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:45,  9.75it/s]

Training loss: 0.022557  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:26,  9.95it/s]

Training loss: 0.024797  [16032/139199]


 14%|█▍        | 603/4350 [01:01<06:24,  9.75it/s]

Training loss: 0.018739  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:09,  9.87it/s]

Training loss: 0.016574  [22432/139199]


 18%|█▊        | 802/4350 [01:22<05:58,  9.91it/s]

Training loss: 0.025785  [25632/139199]


 21%|██        | 902/4350 [01:32<05:51,  9.80it/s]

Training loss: 0.025629  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:37,  9.93it/s]

Training loss: 0.018728  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:27,  9.91it/s]

Training loss: 0.019934  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:19,  9.85it/s]

Training loss: 0.018084  [38432/139199]


 30%|██▉       | 1303/4350 [02:13<05:07,  9.92it/s]

Training loss: 0.018976  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:06,  9.63it/s]

Training loss: 0.021101  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<05:02,  9.41it/s]

Training loss: 0.019774  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:42,  9.72it/s]

Training loss: 0.015676  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:29,  9.84it/s]

Training loss: 0.024397  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:17,  9.91it/s]

Training loss: 0.018091  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:08,  9.86it/s]

Training loss: 0.022462  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:55,  9.98it/s]

Training loss: 0.022186  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:52,  9.68it/s]

Training loss: 0.023482  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:34, 10.02it/s]

Training loss: 0.021871  [70432/139199]


 53%|█████▎    | 2301/4350 [03:54<03:24, 10.04it/s]

Training loss: 0.027749  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:15,  9.96it/s]

Training loss: 0.020212  [76832/139199]


 57%|█████▋    | 2501/4350 [04:14<03:07,  9.84it/s]

Training loss: 0.028203  [80032/139199]


 60%|█████▉    | 2603/4350 [04:25<02:55,  9.98it/s]

Training loss: 0.015514  [83232/139199]


 62%|██████▏   | 2701/4350 [04:35<02:46,  9.90it/s]

Training loss: 0.027886  [86432/139199]


 64%|██████▍   | 2803/4350 [04:45<02:35,  9.95it/s]

Training loss: 0.019862  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:26,  9.85it/s]

Training loss: 0.024631  [92832/139199]


 69%|██████▉   | 3003/4350 [05:05<02:16,  9.90it/s]

Training loss: 0.019423  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:05,  9.94it/s]

Training loss: 0.018660  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:57,  9.81it/s]

Training loss: 0.023200  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:46,  9.84it/s]

Training loss: 0.021870  [105632/139199]


 78%|███████▊  | 3403/4350 [05:46<01:35,  9.95it/s]

Training loss: 0.017920  [108832/139199]


 81%|████████  | 3503/4350 [05:56<01:25,  9.92it/s]

Training loss: 0.022461  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:19,  9.42it/s]

Training loss: 0.024739  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:05,  9.93it/s]

Training loss: 0.024852  [118432/139199]


 87%|████████▋ | 3802/4350 [06:26<00:56,  9.73it/s]

Training loss: 0.018663  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:46,  9.57it/s]

Training loss: 0.020919  [124832/139199]


 92%|█████████▏| 4001/4350 [06:47<00:35,  9.88it/s]

Training loss: 0.021796  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:25,  9.88it/s]

Training loss: 0.024425  [131232/139199]


 97%|█████████▋| 4202/4350 [07:07<00:15,  9.76it/s]

Training loss: 0.020275  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04,  9.77it/s]

Training loss: 0.026370  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.82it/s]


Train loss: 0.020393 - Val loss: 0.035979 

Epoch 13
-------------------------------


  0%|          | 3/4350 [00:00<09:35,  7.55it/s]

Training loss: 0.023573  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:14,  9.78it/s]

Training loss: 0.020497  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:02,  9.82it/s]

Training loss: 0.019227  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:46,  9.96it/s]

Training loss: 0.016448  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:42,  9.81it/s]

Training loss: 0.021427  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:26,  9.95it/s]

Training loss: 0.014832  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:19,  9.87it/s]

Training loss: 0.018243  [19232/139199]


 16%|█▌        | 703/4350 [01:11<06:07,  9.92it/s]

Training loss: 0.021310  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:57,  9.93it/s]

Training loss: 0.017315  [25632/139199]


 21%|██        | 902/4350 [01:31<05:41, 10.09it/s]

Training loss: 0.028083  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:39,  9.85it/s]

Training loss: 0.014418  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:36,  9.64it/s]

Training loss: 0.018899  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:22,  9.76it/s]

Training loss: 0.023210  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:08,  9.88it/s]

Training loss: 0.017513  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<04:55,  9.98it/s]

Training loss: 0.022560  [44832/139199]


 35%|███▍      | 1503/4350 [02:33<04:43, 10.04it/s]

Training loss: 0.021386  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:33, 10.05it/s]

Training loss: 0.023283  [51232/139199]


 39%|███▉      | 1703/4350 [02:53<04:24, 10.01it/s]

Training loss: 0.021854  [54432/139199]


 41%|████▏     | 1803/4350 [03:03<04:16,  9.91it/s]

Training loss: 0.024445  [57632/139199]


 44%|████▎     | 1903/4350 [03:13<04:05,  9.95it/s]

Training loss: 0.020590  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:58,  9.85it/s]

Training loss: 0.017262  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:49,  9.80it/s]

Training loss: 0.021501  [67232/139199]


 51%|█████     | 2203/4350 [03:44<03:36,  9.92it/s]

Training loss: 0.016244  [70432/139199]


 53%|█████▎    | 2303/4350 [03:54<03:26,  9.91it/s]

Training loss: 0.021187  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:20,  9.73it/s]

Training loss: 0.023965  [76832/139199]


 58%|█████▊    | 2503/4350 [04:14<03:03, 10.08it/s]

Training loss: 0.015231  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<02:59,  9.73it/s]

Training loss: 0.015962  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:46,  9.92it/s]

Training loss: 0.025297  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:37,  9.84it/s]

Training loss: 0.023732  [89632/139199]


 67%|██████▋   | 2903/4350 [04:55<02:24, 10.00it/s]

Training loss: 0.034337  [92832/139199]


 69%|██████▉   | 3003/4350 [05:05<02:15,  9.91it/s]

Training loss: 0.016320  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:07,  9.80it/s]

Training loss: 0.020811  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:59,  9.61it/s]

Training loss: 0.018157  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:46,  9.86it/s]

Training loss: 0.020163  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:38,  9.66it/s]

Training loss: 0.024431  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:27,  9.74it/s]

Training loss: 0.017635  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:17,  9.59it/s]

Training loss: 0.022522  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:05,  9.89it/s]

Training loss: 0.024328  [118432/139199]


 87%|████████▋ | 3803/4350 [06:27<00:54,  9.98it/s]

Training loss: 0.015029  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:45,  9.74it/s]

Training loss: 0.027019  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:34,  9.95it/s]

Training loss: 0.031763  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:24,  9.99it/s]

Training loss: 0.015785  [131232/139199]


 97%|█████████▋| 4202/4350 [07:07<00:14,  9.89it/s]

Training loss: 0.016669  [134432/139199]


 99%|█████████▉| 4303/4350 [07:17<00:04,  9.90it/s]

Training loss: 0.019650  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.83it/s]


Train loss: 0.020067 - Val loss: 0.039404 

Epoch 14
-------------------------------


  0%|          | 3/4350 [00:00<09:30,  7.61it/s]

Training loss: 0.014926  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:12,  9.82it/s]

Training loss: 0.018670  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:00,  9.86it/s]

Training loss: 0.021869  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:57,  9.70it/s]

Training loss: 0.026531  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:50,  9.62it/s]

Training loss: 0.018703  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:38,  9.66it/s]

Training loss: 0.016297  [16032/139199]


 14%|█▍        | 601/4350 [01:01<06:19,  9.88it/s]

Training loss: 0.021140  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:17,  9.66it/s]

Training loss: 0.018552  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:00,  9.85it/s]

Training loss: 0.017247  [25632/139199]


 21%|██        | 902/4350 [01:32<05:47,  9.92it/s]

Training loss: 0.016584  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:42,  9.77it/s]

Training loss: 0.029580  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:40,  9.54it/s]

Training loss: 0.019081  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:16,  9.93it/s]

Training loss: 0.014568  [38432/139199]


 30%|██▉       | 1303/4350 [02:13<05:04, 10.00it/s]

Training loss: 0.022291  [41632/139199]


 32%|███▏      | 1403/4350 [02:23<04:58,  9.88it/s]

Training loss: 0.018342  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:47,  9.89it/s]

Training loss: 0.017861  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:34,  9.99it/s]

Training loss: 0.021729  [51232/139199]


 39%|███▉      | 1703/4350 [02:54<04:29,  9.81it/s]

Training loss: 0.011324  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:20,  9.78it/s]

Training loss: 0.021980  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:11,  9.73it/s]

Training loss: 0.021930  [60832/139199]


 46%|████▌     | 2003/4350 [03:24<03:57,  9.89it/s]

Training loss: 0.021173  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:57,  9.47it/s]

Training loss: 0.027839  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:39,  9.77it/s]

Training loss: 0.015381  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:29,  9.76it/s]

Training loss: 0.018807  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:17,  9.85it/s]

Training loss: 0.014389  [76832/139199]


 58%|█████▊    | 2503/4350 [04:15<03:05,  9.93it/s]

Training loss: 0.025474  [80032/139199]


 60%|█████▉    | 2603/4350 [04:25<02:54,  9.99it/s]

Training loss: 0.025228  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:51,  9.62it/s]

Training loss: 0.020725  [86432/139199]


 64%|██████▍   | 2803/4350 [04:45<02:35,  9.92it/s]

Training loss: 0.017241  [89632/139199]


 67%|██████▋   | 2903/4350 [04:56<02:26,  9.85it/s]

Training loss: 0.017644  [92832/139199]


 69%|██████▉   | 3003/4350 [05:06<02:15,  9.97it/s]

Training loss: 0.020292  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:06,  9.83it/s]

Training loss: 0.018849  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:59,  9.58it/s]

Training loss: 0.015642  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:47,  9.79it/s]

Training loss: 0.019292  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:37,  9.72it/s]

Training loss: 0.020305  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:25,  9.91it/s]

Training loss: 0.024300  [112032/139199]


 83%|████████▎ | 3602/4350 [06:07<01:14,  9.98it/s]

Training loss: 0.015832  [115232/139199]


 85%|████████▌ | 3702/4350 [06:17<01:06,  9.80it/s]

Training loss: 0.023787  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<01:03,  8.65it/s]

Training loss: 0.016901  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:47,  9.50it/s]

Training loss: 0.023631  [124832/139199]


 92%|█████████▏| 4002/4350 [06:49<00:35,  9.84it/s]

Training loss: 0.020716  [128032/139199]


 94%|█████████▍| 4103/4350 [06:59<00:25,  9.67it/s]

Training loss: 0.026708  [131232/139199]


 97%|█████████▋| 4202/4350 [07:09<00:15,  9.85it/s]

Training loss: 0.024362  [134432/139199]


 99%|█████████▉| 4302/4350 [07:20<00:04,  9.74it/s]

Training loss: 0.018162  [137632/139199]


100%|██████████| 4350/4350 [07:25<00:00,  9.77it/s]


Train loss: 0.019978 - Val loss: 0.046493 

Epoch 15
-------------------------------


  0%|          | 3/4350 [00:00<09:41,  7.47it/s]

Training loss: 0.020115  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:19,  9.67it/s]

Training loss: 0.023747  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:05,  9.75it/s]

Training loss: 0.019799  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:53,  9.78it/s]

Training loss: 0.016134  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:38,  9.90it/s]

Training loss: 0.027168  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:28,  9.91it/s]

Training loss: 0.016047  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:16,  9.95it/s]

Training loss: 0.019401  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:09,  9.86it/s]

Training loss: 0.017014  [22432/139199]


 18%|█▊        | 802/4350 [01:21<06:06,  9.69it/s]

Training loss: 0.017356  [25632/139199]


 21%|██        | 902/4350 [01:31<05:51,  9.81it/s]

Training loss: 0.022121  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:42,  9.76it/s]

Training loss: 0.018353  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:28,  9.89it/s]

Training loss: 0.015438  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:21,  9.79it/s]

Training loss: 0.022096  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:09,  9.83it/s]

Training loss: 0.021665  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:01,  9.77it/s]

Training loss: 0.022227  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:48,  9.86it/s]

Training loss: 0.014883  [48032/139199]


 37%|███▋      | 1603/4350 [02:43<04:36,  9.92it/s]

Training loss: 0.016114  [51232/139199]


 39%|███▉      | 1703/4350 [02:53<04:25,  9.98it/s]

Training loss: 0.020757  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:23,  9.69it/s]

Training loss: 0.017968  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:05,  9.96it/s]

Training loss: 0.015317  [60832/139199]


 46%|████▌     | 2003/4350 [03:24<03:57,  9.89it/s]

Training loss: 0.018804  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:48,  9.84it/s]

Training loss: 0.018180  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:37,  9.88it/s]

Training loss: 0.018970  [70432/139199]


 53%|█████▎    | 2303/4350 [03:54<03:25,  9.96it/s]

Training loss: 0.025930  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:18,  9.83it/s]

Training loss: 0.025866  [76832/139199]


 58%|█████▊    | 2502/4350 [04:14<03:11,  9.67it/s]

Training loss: 0.026251  [80032/139199]


 60%|█████▉    | 2603/4350 [04:24<02:54, 10.00it/s]

Training loss: 0.020371  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:45,  9.96it/s]

Training loss: 0.014508  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:39,  9.72it/s]

Training loss: 0.019052  [89632/139199]


 67%|██████▋   | 2903/4350 [04:55<02:24, 10.02it/s]

Training loss: 0.015194  [92832/139199]


 69%|██████▉   | 3002/4350 [05:05<02:16,  9.86it/s]

Training loss: 0.021545  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:05,  9.92it/s]

Training loss: 0.022362  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:57,  9.76it/s]

Training loss: 0.034525  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:45,  9.92it/s]

Training loss: 0.015421  [105632/139199]


 78%|███████▊  | 3403/4350 [05:46<01:35,  9.95it/s]

Training loss: 0.016873  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:24, 10.00it/s]

Training loss: 0.021636  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:15,  9.89it/s]

Training loss: 0.012596  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:07,  9.63it/s]

Training loss: 0.017970  [118432/139199]


 87%|████████▋ | 3802/4350 [06:26<00:55,  9.93it/s]

Training loss: 0.016273  [121632/139199]


 90%|████████▉ | 3902/4350 [06:36<00:45,  9.78it/s]

Training loss: 0.023956  [124832/139199]


 92%|█████████▏| 4003/4350 [06:47<00:34,  9.97it/s]

Training loss: 0.020341  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:25,  9.90it/s]

Training loss: 0.019976  [131232/139199]


 97%|█████████▋| 4203/4350 [07:07<00:14,  9.93it/s]

Training loss: 0.016120  [134432/139199]


 99%|█████████▉| 4303/4350 [07:17<00:04, 10.01it/s]

Training loss: 0.023771  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.83it/s]


Train loss: 0.019536 - Val loss: 0.031608 

Epoch 16
-------------------------------


  0%|          | 3/4350 [00:00<09:25,  7.69it/s]

Training loss: 0.017839  [   32/139199]


  2%|▏         | 103/4350 [00:10<07:09,  9.89it/s]

Training loss: 0.015994  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:06,  9.74it/s]

Training loss: 0.020812  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:50,  9.87it/s]

Training loss: 0.029036  [ 9632/139199]


  9%|▉         | 403/4350 [00:41<06:41,  9.82it/s]

Training loss: 0.017754  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:29,  9.87it/s]

Training loss: 0.023264  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:25,  9.73it/s]

Training loss: 0.025665  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:10,  9.86it/s]

Training loss: 0.014453  [22432/139199]


 18%|█▊        | 802/4350 [01:22<05:59,  9.86it/s]

Training loss: 0.014722  [25632/139199]


 21%|██        | 902/4350 [01:32<05:48,  9.90it/s]

Training loss: 0.013784  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:42,  9.77it/s]

Training loss: 0.025068  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:34,  9.70it/s]

Training loss: 0.012201  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:20,  9.81it/s]

Training loss: 0.023239  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:13,  9.74it/s]

Training loss: 0.019552  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:05,  9.66it/s]

Training loss: 0.023281  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:48,  9.88it/s]

Training loss: 0.014946  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:40,  9.79it/s]

Training loss: 0.017908  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:28,  9.87it/s]

Training loss: 0.014915  [54432/139199]


 41%|████▏     | 1803/4350 [03:04<04:16,  9.95it/s]

Training loss: 0.013748  [57632/139199]


 44%|████▎     | 1903/4350 [03:14<04:05,  9.96it/s]

Training loss: 0.018419  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:57,  9.90it/s]

Training loss: 0.021176  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:48,  9.82it/s]

Training loss: 0.025608  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:37,  9.88it/s]

Training loss: 0.015613  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:29,  9.79it/s]

Training loss: 0.016712  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:21,  9.65it/s]

Training loss: 0.026106  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:07,  9.87it/s]

Training loss: 0.017186  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<02:58,  9.79it/s]

Training loss: 0.022535  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:47,  9.82it/s]

Training loss: 0.016005  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:41,  9.56it/s]

Training loss: 0.015004  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:28,  9.74it/s]

Training loss: 0.018835  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:16,  9.85it/s]

Training loss: 0.016683  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:04,  9.99it/s]

Training loss: 0.015170  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:55,  9.91it/s]

Training loss: 0.022704  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:48,  9.69it/s]

Training loss: 0.021052  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:36,  9.83it/s]

Training loss: 0.016856  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:26,  9.77it/s]

Training loss: 0.015491  [112032/139199]


 83%|████████▎ | 3602/4350 [06:07<01:15,  9.96it/s]

Training loss: 0.018720  [115232/139199]


 85%|████████▌ | 3702/4350 [06:17<01:05,  9.89it/s]

Training loss: 0.020478  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:56,  9.75it/s]

Training loss: 0.016061  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:45,  9.76it/s]

Training loss: 0.021051  [124832/139199]


 92%|█████████▏| 4002/4350 [06:48<00:35,  9.83it/s]

Training loss: 0.023990  [128032/139199]


 94%|█████████▍| 4102/4350 [06:58<00:25,  9.69it/s]

Training loss: 0.023127  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:15,  9.81it/s]

Training loss: 0.015027  [134432/139199]


 99%|█████████▉| 4302/4350 [07:19<00:04,  9.86it/s]

Training loss: 0.021399  [137632/139199]


100%|██████████| 4350/4350 [07:24<00:00,  9.80it/s]


Train loss: 0.019465 - Val loss: 0.035892 

Epoch 17
-------------------------------


  0%|          | 3/4350 [00:00<09:39,  7.50it/s]

Training loss: 0.019239  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:06,  9.95it/s]

Training loss: 0.018567  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<06:58,  9.91it/s]

Training loss: 0.019905  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:56,  9.72it/s]

Training loss: 0.028820  [ 9632/139199]


  9%|▉         | 402/4350 [00:40<06:48,  9.66it/s]

Training loss: 0.019747  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:26,  9.97it/s]

Training loss: 0.019385  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:21,  9.83it/s]

Training loss: 0.017574  [19232/139199]


 16%|█▌        | 701/4350 [01:11<06:09,  9.88it/s]

Training loss: 0.015388  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:59,  9.88it/s]

Training loss: 0.021346  [25632/139199]


 21%|██        | 902/4350 [01:31<05:56,  9.67it/s]

Training loss: 0.021454  [28832/139199]


 23%|██▎       | 1002/4350 [01:41<05:37,  9.93it/s]

Training loss: 0.014213  [32032/139199]


 25%|██▌       | 1103/4350 [01:52<05:24, 10.01it/s]

Training loss: 0.018845  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:18,  9.88it/s]

Training loss: 0.011487  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:07,  9.93it/s]

Training loss: 0.018003  [41632/139199]


 32%|███▏      | 1403/4350 [02:22<04:56,  9.95it/s]

Training loss: 0.016661  [44832/139199]


 35%|███▍      | 1502/4350 [02:32<04:59,  9.52it/s]

Training loss: 0.015793  [48032/139199]


 37%|███▋      | 1601/4350 [02:42<04:33, 10.03it/s]

Training loss: 0.021825  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:23, 10.05it/s]

Training loss: 0.015458  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:15,  9.96it/s]

Training loss: 0.021291  [57632/139199]


 44%|████▎     | 1903/4350 [03:13<04:04, 10.00it/s]

Training loss: 0.017775  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<04:01,  9.73it/s]

Training loss: 0.011102  [64032/139199]


 48%|████▊     | 2102/4350 [03:33<03:48,  9.84it/s]

Training loss: 0.013481  [67232/139199]


 51%|█████     | 2202/4350 [03:43<03:37,  9.88it/s]

Training loss: 0.018296  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:28,  9.81it/s]

Training loss: 0.028074  [73632/139199]


 55%|█████▌    | 2402/4350 [04:04<03:19,  9.76it/s]

Training loss: 0.034579  [76832/139199]


 58%|█████▊    | 2503/4350 [04:14<03:05,  9.95it/s]

Training loss: 0.013855  [80032/139199]


 60%|█████▉    | 2602/4350 [04:24<03:05,  9.44it/s]

Training loss: 0.016627  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:48,  9.77it/s]

Training loss: 0.011587  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:37,  9.84it/s]

Training loss: 0.016722  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:27,  9.80it/s]

Training loss: 0.027280  [92832/139199]


 69%|██████▉   | 3002/4350 [05:05<02:16,  9.84it/s]

Training loss: 0.016702  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:05,  9.93it/s]

Training loss: 0.026607  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:55,  9.90it/s]

Training loss: 0.018354  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:48,  9.67it/s]

Training loss: 0.018547  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:38,  9.67it/s]

Training loss: 0.013990  [108832/139199]


 81%|████████  | 3503/4350 [05:56<01:25,  9.94it/s]

Training loss: 0.020256  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:16,  9.81it/s]

Training loss: 0.015148  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:04, 10.02it/s]

Training loss: 0.016385  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:56,  9.68it/s]

Training loss: 0.022065  [121632/139199]


 90%|████████▉ | 3903/4350 [06:37<00:45,  9.91it/s]

Training loss: 0.025825  [124832/139199]


 92%|█████████▏| 4003/4350 [06:47<00:34, 10.01it/s]

Training loss: 0.029882  [128032/139199]


 94%|█████████▍| 4101/4350 [06:57<00:25,  9.94it/s]

Training loss: 0.017434  [131232/139199]


 97%|█████████▋| 4202/4350 [07:07<00:15,  9.80it/s]

Training loss: 0.018935  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04,  9.72it/s]

Training loss: 0.020724  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.82it/s]


Train loss: 0.019211 - Val loss: 0.031258 

Epoch 18
-------------------------------


  0%|          | 3/4350 [00:00<09:31,  7.61it/s]

Training loss: 0.016476  [   32/139199]


  2%|▏         | 103/4350 [00:10<07:04,  9.99it/s]

Training loss: 0.026828  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:02,  9.83it/s]

Training loss: 0.015360  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<07:01,  9.61it/s]

Training loss: 0.023773  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:41,  9.84it/s]

Training loss: 0.022493  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:30,  9.85it/s]

Training loss: 0.015074  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:22,  9.80it/s]

Training loss: 0.018160  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:06,  9.96it/s]

Training loss: 0.022329  [22432/139199]


 18%|█▊        | 803/4350 [01:21<05:56,  9.95it/s]

Training loss: 0.015375  [25632/139199]


 21%|██        | 903/4350 [01:31<05:45,  9.98it/s]

Training loss: 0.016908  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:44,  9.73it/s]

Training loss: 0.015803  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:26,  9.95it/s]

Training loss: 0.013038  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:18,  9.87it/s]

Training loss: 0.019246  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:08,  9.89it/s]

Training loss: 0.019735  [41632/139199]


 32%|███▏      | 1402/4350 [02:22<05:01,  9.79it/s]

Training loss: 0.014641  [44832/139199]


 35%|███▍      | 1502/4350 [02:32<04:51,  9.77it/s]

Training loss: 0.040163  [48032/139199]


 37%|███▋      | 1603/4350 [02:43<04:35,  9.96it/s]

Training loss: 0.011823  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:29,  9.84it/s]

Training loss: 0.016096  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:16,  9.94it/s]

Training loss: 0.015759  [57632/139199]


 44%|████▎     | 1902/4350 [03:13<04:07,  9.90it/s]

Training loss: 0.018329  [60832/139199]


 46%|████▌     | 2002/4350 [03:23<03:55,  9.95it/s]

Training loss: 0.016724  [64032/139199]


 48%|████▊     | 2102/4350 [03:33<03:48,  9.82it/s]

Training loss: 0.017445  [67232/139199]


 51%|█████     | 2203/4350 [03:43<03:35,  9.97it/s]

Training loss: 0.016550  [70432/139199]


 53%|█████▎    | 2302/4350 [03:53<03:28,  9.80it/s]

Training loss: 0.015838  [73632/139199]


 55%|█████▌    | 2402/4350 [04:03<03:16,  9.93it/s]

Training loss: 0.011859  [76832/139199]


 58%|█████▊    | 2502/4350 [04:14<03:06,  9.90it/s]

Training loss: 0.023362  [80032/139199]


 60%|█████▉    | 2603/4350 [04:24<02:53, 10.05it/s]

Training loss: 0.018318  [83232/139199]


 62%|██████▏   | 2702/4350 [04:34<02:45,  9.96it/s]

Training loss: 0.019398  [86432/139199]


 64%|██████▍   | 2802/4350 [04:44<02:36,  9.88it/s]

Training loss: 0.020735  [89632/139199]


 67%|██████▋   | 2901/4350 [04:54<02:27,  9.85it/s]

Training loss: 0.015032  [92832/139199]


 69%|██████▉   | 3002/4350 [05:04<02:19,  9.64it/s]

Training loss: 0.017506  [96032/139199]


 71%|███████▏  | 3102/4350 [05:15<02:10,  9.59it/s]

Training loss: 0.013009  [99232/139199]


 74%|███████▎  | 3202/4350 [05:25<01:57,  9.77it/s]

Training loss: 0.016737  [102432/139199]


 76%|███████▌  | 3302/4350 [05:35<01:45,  9.90it/s]

Training loss: 0.015945  [105632/139199]


 78%|███████▊  | 3402/4350 [05:45<01:36,  9.78it/s]

Training loss: 0.015441  [108832/139199]


 81%|████████  | 3503/4350 [05:56<01:25,  9.92it/s]

Training loss: 0.018602  [112032/139199]


 83%|████████▎ | 3601/4350 [06:06<01:14, 10.02it/s]

Training loss: 0.021256  [115232/139199]


 85%|████████▌ | 3702/4350 [06:16<01:07,  9.54it/s]

Training loss: 0.023288  [118432/139199]


 87%|████████▋ | 3802/4350 [06:26<00:55,  9.91it/s]

Training loss: 0.015508  [121632/139199]


 90%|████████▉ | 3902/4350 [06:36<00:46,  9.64it/s]

Training loss: 0.013075  [124832/139199]


 92%|█████████▏| 4002/4350 [06:46<00:35,  9.73it/s]

Training loss: 0.016426  [128032/139199]


 94%|█████████▍| 4102/4350 [06:56<00:25,  9.54it/s]

Training loss: 0.020745  [131232/139199]


 97%|█████████▋| 4202/4350 [07:07<00:15,  9.73it/s]

Training loss: 0.018319  [134432/139199]


 99%|█████████▉| 4302/4350 [07:17<00:04,  9.88it/s]

Training loss: 0.015583  [137632/139199]


100%|██████████| 4350/4350 [07:22<00:00,  9.84it/s]


Train loss: 0.019006 - Val loss: 0.040265 

Epoch 19
-------------------------------


  0%|          | 3/4350 [00:00<09:36,  7.55it/s]

Training loss: 0.028314  [   32/139199]


  2%|▏         | 102/4350 [00:11<07:19,  9.67it/s]

Training loss: 0.020766  [ 3232/139199]


  5%|▍         | 202/4350 [00:21<07:00,  9.86it/s]

Training loss: 0.023077  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<07:15,  9.29it/s]

Training loss: 0.017419  [ 9632/139199]


  9%|▉         | 402/4350 [00:42<06:51,  9.59it/s]

Training loss: 0.013707  [12832/139199]


 12%|█▏        | 502/4350 [00:52<06:41,  9.58it/s]

Training loss: 0.015520  [16032/139199]


 14%|█▍        | 602/4350 [01:03<06:24,  9.74it/s]

Training loss: 0.020205  [19232/139199]


 16%|█▌        | 702/4350 [01:13<06:14,  9.74it/s]

Training loss: 0.015426  [22432/139199]


 18%|█▊        | 802/4350 [01:23<05:55,  9.98it/s]

Training loss: 0.026163  [25632/139199]


 21%|██        | 903/4350 [01:34<05:47,  9.93it/s]

Training loss: 0.024062  [28832/139199]


 23%|██▎       | 1002/4350 [01:44<05:43,  9.74it/s]

Training loss: 0.042005  [32032/139199]


 25%|██▌       | 1102/4350 [01:54<05:35,  9.69it/s]

Training loss: 0.017415  [35232/139199]


 28%|██▊       | 1202/4350 [02:04<05:23,  9.74it/s]

Training loss: 0.018127  [38432/139199]


 30%|██▉       | 1302/4350 [02:15<05:07,  9.93it/s]

Training loss: 0.016202  [41632/139199]


 32%|███▏      | 1401/4350 [02:25<04:56,  9.93it/s]

Training loss: 0.020722  [44832/139199]


 35%|███▍      | 1502/4350 [02:35<04:53,  9.72it/s]

Training loss: 0.016066  [48032/139199]


 37%|███▋      | 1602/4350 [02:45<04:41,  9.75it/s]

Training loss: 0.017126  [51232/139199]


 39%|███▉      | 1702/4350 [02:56<04:37,  9.55it/s]

Training loss: 0.019170  [54432/139199]


 41%|████▏     | 1802/4350 [03:06<04:19,  9.80it/s]

Training loss: 0.012719  [57632/139199]


 44%|████▎     | 1902/4350 [03:16<04:12,  9.68it/s]

Training loss: 0.018054  [60832/139199]


 46%|████▌     | 2002/4350 [03:26<04:02,  9.68it/s]

Training loss: 0.020537  [64032/139199]


 48%|████▊     | 2102/4350 [03:37<03:47,  9.88it/s]

Training loss: 0.022532  [67232/139199]


 51%|█████     | 2203/4350 [03:47<03:35,  9.94it/s]

Training loss: 0.025660  [70432/139199]


 53%|█████▎    | 2302/4350 [03:57<03:27,  9.86it/s]

Training loss: 0.026221  [73632/139199]


 55%|█████▌    | 2402/4350 [04:07<03:19,  9.79it/s]

Training loss: 0.021859  [76832/139199]


 58%|█████▊    | 2502/4350 [04:17<03:05,  9.96it/s]

Training loss: 0.015314  [80032/139199]


 60%|█████▉    | 2603/4350 [04:28<02:56,  9.91it/s]

Training loss: 0.015666  [83232/139199]


 62%|██████▏   | 2702/4350 [04:38<02:48,  9.75it/s]

Training loss: 0.017155  [86432/139199]


 64%|██████▍   | 2802/4350 [04:48<02:41,  9.59it/s]

Training loss: 0.026093  [89632/139199]


 67%|██████▋   | 2902/4350 [04:58<02:28,  9.74it/s]

Training loss: 0.019016  [92832/139199]


 69%|██████▉   | 3002/4350 [05:09<02:16,  9.90it/s]

Training loss: 0.014699  [96032/139199]


 71%|███████▏  | 3102/4350 [05:19<02:08,  9.70it/s]

Training loss: 0.026040  [99232/139199]


 74%|███████▎  | 3202/4350 [05:29<01:56,  9.83it/s]

Training loss: 0.017221  [102432/139199]


 76%|███████▌  | 3302/4350 [05:39<01:45,  9.97it/s]

Training loss: 0.019432  [105632/139199]


 78%|███████▊  | 3402/4350 [05:49<01:37,  9.74it/s]

Training loss: 0.019925  [108832/139199]


 81%|████████  | 3502/4350 [06:00<01:26,  9.85it/s]

Training loss: 0.018810  [112032/139199]


 83%|████████▎ | 3602/4350 [06:10<01:15,  9.85it/s]

Training loss: 0.014973  [115232/139199]


 85%|████████▌ | 3702/4350 [06:20<01:06,  9.77it/s]

Training loss: 0.029078  [118432/139199]


 87%|████████▋ | 3803/4350 [06:30<00:55,  9.94it/s]

Training loss: 0.021852  [121632/139199]


 90%|████████▉ | 3902/4350 [06:40<00:46,  9.72it/s]

Training loss: 0.015904  [124832/139199]


 92%|█████████▏| 4002/4350 [06:51<00:36,  9.59it/s]

Training loss: 0.022683  [128032/139199]


 94%|█████████▍| 4102/4350 [07:01<00:25,  9.83it/s]

Training loss: 0.013127  [131232/139199]


 97%|█████████▋| 4203/4350 [07:11<00:14,  9.97it/s]

Training loss: 0.018658  [134432/139199]


 99%|█████████▉| 4302/4350 [07:21<00:04,  9.83it/s]

Training loss: 0.016788  [137632/139199]


100%|██████████| 4350/4350 [07:26<00:00,  9.73it/s]


Train loss: 0.018958 - Val loss: 0.031142 

Epoch 20
-------------------------------


  0%|          | 3/4350 [00:00<09:39,  7.49it/s]

Training loss: 0.023499  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:16,  9.73it/s]

Training loss: 0.016207  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:05,  9.76it/s]

Training loss: 0.016907  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:52,  9.81it/s]

Training loss: 0.015082  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:46,  9.72it/s]

Training loss: 0.015830  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:27,  9.92it/s]

Training loss: 0.019879  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:18,  9.91it/s]

Training loss: 0.017099  [19232/139199]


 16%|█▌        | 703/4350 [01:11<06:08,  9.90it/s]

Training loss: 0.025847  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:59,  9.87it/s]

Training loss: 0.025923  [25632/139199]


 21%|██        | 902/4350 [01:32<05:48,  9.90it/s]

Training loss: 0.017859  [28832/139199]


 23%|██▎       | 1003/4350 [01:42<05:36,  9.95it/s]

Training loss: 0.018453  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:32,  9.78it/s]

Training loss: 0.016295  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:22,  9.76it/s]

Training loss: 0.013827  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:15,  9.67it/s]

Training loss: 0.015026  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:07,  9.59it/s]

Training loss: 0.021467  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:55,  9.65it/s]

Training loss: 0.023466  [48032/139199]


 37%|███▋      | 1603/4350 [02:43<04:36,  9.93it/s]

Training loss: 0.018149  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:30,  9.79it/s]

Training loss: 0.015027  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:19,  9.81it/s]

Training loss: 0.019423  [57632/139199]


 44%|████▎     | 1903/4350 [03:14<04:06,  9.92it/s]

Training loss: 0.015783  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<04:00,  9.75it/s]

Training loss: 0.020050  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:46,  9.93it/s]

Training loss: 0.024481  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:41,  9.69it/s]

Training loss: 0.015153  [70432/139199]


 53%|█████▎    | 2303/4350 [03:55<03:26,  9.91it/s]

Training loss: 0.021429  [73632/139199]


 55%|█████▌    | 2403/4350 [04:05<03:17,  9.86it/s]

Training loss: 0.024148  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:10,  9.70it/s]

Training loss: 0.029829  [80032/139199]


 60%|█████▉    | 2603/4350 [04:25<02:56,  9.92it/s]

Training loss: 0.019157  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:46,  9.90it/s]

Training loss: 0.018386  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:40,  9.67it/s]

Training loss: 0.014765  [89632/139199]


 67%|██████▋   | 2902/4350 [04:56<02:27,  9.79it/s]

Training loss: 0.020165  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:17,  9.80it/s]

Training loss: 0.014332  [96032/139199]


 71%|███████▏  | 3103/4350 [05:16<02:05,  9.94it/s]

Training loss: 0.019994  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:56,  9.81it/s]

Training loss: 0.028732  [102432/139199]


 76%|███████▌  | 3303/4350 [05:37<01:44, 10.00it/s]

Training loss: 0.021872  [105632/139199]


 78%|███████▊  | 3403/4350 [05:47<01:35,  9.93it/s]

Training loss: 0.019288  [108832/139199]


 81%|████████  | 3503/4350 [05:57<01:25,  9.90it/s]

Training loss: 0.017428  [112032/139199]


 83%|████████▎ | 3602/4350 [06:07<01:15,  9.95it/s]

Training loss: 0.012905  [115232/139199]


 85%|████████▌ | 3703/4350 [06:17<01:05,  9.92it/s]

Training loss: 0.017000  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:55,  9.84it/s]

Training loss: 0.016610  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:45,  9.93it/s]

Training loss: 0.015728  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:35,  9.93it/s]

Training loss: 0.010979  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:24,  9.97it/s]

Training loss: 0.018790  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:15,  9.83it/s]

Training loss: 0.019942  [134432/139199]


 99%|█████████▉| 4303/4350 [07:18<00:04, 10.04it/s]

Training loss: 0.017290  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.81it/s]


Train loss: 0.018814 - Val loss: 0.048462 

Epoch 21
-------------------------------


  0%|          | 3/4350 [00:00<09:38,  7.51it/s]

Training loss: 0.015294  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:16,  9.74it/s]

Training loss: 0.014329  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:11,  9.60it/s]

Training loss: 0.015619  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:52,  9.82it/s]

Training loss: 0.011881  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:48,  9.67it/s]

Training loss: 0.017900  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:37,  9.69it/s]

Training loss: 0.017371  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:29,  9.63it/s]

Training loss: 0.022216  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:13,  9.77it/s]

Training loss: 0.010736  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:05,  9.72it/s]

Training loss: 0.017195  [25632/139199]


 21%|██        | 902/4350 [01:32<05:53,  9.75it/s]

Training loss: 0.022787  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:42,  9.77it/s]

Training loss: 0.014737  [32032/139199]


 25%|██▌       | 1102/4350 [01:53<05:34,  9.70it/s]

Training loss: 0.021523  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:30,  9.53it/s]

Training loss: 0.017475  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:10,  9.82it/s]

Training loss: 0.019387  [41632/139199]


 32%|███▏      | 1401/4350 [02:23<04:57,  9.91it/s]

Training loss: 0.016120  [44832/139199]


 35%|███▍      | 1502/4350 [02:34<04:56,  9.61it/s]

Training loss: 0.017018  [48032/139199]


 37%|███▋      | 1603/4350 [02:44<04:35,  9.98it/s]

Training loss: 0.016698  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:28,  9.87it/s]

Training loss: 0.017936  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:22,  9.72it/s]

Training loss: 0.014767  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:07,  9.90it/s]

Training loss: 0.015155  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:56,  9.92it/s]

Training loss: 0.032705  [64032/139199]


 48%|████▊     | 2102/4350 [03:35<03:49,  9.78it/s]

Training loss: 0.013230  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:37,  9.85it/s]

Training loss: 0.019412  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:28,  9.81it/s]

Training loss: 0.015613  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:17,  9.86it/s]

Training loss: 0.015929  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:04, 10.01it/s]

Training loss: 0.019955  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<02:56,  9.91it/s]

Training loss: 0.016627  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:47,  9.81it/s]

Training loss: 0.021928  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:39,  9.73it/s]

Training loss: 0.015761  [89632/139199]


 67%|██████▋   | 2902/4350 [04:56<02:29,  9.69it/s]

Training loss: 0.013628  [92832/139199]


 69%|██████▉   | 3003/4350 [05:06<02:15,  9.96it/s]

Training loss: 0.017688  [96032/139199]


 71%|███████▏  | 3103/4350 [05:16<02:06,  9.88it/s]

Training loss: 0.017070  [99232/139199]


 74%|███████▎  | 3203/4350 [05:26<01:54, 10.03it/s]

Training loss: 0.017378  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:46,  9.85it/s]

Training loss: 0.022583  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:35,  9.90it/s]

Training loss: 0.019035  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:25,  9.89it/s]

Training loss: 0.019557  [112032/139199]


 83%|████████▎ | 3602/4350 [06:07<01:15,  9.86it/s]

Training loss: 0.019064  [115232/139199]


 85%|████████▌ | 3703/4350 [06:17<01:04,  9.97it/s]

Training loss: 0.014117  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:56,  9.70it/s]

Training loss: 0.016345  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:45,  9.89it/s]

Training loss: 0.015119  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:34,  9.95it/s]

Training loss: 0.015920  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:25,  9.80it/s]

Training loss: 0.012543  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:14,  9.96it/s]

Training loss: 0.017934  [134432/139199]


 99%|█████████▉| 4302/4350 [07:18<00:04,  9.92it/s]

Training loss: 0.014636  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.82it/s]


Train loss: 0.018532 - Val loss: 0.032465 

Epoch 22
-------------------------------


  0%|          | 1/4350 [00:00<17:24,  4.16it/s]

Training loss: 0.020552  [   32/139199]


  2%|▏         | 103/4350 [00:10<07:07,  9.94it/s]

Training loss: 0.027498  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:02,  9.82it/s]

Training loss: 0.016602  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:49,  9.89it/s]

Training loss: 0.018671  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:38,  9.92it/s]

Training loss: 0.023713  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:40,  9.62it/s]

Training loss: 0.019207  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:23,  9.77it/s]

Training loss: 0.026451  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:10,  9.85it/s]

Training loss: 0.025745  [22432/139199]


 18%|█▊        | 802/4350 [01:21<06:00,  9.84it/s]

Training loss: 0.025488  [25632/139199]


 21%|██        | 902/4350 [01:32<05:56,  9.66it/s]

Training loss: 0.026300  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:37,  9.91it/s]

Training loss: 0.015430  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:31,  9.79it/s]

Training loss: 0.015416  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:20,  9.81it/s]

Training loss: 0.020738  [38432/139199]


 30%|██▉       | 1302/4350 [02:12<05:09,  9.84it/s]

Training loss: 0.014243  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:04,  9.69it/s]

Training loss: 0.016758  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:59,  9.50it/s]

Training loss: 0.014441  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:41,  9.77it/s]

Training loss: 0.016768  [51232/139199]


 39%|███▉      | 1703/4350 [02:53<04:27,  9.88it/s]

Training loss: 0.015116  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:18,  9.87it/s]

Training loss: 0.017985  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:07,  9.89it/s]

Training loss: 0.015130  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:57,  9.87it/s]

Training loss: 0.021102  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:53,  9.64it/s]

Training loss: 0.017005  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:42,  9.66it/s]

Training loss: 0.019848  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:29,  9.76it/s]

Training loss: 0.020980  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:21,  9.66it/s]

Training loss: 0.017942  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:10,  9.72it/s]

Training loss: 0.018139  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<02:59,  9.72it/s]

Training loss: 0.026161  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:48,  9.77it/s]

Training loss: 0.016139  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:38,  9.77it/s]

Training loss: 0.023409  [89632/139199]


 67%|██████▋   | 2902/4350 [04:56<02:27,  9.81it/s]

Training loss: 0.024465  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:19,  9.64it/s]

Training loss: 0.026963  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:10,  9.58it/s]

Training loss: 0.013051  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:55,  9.92it/s]

Training loss: 0.021914  [102432/139199]


 76%|███████▌  | 3303/4350 [05:37<01:46,  9.87it/s]

Training loss: 0.019649  [105632/139199]


 78%|███████▊  | 3402/4350 [05:47<01:35,  9.90it/s]

Training loss: 0.014760  [108832/139199]


 81%|████████  | 3503/4350 [05:57<01:25,  9.92it/s]

Training loss: 0.019848  [112032/139199]


 83%|████████▎ | 3603/4350 [06:07<01:14, 10.01it/s]

Training loss: 0.017990  [115232/139199]


 85%|████████▌ | 3702/4350 [06:17<01:06,  9.75it/s]

Training loss: 0.015074  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:55,  9.80it/s]

Training loss: 0.016124  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:46,  9.72it/s]

Training loss: 0.017950  [124832/139199]


 92%|█████████▏| 4002/4350 [06:48<00:36,  9.65it/s]

Training loss: 0.028966  [128032/139199]


 94%|█████████▍| 4102/4350 [06:58<00:25,  9.77it/s]

Training loss: 0.018886  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:15,  9.85it/s]

Training loss: 0.015778  [134432/139199]


 99%|█████████▉| 4302/4350 [07:18<00:04,  9.79it/s]

Training loss: 0.017185  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.80it/s]


Train loss: 0.018529 - Val loss: 0.032897 

Epoch 23
-------------------------------


  0%|          | 3/4350 [00:00<09:39,  7.50it/s]

Training loss: 0.023846  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:18,  9.70it/s]

Training loss: 0.018790  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:05,  9.75it/s]

Training loss: 0.019605  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:48,  9.92it/s]

Training loss: 0.016006  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:45,  9.74it/s]

Training loss: 0.018967  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:26,  9.96it/s]

Training loss: 0.014886  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:28,  9.65it/s]

Training loss: 0.034961  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:07,  9.92it/s]

Training loss: 0.020882  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:17,  9.39it/s]

Training loss: 0.012180  [25632/139199]


 21%|██        | 902/4350 [01:32<05:50,  9.85it/s]

Training loss: 0.020232  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:41,  9.82it/s]

Training loss: 0.017043  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:32,  9.76it/s]

Training loss: 0.028142  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:22,  9.77it/s]

Training loss: 0.015169  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:09,  9.84it/s]

Training loss: 0.021338  [41632/139199]


 32%|███▏      | 1403/4350 [02:23<04:57,  9.90it/s]

Training loss: 0.021907  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:50,  9.79it/s]

Training loss: 0.022660  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:38,  9.87it/s]

Training loss: 0.018213  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:36,  9.57it/s]

Training loss: 0.020553  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:21,  9.73it/s]

Training loss: 0.021024  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:11,  9.75it/s]

Training loss: 0.014418  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:56,  9.93it/s]

Training loss: 0.011625  [64032/139199]


 48%|████▊     | 2103/4350 [03:35<03:44,  9.99it/s]

Training loss: 0.012783  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:37,  9.87it/s]

Training loss: 0.014258  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:26,  9.90it/s]

Training loss: 0.013408  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:26,  9.43it/s]

Training loss: 0.022273  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:11,  9.65it/s]

Training loss: 0.029093  [80032/139199]


 60%|█████▉    | 2602/4350 [04:26<02:57,  9.82it/s]

Training loss: 0.015418  [83232/139199]


 62%|██████▏   | 2702/4350 [04:36<02:49,  9.72it/s]

Training loss: 0.013876  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:39,  9.70it/s]

Training loss: 0.014187  [89632/139199]


 67%|██████▋   | 2903/4350 [04:56<02:25,  9.93it/s]

Training loss: 0.018345  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:17,  9.83it/s]

Training loss: 0.020590  [96032/139199]


 71%|███████▏  | 3102/4350 [05:17<02:07,  9.79it/s]

Training loss: 0.023091  [99232/139199]


 74%|███████▎  | 3201/4350 [05:27<01:56,  9.90it/s]

Training loss: 0.019264  [102432/139199]


 76%|███████▌  | 3303/4350 [05:37<01:45,  9.93it/s]

Training loss: 0.018739  [105632/139199]


 78%|███████▊  | 3401/4350 [05:47<01:39,  9.58it/s]

Training loss: 0.017421  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:28,  9.61it/s]

Training loss: 0.011969  [112032/139199]


 83%|████████▎ | 3602/4350 [06:08<01:16,  9.77it/s]

Training loss: 0.015269  [115232/139199]


 85%|████████▌ | 3702/4350 [06:18<01:06,  9.74it/s]

Training loss: 0.017084  [118432/139199]


 87%|████████▋ | 3802/4350 [06:28<00:55,  9.80it/s]

Training loss: 0.015045  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:45,  9.87it/s]

Training loss: 0.017666  [124832/139199]


 92%|█████████▏| 4002/4350 [06:49<00:35,  9.79it/s]

Training loss: 0.021616  [128032/139199]


 94%|█████████▍| 4102/4350 [06:59<00:25,  9.82it/s]

Training loss: 0.019878  [131232/139199]


 97%|█████████▋| 4201/4350 [07:09<00:15,  9.83it/s]

Training loss: 0.017555  [134432/139199]


 99%|█████████▉| 4302/4350 [07:19<00:04,  9.78it/s]

Training loss: 0.026063  [137632/139199]


100%|██████████| 4350/4350 [07:24<00:00,  9.79it/s]


Train loss: 0.018197 - Val loss: 0.042764 

Epoch 24
-------------------------------


  0%|          | 3/4350 [00:00<09:45,  7.42it/s]

Training loss: 0.009319  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:12,  9.81it/s]

Training loss: 0.020647  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:01,  9.84it/s]

Training loss: 0.020066  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:47,  9.92it/s]

Training loss: 0.017992  [ 9632/139199]


  9%|▉         | 403/4350 [00:41<06:37,  9.93it/s]

Training loss: 0.013818  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:34,  9.75it/s]

Training loss: 0.013680  [16032/139199]


 14%|█▍        | 603/4350 [01:01<06:16,  9.96it/s]

Training loss: 0.021303  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:19,  9.60it/s]

Training loss: 0.013209  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:04,  9.74it/s]

Training loss: 0.012526  [25632/139199]


 21%|██        | 902/4350 [01:32<05:50,  9.83it/s]

Training loss: 0.016567  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:42,  9.77it/s]

Training loss: 0.014284  [32032/139199]


 25%|██▌       | 1103/4350 [01:52<05:32,  9.77it/s]

Training loss: 0.014611  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:22,  9.76it/s]

Training loss: 0.016510  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:15,  9.65it/s]

Training loss: 0.034862  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:05,  9.65it/s]

Training loss: 0.016546  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:52,  9.73it/s]

Training loss: 0.024131  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:42,  9.72it/s]

Training loss: 0.016049  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:33,  9.67it/s]

Training loss: 0.014020  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:21,  9.75it/s]

Training loss: 0.018861  [57632/139199]


 44%|████▎     | 1903/4350 [03:14<04:03, 10.06it/s]

Training loss: 0.020662  [60832/139199]


 46%|████▌     | 2003/4350 [03:24<03:55,  9.95it/s]

Training loss: 0.013396  [64032/139199]


 48%|████▊     | 2101/4350 [03:34<03:47,  9.89it/s]

Training loss: 0.017061  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:40,  9.76it/s]

Training loss: 0.029572  [70432/139199]


 53%|█████▎    | 2303/4350 [03:55<03:27,  9.86it/s]

Training loss: 0.020403  [73632/139199]


 55%|█████▌    | 2403/4350 [04:05<03:14, 10.01it/s]

Training loss: 0.017048  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:06,  9.91it/s]

Training loss: 0.012791  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<02:55,  9.97it/s]

Training loss: 0.015192  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:51,  9.62it/s]

Training loss: 0.013682  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:38,  9.80it/s]

Training loss: 0.019817  [89632/139199]


 67%|██████▋   | 2902/4350 [04:56<02:26,  9.87it/s]

Training loss: 0.013940  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:16,  9.85it/s]

Training loss: 0.014460  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:06,  9.88it/s]

Training loss: 0.014168  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:56,  9.83it/s]

Training loss: 0.015782  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:46,  9.88it/s]

Training loss: 0.018788  [105632/139199]


 78%|███████▊  | 3402/4350 [05:47<01:34, 10.00it/s]

Training loss: 0.025765  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:26,  9.84it/s]

Training loss: 0.018844  [112032/139199]


 83%|████████▎ | 3602/4350 [06:07<01:16,  9.83it/s]

Training loss: 0.016997  [115232/139199]


 85%|████████▌ | 3703/4350 [06:17<01:05,  9.86it/s]

Training loss: 0.015110  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:56,  9.72it/s]

Training loss: 0.022249  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:47,  9.43it/s]

Training loss: 0.017489  [124832/139199]


 92%|█████████▏| 4002/4350 [06:48<00:36,  9.50it/s]

Training loss: 0.017489  [128032/139199]


 94%|█████████▍| 4101/4350 [06:58<00:25,  9.85it/s]

Training loss: 0.019547  [131232/139199]


 97%|█████████▋| 4202/4350 [07:09<00:15,  9.79it/s]

Training loss: 0.017504  [134432/139199]


 99%|█████████▉| 4303/4350 [07:19<00:04,  9.93it/s]

Training loss: 0.022924  [137632/139199]


100%|██████████| 4350/4350 [07:24<00:00,  9.79it/s]


Train loss: 0.018032 - Val loss: 0.034444 

Epoch 25
-------------------------------


  0%|          | 3/4350 [00:00<09:41,  7.48it/s]

Training loss: 0.023119  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:14,  9.79it/s]

Training loss: 0.020338  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:13,  9.58it/s]

Training loss: 0.015252  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:49,  9.87it/s]

Training loss: 0.011689  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:40,  9.86it/s]

Training loss: 0.018612  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:31,  9.84it/s]

Training loss: 0.019164  [16032/139199]


 14%|█▍        | 602/4350 [01:02<06:20,  9.84it/s]

Training loss: 0.010426  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:17,  9.65it/s]

Training loss: 0.021872  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:10,  9.58it/s]

Training loss: 0.019305  [25632/139199]


 21%|██        | 902/4350 [01:33<05:57,  9.65it/s]

Training loss: 0.010845  [28832/139199]


 23%|██▎       | 1002/4350 [01:43<05:38,  9.89it/s]

Training loss: 0.016301  [32032/139199]


 25%|██▌       | 1102/4350 [01:53<05:32,  9.76it/s]

Training loss: 0.022236  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:19,  9.85it/s]

Training loss: 0.015237  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:08,  9.87it/s]

Training loss: 0.018428  [41632/139199]


 32%|███▏      | 1402/4350 [02:24<05:02,  9.73it/s]

Training loss: 0.014746  [44832/139199]


 35%|███▍      | 1503/4350 [02:34<04:49,  9.85it/s]

Training loss: 0.020179  [48032/139199]


 37%|███▋      | 1602/4350 [02:44<04:40,  9.79it/s]

Training loss: 0.016146  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:28,  9.87it/s]

Training loss: 0.016463  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:18,  9.86it/s]

Training loss: 0.018403  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:10,  9.77it/s]

Training loss: 0.018877  [60832/139199]


 46%|████▌     | 2002/4350 [03:25<04:02,  9.70it/s]

Training loss: 0.021959  [64032/139199]


 48%|████▊     | 2102/4350 [03:35<03:48,  9.83it/s]

Training loss: 0.017546  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:43,  9.61it/s]

Training loss: 0.013530  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:29,  9.76it/s]

Training loss: 0.016175  [73632/139199]


 55%|█████▌    | 2402/4350 [04:06<03:19,  9.75it/s]

Training loss: 0.027842  [76832/139199]


 58%|█████▊    | 2502/4350 [04:16<03:06,  9.89it/s]

Training loss: 0.018231  [80032/139199]


 60%|█████▉    | 2602/4350 [04:26<03:00,  9.70it/s]

Training loss: 0.018581  [83232/139199]


 62%|██████▏   | 2702/4350 [04:36<02:53,  9.51it/s]

Training loss: 0.011980  [86432/139199]


 64%|██████▍   | 2802/4350 [04:47<02:35,  9.97it/s]

Training loss: 0.019508  [89632/139199]


 67%|██████▋   | 2903/4350 [04:57<02:26,  9.91it/s]

Training loss: 0.014689  [92832/139199]


 69%|██████▉   | 3002/4350 [05:07<02:17,  9.78it/s]

Training loss: 0.016248  [96032/139199]


 71%|███████▏  | 3102/4350 [05:17<02:07,  9.81it/s]

Training loss: 0.013958  [99232/139199]


 74%|███████▎  | 3202/4350 [05:28<01:58,  9.67it/s]

Training loss: 0.018997  [102432/139199]


 76%|███████▌  | 3302/4350 [05:38<01:48,  9.68it/s]

Training loss: 0.032941  [105632/139199]


 78%|███████▊  | 3402/4350 [05:48<01:37,  9.68it/s]

Training loss: 0.018414  [108832/139199]


 81%|████████  | 3502/4350 [05:58<01:27,  9.66it/s]

Training loss: 0.011442  [112032/139199]


 83%|████████▎ | 3602/4350 [06:08<01:17,  9.71it/s]

Training loss: 0.019345  [115232/139199]


 85%|████████▌ | 3702/4350 [06:18<01:05,  9.86it/s]

Training loss: 0.016977  [118432/139199]


 87%|████████▋ | 3803/4350 [06:29<00:55,  9.85it/s]

Training loss: 0.019552  [121632/139199]


 90%|████████▉ | 3902/4350 [06:39<00:45,  9.82it/s]

Training loss: 0.026568  [124832/139199]


 92%|█████████▏| 4002/4350 [06:49<00:36,  9.49it/s]

Training loss: 0.020311  [128032/139199]


 94%|█████████▍| 4102/4350 [06:59<00:25,  9.90it/s]

Training loss: 0.014098  [131232/139199]


 97%|█████████▋| 4202/4350 [07:09<00:15,  9.69it/s]

Training loss: 0.020366  [134432/139199]


 99%|█████████▉| 4302/4350 [07:20<00:04,  9.79it/s]

Training loss: 0.021359  [137632/139199]


100%|██████████| 4350/4350 [07:25<00:00,  9.77it/s]


Train loss: 0.018066 - Val loss: 0.044785 

Epoch 26
-------------------------------


  0%|          | 3/4350 [00:00<09:31,  7.60it/s]

Training loss: 0.047873  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:26,  9.52it/s]

Training loss: 0.016984  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:01,  9.84it/s]

Training loss: 0.017555  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:48,  9.90it/s]

Training loss: 0.014005  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:47,  9.70it/s]

Training loss: 0.013289  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:27,  9.92it/s]

Training loss: 0.016794  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:19,  9.86it/s]

Training loss: 0.032853  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:16,  9.69it/s]

Training loss: 0.014061  [22432/139199]


 18%|█▊        | 802/4350 [01:21<05:59,  9.87it/s]

Training loss: 0.017356  [25632/139199]


 21%|██        | 902/4350 [01:32<06:06,  9.41it/s]

Training loss: 0.014846  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:39,  9.87it/s]

Training loss: 0.017949  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:32,  9.78it/s]

Training loss: 0.011599  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:27,  9.60it/s]

Training loss: 0.014279  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:13,  9.74it/s]

Training loss: 0.014944  [41632/139199]


 32%|███▏      | 1403/4350 [02:23<04:56,  9.93it/s]

Training loss: 0.015659  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:50,  9.82it/s]

Training loss: 0.024489  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:38,  9.88it/s]

Training loss: 0.015107  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:31,  9.76it/s]

Training loss: 0.015622  [54432/139199]


 41%|████▏     | 1802/4350 [03:03<04:22,  9.70it/s]

Training loss: 0.014840  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:13,  9.67it/s]

Training loss: 0.018546  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:59,  9.80it/s]

Training loss: 0.021463  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:45,  9.95it/s]

Training loss: 0.017158  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:41,  9.72it/s]

Training loss: 0.018841  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:26,  9.93it/s]

Training loss: 0.016037  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:19,  9.77it/s]

Training loss: 0.015471  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:08,  9.78it/s]

Training loss: 0.015946  [80032/139199]


 60%|█████▉    | 2602/4350 [04:25<03:00,  9.70it/s]

Training loss: 0.015739  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:46,  9.89it/s]

Training loss: 0.022067  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:35,  9.94it/s]

Training loss: 0.012409  [89632/139199]


 67%|██████▋   | 2903/4350 [04:56<02:25,  9.97it/s]

Training loss: 0.016975  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:19,  9.66it/s]

Training loss: 0.013750  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:05,  9.92it/s]

Training loss: 0.013000  [99232/139199]


 74%|███████▎  | 3202/4350 [05:26<01:57,  9.78it/s]

Training loss: 0.019815  [102432/139199]


 76%|███████▌  | 3303/4350 [05:36<01:44, 10.03it/s]

Training loss: 0.018190  [105632/139199]


 78%|███████▊  | 3402/4350 [05:46<01:35,  9.90it/s]

Training loss: 0.017029  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:26,  9.82it/s]

Training loss: 0.019384  [112032/139199]


 83%|████████▎ | 3603/4350 [06:07<01:14,  9.97it/s]

Training loss: 0.016105  [115232/139199]


 85%|████████▌ | 3702/4350 [06:17<01:05,  9.88it/s]

Training loss: 0.013275  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:56,  9.75it/s]

Training loss: 0.014291  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:45,  9.80it/s]

Training loss: 0.011007  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:35,  9.86it/s]

Training loss: 0.025535  [128032/139199]


 94%|█████████▍| 4102/4350 [06:58<00:25,  9.66it/s]

Training loss: 0.016182  [131232/139199]


 97%|█████████▋| 4202/4350 [07:08<00:14,  9.91it/s]

Training loss: 0.018096  [134432/139199]


 99%|█████████▉| 4302/4350 [07:18<00:04,  9.78it/s]

Training loss: 0.019435  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.80it/s]


Train loss: 0.017099 - Val loss: 0.037013 

Epoch 27
-------------------------------


  0%|          | 1/4350 [00:00<16:54,  4.29it/s]

Training loss: 0.017461  [   32/139199]


  2%|▏         | 101/4350 [00:10<07:11,  9.85it/s]

Training loss: 0.016801  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<06:53, 10.03it/s]

Training loss: 0.017138  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:49,  9.89it/s]

Training loss: 0.017813  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:44,  9.75it/s]

Training loss: 0.013579  [12832/139199]


 12%|█▏        | 503/4350 [00:51<06:26,  9.95it/s]

Training loss: 0.016088  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:18,  9.91it/s]

Training loss: 0.020048  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:13,  9.75it/s]

Training loss: 0.016667  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:06,  9.69it/s]

Training loss: 0.021925  [25632/139199]


 21%|██        | 902/4350 [01:32<05:46,  9.95it/s]

Training loss: 0.013577  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:51,  9.53it/s]

Training loss: 0.023778  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:30,  9.84it/s]

Training loss: 0.016834  [35232/139199]


 28%|██▊       | 1202/4350 [02:02<05:22,  9.77it/s]

Training loss: 0.026465  [38432/139199]


 30%|██▉       | 1303/4350 [02:13<05:06,  9.93it/s]

Training loss: 0.017233  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<04:59,  9.83it/s]

Training loss: 0.015586  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:47,  9.92it/s]

Training loss: 0.013438  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:41,  9.76it/s]

Training loss: 0.022294  [51232/139199]


 39%|███▉      | 1702/4350 [02:53<04:30,  9.81it/s]

Training loss: 0.012312  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:17,  9.90it/s]

Training loss: 0.019573  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:12,  9.68it/s]

Training loss: 0.012505  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:58,  9.84it/s]

Training loss: 0.013631  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:50,  9.75it/s]

Training loss: 0.015450  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:38,  9.85it/s]

Training loss: 0.014921  [70432/139199]


 53%|█████▎    | 2302/4350 [03:54<03:27,  9.85it/s]

Training loss: 0.018568  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:22,  9.62it/s]

Training loss: 0.010361  [76832/139199]


 58%|█████▊    | 2502/4350 [04:15<03:14,  9.52it/s]

Training loss: 0.018041  [80032/139199]


 60%|█████▉    | 2603/4350 [04:25<02:56,  9.91it/s]

Training loss: 0.017707  [83232/139199]


 62%|██████▏   | 2702/4350 [04:36<02:54,  9.47it/s]

Training loss: 0.019476  [86432/139199]


 64%|██████▍   | 2802/4350 [04:46<02:45,  9.35it/s]

Training loss: 0.017465  [89632/139199]


 67%|██████▋   | 2902/4350 [04:57<02:29,  9.70it/s]

Training loss: 0.019327  [92832/139199]


 69%|██████▉   | 3003/4350 [05:07<02:14,  9.99it/s]

Training loss: 0.018211  [96032/139199]


 71%|███████▏  | 3102/4350 [05:17<02:06,  9.88it/s]

Training loss: 0.016504  [99232/139199]


 74%|███████▎  | 3202/4350 [05:27<01:59,  9.58it/s]

Training loss: 0.019363  [102432/139199]


 76%|███████▌  | 3302/4350 [05:37<01:48,  9.69it/s]

Training loss: 0.018483  [105632/139199]


 78%|███████▊  | 3402/4350 [05:47<01:36,  9.80it/s]

Training loss: 0.022151  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:27,  9.73it/s]

Training loss: 0.022485  [112032/139199]


 83%|████████▎ | 3602/4350 [06:08<01:17,  9.63it/s]

Training loss: 0.014113  [115232/139199]


 85%|████████▌ | 3702/4350 [06:18<01:06,  9.77it/s]

Training loss: 0.016257  [118432/139199]


 87%|████████▋ | 3802/4350 [06:28<00:55,  9.80it/s]

Training loss: 0.035105  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:45,  9.83it/s]

Training loss: 0.014507  [124832/139199]


 92%|█████████▏| 4002/4350 [06:49<00:35,  9.81it/s]

Training loss: 0.014150  [128032/139199]


 94%|█████████▍| 4102/4350 [06:59<00:26,  9.50it/s]

Training loss: 0.019683  [131232/139199]


 97%|█████████▋| 4202/4350 [07:09<00:14,  9.97it/s]

Training loss: 0.029080  [134432/139199]


 99%|█████████▉| 4302/4350 [07:19<00:04,  9.80it/s]

Training loss: 0.011309  [137632/139199]


100%|██████████| 4350/4350 [07:24<00:00,  9.79it/s]


Train loss: 0.016955 - Val loss: 0.035323 

Epoch 28
-------------------------------


  0%|          | 3/4350 [00:00<09:42,  7.46it/s]

Training loss: 0.015902  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:06,  9.95it/s]

Training loss: 0.014647  [ 3232/139199]


  5%|▍         | 202/4350 [00:20<07:05,  9.74it/s]

Training loss: 0.017458  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<06:45,  9.99it/s]

Training loss: 0.013123  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:49,  9.64it/s]

Training loss: 0.011301  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:33,  9.77it/s]

Training loss: 0.013627  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:24,  9.74it/s]

Training loss: 0.013571  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:15,  9.71it/s]

Training loss: 0.019066  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:05,  9.71it/s]

Training loss: 0.016217  [25632/139199]


 21%|██        | 902/4350 [01:32<05:55,  9.70it/s]

Training loss: 0.011041  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:41,  9.79it/s]

Training loss: 0.011218  [32032/139199]


 25%|██▌       | 1102/4350 [01:53<05:30,  9.82it/s]

Training loss: 0.015007  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:15,  9.98it/s]

Training loss: 0.020364  [38432/139199]


 30%|██▉       | 1303/4350 [02:13<05:05,  9.98it/s]

Training loss: 0.013083  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:06,  9.62it/s]

Training loss: 0.017391  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:55,  9.65it/s]

Training loss: 0.017104  [48032/139199]


 37%|███▋      | 1603/4350 [02:44<04:34,  9.99it/s]

Training loss: 0.019135  [51232/139199]


 39%|███▉      | 1703/4350 [02:54<04:29,  9.84it/s]

Training loss: 0.020781  [54432/139199]


 41%|████▏     | 1803/4350 [03:04<04:18,  9.87it/s]

Training loss: 0.020014  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:06,  9.92it/s]

Training loss: 0.018988  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<03:56,  9.93it/s]

Training loss: 0.012268  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:46,  9.93it/s]

Training loss: 0.012312  [67232/139199]


 51%|█████     | 2202/4350 [03:44<03:34, 10.02it/s]

Training loss: 0.013087  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:29,  9.79it/s]

Training loss: 0.018339  [73632/139199]


 55%|█████▌    | 2403/4350 [04:05<03:15,  9.96it/s]

Training loss: 0.021449  [76832/139199]


 58%|█████▊    | 2503/4350 [04:15<03:06,  9.92it/s]

Training loss: 0.022841  [80032/139199]


 60%|█████▉    | 2603/4350 [04:25<02:56,  9.89it/s]

Training loss: 0.020123  [83232/139199]


 62%|██████▏   | 2702/4350 [04:35<02:45,  9.97it/s]

Training loss: 0.020989  [86432/139199]


 64%|██████▍   | 2802/4350 [04:45<02:36,  9.89it/s]

Training loss: 0.015966  [89632/139199]


 67%|██████▋   | 2902/4350 [04:55<02:29,  9.71it/s]

Training loss: 0.016445  [92832/139199]


 69%|██████▉   | 3003/4350 [05:06<02:14,  9.98it/s]

Training loss: 0.013171  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:09,  9.60it/s]

Training loss: 0.012298  [99232/139199]


 74%|███████▎  | 3203/4350 [05:26<01:54, 10.02it/s]

Training loss: 0.013493  [102432/139199]


 76%|███████▌  | 3302/4350 [05:36<01:45,  9.95it/s]

Training loss: 0.014643  [105632/139199]


 78%|███████▊  | 3403/4350 [05:46<01:35,  9.92it/s]

Training loss: 0.012374  [108832/139199]


 81%|████████  | 3502/4350 [05:56<01:26,  9.80it/s]

Training loss: 0.012938  [112032/139199]


 83%|████████▎ | 3602/4350 [06:06<01:15,  9.92it/s]

Training loss: 0.018115  [115232/139199]


 85%|████████▌ | 3702/4350 [06:17<01:07,  9.66it/s]

Training loss: 0.028539  [118432/139199]


 87%|████████▋ | 3802/4350 [06:27<00:55,  9.79it/s]

Training loss: 0.014109  [121632/139199]


 90%|████████▉ | 3902/4350 [06:37<00:46,  9.64it/s]

Training loss: 0.013230  [124832/139199]


 92%|█████████▏| 4002/4350 [06:47<00:35,  9.87it/s]

Training loss: 0.016175  [128032/139199]


 94%|█████████▍| 4102/4350 [06:57<00:25,  9.77it/s]

Training loss: 0.012167  [131232/139199]


 97%|█████████▋| 4201/4350 [07:08<00:15,  9.79it/s]

Training loss: 0.011661  [134432/139199]


 99%|█████████▉| 4302/4350 [07:18<00:04,  9.80it/s]

Training loss: 0.012743  [137632/139199]


100%|██████████| 4350/4350 [07:23<00:00,  9.82it/s]


Train loss: 0.016920 - Val loss: 0.037328 

Epoch 29
-------------------------------


  0%|          | 1/4350 [00:00<17:14,  4.20it/s]

Training loss: 0.016983  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:06,  9.96it/s]

Training loss: 0.014026  [ 3232/139199]


  5%|▍         | 201/4350 [00:20<06:52, 10.05it/s]

Training loss: 0.015575  [ 6432/139199]


  7%|▋         | 302/4350 [00:30<07:01,  9.60it/s]

Training loss: 0.012386  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:55,  9.49it/s]

Training loss: 0.019379  [12832/139199]


 12%|█▏        | 502/4350 [00:51<06:35,  9.73it/s]

Training loss: 0.012060  [16032/139199]


 14%|█▍        | 602/4350 [01:01<06:31,  9.58it/s]

Training loss: 0.014064  [19232/139199]


 16%|█▌        | 702/4350 [01:11<06:11,  9.81it/s]

Training loss: 0.019031  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:08,  9.62it/s]

Training loss: 0.021963  [25632/139199]


 21%|██        | 902/4350 [01:32<05:51,  9.81it/s]

Training loss: 0.013859  [28832/139199]


 23%|██▎       | 1002/4350 [01:42<05:37,  9.91it/s]

Training loss: 0.022669  [32032/139199]


 25%|██▌       | 1102/4350 [01:52<05:47,  9.34it/s]

Training loss: 0.015296  [35232/139199]


 28%|██▊       | 1202/4350 [02:03<05:23,  9.73it/s]

Training loss: 0.015077  [38432/139199]


 30%|██▉       | 1302/4350 [02:13<05:07,  9.90it/s]

Training loss: 0.017504  [41632/139199]


 32%|███▏      | 1402/4350 [02:23<05:04,  9.69it/s]

Training loss: 0.015172  [44832/139199]


 35%|███▍      | 1502/4350 [02:33<04:48,  9.87it/s]

Training loss: 0.012539  [48032/139199]


 37%|███▋      | 1602/4350 [02:43<04:39,  9.83it/s]

Training loss: 0.012220  [51232/139199]


 39%|███▉      | 1702/4350 [02:54<04:33,  9.68it/s]

Training loss: 0.015980  [54432/139199]


 41%|████▏     | 1802/4350 [03:04<04:20,  9.79it/s]

Training loss: 0.012525  [57632/139199]


 44%|████▎     | 1902/4350 [03:14<04:13,  9.64it/s]

Training loss: 0.012754  [60832/139199]


 46%|████▌     | 2002/4350 [03:24<04:02,  9.70it/s]

Training loss: 0.013944  [64032/139199]


 48%|████▊     | 2102/4350 [03:34<03:52,  9.67it/s]

Training loss: 0.013021  [67232/139199]


 51%|█████     | 2202/4350 [03:45<03:36,  9.90it/s]

Training loss: 0.014350  [70432/139199]


 53%|█████▎    | 2302/4350 [03:55<03:32,  9.65it/s]

Training loss: 0.015195  [73632/139199]


 55%|█████▌    | 2402/4350 [04:05<03:17,  9.87it/s]

Training loss: 0.013083  [76832/139199]


 58%|█████▊    | 2503/4350 [04:15<03:05,  9.96it/s]

Training loss: 0.012312  [80032/139199]


 60%|█████▉    | 2603/4350 [04:25<02:56,  9.93it/s]

Training loss: 0.012868  [83232/139199]


 62%|██████▏   | 2702/4350 [04:36<02:47,  9.82it/s]

Training loss: 0.016262  [86432/139199]


 64%|██████▍   | 2803/4350 [04:46<02:34, 10.00it/s]

Training loss: 0.017325  [89632/139199]


 67%|██████▋   | 2902/4350 [04:56<02:26,  9.88it/s]

Training loss: 0.023766  [92832/139199]


 69%|██████▉   | 3002/4350 [05:06<02:19,  9.69it/s]

Training loss: 0.019168  [96032/139199]


 71%|███████▏  | 3102/4350 [05:16<02:08,  9.70it/s]

Training loss: 0.017320  [99232/139199]


 74%|███████▎  | 3202/4350 [05:27<01:59,  9.63it/s]

Training loss: 0.016740  [102432/139199]


 76%|███████▌  | 3302/4350 [05:37<01:46,  9.83it/s]

Training loss: 0.017863  [105632/139199]


 78%|███████▊  | 3402/4350 [05:47<01:38,  9.58it/s]

Training loss: 0.015644  [108832/139199]


 81%|████████  | 3502/4350 [05:57<01:26,  9.75it/s]

Training loss: 0.015372  [112032/139199]


 83%|████████▎ | 3602/4350 [06:08<01:16,  9.72it/s]

Training loss: 0.018203  [115232/139199]


 85%|████████▌ | 3702/4350 [06:18<01:07,  9.57it/s]

Training loss: 0.014829  [118432/139199]


 87%|████████▋ | 3802/4350 [06:28<00:56,  9.67it/s]

Training loss: 0.013670  [121632/139199]


 90%|████████▉ | 3902/4350 [06:38<00:45,  9.81it/s]

Training loss: 0.014736  [124832/139199]


 92%|█████████▏| 4002/4350 [06:48<00:35,  9.76it/s]

Training loss: 0.015427  [128032/139199]


 94%|█████████▍| 4102/4350 [06:59<00:25,  9.79it/s]

Training loss: 0.021687  [131232/139199]


 97%|█████████▋| 4202/4350 [07:09<00:14,  9.87it/s]

Training loss: 0.013962  [134432/139199]


 99%|█████████▉| 4303/4350 [07:19<00:04,  9.85it/s]

Training loss: 0.010318  [137632/139199]


100%|██████████| 4350/4350 [07:24<00:00,  9.79it/s]


Train loss: 0.016944 - Val loss: 0.035021 

Epoch 30
-------------------------------


  0%|          | 3/4350 [00:00<09:43,  7.45it/s]

Training loss: 0.020051  [   32/139199]


  2%|▏         | 102/4350 [00:10<07:17,  9.71it/s]

Training loss: 0.012431  [ 3232/139199]


  5%|▍         | 202/4350 [00:21<06:58,  9.91it/s]

Training loss: 0.016549  [ 6432/139199]


  7%|▋         | 302/4350 [00:31<06:53,  9.79it/s]

Training loss: 0.015882  [ 9632/139199]


  9%|▉         | 402/4350 [00:41<06:45,  9.74it/s]

Training loss: 0.009820  [12832/139199]


 12%|█▏        | 501/4350 [00:51<06:35,  9.74it/s]

Training loss: 0.013954  [16032/139199]


 14%|█▍        | 603/4350 [01:02<06:21,  9.83it/s]

Training loss: 0.019542  [19232/139199]


 16%|█▌        | 702/4350 [01:12<06:18,  9.63it/s]

Training loss: 0.024589  [22432/139199]


 18%|█▊        | 802/4350 [01:22<06:08,  9.63it/s]

Training loss: 0.025010  [25632/139199]


 21%|██        | 902/4350 [01:33<05:54,  9.74it/s]

Training loss: 0.017703  [28832/139199]


 23%|██▎       | 1002/4350 [01:43<05:50,  9.55it/s]

Training loss: 0.014639  [32032/139199]


 25%|██▌       | 1102/4350 [01:53<05:39,  9.56it/s]

Training loss: 0.014904  [35232/139199]


 28%|██▊       | 1203/4350 [02:04<05:18,  9.88it/s]

Training loss: 0.020384  [38432/139199]


 30%|██▉       | 1302/4350 [02:14<05:13,  9.72it/s]

Training loss: 0.020371  [41632/139199]


 32%|███▏      | 1402/4350 [02:24<05:01,  9.78it/s]

Training loss: 0.009256  [44832/139199]


 35%|███▍      | 1502/4350 [02:34<04:53,  9.71it/s]

Training loss: 0.017542  [48032/139199]


 37%|███▋      | 1602/4350 [02:45<04:49,  9.50it/s]

Training loss: 0.015142  [51232/139199]


 39%|███▉      | 1702/4350 [02:55<04:27,  9.90it/s]

Training loss: 0.013049  [54432/139199]


 41%|████▏     | 1803/4350 [03:05<04:16,  9.94it/s]

Training loss: 0.013741  [57632/139199]


 44%|████▎     | 1902/4350 [03:15<04:12,  9.71it/s]

Training loss: 0.014565  [60832/139199]


 46%|████▌     | 2002/4350 [03:26<03:59,  9.82it/s]

Training loss: 0.018035  [64032/139199]


 48%|████▊     | 2102/4350 [03:36<03:50,  9.74it/s]

Training loss: 0.013395  [67232/139199]


 51%|█████     | 2202/4350 [03:46<03:38,  9.85it/s]

Training loss: 0.019906  [70432/139199]


 53%|█████▎    | 2302/4350 [03:56<03:30,  9.73it/s]

Training loss: 0.022720  [73632/139199]


 55%|█████▌    | 2403/4350 [04:07<03:16,  9.91it/s]

Training loss: 0.017582  [76832/139199]


 58%|█████▊    | 2502/4350 [04:17<03:08,  9.79it/s]

Training loss: 0.018798  [80032/139199]


 60%|█████▉    | 2602/4350 [04:27<02:56,  9.92it/s]

Training loss: 0.029469  [83232/139199]


 62%|██████▏   | 2702/4350 [04:37<02:48,  9.77it/s]

Training loss: 0.018150  [86432/139199]


 64%|██████▍   | 2802/4350 [04:48<02:42,  9.52it/s]

Training loss: 0.012892  [89632/139199]


 67%|██████▋   | 2902/4350 [04:58<02:25,  9.96it/s]

Training loss: 0.019622  [92832/139199]


 69%|██████▉   | 3002/4350 [05:08<02:15,  9.91it/s]

Training loss: 0.023962  [96032/139199]


 71%|███████▏  | 3102/4350 [05:18<02:07,  9.75it/s]

Training loss: 0.017419  [99232/139199]


 74%|███████▎  | 3202/4350 [05:29<01:56,  9.84it/s]

Training loss: 0.016455  [102432/139199]


 76%|███████▌  | 3303/4350 [05:39<01:46,  9.80it/s]

Training loss: 0.014038  [105632/139199]


 78%|███████▊  | 3402/4350 [05:49<01:37,  9.71it/s]

Training loss: 0.014861  [108832/139199]


 81%|████████  | 3502/4350 [05:59<01:27,  9.66it/s]

Training loss: 0.012482  [112032/139199]


 83%|████████▎ | 3602/4350 [06:10<01:15,  9.89it/s]

Training loss: 0.017305  [115232/139199]


 85%|████████▌ | 3703/4350 [06:20<01:05,  9.82it/s]

Training loss: 0.012448  [118432/139199]


 87%|████████▋ | 3802/4350 [06:30<00:56,  9.63it/s]

Training loss: 0.016572  [121632/139199]


 90%|████████▉ | 3902/4350 [06:41<00:45,  9.77it/s]

Training loss: 0.018148  [124832/139199]


 92%|█████████▏| 4002/4350 [06:51<00:36,  9.54it/s]

Training loss: 0.020899  [128032/139199]


 94%|█████████▍| 4102/4350 [07:01<00:25,  9.69it/s]

Training loss: 0.013882  [131232/139199]


 97%|█████████▋| 4202/4350 [07:12<00:15,  9.71it/s]

Training loss: 0.015060  [134432/139199]


 99%|█████████▉| 4303/4350 [07:22<00:04,  9.89it/s]

Training loss: 0.014788  [137632/139199]


100%|██████████| 4350/4350 [07:27<00:00,  9.72it/s]


Train loss: 0.016780 - Val loss: 0.030170 



In [None]:
torch.save(model.state_dict(), "/content/drive/My Drive/AV Research/summer_torch_model256.pt")

In [None]:
from google.colab import runtime
runtime.unassign()