# Imports

In [2]:
import pickle
import torch
import random
import math
import h5py
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchvision import models, transforms
from torchsummary import summary
from torchvision.transforms import v2
from google.colab import drive
from google.colab import runtime
from PIL import Image
!pip install torchinfo
from torchinfo import summary
drive.mount('/content/drive')

Mounted at /content/drive


# Data

File Loading

In [3]:
# Load training data from HDF5 file
with h5py.File('/content/drive/My Drive/AV Research/train_data.h5', 'r') as train_file:
    train_data = np.array(train_file['data'])
    train_labels = np.array(train_file['labels'])

# Load validation data from HDF5 file
with h5py.File('/content/drive/My Drive/AV Research/val_data.h5', 'r') as val_file:
    val_data = np.array(val_file['data'])
    val_labels = np.array(val_file['labels'])

# print(train_data.shape)
# print(train_labels.shape)
# print(val_data.shape)
# print(val_labels.shape)

In [4]:
def get_mean_std(data):
  mean = np.mean(data, axis=(0, 1, 2))
  std = np.std(data, axis=(0, 1, 2))
  return mean, std

Data Transformations

In [5]:
class TrainDataAugmentation(nn.Module):
    def __init__(self):
        super(TrainDataAugmentation, self).__init__()
        self.transforms = v2.Compose([
            v2.ToImage(),
            v2.Resize((119//2, 256//2)),
            v2.Grayscale(num_output_channels=1),
            v2.RandomAffine(degrees=(0, 30), translate=(0.2, 0.2)),
            v2.RandomRotation(degrees=(0, 180)),
            v2.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=(0.4683,), std=(0.1137,))
        ])

    def forward(self, image):
        augmented_image = self.transforms(image)
        return augmented_image

class ValDataAugmentation(nn.Module):
    def __init__(self):
        super(ValDataAugmentation, self).__init__()
        self.transforms = v2.Compose([
            v2.ToImage(),
            v2.Resize((119//2, 256//2)),
            v2.Grayscale(num_output_channels=1),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize(mean=(0.4683,), std=(0.1137,))
        ])

    def forward(self, image):
        augmented_image = self.transforms(image)
        return augmented_image

Data Preprocessing

In [6]:
class RandomHorizontalFlipWithSteeringAngle(object):
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, input, output):
        if random.random() < self.p:
            v2.functional.hflip(input)
            output[0] = -output[0]
        return input, output


class AVDataset(Dataset):
    def __init__(self, input_images, output_values, transform):
        self.input_images = input_images
        self.output_values = output_values
        self.transform = transform

    def __len__(self):
        return len(self.output_values)

    def __getitem__(self, idx):
        input_image = self.input_images[idx]
        output_value = self.output_values[idx]

        if self.transform:
            input_image = self.transform(input_image)
            input_image, output_value = RandomHorizontalFlipWithSteeringAngle(0.5)(input_image, output_value)

        return input_image, output_value

# train_dataset_path = '/content/drive/My Drive/AV Research/train_data.h5'
# val_dataset_path = '/content/drive/My Drive/AV Research/val_data.h5'

train_data_augmentation = TrainDataAugmentation()
val_data_augmentation = ValDataAugmentation()

train_dataset = AVDataset(input_images=train_data, output_values=train_labels, transform=train_data_augmentation)
val_dataset = AVDataset(input_images=val_data, output_values=val_labels, transform=val_data_augmentation)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, pin_memory=True)
del train_dataset, val_dataset, train_data, val_data, train_labels, val_labels

# Model

Neural Network

In [7]:
# From: https://github.com/wzlxjtu/PositionalEncoding2D
class PositionalEncoding2d(nn.Module):
    def __init__(self, d_model, height, width):
        super(PositionalEncoding2d, self).__init__()
        if d_model % 4 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                            "odd dimension (got dim={:d})".format(d_model))
        pe = torch.zeros(d_model, height, width)
        # Each dimension use half of d_model
        d_model = int(d_model / 2)
        div_term = torch.exp(torch.arange(0., d_model, 2) *
                            -(math.log(10000.0) / d_model))
        pos_w = torch.arange(0., width).unsqueeze(1)
        pos_h = torch.arange(0., height).unsqueeze(1)
        pe[0:d_model:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
        pe[1:d_model:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
        pe[d_model::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
        pe[d_model + 1::2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe

In [8]:
# Inspired by: https://github.com/reshalfahsi/separableconv-torch
class SeparableConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, bias):
        super(SeparableConv2d, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=in_channels, bias=bias)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class ResidualBlock(nn.Module):
    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, num_layers: int, pool: bool, short: bool):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.pooling = pool
        self.short = short

        self.inconv = nn.Sequential(
            SeparableConv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=1, bias=False),
            nn.SELU()
        )

        layers = []
        for _ in range(num_layers - 1):
            layers.append(SeparableConv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=1, bias=False))
            layers.append(nn.SELU())
        self.convlayers = nn.Sequential(*layers)

        if self.pooling:
            self.pool = nn.MaxPool2d(kernel_size=kernel_size, stride=2, padding=1)
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False)
        else:
            self.shortcut = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)

        self.sact = nn.SELU()

    def forward(self, x):
        out = self.inconv(x)
        out = self.convlayers(out)

        if self.pooling:
            out = self.pool(out)

        if self.short:
            shortcut = self.shortcut(x)
            out = out + shortcut
            out = self.sact(out)

        return out

In [9]:
class AVModel(nn.Module):
    def __init__(self):
        super(AVModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.act = nn.SELU()
        self.positional_encoding = PositionalEncoding2d(64, 119, 256)

        self.conv_layers = nn.Sequential(
            ResidualBlock(in_channels=64, out_channels=64, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=64, out_channels=128, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=128, out_channels=256, kernel_size=3, num_layers=4, pool=True, short=True),
            ResidualBlock(in_channels=256, out_channels=512, kernel_size=3, num_layers=4, pool=True, short=True),
        )

        self.attention = nn.MultiheadAttention(embed_dim=32, num_heads=4, dropout=0.5, batch_first=True)

        self.dense_layers = nn.Sequential(
            nn.Linear(512, 1024, bias=False),
            nn.SELU(),
            nn.Linear(1024, 512, bias=False),
            nn.SELU(),
            nn.Linear(512, 256, bias=False),
            nn.SELU(),
            nn.Dropout(0.5)
        )

        self.output_layer = nn.Linear(256, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act(x)
        #x = F.layer_norm(self.positional_encoding(x), x.shape)
        x = self.conv_layers(x)

        # batch_size, channels, height, width = x.size()
        # x = x.view(batch_size, channels, height * width)
        # attention_output, _ = self.attention(x, x, x)
        # x = F.layer_norm(x + attention_output, x.shape)

        x = torch.mean(x.view(x.size(0), x.size(1), -1), dim=2) # GlobalAveragePooling2D
        x = self.dense_layers(x)
        x = self.output_layer(x)

        steering_output = F.hardtanh(x[:, 0:1])
        throttle_brake_output = F.hardtanh(x[:, 1:], min_val=0)
        out = torch.cat((steering_output, throttle_brake_output), dim=1)
        return out

In [10]:
class SimpleAVModel(nn.Module):
    def __init__(self):
        super(SimpleAVModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
        self.act = nn.SELU()

        self.conv_layers = nn.Sequential(
            SeparableConv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            SeparableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            SeparableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            SeparableConv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        self.flatten = nn.Flatten()

        self.dense_layers = nn.Sequential(
            nn.Linear(65536, 1024, bias=False),
            nn.SELU(),
            nn.Linear(1024, 128, bias=False),
            nn.SELU(),
            nn.Linear(128, 64, bias=False),
            nn.SELU(),
            nn.Dropout(0.5)
        )

        self.output_layer = nn.Linear(64, 3)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act(x)
        x = self.conv_layers(x)

        x = self.flatten(x)
        x = self.dense_layers(x)
        x = self.output_layer(x)

        steering_output = F.hardtanh(x[:, 0:1])
        throttle_brake_output = F.hardtanh(x[:, 1:], min_val=0)
        out = torch.cat((steering_output, throttle_brake_output), dim=1)
        return out

# Util

MFPE Loss

In [11]:
class MFPELoss(nn.Module):
    def __init__(self):
        super(MFPELoss, self).__init__()

    def forward(self, input, target):
        error = input - target
        fourth_power_error = error ** 4
        mean_fourth_power_error = torch.mean(fourth_power_error)
        return mean_fourth_power_error

Early Stopping

In [12]:
class EarlyStopping:
    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

Autoclip

In [13]:
# From: https://github.com/pseeth/autoclip/blob/master/autoclip.py
class AutoClip:
    def __init__(self, percentile):
        self.grad_history = []
        self.percentile = percentile

    def compute_grad_norm(self, model):
        total_norm = 0
        for p in model.parameters():
            if p.grad is not None:
                param_norm = p.grad.data.norm(2)
                total_norm += param_norm.item() ** 2
        total_norm = total_norm ** (1. / 2)

        return total_norm

    def __call__(self, model):
        grad_norm = self.compute_grad_norm(model)
        self.grad_history.append(grad_norm)
        clip_value = np.percentile(self.grad_history, self.percentile)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

# Training Loop

In [14]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = AVModel().to(device)
num_epochs = 30
learning_rate = 5e-6
criterion = MFPELoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=(num_epochs // 5))
early_stopping = EarlyStopping(patience=5)

torch.autograd.set_detect_anomaly(False)
torch.backends.cudnn.benchmark = True
scaler = torch.cuda.amp.GradScaler()
autoclipper = AutoClip(percentile=10)

print(summary(model, input_size=(batch_size, 1, 119 // 2, 256 // 2)))

def train_loop(train_loader, model, criterion, optimizer, device):
    size = len(train_loader.dataset)
    model.train()
    train_loss = 0.0
    num_batches = len(train_loader)

    # Training loop
    for batch, (inputs, targets) in enumerate(tqdm(train_loader)):
        optimizer.zero_grad()
        inputs = inputs.float().to(device)
        targets = targets.float().to(device)
        with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
          outputs = model(inputs)
          loss = criterion(outputs, targets)

        train_loss += loss.item()
        scaler.scale(loss).backward()
        autoclipper(model)

        scaler.step(optimizer)
        scale = scaler.get_scale()
        scaler.update()
        skip_lr_sched = (scale != scaler.get_scale())

        if batch % 100 == 0:
            loss, current = loss.item(), batch * batch_size + len(inputs)
            print(f"Training loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    train_loss = train_loss / num_batches
    return train_loss, skip_lr_sched

def val_loop(val_loader, model, criterion, device):
    model.eval()
    val_loss = 0.0
    num_batches = len(val_loader)

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs = inputs.float().to(device, non_blocking=True)
            targets = targets.float().to(device, non_blocking=True)
            outputs = model(inputs)
            val_loss += criterion(outputs, targets).item()

    val_loss = val_loss / num_batches
    return val_loss

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train_loss, skip_lr_sched = train_loop(train_loader, model, criterion, optimizer, device)
    val_loss = val_loop(val_loader, model, criterion, device)
    print(f"Train loss: {train_loss:>8f} - Val loss: {val_loss:>8f} \n")

    if not skip_lr_sched:
        scheduler.step()

    # if early_stopping.early_stop(val_loss):
    #     print(f"Early stopping after {epoch+1} epochs \n")
    #     print(f"Best val loss: {early_stopping.min_validation_loss} \n")
    #     break

Layer (type:depth-idx)                        Output Shape              Param #
AVModel                                       [32, 3]                   4,224
├─Conv2d: 1-1                                 [32, 64, 59, 128]         576
├─SELU: 1-2                                   [32, 64, 59, 128]         --
├─Sequential: 1-3                             [32, 512, 4, 8]           --
│    └─ResidualBlock: 2-1                     [32, 64, 30, 64]          --
│    │    └─Sequential: 3-1                   [32, 64, 59, 128]         4,672
│    │    └─Sequential: 3-2                   [32, 64, 59, 128]         14,016
│    │    └─MaxPool2d: 3-3                    [32, 64, 30, 64]          --
│    │    └─Conv2d: 3-4                       [32, 64, 30, 64]          4,096
│    │    └─SELU: 3-5                         [32, 64, 30, 64]          --
│    └─ResidualBlock: 2-2                     [32, 128, 15, 32]         --
│    │    └─Sequential: 3-6                   [32, 128, 30, 64]         8,768
│  

  0%|          | 4/4350 [00:03<44:18,  1.63it/s]  

Training loss: 0.149806  [   32/139199]


  2%|▏         | 102/4350 [00:11<05:41, 12.43it/s]

Training loss: 0.038477  [ 3232/139199]


  5%|▍         | 202/4350 [00:19<05:27, 12.68it/s]

Training loss: 0.030555  [ 6432/139199]


  7%|▋         | 302/4350 [00:27<05:28, 12.34it/s]

Training loss: 0.027385  [ 9632/139199]


  9%|▉         | 402/4350 [00:35<05:19, 12.35it/s]

Training loss: 0.027980  [12832/139199]


 12%|█▏        | 502/4350 [00:43<05:09, 12.45it/s]

Training loss: 0.026503  [16032/139199]


 14%|█▍        | 602/4350 [00:51<05:14, 11.91it/s]

Training loss: 0.035321  [19232/139199]


 16%|█▌        | 702/4350 [00:59<05:09, 11.80it/s]

Training loss: 0.025446  [22432/139199]


 18%|█▊        | 802/4350 [01:08<04:59, 11.84it/s]

Training loss: 0.034366  [25632/139199]


 21%|██        | 902/4350 [01:16<04:31, 12.72it/s]

Training loss: 0.026795  [28832/139199]


 23%|██▎       | 1002/4350 [01:24<04:27, 12.54it/s]

Training loss: 0.036392  [32032/139199]


 25%|██▌       | 1102/4350 [01:32<04:24, 12.27it/s]

Training loss: 0.028784  [35232/139199]


 28%|██▊       | 1202/4350 [01:40<04:12, 12.46it/s]

Training loss: 0.030219  [38432/139199]


 30%|██▉       | 1302/4350 [01:49<04:06, 12.39it/s]

Training loss: 0.031842  [41632/139199]


 32%|███▏      | 1402/4350 [01:56<03:56, 12.49it/s]

Training loss: 0.036900  [44832/139199]


 35%|███▍      | 1502/4350 [02:05<03:47, 12.51it/s]

Training loss: 0.029741  [48032/139199]


 37%|███▋      | 1602/4350 [02:13<03:55, 11.66it/s]

Training loss: 0.026171  [51232/139199]


 39%|███▉      | 1702/4350 [02:21<03:47, 11.66it/s]

Training loss: 0.025033  [54432/139199]


 41%|████▏     | 1802/4350 [02:29<03:25, 12.38it/s]

Training loss: 0.025323  [57632/139199]


 44%|████▎     | 1902/4350 [02:37<03:14, 12.61it/s]

Training loss: 0.030613  [60832/139199]


 46%|████▌     | 2002/4350 [02:45<03:11, 12.28it/s]

Training loss: 0.025731  [64032/139199]


 48%|████▊     | 2102/4350 [02:53<02:53, 12.99it/s]

Training loss: 0.030111  [67232/139199]


 51%|█████     | 2202/4350 [03:01<02:49, 12.64it/s]

Training loss: 0.028249  [70432/139199]


 53%|█████▎    | 2302/4350 [03:09<02:40, 12.80it/s]

Training loss: 0.026139  [73632/139199]


 55%|█████▌    | 2402/4350 [03:17<02:39, 12.19it/s]

Training loss: 0.022747  [76832/139199]


 58%|█████▊    | 2502/4350 [03:26<02:29, 12.32it/s]

Training loss: 0.029959  [80032/139199]


 60%|█████▉    | 2602/4350 [03:33<02:20, 12.44it/s]

Training loss: 0.024322  [83232/139199]


 62%|██████▏   | 2702/4350 [03:42<02:12, 12.46it/s]

Training loss: 0.024291  [86432/139199]


 64%|██████▍   | 2802/4350 [03:50<02:09, 11.95it/s]

Training loss: 0.026486  [89632/139199]


 67%|██████▋   | 2902/4350 [03:58<01:55, 12.54it/s]

Training loss: 0.028175  [92832/139199]


 69%|██████▉   | 3002/4350 [04:06<01:53, 11.88it/s]

Training loss: 0.033137  [96032/139199]


 71%|███████▏  | 3102/4350 [04:15<01:42, 12.16it/s]

Training loss: 0.024963  [99232/139199]


 74%|███████▎  | 3202/4350 [04:23<01:33, 12.31it/s]

Training loss: 0.026046  [102432/139199]


 76%|███████▌  | 3302/4350 [04:31<01:29, 11.75it/s]

Training loss: 0.025639  [105632/139199]


 78%|███████▊  | 3402/4350 [04:40<01:18, 12.03it/s]

Training loss: 0.029631  [108832/139199]


 81%|████████  | 3502/4350 [04:48<01:12, 11.64it/s]

Training loss: 0.026827  [112032/139199]


 83%|████████▎ | 3602/4350 [04:56<01:00, 12.37it/s]

Training loss: 0.029720  [115232/139199]


 85%|████████▌ | 3702/4350 [05:04<00:52, 12.28it/s]

Training loss: 0.029398  [118432/139199]


 87%|████████▋ | 3802/4350 [05:12<00:45, 11.98it/s]

Training loss: 0.025803  [121632/139199]


 90%|████████▉ | 3902/4350 [05:20<00:35, 12.55it/s]

Training loss: 0.025609  [124832/139199]


 92%|█████████▏| 4002/4350 [05:28<00:26, 12.94it/s]

Training loss: 0.024273  [128032/139199]


 94%|█████████▍| 4102/4350 [05:36<00:20, 12.17it/s]

Training loss: 0.027111  [131232/139199]


 97%|█████████▋| 4202/4350 [05:44<00:11, 12.62it/s]

Training loss: 0.028831  [134432/139199]


 99%|█████████▉| 4302/4350 [05:52<00:03, 12.83it/s]

Training loss: 0.028102  [137632/139199]


100%|██████████| 4350/4350 [05:57<00:00, 12.15it/s]


Train loss: 0.028882 - Val loss: 0.025153 

Epoch 2
-------------------------------


  0%|          | 3/4350 [00:00<08:39,  8.36it/s]

Training loss: 0.025428  [   32/139199]


  2%|▏         | 103/4350 [00:08<06:00, 11.77it/s]

Training loss: 0.025445  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<06:13, 11.11it/s]

Training loss: 0.024589  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:41, 11.84it/s]

Training loss: 0.033652  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:26, 12.08it/s]

Training loss: 0.032331  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:13, 12.27it/s]

Training loss: 0.027715  [16032/139199]


 14%|█▍        | 603/4350 [00:51<05:14, 11.92it/s]

Training loss: 0.023835  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:13, 11.64it/s]

Training loss: 0.030961  [22432/139199]


 18%|█▊        | 803/4350 [01:07<04:51, 12.16it/s]

Training loss: 0.026944  [25632/139199]


 21%|██        | 903/4350 [01:16<04:48, 11.94it/s]

Training loss: 0.022575  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:54, 11.37it/s]

Training loss: 0.028303  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:30, 12.01it/s]

Training loss: 0.024779  [35232/139199]


 28%|██▊       | 1203/4350 [01:41<04:12, 12.44it/s]

Training loss: 0.027400  [38432/139199]


 30%|██▉       | 1303/4350 [01:49<04:16, 11.87it/s]

Training loss: 0.027408  [41632/139199]


 32%|███▏      | 1403/4350 [01:58<04:06, 11.95it/s]

Training loss: 0.026068  [44832/139199]


 35%|███▍      | 1503/4350 [02:06<03:55, 12.11it/s]

Training loss: 0.027339  [48032/139199]


 37%|███▋      | 1603/4350 [02:14<03:47, 12.09it/s]

Training loss: 0.031820  [51232/139199]


 39%|███▉      | 1703/4350 [02:22<03:42, 11.92it/s]

Training loss: 0.030195  [54432/139199]


 41%|████▏     | 1803/4350 [02:31<03:33, 11.93it/s]

Training loss: 0.033046  [57632/139199]


 44%|████▎     | 1903/4350 [02:40<03:34, 11.40it/s]

Training loss: 0.027266  [60832/139199]


 46%|████▌     | 2003/4350 [02:48<03:20, 11.71it/s]

Training loss: 0.024833  [64032/139199]


 48%|████▊     | 2103/4350 [02:57<03:08, 11.90it/s]

Training loss: 0.029470  [67232/139199]


 51%|█████     | 2203/4350 [03:05<02:57, 12.12it/s]

Training loss: 0.032126  [70432/139199]


 53%|█████▎    | 2303/4350 [03:14<02:52, 11.84it/s]

Training loss: 0.022568  [73632/139199]


 55%|█████▌    | 2403/4350 [03:22<02:43, 11.90it/s]

Training loss: 0.027698  [76832/139199]


 58%|█████▊    | 2503/4350 [03:30<02:31, 12.16it/s]

Training loss: 0.032062  [80032/139199]


 60%|█████▉    | 2603/4350 [03:39<02:26, 11.94it/s]

Training loss: 0.023836  [83232/139199]


 62%|██████▏   | 2703/4350 [03:47<02:22, 11.56it/s]

Training loss: 0.031515  [86432/139199]


 64%|██████▍   | 2803/4350 [03:56<02:09, 11.92it/s]

Training loss: 0.026633  [89632/139199]


 67%|██████▋   | 2903/4350 [04:04<02:00, 12.00it/s]

Training loss: 0.023607  [92832/139199]


 69%|██████▉   | 3003/4350 [04:13<01:53, 11.87it/s]

Training loss: 0.028182  [96032/139199]


 71%|███████▏  | 3103/4350 [04:21<01:43, 12.05it/s]

Training loss: 0.028845  [99232/139199]


 74%|███████▎  | 3203/4350 [04:29<01:33, 12.32it/s]

Training loss: 0.028907  [102432/139199]


 76%|███████▌  | 3303/4350 [04:38<01:32, 11.37it/s]

Training loss: 0.027137  [105632/139199]


 78%|███████▊  | 3403/4350 [04:46<01:21, 11.62it/s]

Training loss: 0.026111  [108832/139199]


 81%|████████  | 3503/4350 [04:54<01:09, 12.19it/s]

Training loss: 0.024033  [112032/139199]


 83%|████████▎ | 3603/4350 [05:03<01:02, 11.96it/s]

Training loss: 0.037856  [115232/139199]


 85%|████████▌ | 3703/4350 [05:11<00:52, 12.23it/s]

Training loss: 0.021408  [118432/139199]


 87%|████████▋ | 3803/4350 [05:20<00:46, 11.80it/s]

Training loss: 0.025702  [121632/139199]


 90%|████████▉ | 3903/4350 [05:28<00:37, 11.86it/s]

Training loss: 0.024628  [124832/139199]


 92%|█████████▏| 4003/4350 [05:36<00:30, 11.44it/s]

Training loss: 0.028526  [128032/139199]


 94%|█████████▍| 4103/4350 [05:45<00:21, 11.37it/s]

Training loss: 0.027036  [131232/139199]


 97%|█████████▋| 4203/4350 [05:53<00:12, 11.80it/s]

Training loss: 0.023165  [134432/139199]


 99%|█████████▉| 4303/4350 [06:02<00:04, 10.92it/s]

Training loss: 0.032033  [137632/139199]


100%|██████████| 4350/4350 [06:06<00:00, 11.87it/s]


Train loss: 0.026918 - Val loss: 0.024715 

Epoch 3
-------------------------------


  0%|          | 3/4350 [00:00<08:12,  8.83it/s]

Training loss: 0.028486  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:52, 12.04it/s]

Training loss: 0.025282  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:45, 12.00it/s]

Training loss: 0.030732  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:54, 11.41it/s]

Training loss: 0.023459  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:21, 12.29it/s]

Training loss: 0.029003  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:19, 12.03it/s]

Training loss: 0.025731  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:36, 11.14it/s]

Training loss: 0.026967  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:06, 11.89it/s]

Training loss: 0.024596  [22432/139199]


 18%|█▊        | 803/4350 [01:07<04:47, 12.33it/s]

Training loss: 0.025758  [25632/139199]


 21%|██        | 903/4350 [01:16<04:46, 12.02it/s]

Training loss: 0.027541  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:32, 12.27it/s]

Training loss: 0.025293  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:25, 12.23it/s]

Training loss: 0.023981  [35232/139199]


 28%|██▊       | 1203/4350 [01:41<04:33, 11.49it/s]

Training loss: 0.024992  [38432/139199]


 30%|██▉       | 1303/4350 [01:49<04:10, 12.15it/s]

Training loss: 0.026080  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<04:07, 11.93it/s]

Training loss: 0.026090  [44832/139199]


 35%|███▍      | 1503/4350 [02:06<03:53, 12.20it/s]

Training loss: 0.025496  [48032/139199]


 37%|███▋      | 1603/4350 [02:14<03:52, 11.80it/s]

Training loss: 0.026044  [51232/139199]


 39%|███▉      | 1703/4350 [02:22<03:33, 12.37it/s]

Training loss: 0.022451  [54432/139199]


 41%|████▏     | 1803/4350 [02:31<03:29, 12.15it/s]

Training loss: 0.020428  [57632/139199]


 44%|████▎     | 1903/4350 [02:39<03:25, 11.88it/s]

Training loss: 0.026091  [60832/139199]


 46%|████▌     | 2003/4350 [02:47<03:11, 12.26it/s]

Training loss: 0.024959  [64032/139199]


 48%|████▊     | 2103/4350 [02:56<03:13, 11.58it/s]

Training loss: 0.028000  [67232/139199]


 51%|█████     | 2203/4350 [03:04<03:04, 11.62it/s]

Training loss: 0.024054  [70432/139199]


 53%|█████▎    | 2303/4350 [03:13<02:48, 12.16it/s]

Training loss: 0.031440  [73632/139199]


 55%|█████▌    | 2403/4350 [03:21<02:36, 12.41it/s]

Training loss: 0.023125  [76832/139199]


 58%|█████▊    | 2503/4350 [03:29<02:32, 12.09it/s]

Training loss: 0.025720  [80032/139199]


 60%|█████▉    | 2603/4350 [03:38<02:26, 11.96it/s]

Training loss: 0.026669  [83232/139199]


 62%|██████▏   | 2703/4350 [03:46<02:17, 11.96it/s]

Training loss: 0.023897  [86432/139199]


 64%|██████▍   | 2803/4350 [03:55<02:08, 12.00it/s]

Training loss: 0.029705  [89632/139199]


 67%|██████▋   | 2903/4350 [04:03<02:08, 11.22it/s]

Training loss: 0.027778  [92832/139199]


 69%|██████▉   | 3003/4350 [04:11<01:56, 11.57it/s]

Training loss: 0.027322  [96032/139199]


 71%|███████▏  | 3103/4350 [04:20<01:50, 11.27it/s]

Training loss: 0.028733  [99232/139199]


 74%|███████▎  | 3203/4350 [04:29<01:41, 11.35it/s]

Training loss: 0.030112  [102432/139199]


 76%|███████▌  | 3303/4350 [04:37<01:26, 12.17it/s]

Training loss: 0.029149  [105632/139199]


 78%|███████▊  | 3403/4350 [04:45<01:16, 12.32it/s]

Training loss: 0.028302  [108832/139199]


 81%|████████  | 3503/4350 [04:54<01:11, 11.90it/s]

Training loss: 0.026848  [112032/139199]


 83%|████████▎ | 3603/4350 [05:02<01:03, 11.76it/s]

Training loss: 0.025771  [115232/139199]


 85%|████████▌ | 3703/4350 [05:11<00:54, 11.78it/s]

Training loss: 0.025580  [118432/139199]


 87%|████████▋ | 3803/4350 [05:19<00:45, 11.97it/s]

Training loss: 0.028542  [121632/139199]


 90%|████████▉ | 3903/4350 [05:28<00:40, 11.16it/s]

Training loss: 0.029587  [124832/139199]


 92%|█████████▏| 4003/4350 [05:36<00:28, 12.04it/s]

Training loss: 0.032552  [128032/139199]


 94%|█████████▍| 4103/4350 [05:45<00:21, 11.71it/s]

Training loss: 0.024034  [131232/139199]


 97%|█████████▋| 4203/4350 [05:53<00:13, 11.26it/s]

Training loss: 0.024949  [134432/139199]


 99%|█████████▉| 4303/4350 [06:02<00:04, 11.56it/s]

Training loss: 0.026693  [137632/139199]


100%|██████████| 4350/4350 [06:06<00:00, 11.86it/s]


Train loss: 0.026767 - Val loss: 0.024161 

Epoch 4
-------------------------------


  0%|          | 3/4350 [00:00<08:31,  8.50it/s]

Training loss: 0.025176  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:56, 11.91it/s]

Training loss: 0.024259  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<06:03, 11.40it/s]

Training loss: 0.027076  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:33, 12.12it/s]

Training loss: 0.027573  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:29, 11.98it/s]

Training loss: 0.022058  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:51, 10.94it/s]

Training loss: 0.026228  [16032/139199]


 14%|█▍        | 603/4350 [00:51<05:04, 12.29it/s]

Training loss: 0.030947  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:13, 11.65it/s]

Training loss: 0.029259  [22432/139199]


 18%|█▊        | 803/4350 [01:08<05:13, 11.32it/s]

Training loss: 0.025800  [25632/139199]


 21%|██        | 903/4350 [01:16<04:59, 11.51it/s]

Training loss: 0.029465  [28832/139199]


 23%|██▎       | 1003/4350 [01:25<04:35, 12.16it/s]

Training loss: 0.021153  [32032/139199]


 25%|██▌       | 1103/4350 [01:34<04:34, 11.81it/s]

Training loss: 0.033049  [35232/139199]


 28%|██▊       | 1203/4350 [01:42<04:45, 11.01it/s]

Training loss: 0.029261  [38432/139199]


 30%|██▉       | 1303/4350 [01:51<04:17, 11.83it/s]

Training loss: 0.024482  [41632/139199]


 32%|███▏      | 1403/4350 [02:00<04:26, 11.06it/s]

Training loss: 0.022162  [44832/139199]


 35%|███▍      | 1503/4350 [02:08<04:03, 11.70it/s]

Training loss: 0.023139  [48032/139199]


 37%|███▋      | 1603/4350 [02:16<03:57, 11.56it/s]

Training loss: 0.029020  [51232/139199]


 39%|███▉      | 1703/4350 [02:25<03:35, 12.26it/s]

Training loss: 0.024497  [54432/139199]


 41%|████▏     | 1803/4350 [02:34<03:29, 12.16it/s]

Training loss: 0.025748  [57632/139199]


 44%|████▎     | 1903/4350 [02:42<03:36, 11.31it/s]

Training loss: 0.025041  [60832/139199]


 46%|████▌     | 2003/4350 [02:50<03:14, 12.05it/s]

Training loss: 0.027704  [64032/139199]


 48%|████▊     | 2103/4350 [02:59<03:06, 12.07it/s]

Training loss: 0.023203  [67232/139199]


 51%|█████     | 2203/4350 [03:07<03:01, 11.84it/s]

Training loss: 0.027787  [70432/139199]


 53%|█████▎    | 2303/4350 [03:15<02:49, 12.06it/s]

Training loss: 0.027481  [73632/139199]


 55%|█████▌    | 2403/4350 [03:24<02:40, 12.10it/s]

Training loss: 0.024482  [76832/139199]


 58%|█████▊    | 2503/4350 [03:32<02:37, 11.76it/s]

Training loss: 0.025375  [80032/139199]


 60%|█████▉    | 2603/4350 [03:40<02:24, 12.12it/s]

Training loss: 0.022613  [83232/139199]


 62%|██████▏   | 2703/4350 [03:49<02:15, 12.14it/s]

Training loss: 0.027526  [86432/139199]


 64%|██████▍   | 2803/4350 [03:57<02:09, 11.90it/s]

Training loss: 0.029767  [89632/139199]


 67%|██████▋   | 2903/4350 [04:05<02:00, 12.01it/s]

Training loss: 0.031871  [92832/139199]


 69%|██████▉   | 3003/4350 [04:14<01:51, 12.04it/s]

Training loss: 0.024808  [96032/139199]


 71%|███████▏  | 3103/4350 [04:22<01:39, 12.48it/s]

Training loss: 0.028529  [99232/139199]


 74%|███████▎  | 3203/4350 [04:30<01:38, 11.70it/s]

Training loss: 0.025677  [102432/139199]


 76%|███████▌  | 3303/4350 [04:39<01:26, 12.10it/s]

Training loss: 0.029340  [105632/139199]


 78%|███████▊  | 3403/4350 [04:47<01:21, 11.59it/s]

Training loss: 0.025746  [108832/139199]


 81%|████████  | 3503/4350 [04:56<01:13, 11.58it/s]

Training loss: 0.026500  [112032/139199]


 83%|████████▎ | 3603/4350 [05:04<01:01, 12.24it/s]

Training loss: 0.028918  [115232/139199]


 85%|████████▌ | 3703/4350 [05:13<00:53, 12.19it/s]

Training loss: 0.026997  [118432/139199]


 87%|████████▋ | 3803/4350 [05:21<00:45, 12.06it/s]

Training loss: 0.025172  [121632/139199]


 90%|████████▉ | 3903/4350 [05:29<00:35, 12.43it/s]

Training loss: 0.026695  [124832/139199]


 92%|█████████▏| 4003/4350 [05:38<00:28, 12.09it/s]

Training loss: 0.029123  [128032/139199]


 94%|█████████▍| 4103/4350 [05:46<00:20, 11.97it/s]

Training loss: 0.020309  [131232/139199]


 97%|█████████▋| 4203/4350 [05:54<00:12, 11.80it/s]

Training loss: 0.023284  [134432/139199]


 99%|█████████▉| 4303/4350 [06:03<00:03, 11.96it/s]

Training loss: 0.027831  [137632/139199]


100%|██████████| 4350/4350 [06:07<00:00, 11.85it/s]


Train loss: 0.026595 - Val loss: 0.023640 

Epoch 5
-------------------------------


  0%|          | 3/4350 [00:00<07:58,  9.09it/s]

Training loss: 0.024895  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:54, 11.97it/s]

Training loss: 0.029964  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:38, 12.24it/s]

Training loss: 0.027515  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:36, 12.03it/s]

Training loss: 0.026481  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:33, 11.82it/s]

Training loss: 0.028062  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:15, 12.20it/s]

Training loss: 0.025576  [16032/139199]


 14%|█▍        | 603/4350 [00:50<04:57, 12.58it/s]

Training loss: 0.027072  [19232/139199]


 16%|█▌        | 703/4350 [00:58<04:57, 12.27it/s]

Training loss: 0.025194  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:55, 12.00it/s]

Training loss: 0.026434  [25632/139199]


 21%|██        | 903/4350 [01:14<04:36, 12.47it/s]

Training loss: 0.028215  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:29, 12.43it/s]

Training loss: 0.032215  [32032/139199]


 25%|██▌       | 1103/4350 [01:31<04:28, 12.10it/s]

Training loss: 0.029817  [35232/139199]


 28%|██▊       | 1203/4350 [01:39<04:12, 12.45it/s]

Training loss: 0.028057  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:03, 12.50it/s]

Training loss: 0.024395  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<04:03, 12.13it/s]

Training loss: 0.024074  [44832/139199]


 35%|███▍      | 1503/4350 [02:04<03:54, 12.14it/s]

Training loss: 0.024825  [48032/139199]


 37%|███▋      | 1603/4350 [02:12<03:50, 11.94it/s]

Training loss: 0.025833  [51232/139199]


 39%|███▉      | 1703/4350 [02:20<03:40, 12.00it/s]

Training loss: 0.027369  [54432/139199]


 41%|████▏     | 1803/4350 [02:29<03:26, 12.33it/s]

Training loss: 0.024923  [57632/139199]


 44%|████▎     | 1903/4350 [02:37<03:21, 12.17it/s]

Training loss: 0.026105  [60832/139199]


 46%|████▌     | 2003/4350 [02:45<03:16, 11.95it/s]

Training loss: 0.023185  [64032/139199]


 48%|████▊     | 2103/4350 [02:53<03:06, 12.05it/s]

Training loss: 0.027260  [67232/139199]


 51%|█████     | 2203/4350 [03:02<02:52, 12.41it/s]

Training loss: 0.026526  [70432/139199]


 53%|█████▎    | 2303/4350 [03:10<02:50, 11.98it/s]

Training loss: 0.026171  [73632/139199]


 55%|█████▌    | 2403/4350 [03:18<02:40, 12.13it/s]

Training loss: 0.026383  [76832/139199]


 58%|█████▊    | 2503/4350 [03:26<02:30, 12.29it/s]

Training loss: 0.022984  [80032/139199]


 60%|█████▉    | 2603/4350 [03:35<02:21, 12.36it/s]

Training loss: 0.028426  [83232/139199]


 62%|██████▏   | 2703/4350 [03:43<02:14, 12.24it/s]

Training loss: 0.017948  [86432/139199]


 64%|██████▍   | 2803/4350 [03:51<02:11, 11.73it/s]

Training loss: 0.030434  [89632/139199]


 67%|██████▋   | 2903/4350 [04:00<02:03, 11.72it/s]

Training loss: 0.022669  [92832/139199]


 69%|██████▉   | 3003/4350 [04:08<01:53, 11.86it/s]

Training loss: 0.036995  [96032/139199]


 71%|███████▏  | 3103/4350 [04:16<01:39, 12.47it/s]

Training loss: 0.028136  [99232/139199]


 74%|███████▎  | 3203/4350 [04:25<01:35, 11.96it/s]

Training loss: 0.021424  [102432/139199]


 76%|███████▌  | 3303/4350 [04:33<01:26, 12.05it/s]

Training loss: 0.021948  [105632/139199]


 78%|███████▊  | 3403/4350 [04:41<01:16, 12.30it/s]

Training loss: 0.025456  [108832/139199]


 81%|████████  | 3503/4350 [04:49<01:09, 12.22it/s]

Training loss: 0.028346  [112032/139199]


 83%|████████▎ | 3603/4350 [04:57<01:03, 11.68it/s]

Training loss: 0.023999  [115232/139199]


 85%|████████▌ | 3703/4350 [05:06<00:52, 12.30it/s]

Training loss: 0.021677  [118432/139199]


 87%|████████▋ | 3803/4350 [05:14<00:44, 12.38it/s]

Training loss: 0.020740  [121632/139199]


 90%|████████▉ | 3903/4350 [05:22<00:37, 11.96it/s]

Training loss: 0.024477  [124832/139199]


 92%|█████████▏| 4003/4350 [05:30<00:27, 12.46it/s]

Training loss: 0.028454  [128032/139199]


 94%|█████████▍| 4103/4350 [05:39<00:20, 12.15it/s]

Training loss: 0.019808  [131232/139199]


 97%|█████████▋| 4203/4350 [05:47<00:11, 12.25it/s]

Training loss: 0.027029  [134432/139199]


 99%|█████████▉| 4303/4350 [05:55<00:04, 11.67it/s]

Training loss: 0.025475  [137632/139199]


100%|██████████| 4350/4350 [05:59<00:00, 12.09it/s]


Train loss: 0.026485 - Val loss: 0.023937 

Epoch 6
-------------------------------


  0%|          | 3/4350 [00:00<07:59,  9.07it/s]

Training loss: 0.025466  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:43, 12.36it/s]

Training loss: 0.025802  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:53, 11.74it/s]

Training loss: 0.024567  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:29, 12.27it/s]

Training loss: 0.029146  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:29, 11.96it/s]

Training loss: 0.024030  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:24, 11.84it/s]

Training loss: 0.022878  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:01, 12.44it/s]

Training loss: 0.019986  [19232/139199]


 16%|█▌        | 703/4350 [00:58<05:02, 12.05it/s]

Training loss: 0.028658  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:46, 12.38it/s]

Training loss: 0.028867  [25632/139199]


 21%|██        | 903/4350 [01:15<04:43, 12.17it/s]

Training loss: 0.026338  [28832/139199]


 23%|██▎       | 1003/4350 [01:23<04:33, 12.24it/s]

Training loss: 0.022278  [32032/139199]


 25%|██▌       | 1103/4350 [01:31<04:25, 12.23it/s]

Training loss: 0.024598  [35232/139199]


 28%|██▊       | 1203/4350 [01:39<04:18, 12.18it/s]

Training loss: 0.025101  [38432/139199]


 30%|██▉       | 1303/4350 [01:48<04:10, 12.17it/s]

Training loss: 0.027153  [41632/139199]


 32%|███▏      | 1403/4350 [01:56<04:03, 12.10it/s]

Training loss: 0.023646  [44832/139199]


 35%|███▍      | 1503/4350 [02:04<04:11, 11.33it/s]

Training loss: 0.026770  [48032/139199]


 37%|███▋      | 1603/4350 [02:13<03:50, 11.93it/s]

Training loss: 0.024620  [51232/139199]


 39%|███▉      | 1703/4350 [02:21<03:34, 12.34it/s]

Training loss: 0.031114  [54432/139199]


 41%|████▏     | 1803/4350 [02:29<03:36, 11.78it/s]

Training loss: 0.022808  [57632/139199]


 44%|████▎     | 1903/4350 [02:38<03:21, 12.13it/s]

Training loss: 0.018580  [60832/139199]


 46%|████▌     | 2003/4350 [02:46<03:10, 12.34it/s]

Training loss: 0.023094  [64032/139199]


 48%|████▊     | 2103/4350 [02:54<03:07, 11.97it/s]

Training loss: 0.027033  [67232/139199]


 51%|█████     | 2203/4350 [03:02<02:56, 12.17it/s]

Training loss: 0.024574  [70432/139199]


 53%|█████▎    | 2303/4350 [03:10<02:46, 12.33it/s]

Training loss: 0.027023  [73632/139199]


 55%|█████▌    | 2403/4350 [03:19<02:41, 12.08it/s]

Training loss: 0.025725  [76832/139199]


 58%|█████▊    | 2503/4350 [03:27<02:34, 11.98it/s]

Training loss: 0.025553  [80032/139199]


 60%|█████▉    | 2603/4350 [03:36<02:29, 11.65it/s]

Training loss: 0.026150  [83232/139199]


 62%|██████▏   | 2703/4350 [03:44<02:16, 12.11it/s]

Training loss: 0.025648  [86432/139199]


 64%|██████▍   | 2803/4350 [03:53<02:15, 11.45it/s]

Training loss: 0.026620  [89632/139199]


 67%|██████▋   | 2903/4350 [04:01<01:57, 12.29it/s]

Training loss: 0.024735  [92832/139199]


 69%|██████▉   | 3003/4350 [04:09<01:55, 11.66it/s]

Training loss: 0.026414  [96032/139199]


 71%|███████▏  | 3103/4350 [04:18<01:44, 11.95it/s]

Training loss: 0.025657  [99232/139199]


 74%|███████▎  | 3203/4350 [04:26<01:37, 11.82it/s]

Training loss: 0.024635  [102432/139199]


 76%|███████▌  | 3303/4350 [04:35<01:29, 11.75it/s]

Training loss: 0.026320  [105632/139199]


 78%|███████▊  | 3403/4350 [04:43<01:16, 12.39it/s]

Training loss: 0.025963  [108832/139199]


 81%|████████  | 3503/4350 [04:52<01:08, 12.35it/s]

Training loss: 0.028977  [112032/139199]


 83%|████████▎ | 3603/4350 [05:00<01:00, 12.29it/s]

Training loss: 0.024456  [115232/139199]


 85%|████████▌ | 3703/4350 [05:08<00:56, 11.38it/s]

Training loss: 0.028294  [118432/139199]


 87%|████████▋ | 3803/4350 [05:17<00:45, 12.01it/s]

Training loss: 0.028593  [121632/139199]


 90%|████████▉ | 3903/4350 [05:25<00:36, 12.19it/s]

Training loss: 0.024799  [124832/139199]


 92%|█████████▏| 4003/4350 [05:33<00:29, 11.65it/s]

Training loss: 0.025634  [128032/139199]


 94%|█████████▍| 4103/4350 [05:42<00:21, 11.28it/s]

Training loss: 0.023906  [131232/139199]


 97%|█████████▋| 4203/4350 [05:50<00:12, 11.82it/s]

Training loss: 0.027374  [134432/139199]


 99%|█████████▉| 4303/4350 [05:59<00:03, 12.49it/s]

Training loss: 0.026846  [137632/139199]


100%|██████████| 4350/4350 [06:02<00:00, 11.99it/s]


Train loss: 0.026416 - Val loss: 0.023786 

Epoch 7
-------------------------------


  0%|          | 3/4350 [00:00<07:52,  9.21it/s]

Training loss: 0.025960  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:40, 12.47it/s]

Training loss: 0.028016  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:33, 12.43it/s]

Training loss: 0.026972  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:25, 12.45it/s]

Training loss: 0.031141  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:18, 12.37it/s]

Training loss: 0.030609  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:13, 12.29it/s]

Training loss: 0.027499  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:10, 12.07it/s]

Training loss: 0.024760  [19232/139199]


 16%|█▌        | 703/4350 [00:58<05:05, 11.92it/s]

Training loss: 0.028472  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:49, 12.26it/s]

Training loss: 0.026870  [25632/139199]


 21%|██        | 903/4350 [01:14<04:50, 11.86it/s]

Training loss: 0.027042  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:55, 11.33it/s]

Training loss: 0.024373  [32032/139199]


 25%|██▌       | 1103/4350 [01:31<04:28, 12.11it/s]

Training loss: 0.027399  [35232/139199]


 28%|██▊       | 1203/4350 [01:39<04:16, 12.29it/s]

Training loss: 0.024918  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:23, 11.56it/s]

Training loss: 0.024661  [41632/139199]


 32%|███▏      | 1403/4350 [01:56<04:11, 11.72it/s]

Training loss: 0.024886  [44832/139199]


 35%|███▍      | 1503/4350 [02:04<03:49, 12.42it/s]

Training loss: 0.020594  [48032/139199]


 37%|███▋      | 1603/4350 [02:12<03:48, 12.03it/s]

Training loss: 0.028110  [51232/139199]


 39%|███▉      | 1703/4350 [02:20<03:38, 12.09it/s]

Training loss: 0.026658  [54432/139199]


 41%|████▏     | 1803/4350 [02:28<03:32, 11.99it/s]

Training loss: 0.029192  [57632/139199]


 44%|████▎     | 1903/4350 [02:37<03:21, 12.16it/s]

Training loss: 0.027155  [60832/139199]


 46%|████▌     | 2003/4350 [02:45<03:10, 12.34it/s]

Training loss: 0.026159  [64032/139199]


 48%|████▊     | 2103/4350 [02:53<03:03, 12.26it/s]

Training loss: 0.026264  [67232/139199]


 51%|█████     | 2203/4350 [03:02<02:58, 12.04it/s]

Training loss: 0.024842  [70432/139199]


 53%|█████▎    | 2303/4350 [03:10<02:50, 12.04it/s]

Training loss: 0.031075  [73632/139199]


 55%|█████▌    | 2403/4350 [03:18<02:48, 11.54it/s]

Training loss: 0.028292  [76832/139199]


 58%|█████▊    | 2503/4350 [03:27<02:30, 12.30it/s]

Training loss: 0.023760  [80032/139199]


 60%|█████▉    | 2603/4350 [03:35<02:26, 11.89it/s]

Training loss: 0.030006  [83232/139199]


 62%|██████▏   | 2703/4350 [03:44<02:17, 11.99it/s]

Training loss: 0.023116  [86432/139199]


 64%|██████▍   | 2803/4350 [03:52<02:08, 12.01it/s]

Training loss: 0.028704  [89632/139199]


 67%|██████▋   | 2903/4350 [04:01<02:05, 11.50it/s]

Training loss: 0.029469  [92832/139199]


 69%|██████▉   | 3003/4350 [04:09<01:57, 11.50it/s]

Training loss: 0.031782  [96032/139199]


 71%|███████▏  | 3103/4350 [04:18<01:42, 12.12it/s]

Training loss: 0.022695  [99232/139199]


 74%|███████▎  | 3203/4350 [04:26<01:34, 12.19it/s]

Training loss: 0.031546  [102432/139199]


 76%|███████▌  | 3303/4350 [04:34<01:29, 11.67it/s]

Training loss: 0.029047  [105632/139199]


 78%|███████▊  | 3403/4350 [04:43<01:18, 12.05it/s]

Training loss: 0.030155  [108832/139199]


 81%|████████  | 3503/4350 [04:51<01:12, 11.61it/s]

Training loss: 0.024179  [112032/139199]


 83%|████████▎ | 3603/4350 [05:00<01:05, 11.49it/s]

Training loss: 0.023032  [115232/139199]


 85%|████████▌ | 3703/4350 [05:08<00:52, 12.24it/s]

Training loss: 0.029423  [118432/139199]


 87%|████████▋ | 3803/4350 [05:17<00:46, 11.85it/s]

Training loss: 0.022841  [121632/139199]


 90%|████████▉ | 3903/4350 [05:25<00:38, 11.59it/s]

Training loss: 0.027091  [124832/139199]


 92%|█████████▏| 4003/4350 [05:33<00:28, 12.26it/s]

Training loss: 0.025660  [128032/139199]


 94%|█████████▍| 4103/4350 [05:41<00:20, 12.32it/s]

Training loss: 0.030251  [131232/139199]


 97%|█████████▋| 4203/4350 [05:50<00:11, 12.47it/s]

Training loss: 0.027517  [134432/139199]


 99%|█████████▉| 4303/4350 [05:58<00:03, 12.59it/s]

Training loss: 0.026675  [137632/139199]


100%|██████████| 4350/4350 [06:02<00:00, 12.00it/s]


Train loss: 0.026586 - Val loss: 0.024179 

Epoch 8
-------------------------------


  0%|          | 3/4350 [00:00<07:59,  9.07it/s]

Training loss: 0.031759  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:44, 12.34it/s]

Training loss: 0.033561  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:50, 11.83it/s]

Training loss: 0.029272  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:38, 11.95it/s]

Training loss: 0.021606  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:25, 12.11it/s]

Training loss: 0.021971  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:47, 11.06it/s]

Training loss: 0.027128  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:16, 11.82it/s]

Training loss: 0.023239  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:05, 11.94it/s]

Training loss: 0.022972  [22432/139199]


 18%|█▊        | 803/4350 [01:07<04:53, 12.09it/s]

Training loss: 0.026533  [25632/139199]


 21%|██        | 903/4350 [01:15<04:36, 12.46it/s]

Training loss: 0.026602  [28832/139199]


 23%|██▎       | 1003/4350 [01:23<04:29, 12.40it/s]

Training loss: 0.028647  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:29, 12.06it/s]

Training loss: 0.024160  [35232/139199]


 28%|██▊       | 1203/4350 [01:40<04:13, 12.40it/s]

Training loss: 0.034007  [38432/139199]


 30%|██▉       | 1303/4350 [01:48<04:09, 12.21it/s]

Training loss: 0.025435  [41632/139199]


 32%|███▏      | 1403/4350 [01:56<04:02, 12.15it/s]

Training loss: 0.025175  [44832/139199]


 35%|███▍      | 1503/4350 [02:05<03:57, 12.01it/s]

Training loss: 0.028769  [48032/139199]


 37%|███▋      | 1603/4350 [02:13<03:41, 12.40it/s]

Training loss: 0.028548  [51232/139199]


 39%|███▉      | 1703/4350 [02:21<03:41, 11.97it/s]

Training loss: 0.028003  [54432/139199]


 41%|████▏     | 1803/4350 [02:29<03:29, 12.17it/s]

Training loss: 0.026439  [57632/139199]


 44%|████▎     | 1903/4350 [02:38<03:18, 12.34it/s]

Training loss: 0.024485  [60832/139199]


 46%|████▌     | 2003/4350 [02:46<03:10, 12.29it/s]

Training loss: 0.025745  [64032/139199]


 48%|████▊     | 2103/4350 [02:54<03:20, 11.22it/s]

Training loss: 0.027898  [67232/139199]


 51%|█████     | 2203/4350 [03:02<02:59, 11.94it/s]

Training loss: 0.029610  [70432/139199]


 53%|█████▎    | 2303/4350 [03:11<02:49, 12.06it/s]

Training loss: 0.024729  [73632/139199]


 55%|█████▌    | 2403/4350 [03:19<02:38, 12.27it/s]

Training loss: 0.024180  [76832/139199]


 58%|█████▊    | 2503/4350 [03:27<02:29, 12.32it/s]

Training loss: 0.028921  [80032/139199]


 60%|█████▉    | 2603/4350 [03:36<02:24, 12.13it/s]

Training loss: 0.023184  [83232/139199]


 62%|██████▏   | 2703/4350 [03:44<02:14, 12.26it/s]

Training loss: 0.024688  [86432/139199]


 64%|██████▍   | 2803/4350 [03:52<02:06, 12.19it/s]

Training loss: 0.022997  [89632/139199]


 67%|██████▋   | 2903/4350 [04:00<02:00, 12.00it/s]

Training loss: 0.024244  [92832/139199]


 69%|██████▉   | 3003/4350 [04:09<01:48, 12.46it/s]

Training loss: 0.025057  [96032/139199]


 71%|███████▏  | 3103/4350 [04:17<01:43, 12.07it/s]

Training loss: 0.034890  [99232/139199]


 74%|███████▎  | 3203/4350 [04:25<01:36, 11.95it/s]

Training loss: 0.027903  [102432/139199]


 76%|███████▌  | 3303/4350 [04:33<01:25, 12.28it/s]

Training loss: 0.022245  [105632/139199]


 78%|███████▊  | 3403/4350 [04:42<01:23, 11.36it/s]

Training loss: 0.024457  [108832/139199]


 81%|████████  | 3503/4350 [04:50<01:08, 12.30it/s]

Training loss: 0.024425  [112032/139199]


 83%|████████▎ | 3603/4350 [04:58<01:00, 12.45it/s]

Training loss: 0.033363  [115232/139199]


 85%|████████▌ | 3703/4350 [05:06<00:55, 11.75it/s]

Training loss: 0.022950  [118432/139199]


 87%|████████▋ | 3803/4350 [05:15<00:44, 12.33it/s]

Training loss: 0.027477  [121632/139199]


 90%|████████▉ | 3903/4350 [05:23<00:36, 12.31it/s]

Training loss: 0.028171  [124832/139199]


 92%|█████████▏| 4003/4350 [05:31<00:29, 11.61it/s]

Training loss: 0.025225  [128032/139199]


 94%|█████████▍| 4103/4350 [05:39<00:19, 12.51it/s]

Training loss: 0.029732  [131232/139199]


 97%|█████████▋| 4203/4350 [05:47<00:11, 12.46it/s]

Training loss: 0.026329  [134432/139199]


 99%|█████████▉| 4303/4350 [05:55<00:04, 11.56it/s]

Training loss: 0.026149  [137632/139199]


100%|██████████| 4350/4350 [05:59<00:00, 12.09it/s]


Train loss: 0.026455 - Val loss: 0.024045 

Epoch 9
-------------------------------


  0%|          | 3/4350 [00:00<07:47,  9.29it/s]

Training loss: 0.021868  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:51, 12.09it/s]

Training loss: 0.020578  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:56, 11.62it/s]

Training loss: 0.026131  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:31, 12.22it/s]

Training loss: 0.022626  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:35, 11.78it/s]

Training loss: 0.023170  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:13, 12.28it/s]

Training loss: 0.025570  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:06, 12.24it/s]

Training loss: 0.028944  [19232/139199]


 16%|█▌        | 703/4350 [00:58<05:03, 12.00it/s]

Training loss: 0.023178  [22432/139199]


 18%|█▊        | 803/4350 [01:07<05:07, 11.54it/s]

Training loss: 0.024165  [25632/139199]


 21%|██        | 903/4350 [01:15<05:12, 11.02it/s]

Training loss: 0.025192  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:29, 12.41it/s]

Training loss: 0.028123  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:30, 11.99it/s]

Training loss: 0.031279  [35232/139199]


 28%|██▊       | 1203/4350 [01:40<04:22, 12.00it/s]

Training loss: 0.031844  [38432/139199]


 30%|██▉       | 1303/4350 [01:48<04:01, 12.64it/s]

Training loss: 0.024087  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<03:57, 12.39it/s]

Training loss: 0.023172  [44832/139199]


 35%|███▍      | 1503/4350 [02:05<03:53, 12.17it/s]

Training loss: 0.021931  [48032/139199]


 37%|███▋      | 1603/4350 [02:13<03:47, 12.08it/s]

Training loss: 0.026548  [51232/139199]


 39%|███▉      | 1703/4350 [02:22<03:45, 11.73it/s]

Training loss: 0.024508  [54432/139199]


 41%|████▏     | 1803/4350 [02:30<03:35, 11.81it/s]

Training loss: 0.028532  [57632/139199]


 44%|████▎     | 1903/4350 [02:39<03:28, 11.75it/s]

Training loss: 0.032334  [60832/139199]


 46%|████▌     | 2003/4350 [02:47<03:10, 12.32it/s]

Training loss: 0.029897  [64032/139199]


 48%|████▊     | 2103/4350 [02:55<03:08, 11.90it/s]

Training loss: 0.027292  [67232/139199]


 51%|█████     | 2203/4350 [03:04<03:00, 11.88it/s]

Training loss: 0.029181  [70432/139199]


 53%|█████▎    | 2303/4350 [03:12<02:45, 12.36it/s]

Training loss: 0.024497  [73632/139199]


 55%|█████▌    | 2403/4350 [03:20<02:38, 12.26it/s]

Training loss: 0.025470  [76832/139199]


 58%|█████▊    | 2503/4350 [03:28<02:37, 11.69it/s]

Training loss: 0.032732  [80032/139199]


 60%|█████▉    | 2603/4350 [03:36<02:23, 12.17it/s]

Training loss: 0.025332  [83232/139199]


 62%|██████▏   | 2703/4350 [03:45<02:15, 12.12it/s]

Training loss: 0.026084  [86432/139199]


 64%|██████▍   | 2803/4350 [03:53<02:04, 12.47it/s]

Training loss: 0.025980  [89632/139199]


 67%|██████▋   | 2903/4350 [04:01<01:57, 12.37it/s]

Training loss: 0.028088  [92832/139199]


 69%|██████▉   | 3003/4350 [04:10<01:49, 12.34it/s]

Training loss: 0.021280  [96032/139199]


 71%|███████▏  | 3103/4350 [04:18<01:41, 12.27it/s]

Training loss: 0.032045  [99232/139199]


 74%|███████▎  | 3203/4350 [04:26<01:35, 12.07it/s]

Training loss: 0.022595  [102432/139199]


 76%|███████▌  | 3303/4350 [04:35<01:25, 12.30it/s]

Training loss: 0.027173  [105632/139199]


 78%|███████▊  | 3403/4350 [04:43<01:22, 11.45it/s]

Training loss: 0.023745  [108832/139199]


 81%|████████  | 3503/4350 [04:51<01:11, 11.85it/s]

Training loss: 0.022348  [112032/139199]


 83%|████████▎ | 3603/4350 [04:59<00:59, 12.49it/s]

Training loss: 0.025418  [115232/139199]


 85%|████████▌ | 3703/4350 [05:08<00:52, 12.26it/s]

Training loss: 0.025929  [118432/139199]


 87%|████████▋ | 3803/4350 [05:16<00:46, 11.80it/s]

Training loss: 0.025631  [121632/139199]


 90%|████████▉ | 3903/4350 [05:24<00:36, 12.32it/s]

Training loss: 0.026398  [124832/139199]


 92%|█████████▏| 4003/4350 [05:33<00:28, 12.05it/s]

Training loss: 0.021173  [128032/139199]


 94%|█████████▍| 4103/4350 [05:41<00:21, 11.56it/s]

Training loss: 0.020861  [131232/139199]


 97%|█████████▋| 4203/4350 [05:49<00:11, 12.48it/s]

Training loss: 0.032859  [134432/139199]


 99%|█████████▉| 4303/4350 [05:58<00:03, 11.98it/s]

Training loss: 0.017752  [137632/139199]


100%|██████████| 4350/4350 [06:02<00:00, 12.01it/s]


Train loss: 0.026401 - Val loss: 0.023749 

Epoch 10
-------------------------------


  0%|          | 1/4350 [00:00<14:16,  5.08it/s]

Training loss: 0.026928  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:38, 12.54it/s]

Training loss: 0.027718  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:31, 12.50it/s]

Training loss: 0.021908  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:25, 12.43it/s]

Training loss: 0.033852  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:24, 12.15it/s]

Training loss: 0.028157  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:10, 12.38it/s]

Training loss: 0.027978  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:00, 12.48it/s]

Training loss: 0.028229  [19232/139199]


 16%|█▌        | 703/4350 [00:57<05:08, 11.82it/s]

Training loss: 0.026361  [22432/139199]


 18%|█▊        | 803/4350 [01:05<04:45, 12.42it/s]

Training loss: 0.023769  [25632/139199]


 21%|██        | 903/4350 [01:13<04:37, 12.40it/s]

Training loss: 0.030432  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:37, 12.06it/s]

Training loss: 0.028219  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:17, 12.61it/s]

Training loss: 0.021851  [35232/139199]


 28%|██▊       | 1203/4350 [01:38<04:16, 12.26it/s]

Training loss: 0.026107  [38432/139199]


 30%|██▉       | 1303/4350 [01:46<04:27, 11.39it/s]

Training loss: 0.021467  [41632/139199]


 32%|███▏      | 1403/4350 [01:54<03:54, 12.58it/s]

Training loss: 0.028616  [44832/139199]


 35%|███▍      | 1503/4350 [02:02<03:47, 12.52it/s]

Training loss: 0.028819  [48032/139199]


 37%|███▋      | 1603/4350 [02:10<03:58, 11.50it/s]

Training loss: 0.025834  [51232/139199]


 39%|███▉      | 1703/4350 [02:18<03:28, 12.67it/s]

Training loss: 0.025069  [54432/139199]


 41%|████▏     | 1803/4350 [02:26<03:19, 12.76it/s]

Training loss: 0.023783  [57632/139199]


 44%|████▎     | 1903/4350 [02:35<03:31, 11.58it/s]

Training loss: 0.027850  [60832/139199]


 46%|████▌     | 2003/4350 [02:43<03:09, 12.36it/s]

Training loss: 0.026437  [64032/139199]


 48%|████▊     | 2103/4350 [02:51<03:11, 11.74it/s]

Training loss: 0.031011  [67232/139199]


 51%|█████     | 2203/4350 [02:59<03:08, 11.37it/s]

Training loss: 0.021395  [70432/139199]


 53%|█████▎    | 2303/4350 [03:08<02:56, 11.61it/s]

Training loss: 0.023791  [73632/139199]


 55%|█████▌    | 2403/4350 [03:16<02:42, 11.97it/s]

Training loss: 0.022454  [76832/139199]


 58%|█████▊    | 2503/4350 [03:24<02:35, 11.87it/s]

Training loss: 0.027285  [80032/139199]


 60%|█████▉    | 2603/4350 [03:32<02:20, 12.42it/s]

Training loss: 0.023088  [83232/139199]


 62%|██████▏   | 2703/4350 [03:41<02:12, 12.39it/s]

Training loss: 0.025886  [86432/139199]


 64%|██████▍   | 2803/4350 [03:49<02:04, 12.43it/s]

Training loss: 0.025598  [89632/139199]


 67%|██████▋   | 2903/4350 [03:57<02:04, 11.58it/s]

Training loss: 0.022912  [92832/139199]


 69%|██████▉   | 3003/4350 [04:05<01:49, 12.26it/s]

Training loss: 0.026597  [96032/139199]


 71%|███████▏  | 3103/4350 [04:13<01:43, 12.10it/s]

Training loss: 0.030237  [99232/139199]


 74%|███████▎  | 3203/4350 [04:21<01:32, 12.39it/s]

Training loss: 0.028130  [102432/139199]


 76%|███████▌  | 3303/4350 [04:30<01:24, 12.42it/s]

Training loss: 0.024694  [105632/139199]


 78%|███████▊  | 3403/4350 [04:38<01:16, 12.32it/s]

Training loss: 0.025047  [108832/139199]


 81%|████████  | 3503/4350 [04:46<01:12, 11.75it/s]

Training loss: 0.023933  [112032/139199]


 83%|████████▎ | 3603/4350 [04:54<01:01, 12.22it/s]

Training loss: 0.027819  [115232/139199]


 85%|████████▌ | 3703/4350 [05:03<00:53, 12.18it/s]

Training loss: 0.029053  [118432/139199]


 87%|████████▋ | 3803/4350 [05:11<00:47, 11.58it/s]

Training loss: 0.021940  [121632/139199]


 90%|████████▉ | 3903/4350 [05:19<00:36, 12.29it/s]

Training loss: 0.031750  [124832/139199]


 92%|█████████▏| 4003/4350 [05:27<00:27, 12.41it/s]

Training loss: 0.027332  [128032/139199]


 94%|█████████▍| 4103/4350 [05:36<00:20, 12.09it/s]

Training loss: 0.026431  [131232/139199]


 97%|█████████▋| 4203/4350 [05:44<00:11, 12.44it/s]

Training loss: 0.024856  [134432/139199]


 99%|█████████▉| 4303/4350 [05:52<00:03, 12.69it/s]

Training loss: 0.024676  [137632/139199]


100%|██████████| 4350/4350 [05:56<00:00, 12.20it/s]


Train loss: 0.026245 - Val loss: 0.024283 

Epoch 11
-------------------------------


  0%|          | 1/4350 [00:00<14:21,  5.05it/s]

Training loss: 0.024531  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:37, 12.60it/s]

Training loss: 0.029703  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:28, 12.63it/s]

Training loss: 0.030619  [ 6432/139199]


  7%|▋         | 303/4350 [00:24<05:19, 12.67it/s]

Training loss: 0.023682  [ 9632/139199]


  9%|▉         | 403/4350 [00:32<05:11, 12.66it/s]

Training loss: 0.022487  [12832/139199]


 12%|█▏        | 503/4350 [00:40<05:09, 12.43it/s]

Training loss: 0.026065  [16032/139199]


 14%|█▍        | 603/4350 [00:48<04:59, 12.52it/s]

Training loss: 0.025659  [19232/139199]


 16%|█▌        | 703/4350 [00:56<04:57, 12.26it/s]

Training loss: 0.020225  [22432/139199]


 18%|█▊        | 803/4350 [01:04<04:42, 12.54it/s]

Training loss: 0.021612  [25632/139199]


 21%|██        | 903/4350 [01:12<04:51, 11.83it/s]

Training loss: 0.027439  [28832/139199]


 23%|██▎       | 1003/4350 [01:20<04:34, 12.21it/s]

Training loss: 0.030597  [32032/139199]


 25%|██▌       | 1103/4350 [01:29<04:13, 12.81it/s]

Training loss: 0.026336  [35232/139199]


 28%|██▊       | 1203/4350 [01:37<04:10, 12.55it/s]

Training loss: 0.027274  [38432/139199]


 30%|██▉       | 1303/4350 [01:45<04:02, 12.57it/s]

Training loss: 0.027822  [41632/139199]


 32%|███▏      | 1403/4350 [01:53<03:54, 12.57it/s]

Training loss: 0.028068  [44832/139199]


 35%|███▍      | 1503/4350 [02:01<03:45, 12.60it/s]

Training loss: 0.025292  [48032/139199]


 37%|███▋      | 1603/4350 [02:09<03:40, 12.44it/s]

Training loss: 0.026063  [51232/139199]


 39%|███▉      | 1703/4350 [02:17<03:30, 12.56it/s]

Training loss: 0.027030  [54432/139199]


 41%|████▏     | 1803/4350 [02:26<03:25, 12.38it/s]

Training loss: 0.027218  [57632/139199]


 44%|████▎     | 1903/4350 [02:34<03:50, 10.60it/s]

Training loss: 0.025651  [60832/139199]


 46%|████▌     | 2003/4350 [02:42<03:06, 12.60it/s]

Training loss: 0.026838  [64032/139199]


 48%|████▊     | 2103/4350 [02:50<02:57, 12.63it/s]

Training loss: 0.023148  [67232/139199]


 51%|█████     | 2203/4350 [02:58<02:58, 12.03it/s]

Training loss: 0.029886  [70432/139199]


 53%|█████▎    | 2303/4350 [03:06<02:40, 12.72it/s]

Training loss: 0.026549  [73632/139199]


 55%|█████▌    | 2403/4350 [03:14<02:32, 12.75it/s]

Training loss: 0.030027  [76832/139199]


 58%|█████▊    | 2503/4350 [03:22<02:31, 12.17it/s]

Training loss: 0.031702  [80032/139199]


 60%|█████▉    | 2603/4350 [03:30<02:26, 11.91it/s]

Training loss: 0.023475  [83232/139199]


 62%|██████▏   | 2703/4350 [03:39<02:11, 12.53it/s]

Training loss: 0.025225  [86432/139199]


 64%|██████▍   | 2803/4350 [03:47<02:09, 11.93it/s]

Training loss: 0.027353  [89632/139199]


 67%|██████▋   | 2903/4350 [03:55<01:53, 12.73it/s]

Training loss: 0.024252  [92832/139199]


 69%|██████▉   | 3003/4350 [04:03<01:50, 12.15it/s]

Training loss: 0.027680  [96032/139199]


 71%|███████▏  | 3103/4350 [04:11<01:44, 11.95it/s]

Training loss: 0.022799  [99232/139199]


 74%|███████▎  | 3203/4350 [04:19<01:37, 11.78it/s]

Training loss: 0.023418  [102432/139199]


 76%|███████▌  | 3303/4350 [04:27<01:21, 12.86it/s]

Training loss: 0.023657  [105632/139199]


 78%|███████▊  | 3403/4350 [04:35<01:24, 11.21it/s]

Training loss: 0.023874  [108832/139199]


 81%|████████  | 3503/4350 [04:43<01:08, 12.33it/s]

Training loss: 0.027105  [112032/139199]


 83%|████████▎ | 3603/4350 [04:52<00:58, 12.71it/s]

Training loss: 0.027413  [115232/139199]


 85%|████████▌ | 3703/4350 [05:00<00:52, 12.40it/s]

Training loss: 0.023831  [118432/139199]


 87%|████████▋ | 3803/4350 [05:08<00:41, 13.04it/s]

Training loss: 0.029476  [121632/139199]


 90%|████████▉ | 3903/4350 [05:16<00:34, 12.93it/s]

Training loss: 0.023167  [124832/139199]


 92%|█████████▏| 4003/4350 [05:24<00:27, 12.47it/s]

Training loss: 0.021110  [128032/139199]


 94%|█████████▍| 4103/4350 [05:32<00:19, 12.67it/s]

Training loss: 0.028716  [131232/139199]


 97%|█████████▋| 4203/4350 [05:40<00:11, 12.82it/s]

Training loss: 0.028639  [134432/139199]


 99%|█████████▉| 4303/4350 [05:48<00:03, 12.00it/s]

Training loss: 0.028476  [137632/139199]


100%|██████████| 4350/4350 [05:52<00:00, 12.35it/s]


Train loss: 0.026200 - Val loss: 0.023910 

Epoch 12
-------------------------------


  0%|          | 1/4350 [00:00<14:14,  5.09it/s]

Training loss: 0.029507  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:38, 12.54it/s]

Training loss: 0.026525  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:49, 11.86it/s]

Training loss: 0.019351  [ 6432/139199]


  7%|▋         | 303/4350 [00:24<05:24, 12.48it/s]

Training loss: 0.024775  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:35, 11.78it/s]

Training loss: 0.025654  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:19, 12.05it/s]

Training loss: 0.029725  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:00, 12.48it/s]

Training loss: 0.024461  [19232/139199]


 16%|█▌        | 703/4350 [00:57<05:06, 11.92it/s]

Training loss: 0.033649  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:52, 12.12it/s]

Training loss: 0.028123  [25632/139199]


 21%|██        | 903/4350 [01:14<04:55, 11.66it/s]

Training loss: 0.025665  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:23, 12.68it/s]

Training loss: 0.026070  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:19, 12.50it/s]

Training loss: 0.024290  [35232/139199]


 28%|██▊       | 1203/4350 [01:38<04:31, 11.60it/s]

Training loss: 0.028741  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:11, 12.11it/s]

Training loss: 0.030576  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<03:54, 12.56it/s]

Training loss: 0.023841  [44832/139199]


 35%|███▍      | 1503/4350 [02:03<04:09, 11.41it/s]

Training loss: 0.024283  [48032/139199]


 37%|███▋      | 1603/4350 [02:11<03:42, 12.37it/s]

Training loss: 0.028046  [51232/139199]


 39%|███▉      | 1703/4350 [02:19<03:38, 12.12it/s]

Training loss: 0.029275  [54432/139199]


 41%|████▏     | 1803/4350 [02:27<03:20, 12.69it/s]

Training loss: 0.026915  [57632/139199]


 44%|████▎     | 1903/4350 [02:35<03:15, 12.52it/s]

Training loss: 0.024827  [60832/139199]


 46%|████▌     | 2003/4350 [02:43<03:06, 12.61it/s]

Training loss: 0.025207  [64032/139199]


 48%|████▊     | 2103/4350 [02:52<03:10, 11.77it/s]

Training loss: 0.024860  [67232/139199]


 51%|█████     | 2203/4350 [03:00<02:57, 12.12it/s]

Training loss: 0.028344  [70432/139199]


 53%|█████▎    | 2303/4350 [03:08<02:46, 12.31it/s]

Training loss: 0.028438  [73632/139199]


 55%|█████▌    | 2403/4350 [03:16<02:43, 11.92it/s]

Training loss: 0.027712  [76832/139199]


 58%|█████▊    | 2503/4350 [03:24<02:29, 12.32it/s]

Training loss: 0.026609  [80032/139199]


 60%|█████▉    | 2603/4350 [03:32<02:19, 12.53it/s]

Training loss: 0.021641  [83232/139199]


 62%|██████▏   | 2703/4350 [03:40<02:15, 12.13it/s]

Training loss: 0.024458  [86432/139199]


 64%|██████▍   | 2803/4350 [03:48<02:02, 12.59it/s]

Training loss: 0.027710  [89632/139199]


 67%|██████▋   | 2903/4350 [03:56<01:53, 12.78it/s]

Training loss: 0.022900  [92832/139199]


 69%|██████▉   | 3003/4350 [04:04<01:49, 12.26it/s]

Training loss: 0.029025  [96032/139199]


 71%|███████▏  | 3103/4350 [04:12<01:45, 11.77it/s]

Training loss: 0.027870  [99232/139199]


 74%|███████▎  | 3203/4350 [04:21<01:32, 12.43it/s]

Training loss: 0.027259  [102432/139199]


 76%|███████▌  | 3303/4350 [04:29<01:27, 11.98it/s]

Training loss: 0.027735  [105632/139199]


 78%|███████▊  | 3403/4350 [04:37<01:14, 12.72it/s]

Training loss: 0.022550  [108832/139199]


 81%|████████  | 3503/4350 [04:45<01:05, 12.88it/s]

Training loss: 0.024296  [112032/139199]


 83%|████████▎ | 3603/4350 [04:53<01:02, 12.04it/s]

Training loss: 0.025937  [115232/139199]


 85%|████████▌ | 3703/4350 [05:01<00:52, 12.35it/s]

Training loss: 0.025412  [118432/139199]


 87%|████████▋ | 3803/4350 [05:09<00:44, 12.32it/s]

Training loss: 0.028301  [121632/139199]


 90%|████████▉ | 3903/4350 [05:17<00:36, 12.16it/s]

Training loss: 0.022805  [124832/139199]


 92%|█████████▏| 4003/4350 [05:26<00:27, 12.42it/s]

Training loss: 0.024953  [128032/139199]


 94%|█████████▍| 4103/4350 [05:34<00:20, 12.20it/s]

Training loss: 0.025652  [131232/139199]


 97%|█████████▋| 4203/4350 [05:42<00:11, 12.47it/s]

Training loss: 0.027366  [134432/139199]


 99%|█████████▉| 4303/4350 [05:50<00:03, 11.78it/s]

Training loss: 0.030240  [137632/139199]


100%|██████████| 4350/4350 [05:54<00:00, 12.27it/s]


Train loss: 0.026127 - Val loss: 0.024161 

Epoch 13
-------------------------------


  0%|          | 3/4350 [00:00<08:07,  8.91it/s]

Training loss: 0.025524  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:59, 11.80it/s]

Training loss: 0.028591  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:54, 11.70it/s]

Training loss: 0.025626  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:19, 12.67it/s]

Training loss: 0.029320  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:11, 12.65it/s]

Training loss: 0.025121  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:13, 12.26it/s]

Training loss: 0.023668  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:09, 12.10it/s]

Training loss: 0.025070  [19232/139199]


 16%|█▌        | 703/4350 [00:57<04:46, 12.71it/s]

Training loss: 0.031322  [22432/139199]


 18%|█▊        | 803/4350 [01:05<04:48, 12.32it/s]

Training loss: 0.027289  [25632/139199]


 21%|██        | 903/4350 [01:13<04:30, 12.76it/s]

Training loss: 0.026157  [28832/139199]


 23%|██▎       | 1003/4350 [01:21<04:32, 12.27it/s]

Training loss: 0.026197  [32032/139199]


 25%|██▌       | 1103/4350 [01:29<04:32, 11.91it/s]

Training loss: 0.032011  [35232/139199]


 28%|██▊       | 1203/4350 [01:37<04:14, 12.39it/s]

Training loss: 0.025378  [38432/139199]


 30%|██▉       | 1303/4350 [01:45<04:02, 12.58it/s]

Training loss: 0.025648  [41632/139199]


 32%|███▏      | 1403/4350 [01:53<04:05, 12.02it/s]

Training loss: 0.027469  [44832/139199]


 35%|███▍      | 1503/4350 [02:02<03:54, 12.12it/s]

Training loss: 0.030031  [48032/139199]


 37%|███▋      | 1603/4350 [02:10<03:47, 12.07it/s]

Training loss: 0.028966  [51232/139199]


 39%|███▉      | 1703/4350 [02:18<03:35, 12.30it/s]

Training loss: 0.023701  [54432/139199]


 41%|████▏     | 1803/4350 [02:26<03:21, 12.66it/s]

Training loss: 0.026572  [57632/139199]


 44%|████▎     | 1903/4350 [02:34<03:21, 12.14it/s]

Training loss: 0.026157  [60832/139199]


 46%|████▌     | 2003/4350 [02:42<03:09, 12.37it/s]

Training loss: 0.028574  [64032/139199]


 48%|████▊     | 2103/4350 [02:50<03:00, 12.44it/s]

Training loss: 0.025813  [67232/139199]


 51%|█████     | 2203/4350 [02:59<03:05, 11.59it/s]

Training loss: 0.027955  [70432/139199]


 53%|█████▎    | 2303/4350 [03:07<02:46, 12.27it/s]

Training loss: 0.026868  [73632/139199]


 55%|█████▌    | 2403/4350 [03:15<02:35, 12.49it/s]

Training loss: 0.031546  [76832/139199]


 58%|█████▊    | 2503/4350 [03:23<02:27, 12.55it/s]

Training loss: 0.029151  [80032/139199]


 60%|█████▉    | 2603/4350 [03:32<02:19, 12.52it/s]

Training loss: 0.025796  [83232/139199]


 62%|██████▏   | 2703/4350 [03:40<02:27, 11.19it/s]

Training loss: 0.024033  [86432/139199]


 64%|██████▍   | 2803/4350 [03:48<02:02, 12.65it/s]

Training loss: 0.023367  [89632/139199]


 67%|██████▋   | 2903/4350 [03:56<01:52, 12.85it/s]

Training loss: 0.030418  [92832/139199]


 69%|██████▉   | 3003/4350 [04:04<01:52, 11.96it/s]

Training loss: 0.020574  [96032/139199]


 71%|███████▏  | 3103/4350 [04:12<01:37, 12.85it/s]

Training loss: 0.020673  [99232/139199]


 74%|███████▎  | 3203/4350 [04:20<01:31, 12.60it/s]

Training loss: 0.027214  [102432/139199]


 76%|███████▌  | 3303/4350 [04:28<01:25, 12.31it/s]

Training loss: 0.030351  [105632/139199]


 78%|███████▊  | 3403/4350 [04:36<01:14, 12.63it/s]

Training loss: 0.026141  [108832/139199]


 81%|████████  | 3503/4350 [04:44<01:08, 12.40it/s]

Training loss: 0.029364  [112032/139199]


 83%|████████▎ | 3603/4350 [04:52<01:01, 12.08it/s]

Training loss: 0.024826  [115232/139199]


 85%|████████▌ | 3703/4350 [05:00<00:51, 12.54it/s]

Training loss: 0.026378  [118432/139199]


 87%|████████▋ | 3803/4350 [05:08<00:43, 12.50it/s]

Training loss: 0.025949  [121632/139199]


 90%|████████▉ | 3903/4350 [05:16<00:37, 11.84it/s]

Training loss: 0.024725  [124832/139199]


 92%|█████████▏| 4003/4350 [05:24<00:27, 12.70it/s]

Training loss: 0.025538  [128032/139199]


 94%|█████████▍| 4103/4350 [05:32<00:20, 12.14it/s]

Training loss: 0.028255  [131232/139199]


 97%|█████████▋| 4203/4350 [05:40<00:12, 11.90it/s]

Training loss: 0.027149  [134432/139199]


 99%|█████████▉| 4303/4350 [05:48<00:03, 12.52it/s]

Training loss: 0.024893  [137632/139199]


100%|██████████| 4350/4350 [05:52<00:00, 12.33it/s]


Train loss: 0.026279 - Val loss: 0.024543 

Epoch 14
-------------------------------


  0%|          | 3/4350 [00:00<07:53,  9.17it/s]

Training loss: 0.024940  [   32/139199]


  2%|▏         | 103/4350 [00:08<06:05, 11.62it/s]

Training loss: 0.026423  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:53, 11.73it/s]

Training loss: 0.028059  [ 6432/139199]


  7%|▋         | 303/4350 [00:24<05:23, 12.51it/s]

Training loss: 0.026431  [ 9632/139199]


  9%|▉         | 403/4350 [00:32<05:30, 11.95it/s]

Training loss: 0.025058  [12832/139199]


 12%|█▏        | 503/4350 [00:40<05:16, 12.15it/s]

Training loss: 0.034137  [16032/139199]


 14%|█▍        | 603/4350 [00:48<05:04, 12.30it/s]

Training loss: 0.022651  [19232/139199]


 16%|█▌        | 703/4350 [00:56<05:01, 12.08it/s]

Training loss: 0.023748  [22432/139199]


 18%|█▊        | 803/4350 [01:04<04:43, 12.53it/s]

Training loss: 0.028148  [25632/139199]


 21%|██        | 903/4350 [01:12<04:31, 12.67it/s]

Training loss: 0.028918  [28832/139199]


 23%|██▎       | 1003/4350 [01:21<04:37, 12.06it/s]

Training loss: 0.030267  [32032/139199]


 25%|██▌       | 1103/4350 [01:28<04:20, 12.44it/s]

Training loss: 0.025532  [35232/139199]


 28%|██▊       | 1203/4350 [01:37<04:21, 12.06it/s]

Training loss: 0.027333  [38432/139199]


 30%|██▉       | 1303/4350 [01:45<04:22, 11.60it/s]

Training loss: 0.025338  [41632/139199]


 32%|███▏      | 1403/4350 [01:53<03:53, 12.62it/s]

Training loss: 0.028528  [44832/139199]


 35%|███▍      | 1503/4350 [02:02<03:55, 12.07it/s]

Training loss: 0.026088  [48032/139199]


 37%|███▋      | 1603/4350 [02:10<03:50, 11.93it/s]

Training loss: 0.026941  [51232/139199]


 39%|███▉      | 1703/4350 [02:18<03:37, 12.18it/s]

Training loss: 0.025619  [54432/139199]


 41%|████▏     | 1803/4350 [02:26<03:24, 12.46it/s]

Training loss: 0.026581  [57632/139199]


 44%|████▎     | 1903/4350 [02:34<03:23, 12.00it/s]

Training loss: 0.034082  [60832/139199]


 46%|████▌     | 2003/4350 [02:42<03:05, 12.68it/s]

Training loss: 0.023115  [64032/139199]


 48%|████▊     | 2103/4350 [02:50<02:54, 12.91it/s]

Training loss: 0.028246  [67232/139199]


 51%|█████     | 2203/4350 [02:58<02:50, 12.56it/s]

Training loss: 0.024148  [70432/139199]


 53%|█████▎    | 2303/4350 [03:07<02:46, 12.31it/s]

Training loss: 0.026101  [73632/139199]


 55%|█████▌    | 2403/4350 [03:15<02:33, 12.69it/s]

Training loss: 0.023940  [76832/139199]


 58%|█████▊    | 2503/4350 [03:23<02:30, 12.31it/s]

Training loss: 0.022532  [80032/139199]


 60%|█████▉    | 2603/4350 [03:31<02:19, 12.48it/s]

Training loss: 0.027465  [83232/139199]


 62%|██████▏   | 2703/4350 [03:39<02:14, 12.29it/s]

Training loss: 0.027347  [86432/139199]


 64%|██████▍   | 2803/4350 [03:47<02:03, 12.53it/s]

Training loss: 0.024311  [89632/139199]


 67%|██████▋   | 2903/4350 [03:55<02:05, 11.54it/s]

Training loss: 0.025989  [92832/139199]


 69%|██████▉   | 3003/4350 [04:04<01:51, 12.09it/s]

Training loss: 0.022258  [96032/139199]


 71%|███████▏  | 3103/4350 [04:12<01:39, 12.51it/s]

Training loss: 0.024631  [99232/139199]


 74%|███████▎  | 3203/4350 [04:20<01:37, 11.74it/s]

Training loss: 0.027689  [102432/139199]


 76%|███████▌  | 3303/4350 [04:28<01:22, 12.63it/s]

Training loss: 0.025515  [105632/139199]


 78%|███████▊  | 3403/4350 [04:36<01:16, 12.45it/s]

Training loss: 0.031744  [108832/139199]


 81%|████████  | 3503/4350 [04:44<01:09, 12.14it/s]

Training loss: 0.024732  [112032/139199]


 83%|████████▎ | 3603/4350 [04:52<01:01, 12.23it/s]

Training loss: 0.034659  [115232/139199]


 85%|████████▌ | 3703/4350 [05:00<00:50, 12.77it/s]

Training loss: 0.023315  [118432/139199]


 87%|████████▋ | 3803/4350 [05:09<00:47, 11.59it/s]

Training loss: 0.027316  [121632/139199]


 90%|████████▉ | 3903/4350 [05:17<00:38, 11.70it/s]

Training loss: 0.024510  [124832/139199]


 92%|█████████▏| 4003/4350 [05:25<00:27, 12.48it/s]

Training loss: 0.022932  [128032/139199]


 94%|█████████▍| 4103/4350 [05:33<00:21, 11.56it/s]

Training loss: 0.026233  [131232/139199]


 97%|█████████▋| 4203/4350 [05:41<00:11, 12.65it/s]

Training loss: 0.027045  [134432/139199]


 99%|█████████▉| 4303/4350 [05:50<00:03, 12.61it/s]

Training loss: 0.025530  [137632/139199]


100%|██████████| 4350/4350 [05:53<00:00, 12.29it/s]


Train loss: 0.026228 - Val loss: 0.024245 

Epoch 15
-------------------------------


  0%|          | 3/4350 [00:00<07:51,  9.22it/s]

Training loss: 0.024714  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:47, 12.24it/s]

Training loss: 0.021970  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:53, 11.75it/s]

Training loss: 0.025442  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:39, 11.91it/s]

Training loss: 0.024566  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:06, 12.89it/s]

Training loss: 0.022488  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:14, 12.22it/s]

Training loss: 0.028597  [16032/139199]


 14%|█▍        | 603/4350 [00:49<04:59, 12.52it/s]

Training loss: 0.025603  [19232/139199]


 16%|█▌        | 703/4350 [00:57<04:49, 12.60it/s]

Training loss: 0.034349  [22432/139199]


 18%|█▊        | 803/4350 [01:05<04:44, 12.49it/s]

Training loss: 0.026376  [25632/139199]


 21%|██        | 903/4350 [01:13<04:49, 11.90it/s]

Training loss: 0.026484  [28832/139199]


 23%|██▎       | 1003/4350 [01:21<04:23, 12.68it/s]

Training loss: 0.024186  [32032/139199]


 25%|██▌       | 1103/4350 [01:29<04:22, 12.35it/s]

Training loss: 0.027304  [35232/139199]


 28%|██▊       | 1203/4350 [01:38<04:18, 12.16it/s]

Training loss: 0.027032  [38432/139199]


 30%|██▉       | 1303/4350 [01:46<04:18, 11.77it/s]

Training loss: 0.028810  [41632/139199]


 32%|███▏      | 1403/4350 [01:54<04:07, 11.92it/s]

Training loss: 0.022995  [44832/139199]


 35%|███▍      | 1503/4350 [02:03<03:55, 12.07it/s]

Training loss: 0.029524  [48032/139199]


 37%|███▋      | 1603/4350 [02:11<03:49, 11.99it/s]

Training loss: 0.026120  [51232/139199]


 39%|███▉      | 1703/4350 [02:19<03:28, 12.71it/s]

Training loss: 0.028527  [54432/139199]


 41%|████▏     | 1803/4350 [02:27<03:21, 12.65it/s]

Training loss: 0.025282  [57632/139199]


 44%|████▎     | 1903/4350 [02:35<03:27, 11.77it/s]

Training loss: 0.034500  [60832/139199]


 46%|████▌     | 2003/4350 [02:43<03:11, 12.25it/s]

Training loss: 0.025053  [64032/139199]


 48%|████▊     | 2103/4350 [02:51<03:10, 11.80it/s]

Training loss: 0.022986  [67232/139199]


 51%|█████     | 2203/4350 [03:00<03:04, 11.61it/s]

Training loss: 0.025815  [70432/139199]


 53%|█████▎    | 2303/4350 [03:08<02:42, 12.59it/s]

Training loss: 0.022807  [73632/139199]


 55%|█████▌    | 2403/4350 [03:16<02:46, 11.69it/s]

Training loss: 0.020109  [76832/139199]


 58%|█████▊    | 2503/4350 [03:25<02:32, 12.14it/s]

Training loss: 0.028427  [80032/139199]


 60%|█████▉    | 2603/4350 [03:33<02:19, 12.52it/s]

Training loss: 0.025062  [83232/139199]


 62%|██████▏   | 2703/4350 [03:41<02:15, 12.17it/s]

Training loss: 0.032098  [86432/139199]


 64%|██████▍   | 2803/4350 [03:49<02:09, 11.93it/s]

Training loss: 0.025168  [89632/139199]


 67%|██████▋   | 2903/4350 [03:57<01:56, 12.37it/s]

Training loss: 0.025086  [92832/139199]


 69%|██████▉   | 3003/4350 [04:05<01:46, 12.63it/s]

Training loss: 0.023065  [96032/139199]


 71%|███████▏  | 3103/4350 [04:13<01:41, 12.28it/s]

Training loss: 0.025799  [99232/139199]


 74%|███████▎  | 3203/4350 [04:22<01:34, 12.09it/s]

Training loss: 0.024222  [102432/139199]


 76%|███████▌  | 3303/4350 [04:30<01:23, 12.54it/s]

Training loss: 0.021058  [105632/139199]


 78%|███████▊  | 3403/4350 [04:38<01:16, 12.36it/s]

Training loss: 0.026172  [108832/139199]


 81%|████████  | 3503/4350 [04:46<01:09, 12.19it/s]

Training loss: 0.028196  [112032/139199]


 83%|████████▎ | 3603/4350 [04:54<00:58, 12.82it/s]

Training loss: 0.034376  [115232/139199]


 85%|████████▌ | 3703/4350 [05:02<00:52, 12.38it/s]

Training loss: 0.028453  [118432/139199]


 87%|████████▋ | 3803/4350 [05:10<00:46, 11.68it/s]

Training loss: 0.023150  [121632/139199]


 90%|████████▉ | 3903/4350 [05:18<00:35, 12.58it/s]

Training loss: 0.023971  [124832/139199]


 92%|█████████▏| 4003/4350 [05:27<00:28, 12.24it/s]

Training loss: 0.025453  [128032/139199]


 94%|█████████▍| 4103/4350 [05:35<00:20, 11.83it/s]

Training loss: 0.026755  [131232/139199]


 97%|█████████▋| 4203/4350 [05:43<00:11, 12.64it/s]

Training loss: 0.021943  [134432/139199]


 99%|█████████▉| 4303/4350 [05:51<00:03, 12.72it/s]

Training loss: 0.022827  [137632/139199]


100%|██████████| 4350/4350 [05:55<00:00, 12.24it/s]


Train loss: 0.026115 - Val loss: 0.024266 

Epoch 16
-------------------------------


  0%|          | 3/4350 [00:00<08:44,  8.29it/s]

Training loss: 0.036241  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:54, 11.98it/s]

Training loss: 0.027786  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:54, 11.70it/s]

Training loss: 0.025554  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:53, 11.43it/s]

Training loss: 0.025200  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:21, 12.29it/s]

Training loss: 0.023757  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:06, 12.57it/s]

Training loss: 0.023164  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:17, 11.80it/s]

Training loss: 0.020397  [19232/139199]


 16%|█▌        | 703/4350 [00:57<05:08, 11.82it/s]

Training loss: 0.031899  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:56, 11.96it/s]

Training loss: 0.020790  [25632/139199]


 21%|██        | 903/4350 [01:14<04:47, 12.00it/s]

Training loss: 0.020211  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:24, 12.66it/s]

Training loss: 0.024255  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:21, 12.43it/s]

Training loss: 0.027610  [35232/139199]


 28%|██▊       | 1203/4350 [01:38<04:28, 11.74it/s]

Training loss: 0.029732  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:09, 12.22it/s]

Training loss: 0.026324  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<03:57, 12.43it/s]

Training loss: 0.025025  [44832/139199]


 35%|███▍      | 1503/4350 [02:03<04:02, 11.72it/s]

Training loss: 0.025021  [48032/139199]


 37%|███▋      | 1603/4350 [02:11<03:34, 12.82it/s]

Training loss: 0.022836  [51232/139199]


 39%|███▉      | 1703/4350 [02:19<03:34, 12.36it/s]

Training loss: 0.023649  [54432/139199]


 41%|████▏     | 1803/4350 [02:27<03:21, 12.63it/s]

Training loss: 0.022035  [57632/139199]


 44%|████▎     | 1903/4350 [02:35<03:15, 12.51it/s]

Training loss: 0.023674  [60832/139199]


 46%|████▌     | 2003/4350 [02:43<03:05, 12.64it/s]

Training loss: 0.023935  [64032/139199]


 48%|████▊     | 2103/4350 [02:51<03:02, 12.34it/s]

Training loss: 0.024462  [67232/139199]


 51%|█████     | 2203/4350 [03:00<03:18, 10.82it/s]

Training loss: 0.029371  [70432/139199]


 53%|█████▎    | 2303/4350 [03:08<02:47, 12.24it/s]

Training loss: 0.028771  [73632/139199]


 55%|█████▌    | 2403/4350 [03:16<02:34, 12.59it/s]

Training loss: 0.026206  [76832/139199]


 58%|█████▊    | 2503/4350 [03:25<02:39, 11.58it/s]

Training loss: 0.030988  [80032/139199]


 60%|█████▉    | 2603/4350 [03:33<02:18, 12.60it/s]

Training loss: 0.027399  [83232/139199]


 62%|██████▏   | 2703/4350 [03:41<02:12, 12.47it/s]

Training loss: 0.027048  [86432/139199]


 64%|██████▍   | 2803/4350 [03:49<02:12, 11.66it/s]

Training loss: 0.025983  [89632/139199]


 67%|██████▋   | 2903/4350 [03:57<01:55, 12.58it/s]

Training loss: 0.025780  [92832/139199]


 69%|██████▉   | 3003/4350 [04:06<01:48, 12.41it/s]

Training loss: 0.026136  [96032/139199]


 71%|███████▏  | 3103/4350 [04:14<01:45, 11.82it/s]

Training loss: 0.021491  [99232/139199]


 74%|███████▎  | 3203/4350 [04:22<01:29, 12.82it/s]

Training loss: 0.028402  [102432/139199]


 76%|███████▌  | 3303/4350 [04:30<01:23, 12.53it/s]

Training loss: 0.023832  [105632/139199]


 78%|███████▊  | 3403/4350 [04:38<01:16, 12.44it/s]

Training loss: 0.028808  [108832/139199]


 81%|████████  | 3503/4350 [04:46<01:07, 12.62it/s]

Training loss: 0.028624  [112032/139199]


 83%|████████▎ | 3603/4350 [04:54<01:01, 12.12it/s]

Training loss: 0.023050  [115232/139199]


 85%|████████▌ | 3703/4350 [05:02<00:53, 12.18it/s]

Training loss: 0.035582  [118432/139199]


 87%|████████▋ | 3803/4350 [05:11<00:43, 12.48it/s]

Training loss: 0.021787  [121632/139199]


 90%|████████▉ | 3903/4350 [05:19<00:35, 12.49it/s]

Training loss: 0.024718  [124832/139199]


 92%|█████████▏| 4003/4350 [05:27<00:28, 12.12it/s]

Training loss: 0.028459  [128032/139199]


 94%|█████████▍| 4103/4350 [05:36<00:21, 11.44it/s]

Training loss: 0.029219  [131232/139199]


 97%|█████████▋| 4203/4350 [05:44<00:11, 12.47it/s]

Training loss: 0.025204  [134432/139199]


 99%|█████████▉| 4303/4350 [05:52<00:03, 11.93it/s]

Training loss: 0.025584  [137632/139199]


100%|██████████| 4350/4350 [05:56<00:00, 12.20it/s]


Train loss: 0.025992 - Val loss: 0.024755 

Epoch 17
-------------------------------


  0%|          | 3/4350 [00:00<08:22,  8.65it/s]

Training loss: 0.023026  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:45, 12.29it/s]

Training loss: 0.030946  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:43, 12.09it/s]

Training loss: 0.028785  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:53, 11.44it/s]

Training loss: 0.025781  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:23, 12.19it/s]

Training loss: 0.024511  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:12, 12.32it/s]

Training loss: 0.025859  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:18, 11.75it/s]

Training loss: 0.029867  [19232/139199]


 16%|█▌        | 703/4350 [00:58<04:51, 12.51it/s]

Training loss: 0.027709  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:46, 12.40it/s]

Training loss: 0.026812  [25632/139199]


 21%|██        | 903/4350 [01:15<04:35, 12.49it/s]

Training loss: 0.025086  [28832/139199]


 23%|██▎       | 1003/4350 [01:23<04:21, 12.81it/s]

Training loss: 0.028124  [32032/139199]


 25%|██▌       | 1103/4350 [01:31<04:29, 12.06it/s]

Training loss: 0.026049  [35232/139199]


 28%|██▊       | 1203/4350 [01:39<04:22, 12.01it/s]

Training loss: 0.030189  [38432/139199]


 30%|██▉       | 1303/4350 [01:48<04:29, 11.30it/s]

Training loss: 0.026206  [41632/139199]


 32%|███▏      | 1403/4350 [01:56<03:51, 12.71it/s]

Training loss: 0.022797  [44832/139199]


 35%|███▍      | 1503/4350 [02:04<03:52, 12.24it/s]

Training loss: 0.026524  [48032/139199]


 37%|███▋      | 1603/4350 [02:12<03:42, 12.33it/s]

Training loss: 0.026867  [51232/139199]


 39%|███▉      | 1703/4350 [02:20<03:40, 11.98it/s]

Training loss: 0.024656  [54432/139199]


 41%|████▏     | 1803/4350 [02:29<03:24, 12.47it/s]

Training loss: 0.025239  [57632/139199]


 44%|████▎     | 1903/4350 [02:37<03:21, 12.17it/s]

Training loss: 0.021945  [60832/139199]


 46%|████▌     | 2003/4350 [02:45<03:13, 12.14it/s]

Training loss: 0.025541  [64032/139199]


 48%|████▊     | 2103/4350 [02:53<02:59, 12.49it/s]

Training loss: 0.022985  [67232/139199]


 51%|█████     | 2203/4350 [03:01<02:59, 11.97it/s]

Training loss: 0.031819  [70432/139199]


 53%|█████▎    | 2303/4350 [03:09<02:48, 12.17it/s]

Training loss: 0.024420  [73632/139199]


 55%|█████▌    | 2403/4350 [03:18<02:40, 12.10it/s]

Training loss: 0.029313  [76832/139199]


 58%|█████▊    | 2503/4350 [03:26<02:38, 11.64it/s]

Training loss: 0.031036  [80032/139199]


 60%|█████▉    | 2603/4350 [03:34<02:25, 12.03it/s]

Training loss: 0.025327  [83232/139199]


 62%|██████▏   | 2703/4350 [03:42<02:14, 12.27it/s]

Training loss: 0.025460  [86432/139199]


 64%|██████▍   | 2803/4350 [03:50<02:09, 11.95it/s]

Training loss: 0.024732  [89632/139199]


 67%|██████▋   | 2903/4350 [03:59<01:55, 12.53it/s]

Training loss: 0.026932  [92832/139199]


 69%|██████▉   | 3003/4350 [04:07<01:47, 12.51it/s]

Training loss: 0.023131  [96032/139199]


 71%|███████▏  | 3103/4350 [04:15<01:41, 12.25it/s]

Training loss: 0.026395  [99232/139199]


 74%|███████▎  | 3203/4350 [04:23<01:34, 12.10it/s]

Training loss: 0.029521  [102432/139199]


 76%|███████▌  | 3303/4350 [04:31<01:26, 12.07it/s]

Training loss: 0.026499  [105632/139199]


 78%|███████▊  | 3403/4350 [04:40<01:19, 11.85it/s]

Training loss: 0.027391  [108832/139199]


 81%|████████  | 3503/4350 [04:48<01:15, 11.28it/s]

Training loss: 0.026560  [112032/139199]


 83%|████████▎ | 3603/4350 [04:57<00:59, 12.51it/s]

Training loss: 0.025999  [115232/139199]


 85%|████████▌ | 3703/4350 [05:05<00:56, 11.47it/s]

Training loss: 0.028141  [118432/139199]


 87%|████████▋ | 3803/4350 [05:14<00:47, 11.52it/s]

Training loss: 0.027727  [121632/139199]


 90%|████████▉ | 3903/4350 [05:22<00:37, 11.89it/s]

Training loss: 0.026905  [124832/139199]


 92%|█████████▏| 4003/4350 [05:30<00:27, 12.41it/s]

Training loss: 0.024325  [128032/139199]


 94%|█████████▍| 4103/4350 [05:38<00:20, 11.83it/s]

Training loss: 0.025420  [131232/139199]


 97%|█████████▋| 4203/4350 [05:47<00:11, 12.54it/s]

Training loss: 0.024160  [134432/139199]


 99%|█████████▉| 4303/4350 [05:55<00:03, 12.45it/s]

Training loss: 0.026217  [137632/139199]


100%|██████████| 4350/4350 [05:59<00:00, 12.11it/s]


Train loss: 0.025928 - Val loss: 0.024632 

Epoch 18
-------------------------------


  0%|          | 3/4350 [00:00<08:05,  8.96it/s]

Training loss: 0.025495  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:46, 12.25it/s]

Training loss: 0.026108  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:33, 12.44it/s]

Training loss: 0.024176  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:32, 12.18it/s]

Training loss: 0.027935  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:18, 12.37it/s]

Training loss: 0.028287  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:09, 12.42it/s]

Training loss: 0.025597  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:04, 12.31it/s]

Training loss: 0.024121  [19232/139199]


 16%|█▌        | 703/4350 [00:58<05:03, 12.00it/s]

Training loss: 0.025541  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:40, 12.63it/s]

Training loss: 0.028771  [25632/139199]


 21%|██        | 903/4350 [01:14<04:36, 12.47it/s]

Training loss: 0.027015  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:42, 11.84it/s]

Training loss: 0.021151  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:21, 12.42it/s]

Training loss: 0.025242  [35232/139199]


 28%|██▊       | 1203/4350 [01:38<04:13, 12.41it/s]

Training loss: 0.023161  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:24, 11.52it/s]

Training loss: 0.026536  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<04:11, 11.73it/s]

Training loss: 0.031259  [44832/139199]


 35%|███▍      | 1503/4350 [02:03<03:50, 12.34it/s]

Training loss: 0.025553  [48032/139199]


 37%|███▋      | 1603/4350 [02:12<03:52, 11.81it/s]

Training loss: 0.024550  [51232/139199]


 39%|███▉      | 1703/4350 [02:20<03:34, 12.33it/s]

Training loss: 0.025838  [54432/139199]


 41%|████▏     | 1803/4350 [02:28<03:25, 12.41it/s]

Training loss: 0.027928  [57632/139199]


 44%|████▎     | 1903/4350 [02:36<03:20, 12.18it/s]

Training loss: 0.023873  [60832/139199]


 46%|████▌     | 2003/4350 [02:44<03:08, 12.47it/s]

Training loss: 0.025363  [64032/139199]


 48%|████▊     | 2103/4350 [02:52<03:01, 12.35it/s]

Training loss: 0.028868  [67232/139199]


 51%|█████     | 2203/4350 [03:01<02:54, 12.31it/s]

Training loss: 0.024737  [70432/139199]


 53%|█████▎    | 2303/4350 [03:09<02:41, 12.66it/s]

Training loss: 0.024805  [73632/139199]


 55%|█████▌    | 2403/4350 [03:17<02:42, 11.97it/s]

Training loss: 0.026604  [76832/139199]


 58%|█████▊    | 2503/4350 [03:25<02:33, 12.03it/s]

Training loss: 0.028282  [80032/139199]


 60%|█████▉    | 2603/4350 [03:34<02:25, 11.97it/s]

Training loss: 0.032312  [83232/139199]


 62%|██████▏   | 2703/4350 [03:42<02:13, 12.34it/s]

Training loss: 0.023934  [86432/139199]


 64%|██████▍   | 2803/4350 [03:50<02:09, 11.98it/s]

Training loss: 0.024920  [89632/139199]


 67%|██████▋   | 2903/4350 [03:58<02:01, 11.94it/s]

Training loss: 0.028717  [92832/139199]


 69%|██████▉   | 3003/4350 [04:06<01:50, 12.22it/s]

Training loss: 0.026459  [96032/139199]


 71%|███████▏  | 3103/4350 [04:15<01:46, 11.73it/s]

Training loss: 0.022139  [99232/139199]


 74%|███████▎  | 3203/4350 [04:23<01:36, 11.85it/s]

Training loss: 0.026576  [102432/139199]


 76%|███████▌  | 3303/4350 [04:31<01:24, 12.43it/s]

Training loss: 0.026009  [105632/139199]


 78%|███████▊  | 3403/4350 [04:40<01:16, 12.36it/s]

Training loss: 0.023602  [108832/139199]


 81%|████████  | 3503/4350 [04:48<01:12, 11.70it/s]

Training loss: 0.029120  [112032/139199]


 83%|████████▎ | 3603/4350 [04:56<01:00, 12.37it/s]

Training loss: 0.025005  [115232/139199]


 85%|████████▌ | 3703/4350 [05:04<00:50, 12.81it/s]

Training loss: 0.025052  [118432/139199]


 87%|████████▋ | 3803/4350 [05:12<00:45, 12.03it/s]

Training loss: 0.027615  [121632/139199]


 90%|████████▉ | 3903/4350 [05:20<00:36, 12.41it/s]

Training loss: 0.021448  [124832/139199]


 92%|█████████▏| 4003/4350 [05:28<00:28, 12.09it/s]

Training loss: 0.030871  [128032/139199]


 94%|█████████▍| 4103/4350 [05:36<00:19, 12.40it/s]

Training loss: 0.028781  [131232/139199]


 97%|█████████▋| 4203/4350 [05:45<00:11, 12.39it/s]

Training loss: 0.023592  [134432/139199]


 99%|█████████▉| 4303/4350 [05:53<00:03, 12.28it/s]

Training loss: 0.020802  [137632/139199]


100%|██████████| 4350/4350 [05:57<00:00, 12.18it/s]


Train loss: 0.025889 - Val loss: 0.024597 

Epoch 19
-------------------------------


  0%|          | 3/4350 [00:00<07:48,  9.29it/s]

Training loss: 0.027745  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:44, 12.32it/s]

Training loss: 0.025614  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:37, 12.27it/s]

Training loss: 0.025105  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:33, 12.12it/s]

Training loss: 0.024832  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:18, 12.39it/s]

Training loss: 0.020698  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:14, 12.24it/s]

Training loss: 0.023173  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:12, 11.98it/s]

Training loss: 0.029046  [19232/139199]


 16%|█▌        | 703/4350 [00:57<04:51, 12.53it/s]

Training loss: 0.024826  [22432/139199]


 18%|█▊        | 803/4350 [01:05<04:41, 12.61it/s]

Training loss: 0.022713  [25632/139199]


 21%|██        | 903/4350 [01:13<04:40, 12.27it/s]

Training loss: 0.024671  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:39, 11.95it/s]

Training loss: 0.026699  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:33, 11.86it/s]

Training loss: 0.026389  [35232/139199]


 28%|██▊       | 1203/4350 [01:38<04:10, 12.58it/s]

Training loss: 0.025680  [38432/139199]


 30%|██▉       | 1303/4350 [01:46<04:19, 11.73it/s]

Training loss: 0.025322  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<04:05, 12.02it/s]

Training loss: 0.022352  [44832/139199]


 35%|███▍      | 1503/4350 [02:03<03:49, 12.39it/s]

Training loss: 0.023649  [48032/139199]


 37%|███▋      | 1603/4350 [02:11<03:45, 12.16it/s]

Training loss: 0.025120  [51232/139199]


 39%|███▉      | 1703/4350 [02:19<03:36, 12.24it/s]

Training loss: 0.023847  [54432/139199]


 41%|████▏     | 1803/4350 [02:27<03:25, 12.37it/s]

Training loss: 0.024108  [57632/139199]


 44%|████▎     | 1903/4350 [02:36<03:27, 11.81it/s]

Training loss: 0.021315  [60832/139199]


 46%|████▌     | 2003/4350 [02:44<03:10, 12.30it/s]

Training loss: 0.024013  [64032/139199]


 48%|████▊     | 2103/4350 [02:52<03:03, 12.24it/s]

Training loss: 0.026801  [67232/139199]


 51%|█████     | 2203/4350 [03:00<03:07, 11.47it/s]

Training loss: 0.026271  [70432/139199]


 53%|█████▎    | 2303/4350 [03:09<02:55, 11.68it/s]

Training loss: 0.030930  [73632/139199]


 55%|█████▌    | 2403/4350 [03:17<02:37, 12.34it/s]

Training loss: 0.028557  [76832/139199]


 58%|█████▊    | 2503/4350 [03:25<02:31, 12.23it/s]

Training loss: 0.027561  [80032/139199]


 60%|█████▉    | 2603/4350 [03:33<02:21, 12.33it/s]

Training loss: 0.025117  [83232/139199]


 62%|██████▏   | 2703/4350 [03:42<02:15, 12.12it/s]

Training loss: 0.027433  [86432/139199]


 64%|██████▍   | 2803/4350 [03:50<02:18, 11.20it/s]

Training loss: 0.027513  [89632/139199]


 67%|██████▋   | 2903/4350 [03:58<01:59, 12.11it/s]

Training loss: 0.025202  [92832/139199]


 69%|██████▉   | 3003/4350 [04:06<01:46, 12.61it/s]

Training loss: 0.027560  [96032/139199]


 71%|███████▏  | 3103/4350 [04:15<01:43, 12.07it/s]

Training loss: 0.026195  [99232/139199]


 74%|███████▎  | 3203/4350 [04:23<01:39, 11.57it/s]

Training loss: 0.033891  [102432/139199]


 76%|███████▌  | 3303/4350 [04:31<01:25, 12.30it/s]

Training loss: 0.027296  [105632/139199]


 78%|███████▊  | 3403/4350 [04:39<01:20, 11.76it/s]

Training loss: 0.026058  [108832/139199]


 81%|████████  | 3503/4350 [04:47<01:12, 11.68it/s]

Training loss: 0.022821  [112032/139199]


 83%|████████▎ | 3603/4350 [04:56<01:02, 11.97it/s]

Training loss: 0.025394  [115232/139199]


 85%|████████▌ | 3703/4350 [05:04<00:53, 12.10it/s]

Training loss: 0.025380  [118432/139199]


 87%|████████▋ | 3803/4350 [05:13<00:44, 12.16it/s]

Training loss: 0.025816  [121632/139199]


 90%|████████▉ | 3903/4350 [05:21<00:37, 11.87it/s]

Training loss: 0.026285  [124832/139199]


 92%|█████████▏| 4003/4350 [05:29<00:28, 12.03it/s]

Training loss: 0.030456  [128032/139199]


 94%|█████████▍| 4103/4350 [05:38<00:19, 12.39it/s]

Training loss: 0.024078  [131232/139199]


 97%|█████████▋| 4203/4350 [05:46<00:12, 12.10it/s]

Training loss: 0.025092  [134432/139199]


 99%|█████████▉| 4303/4350 [05:54<00:03, 12.08it/s]

Training loss: 0.024300  [137632/139199]


100%|██████████| 4350/4350 [05:58<00:00, 12.13it/s]


Train loss: 0.026037 - Val loss: 0.026389 

Epoch 20
-------------------------------


  0%|          | 3/4350 [00:00<08:08,  8.90it/s]

Training loss: 0.026489  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:53, 12.01it/s]

Training loss: 0.026061  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:42, 12.12it/s]

Training loss: 0.027494  [ 6432/139199]


  7%|▋         | 303/4350 [00:24<05:26, 12.41it/s]

Training loss: 0.025974  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:25, 12.13it/s]

Training loss: 0.026201  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:07, 12.53it/s]

Training loss: 0.024118  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:00, 12.47it/s]

Training loss: 0.025170  [19232/139199]


 16%|█▌        | 703/4350 [00:58<05:10, 11.73it/s]

Training loss: 0.020425  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:39, 12.71it/s]

Training loss: 0.031025  [25632/139199]


 21%|██        | 903/4350 [01:14<04:59, 11.52it/s]

Training loss: 0.022070  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:43, 11.82it/s]

Training loss: 0.027311  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:14, 12.78it/s]

Training loss: 0.027347  [35232/139199]


 28%|██▊       | 1203/4350 [01:39<04:11, 12.49it/s]

Training loss: 0.023885  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:12, 12.09it/s]

Training loss: 0.024971  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<04:00, 12.27it/s]

Training loss: 0.021722  [44832/139199]


 35%|███▍      | 1503/4350 [02:03<03:50, 12.33it/s]

Training loss: 0.027259  [48032/139199]


 37%|███▋      | 1603/4350 [02:11<03:42, 12.36it/s]

Training loss: 0.022058  [51232/139199]


 39%|███▉      | 1703/4350 [02:19<03:25, 12.90it/s]

Training loss: 0.021432  [54432/139199]


 41%|████▏     | 1803/4350 [02:28<03:23, 12.50it/s]

Training loss: 0.022923  [57632/139199]


 44%|████▎     | 1903/4350 [02:36<03:21, 12.17it/s]

Training loss: 0.022291  [60832/139199]


 46%|████▌     | 2003/4350 [02:44<03:09, 12.41it/s]

Training loss: 0.022292  [64032/139199]


 48%|████▊     | 2103/4350 [02:52<03:03, 12.26it/s]

Training loss: 0.027516  [67232/139199]


 51%|█████     | 2203/4350 [03:00<02:48, 12.73it/s]

Training loss: 0.030010  [70432/139199]


 53%|█████▎    | 2303/4350 [03:08<02:57, 11.55it/s]

Training loss: 0.023775  [73632/139199]


 55%|█████▌    | 2403/4350 [03:16<02:33, 12.66it/s]

Training loss: 0.027727  [76832/139199]


 58%|█████▊    | 2503/4350 [03:25<02:20, 13.15it/s]

Training loss: 0.029485  [80032/139199]


 60%|█████▉    | 2603/4350 [03:33<02:23, 12.18it/s]

Training loss: 0.024325  [83232/139199]


 62%|██████▏   | 2703/4350 [03:41<02:11, 12.54it/s]

Training loss: 0.025928  [86432/139199]


 64%|██████▍   | 2803/4350 [03:49<02:05, 12.38it/s]

Training loss: 0.022492  [89632/139199]


 67%|██████▋   | 2903/4350 [03:57<02:01, 11.87it/s]

Training loss: 0.025656  [92832/139199]


 69%|██████▉   | 3003/4350 [04:05<01:46, 12.70it/s]

Training loss: 0.028593  [96032/139199]


 71%|███████▏  | 3101/4350 [04:13<01:41, 12.28it/s]

Training loss: 0.027725  [99232/139199]


 74%|███████▎  | 3203/4350 [04:21<01:34, 12.14it/s]

Training loss: 0.026375  [102432/139199]


 76%|███████▌  | 3303/4350 [04:29<01:22, 12.62it/s]

Training loss: 0.024534  [105632/139199]


 78%|███████▊  | 3403/4350 [04:38<01:15, 12.58it/s]

Training loss: 0.027990  [108832/139199]


 81%|████████  | 3503/4350 [04:46<01:13, 11.55it/s]

Training loss: 0.027727  [112032/139199]


 83%|████████▎ | 3603/4350 [04:54<00:58, 12.85it/s]

Training loss: 0.025766  [115232/139199]


 85%|████████▌ | 3703/4350 [05:02<00:50, 12.81it/s]

Training loss: 0.025021  [118432/139199]


 87%|████████▋ | 3803/4350 [05:10<00:44, 12.30it/s]

Training loss: 0.024756  [121632/139199]


 90%|████████▉ | 3903/4350 [05:18<00:34, 12.98it/s]

Training loss: 0.025296  [124832/139199]


 92%|█████████▏| 4003/4350 [05:26<00:27, 12.54it/s]

Training loss: 0.028041  [128032/139199]


 94%|█████████▍| 4103/4350 [05:34<00:21, 11.60it/s]

Training loss: 0.027608  [131232/139199]


 97%|█████████▋| 4203/4350 [05:42<00:11, 12.64it/s]

Training loss: 0.025107  [134432/139199]


 99%|█████████▉| 4303/4350 [05:50<00:03, 12.43it/s]

Training loss: 0.020375  [137632/139199]


100%|██████████| 4350/4350 [05:54<00:00, 12.28it/s]


Train loss: 0.025979 - Val loss: 0.023841 

Epoch 21
-------------------------------


  0%|          | 3/4350 [00:00<07:59,  9.07it/s]

Training loss: 0.030622  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:40, 12.48it/s]

Training loss: 0.026558  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:25, 12.74it/s]

Training loss: 0.026727  [ 6432/139199]


  7%|▋         | 303/4350 [00:24<05:40, 11.89it/s]

Training loss: 0.025980  [ 9632/139199]


  9%|▉         | 403/4350 [00:32<05:16, 12.48it/s]

Training loss: 0.023581  [12832/139199]


 12%|█▏        | 503/4350 [00:40<05:01, 12.75it/s]

Training loss: 0.029022  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:31, 11.30it/s]

Training loss: 0.024242  [19232/139199]


 16%|█▌        | 703/4350 [00:57<05:05, 11.94it/s]

Training loss: 0.026517  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:50, 12.23it/s]

Training loss: 0.026058  [25632/139199]


 21%|██        | 903/4350 [01:14<04:42, 12.20it/s]

Training loss: 0.027195  [28832/139199]


 23%|██▎       | 1003/4350 [01:23<04:53, 11.39it/s]

Training loss: 0.024085  [32032/139199]


 25%|██▌       | 1103/4350 [01:31<04:32, 11.91it/s]

Training loss: 0.022616  [35232/139199]


 28%|██▊       | 1203/4350 [01:40<04:25, 11.86it/s]

Training loss: 0.026661  [38432/139199]


 30%|██▉       | 1303/4350 [01:48<04:27, 11.39it/s]

Training loss: 0.028803  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<04:05, 11.98it/s]

Training loss: 0.031008  [44832/139199]


 35%|███▍      | 1503/4350 [02:05<03:53, 12.20it/s]

Training loss: 0.025068  [48032/139199]


 37%|███▋      | 1603/4350 [02:13<03:46, 12.12it/s]

Training loss: 0.022894  [51232/139199]


 39%|███▉      | 1703/4350 [02:21<03:33, 12.39it/s]

Training loss: 0.033537  [54432/139199]


 41%|████▏     | 1803/4350 [02:29<03:27, 12.29it/s]

Training loss: 0.024790  [57632/139199]


 44%|████▎     | 1903/4350 [02:38<03:20, 12.21it/s]

Training loss: 0.026517  [60832/139199]


 46%|████▌     | 2003/4350 [02:46<03:07, 12.54it/s]

Training loss: 0.024188  [64032/139199]


 48%|████▊     | 2103/4350 [02:54<03:05, 12.12it/s]

Training loss: 0.023470  [67232/139199]


 51%|█████     | 2203/4350 [03:02<02:55, 12.22it/s]

Training loss: 0.026569  [70432/139199]


 53%|█████▎    | 2303/4350 [03:10<02:49, 12.09it/s]

Training loss: 0.020767  [73632/139199]


 55%|█████▌    | 2403/4350 [03:19<02:32, 12.74it/s]

Training loss: 0.026641  [76832/139199]


 58%|█████▊    | 2503/4350 [03:27<02:26, 12.57it/s]

Training loss: 0.022172  [80032/139199]


 60%|█████▉    | 2603/4350 [03:35<02:26, 11.89it/s]

Training loss: 0.022802  [83232/139199]


 62%|██████▏   | 2703/4350 [03:43<02:16, 12.03it/s]

Training loss: 0.026165  [86432/139199]


 64%|██████▍   | 2803/4350 [03:51<02:05, 12.35it/s]

Training loss: 0.021469  [89632/139199]


 67%|██████▋   | 2903/4350 [04:00<02:10, 11.12it/s]

Training loss: 0.022502  [92832/139199]


 69%|██████▉   | 3003/4350 [04:08<01:47, 12.58it/s]

Training loss: 0.026256  [96032/139199]


 71%|███████▏  | 3103/4350 [04:16<01:37, 12.79it/s]

Training loss: 0.024173  [99232/139199]


 74%|███████▎  | 3203/4350 [04:25<01:37, 11.81it/s]

Training loss: 0.025600  [102432/139199]


 76%|███████▌  | 3303/4350 [04:33<01:30, 11.53it/s]

Training loss: 0.035957  [105632/139199]


 78%|███████▊  | 3403/4350 [04:41<01:16, 12.41it/s]

Training loss: 0.031727  [108832/139199]


 81%|████████  | 3503/4350 [04:50<01:10, 11.96it/s]

Training loss: 0.023538  [112032/139199]


 83%|████████▎ | 3603/4350 [04:58<01:03, 11.74it/s]

Training loss: 0.025734  [115232/139199]


 85%|████████▌ | 3703/4350 [05:07<00:53, 12.13it/s]

Training loss: 0.024744  [118432/139199]


 87%|████████▋ | 3803/4350 [05:15<00:46, 11.85it/s]

Training loss: 0.023872  [121632/139199]


 90%|████████▉ | 3903/4350 [05:23<00:37, 11.93it/s]

Training loss: 0.022725  [124832/139199]


 92%|█████████▏| 4003/4350 [05:31<00:28, 12.36it/s]

Training loss: 0.026612  [128032/139199]


 94%|█████████▍| 4103/4350 [05:40<00:20, 12.27it/s]

Training loss: 0.024324  [131232/139199]


 97%|█████████▋| 4203/4350 [05:48<00:12, 11.33it/s]

Training loss: 0.030671  [134432/139199]


 99%|█████████▉| 4303/4350 [05:56<00:03, 12.83it/s]

Training loss: 0.025329  [137632/139199]


100%|██████████| 4350/4350 [06:00<00:00, 12.07it/s]


Train loss: 0.025831 - Val loss: 0.024940 

Epoch 22
-------------------------------


  0%|          | 3/4350 [00:00<08:13,  8.80it/s]

Training loss: 0.026800  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:53, 12.02it/s]

Training loss: 0.024471  [ 3232/139199]


  5%|▍         | 203/4350 [00:16<05:26, 12.70it/s]

Training loss: 0.033208  [ 6432/139199]


  7%|▋         | 303/4350 [00:24<05:37, 12.00it/s]

Training loss: 0.028720  [ 9632/139199]


  9%|▉         | 403/4350 [00:32<05:46, 11.38it/s]

Training loss: 0.024387  [12832/139199]


 12%|█▏        | 503/4350 [00:40<05:09, 12.44it/s]

Training loss: 0.022871  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:00, 12.47it/s]

Training loss: 0.026219  [19232/139199]


 16%|█▌        | 703/4350 [00:57<04:55, 12.33it/s]

Training loss: 0.023584  [22432/139199]


 18%|█▊        | 803/4350 [01:05<04:41, 12.61it/s]

Training loss: 0.022901  [25632/139199]


 21%|██        | 903/4350 [01:13<04:33, 12.60it/s]

Training loss: 0.029831  [28832/139199]


 23%|██▎       | 1003/4350 [01:21<04:24, 12.65it/s]

Training loss: 0.026613  [32032/139199]


 25%|██▌       | 1103/4350 [01:29<04:13, 12.81it/s]

Training loss: 0.022830  [35232/139199]


 28%|██▊       | 1203/4350 [01:37<04:09, 12.64it/s]

Training loss: 0.027721  [38432/139199]


 30%|██▉       | 1303/4350 [01:46<04:03, 12.50it/s]

Training loss: 0.029011  [41632/139199]


 32%|███▏      | 1403/4350 [01:54<03:58, 12.36it/s]

Training loss: 0.029155  [44832/139199]


 35%|███▍      | 1503/4350 [02:02<03:46, 12.58it/s]

Training loss: 0.019297  [48032/139199]


 37%|███▋      | 1603/4350 [02:10<03:39, 12.50it/s]

Training loss: 0.022040  [51232/139199]


 39%|███▉      | 1703/4350 [02:18<03:38, 12.10it/s]

Training loss: 0.024461  [54432/139199]


 41%|████▏     | 1803/4350 [02:26<03:28, 12.19it/s]

Training loss: 0.029048  [57632/139199]


 44%|████▎     | 1903/4350 [02:35<03:26, 11.83it/s]

Training loss: 0.023122  [60832/139199]


 46%|████▌     | 2003/4350 [02:43<03:20, 11.69it/s]

Training loss: 0.025050  [64032/139199]


 48%|████▊     | 2103/4350 [02:51<03:04, 12.19it/s]

Training loss: 0.027752  [67232/139199]


 51%|█████     | 2203/4350 [02:59<02:48, 12.78it/s]

Training loss: 0.022565  [70432/139199]


 53%|█████▎    | 2303/4350 [03:07<02:59, 11.37it/s]

Training loss: 0.025020  [73632/139199]


 55%|█████▌    | 2403/4350 [03:16<02:42, 12.00it/s]

Training loss: 0.025316  [76832/139199]


 58%|█████▊    | 2503/4350 [03:24<02:30, 12.31it/s]

Training loss: 0.025248  [80032/139199]


 60%|█████▉    | 2603/4350 [03:32<02:34, 11.29it/s]

Training loss: 0.025295  [83232/139199]


 62%|██████▏   | 2703/4350 [03:41<02:13, 12.36it/s]

Training loss: 0.020851  [86432/139199]


 64%|██████▍   | 2803/4350 [03:49<02:04, 12.42it/s]

Training loss: 0.024931  [89632/139199]


 67%|██████▋   | 2903/4350 [03:57<02:02, 11.85it/s]

Training loss: 0.027813  [92832/139199]


 69%|██████▉   | 3003/4350 [04:05<01:55, 11.69it/s]

Training loss: 0.027776  [96032/139199]


 71%|███████▏  | 3103/4350 [04:13<01:41, 12.28it/s]

Training loss: 0.025850  [99232/139199]


 74%|███████▎  | 3203/4350 [04:22<01:36, 11.86it/s]

Training loss: 0.020701  [102432/139199]


 76%|███████▌  | 3303/4350 [04:30<01:28, 11.83it/s]

Training loss: 0.020826  [105632/139199]


 78%|███████▊  | 3403/4350 [04:39<01:17, 12.29it/s]

Training loss: 0.026261  [108832/139199]


 81%|████████  | 3503/4350 [04:47<01:10, 12.10it/s]

Training loss: 0.025164  [112032/139199]


 83%|████████▎ | 3603/4350 [04:55<01:04, 11.57it/s]

Training loss: 0.022327  [115232/139199]


 85%|████████▌ | 3703/4350 [05:04<00:52, 12.28it/s]

Training loss: 0.024850  [118432/139199]


 87%|████████▋ | 3803/4350 [05:12<00:44, 12.39it/s]

Training loss: 0.024876  [121632/139199]


 90%|████████▉ | 3903/4350 [05:20<00:37, 11.78it/s]

Training loss: 0.025973  [124832/139199]


 92%|█████████▏| 4003/4350 [05:28<00:30, 11.50it/s]

Training loss: 0.027283  [128032/139199]


 94%|█████████▍| 4103/4350 [05:37<00:20, 11.99it/s]

Training loss: 0.027602  [131232/139199]


 97%|█████████▋| 4203/4350 [05:45<00:13, 11.27it/s]

Training loss: 0.027332  [134432/139199]


 99%|█████████▉| 4303/4350 [05:53<00:03, 12.13it/s]

Training loss: 0.024666  [137632/139199]


100%|██████████| 4350/4350 [05:58<00:00, 12.15it/s]


Train loss: 0.025730 - Val loss: 0.025503 

Epoch 23
-------------------------------


  0%|          | 3/4350 [00:00<08:26,  8.58it/s]

Training loss: 0.026714  [   32/139199]


  2%|▏         | 103/4350 [00:09<06:00, 11.79it/s]

Training loss: 0.025262  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:34, 12.40it/s]

Training loss: 0.026629  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:38, 11.97it/s]

Training loss: 0.025208  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:21, 12.27it/s]

Training loss: 0.024646  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:38, 11.37it/s]

Training loss: 0.024409  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:03, 12.34it/s]

Training loss: 0.024328  [19232/139199]


 16%|█▌        | 703/4350 [00:59<04:57, 12.25it/s]

Training loss: 0.027142  [22432/139199]


 18%|█▊        | 803/4350 [01:07<05:01, 11.78it/s]

Training loss: 0.020042  [25632/139199]


 21%|██        | 903/4350 [01:15<04:35, 12.51it/s]

Training loss: 0.022783  [28832/139199]


 23%|██▎       | 1003/4350 [01:23<04:43, 11.81it/s]

Training loss: 0.022936  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:47, 11.29it/s]

Training loss: 0.022096  [35232/139199]


 28%|██▊       | 1203/4350 [01:40<04:19, 12.12it/s]

Training loss: 0.025606  [38432/139199]


 30%|██▉       | 1303/4350 [01:49<04:03, 12.50it/s]

Training loss: 0.023479  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<03:55, 12.53it/s]

Training loss: 0.025746  [44832/139199]


 35%|███▍      | 1503/4350 [02:05<04:02, 11.75it/s]

Training loss: 0.025822  [48032/139199]


 37%|███▋      | 1603/4350 [02:14<03:46, 12.12it/s]

Training loss: 0.022923  [51232/139199]


 39%|███▉      | 1703/4350 [02:22<03:32, 12.44it/s]

Training loss: 0.021378  [54432/139199]


 41%|████▏     | 1803/4350 [02:30<03:34, 11.86it/s]

Training loss: 0.020387  [57632/139199]


 44%|████▎     | 1903/4350 [02:38<03:12, 12.73it/s]

Training loss: 0.024479  [60832/139199]


 46%|████▌     | 2003/4350 [02:46<03:10, 12.35it/s]

Training loss: 0.025295  [64032/139199]


 48%|████▊     | 2103/4350 [02:55<03:05, 12.12it/s]

Training loss: 0.024620  [67232/139199]


 51%|█████     | 2203/4350 [03:03<02:55, 12.21it/s]

Training loss: 0.022542  [70432/139199]


 53%|█████▎    | 2303/4350 [03:11<02:50, 11.98it/s]

Training loss: 0.028343  [73632/139199]


 55%|█████▌    | 2403/4350 [03:19<02:39, 12.20it/s]

Training loss: 0.022385  [76832/139199]


 58%|█████▊    | 2503/4350 [03:27<02:29, 12.39it/s]

Training loss: 0.026788  [80032/139199]


 60%|█████▉    | 2603/4350 [03:36<02:24, 12.07it/s]

Training loss: 0.022339  [83232/139199]


 62%|██████▏   | 2703/4350 [03:44<02:13, 12.35it/s]

Training loss: 0.028557  [86432/139199]


 64%|██████▍   | 2803/4350 [03:52<02:06, 12.27it/s]

Training loss: 0.022059  [89632/139199]


 67%|██████▋   | 2903/4350 [04:00<01:58, 12.22it/s]

Training loss: 0.029421  [92832/139199]


 69%|██████▉   | 3003/4350 [04:08<01:50, 12.21it/s]

Training loss: 0.025690  [96032/139199]


 71%|███████▏  | 3103/4350 [04:16<01:38, 12.62it/s]

Training loss: 0.024715  [99232/139199]


 74%|███████▎  | 3203/4350 [04:24<01:31, 12.60it/s]

Training loss: 0.025017  [102432/139199]


 76%|███████▌  | 3303/4350 [04:33<01:27, 11.99it/s]

Training loss: 0.026112  [105632/139199]


 78%|███████▊  | 3403/4350 [04:41<01:19, 11.98it/s]

Training loss: 0.024153  [108832/139199]


 81%|████████  | 3503/4350 [04:50<01:10, 12.04it/s]

Training loss: 0.024216  [112032/139199]


 83%|████████▎ | 3603/4350 [04:58<01:00, 12.35it/s]

Training loss: 0.028339  [115232/139199]


 85%|████████▌ | 3703/4350 [05:06<00:55, 11.70it/s]

Training loss: 0.026951  [118432/139199]


 87%|████████▋ | 3803/4350 [05:14<00:45, 12.11it/s]

Training loss: 0.022693  [121632/139199]


 90%|████████▉ | 3903/4350 [05:23<00:36, 12.18it/s]

Training loss: 0.027268  [124832/139199]


 92%|█████████▏| 4003/4350 [05:31<00:29, 11.72it/s]

Training loss: 0.024197  [128032/139199]


 94%|█████████▍| 4103/4350 [05:39<00:20, 11.84it/s]

Training loss: 0.025857  [131232/139199]


 97%|█████████▋| 4203/4350 [05:48<00:12, 11.69it/s]

Training loss: 0.027104  [134432/139199]


 99%|█████████▉| 4303/4350 [05:56<00:03, 12.06it/s]

Training loss: 0.024986  [137632/139199]


100%|██████████| 4350/4350 [06:00<00:00, 12.05it/s]


Train loss: 0.025641 - Val loss: 0.024549 

Epoch 24
-------------------------------


  0%|          | 3/4350 [00:00<08:25,  8.59it/s]

Training loss: 0.027987  [   32/139199]


  2%|▏         | 103/4350 [00:09<05:58, 11.85it/s]

Training loss: 0.028511  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:35, 12.36it/s]

Training loss: 0.025662  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:44, 11.75it/s]

Training loss: 0.021833  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:26, 12.10it/s]

Training loss: 0.030241  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:18, 12.07it/s]

Training loss: 0.027473  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:28, 11.41it/s]

Training loss: 0.021711  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:08, 11.84it/s]

Training loss: 0.027107  [22432/139199]


 18%|█▊        | 803/4350 [01:07<04:44, 12.47it/s]

Training loss: 0.027738  [25632/139199]


 21%|██        | 903/4350 [01:15<04:42, 12.19it/s]

Training loss: 0.023615  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:29, 12.40it/s]

Training loss: 0.023674  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:32, 11.91it/s]

Training loss: 0.024610  [35232/139199]


 28%|██▊       | 1203/4350 [01:41<04:29, 11.70it/s]

Training loss: 0.027453  [38432/139199]


 30%|██▉       | 1303/4350 [01:49<04:03, 12.52it/s]

Training loss: 0.027113  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<04:14, 11.60it/s]

Training loss: 0.027718  [44832/139199]


 35%|███▍      | 1503/4350 [02:06<04:04, 11.65it/s]

Training loss: 0.021956  [48032/139199]


 37%|███▋      | 1603/4350 [02:15<04:03, 11.29it/s]

Training loss: 0.026692  [51232/139199]


 39%|███▉      | 1703/4350 [02:23<03:52, 11.38it/s]

Training loss: 0.025885  [54432/139199]


 41%|████▏     | 1803/4350 [02:32<03:36, 11.79it/s]

Training loss: 0.025468  [57632/139199]


 44%|████▎     | 1903/4350 [02:41<03:27, 11.81it/s]

Training loss: 0.029234  [60832/139199]


 46%|████▌     | 2003/4350 [02:49<03:18, 11.82it/s]

Training loss: 0.028562  [64032/139199]


 48%|████▊     | 2103/4350 [02:57<03:07, 11.99it/s]

Training loss: 0.026228  [67232/139199]


 51%|█████     | 2203/4350 [03:06<03:07, 11.43it/s]

Training loss: 0.028306  [70432/139199]


 53%|█████▎    | 2303/4350 [03:15<03:01, 11.27it/s]

Training loss: 0.026497  [73632/139199]


 55%|█████▌    | 2403/4350 [03:23<02:46, 11.72it/s]

Training loss: 0.025949  [76832/139199]


 58%|█████▊    | 2503/4350 [03:32<02:36, 11.79it/s]

Training loss: 0.031495  [80032/139199]


 60%|█████▉    | 2603/4350 [03:40<02:34, 11.32it/s]

Training loss: 0.026140  [83232/139199]


 62%|██████▏   | 2703/4350 [03:49<02:20, 11.70it/s]

Training loss: 0.029556  [86432/139199]


 64%|██████▍   | 2803/4350 [03:57<02:16, 11.36it/s]

Training loss: 0.023327  [89632/139199]


 67%|██████▋   | 2903/4350 [04:06<02:00, 12.00it/s]

Training loss: 0.030753  [92832/139199]


 69%|██████▉   | 3003/4350 [04:15<01:57, 11.50it/s]

Training loss: 0.021838  [96032/139199]


 71%|███████▏  | 3103/4350 [04:23<01:45, 11.83it/s]

Training loss: 0.024226  [99232/139199]


 74%|███████▎  | 3203/4350 [04:32<01:35, 12.02it/s]

Training loss: 0.023085  [102432/139199]


 76%|███████▌  | 3303/4350 [04:40<01:32, 11.35it/s]

Training loss: 0.025764  [105632/139199]


 78%|███████▊  | 3403/4350 [04:49<01:21, 11.66it/s]

Training loss: 0.026139  [108832/139199]


 81%|████████  | 3503/4350 [04:57<01:09, 12.21it/s]

Training loss: 0.026535  [112032/139199]


 83%|████████▎ | 3603/4350 [05:06<01:04, 11.63it/s]

Training loss: 0.021726  [115232/139199]


 85%|████████▌ | 3703/4350 [05:14<00:54, 11.82it/s]

Training loss: 0.022821  [118432/139199]


 87%|████████▋ | 3803/4350 [05:23<00:47, 11.63it/s]

Training loss: 0.027733  [121632/139199]


 90%|████████▉ | 3903/4350 [05:32<00:38, 11.58it/s]

Training loss: 0.027343  [124832/139199]


 92%|█████████▏| 4003/4350 [05:40<00:30, 11.37it/s]

Training loss: 0.023449  [128032/139199]


 94%|█████████▍| 4103/4350 [05:48<00:20, 12.26it/s]

Training loss: 0.028707  [131232/139199]


 97%|█████████▋| 4203/4350 [05:57<00:13, 11.18it/s]

Training loss: 0.028245  [134432/139199]


 99%|█████████▉| 4303/4350 [06:05<00:03, 12.21it/s]

Training loss: 0.025824  [137632/139199]


100%|██████████| 4350/4350 [06:09<00:00, 11.76it/s]


Train loss: 0.025588 - Val loss: 0.024678 

Epoch 25
-------------------------------


  0%|          | 3/4350 [00:00<08:06,  8.94it/s]

Training loss: 0.021791  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:41, 12.44it/s]

Training loss: 0.027831  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<06:06, 11.32it/s]

Training loss: 0.026652  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:39, 11.92it/s]

Training loss: 0.024229  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:26, 12.08it/s]

Training loss: 0.025122  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:12, 12.32it/s]

Training loss: 0.025872  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:10, 12.07it/s]

Training loss: 0.031007  [19232/139199]


 16%|█▌        | 703/4350 [00:58<04:56, 12.31it/s]

Training loss: 0.025257  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:57, 11.91it/s]

Training loss: 0.023687  [25632/139199]


 21%|██        | 903/4350 [01:15<05:02, 11.39it/s]

Training loss: 0.021564  [28832/139199]


 23%|██▎       | 1003/4350 [01:23<04:27, 12.50it/s]

Training loss: 0.025543  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:37, 11.68it/s]

Training loss: 0.024859  [35232/139199]


 28%|██▊       | 1203/4350 [01:40<04:16, 12.28it/s]

Training loss: 0.027696  [38432/139199]


 30%|██▉       | 1303/4350 [01:48<04:12, 12.08it/s]

Training loss: 0.025926  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<03:59, 12.30it/s]

Training loss: 0.032150  [44832/139199]


 35%|███▍      | 1503/4350 [02:05<03:57, 12.01it/s]

Training loss: 0.028732  [48032/139199]


 37%|███▋      | 1603/4350 [02:13<03:52, 11.81it/s]

Training loss: 0.028640  [51232/139199]


 39%|███▉      | 1703/4350 [02:22<03:36, 12.21it/s]

Training loss: 0.029695  [54432/139199]


 41%|████▏     | 1803/4350 [02:30<03:32, 11.98it/s]

Training loss: 0.023156  [57632/139199]


 44%|████▎     | 1903/4350 [02:39<03:36, 11.32it/s]

Training loss: 0.019603  [60832/139199]


 46%|████▌     | 2003/4350 [02:47<03:14, 12.08it/s]

Training loss: 0.028078  [64032/139199]


 48%|████▊     | 2103/4350 [02:55<03:04, 12.16it/s]

Training loss: 0.025685  [67232/139199]


 51%|█████     | 2203/4350 [03:04<02:58, 12.01it/s]

Training loss: 0.024714  [70432/139199]


 53%|█████▎    | 2303/4350 [03:12<02:47, 12.20it/s]

Training loss: 0.025368  [73632/139199]


 55%|█████▌    | 2403/4350 [03:21<02:38, 12.27it/s]

Training loss: 0.024756  [76832/139199]


 58%|█████▊    | 2503/4350 [03:29<02:31, 12.20it/s]

Training loss: 0.028334  [80032/139199]


 60%|█████▉    | 2603/4350 [03:37<02:30, 11.59it/s]

Training loss: 0.026180  [83232/139199]


 62%|██████▏   | 2703/4350 [03:46<02:21, 11.65it/s]

Training loss: 0.023173  [86432/139199]


 64%|██████▍   | 2803/4350 [03:54<02:04, 12.42it/s]

Training loss: 0.023735  [89632/139199]


 67%|██████▋   | 2903/4350 [04:02<01:59, 12.07it/s]

Training loss: 0.033766  [92832/139199]


 69%|██████▉   | 3003/4350 [04:10<01:51, 12.12it/s]

Training loss: 0.024212  [96032/139199]


 71%|███████▏  | 3103/4350 [04:19<01:40, 12.35it/s]

Training loss: 0.024402  [99232/139199]


 74%|███████▎  | 3203/4350 [04:27<01:37, 11.78it/s]

Training loss: 0.026470  [102432/139199]


 76%|███████▌  | 3303/4350 [04:35<01:26, 12.16it/s]

Training loss: 0.023953  [105632/139199]


 78%|███████▊  | 3403/4350 [04:44<01:19, 11.86it/s]

Training loss: 0.027146  [108832/139199]


 81%|████████  | 3503/4350 [04:52<01:07, 12.50it/s]

Training loss: 0.030434  [112032/139199]


 83%|████████▎ | 3603/4350 [05:00<01:00, 12.42it/s]

Training loss: 0.023370  [115232/139199]


 85%|████████▌ | 3703/4350 [05:09<00:56, 11.43it/s]

Training loss: 0.025002  [118432/139199]


 87%|████████▋ | 3803/4350 [05:18<00:46, 11.69it/s]

Training loss: 0.026635  [121632/139199]


 90%|████████▉ | 3903/4350 [05:26<00:38, 11.47it/s]

Training loss: 0.026538  [124832/139199]


 92%|█████████▏| 4003/4350 [05:34<00:29, 11.95it/s]

Training loss: 0.027771  [128032/139199]


 94%|█████████▍| 4103/4350 [05:42<00:19, 12.36it/s]

Training loss: 0.029084  [131232/139199]


 97%|█████████▋| 4203/4350 [05:51<00:12, 11.83it/s]

Training loss: 0.025723  [134432/139199]


 99%|█████████▉| 4303/4350 [05:59<00:03, 12.19it/s]

Training loss: 0.023306  [137632/139199]


100%|██████████| 4350/4350 [06:03<00:00, 11.97it/s]


Train loss: 0.025793 - Val loss: 0.024137 

Epoch 26
-------------------------------


  0%|          | 3/4350 [00:00<08:08,  8.89it/s]

Training loss: 0.026870  [   32/139199]


  2%|▏         | 103/4350 [00:08<06:07, 11.56it/s]

Training loss: 0.024098  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:49, 11.86it/s]

Training loss: 0.021803  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:38, 11.96it/s]

Training loss: 0.024979  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:44, 11.45it/s]

Training loss: 0.026473  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:32, 11.56it/s]

Training loss: 0.025479  [16032/139199]


 14%|█▍        | 603/4350 [00:51<05:15, 11.88it/s]

Training loss: 0.026319  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:04, 11.96it/s]

Training loss: 0.025387  [22432/139199]


 18%|█▊        | 803/4350 [01:08<05:11, 11.40it/s]

Training loss: 0.032956  [25632/139199]


 21%|██        | 903/4350 [01:16<04:40, 12.27it/s]

Training loss: 0.020229  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:40, 11.92it/s]

Training loss: 0.022480  [32032/139199]


 25%|██▌       | 1103/4350 [01:33<04:34, 11.83it/s]

Training loss: 0.028779  [35232/139199]


 28%|██▊       | 1203/4350 [01:41<04:27, 11.76it/s]

Training loss: 0.027919  [38432/139199]


 30%|██▉       | 1303/4350 [01:50<04:16, 11.87it/s]

Training loss: 0.025676  [41632/139199]


 32%|███▏      | 1403/4350 [01:58<04:06, 11.96it/s]

Training loss: 0.025187  [44832/139199]


 35%|███▍      | 1503/4350 [02:07<04:09, 11.39it/s]

Training loss: 0.026299  [48032/139199]


 37%|███▋      | 1603/4350 [02:16<03:50, 11.92it/s]

Training loss: 0.026803  [51232/139199]


 39%|███▉      | 1703/4350 [02:24<03:33, 12.39it/s]

Training loss: 0.024653  [54432/139199]


 41%|████▏     | 1803/4350 [02:32<03:37, 11.69it/s]

Training loss: 0.026875  [57632/139199]


 44%|████▎     | 1903/4350 [02:41<03:19, 12.27it/s]

Training loss: 0.027947  [60832/139199]


 46%|████▌     | 2003/4350 [02:49<03:11, 12.24it/s]

Training loss: 0.029001  [64032/139199]


 48%|████▊     | 2103/4350 [02:58<03:19, 11.27it/s]

Training loss: 0.024113  [67232/139199]


 51%|█████     | 2203/4350 [03:06<02:55, 12.23it/s]

Training loss: 0.035884  [70432/139199]


 53%|█████▎    | 2303/4350 [03:15<02:53, 11.83it/s]

Training loss: 0.028334  [73632/139199]


 55%|█████▌    | 2403/4350 [03:23<02:54, 11.17it/s]

Training loss: 0.020779  [76832/139199]


 58%|█████▊    | 2503/4350 [03:32<02:39, 11.57it/s]

Training loss: 0.021486  [80032/139199]


 60%|█████▉    | 2603/4350 [03:40<02:22, 12.26it/s]

Training loss: 0.027638  [83232/139199]


 62%|██████▏   | 2703/4350 [03:49<02:17, 11.96it/s]

Training loss: 0.023193  [86432/139199]


 64%|██████▍   | 2803/4350 [03:57<02:10, 11.84it/s]

Training loss: 0.025760  [89632/139199]


 67%|██████▋   | 2903/4350 [04:06<02:00, 12.03it/s]

Training loss: 0.024955  [92832/139199]


 69%|██████▉   | 3003/4350 [04:14<01:56, 11.57it/s]

Training loss: 0.025067  [96032/139199]


 71%|███████▏  | 3103/4350 [04:23<01:46, 11.69it/s]

Training loss: 0.026772  [99232/139199]


 74%|███████▎  | 3203/4350 [04:31<01:37, 11.79it/s]

Training loss: 0.027236  [102432/139199]


 76%|███████▌  | 3303/4350 [04:40<01:30, 11.59it/s]

Training loss: 0.027909  [105632/139199]


 78%|███████▊  | 3403/4350 [04:48<01:18, 12.13it/s]

Training loss: 0.028719  [108832/139199]


 81%|████████  | 3503/4350 [04:57<01:13, 11.57it/s]

Training loss: 0.024030  [112032/139199]


 83%|████████▎ | 3603/4350 [05:05<01:01, 12.21it/s]

Training loss: 0.023387  [115232/139199]


 85%|████████▌ | 3703/4350 [05:14<00:56, 11.42it/s]

Training loss: 0.022926  [118432/139199]


 87%|████████▋ | 3803/4350 [05:23<00:45, 11.92it/s]

Training loss: 0.027346  [121632/139199]


 90%|████████▉ | 3903/4350 [05:31<00:38, 11.46it/s]

Training loss: 0.024336  [124832/139199]


 92%|█████████▏| 4003/4350 [05:39<00:28, 12.28it/s]

Training loss: 0.022218  [128032/139199]


 94%|█████████▍| 4103/4350 [05:48<00:20, 11.98it/s]

Training loss: 0.027707  [131232/139199]


 97%|█████████▋| 4203/4350 [05:56<00:12, 11.64it/s]

Training loss: 0.022197  [134432/139199]


 99%|█████████▉| 4303/4350 [06:05<00:03, 12.02it/s]

Training loss: 0.024236  [137632/139199]


100%|██████████| 4350/4350 [06:09<00:00, 11.78it/s]


Train loss: 0.025680 - Val loss: 0.023448 

Epoch 27
-------------------------------


  0%|          | 3/4350 [00:00<08:20,  8.68it/s]

Training loss: 0.029847  [   32/139199]


  2%|▏         | 103/4350 [00:08<06:14, 11.36it/s]

Training loss: 0.028530  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:42, 12.12it/s]

Training loss: 0.024354  [ 6432/139199]


  7%|▋         | 303/4350 [00:26<05:33, 12.14it/s]

Training loss: 0.022190  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:32, 11.86it/s]

Training loss: 0.025102  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:34, 11.51it/s]

Training loss: 0.029622  [16032/139199]


 14%|█▍        | 603/4350 [00:51<05:03, 12.34it/s]

Training loss: 0.025601  [19232/139199]


 16%|█▌        | 703/4350 [00:59<04:58, 12.21it/s]

Training loss: 0.026097  [22432/139199]


 18%|█▊        | 803/4350 [01:08<05:25, 10.88it/s]

Training loss: 0.026197  [25632/139199]


 21%|██        | 903/4350 [01:16<04:44, 12.10it/s]

Training loss: 0.022930  [28832/139199]


 23%|██▎       | 1003/4350 [01:25<04:37, 12.05it/s]

Training loss: 0.024825  [32032/139199]


 25%|██▌       | 1103/4350 [01:33<04:34, 11.83it/s]

Training loss: 0.028945  [35232/139199]


 28%|██▊       | 1203/4350 [01:41<04:20, 12.09it/s]

Training loss: 0.024204  [38432/139199]


 30%|██▉       | 1303/4350 [01:50<04:09, 12.20it/s]

Training loss: 0.025996  [41632/139199]


 32%|███▏      | 1403/4350 [01:58<04:04, 12.06it/s]

Training loss: 0.023984  [44832/139199]


 35%|███▍      | 1503/4350 [02:07<04:13, 11.25it/s]

Training loss: 0.029576  [48032/139199]


 37%|███▋      | 1603/4350 [02:15<03:55, 11.67it/s]

Training loss: 0.030139  [51232/139199]


 39%|███▉      | 1703/4350 [02:24<03:37, 12.18it/s]

Training loss: 0.025131  [54432/139199]


 41%|████▏     | 1803/4350 [02:32<03:46, 11.23it/s]

Training loss: 0.026846  [57632/139199]


 44%|████▎     | 1903/4350 [02:40<03:26, 11.85it/s]

Training loss: 0.027462  [60832/139199]


 46%|████▌     | 2003/4350 [02:49<03:17, 11.90it/s]

Training loss: 0.028813  [64032/139199]


 48%|████▊     | 2103/4350 [02:57<03:15, 11.47it/s]

Training loss: 0.026801  [67232/139199]


 51%|█████     | 2203/4350 [03:05<03:02, 11.74it/s]

Training loss: 0.027569  [70432/139199]


 53%|█████▎    | 2303/4350 [03:14<02:54, 11.70it/s]

Training loss: 0.030691  [73632/139199]


 55%|█████▌    | 2403/4350 [03:22<02:42, 12.02it/s]

Training loss: 0.024146  [76832/139199]


 58%|█████▊    | 2503/4350 [03:31<02:36, 11.80it/s]

Training loss: 0.026520  [80032/139199]


 60%|█████▉    | 2603/4350 [03:39<02:26, 11.89it/s]

Training loss: 0.023903  [83232/139199]


 62%|██████▏   | 2703/4350 [03:48<02:14, 12.25it/s]

Training loss: 0.029690  [86432/139199]


 64%|██████▍   | 2803/4350 [03:56<02:17, 11.28it/s]

Training loss: 0.029344  [89632/139199]


 67%|██████▋   | 2903/4350 [04:05<02:00, 11.98it/s]

Training loss: 0.032533  [92832/139199]


 69%|██████▉   | 3003/4350 [04:13<01:54, 11.77it/s]

Training loss: 0.018901  [96032/139199]


 71%|███████▏  | 3103/4350 [04:22<01:49, 11.38it/s]

Training loss: 0.026961  [99232/139199]


 74%|███████▎  | 3203/4350 [04:30<01:40, 11.47it/s]

Training loss: 0.029166  [102432/139199]


 76%|███████▌  | 3303/4350 [04:39<01:30, 11.53it/s]

Training loss: 0.026387  [105632/139199]


 78%|███████▊  | 3403/4350 [04:48<01:17, 12.18it/s]

Training loss: 0.030956  [108832/139199]


 81%|████████  | 3503/4350 [04:56<01:12, 11.66it/s]

Training loss: 0.025470  [112032/139199]


 83%|████████▎ | 3603/4350 [05:05<01:03, 11.77it/s]

Training loss: 0.027023  [115232/139199]


 85%|████████▌ | 3703/4350 [05:13<00:57, 11.30it/s]

Training loss: 0.022691  [118432/139199]


 87%|████████▋ | 3803/4350 [05:22<00:48, 11.36it/s]

Training loss: 0.026554  [121632/139199]


 90%|████████▉ | 3903/4350 [05:30<00:37, 12.08it/s]

Training loss: 0.023101  [124832/139199]


 92%|█████████▏| 4003/4350 [05:39<00:28, 11.99it/s]

Training loss: 0.025422  [128032/139199]


 94%|█████████▍| 4103/4350 [05:47<00:20, 11.93it/s]

Training loss: 0.030529  [131232/139199]


 97%|█████████▋| 4203/4350 [05:56<00:12, 11.69it/s]

Training loss: 0.023393  [134432/139199]


 99%|█████████▉| 4303/4350 [06:04<00:03, 11.77it/s]

Training loss: 0.029461  [137632/139199]


100%|██████████| 4350/4350 [06:08<00:00, 11.80it/s]


Train loss: 0.025598 - Val loss: 0.023015 

Epoch 28
-------------------------------


  0%|          | 3/4350 [00:00<08:37,  8.39it/s]

Training loss: 0.027275  [   32/139199]


  2%|▏         | 103/4350 [00:08<06:01, 11.74it/s]

Training loss: 0.027855  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:46, 11.96it/s]

Training loss: 0.026501  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:32, 12.18it/s]

Training loss: 0.026743  [ 9632/139199]


  9%|▉         | 403/4350 [00:34<05:43, 11.50it/s]

Training loss: 0.025324  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:15, 12.19it/s]

Training loss: 0.021566  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:17, 11.82it/s]

Training loss: 0.026071  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:03, 12.00it/s]

Training loss: 0.023132  [22432/139199]


 18%|█▊        | 803/4350 [01:07<05:17, 11.17it/s]

Training loss: 0.023039  [25632/139199]


 21%|██        | 903/4350 [01:16<04:46, 12.04it/s]

Training loss: 0.024391  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:32, 12.26it/s]

Training loss: 0.022050  [32032/139199]


 25%|██▌       | 1103/4350 [01:33<04:46, 11.33it/s]

Training loss: 0.025581  [35232/139199]


 28%|██▊       | 1203/4350 [01:41<04:15, 12.30it/s]

Training loss: 0.025347  [38432/139199]


 30%|██▉       | 1303/4350 [01:50<04:18, 11.79it/s]

Training loss: 0.025965  [41632/139199]


 32%|███▏      | 1403/4350 [01:58<04:09, 11.83it/s]

Training loss: 0.023651  [44832/139199]


 35%|███▍      | 1503/4350 [02:07<04:05, 11.59it/s]

Training loss: 0.026904  [48032/139199]


 37%|███▋      | 1603/4350 [02:16<04:01, 11.38it/s]

Training loss: 0.030429  [51232/139199]


 39%|███▉      | 1703/4350 [02:24<03:40, 12.02it/s]

Training loss: 0.023492  [54432/139199]


 41%|████▏     | 1803/4350 [02:33<03:41, 11.48it/s]

Training loss: 0.031428  [57632/139199]


 44%|████▎     | 1903/4350 [02:41<03:27, 11.82it/s]

Training loss: 0.025918  [60832/139199]


 46%|████▌     | 2003/4350 [02:50<03:14, 12.07it/s]

Training loss: 0.022367  [64032/139199]


 48%|████▊     | 2103/4350 [02:58<03:18, 11.30it/s]

Training loss: 0.022284  [67232/139199]


 51%|█████     | 2203/4350 [03:06<02:54, 12.34it/s]

Training loss: 0.026345  [70432/139199]


 53%|█████▎    | 2303/4350 [03:15<02:49, 12.06it/s]

Training loss: 0.026087  [73632/139199]


 55%|█████▌    | 2403/4350 [03:23<02:43, 11.91it/s]

Training loss: 0.033683  [76832/139199]


 58%|█████▊    | 2503/4350 [03:31<02:36, 11.83it/s]

Training loss: 0.025503  [80032/139199]


 60%|█████▉    | 2603/4350 [03:40<02:24, 12.10it/s]

Training loss: 0.027593  [83232/139199]


 62%|██████▏   | 2703/4350 [03:48<02:16, 12.06it/s]

Training loss: 0.025832  [86432/139199]


 64%|██████▍   | 2803/4350 [03:56<02:12, 11.71it/s]

Training loss: 0.023752  [89632/139199]


 67%|██████▋   | 2903/4350 [04:05<02:00, 12.02it/s]

Training loss: 0.024161  [92832/139199]


 69%|██████▉   | 3003/4350 [04:14<02:02, 11.02it/s]

Training loss: 0.020472  [96032/139199]


 71%|███████▏  | 3103/4350 [04:22<01:52, 11.08it/s]

Training loss: 0.027786  [99232/139199]


 74%|███████▎  | 3203/4350 [04:31<01:34, 12.13it/s]

Training loss: 0.026201  [102432/139199]


 76%|███████▌  | 3303/4350 [04:39<01:24, 12.33it/s]

Training loss: 0.025849  [105632/139199]


 78%|███████▊  | 3403/4350 [04:47<01:25, 11.09it/s]

Training loss: 0.030550  [108832/139199]


 81%|████████  | 3503/4350 [04:56<01:14, 11.38it/s]

Training loss: 0.022689  [112032/139199]


 83%|████████▎ | 3603/4350 [05:05<01:00, 12.32it/s]

Training loss: 0.021491  [115232/139199]


 85%|████████▌ | 3703/4350 [05:13<00:53, 12.21it/s]

Training loss: 0.023685  [118432/139199]


 87%|████████▋ | 3803/4350 [05:21<00:47, 11.52it/s]

Training loss: 0.028331  [121632/139199]


 90%|████████▉ | 3903/4350 [05:30<00:36, 12.24it/s]

Training loss: 0.025467  [124832/139199]


 92%|█████████▏| 4003/4350 [05:38<00:28, 12.11it/s]

Training loss: 0.028153  [128032/139199]


 94%|█████████▍| 4103/4350 [05:46<00:21, 11.53it/s]

Training loss: 0.022956  [131232/139199]


 97%|█████████▋| 4203/4350 [05:55<00:11, 12.30it/s]

Training loss: 0.022180  [134432/139199]


 99%|█████████▉| 4303/4350 [06:03<00:03, 12.06it/s]

Training loss: 0.024285  [137632/139199]


100%|██████████| 4350/4350 [06:07<00:00, 11.84it/s]


Train loss: 0.025481 - Val loss: 0.023521 

Epoch 29
-------------------------------


  0%|          | 3/4350 [00:00<08:15,  8.77it/s]

Training loss: 0.024348  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:51, 12.08it/s]

Training loss: 0.025208  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:46, 11.98it/s]

Training loss: 0.023466  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:46, 11.67it/s]

Training loss: 0.027246  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:30, 11.93it/s]

Training loss: 0.022174  [12832/139199]


 12%|█▏        | 503/4350 [00:42<05:22, 11.94it/s]

Training loss: 0.024582  [16032/139199]


 14%|█▍        | 603/4350 [00:50<05:14, 11.92it/s]

Training loss: 0.026031  [19232/139199]


 16%|█▌        | 703/4350 [00:59<05:09, 11.78it/s]

Training loss: 0.020324  [22432/139199]


 18%|█▊        | 803/4350 [01:07<04:59, 11.86it/s]

Training loss: 0.023456  [25632/139199]


 21%|██        | 903/4350 [01:15<04:39, 12.32it/s]

Training loss: 0.024398  [28832/139199]


 23%|██▎       | 1003/4350 [01:24<04:52, 11.44it/s]

Training loss: 0.027457  [32032/139199]


 25%|██▌       | 1103/4350 [01:32<04:30, 12.01it/s]

Training loss: 0.021887  [35232/139199]


 28%|██▊       | 1203/4350 [01:40<04:13, 12.40it/s]

Training loss: 0.026327  [38432/139199]


 30%|██▉       | 1303/4350 [01:49<04:21, 11.65it/s]

Training loss: 0.022056  [41632/139199]


 32%|███▏      | 1403/4350 [01:57<03:55, 12.52it/s]

Training loss: 0.023228  [44832/139199]


 35%|███▍      | 1503/4350 [02:06<03:49, 12.43it/s]

Training loss: 0.025171  [48032/139199]


 37%|███▋      | 1603/4350 [02:14<03:39, 12.49it/s]

Training loss: 0.022466  [51232/139199]


 39%|███▉      | 1703/4350 [02:22<03:43, 11.85it/s]

Training loss: 0.024788  [54432/139199]


 41%|████▏     | 1803/4350 [02:31<03:26, 12.35it/s]

Training loss: 0.025018  [57632/139199]


 44%|████▎     | 1903/4350 [02:39<03:27, 11.82it/s]

Training loss: 0.021740  [60832/139199]


 46%|████▌     | 2003/4350 [02:47<03:30, 11.15it/s]

Training loss: 0.022798  [64032/139199]


 48%|████▊     | 2103/4350 [02:55<03:06, 12.05it/s]

Training loss: 0.026853  [67232/139199]


 51%|█████     | 2203/4350 [03:04<02:52, 12.47it/s]

Training loss: 0.027877  [70432/139199]


 53%|█████▎    | 2303/4350 [03:12<02:51, 11.97it/s]

Training loss: 0.024514  [73632/139199]


 55%|█████▌    | 2403/4350 [03:20<02:38, 12.30it/s]

Training loss: 0.022264  [76832/139199]


 58%|█████▊    | 2503/4350 [03:28<02:30, 12.26it/s]

Training loss: 0.020558  [80032/139199]


 60%|█████▉    | 2603/4350 [03:37<02:24, 12.13it/s]

Training loss: 0.024294  [83232/139199]


 62%|██████▏   | 2703/4350 [03:45<02:13, 12.35it/s]

Training loss: 0.029345  [86432/139199]


 64%|██████▍   | 2803/4350 [03:54<02:08, 12.06it/s]

Training loss: 0.027796  [89632/139199]


 67%|██████▋   | 2903/4350 [04:02<02:05, 11.54it/s]

Training loss: 0.024501  [92832/139199]


 69%|██████▉   | 3003/4350 [04:10<01:55, 11.70it/s]

Training loss: 0.028330  [96032/139199]


 71%|███████▏  | 3103/4350 [04:19<01:39, 12.52it/s]

Training loss: 0.024488  [99232/139199]


 74%|███████▎  | 3203/4350 [04:27<01:31, 12.60it/s]

Training loss: 0.028970  [102432/139199]


 76%|███████▌  | 3303/4350 [04:35<01:28, 11.85it/s]

Training loss: 0.023322  [105632/139199]


 78%|███████▊  | 3403/4350 [04:43<01:16, 12.39it/s]

Training loss: 0.027529  [108832/139199]


 81%|████████  | 3503/4350 [04:51<01:08, 12.29it/s]

Training loss: 0.020497  [112032/139199]


 83%|████████▎ | 3603/4350 [05:00<01:03, 11.74it/s]

Training loss: 0.022070  [115232/139199]


 85%|████████▌ | 3703/4350 [05:08<00:52, 12.40it/s]

Training loss: 0.026528  [118432/139199]


 87%|████████▋ | 3803/4350 [05:16<00:45, 12.07it/s]

Training loss: 0.025044  [121632/139199]


 90%|████████▉ | 3903/4350 [05:25<00:37, 11.82it/s]

Training loss: 0.030390  [124832/139199]


 92%|█████████▏| 4003/4350 [05:33<00:28, 12.21it/s]

Training loss: 0.026503  [128032/139199]


 94%|█████████▍| 4103/4350 [05:41<00:20, 12.02it/s]

Training loss: 0.030136  [131232/139199]


 97%|█████████▋| 4203/4350 [05:49<00:12, 12.18it/s]

Training loss: 0.028942  [134432/139199]


 99%|█████████▉| 4303/4350 [05:58<00:03, 12.46it/s]

Training loss: 0.029481  [137632/139199]


100%|██████████| 4350/4350 [06:02<00:00, 12.02it/s]


Train loss: 0.025362 - Val loss: 0.024414 

Epoch 30
-------------------------------


  0%|          | 3/4350 [00:00<08:07,  8.92it/s]

Training loss: 0.024869  [   32/139199]


  2%|▏         | 103/4350 [00:08<05:42, 12.41it/s]

Training loss: 0.029693  [ 3232/139199]


  5%|▍         | 203/4350 [00:17<05:48, 11.92it/s]

Training loss: 0.026120  [ 6432/139199]


  7%|▋         | 303/4350 [00:25<05:30, 12.25it/s]

Training loss: 0.026868  [ 9632/139199]


  9%|▉         | 403/4350 [00:33<05:22, 12.22it/s]

Training loss: 0.022812  [12832/139199]


 12%|█▏        | 503/4350 [00:41<05:28, 11.71it/s]

Training loss: 0.024416  [16032/139199]


 14%|█▍        | 603/4350 [00:49<05:00, 12.46it/s]

Training loss: 0.022057  [19232/139199]


 16%|█▌        | 703/4350 [00:58<04:55, 12.36it/s]

Training loss: 0.023291  [22432/139199]


 18%|█▊        | 803/4350 [01:06<04:53, 12.08it/s]

Training loss: 0.022898  [25632/139199]


 21%|██        | 903/4350 [01:14<04:39, 12.33it/s]

Training loss: 0.022881  [28832/139199]


 23%|██▎       | 1003/4350 [01:22<04:29, 12.43it/s]

Training loss: 0.025296  [32032/139199]


 25%|██▌       | 1103/4350 [01:30<04:32, 11.93it/s]

Training loss: 0.026386  [35232/139199]


 28%|██▊       | 1203/4350 [01:39<04:16, 12.25it/s]

Training loss: 0.023418  [38432/139199]


 30%|██▉       | 1303/4350 [01:47<04:16, 11.87it/s]

Training loss: 0.028472  [41632/139199]


 32%|███▏      | 1403/4350 [01:55<04:20, 11.30it/s]

Training loss: 0.026086  [44832/139199]


 35%|███▍      | 1503/4350 [02:04<03:55, 12.10it/s]

Training loss: 0.024771  [48032/139199]


 37%|███▋      | 1603/4350 [02:12<03:49, 11.97it/s]

Training loss: 0.028006  [51232/139199]


 39%|███▉      | 1703/4350 [02:20<03:39, 12.08it/s]

Training loss: 0.022276  [54432/139199]


 41%|████▏     | 1803/4350 [02:29<03:35, 11.79it/s]

Training loss: 0.026350  [57632/139199]


 44%|████▎     | 1903/4350 [02:37<03:16, 12.44it/s]

Training loss: 0.027421  [60832/139199]


 46%|████▌     | 2003/4350 [02:45<03:12, 12.16it/s]

Training loss: 0.025274  [64032/139199]


 48%|████▊     | 2103/4350 [02:53<03:07, 11.99it/s]

Training loss: 0.024676  [67232/139199]


 51%|█████     | 2203/4350 [03:02<02:54, 12.30it/s]

Training loss: 0.027562  [70432/139199]


 53%|█████▎    | 2303/4350 [03:10<02:48, 12.15it/s]

Training loss: 0.027944  [73632/139199]


 55%|█████▌    | 2403/4350 [03:18<02:47, 11.63it/s]

Training loss: 0.023797  [76832/139199]


 58%|█████▊    | 2503/4350 [03:27<02:34, 11.98it/s]

Training loss: 0.024253  [80032/139199]


 60%|█████▉    | 2603/4350 [03:35<02:21, 12.31it/s]

Training loss: 0.029680  [83232/139199]


 62%|██████▏   | 2703/4350 [03:44<02:20, 11.76it/s]

Training loss: 0.026284  [86432/139199]


 64%|██████▍   | 2803/4350 [03:52<02:07, 12.15it/s]

Training loss: 0.026263  [89632/139199]


 67%|██████▋   | 2903/4350 [04:00<01:57, 12.32it/s]

Training loss: 0.023081  [92832/139199]


 69%|██████▉   | 3003/4350 [04:09<01:51, 12.05it/s]

Training loss: 0.025235  [96032/139199]


 71%|███████▏  | 3103/4350 [04:17<01:47, 11.55it/s]

Training loss: 0.028082  [99232/139199]


 74%|███████▎  | 3203/4350 [04:25<01:40, 11.41it/s]

Training loss: 0.022673  [102432/139199]


 76%|███████▌  | 3303/4350 [04:34<01:29, 11.69it/s]

Training loss: 0.026413  [105632/139199]


 78%|███████▊  | 3403/4350 [04:43<01:25, 11.12it/s]

Training loss: 0.025018  [108832/139199]


 81%|████████  | 3503/4350 [04:51<01:09, 12.14it/s]

Training loss: 0.022057  [112032/139199]


 83%|████████▎ | 3603/4350 [04:59<00:59, 12.48it/s]

Training loss: 0.023742  [115232/139199]


 85%|████████▌ | 3703/4350 [05:08<00:55, 11.73it/s]

Training loss: 0.023413  [118432/139199]


 87%|████████▋ | 3803/4350 [05:16<00:44, 12.22it/s]

Training loss: 0.025438  [121632/139199]


 90%|████████▉ | 3903/4350 [05:24<00:35, 12.50it/s]

Training loss: 0.021520  [124832/139199]


 92%|█████████▏| 4003/4350 [05:33<00:29, 11.69it/s]

Training loss: 0.023305  [128032/139199]


 94%|█████████▍| 4103/4350 [05:41<00:20, 12.13it/s]

Training loss: 0.025846  [131232/139199]


 97%|█████████▋| 4203/4350 [05:50<00:12, 12.02it/s]

Training loss: 0.022542  [134432/139199]


 99%|█████████▉| 4303/4350 [05:58<00:03, 12.37it/s]

Training loss: 0.030595  [137632/139199]


100%|██████████| 4350/4350 [06:02<00:00, 12.00it/s]


Train loss: 0.025329 - Val loss: 0.024025 



In [15]:
torch.save(model.state_dict(), "/content/drive/My Drive/AV Research/best_torch_model256.pt")

In [None]:
from google.colab import runtime
runtime.unassign()