# Imports

In [None]:
import string
import shutil
import torch
import random
import math
import h5py
import torchaudio
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torchaudio.transforms as T
import torch.optim as optim
from sklearn.manifold import TSNE
from torch.optim import lr_scheduler
from google.colab import drive
from google.colab import runtime
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, Dataset, Subset
from torchvision import models, transforms
from torchvision.transforms import v2
from torchsummary import summary
from torchvision.datasets import MNIST
from torchvision import transforms
!pip install audiomentations
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, TimeMask

drive.mount('/content/drive')

Collecting audiomentations
  Downloading audiomentations-0.36.0-py3-none-any.whl (80 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/80.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m71.7/80.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.3/80.3 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: audiomentations
Successfully installed audiomentations-0.36.0
Mounted at /content/drive


# Data

In [None]:
# Read the raw training split: both HDF5 datasets are copied into in-memory NumPy
# arrays before the file handle is closed.
with h5py.File('/content/drive/My Drive/Projects/NeuraBeat/Data/train_data_raw.h5', 'r') as train_file:
    train_data, train_labels = (np.array(train_file[name]) for name in ('data', 'labels'))

# Read the raw validation split the same way.
with h5py.File('/content/drive/My Drive/Projects/NeuraBeat/Data/val_data_raw.h5', 'r') as val_file:
    val_data, val_labels = (np.array(val_file[name]) for name in ('data', 'labels'))

MNIST (Testing)

In [None]:
# mean, std = 0.1307, 0.3081

# # Define transforms
# transform = v2.Compose([
#     v2.Resize((64, 64)),
#     v2.ToImage(),
#     v2.ToDtype(torch.float32, scale=True),
#     v2.Normalize((mean,), (std,))
# ])

# # Load datasets
# train_dataset = MNIST('../data/MNIST', train=True, download=True, transform=transform)
# test_dataset = MNIST('../data/MNIST', train=False, download=True, transform=transform)

# # Define the target number of samples
# num_train_samples = 6400
# num_val_samples = 1600

# # Shuffle and select a subset of the training dataset
# train_indices = np.random.permutation(len(train_dataset))[:num_train_samples]
# train_subset = Subset(train_dataset, train_indices)

# # Shuffle and select a subset of the validation dataset
# val_indices = np.random.permutation(len(test_dataset))[:num_val_samples]
# val_subset = Subset(test_dataset, val_indices)

# # Create DataLoaders for the subsets
# kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}
# train_loader = DataLoader(train_subset, shuffle=True, batch_size=256, **kwargs)
# val_loader = DataLoader(val_subset, batch_size=256, **kwargs)

Data Augmentation

In [None]:
class DataAugmentation(nn.Module):
    """Waveform augmentation chain built from audiomentations transforms.

    The chain consists of Gaussian noise, time stretching, pitch shifting and
    time masking, each configured with ``p=0.5``.
    """

    def __init__(self):
        super().__init__()
        augmentation_steps = [
            AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=0.5),
            TimeStretch(min_rate=0.8, max_rate=1.25, p=0.5),
            PitchShift(min_semitones=-4, max_semitones=4, p=0.5),
            TimeMask(min_band_part=0.1, max_band_part=0.15, p=0.5),
        ]
        self.transforms = Compose(augmentation_steps)

    def forward(self, song, sample_rate):
        """Apply the augmentation chain to ``song`` sampled at ``sample_rate`` and return the result."""
        return self.transforms(song, sample_rate=sample_rate)

Create Dataset

In [None]:
class AudioDataset(Dataset):
    """Dataset that converts raw waveforms into normalized log-mel spectrogram images.

    Args:
        input_data: indexable collection of waveforms. Each item is handed to
            ``torch.from_numpy``, so it is expected to be a NumPy array.
        input_labels: indexable collection of labels aligned with ``input_data``.
        sample_rate: sample rate passed to the mel transform and to ``transform``.
        n_mels: number of mel bins of the spectrogram.
        mean: value subtracted by the final normalization step.
        std: value the result is divided by in the final normalization step.
            NOTE(review): the default mean/std are hard-coded; presumably they were
            measured on the training spectrograms - confirm before reusing elsewhere.
        transform: optional callable invoked as ``transform(song, sample_rate)`` on
            the raw waveform before the spectrogram is computed.
    """

    def __init__(self, input_data, input_labels, sample_rate=16000, n_mels=128, mean=6.5304, std=11.8924, transform=None):
        self.input_data = input_data
        self.input_labels = input_labels
        self.sr = sample_rate
        self.n_mels = n_mels
        self.mean = mean
        self.std = std
        self.transform = transform
        self.mel_spec_transform = T.MelSpectrogram(sample_rate=self.sr, n_mels=self.n_mels, n_fft=2048, hop_length=512)
        self.log_mel_spec_transform = T.AmplitudeToDB()
        # Image-style post-processing of the spectrogram: resize to 64x64, wrap as an
        # image, cast to float32 and normalize with the configured statistics.
        # (The original chained assignment also rebound the ``transform`` argument
        # here, which served no purpose and has been removed.)
        self.image_transforms = v2.Compose([
            v2.Resize((64, 64)),
            v2.ToImage(),
            v2.ToDtype(torch.float32, scale=True),
            v2.Normalize((self.mean,), (self.std,)),
        ])

    def __len__(self):
        """Number of waveforms in the dataset."""
        return len(self.input_data)

    def __getitem__(self, idx):
        """Return ``(spectrogram_image, label)`` for the sample at ``idx``."""
        song = self.input_data[idx]
        genre = self.input_labels[idx]

        # Augment the raw waveform only when a transform was supplied.
        if self.transform is not None:
            song = self.transform(song, self.sr)

        mel_spec = self.mel_spec_transform(torch.from_numpy(song))
        log_mel_spec = self.log_mel_spec_transform(mel_spec)
        # Add a leading channel dimension before the image transforms.
        mel_spec_tensor = log_mel_spec.unsqueeze(0)
        mel_spec_tensor = self.image_transforms(mel_spec_tensor)

        return mel_spec_tensor, genre

# Training samples are augmented at the waveform level; validation samples are not.
audio_train_dataset = AudioDataset(
    input_data=train_data,
    input_labels=train_labels,
    transform=DataAugmentation(),
)
audio_val_dataset = AudioDataset(
    input_data=val_data,
    input_labels=val_labels,
    transform=None,
)

# Only the training loader shuffles; both use batches of 32 with pinned memory.
audio_train_loader = DataLoader(
    audio_train_dataset,
    batch_size=32,
    shuffle=True,
    pin_memory=True,
)
audio_val_loader = DataLoader(
    audio_val_dataset,
    batch_size=32,
    pin_memory=True,
)

In [None]:
# From: https://stackoverflow.com/questions/60101240/finding-mean-and-standard-deviation-across-image-channels-pytorch
def dataset_mean_std(data_loader):
    """Estimate per-channel mean and std over all samples yielded by ``data_loader``.

    Each batch is expected to be a tensor with at least two dimensions, laid out as
    ``[B, C, ...]``; everything after the channel axis is flattened.

    Note that the returned ``std`` is the average of the per-sample standard
    deviations, not the standard deviation of the pooled data.

    Args:
        data_loader: iterable of ``(batch, target)`` pairs; the target is ignored.

    Returns:
        ``(mean, std)`` tensors with one entry per channel.

    Raises:
        ValueError: if ``data_loader`` yields no samples.
    """
    nimages = 0
    mean = 0.
    std = 0.
    for batch, _ in data_loader:
        # Rearrange batch to [B, C, W * H]; reshape (unlike view) also accepts
        # non-contiguous batches.
        batch = batch.reshape(batch.size(0), batch.size(1), -1).float()
        # Update total number of images
        nimages += batch.size(0)
        # Accumulate per-sample statistics, summed over the batch axis
        mean += batch.mean(2).sum(0)
        std += batch.std(2).sum(0)

    if nimages == 0:
        raise ValueError("data_loader yielded no samples; cannot compute mean/std")

    # Final step: turn the sums into averages over all samples
    mean /= nimages
    std /= nimages

    print("Training set mean", mean)
    print("Training set std", std)

    return mean, std

def get_mean_std(data):
    """Return ``(mean, std)`` of ``data`` reduced over its first three axes."""
    leading_axes = (0, 1, 2)
    return np.mean(data, axis=leading_axes), np.std(data, axis=leading_axes)

# Model

Print Layer (debugging)

In [None]:
class PrintLayer(nn.Module):
    """Pass-through debugging layer: prints the shape of its input and returns it unchanged."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # Drop this layer into a Sequential to inspect intermediate tensor shapes.
        print("X shape:", x.shape)
        return x


Positional Encoding

In [None]:
# From: https://github.com/wzlxjtu/PositionalEncoding2D
class PositionalEncoding1d(nn.Module):
    """Adds a fixed sinusoidal positional table of shape ``(length, d_model)`` to its input.

    Even feature indices hold sines and odd indices hold cosines of the position
    scaled by geometrically decreasing frequencies.

    Raises:
        ValueError: if ``d_model`` is odd.
    """

    def __init__(self, d_model, length):
        super().__init__()
        if d_model % 2 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                            "odd dim (got dim={:d})".format(d_model))

        # One frequency per sin/cos pair of feature columns.
        even_dims = torch.arange(0, d_model, 2, dtype=torch.float)
        div_term = torch.exp(even_dims * -(math.log(10000.0) / d_model))
        # (length, 1) positions times (d_model / 2,) frequencies -> (length, d_model / 2)
        angles = torch.arange(0, length).unsqueeze(1).float() * div_term

        table = torch.zeros(length, d_model)
        table[:, 0::2] = torch.sin(angles)
        table[:, 1::2] = torch.cos(angles)
        # Registered as a buffer: stored with the module but not trained.
        self.register_buffer('pe', table)

    def forward(self, x):
        """Return ``x + pe``; ``x`` must be broadcast-compatible with ``(length, d_model)``."""
        return x + self.pe


class PositionalEncoding2d(nn.Module):
    """Adds a fixed 2D sinusoidal positional table of shape ``(d_model, height, width)``.

    The first half of the channels encodes the column (width) position and the
    second half encodes the row (height) position, each as interleaved sin/cos
    channels.

    Args:
        d_model: number of channels; must be divisible by 4 (two axes, each needing
            sin/cos pairs).
        height: number of rows of the feature map.
        width: number of columns of the feature map.

    Raises:
        ValueError: if ``d_model`` is not divisible by 4.
    """

    def __init__(self, d_model, height, width):
        super(PositionalEncoding2d, self).__init__()
        if d_model % 4 != 0:
            # The original message said "odd dimension", which was misleading for
            # even values such as 6 that still fail this check.
            raise ValueError("Cannot use sin/cos positional encoding with "
                            "dimension not divisible by 4 (got dim={:d})".format(d_model))
        pe = torch.zeros(d_model, height, width)
        # Each spatial axis uses half of the channels.
        half_dim = d_model // 2
        div_term = torch.exp(torch.arange(0., half_dim, 2) *
                            -(math.log(10000.0) / half_dim))
        pos_w = torch.arange(0., width).unsqueeze(1)
        pos_h = torch.arange(0., height).unsqueeze(1)
        # Column encoding: identical for every row, hence repeated along height.
        pe[0:half_dim:2, :, :] = torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
        pe[1:half_dim:2, :, :] = torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1)
        # Row encoding: identical for every column, hence repeated along width.
        pe[half_dim::2, :, :] = torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
        pe[half_dim + 1::2, :, :] = torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width)
        # Registered as a buffer: stored with the module but not trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x + pe``; ``x`` must be broadcast-compatible with ``(d_model, height, width)``."""
        return x + self.pe

Separable Convolution Layers (2D and 1D)

In [None]:
# Inspired by: https://github.com/reshalfahsi/separableconv-torch
class SeparableConv2d(nn.Module):
    """Depthwise-separable 2D convolution.

    A per-channel (``groups=in_channels``) convolution with the requested kernel,
    stride and padding is followed by a 1x1 convolution that maps
    ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding=1, bias=False):
        super().__init__()
        # Spatial filtering, one filter per input channel.
        self.depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=in_channels,
            bias=bias,
        )
        # Channel mixing.
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise(self.depthwise(x))

class SeparableConv1d(nn.Module):
    """Depthwise-separable 1D convolution.

    A per-channel (``groups=in_channels``) convolution with the requested kernel,
    stride and padding is followed by a kernel-size-1 convolution that maps
    ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding=1, bias=False):
        super().__init__()
        # Temporal filtering, one filter per input channel.
        self.depthwise = nn.Conv1d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=in_channels,
            bias=bias,
        )
        # Channel mixing.
        self.pointwise = nn.Conv1d(in_channels, out_channels, kernel_size=1, bias=bias)

    def forward(self, x):
        """Apply the depthwise convolution, then the pointwise convolution."""
        return self.pointwise(self.depthwise(x))

Residual Block

In [None]:
class ResidualBlock(nn.Module):
    """Stack of separable convolutions with optional max-pooling and a shortcut branch.

    Args:
        in_channels: channels of the block input.
        out_channels: channels produced by every convolution in the block.
        kernel_size: kernel size of the separable convolutions and of the pooling.
            Every convolution and the pooling use a fixed ``padding=1``, so only
            ``kernel_size=3`` keeps the main branch aligned with the shortcut.
        num_layers: total number of separable convolutions (the input convolution
            plus ``num_layers - 1`` further ones).
        pool: when True, the main branch ends with a stride-2 max-pool and the
            shortcut convolution uses stride 2 as well.
        short: when True, the shortcut branch is added to the main branch and the
            sum goes through a final SELU; when False the main branch is returned
            as is (the shortcut module is still constructed).
        two_dim: selects 2D layers (True) or 1D layers (False) for the whole block.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, num_layers: int, pool: bool, short: bool, two_dim: bool):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.pooling = pool
        self.short = short
        self.two_dim = two_dim

        # Choose the layer family once so every part of the block agrees on the
        # dimensionality. Previously the input convolution was always 2D, which made
        # the ``two_dim=False`` configuration unusable.
        if self.two_dim:
            separable_conv, plain_conv, max_pool = SeparableConv2d, nn.Conv2d, nn.MaxPool2d
        else:
            separable_conv, plain_conv, max_pool = SeparableConv1d, nn.Conv1d, nn.MaxPool1d

        self.inconv = nn.Sequential(
            separable_conv(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=1, bias=False),
            nn.SELU()
        )

        layers = []
        for _ in range(num_layers - 1):
            layers.append(separable_conv(in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=1, bias=False))
            layers.append(nn.SELU())
        self.convlayers = nn.Sequential(*layers)

        if self.pooling:
            self.pool = max_pool(kernel_size=kernel_size, stride=2, padding=1)
            # Stride 2 so the shortcut is downsampled like the pooled main branch.
            self.shortcut = plain_conv(in_channels, out_channels, kernel_size=1, stride=2, bias=False)
        else:
            self.shortcut = plain_conv(in_channels, out_channels, kernel_size=1, bias=False)

        self.sact = nn.SELU()

    def forward(self, x):
        """Run the main branch and, if enabled, add the shortcut branch."""
        out = self.inconv(x)
        out = self.convlayers(out)

        if self.pooling:
            out = self.pool(out)

        if self.short:
            shortcut = self.shortcut(x)
            out = out + shortcut
            out = self.sact(out)

        return out

Embedding Model

In [None]:
class EmbeddingModel(nn.Module):
    """Residual separable-convolution network ending in an 8-way linear output.

    Pipeline used by ``forward``: input convolution -> three pooled residual
    blocks (64 -> 64 -> 128 -> 256 channels) -> global average over all spatial
    positions -> dense stack (256 -> 512 -> 256 -> 128) -> linear output (8).

    ``positional_encoding`` and ``attention`` are constructed but not used by
    ``forward``; the code that would apply them is commented out there.
    """

    def __init__(self):
        super().__init__()

        # Stem: lift the single input channel to 64 feature maps.
        self.input = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.SELU(),
        )
        self.positional_encoding = PositionalEncoding2d(64, 128, 130)

        # Three residual stages; each one is built with pooling enabled.
        self.conv_layers = nn.Sequential(
            ResidualBlock(in_channels=64, out_channels=64, kernel_size=3, num_layers=4, pool=True, short=True, two_dim=True),
            ResidualBlock(in_channels=64, out_channels=128, kernel_size=3, num_layers=4, pool=True, short=True, two_dim=True),
            ResidualBlock(in_channels=128, out_channels=256, kernel_size=3, num_layers=4, pool=True, short=True, two_dim=True),
        )

        self.attention = nn.MultiheadAttention(embed_dim=16, num_heads=2, dropout=0.5, batch_first=True)

        self.dense_layers = nn.Sequential(
            nn.Linear(in_features=256, out_features=512, bias=False),
            nn.SELU(),
            nn.Linear(in_features=512, out_features=256, bias=False),
            nn.SELU(),
            nn.Linear(in_features=256, out_features=128, bias=False),
            nn.SELU(),
            nn.Dropout(0.5),
        )

        self.output = nn.Linear(128, 8)

    def forward(self, x):
        """Map a batch of single-channel images to 8 output values per sample."""
        features = self.input(x)
        # Disabled experiment (positional encoding):
        # x = F.layer_norm(self.positional_encoding(x), x.shape)
        features = self.conv_layers(features)

        # Disabled experiment (self-attention over flattened positions):
        # batch_size, channels, height, width = x.size()
        # x = x.view(batch_size, channels, height * width)
        # attention_output, _ = self.attention(x, x, x)
        # x = F.layer_norm(x + attention_output, x.shape)

        # Global average pooling: collapse every spatial position per channel.
        pooled = features.view(features.size(0), features.size(1), -1).mean(dim=2)
        hidden = self.dense_layers(pooled)
        return self.output(hidden)

    def get_embedding(self, x):
        """Alias for ``forward``: returns the same 8-value output."""
        return self.forward(x)

In [None]:
# From https://github.com/adambielski/siamese-triplet
class SimpleEmbeddingModel(nn.Module):
    """Small separable-convolution network with a three-layer head producing 10 values.

    The convolutional part expects 64 input channels and outputs 256 channels.
    The head expects exactly 256 features, so ``forward`` averages the feature
    maps over their spatial positions before the head. Flattening instead only
    worked when the feature maps had already shrunk to 1x1; for 1x1 maps the
    averaging returns the same values as the flattening did.
    """

    def __init__(self):
        super(SimpleEmbeddingModel, self).__init__()
        self.convnet = nn.Sequential(SeparableConv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False),
                                     nn.SELU(),
                                     nn.MaxPool2d(kernel_size=3, stride=2),
                                     SeparableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, bias=False),
                                     nn.SELU(),
                                     nn.MaxPool2d(kernel_size=3, stride=2),
                                     SeparableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
                                     nn.SELU(),
                                     nn.MaxPool2d(kernel_size=3, stride=2)
                                    )

        self.fc = nn.Sequential(nn.Linear(256, 512),
                                nn.SELU(),
                                nn.Linear(512, 256),
                                nn.SELU(),
                                nn.Linear(256, 10)
                                )

    def forward(self, x):
        """Return the 10-value output for a batch of 64-channel feature maps."""
        output = self.convnet(x)
        # Global average pooling over the two spatial axes -> 256 features per
        # sample regardless of the input resolution (same reduction EmbeddingModel
        # applies before its dense layers).
        output = output.mean(dim=(-2, -1))
        output = self.fc(output)
        return output

    def get_embedding(self, x):
        """Alias for ``forward``."""
        return self.forward(x)

# Utils

Early Stopping

In [None]:
class EarlyStopping:
    """Signals when the validation loss has stopped improving.

    Args:
        patience: number of non-improving checks tolerated before stopping.
        min_delta: margin above the best loss that a value must exceed to count
            as a non-improving check.
    """

    def __init__(self, patience=3, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')
        # Kept for compatibility; nothing in this class reads or updates it.
        self.max_accuracy = float('-inf')

    def early_stop(self, validation_loss):
        """Record ``validation_loss`` and return True once patience is exhausted.

        A new best loss resets the counter. A loss above ``best + min_delta``
        increments it. A NaN loss is treated as a non-improving check as well:
        NaN compares False against everything, so without the explicit test a
        diverged run would never trigger early stopping.
        """
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif math.isnan(validation_loss) or validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

    def reset_loss(self):
        """Forget the best loss seen so far (the patience counter is left untouched)."""
        self.min_validation_loss = float('inf')

Autoclip

In [None]:
# From: https://github.com/pseeth/autoclip/blob/master/autoclip.py
class AutoClip:
    """Adaptive gradient clipping based on the history of observed gradient norms.

    Every call records the current global gradient norm and clips the gradients
    to the requested percentile of all norms recorded so far.

    Args:
        percentile: percentile (0-100) of the norm history used as clip threshold.
    """

    def __init__(self, percentile):
        self.grad_history = []
        self.percentile = percentile

    def compute_grad_norm(self, model):
        """Return the global L2 norm over the gradients of ``model.parameters()``.

        Parameters without a gradient are ignored.
        """
        squared_sum = 0.0
        for p in model.parameters():
            if p.grad is not None:
                squared_sum += p.grad.detach().norm(2).item() ** 2
        return squared_sum ** 0.5

    def __call__(self, model):
        """Record the current gradient norm and clip ``model``'s gradients in place."""
        grad_norm = self.compute_grad_norm(model)
        # A non-finite norm (e.g. after a mixed-precision overflow) must not enter
        # the history: a single NaN would turn every later percentile, and thus
        # every later clip threshold, into NaN. Such gradients are left untouched.
        if not math.isfinite(grad_norm):
            return
        self.grad_history.append(grad_norm)
        clip_value = np.percentile(self.grad_history, self.percentile)
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)

# Training Loop

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')

# Both models are instantiated; only embedding_model is handed to the optimizer below.
embedding_model = EmbeddingModel().to(device)
simple_embedding_model = SimpleEmbeddingModel().to(device)

# Hyper-parameters and loss.
num_epochs = 50
learning_rate = 1e-4
classification_loss_fn = nn.CrossEntropyLoss()

# Optimizer with a cosine warm-restart schedule (first cycle length: 10 scheduler steps).
optimizer = optim.AdamW(
    embedding_model.parameters(),
    lr=learning_rate,
    weight_decay=1e-2,
)
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer=optimizer, T_0=10)

# Training utilities: early stopping, mixed-precision loss scaling, adaptive clipping.
early_stopping = EarlyStopping(patience=3)
scaler = torch.cuda.amp.GradScaler()
autoclipper = AutoClip(percentile=10)

def train_loop(train_loader, model, criterion, optimizer, device, epoch):
    """Run one mixed-precision training epoch.

    Relies on the module-level ``scaler`` (gradient scaler) and ``autoclipper``
    (AutoClip instance) objects.

    Args:
        train_loader: iterable of ``(inputs, targets)`` batches; must support ``len``.
        model: module to train.
        criterion: loss called as ``criterion(outputs, targets)``.
        optimizer: optimizer stepped through the gradient scaler.
        device: device the batches are moved to.
        epoch: current epoch number (accepted for interface symmetry; unused).

    Returns:
        ``(train_loss, accuracy, skip_lr_sched)``: the mean loss per batch, the
        accuracy in percent, and a flag that is True only when the scaler skipped
        every optimizer step of the epoch, in which case the per-epoch scheduler
        step should be skipped too.
    """
    model.train()
    train_loss = 0.0
    num_batches = len(train_loader)

    total_correct = 0
    total_samples = 0

    # Stays True until at least one optimizer step has actually been applied.
    # The previous implementation only looked at the last batch and also treated
    # a growing scale as a skipped step.
    skip_lr_sched = True

    # Training loop
    for inputs, targets in tqdm(train_loader):
        optimizer.zero_grad()
        inputs = inputs.to(device)
        targets = targets.to(device)
        with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        train_loss += loss.item()
        scaler.scale(loss).backward()

        # Gradients are still multiplied by the loss scale at this point; unscale
        # them first so the clipper sees (and records) true gradient norms.
        scaler.unscale_(optimizer)
        # On overflow steps the gradients are non-finite. Clipping them would
        # poison the clipper's norm history, and scaler.step() skips the update
        # for such steps anyway.
        if math.isfinite(autoclipper.compute_grad_norm(model)):
            autoclipper(model)

        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == targets).sum().item()
        total_samples += targets.size(0)

        scale_before = scaler.get_scale()
        scaler.step(optimizer)
        scaler.update()
        # The scale only shrinks when the step was skipped because of inf/NaN
        # gradients; an unchanged or grown scale means the optimizer stepped.
        if scaler.get_scale() >= scale_before:
            skip_lr_sched = False

    accuracy = 100 * total_correct / total_samples
    train_loss = train_loss / num_batches
    return train_loss, accuracy, skip_lr_sched

def val_loop(val_loader, model, criterion, device, epoch):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        val_loader: iterable of ``(inputs, targets)`` batches; must support ``len``.
        model: module to evaluate (switched to eval mode).
        criterion: loss called as ``criterion(outputs, targets)``.
        device: device the batches are moved to.
        epoch: current epoch number (unused).

    Returns:
        ``(val_loss, accuracy)``: mean loss per batch and accuracy in percent.
    """
    model.eval()
    batch_count = len(val_loader)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            logits = model(inputs)
            batch_loss = criterion(logits, targets)

            # Predicted class = index of the largest output along dim 1.
            predicted = torch.max(logits.data, 1)[1]
            n_correct += (predicted == targets).sum().item()
            n_seen += targets.size(0)

            loss_sum += batch_loss.item()

    accuracy = 100 * n_correct / n_seen
    return loss_sum / batch_count, accuracy

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}\n-------------------------------")

    # One pass over the training data, then a full pass over the validation data.
    train_loss, train_accuracy, skip_lr_sched = train_loop(
        audio_train_loader,
        embedding_model,
        classification_loss_fn,
        optimizer,
        device,
        epoch+1,
    )
    val_loss, val_accuracy = val_loop(
        audio_val_loader,
        embedding_model,
        classification_loss_fn,
        device,
        epoch+1,
    )
    print(f"Train loss: {train_loss:>8f} - Train Accuracy: {train_accuracy:>f} - Val loss: {val_loss:>8f} - Val Accuracy: {val_accuracy:>f} \n")

    # Early stopping is currently disabled; uncomment to stop on stalled validation loss.
    # early_stop_result = early_stopping.early_stop(val_loss)
    # if early_stop_result:
    #     print(f"Early stopping after {epoch+1} epochs \n")
    #     print(f"Best val loss: {early_stopping.min_validation_loss} \n")
    #     break

    # The learning-rate schedule advances once per epoch unless train_loop asked to skip it.
    if not skip_lr_sched:
        scheduler.step()

Epoch 1
-------------------------------


100%|██████████| 1782/1782 [23:35<00:00,  1.26it/s]


Train loss: 1.801260 - Train Accuracy: 30.625701 - Val loss: 1.652765 - Val Accuracy: 37.492985 

Epoch 2
-------------------------------


100%|██████████| 1782/1782 [22:39<00:00,  1.31it/s]


Train loss: 1.654718 - Train Accuracy: 38.643378 - Val loss: 1.538365 - Val Accuracy: 43.637767 

Epoch 3
-------------------------------


100%|██████████| 1782/1782 [22:33<00:00,  1.32it/s]


Train loss: 1.568366 - Train Accuracy: 43.308081 - Val loss: 1.472648 - Val Accuracy: 47.334456 

Epoch 4
-------------------------------


100%|██████████| 1782/1782 [22:51<00:00,  1.30it/s]


Train loss: 1.514648 - Train Accuracy: 45.803521 - Val loss: 1.437213 - Val Accuracy: 48.947811 

Epoch 5
-------------------------------


100%|██████████| 1782/1782 [24:10<00:00,  1.23it/s]


Train loss: 1.482727 - Train Accuracy: 47.183642 - Val loss: 1.371205 - Val Accuracy: 52.244669 

Epoch 6
-------------------------------


100%|██████████| 1782/1782 [24:14<00:00,  1.22it/s]


Train loss: 1.446094 - Train Accuracy: 49.039001 - Val loss: 1.357011 - Val Accuracy: 52.321829 

Epoch 7
-------------------------------


100%|██████████| 1782/1782 [24:16<00:00,  1.22it/s]


Train loss: 1.420821 - Train Accuracy: 49.935115 - Val loss: 1.352742 - Val Accuracy: 53.065376 

Epoch 8
-------------------------------


100%|██████████| 1782/1782 [25:10<00:00,  1.18it/s]


Train loss: 1.403654 - Train Accuracy: 50.801417 - Val loss: 1.335945 - Val Accuracy: 53.514310 

Epoch 9
-------------------------------


100%|██████████| 1782/1782 [24:02<00:00,  1.24it/s]


Train loss: 1.393546 - Train Accuracy: 51.234568 - Val loss: 1.319841 - Val Accuracy: 54.306958 

Epoch 10
-------------------------------


100%|██████████| 1782/1782 [24:00<00:00,  1.24it/s]


Train loss: 1.387110 - Train Accuracy: 51.508137 - Val loss: 1.313380 - Val Accuracy: 54.391134 

Epoch 11
-------------------------------


100%|██████████| 1782/1782 [24:05<00:00,  1.23it/s]


Train loss: 1.432392 - Train Accuracy: 49.568603 - Val loss: 1.339479 - Val Accuracy: 53.030303 

Epoch 12
-------------------------------


100%|██████████| 1782/1782 [27:18<00:00,  1.09it/s]


Train loss: 1.412357 - Train Accuracy: 50.296366 - Val loss: 1.312589 - Val Accuracy: 53.829966 

Epoch 13
-------------------------------


100%|██████████| 1782/1782 [26:31<00:00,  1.12it/s]


Train loss: 1.395620 - Train Accuracy: 50.982043 - Val loss: 1.302892 - Val Accuracy: 54.587542 

Epoch 14
-------------------------------


100%|██████████| 1782/1782 [24:37<00:00,  1.21it/s]


Train loss: 1.370186 - Train Accuracy: 51.862374 - Val loss: 1.348082 - Val Accuracy: 52.918070 

Epoch 15
-------------------------------


100%|██████████| 1782/1782 [25:44<00:00,  1.15it/s]


Train loss: 1.352966 - Train Accuracy: 52.756734 - Val loss: 1.259144 - Val Accuracy: 56.649832 

Epoch 16
-------------------------------


100%|██████████| 1782/1782 [27:31<00:00,  1.08it/s]


Train loss: 1.329078 - Train Accuracy: 53.609007 - Val loss: 1.248423 - Val Accuracy: 56.797138 

Epoch 17
-------------------------------


100%|██████████| 1782/1782 [27:33<00:00,  1.08it/s]


Train loss: 1.312415 - Train Accuracy: 54.343785 - Val loss: 1.242330 - Val Accuracy: 57.182941 

Epoch 18
-------------------------------


100%|██████████| 1782/1782 [27:11<00:00,  1.09it/s]


Train loss: 1.295244 - Train Accuracy: 55.211841 - Val loss: 1.226500 - Val Accuracy: 57.638889 

Epoch 19
-------------------------------


100%|██████████| 1782/1782 [25:55<00:00,  1.15it/s]


Train loss: 1.285282 - Train Accuracy: 55.387205 - Val loss: 1.216450 - Val Accuracy: 58.122896 

Epoch 20
-------------------------------


100%|██████████| 1782/1782 [26:04<00:00,  1.14it/s]


Train loss: 1.276845 - Train Accuracy: 55.764240 - Val loss: 1.209575 - Val Accuracy: 58.228114 

Epoch 21
-------------------------------


100%|██████████| 1782/1782 [26:49<00:00,  1.11it/s]


Train loss: 1.348571 - Train Accuracy: 52.667298 - Val loss: 1.270971 - Val Accuracy: 56.116723 

Epoch 22
-------------------------------


100%|██████████| 1782/1782 [26:50<00:00,  1.11it/s]


Train loss: 1.325422 - Train Accuracy: 53.754559 - Val loss: 1.249464 - Val Accuracy: 56.586700 

Epoch 23
-------------------------------


100%|██████████| 1782/1782 [25:08<00:00,  1.18it/s]


Train loss: 1.306463 - Train Accuracy: 54.752385 - Val loss: 1.226372 - Val Accuracy: 57.168911 

Epoch 24
-------------------------------


100%|██████████| 1782/1782 [24:49<00:00,  1.20it/s]


Train loss: 1.295480 - Train Accuracy: 54.838314 - Val loss: 1.213139 - Val Accuracy: 58.045735 

Epoch 25
-------------------------------


100%|██████████| 1782/1782 [24:37<00:00,  1.21it/s]


Train loss: 1.273799 - Train Accuracy: 55.939604 - Val loss: 1.186977 - Val Accuracy: 59.343434 

Epoch 26
-------------------------------


100%|██████████| 1782/1782 [23:58<00:00,  1.24it/s]


Train loss: 1.257190 - Train Accuracy: 56.413089 - Val loss: 1.172355 - Val Accuracy: 59.750281 

Epoch 27
-------------------------------


100%|██████████| 1782/1782 [23:26<00:00,  1.27it/s]


Train loss: 1.237622 - Train Accuracy: 57.370581 - Val loss: 1.160795 - Val Accuracy: 59.841470 

Epoch 28
-------------------------------


100%|██████████| 1782/1782 [23:25<00:00,  1.27it/s]


Train loss: 1.220284 - Train Accuracy: 57.849327 - Val loss: 1.145339 - Val Accuracy: 60.353535 

Epoch 29
-------------------------------


100%|██████████| 1782/1782 [25:57<00:00,  1.14it/s]


Train loss: 1.209388 - Train Accuracy: 58.066779 - Val loss: 1.149546 - Val Accuracy: 60.325477 

Epoch 30
-------------------------------


100%|██████████| 1782/1782 [25:43<00:00,  1.15it/s]


Train loss: 1.197581 - Train Accuracy: 58.699846 - Val loss: 1.138188 - Val Accuracy: 60.928732 

Epoch 31
-------------------------------


100%|██████████| 1782/1782 [24:52<00:00,  1.19it/s]


Train loss: 1.269721 - Train Accuracy: 55.929082 - Val loss: 1.196634 - Val Accuracy: 58.501684 

Epoch 32
-------------------------------


100%|██████████| 1782/1782 [25:32<00:00,  1.16it/s]


Train loss: 1.264973 - Train Accuracy: 56.092172 - Val loss: 1.200645 - Val Accuracy: 59.238215 

Epoch 33
-------------------------------


100%|██████████| 1782/1782 [25:14<00:00,  1.18it/s]


Train loss: 1.250956 - Train Accuracy: 56.477974 - Val loss: 1.165993 - Val Accuracy: 59.792368 

Epoch 34
-------------------------------


100%|██████████| 1782/1782 [22:37<00:00,  1.31it/s]


Train loss: 1.242356 - Train Accuracy: 57.089997 - Val loss: 1.148438 - Val Accuracy: 60.521886 

Epoch 35
-------------------------------


100%|██████████| 1782/1782 [22:40<00:00,  1.31it/s]


Train loss: 1.218320 - Train Accuracy: 57.894921 - Val loss: 1.133758 - Val Accuracy: 60.816498 

Epoch 36
-------------------------------


100%|██████████| 1782/1782 [23:36<00:00,  1.26it/s]


Train loss: 1.191012 - Train Accuracy: 58.954125 - Val loss: 1.128328 - Val Accuracy: 61.251403 

Epoch 37
-------------------------------


100%|██████████| 1782/1782 [24:45<00:00,  1.20it/s]


Train loss: 1.174236 - Train Accuracy: 59.560887 - Val loss: 1.115325 - Val Accuracy: 61.489899 

Epoch 38
-------------------------------


100%|██████████| 1782/1782 [25:03<00:00,  1.18it/s]


Train loss: 1.161917 - Train Accuracy: 59.888819 - Val loss: 1.092796 - Val Accuracy: 62.324635 

Epoch 39
-------------------------------


100%|██████████| 1782/1782 [24:31<00:00,  1.21it/s]


Train loss: 1.143162 - Train Accuracy: 60.769150 - Val loss: 1.088825 - Val Accuracy: 62.394781 

Epoch 40
-------------------------------


100%|██████████| 1782/1782 [24:34<00:00,  1.21it/s]


Train loss: 1.141420 - Train Accuracy: 60.725309 - Val loss: 1.081990 - Val Accuracy: 62.682379 

Epoch 41
-------------------------------


100%|██████████| 1782/1782 [24:17<00:00,  1.22it/s]


Train loss: 1.228840 - Train Accuracy: 57.286406 - Val loss: 1.192172 - Val Accuracy: 59.245230 

Epoch 42
-------------------------------


100%|██████████| 1782/1782 [24:10<00:00,  1.23it/s]


Train loss: 1.207836 - Train Accuracy: 58.345609 - Val loss: 1.130114 - Val Accuracy: 61.328563 

Epoch 43
-------------------------------


100%|██████████| 1782/1782 [24:08<00:00,  1.23it/s]


Train loss: 1.192830 - Train Accuracy: 58.843645 - Val loss: 1.110615 - Val Accuracy: 61.679293 

Epoch 44
-------------------------------


100%|██████████| 1782/1782 [23:56<00:00,  1.24it/s]


Train loss: 1.184874 - Train Accuracy: 59.020763 - Val loss: 1.149570 - Val Accuracy: 60.949776 

Epoch 45
-------------------------------


100%|██████████| 1782/1782 [24:07<00:00,  1.23it/s]


Train loss: 1.162871 - Train Accuracy: 60.043140 - Val loss: 1.121566 - Val Accuracy: 61.293490 

Epoch 46
-------------------------------


100%|██████████| 1782/1782 [23:39<00:00,  1.26it/s]


Train loss: 1.144763 - Train Accuracy: 60.539422 - Val loss: 1.082865 - Val Accuracy: 62.415825 

Epoch 47
-------------------------------


100%|██████████| 1782/1782 [23:26<00:00,  1.27it/s]


Train loss: 1.126226 - Train Accuracy: 61.228605 - Val loss: 1.058900 - Val Accuracy: 63.517116 

Epoch 48
-------------------------------


 59%|█████▊    | 1044/1782 [14:07<10:00,  1.23it/s]

In [None]:
# Hand the Colab runtime back once the cells above have finished.
# NOTE(review): presumably this disconnects and frees the VM - anything not saved to Drive is lost.
runtime.unassign()