In [1]:
%load_ext autoreload
%autoreload 2
from transformer import SpeechTransformer
import os
import torch
import numpy as np
from torch.utils.data import DataLoader
from custom_dataset import SpectrogramDataset, BinaryDataset, create_sampler
from training_pipeline import repeat_training, set_seed, worker_init_fn, plot_results
from collections import Counter
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# ---- Reproducibility -------------------------------------------------------
SEED = 42
set_seed(SEED)

# ---- Training schedule -----------------------------------------------------
repetitions = 4   # forwarded to repeat_training as its first argument
lr = 1e-3
epochs = 100
tolerance = 10    # passed as `tolerance=`; presumably early-stopping patience — TODO confirm
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# ---- Regularisation / sampling ---------------------------------------------
alpha = 1                  # second argument of create_sampler
dropout = 0.3
weight_decay = 1e-4
augmented_fraction = 0.5   # forwarded to the training SpectrogramDataset
label_smoothing = 0.1

# ---- DataLoader settings ---------------------------------------------------
batch_size = 256
n_workers = 4
# Prefetching and persistent workers are only enabled when worker
# processes are actually used.
uses_workers = n_workers > 0
if uses_workers:
    prefetch_factor = 2
else:
    prefetch_factor = None
persistent_workers = uses_workers

In [3]:
data_path = "data/train/audio_transformed"

# Train / validation / test splits are all read from the same directory;
# augmentation is switched on for the training split only.
train_dataset = SpectrogramDataset(
    data_path,
    set_type=SpectrogramDataset.TRAIN,
    augmentation=True,
    augmented_fraction=augmented_fraction,
)
val_dataset = SpectrogramDataset(data_path, set_type=SpectrogramDataset.VAL)
test_dataset = SpectrogramDataset(data_path, set_type=SpectrogramDataset.TEST)

# The training loader draws indices from this sampler, so `shuffle` stays False.
sampler = create_sampler(train_dataset, alpha)

# Keyword arguments shared by all three loaders.
common_loader_kwargs = dict(
    batch_size=batch_size,
    shuffle=False,
    num_workers=n_workers,
    pin_memory=True,
    prefetch_factor=prefetch_factor,
    persistent_workers=persistent_workers,
)

train_loader = DataLoader(
    train_dataset,
    sampler=sampler,
    worker_init_fn=worker_init_fn,
    **common_loader_kwargs,
)
val_loader = DataLoader(val_dataset, **common_loader_kwargs)
test_loader = DataLoader(test_dataset, **common_loader_kwargs)

## model dimension (`d_model`)

In [4]:
# Sweep over the model dimension while the number of heads and layers stay fixed.
nhead = 4
num_layers = 4

set_seed(SEED)

d_model_list = [64, 128, 256]
for d_model in d_model_list:

    def init_transformer():
        """Return a new SpeechTransformer using the `d_model` of the current iteration."""
        return SpeechTransformer(
            num_classes=11,
            dropout=dropout,
            d_model=d_model,
            nhead=nhead,
            num_layers=num_layers,
        )

    # Each configuration gets its own checkpoint and history directory.
    model_dir = f"output/models/all_classes/transformer/d_model/d_model={d_model}"
    history_dir = f"output/history/all_classes/transformer/d_model/d_model={d_model}"
    for directory in (model_dir, history_dir):
        os.makedirs(directory, exist_ok=True)

    model_path = f"{model_dir}/transformer.pth"
    history_path = f"{history_dir}/transformer.pkl"

    repeat_training(
        repetitions,
        init_transformer,
        lr,
        model_path,
        history_path,
        epochs,
        train_loader,
        val_loader,
        test_loader,
        device,
        tolerance=tolerance,
        weight_decay=weight_decay,
        label_smoothing=label_smoothing,
    )

training iteration: 1 of 4
starting training...
epoch: 1, training loss: 0.009496262271950098, training accuracy: 9.180238020670217, training balanced accuracy: 9.150705246186215
epoch: 1, validation loss: 0.00930058945624679, validation accuracy: 3.8393645189761694, validation balanced accuracy: 9.090909090909092
model saved




KeyboardInterrupt



In [None]:
# Plot the d_model sweep. The values listed here match the
# `d_model=<value>` sub-directory names written by the training loop;
# presumably plot_results looks the histories up by those names — TODO confirm.
d_model_list = [64, 128, 256]
d_model_root_dir = "output/history/all_classes/transformer/d_model"
x_label = "model's dimension"
plot_results(
    d_model_root_dir,
    d_model_list,
    x_label,
    use_balanced_accuracy=True,
)

## number of heads

In [None]:
# Sweep over the number of attention heads while the model dimension and
# the number of layers stay fixed.
#
# `d_model` used to be the placeholder `None`, which was forwarded unchanged
# to SpeechTransformer(d_model=None). It is now set to the documented
# fallback so the cell runs on a fresh kernel.
# NOTE(review): assumes SpeechTransformer needs an integer d_model — confirm.
# TODO: replace 128 with the best value found in the d_model sweep.
d_model = 128
num_layers = 4

set_seed(SEED)

nhead_list = [2, 4, 8]
for nhead in nhead_list:

    def init_transformer():
        """Return a new SpeechTransformer using the `nhead` of the current iteration."""
        return SpeechTransformer(
            num_classes=11,
            dropout=dropout,
            d_model=d_model,
            nhead=nhead,
            num_layers=num_layers,
        )

    # Each configuration gets its own checkpoint and history directory.
    model_dir = f"output/models/all_classes/transformer/nhead/nhead={nhead}"
    history_dir = f"output/history/all_classes/transformer/nhead/nhead={nhead}"

    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)

    model_path = model_dir + "/transformer.pth"
    history_path = history_dir + "/transformer.pkl"

    repeat_training(
        repetitions,
        init_transformer,
        lr,
        model_path,
        history_path,
        epochs,
        train_loader,
        val_loader,
        test_loader,
        device,
        tolerance=tolerance,
        weight_decay=weight_decay,
        label_smoothing=label_smoothing,
    )

In [None]:
# Plot the nhead sweep. The values listed here match the
# `nhead=<value>` sub-directory names written by the training loop;
# presumably plot_results looks the histories up by those names — TODO confirm.
nhead_list = [2, 4, 8]
nhead_root_dir = "output/history/all_classes/transformer/nhead"
x_label = "number of heads"
plot_results(
    nhead_root_dir,
    nhead_list,
    x_label,
    use_balanced_accuracy=True,
)

## number of layers

In [None]:
# Sweep over the number of encoder layers while the model dimension and
# the number of heads stay fixed.
#
# Changes from the earlier version of this cell:
#   * `d_model` was the placeholder `None`, forwarded unchanged to
#     SpeechTransformer; it is now the documented fallback of 128.
#   * `nhead` was never assigned here, so its value depended on which
#     earlier cell had run last (its loop variable leaked into this cell).
#     It is now pinned to the baseline of 4 used by the d_model sweep.
#   * The dead `num_layers = None` assignment is gone; the loop below
#     sets `num_layers` before it is first read.
# NOTE(review): assumes SpeechTransformer needs an integer d_model — confirm.
# TODO: replace both baselines with the best values from the earlier sweeps.
d_model = 128
nhead = 4

set_seed(SEED)

num_layers_list = [2, 4, 8]
for num_layers in num_layers_list:

    def init_transformer():
        """Return a new SpeechTransformer using the `num_layers` of the current iteration."""
        return SpeechTransformer(
            num_classes=11,
            dropout=dropout,
            d_model=d_model,
            nhead=nhead,
            num_layers=num_layers,
        )

    # Each configuration gets its own checkpoint and history directory.
    model_dir = f"output/models/all_classes/transformer/num_layers/num_layers={num_layers}"
    history_dir = f"output/history/all_classes/transformer/num_layers/num_layers={num_layers}"

    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)

    model_path = model_dir + "/transformer.pth"
    history_path = history_dir + "/transformer.pkl"

    repeat_training(
        repetitions,
        init_transformer,
        lr,
        model_path,
        history_path,
        epochs,
        train_loader,
        val_loader,
        test_loader,
        device,
        tolerance=tolerance,
        weight_decay=weight_decay,
        label_smoothing=label_smoothing,
    )

In [None]:
# Plot the num_layers sweep. The values listed here match the
# `num_layers=<value>` sub-directory names written by the training loop;
# presumably plot_results looks the histories up by those names — TODO confirm.
num_layers_list = [2, 4, 8]
num_layers_root_dir = "output/history/all_classes/transformer/num_layers"
x_label = "number of layers"
plot_results(
    num_layers_root_dir,
    num_layers_list,
    x_label,
    use_balanced_accuracy=True,
)