In [1]:
import sys

# Put the parent directory on the import path so packages that live one level
# above this notebook can be imported (presumably `audio_toolbox`, which later
# cells import -- confirm the repository layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate '..' entry to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
import torch

# Load the serialized features (`data`) and their labels (`label`) from disk.
# The paths are relative to the notebook's working directory.
# NOTE(review): torch.load unpickles its input -- only point it at files
# produced by this project's own preprocessing.
data, label = (
    torch.load(path)
    for path in (
        '../processed_data/complete_dataset/processed_data.pt',
        '../processed_data/complete_dataset/processed_label.pt',
    )
)

In [3]:
from audio_toolbox.metrics import audio_dataset_split

# Seed handed to the splitter as `random_state`.
RANDOM_STATE = 42

# Split features and labels using a (0.9, 0.05, 0.05) train/val/test ratio.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = audio_dataset_split(
    data,
    label,
    train_val_test_ratio=(0.9, 0.05, 0.05),
    random_state=RANDOM_STATE,
)

In [4]:
# Collapse every dimension after the first into a single axis, so each split
# becomes a 2-D (first_dim, features) array.
# `reshape` is used instead of `view`: `view` raises a RuntimeError when the
# tensor is not contiguous (e.g. after indexing or permuting inside the split
# helper), whereas `reshape` returns a view when possible and copies otherwise.
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

In [5]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardise the flattened features. The scaler is fitted on the training
# split only and then applied unchanged to validation and test.
scaler = StandardScaler()
train_scaled = scaler.fit_transform(X_train_flat)
val_scaled = scaler.transform(X_val_flat)
test_scaled = scaler.transform(X_test_flat)

# A fractional n_components asks PCA to keep as many components as needed to
# explain that share of the variance (here 0.9).
pca = PCA(n_components=0.9)

# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


def _as_float_tensor(array):
    """Wrap `array` in a `torch.float` tensor placed on `device`."""
    return torch.tensor(array, dtype=torch.float, device=device)


# PCA is likewise fitted on the training split and reused for the other two.
train_data = _as_float_tensor(pca.fit_transform(train_scaled))
val_data = _as_float_tensor(pca.transform(val_scaled))
test_data = _as_float_tensor(pca.transform(test_scaled))

In [6]:
from sklearn.preprocessing import LabelBinarizer

# One-hot encode the labels of every split with a single shared encoder.
lb = LabelBinarizer()

# Learn the class -> column mapping from the training labels only ...
y_train = torch.tensor(lb.fit_transform(y_train), device=device)
# ... and reuse that mapping for validation and test. Calling fit_transform
# here would re-fit the encoder on each split: if a small split is missing a
# class, its columns would be fewer and would no longer line up with the
# training encoding, silently corrupting the loss and metrics.
y_val = torch.tensor(lb.transform(y_val), device=device)
y_test = torch.tensor(lb.transform(y_test), device=device)
# NOTE(review): this cell rebinds y_train / y_val / y_test, so it is not
# idempotent -- re-run the split cell before re-running this one.

(torch.Size([899, 661]), torch.Size([49, 661]), torch.Size([51, 661]))

In [7]:
from torch.utils.data import TensorDataset

# Pair each split's features with its labels, keyed by split name.
datasets = {
    split: TensorDataset(features, targets)
    for split, features, targets in (
        ('train', train_data, y_train),
        ('val', val_data, y_val),
        ('test', test_data, y_test),
    )
}

In [8]:
from audio_toolbox.models import SimpleLinearModel

# Width of the model input: the second dimension of the training features.
input_size = train_data.size(1)
# NOTE(review): presumably the number of target classes -- confirm it matches
# the number of columns in the one-hot labels.
output_size = 10
batch_size = 32

# Build the model on the same `device` the data tensors were created on.
# The previous hard-coded device='cuda' ignored the CPU fallback computed
# earlier in the notebook, so it failed on machines without a GPU.
model = SimpleLinearModel(input_size, output_size, hidden_dim=64, dropout_prob=0.8, device=device)


In [9]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch import nn
from audio_toolbox.models import OneHotCrossEntropyLoss

# Initial step size for Adam; adjust as needed.
learning_rate = 1e-3

# Loss object from the project toolbox.
loss_fn = OneHotCrossEntropyLoss()

# Adam over all model parameters, with the learning rate multiplied by 0.5
# every 5 scheduler steps.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

In [10]:
from audio_toolbox.trainer import ModelTrainer

# Hand the datasets dict, model, loss, optimizer and LR scheduler to the
# project's trainer. Arguments are positional; ModelTrainer's signature is not
# visible here -- confirm the order if it changes.
trainer = ModelTrainer(datasets, model, loss_fn, optimizer, scheduler)

In [11]:
# Keyword arguments later expanded into trainer.train().
# NOTE(review): `save=False` presumably disables writing checkpoints/results
# -- confirm against ModelTrainer.train.
trainer_config = dict(
    save=False,
    num_epochs=100,
    batch_size=batch_size,
)

In [12]:
# Start training, expanding `trainer_config` into keyword arguments.
trainer.train(**trainer_config)

INFO:root:Epoch 0001, Learning rate: 0.001000, Training loss: 2.26744, Val loss: 1.82666, Test loss: 1.80099, Epoch time: 0.22367
INFO:root:Epoch 0002, Learning rate: 0.001000, Training loss: 2.02941, Val loss: 1.03967, Test loss: 1.03557, Epoch time: 0.19351
INFO:root:Epoch 0003, Learning rate: 0.001000, Training loss: 1.77928, Val loss: 0.54340, Test loss: 0.46794, Epoch time: 0.21136
INFO:root:Epoch 0004, Learning rate: 0.001000, Training loss: 1.62441, Val loss: 0.26945, Test loss: 0.24986, Epoch time: 0.39459
INFO:root:Epoch 0005, Learning rate: 0.001000, Training loss: 1.53162, Val loss: 0.17933, Test loss: 0.17954, Epoch time: 0.25400
INFO:root:Epoch 0006, Learning rate: 0.000500, Training loss: 1.50236, Val loss: 0.15494, Test loss: 0.18007, Epoch time: 0.25394
INFO:root:Epoch 0007, Learning rate: 0.000500, Training loss: 1.49450, Val loss: 0.13792, Test loss: 0.13796, Epoch time: 0.29746
INFO:root:Epoch 0008, Learning rate: 0.000500, Training loss: 1.48948, Val loss: 0.12237, 

In [13]:
# Run the trained model over each split's features, in train/val/test order.
train_res, val_res, test_res = map(
    trainer.predict,
    (train_data, val_data, test_data),
)

In [14]:
from sklearn.metrics import accuracy_score

# accuracy_score is documented as accuracy_score(y_true, y_pred), so the
# ground-truth labels go first and the predictions second.
# The label tensors were created on `device`; `.cpu()` moves them to host
# memory (a no-op when already there) because scikit-learn converts its
# inputs through NumPy, which cannot read CUDA tensors.
# NOTE(review): the predictions are assumed to already be in a form
# scikit-learn accepts -- confirm what trainer.predict returns.
print(f"Train accuracy: {100 * accuracy_score(y_train.cpu(), train_res):.2f}%")
print(f"Validation accuracy: {100 * accuracy_score(y_val.cpu(), val_res):.2f}%")
print(f"Test accuracy: {100 * accuracy_score(y_test.cpu(), test_res):.2f}%")

Train accuracy: 98.89%
Validation accuracy: 40.82%
Test accuracy: 52.94%


In [15]:
from audio_toolbox.metrics import precision_recall

def _f1_for(features, targets):
    """Return the fourth value `precision_recall` yields for one split.

    The first three returned values are discarded; the fourth is treated as
    the F1 score (per the variable naming -- confirm against precision_recall).
    """
    _, _, _, f1 = precision_recall(trainer, features, targets)
    return f1


f1_train = _f1_for(train_data, y_train)
f1_val = _f1_for(val_data, y_val)
f1_test = _f1_for(test_data, y_test)

In [86]:
# Report the F1 score of each split, one per line, to four decimals.
print(
    f"Train f1 score: {f1_train:.4f}",
    f"Validation f1 score: {f1_val:.4f}",
    f"Test f1 score: {f1_test:.4f}",
    sep="\n",
)

Train f1 score: 0.9978
Validation f1 score: 0.4342
Test f1 score: 0.4468
