In [1]:
import sys

# Put the parent directory on the import path (presumably so the project-local
# `audio_toolbox` package imported by later cells resolves - confirm the layout).
# The membership check keeps the cell idempotent: re-running it no longer stacks
# duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
import torch

# Pick the compute device. GPU index 2 is preferred (the original choice), but it
# only exists on hosts with at least three visible GPUs, so fall back to the first
# GPU and finally to the CPU instead of failing later with an invalid device ordinal.
if torch.cuda.is_available():
    device = 'cuda:2' if torch.cuda.device_count() > 2 else 'cuda:0'
else:
    device = 'cpu'

# NOTE(security): torch.load unpickles the file contents - only load artifacts
# that were produced by a trusted preprocessing run.
# map_location='cpu' makes loading independent of the device the tensors were
# saved from; the flattened features are fed to scikit-learn (host memory) and the
# labels are moved to `device` explicitly in a later cell.
data = torch.load('../processed_data/complete_dataset/processed_data.pt', map_location='cpu')
label = torch.load('../processed_data/complete_dataset/processed_label.pt', map_location='cpu')

In [3]:
from audio_toolbox.metrics import audio_dataset_split

# Seed handed to the splitter through its `random_state` argument.
RANDOM_STATE = 42

# Partition the loaded data/label pair using a (0.9, 0.05, 0.05)
# train / validation / test ratio.
(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test,
) = audio_dataset_split(
    data,
    label,
    train_val_test_ratio=(0.9, 0.05, 0.05),
    random_state=RANDOM_STATE,
)

In [4]:
# Collapse every non-batch dimension so each sample becomes a single feature row
# (first dimension kept, the rest merged), the 2-D layout the scaler/PCA cell consumes.
# reshape() is used instead of view(): view() raises on non-contiguous tensors,
# while reshape() returns a view when it can and silently copies otherwise, so the
# result is identical for contiguous inputs and the cell no longer depends on how
# the splitter produced its outputs.
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

In [5]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# A fractional n_components asks scikit-learn's PCA to keep as many components as
# are needed to explain that share (here 0.9) of the variance.
pca = PCA(n_components=0.9)


def _as_float_tensor(features):
    """Wrap an array of features as a float tensor placed on `device`."""
    return torch.tensor(features, dtype=torch.float, device=device)


# Standardisation statistics come from the training split only; the validation and
# test splits are transformed with those same statistics.
train_scaled = scaler.fit_transform(X_train_flat)
val_scaled = scaler.transform(X_val_flat)
test_scaled = scaler.transform(X_test_flat)

# Same policy for PCA: fit on the training split, then project the other two splits.
train_data = _as_float_tensor(pca.fit_transform(train_scaled))
val_data = _as_float_tensor(pca.transform(val_scaled))
test_data = _as_float_tensor(pca.transform(test_scaled))

# Labels follow the features onto the compute device.
y_train, y_val, y_test = (
    targets.to(device) for targets in (y_train, y_val, y_test)
)

In [6]:
from torch.utils.data import TensorDataset

# Pair each split's projected features with its labels, keyed by split name
# ('train', 'val', 'test' - in that order).
datasets = {
    split_name: TensorDataset(features, targets)
    for split_name, features, targets in (
        ('train', train_data, y_train),
        ('val', val_data, y_val),
        ('test', test_data, y_test),
    )
}

In [34]:
from audio_toolbox.models import LinearModel

# Seed PyTorch's global RNG before the model is built so that anything drawn from
# it (e.g. parameter initialisation and dropout masks) is reproducible across
# Restart & Run All. Reuses the seed already defined for the dataset split.
torch.manual_seed(RANDOM_STATE)

# Feature width after PCA: second dimension of the projected training tensor.
input_size = train_data.size(1)
# Number of model outputs - assumed to equal the number of target classes; TODO confirm.
output_size = 10
# Mini-batch size, forwarded to the trainer through `trainer_config`.
batch_size = 32

model = LinearModel(input_size, output_size, hidden_dim=64, dropout_prob=0.3, device=device)


In [35]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch import nn

# Initial step size for Adam; tune here if training behaves poorly.
learning_rate = 1e-3

# Cross-entropy objective for the classifier.
loss_fn = nn.CrossEntropyLoss()

optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
)

# Step decay: multiply the learning rate by 0.5 after every 5 scheduler steps.
scheduler = lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.5,
)

In [36]:
from audio_toolbox.trainer import ModelTrainer

# Hand the split datasets, the model, the loss, the optimizer and the LR scheduler
# to the project's ModelTrainer; it is used below for both training and prediction.
trainer = ModelTrainer(datasets, model, loss_fn, optimizer, scheduler)

In [37]:
# Keyword arguments that are later expanded into `trainer.train(...)`.
# `save=False` presumably disables writing checkpoints - confirm against ModelTrainer.
trainer_config = dict(
    save=False,
    num_epochs=100,
    batch_size=batch_size,
)

In [38]:
# Run training with the settings from `trainer_config`; per-epoch metrics are
# emitted through the root logger (see the output below).
# NOTE(review): the returned `best_state_dict` is never referenced again in the
# visible cells - confirm whether the trainer restores the best weights itself
# before `trainer.predict` is used for the final evaluation.
best_state_dict = trainer.train(**trainer_config)

INFO:root:Epoch 0001, Learning rate: 0.001000, Training metric: 0.32651, Val metric: 0.28952, Test metric: 0.24095, Epoch time: 0.05884
INFO:root:Epoch 0002, Learning rate: 0.001000, Training metric: 0.49677, Val metric: 0.27574, Test metric: 0.34539, Epoch time: 0.05715
INFO:root:Epoch 0003, Learning rate: 0.001000, Training metric: 0.60057, Val metric: 0.37776, Test metric: 0.40789, Epoch time: 0.05802
INFO:root:Epoch 0004, Learning rate: 0.001000, Training metric: 0.71695, Val metric: 0.39706, Test metric: 0.45559, Epoch time: 0.05753


INFO:root:Epoch 0005, Learning rate: 0.001000, Training metric: 0.82004, Val metric: 0.48346, Test metric: 0.53454, Epoch time: 0.05630
INFO:root:Epoch 0006, Learning rate: 0.000500, Training metric: 0.83549, Val metric: 0.45404, Test metric: 0.48684, Epoch time: 0.05899
INFO:root:Epoch 0007, Learning rate: 0.000500, Training metric: 0.89476, Val metric: 0.42647, Test metric: 0.52385, Epoch time: 0.07901
INFO:root:Epoch 0008, Learning rate: 0.000500, Training metric: 0.93139, Val metric: 0.42647, Test metric: 0.56579, Epoch time: 0.07114
INFO:root:Epoch 0009, Learning rate: 0.000500, Training metric: 0.95366, Val metric: 0.44210, Test metric: 0.50247, Epoch time: 0.06398
INFO:root:Epoch 0010, Learning rate: 0.000500, Training metric: 0.97198, Val metric: 0.41268, Test metric: 0.51809, Epoch time: 0.05617
INFO:root:Epoch 0011, Learning rate: 0.000250, Training metric: 0.97845, Val metric: 0.36949, Test metric: 0.55016, Epoch time: 0.05650
INFO:root:Epoch 0012, Learning rate: 0.000250, T

In [39]:
# Run the trainer's prediction on every split (train, then val, then test) and
# bring each result back to host memory as a NumPy array for the metric cells.
train_res, val_res, test_res = (
    trainer.predict(features).cpu().numpy()
    for features in (train_data, val_data, test_data)
)

In [40]:
from sklearn.metrics import accuracy_score

# Report accuracy per split as a percentage.
# scikit-learn's signature is accuracy_score(y_true, y_pred): the ground-truth
# labels go first and the predictions second. Accuracy itself is symmetric, so the
# printed numbers are unchanged, but the documented order avoids surprises if this
# cell is ever adapted to an asymmetric metric.
print(f"Train accuracy: {100 * accuracy_score(y_train.cpu().numpy(), train_res):.2f}%")
print(f"Validation accuracy: {100 * accuracy_score(y_val.cpu().numpy(), val_res):.2f}%")
print(f"Test accuracy: {100 * accuracy_score(y_test.cpu().numpy(), test_res):.2f}%")

Train accuracy: 99.33%
Validation accuracy: 42.86%
Test accuracy: 50.98%


In [41]:
from audio_toolbox.metrics import precision_recall

# Evaluate each split with the project's `precision_recall` helper (train, then
# val, then test). Each call yields four values; only the last one - used below
# as the F1 score - is kept, the first three are discarded into `_`.
(_, _, _, f1_train), (_, _, _, f1_val), (_, _, _, f1_test) = (
    precision_recall(trainer, features, targets)
    for features, targets in (
        (train_data, y_train),
        (val_data, y_val),
        (test_data, y_test),
    )
)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [42]:
# Assemble the three report lines first, then emit them one per line.
f1_report = [
    f"Train f1 score: {f1_train:.4f}",
    f"Validation f1 score: {f1_val:.4f}",
    f"Test f1 score: {f1_test:.4f}",
]
print(*f1_report, sep="\n")

Train f1 score: 0.9934
Validation f1 score: 0.3939
Test f1 score: 0.4517
