In [1]:
import sys

sys.path.append('..')

In [2]:
import torch

data = torch.load('../processed_data/complete_dataset/processed_data.pt')
label = torch.load('../processed_data/complete_dataset/processed_label.pt')

In [3]:
from audio_toolbox.metrics import audio_dataset_split

RANDOM_STATE = 42
X_train, y_train, _, _,\
X_test, y_test = audio_dataset_split(data, label, train_val_test_ratio=(0.9, 0, 0.1), random_state=RANDOM_STATE)

In [4]:
X_train_flat = X_train.view(X_train.shape[0], -1).cpu().numpy()
X_test_flat = X_test.view(X_test.shape[0], -1).cpu().numpy()

Do PCA on flattened features

In [5]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Initialize the scaler and PCA
scaler = StandardScaler()
pca = PCA(n_components=0.9)  # Keep 90% of the variance

# Fit the scaler on the training set and transform all sets
X_train_scaled = scaler.fit_transform(X_train_flat)
X_test_scaled = scaler.transform(X_test_flat)

reduced_X_train_flat = pca.fit_transform(X_train_scaled)
reduced_X_test_flat = pca.transform(X_test_scaled)

In [6]:
X_train = X_train.cpu().numpy()
X_test = X_test.cpu().numpy()
y_train = y_train.cpu().numpy()
y_test = y_test.cpu().numpy()

In [7]:
print(f"Training samples: {X_train_flat.shape} -> {reduced_X_train_flat.shape}")
print(f"Test samples: {X_test_flat.shape} -> {reduced_X_test_flat.shape}")

Training samples: (899, 92880) -> (899, 661)
Test samples: (100, 92880) -> (100, 661)


In [8]:
from sklearn.linear_model import LogisticRegression
from audio_toolbox.metrics import calculate_acc, precision_recall

logistic_model = LogisticRegression(max_iter=1000, C=1e-2, random_state=RANDOM_STATE)
logistic_model.fit(reduced_X_train_flat, y_train)
print('Model fitting finished')

for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(logistic_model, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(logistic_model, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 99.8888%, 1 mismatches out of 899 samples
(Averaged) Train precision: 0.9989, recall: 0.9989, f1 score: 0.9989
Test accuracy: 61.0000%, 39 mismatches out of 100 samples
(Averaged) Test precision: 0.6067, recall: 0.6346, f1 score: 0.5883


In [9]:
from sklearn.svm import SVC

svm_classifier = SVC(C=1e-3, random_state=RANDOM_STATE)
svm_classifier.fit(reduced_X_train_flat, y_train)

print('Model fitting finished')

for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(svm_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(svm_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 18.7987%, 730 mismatches out of 899 samples
(Averaged) Train precision: 0.3354, recall: 0.1798, f1 score: 0.0869
Test accuracy: 6.0000%, 94 mismatches out of 100 samples
(Averaged) Test precision: 0.0462, recall: 0.1000, f1 score: 0.0169


In [10]:
ovo_svm = SVC(decision_function_shape='ovo', random_state=RANDOM_STATE)
ovo_svm.fit(reduced_X_train_flat, y_train)

print('Model fitting finished')

for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(ovo_svm, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(ovo_svm, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 98.5539%, 13 mismatches out of 899 samples
(Averaged) Train precision: 0.9858, recall: 0.9856, f1 score: 0.9856
Test accuracy: 30.0000%, 70 mismatches out of 100 samples
(Averaged) Test precision: 0.5190, recall: 0.3417, f1 score: 0.2150


In [11]:
from sklearn.ensemble import RandomForestClassifier

rand_forest_classifier = RandomForestClassifier(n_estimators=100,
                                                max_depth=4,
                                                random_state=RANDOM_STATE)
rand_forest_classifier.fit(reduced_X_train_flat, y_train)

print('Model fitting finished')

for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(rand_forest_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(rand_forest_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 83.2036%, 151 mismatches out of 899 samples
(Averaged) Train precision: 0.8443, recall: 0.8304, f1 score: 0.8311
Test accuracy: 27.0000%, 73 mismatches out of 100 samples
(Averaged) Test precision: 0.2973, recall: 0.3142, f1 score: 0.1790


In [12]:
from sklearn.naive_bayes import GaussianNB

gnb_classifier = GaussianNB()
gnb_classifier.fit(X_train_flat, y_train)

print('Model fitting finished')

for x, y, split in zip([X_train_flat, X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(gnb_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(gnb_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 84.4271%, 140 mismatches out of 899 samples
(Averaged) Train precision: 0.8648, recall: 0.8436, f1 score: 0.8448
Test accuracy: 61.0000%, 39 mismatches out of 100 samples
(Averaged) Test precision: 0.6451, recall: 0.6508, f1 score: 0.6162


In [13]:
from sklearn.naive_bayes import GaussianNB

gnb_classifier = GaussianNB()
gnb_classifier.fit(reduced_X_train_flat, y_train)

print('Model fitting finished')

for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(gnb_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(gnb_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 57.9533%, 378 mismatches out of 899 samples
(Averaged) Train precision: 0.6591, recall: 0.5800, f1 score: 0.5631
Test accuracy: 13.0000%, 87 mismatches out of 100 samples
(Averaged) Test precision: 0.4229, recall: 0.1250, f1 score: 0.0650


In [14]:
from sklearn.ensemble import GradientBoostingClassifier

xgboost_classifier = GradientBoostingClassifier(subsample=0.8, max_depth=2, random_state=RANDOM_STATE)
xgboost_classifier.fit(reduced_X_train_flat, y_train)

print('Model fitting finished')

for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(xgboost_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(gnb_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 99.7775%, 2 mismatches out of 899 samples
(Averaged) Train precision: 0.6591, recall: 0.5800, f1 score: 0.5631
Test accuracy: 45.0000%, 55 mismatches out of 100 samples
(Averaged) Test precision: 0.4229, recall: 0.1250, f1 score: 0.0650
