In [30]:
import sys

# Add the parent directory to the module search path. Presumably this is what
# lets the project-local `audio_toolbox` package be imported later — TODO confirm.
sys.path.append('..')

In [31]:
import torch

# Load the pre-processed features and their labels from disk.
# NOTE(review): torch.load unpickles the file contents, so only load files
# from a trusted source.
data = torch.load('../processed_data/complete_dataset/processed_data.pt')
label = torch.load('../processed_data/complete_dataset/processed_label.pt')

In [32]:
from audio_toolbox.metrics import audio_dataset_split

# Single seed shared by the split and by every seeded estimator in this notebook.
RANDOM_STATE = 42

# Split with ratio (0.9, 0, 0.1): the validation share is zero, so the two
# middle return values are discarded.
(X_train, y_train, _, _, X_test, y_test) = audio_dataset_split(
    data,
    label,
    train_val_test_ratio=(0.9, 0, 0.1),
    random_state=RANDOM_STATE,
)

In [33]:
# Display the dimensions of the training tensor as a sanity check on the split.
X_train.shape

torch.Size([899, 6, 12, 1290])

In [34]:
# Flatten every sample to a single feature vector (one row per sample) so the
# data can be fed to scikit-learn estimators, which expect 2-D input.
# `reshape` is used instead of `view`: it returns the same result for contiguous
# tensors but also works when the split hands back a non-contiguous tensor,
# where `view` would raise a RuntimeError.
X_train_flat = X_train.reshape(X_train.shape[0], -1).cpu().numpy()
X_test_flat = X_test.reshape(X_test.shape[0], -1).cpu().numpy()

Standardize the flattened features, then apply PCA to reduce their dimensionality

In [35]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardize with statistics learned from the training split only, so no
# information from the test split leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(X_train_flat)
X_train_scaled = scaler.transform(X_train_flat)
X_test_scaled = scaler.transform(X_test_flat)

# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share (here 90%) of the variance.
pca = PCA(n_components=0.9)

# The projection is likewise fitted on the training split and reused for test.
reduced_X_train_flat = pca.fit_transform(X_train_scaled)
reduced_X_test_flat = pca.transform(X_test_scaled)

In [36]:
def _as_numpy(value):
    """Return ``value`` as a NumPy array if it is a torch tensor, else unchanged.

    The names below are rebound in place, so after the first execution they
    already hold NumPy arrays (which have no ``.cpu()`` method). The guard keeps
    this cell safe to re-run without restarting the kernel.
    """
    if torch.is_tensor(value):
        return value.cpu().numpy()
    return value


# Convert the splits and their labels to NumPy for use with scikit-learn.
X_train = _as_numpy(X_train)
X_test = _as_numpy(X_test)
y_train = _as_numpy(y_train)
y_test = _as_numpy(y_test)

In [37]:
# Report how much PCA shrank the feature dimension for each split.
print(
    f"Training samples: {X_train_flat.shape} -> {reduced_X_train_flat.shape}",
    f"Test samples: {X_test_flat.shape} -> {reduced_X_test_flat.shape}",
    sep="\n",
)

Training samples: (899, 92880) -> (899, 661)
Test samples: (100, 92880) -> (100, 661)


In [38]:
from sklearn.linear_model import LogisticRegression
from audio_toolbox.metrics import calculate_acc, precision_recall

# Logistic-regression baseline on the PCA-reduced features.
logistic_model = LogisticRegression(
    C=1e-2,
    max_iter=1000,
    random_state=RANDOM_STATE,
)
logistic_model.fit(reduced_X_train_flat, y_train)
print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for split, x, y in (
    ('Train', reduced_X_train_flat, y_train),
    ('Test', reduced_X_test_flat, y_test),
):
    acc, correct, incorrect = calculate_acc(logistic_model, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(logistic_model, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 99.8888%, 1 mismatches out of 899 samples
(Averaged) Train precision: 0.9989, recall: 0.9989, f1 score: 0.9989
Test accuracy: 61.0000%, 39 mismatches out of 100 samples
(Averaged) Test precision: 0.6067, recall: 0.6346, f1 score: 0.5883


In [39]:
from sklearn.svm import SVC

# Linear-kernel support vector classifier on the PCA-reduced features.
svm_classifier = SVC(
    kernel='linear',
    C=1,
    random_state=RANDOM_STATE,
)
svm_classifier.fit(reduced_X_train_flat, y_train)
print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for split, x, y in (
    ('Train', reduced_X_train_flat, y_train),
    ('Test', reduced_X_test_flat, y_test),
):
    acc, correct, incorrect = calculate_acc(svm_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(svm_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 99.8888%, 1 mismatches out of 899 samples
(Averaged) Train precision: 0.9989, recall: 0.9989, f1 score: 0.9989
Test accuracy: 56.0000%, 44 mismatches out of 100 samples
(Averaged) Test precision: 0.5929, recall: 0.5862, f1 score: 0.5515


In [40]:
# Linear SVC again, this time requesting the one-vs-one decision function shape.
# NOTE(review): the recorded metrics for this cell are identical to the previous
# SVC cell; `decision_function_shape` presumably only changes the shape of
# `decision_function` output, not the predictions — confirm against sklearn docs.
ovo_svm = SVC(
    kernel='linear',
    decision_function_shape='ovo',
    random_state=RANDOM_STATE,
)
ovo_svm.fit(reduced_X_train_flat, y_train)
print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for split, x, y in (
    ('Train', reduced_X_train_flat, y_train),
    ('Test', reduced_X_test_flat, y_test),
):
    acc, correct, incorrect = calculate_acc(ovo_svm, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(ovo_svm, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 99.8888%, 1 mismatches out of 899 samples
(Averaged) Train precision: 0.9989, recall: 0.9989, f1 score: 0.9989
Test accuracy: 56.0000%, 44 mismatches out of 100 samples
(Averaged) Test precision: 0.5929, recall: 0.5862, f1 score: 0.5515


In [41]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest baseline (library defaults apart from the seed) on the
# PCA-reduced features.
rand_forest_classifier = RandomForestClassifier(random_state=RANDOM_STATE)
rand_forest_classifier.fit(reduced_X_train_flat, y_train)
print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for split, x, y in (
    ('Train', reduced_X_train_flat, y_train),
    ('Test', reduced_X_test_flat, y_test),
):
    acc, correct, incorrect = calculate_acc(rand_forest_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(rand_forest_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 100.0000%, 0 mismatches out of 899 samples
(Averaged) Train precision: 1.0000, recall: 1.0000, f1 score: 1.0000
Test accuracy: 27.0000%, 73 mismatches out of 100 samples
(Averaged) Test precision: 0.4464, recall: 0.3200, f1 score: 0.2143


In [42]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes on the raw flattened features (no scaling, no PCA).
# NOTE(review): the recorded run of this cell printed the fit message and then
# ended in a KeyboardInterrupt, so no metrics were captured for this variant.
gnb_classifier = GaussianNB()
gnb_classifier.fit(X_train_flat, y_train)
print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for split, x, y in (
    ('Train', X_train_flat, y_train),
    ('Test', X_test_flat, y_test),
):
    acc, correct, incorrect = calculate_acc(gnb_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(gnb_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished


KeyboardInterrupt: 

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes on the PCA-reduced features. This rebinds
# `gnb_classifier`, replacing any model previously stored under that name.
gnb_classifier = GaussianNB()
gnb_classifier.fit(reduced_X_train_flat, y_train)
print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for split, x, y in (
    ('Train', reduced_X_train_flat, y_train),
    ('Test', reduced_X_test_flat, y_test),
):
    acc, correct, incorrect = calculate_acc(gnb_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    conf_mat, precision, recall, f1 = precision_recall(gnb_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

Model fitting finished
Train accuracy: 27.1413%, 655 mismatches out of 899 samples
(Averaged) Train precision: 0.4033, recall: 0.2745, f1 score: 0.2358
Test accuracy: 17.0000%, 83 mismatches out of 100 samples
(Averaged) Test precision: 0.1783, recall: 0.1742, f1 score: 0.1337


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees on the PCA-reduced features.
# NOTE: despite the variable name, this is scikit-learn's
# GradientBoostingClassifier, not the XGBoost library; the name is kept so any
# other cell referring to it keeps working.
xgboost_classifier = GradientBoostingClassifier(subsample=0.8, max_depth=2, random_state=RANDOM_STATE)
xgboost_classifier.fit(reduced_X_train_flat, y_train)

print('Model fitting finished')

# Evaluate on both splits: accuracy first, then averaged precision/recall/F1.
for x, y, split in zip([reduced_X_train_flat, reduced_X_test_flat],
                [y_train, y_test],
                ['Train', 'Test']):
    acc, correct, incorrect = calculate_acc(xgboost_classifier, x, y)
    print(f'{split} accuracy: {acc:.4f}%, {len(incorrect)} mismatches out of {len(incorrect) + len(correct)} samples')
    # Fixed: precision/recall were previously computed with `gnb_classifier`
    # (copy-paste slip), which reported the naive Bayes model's metrics here.
    conf_mat, precision, recall, f1 = precision_recall(xgboost_classifier, x, y, return_each_class=False)
    print(f'(Averaged) {split} precision: {precision:.4f}, recall: {recall:.4f}, f1 score: {f1:.4f}')

KeyboardInterrupt: 