In [1]:
import os
import pandas as pd

audio_root_folder = './archive/data'
labels_csv = os.path.join(audio_root_folder, 'features_30_sec.csv')
df = pd.read_csv(labels_csv, header=0)
df.drop(df.loc[df.filename == 'jazz.00054.wav'].index, inplace=True)
df.head(5)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [2]:
import numpy as np
import torch

RANDOM_SEED = RANDOM_STATE = 42

np.random.seed(RANDOM_SEED)
torch.random.manual_seed(RANDOM_SEED);

In [3]:
from sklearn.model_selection import train_test_split

filenames = df['filename']
labels = df['label']

files_train_val, files_test, labels_train_val, labels_test = train_test_split(
    filenames, labels, test_size=0.05, random_state=RANDOM_STATE
)
files_train, files_val, labels_train, labels_val = train_test_split(
    files_train_val, labels_train_val, test_size=0.05, random_state=RANDOM_STATE
)

In [4]:
from audio_toolbox.dataset import AudioDataset

num_frames = 1290
scaling_strategy = 'min-max'

datasets = {
    'train':
        AudioDataset(
            root_folder=audio_root_folder,
            filenames=files_train.tolist(),
            labels=labels_train.tolist(),
            num_frames=num_frames,
            scaling_strategy=scaling_strategy,
            name='Training set',
            label_encoding='Label'
        ),
    'val':
        AudioDataset(
            root_folder=audio_root_folder,
            filenames=files_val.tolist(),
            labels=labels_val.tolist(),
            num_frames=num_frames,
            scaling_strategy=scaling_strategy,
            name='Validation set',
            label_encoding='Label'
        ),
    'test':
        AudioDataset(
            root_folder=audio_root_folder,
            filenames=files_test.tolist(),
            labels=labels_test.tolist(),
            num_frames=num_frames,
            scaling_strategy=scaling_strategy,
            name='Testing set',
            label_encoding='Label'
        )
}

Loading audios for Training set: 100%|██████████| 901/901 [00:08<00:00, 100.96it/s]
Processing for Training set: 100%|██████████| 901/901 [02:25<00:00,  6.19it/s]
Loading audios for Validation set: 100%|██████████| 48/48 [00:00<00:00, 144.32it/s]
Processing for Validation set: 100%|██████████| 48/48 [00:07<00:00,  6.36it/s]
Loading audios for Testing set: 100%|██████████| 50/50 [00:00<00:00, 61.54it/s]
Processing for Testing set: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s]


In [6]:
n_train, n_val, n_test = len(datasets['train']), len(datasets['val']), len(datasets['test'])
n_train, n_val, n_test

(901, 48, 50)

In [7]:
print(repr(datasets['train']))

Root folder: ./archive/data
Number of samples: 901
Shape of one sample: torch.Size([72, 1290])
Number of classes: 10
Features:
	n_mfcc: 12
	n_chroma: 12
	n_derivatives: 2
Scaling strategy: min-max


In [8]:
print(repr(datasets['val']))

Root folder: ./archive/data
Number of samples: 48
Shape of one sample: torch.Size([72, 1290])
Number of classes: 10
Features:
	n_mfcc: 12
	n_chroma: 12
	n_derivatives: 2
Scaling strategy: min-max


In [9]:
print(repr(datasets['test']))

Root folder: ./archive/data
Number of samples: 50
Shape of one sample: torch.Size([72, 1290])
Number of classes: 10
Features:
	n_mfcc: 12
	n_chroma: 12
	n_derivatives: 2
Scaling strategy: min-max


In [10]:
X_train = torch.stack([datasets['train'][i] for i in range(len(datasets['train']))]).cpu().numpy()
X_val = torch.stack([datasets['val'][i] for i in range(len(datasets['val']))]).cpu().numpy()
X_test = torch.stack([datasets['test'][i] for i in range(len(datasets['test']))]).cpu().numpy()
X_train.shape, X_val.shape, X_test.shape

((901, 72, 1290), (48, 72, 1290), (50, 72, 1290))

In [11]:
y_train = datasets['train'].labels
y_val = datasets['val'].labels
y_test = datasets['test'].labels
y_train.shape, y_val.shape, y_test.shape

((901,), (48,), (50,))

In [12]:
X_train_flat = X_train.reshape((X_train.shape[0], -1))
# y_train_labels = np.argmax(y_train, axis=1)

X_val_flat = X_val.reshape((X_val.shape[0], -1))
# y_val_labels = np.argmax(y_val, axis=1)

X_test_flat = X_test.reshape((X_test.shape[0], -1))
# y_test_labels = np.argmax(y_test, axis=1)

In [13]:
print("Flattened training samples and labels:", X_train_flat.shape, y_train.shape)
print("Flattened validation samples and labels:", X_val_flat.shape, y_val.shape)
print("Flattened test samples and labels:", X_test_flat.shape, y_test.shape)

Flattened training samples and labels: (901, 92880) (901,)
Flattened validation samples and labels: (48, 92880) (48,)
Flattened test samples and labels: (50, 92880) (50,)


Start with the simplest classification model: Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

logistic_model = LogisticRegression(max_iter=100)

logistic_model.fit(X_train_flat, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
from sklearn.metrics import accuracy_score

def calculate_acc(model, X_flattened, y_labels, split):
    y_pred = model.predict(X_flattened)
    accuracy = accuracy_score(y_labels, y_pred)
    print(f"{split} accuracy: {accuracy * 100:.4f}%")

In [15]:
for x, y, split in zip([X_train_flat, X_val_flat, X_test_flat],
                [y_train, y_val, y_test],
                ['Train', 'Validation', 'Test']):
    calculate_acc(logistic_model, x, y, split)

Train accuracy: 96.4484%
Validation accuracy: 29.1667%
Test accuracy: 36.0000%


In [16]:
regularized_logistic_model = LogisticRegression(max_iter=100, C=0.01)
regularized_logistic_model.fit(X_train_flat, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [17]:
for x, y, split in zip([X_train_flat, X_val_flat, X_test_flat],
                [y_train, y_val, y_test],
                ['Train', 'Validation', 'Test']):
    calculate_acc(regularized_logistic_model, x, y, split)

Train accuracy: 94.0067%
Validation accuracy: 37.5000%
Test accuracy: 36.0000%


In [18]:
from sklearn.svm import SVC

svm_classifier = SVC(kernel='rbf', C=0.1, gamma='scale')
svm_classifier.fit(X_train_flat, y_train)

In [19]:
for x, y, split in zip([X_train_flat, X_val_flat, X_test_flat],
                [y_train, y_val, y_test],
                ['Train', 'Validation', 'Test']):
    calculate_acc(svm_classifier, x, y, split)

Train accuracy: 32.2974%
Validation accuracy: 20.8333%
Test accuracy: 26.0000%


In [20]:
ovo_svm = SVC(decision_function_shape='ovo')
ovo_svm.fit(X_train_flat, y_train)

In [21]:
for x, y, split in zip([X_train_flat, X_val_flat, X_test_flat],
                [y_train, y_val, y_test],
                ['Train', 'Validation', 'Test']):
    calculate_acc(ovo_svm, x, y, split)

Train accuracy: 71.5871%
Validation accuracy: 39.5833%
Test accuracy: 46.0000%


In [24]:
from sklearn.ensemble import RandomForestClassifier

rand_forest_classifier = RandomForestClassifier(n_estimators=100,
                                                max_depth=4,
                                                oob_score=True)
rand_forest_classifier.fit(X_train_flat, y_train)

In [25]:
for x, y, split in zip([X_train_flat, X_val_flat, X_test_flat],
                [y_train, y_val, y_test],
                ['Train', 'Validation', 'Test']):
    calculate_acc(rand_forest_classifier, x, y, split)

Train accuracy: 71.1432%
Validation accuracy: 27.0833%
Test accuracy: 46.0000%


In [14]:
from sklearn.ensemble import GradientBoostingClassifier

xgboot_classifier = GradientBoostingClassifier(n_estimators=10,
                                               subsample=0.2)
xgboot_classifier.fit(X_train_flat, y_train)

KeyboardInterrupt: 