# Audio classification with surfboard and sklearn

In this notebook, we will use the ESC-50 dataset, sklearn and surfboard together to obtain good accuracy on audio classification.

In [None]:
# Make sure surfboard is installed
!pip install ..

In [None]:
import os

from tqdm import tqdm

import numpy as np
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, plot_confusion_matrix

from surfboard.sound import Waveform
from surfboard.feature_extraction import extract_features

### Download ESC-50. More information on this dataset can be found [here](https://github.com/karolpiczak/ESC-50). 
ESC-50 is an environmental sound classification dataset.  
Download link: https://github.com/karoldvl/ESC-50/archive/master.zip  
Download is roughly 600MB. This might take a bit of time depending on your internet connection.

In [None]:
# Unzip. Replace the path below with the path where your file was downloaded.
# On a mac, it is likely that the code below should work.
!unzip ~/Downloads/ESC-50-master.zip

In [None]:
# Inspect the unzipped file.
!ls ESC-50-master/audio

### The files are named `{}-{}-{}-{id}.wav`, where `id` is the class label.
We will keep only the first 10 classes, to make the processing faster.

In [None]:
# Folder containing the ESC-50 clips. Change this if you unzipped somewhere else.
AUDIO_DIR = "ESC-50-master/audio"

# Keep only labels 0 ... 9.
acceptable_labels = list(range(10))


def label_from_filename(fname):
    """Return the integer label stored in the last '-'-separated field of `fname`."""
    return int(fname.split('-')[-1].split('.')[0])


# Only .wav entries are considered: any other entry in the folder (e.g. a hidden
# OS file) has no parsable label and would make int() raise.
# os.listdir() returns entries in arbitrary order, so the names are sorted to get
# the same file order on every run and every machine.
file_names = sorted(
    fname for fname in os.listdir(AUDIO_DIR)
    if fname.endswith('.wav') and label_from_filename(fname) in acceptable_labels
)

# Load every selected clip as a surfboard Waveform, in the same order as `file_names`.
waveforms = [
    Waveform(path=os.path.join(AUDIO_DIR, fname)) for fname in tqdm(file_names)
]

### Use surfboard to extract features.
What we do below can be done quickly using the surfboard CLI.

In [None]:
# Only components for which statistics are defined (i.e. time series) are listed,
# so that every statistic below can be requested for every component.
features_list = [
    'mfcc',
    'spectral_flux',
    'spectral_slope',
    'spectral_centroid',
    'spectral_spread',
    'spectral_skewness',
    'spectral_kurtosis',
    'spectral_rolloff',
    'shannon_entropy_slidingwindow',
    'rms',
]

# Summary statistics requested for each component.
statistics_list = [
    'mean',
    'std',
    'first_derivative_mean',
    'first_derivative_std',
]

# Run the extraction over the loaded waveforms and collect the result as a dataframe.
feature_df = extract_features(
    waveforms=waveforms,
    features_list=features_list,
    statistics_list=statistics_list,
)

### Let's inspect the extracted features

In [None]:
# Preview the first five rows of the extracted feature table.
feature_df.head(n=5)

In [None]:
# Convert the feature table into a plain numpy array.
X = np.array(feature_df)

# The label is the last '-'-separated field of each file name, up to the first '.'.
label_values = []
for fname in file_names:
    last_field = fname.rsplit('-', 1)[-1]
    label_values.append(int(last_field.partition('.')[0]))
labels = np.array(label_values)

### Now we pick some training ids and some validation ids.

In [None]:
# Fixed seed so that the train/validation split (and hence the reported accuracy)
# is the same on every run of the notebook.
SPLIT_SEED = 42
rng = np.random.default_rng(SPLIT_SEED)

n_examples = X.shape[0]
train_valid_split = int(0.8 * n_examples)

# Pick random ids (80% of the rows, without replacement) for training; every
# remaining row goes to validation. np.setdiff1d returns the remaining ids sorted.
train_ids = rng.choice(n_examples, train_valid_split, replace=False)
valid_ids = np.setdiff1d(np.arange(n_examples), train_ids)

print("There are {} training examples and {} validation examples...".format(len(train_ids), len(valid_ids)))

# Index into X using the randomly chosen ids.
X_train, X_valid = X[train_ids], X[valid_ids]
label_train, label_valid = labels[train_ids], labels[valid_ids]

# Normalize columns (these features are going into an SVM) by the largest absolute
# value of each X_train column. The scale is computed on the training rows only and
# then reused for validation. Using the absolute value keeps training columns within
# [-1, 1] even when a column is entirely negative, and columns that are all zero
# are left untouched instead of being divided by zero.
column_scale = np.abs(X_train).max(axis=0)
column_scale[column_scale == 0] = 1
X_train, X_valid = X_train / column_scale, X_valid / column_scale

### Onto the classification task.

In [None]:
# Linear SVM classifier. random_state is fixed so that repeated runs of this cell
# on the same data produce the same fitted model.
svm = LinearSVC(random_state=0)

# Train the SVM on the normalized training features.
svm.fit(X_train, label_train)

# Predict on both splits so that train and validation accuracy can be compared.
predictions_train = svm.predict(X_train)
predictions_valid = svm.predict(X_valid)

### Show the accuracy

In [None]:
# Score the predictions on each split, then report both numbers on one line.
train_accuracy = accuracy_score(label_train, predictions_train)
valid_accuracy = accuracy_score(label_valid, predictions_valid)

print('Train accuracy is {} and validation accuracy is {}'.format(train_accuracy, valid_accuracy))

### Show the confusion matrix on the validation set.

In [None]:
# Draw the confusion matrix of the fitted SVM on the validation split.
# The trailing ';' keeps the returned object's repr out of the cell output.
# NOTE(review): newer scikit-learn releases appear to replace this helper with
# ConfusionMatrixDisplay.from_estimator — confirm against the installed version.
plot_confusion_matrix(estimator=svm, X=X_valid, y_true=label_valid);

# Exercise
Now that you have a base model which should perform decently well on this task, try to add or remove features and see how this might affect the validation accuracy and confusion matrix.