In [1]:
import numpy, scipy, matplotlib.pyplot as plt, pandas as pd
import sklearn, IPython.display as ipd
import librosa, librosa.display

from sklearn.metrics import *
from sklearn.model_selection import GridSearchCV
from pathlib import Path
%matplotlib inline

## Step 1: Retrieve Audio

In [2]:
# Load a 30-second excerpt, starting 10 seconds in, from every training MP3.
# librosa.load returns a (samples, sampling_rate) pair; only the samples are kept.
# Paths are sorted because glob yields files in filesystem-dependent order;
# sorting keeps the sample order reproducible from run to run.
c1_signals = [
    librosa.load(p, duration=30, offset=10)[0]
    for p in sorted(Path().glob('audio/train/c1/*.mp3'))
]
c2_signals = [
    librosa.load(p, duration=30, offset=10)[0]
    for p in sorted(Path().glob('audio/train/c2/*.mp3'))
]

NoBackendError: 

In [None]:
# Number of training excerpts loaded per class, one count per line.
print(len(c1_signals), len(c2_signals), sep='\n')

## Step 2: Extract Features

In [None]:
n_mfcc = 12
def extract_features(signal):
    """Return a one-element feature list for an audio signal.

    Computes ``n_mfcc`` MFCCs with librosa and keeps only the entry at
    index ``[0, 0]`` of the transposed matrix.

    Parameters
    ----------
    signal : audio time series passed to ``librosa.feature.mfcc`` as ``y``.

    Returns
    -------
    list
        A single-element list ``[mfcc.T[0, 0]]``.
    """
    # Pass the audio by keyword: recent librosa releases make the arguments of
    # feature extractors keyword-only, so a positional signal raises TypeError.
    mfcc = librosa.feature.mfcc(y=signal, n_mfcc=n_mfcc)
    # NOTE(review): only one value of the MFCC matrix is used, so the other
    # coefficients and frames are discarded. If a per-coefficient summary
    # (e.g. mean over frames) was intended, change it here — TODO confirm.
    return [mfcc.T[0, 0]]

In [None]:
# One feature row per training excerpt, stacked into an array per class.
c1_features = numpy.array(list(map(extract_features, c1_signals)))
c2_features = numpy.array(list(map(extract_features, c2_signals)))

In [None]:
# Shapes of the per-class training feature arrays, one per line.
print(c1_features.shape, c2_features.shape, sep='\n')

## Step 3: Train the Classifier

In [None]:
# Scale every feature to the range (-1, 1).
# The scaler is fitted on BOTH classes together: fitting on c1 alone would
# take the min/max from one class only, so c2 samples would not be bounded
# to (-1, 1).
scaler = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1))
scaler.fit(numpy.vstack((c1_features, c2_features)))

# Per-class summary statistics of the scaled features.
c1_features_scaled = scaler.transform(c1_features)
print(c1_features_scaled.mean(axis=0))
print(c1_features_scaled.std(axis=0))

c2_features_scaled = scaler.transform(c2_features)
print(c2_features_scaled.mean(axis=0))
print(c2_features_scaled.std(axis=0))

In [None]:
# Stack the two classes (c1 rows first, then c2) into one training matrix
# and report its shape and per-feature extrema.
train_features = numpy.vstack([c1_features_scaled, c2_features_scaled])
print(
    train_features.shape,
    train_features.min(axis=0),
    train_features.max(axis=0),
    sep='\n',
)

In [None]:
# Ground-truth labels in the same order as the stacked features:
# 0.0 for every c1 row followed by 1.0 for every c2 row.
c1_label_block = numpy.zeros(len(c1_features_scaled))
c2_label_block = numpy.ones(len(c2_features_scaled))
train_labels = numpy.concatenate((c1_label_block, c2_label_block))

In [None]:
# Show the training label vector (zeros for c1, ones for c2).
print(train_labels)

In [None]:
# Disabled alternative to the plain SVC: a grid search over the SVC kernel
# ('linear' or 'rbf') and the C value (1 or 10).
# parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
# svc = sklearn.svm.SVC()
# model = GridSearchCV(svc, parameters)

In [None]:
# Support vector classifier constructed with no arguments, i.e. with
# scikit-learn's default hyperparameters.
model = sklearn.svm.SVC()

In [None]:
# 10-fold cross-validation scores of the classifier on the training data.
acc = sklearn.model_selection.cross_val_score(
    estimator=model,
    X=train_features,
    y=train_labels,
    cv=10,
)

In [None]:
# Per-fold scores followed by their average.
mean_acc = acc.mean()
print('acc =', acc)
print('acc mean =', mean_acc)

In [None]:
# Fit the classifier on the whole scaled training set; the fitted estimator
# is also the cell's displayed output.
model.fit(train_features, train_labels)

## Step 4: Run the Classifier

In [None]:
# Load a 30-second excerpt, starting 60 seconds in, from every test MP3.
# librosa.load returns a (samples, sampling_rate) pair; only the samples are kept.
# Paths are sorted because glob yields files in filesystem-dependent order;
# sorting keeps the sample order reproducible from run to run.
c1_test_signals = [
    librosa.load(p, duration=30, offset=60)[0]
    for p in sorted(Path().glob('audio/test/c1/*.mp3'))
]
c2_test_signals = [
    librosa.load(p, duration=30, offset=60)[0]
    for p in sorted(Path().glob('audio/test/c2/*.mp3'))
]

In [None]:
# Number of test excerpts loaded per class, one count per line.
print(len(c1_test_signals), len(c2_test_signals), sep='\n')

In [None]:
# One feature row per test excerpt, stacked into an array per class.
c1_test_features = numpy.array(list(map(extract_features, c1_test_signals)))
c2_test_features = numpy.array(list(map(extract_features, c2_test_signals)))

In [None]:
# Shapes of the per-class test feature arrays, one per line.
print(c1_test_features.shape, c2_test_features.shape, sep='\n')

In [None]:
# Apply the already-fitted training scaler to the test features
# (transform only, no re-fitting).
c1_test_features_scaled, c2_test_features_scaled = [
    scaler.transform(unscaled)
    for unscaled in (c1_test_features, c2_test_features)
]

In [None]:
# Stack the scaled test rows, c1 first and c2 second.
test_features = numpy.vstack([
    c1_test_features_scaled,
    c2_test_features_scaled,
])

In [None]:
# Ground-truth test labels in the same order as the stacked test rows:
# 0.0 for every c1 row followed by 1.0 for every c2 row.
c1_test_label_block = numpy.zeros(len(c1_test_features))
c2_test_label_block = numpy.ones(len(c2_test_features))
test_labels = numpy.concatenate((c1_test_label_block, c2_test_label_block))

In [None]:
# Show the ground-truth test label vector (zeros for c1, ones for c2).
print(test_labels)

In [None]:
# Predict a class label for every row of the scaled test matrix.
predicted_labels = model.predict(test_features)

In [None]:
# Display the predicted labels as the cell output.
predicted_labels

## Step 5: Evaluation

In [None]:
# Accuracy of the fitted classifier on the test set.
score = model.score(test_features, test_labels)
# ROC AUC must be computed from continuous scores, not thresholded labels:
# hard 0/1 predictions give the curve a single operating point and the value
# no longer reflects how well the classifier ranks the samples. The SVC's
# decision_function supplies the non-thresholded values that roc_auc_score
# expects.
roc_auc = roc_auc_score(test_labels, model.decision_function(test_features))
# Threshold-based metrics are computed from the predicted labels.
recall = recall_score(test_labels, predicted_labels)
precision = precision_score(test_labels, predicted_labels)
fmeasure = f1_score(test_labels, predicted_labels)
# NOTE(review): r2_score is a regression metric and is hard to interpret for
# 0/1 class labels; kept only because the results table reports it.
r2 = r2_score(test_labels, predicted_labels)

In [None]:
# Confusion-matrix counts, treating label 1 as the positive class.
truth_pos, truth_neg = test_labels == 1, test_labels == 0
guess_pos, guess_neg = predicted_labels == 1, predicted_labels == 0

tp = sum(truth_pos & guess_pos)  # true positives
tn = sum(truth_neg & guess_neg)  # true negatives
fp = sum(truth_neg & guess_pos)  # false positives
fn = sum(truth_pos & guess_neg)  # false negatives
print(tp, tn, fp, fn)

In [None]:
# Row names and values for the summary table; the two lists are index-aligned.
result_labels = [
    'accuracy',
    'roc_auc',
    'recall',
    'precision',
    'fmeasure',
    'r2_score',
]
results = [
    score,
    roc_auc,
    recall,
    precision,
    fmeasure,
    r2,
]

In [None]:
# Single-column table of the metrics, indexed by metric name.
pd.DataFrame(data=results, index=result_labels)