In [48]:
import numpy as np
import os
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score, make_scorer, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_validate, GroupKFold
from sklearn.neighbors import KNeighborsClassifier
from collections import Counter
import scipy.stats
import librosa

class_names = ["kick","snare","clap","hat"]

path = Path.cwd() / 'drumssorted\\'
train_path_list = [path / 'Kicks\\',path / 'Snares\\',path / 'Claps\\',path / 'Hats\\']

arr = []
n = 0
for a in train_path_list:
    arr.append([n])
    for i in os.listdir(a):
        arr[n].append(i)
    arr[n].remove(arr[n][0])
    n+=1

mfccs = []
sig_len = []
n=0
for a in train_path_list:
    for i in arr[n]:
        signal, fs = librosa.load(a/i, sr=None) #you need to define the sampling frequency as None, otherwise librosa
            #assumes the default value, i.e. 22050, and modifies the signal when loading
        mfcc_13_coeffs = librosa.feature.mfcc(signal, sr=fs, n_mfcc=13) #I set the number of coefficients to 13
        mfccs.append(mfcc_13_coeffs)
        sig_len.append(np.shape(mfcc_13_coeffs)[1])
    n+=1

trunc_mfccs = []
for item in mfccs:
    item = item[:, :np.min(sig_len)]
    trunc_mfccs.append(item)
mfccs = np.asarray(trunc_mfccs)



In [49]:
#making a label with all the drum elements

labels = np.zeros(len(arr[0])+len(arr[1])+len(arr[2])+len(arr[3]))
for i in range(0, len(arr[0])):
    labels[i] = 0
for i in range(len(arr[0]), len(arr[1])+len(arr[0])):
    labels[i] = 1
for i in range(len(arr[1])+len(arr[0]), len(arr[2])+len(arr[1])+len(arr[0])):
    labels[i] = 2
for i in range(len(arr[2])+len(arr[1])+len(arr[0]), len(arr[3])+len(arr[1])+len(arr[0])+len(arr[2])):
    labels[i] = 3

print(labels.shape)
print(mfccs.shape)
Counter(labels)

(480,)
(480, 13, 3)


Counter({0.0: 137, 1.0: 211, 2.0: 74, 3.0: 58})

In [50]:
#k-nearest neighbors method using mfcc

X_train, X_test, y_train, y_test = train_test_split(mfccs, labels, test_size=0.2, random_state=42, stratify=labels)
X_train = X_train.reshape(np.shape(X_train)[0],-1)
X_test = X_test.reshape(np.shape(X_test)[0],-1)

#clasification
clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(X_train, y_train)
preds = clf.predict(X_test)
acc = accuracy_score(y_test, preds)
recall = recall_score(y_test, preds, average='macro')
prec = precision_score(y_test, preds, average='macro')
f1 = f1_score(y_test, preds, average='macro')
print("Dokładność (ang. accuracy): ", acc)
print("Czułość (ang. recall): ", recall)
print("Precyzja (ang. precision): ", prec)
print("F1: ", f1)

Dokładność (ang. accuracy):  0.875
Czułość (ang. recall):  0.8609126984126984
Precyzja (ang. precision):  0.8956243032329988
F1:  0.8751945556190839


In [51]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, KFold

#random forest method

X = np.hstack((np.mean(mfccs, axis=2), #average value
               np.std(mfccs, axis=2), #standard deviation
               np.median(mfccs, axis=2), #median
               np.percentile(mfccs, 25, axis=2), #I quartile
               np.percentile(mfccs, 75, axis=2), #III quartile
               scipy.stats.iqr(mfccs, rng=(10, 90), axis=2), #spread between the 10th and 90th percentile
               scipy.stats.kurtosis(mfccs, axis=2), #kurtosis
               scipy.stats.skew(mfccs, axis=2), #skewness
               np.min(mfccs, axis=2), #minimal value
               np.max(mfccs, axis=2) #maximal value
              ))

#making a fold

kf4 = KFold(n_splits=4, shuffle=False)
for train_index, test_index in kf4.split(mfccs):
    train_folds = np.take(mfccs,train_index)
    test_fold = np.take(mfccs,test_index)
    train_folds_mask = train_index
    test_fold_mask = test_index
    
print(len(train_folds))
print(len(test_fold))
print(len(train_folds_mask))
print(len(test_fold_mask))

360
120
360
120


In [56]:
#division into training and test sets

X_train = X[train_folds_mask]
X_test = X[test_fold_mask]

y_train = labels[train_folds_mask]
y_test = labels[test_fold_mask]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

scoring = {'f1_macro': make_scorer(f1_score, average='macro')}

model = RandomForestClassifier(random_state=10)
scores = cross_validate(model, X_train, y_train, scoring=scoring, 
                        groups=train_folds, cv=GroupKFold(n_splits=len(np.unique(train_folds))))
model.fit(X_train,y_train)
predictions = model.predict(X_test)
print("Macierz pomyłek (ang. confusion matrix): ")
print(confusion_matrix(y_test, predictions))
print("Dokładność (ang. accuracy): ", accuracy_score(y_test, predictions))
print("F1: ", f1_score(y_test, predictions, average='macro'))

Macierz pomyłek (ang. confusion matrix): 
[[ 0  0  0]
 [52 10  0]
 [57  1  0]]
Dokładność (ang. accuracy):  0.08333333333333333
F1:  0.091324200913242
