### Try to load the pre-trained model

In [None]:
import os
import re
import pickle
import numpy as np
# Names of the dataset splits. NOTE(review): not referenced by any cell visible in this notebook.
SETS = ['training', 'validation', 'tests']
# Genre names; used as sub-folder names under dataset/genres/ and as tick labels of the confusion matrix.
GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

In [None]:
# https://scikit-learn.org/stable/model_persistence.html
# Try to restore a previously pickled classifier. `loaded_model` records
# whether that succeeded so later cells can tell a restored model from none.
#
# pickle.load() expects an open *binary file object*; passing a path string
# (plus a mode) always raises, which made the load fail unconditionally.
# NOTE(security): unpickling can execute arbitrary code -- only load model
# files that were produced by this notebook / a trusted source.
try:
    with open('dtcwt/dtcwt_rf.bin', 'rb') as model_file:
        dtcwt_rf = pickle.load(model_file)
    loaded_model = True
except Exception as load_error:
    # Best effort: a missing or unreadable file is not fatal, but say why
    # instead of silently swallowing the error.
    print('Could not load pre-trained model:', repr(load_error))
    loaded_model = False

### Load the dataset

In [None]:
import librosa
import numpy as np
from collections import Counter
import scipy
import dtcwt

# Shared 1-D transform object used by extract_dtcwt() for every audio file.
# NOTE(review): 'antonini' / 'qshift_d' are filter-bank names passed through to the dtcwt library — confirm the intended choice there.
trans = dtcwt.Transform1d(biort='antonini', qshift='qshift_d')

In [None]:
def calculate_entropy(list_values):
	"""Entropy of the empirical distribution of the values in ``list_values``.

	Each distinct value's relative frequency is used as its probability and
	the result is whatever ``scipy.stats.entropy`` returns for those
	probabilities with its default settings.
	"""
	total = len(list_values)
	occurrences = Counter(list_values)
	probabilities = []
	for _value, times_seen in occurrences.most_common():
		probabilities.append(times_seen / total)
	return scipy.stats.entropy(probabilities)

def calculate_statistics(list_values):
	"""Summary statistics of a sequence of numbers, ignoring NaNs.

	Parameters
	----------
	list_values : array-like of numbers (a plain list is accepted).

	Returns
	-------
	list
		``[p5, p25, p75, p95, median, mean, std, var, rms]`` in that order.
	"""
	# Convert once so that plain Python lists support the element-wise power below.
	values = np.asarray(list_values)
	n5, n25, median, n75, n95 = np.nanpercentile(values, [5, 25, 50, 75, 95])
	mean = np.nanmean(values)
	std = np.nanstd(values)
	var = np.nanvar(values)
	# Root-mean-square is sqrt(mean(x^2)). The previous mean(sqrt(x^2)) was the
	# mean absolute value, which for non-negative input just duplicated `mean`.
	rms = np.sqrt(np.nanmean(values ** 2))
	return [n5, n25, n75, n95, median, mean, std, var, rms]

def calculate_crossings(list_values):
	"""Count how often the sequence crosses zero and crosses its own mean.

	A crossing is a change, between neighbouring samples, of the outcome of
	the ``> threshold`` comparison. The mean ignores NaNs.

	Returns ``[no_zero_crossings, no_mean_crossings]``.
	"""
	def _count_flips(above_threshold):
		# Positions where the boolean mask differs from its successor.
		flip_positions = np.nonzero(np.diff(above_threshold))[0]
		return len(flip_positions)

	no_zero_crossings = _count_flips(np.array(list_values) > 0)
	no_mean_crossings = _count_flips(np.array(list_values) > np.nanmean(list_values))
	return [no_zero_crossings, no_mean_crossings]

def get_features(list_values):
	"""Build the flat feature list for one sequence of coefficients.

	Layout of the returned list: the entropy, then the two crossing counts,
	then the nine summary statistics (12 entries in total).
	"""
	feature_vector = [calculate_entropy(list_values)]
	feature_vector.extend(calculate_crossings(list_values))
	feature_vector.extend(calculate_statistics(list_values))
	return feature_vector

## Given an audio file, return the corresponding wavelet features

In [None]:
def extract_dtcwt(file_path:str):
    """Compute the wavelet feature list for one audio file.

    The file is read with ``librosa.load`` and decomposed by the module-level
    ``trans`` transform with 17 levels. ``get_features`` is applied to the
    magnitude of every high-pass band and then to the low-pass band; the
    per-band feature lists are concatenated in that order.
    """
    signal, _sample_rate = librosa.load(file_path)
    pyramid = trans.forward(signal, nlevels=17)

    features = []
    for band in pyramid.highpasses:
        # Magnitude of the band coefficients.
        band_magnitude = np.abs(band.squeeze())
        features.extend(get_features(band_magnitude))

    features.extend(get_features(pyramid.lowpass.squeeze()))
    return features

In [None]:
import numpy as np

# Root folder of the dataset, one sub-folder per genre.
# NOTE(review): 'url' looks like a placeholder — set it to the real location.
# get_data() builds paths as `data_path + genre`, so the value must end with a path separator.
data_path = 'url'

# Genre sub-folder names; the position in this list is the integer class label produced by index_genre().
genres = ['blues', 'classical', 'country', 'disco',
          'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

def index_genre(genre, genres):
    """Return the position of the first entry of ``genres`` equal to ``genre``.

    Returns -1 when ``genre`` does not occur in ``genres``.
    """
    for position, candidate in enumerate(genres):
        if candidate == genre:
            return position
    return -1

def get_data(data_path, genres, decoder, training_percentaje=0.6, validation_percentaje=0.2, test_percentaje=0.2):
    """Decode every file of every genre folder and split it into train/validation/test.

    Parameters
    ----------
    data_path : path of the folder that contains the dataset (one sub-folder
        per genre). A trailing path separator is optional.
    genres : list with the name of each sub-folder that holds one genre. The
        position of a name in this list is used as its integer class label.
    decoder : function that turns a file path into the sample to store (for
        images this would be e.g. an ``imread``).
    training_percentaje : fraction of each genre's files that goes to training.
    validation_percentaje : fraction of each genre's files that goes to validation.
    test_percentaje : kept for interface compatibility; it is not read, every
        file that is not assigned to training or validation goes to the test split.

    Returns
    -------
    dict
        Keys ``'data_training'``, ``'data_validation'`` and ``'data_testing'``,
        each a dict ``{'in': np.ndarray, 'out': np.ndarray}`` with the decoded
        samples and their integer labels.
    """
    splits = {
        'data_training': {'in': [], 'out': []},
        'data_validation': {'in': [], 'out': []},
        'data_testing': {'in': [], 'out': []},
    }

    for genre in genres:
        # Label = position of the first matching name in `genres`
        # (the same value index_genre(genre, genres) yields), computed once per genre.
        label = list(genres).index(genre)

        # os.path.join works with or without a trailing separator on data_path.
        genre_dir = os.path.join(data_path, genre)
        # os.listdir order is arbitrary; sort so the split is reproducible.
        files = sorted(os.listdir(genre_dir))
        count = len(files)

        training_end = training_percentaje * count
        validation_end = (training_percentaje + validation_percentaje) * count

        for index, filename in enumerate(files):
            filepath = os.path.join(genre_dir, filename)

            if index < training_end:
                target = splits['data_training']
            elif index < validation_end:
                target = splits['data_validation']
            else:
                target = splits['data_testing']

            target['in'].append(decoder(filepath))
            target['out'].append(label)

    return {
        name: {'in': np.array(part['in']), 'out': np.array(part['out'])}
        for name, part in splits.items()
    }

def get_data_wavelet(training_percentaje=0.8, validation_percentaje=0.1, test_percentaje=0.1):
    """Run ``get_data`` on the module-level ``data_path`` / ``genres`` with ``extract_dtcwt`` as decoder.

    The three fractions are forwarded unchanged; the result is the dict
    returned by ``get_data``.
    """
    return get_data(
        data_path,
        genres,
        extract_dtcwt,
        training_percentaje=training_percentaje,
        validation_percentaje=validation_percentaje,
        test_percentaje=test_percentaje,
    )

# Runs the feature extraction on every file under data_path when this cell executes
# (80% / 10% / remainder split per genre). The training cell further down reads this
# variable under this exact (misspelled) name, so keep the name in sync if it is renamed.
wavalet_data = get_data_wavelet()

### If you want the features for the whole dataset:

In [None]:
# Extract the wavelet features of every "<name>.<digits>.wav" file of every
# genre and cache them (plus the genre name of each row) as .npy files.
x_features = []
y  = []
for genre in GENRES:
    # os.listdir order is arbitrary; sort so the row order of the saved
    # arrays is the same on every run / machine.
    for wav in sorted(os.listdir(f'dataset/genres/{genre}')):
        # Dots are escaped and the pattern is anchored at the end, so only
        # real "<name>.<digits>.wav" files match (an unescaped "." matches any
        # character, and without "$" names like "x.1.wav.bak" were accepted).
        matched = re.match(r"(\w+)\.(\d+)\.wav$", wav)
        if matched:
            features = extract_dtcwt(f'dataset/genres/{genre}/{wav}')

            y.append(genre)
            x_features.append(features)

x_features = np.array(x_features)
y = np.array(y)
# np.save appends the ".npy" extension to these names.
np.save('dtcwt/dtcwt_15_lvl17', x_features)
np.save('dtcwt/dtcwt_y', y)


In [None]:
# Reload the arrays written by the previous cell (feature matrix and genre name per row).
X_dtcwt = np.load('dtcwt/dtcwt_15_lvl17.npy')
y = np.load('dtcwt/dtcwt_y.npy')
# Last expression of the cell: displays the shape of the feature matrix.
X_dtcwt.shape

Get a scaled, random train-test split

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
def encoder_scaler(X_dtcwt, y):
    """Encode the labels, split 80/20 and standardise the features.

    Parameters
    ----------
    X_dtcwt : feature matrix, one row per sample.
    y : class label of each row (encoded to integers with ``LabelEncoder``).

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``; the split uses
        ``test_size=0.2`` and ``random_state=42``.
    """
    encoder = LabelEncoder()
    y_encoded = encoder.fit_transform(y)

    # Split BEFORE scaling: fitting the scaler on the full matrix would leak
    # the test rows' mean/variance into the training data.
    X_train_raw, X_test_raw, y_dtcwt_train_f, y_dtcwt_test_f = train_test_split(
        X_dtcwt, y_encoded, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_dtcwt_train_f = scaler.fit_transform(X_train_raw)
    # The test rows are transformed with the statistics learned on the training rows only.
    X_dtcwt_test_f = scaler.transform(X_test_raw)
    return X_dtcwt_train_f, X_dtcwt_test_f, y_dtcwt_train_f, y_dtcwt_test_f

In [None]:
# Cached whole-dataset features and genre names; not used for the fit below,
# but the cross-validation cell at the end of the notebook reads X_dtcwt and y.
X_dtcwt = np.load('dtcwt/dtcwt_15_lvl17.npy')
y = np.load('dtcwt/dtcwt_y.npy')
# retrain model
# X_dtcwt_train_f, X_dtcwt_test_f, y_dtcwt_train_f, y_dtcwt_test_f = encoder_scaler(X_dtcwt, y)

# NOTE(review): this rebinds dtcwt_rf unconditionally, so a model restored by the
# loading cell at the top (loaded_model == True) is discarded and retrained here.
dtcwt_rf = RandomForestClassifier(n_estimators=100,max_depth=13,bootstrap=False,random_state=42)

#region new changes
# Train / test data come from the per-genre split built by get_data_wavelet();
# the validation part of wavalet_data is not used in this cell.
X_dtcwt_train_f = wavalet_data['data_training']['in']
y_dtcwt_train_f = wavalet_data['data_training']['out']
X_dtcwt_test_f =  wavalet_data['data_testing']['in']
y_dtcwt_test_f =  wavalet_data['data_testing']['out']
#endregion

dtcwt_rf.fit(X_dtcwt_train_f,y_dtcwt_train_f)

# Persist the fitted model to the same path the loading cell at the top reads.
with open('dtcwt/dtcwt_rf.bin','wb') as mod:
    pickle.dump(dtcwt_rf,mod)
# https://scikit-learn.org/stable/model_persistence.html

# Score on the held-out test split, then keep the predictions for the confusion matrix cell.
print(dtcwt_rf.score(X_dtcwt_test_f, y_dtcwt_test_f))
ypred_dtcwt_rf = dtcwt_rf.predict(X_dtcwt_test_f)

Plot a confusion matrix to corroborate the behavior of the model for each genre

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import itertools
import seaborn as sns
from sklearn.metrics import confusion_matrix

# create confusion matrix
# The labels are the integer positions in GENRES (that is what index_genre()
# produced for y). Passing them explicitly keeps the matrix len(GENRES) x
# len(GENRES) and aligned with the tick labels even if a genre happens to be
# absent from the test split.
cm = confusion_matrix(y_dtcwt_test_f, ypred_dtcwt_rf, labels=np.arange(len(GENRES)))

# Overall accuracy = correctly classified samples (diagonal) / all samples.
accuracy = np.trace(cm) / np.sum(cm).astype('float')
misclass = 1 - accuracy

cmap = plt.get_cmap('Blues')

# plot confusion matrix
plt.figure(figsize=(8,6))
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title('Confusion matrix DTCWT Random Forest')
plt.colorbar()

tick_marks = np.arange(len(GENRES))
plt.xticks(tick_marks, GENRES, rotation=45)
plt.yticks(tick_marks, GENRES)

# Write each count into its cell; white text on dark cells, black on light ones.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, "{:,}".format(cm[i, j]),
            horizontalalignment="center",
            color="white" if cm[i, j] > (cm.max() / 2) else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
# tight_layout must run after the axis labels exist, otherwise the two-line
# x label is not accounted for and can be clipped in the saved image.
plt.tight_layout()
plt.savefig('ConfM_DTCWT_rf.png')

Cross-validation using K-Fold to validate the accuracy of the previous fit

In [None]:
from sklearn.model_selection import KFold, cross_val_score
# 5-fold shuffled cross-validation of the classifier on the cached whole-dataset features.
kfold = KFold(n_splits=5, shuffle=True, random_state=0)
rf_scores = cross_val_score(dtcwt_rf, X_dtcwt, y, cv=kfold)
mean_score = rf_scores.mean()

print('scores: ', rf_scores) # [0.735      0.705      0.8        0.81       0.79396985]
print('mean: ', mean_score) # 0.7687939698492463

import matplotlib.pyplot as plt

# One point per fold, plus a dash-dotted horizontal line at the mean score.
fold_numbers = range(1, 6)
mean_line = [mean_score] * 5

plt.figure(figsize=(8,6))
plt.title('Cross-Validation Scores in Random Forest for DTCWT')
plt.xlabel('Fold')
plt.ylabel('Score')
ax = plt.gca()
ax.set_xlim(0.9, 5.1)
ax.set_ylim(0.6, 1.01)
plt.grid()
plt.plot(fold_numbers, rf_scores, 'o-', color='blue', lw=2)
plt.plot(fold_numbers, mean_line, linestyle="-.", color='k')
# Print the mean just above the mean line, at the middle fold.
plt.annotate("%0.4f" % mean_score, (3, mean_score + 0.005))
plt.legend(['accuracy','mean acc'], loc="best")
plt.savefig('CV_DTCWT_rf.png')