In [None]:
# Add the PPA that ships google-drive-ocamlfuse, refresh the package index and install it.
# NOTE(review): with `2>&1 > /dev/null` the shell duplicates stderr onto the original stdout
# *before* stdout is redirected, so only stdout is discarded and error messages still show up.
!sudo add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!sudo apt-get update -qq 2>&1 > /dev/null
!sudo apt -y install -qq google-drive-ocamlfuse 2>&1 > /dev/null
# First invocation without a mount point; presumably this starts the authorization flow - TODO confirm.
!google-drive-ocamlfuse

In [None]:
# Install a text-mode browser and register it as the default browser.
!sudo apt-get install -qq w3m # to act as web browser
!xdg-settings set default-web-browser w3m.desktop # to set default browser
# Create the mount point /content/drive/MyDrive step by step.
# NOTE(review): plain `mkdir` reports an error if the directory already exists (e.g. when the cell is re-run).
%cd /content
!mkdir drive
%cd drive
!mkdir MyDrive
# Going up twice from /content/drive leaves the working directory at the filesystem root.
%cd ..
%cd ..
# Mount Google Drive on the directory created above.
!google-drive-ocamlfuse /content/drive/MyDrive

In [3]:
import os
import tensorflow.keras as keras

# Notebook-wide configuration; the functions defined in later cells read these module-level names.

# Dataset splits. Only declared here; the value is assigned by the cell that calls get_data_mfcc().
data: dict
# Number of epochs passed to model.fit() in training().
epochs_ = 300
# Learning rate handed to the Adam optimizer in training().
lr = 0.001
# Convolution kernel size, passed to mfcc_cnn_model() as filter_shape.
kernel = (2,2)
# Target size given to cv2.resize(), which takes it as (width, height); training() builds the
# model input shape as (size_images[1], size_images[0], channels).
size_images = (192, 256)
# Channel count used as the last dimension of the model input shape.
channels = 3

In [40]:
import numpy as np

def index_genre(genre, genres):
    """Return the position of the first entry of `genres` equal to `genre`, or -1 when there is none."""
    matching_positions = (
        position
        for position, candidate in enumerate(genres)
        if candidate == genre
    )
    return next(matching_positions, -1)

def get_data(data_path, genres, decoder, training_percentaje=0.6, validation_percentaje=0.2, test_percentaje=0.2):
    """
    Load a dataset stored as one sub-folder per genre and split it into training, validation and test sets.

    The split is done independently for every genre and by file position: the files of a genre
    are sorted by name, the first `training_percentaje` of them go to training, the next
    `validation_percentaje` go to validation and everything that is left goes to test.

    data_path: path of the folder that contains the dataset (one sub-folder per genre).
    genres: list with the name of each sub-folder that holds one genre. The label of a sample is
        the position of its genre in this list (first occurrence).
    decoder: function that decodes the file whose path it receives (for images, for example,
        a wrapper around imread). It is called exactly once per file.
    training_percentaje: fraction (0..1) of each genre's files used for training.
    validation_percentaje: fraction (0..1) of each genre's files used for validation.
    test_percentaje: accepted for interface compatibility but not read; the test set is whatever
        remains after the training and validation fractions.

    Returns a dict with the keys 'data_training', 'data_validation' and 'data_testing', each a
    dict {'in': np.ndarray, 'out': np.ndarray} of decoded samples and integer labels, plus
    'data_testing_from_genre', which maps every genre name to the same structure restricted to
    that genre's test samples.
    """

    def empty_split():
        return {'in': [], 'out': []}

    def as_arrays(split):
        return {'in': np.array(split['in']), 'out': np.array(split['out'])}

    # Resolve each genre's label once instead of searching the list for every file.
    # setdefault keeps the first position when a name is repeated.
    label_by_genre = {}
    for position, name in enumerate(genres):
        label_by_genre.setdefault(name, position)

    data_training = empty_split()
    data_validation = empty_split()
    data_test = empty_split()
    data_test_from_genre = {}

    for genre in genres:
        genre_dir = os.path.join(data_path, genre)
        # os.listdir returns entries in arbitrary order; sorting makes the split reproducible
        # across runs and machines.
        files = sorted(os.listdir(genre_dir))
        count = len(files)
        label = label_by_genre[genre]

        training_end = training_percentaje * count
        validation_end = (training_percentaje + validation_percentaje) * count

        genre_test = empty_split()
        data_test_from_genre[genre] = genre_test

        for position, filename in enumerate(files):
            feature = decoder(os.path.join(genre_dir, filename))

            if position < training_end:
                targets = (data_training,)
            elif position < validation_end:
                targets = (data_validation,)
            else:
                # Test samples are recorded twice: in the global test set and per genre.
                targets = (data_test, genre_test)

            for target in targets:
                target['in'].append(feature)
                target['out'].append(label)

    print("---------Loaded data-----------")
    return {
        'data_training': as_arrays(data_training),
        'data_validation': as_arrays(data_validation),
        'data_testing': as_arrays(data_test),
        'data_testing_from_genre': {g: as_arrays(split) for g, split in data_test_from_genre.items()}
    }

In [38]:
import cv2

def read_image(filepath):
    """Read the image at `filepath` with OpenCV and resize it to the notebook-level `size_images`.

    `size_images` is passed to cv2.resize as the target size, which OpenCV interprets as
    (width, height).

    Raises:
        ValueError: if cv2.imread could not load the file (it returns None instead of raising
            when the path is missing or the file cannot be decoded).
    """
    image = cv2.imread(filepath)
    if image is None:
        # Fail with the offending path instead of an opaque error from cv2.resize.
        raise ValueError(f"Could not read image: {filepath}")
    return cv2.resize(image, size_images)

def read_gray_image(filepath):
    """Read the image at `filepath` as grayscale, resize it to `size_images` and add a channel axis.

    The result has a trailing axis of length 1 so it can be fed to a model that expects
    single-channel images.

    Raises:
        ValueError: if cv2.imread could not load the file (it returns None instead of raising
            when the path is missing or the file cannot be decoded).
    """
    img_gray = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if img_gray is None:
        # Fail with the offending path instead of an opaque error further down.
        raise ValueError(f"Could not read image: {filepath}")
    # Resize first: cv2.resize returns a 2-D array for single-channel input, so a channel
    # axis added before resizing would be dropped again.
    img_gray = cv2.resize(img_gray, size_images)
    return np.expand_dims(img_gray, axis=-1)

# Names of the per-genre sub-folders of the dataset. get_data() uses the position of a name in
# this list as the integer class label of its samples.
genres = ['blues', 'classical', 'country', 'disco',
          'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']


def get_data_mfcc(training_percentaje=0.6, validation_percentaje=0.2, test_percentaje=0.2,
                  data_path='/content/drive/MyDrive/MFCC_dataset/'):
    """Load the MFCC image dataset and split it into training, validation and test sets.

    Thin wrapper around get_data() that fixes the genre list (`genres`) and the decoder
    (`read_image`).

    training_percentaje, validation_percentaje, test_percentaje: forwarded unchanged to get_data().
    data_path: folder that holds one sub-folder per genre. Defaults to the MFCC dataset on the
        mounted Drive; pass another folder to load a different copy of the dataset.

    Returns whatever get_data() returns for these arguments.
    """
    return get_data(data_path, genres, read_image,
                    training_percentaje, validation_percentaje, test_percentaje)

In [8]:
def mfcc_cnn_model(input_shape=(128, 96, 1), classes=10, filter_shape=(3, 3)):
    """Build the (uncompiled) convolutional classifier named 'SpectrogramCNN'.

    Architecture, in order: four ReLU convolutions with 32, 64, 128 and 256 filters, a 2x2
    average pooling after the second and third one, global average pooling, four ReLU dense
    layers (256, 128, 64, 32 units) and a softmax layer with `classes` units.

    input_shape: shape of one input sample.
    classes: number of units of the softmax output layer.
    filter_shape: kernel size shared by all convolutions.
    """
    layers = keras.layers

    inputs = layers.Input(input_shape)
    features = layers.Conv2D(32, filter_shape, activation='relu')(inputs)

    # Convolution stages that are each followed by 2x2 average pooling.
    for filters in (64, 128):
        features = layers.Conv2D(filters, filter_shape, activation='relu')(features)
        features = layers.AveragePooling2D((2, 2))(features)

    features = layers.Conv2D(256, filter_shape, activation='relu')(features)
    features = layers.GlobalAveragePooling2D()(features)

    # Fully connected head, halving the width at every layer.
    for units in (256, 128, 64, 32):
        features = layers.Dense(units, activation='relu')(features)
    outputs = layers.Dense(classes, activation='softmax')(features)

    return keras.models.Model(inputs=inputs, outputs=outputs, name='SpectrogramCNN')

In [None]:
# Load the dataset into the notebook-level `data` with 80% training and 10% validation per genre;
# the remaining files of each genre form the test set. Then report the size of every split.
data = get_data_mfcc(training_percentaje=0.8, validation_percentaje=0.1, test_percentaje=0.1)
print(f"Train Count: {len(data['data_training']['in'])}. \nValidation Count: {len(data['data_validation']['in'])}. \nTest Count: {len(data['data_testing']['in'])}")

In [50]:
import pickle
def training():
    """Build, train and save the MFCC CNN using the notebook-level configuration.

    Reads the module-level `data`, `size_images`, `channels`, `kernel`, `lr` and `epochs_`.
    The model is compiled with sparse categorical cross-entropy and Adam, and trained on
    data['data_training'] with data['data_validation'] as validation data. ModelCheckpoint is
    the only callback; no learning-rate schedule is applied.

    Files written under /content/drive/MyDrive/mfcc_models/:
        best_weights.h5  weights of the epoch with the highest validation accuracy
        history.pkl      pickled `history.history` dict
        mfcc_model.h5    full model as it is after the last epoch

    Returns:
        The History object returned by model.fit(); it is also stored in the module-level
        `history`.
    """
    # `history` is the only global that is assigned here; every other one is just read.
    global history

    models_dir = '/content/drive/MyDrive/mfcc_models'
    # Every artifact goes into this folder; create it up front so a long run cannot fail at
    # its first save because the folder is missing.
    os.makedirs(models_dir, exist_ok=True)

    training_data = data['data_training']
    v_data = data['data_validation']

    # cv2.resize takes size_images as (width, height), so the decoded arrays are
    # (size_images[1], size_images[0], channels).
    model = mfcc_cnn_model(input_shape=(
        size_images[1], size_images[0], channels), classes=10, filter_shape=kernel)

    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(),
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        metrics=['accuracy'])

    # Keep only the weights of the best epoch, judged by validation accuracy.
    checkpoint = keras.callbacks.ModelCheckpoint(
        models_dir + '/best_weights.h5',
        save_weights_only=True,
        save_best_only=True,
        monitor='val_accuracy',
        mode='max')

    history = model.fit(
        training_data['in'], training_data['out'],
        validation_data=(v_data['in'], v_data['out']),
        epochs=epochs_,
        callbacks=[checkpoint]
        )

    with open(models_dir + '/history.pkl', 'wb') as file:
        pickle.dump(history.history, file)

    model.save(models_dir + '/mfcc_model.h5')
    return history

def testing(model_name="mfcc_model.h5"):
    """Load a saved model and print its accuracy on the test, validation and training splits.

    model_name: file name of the model inside /content/drive/MyDrive/mfcc_models/.
    The splits are taken from the notebook-level `data` and reported in the order
    testing, validation, training.
    """
    global data

    classifier = keras.models.load_model('/content/drive/MyDrive/mfcc_models/' + model_name)

    report = (
        ('data_testing', 'testing accuracy: '),
        ('data_validation', 'validation accuracy: '),
        ('data_training', 'training accuracy: '),
    )
    for split_key, prefix in report:
        features = data[split_key]['in']
        labels = data[split_key]['out']
        # evaluate() returns [loss, accuracy] for the metrics this model was compiled with.
        metrics = classifier.evaluate(features, labels, verbose=0)
        print(prefix + str(metrics[1]))

def testing_for_genre(model_name="mfcc_model.h5"):
    """Load a saved model and print its test accuracy separately for every genre.

    model_name: file name of the model inside /content/drive/MyDrive/mfcc_models/.
    The per-genre test samples come from data['data_testing_from_genre'].
    """
    classifier = keras.models.load_model('/content/drive/MyDrive/mfcc_models/' + model_name)

    for genre, split in data['data_testing_from_genre'].items():
        # evaluate() returns [loss, accuracy]; only the accuracy is reported.
        metrics = classifier.evaluate(split['in'], split['out'], verbose=0)
        print(f'{genre} accuracy: ' + str(metrics[1]))
        # print(f'{genre} loss: ' + str(score[0]))

In [14]:
import matplotlib.pyplot as plt
%matplotlib inline

def plot_training(history):
    """Show two figures from a Keras History object: accuracy per epoch, then loss per epoch.

    Each figure overlays the training curve and the validation curve.
    """
    # (training key, validation key, figure title, y-axis label) for each figure, in display order.
    figures = (
        ('accuracy', 'val_accuracy', 'Precisión del modelo', 'Precisión'),
        ('loss', 'val_loss', 'Pérdida del modelo', 'Pérdida'),
    )
    for train_key, val_key, title, y_label in figures:
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Época')
        plt.legend(['Entrenamiento', 'Validación'], loc='upper left')
        plt.show()

In [44]:
def save_weights(model_name="mfcc_model.h5", epoch_name = 'best_weights.h5'):
    """Load a saved model, replace its weights with checkpointed ones and save it back.

    model_name: file name of the full model inside /content/drive/MyDrive/mfcc_models/.
        The model is read from this file and written back to the same file.
    epoch_name: file name, in the same folder, of the weights to load into the model.
    """
    models_dir = '/content/drive/MyDrive/mfcc_models/'
    path = models_dir + model_name
    path_best_epoch = models_dir + epoch_name

    model = keras.models.load_model(path)
    model.load_weights(path_best_epoch)
    # Write to the file that was loaded, so a non-default model_name updates that model
    # instead of overwriting mfcc_model.h5.
    model.save(path)


In [None]:
# Train the model; training() also writes the checkpoint, the pickled history and the final model to Drive.
history = training()

In [None]:
# Load the checkpointed weights into the saved model, then print its accuracy on each split.
save_weights()
testing()

In [None]:
# Print the test accuracy of the saved model for each genre separately.
testing_for_genre()

In [None]:
# Plot accuracy and loss curves; relies on `history` assigned by the cell that runs training().
plot_training(history)

In [26]:
%matplotlib inline
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt

In [None]:
# Reload the saved model and compute its per-class scores for every test sample.
path = '/content/drive/MyDrive/mfcc_models/mfcc_model.h5'
model = keras.models.load_model(path)
predictions = model.predict(data['data_testing']['in'])
# Reduce each row of scores to the index of the highest-scoring class.
rounded_predictions = np.argmax(predictions, axis=1)

# Confusion matrix of the test split: rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_true=data['data_testing']['out'], y_pred=rounded_predictions)

In [28]:
import time

# Tick labels for the confusion-matrix plot, listed in class-index order.
# NOTE(review): same entries as the `genres` list defined in an earlier cell - keep both in sync.
GENRES = ['blues', 'classical', 'country', 'disco',
          'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

def plot_confusion_matrix(y_test, y_pred, save_as:str, title:str='Confusion Matrix'):
    """Draw the confusion matrix of a classifier as an annotated heat map and save it to disk.

    y_test: true class indices (positions in GENRES).
    y_pred: predicted class indices, aligned with `y_test`.
    save_as: file name handed to plt.savefig(). If it is not a string, a timestamped
        'conf_matrix_<time>.png' name is used instead.
    title: figure title.

    Rows of the plot are true labels and columns are predicted labels; the x-axis label also
    reports the overall accuracy and misclassification rate.
    """
    # Pin the label set so the matrix always has one row/column per entry of GENRES and lines
    # up with the tick labels, even when some class is absent from y_test and y_pred.
    class_ids = np.arange(len(GENRES))
    cm = confusion_matrix(y_test, y_pred, labels=class_ids)
    if not isinstance(save_as, str):
        save_as = f'conf_matrix_{time.time()}.png'

    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    cmap = plt.get_cmap('Blues')

    # plot confusion matrix
    plt.figure(figsize=(8,6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    plt.xticks(class_ids, GENRES, rotation=45)
    plt.yticks(class_ids, GENRES)

    # Cells darker than half the maximum get white text so the count stays readable.
    half_max = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, "{:,}".format(cm[i, j]),
                horizontalalignment="center",
                color="white" if cm[i, j] > half_max else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out after the axis labels exist so they are taken into account and not clipped.
    plt.tight_layout()
    plt.savefig(save_as)

In [None]:
# plot_confusion_matrix takes (y_test, y_pred): true labels first, predictions second. Passing
# them the other way round transposes the matrix relative to its axis labels.
plot_confusion_matrix(data['data_testing']['out'], rounded_predictions, "MFCC confusion matrix")