# MusicAI
## Sztuczna Inteligencja - projekt 
### Część 3 - prosta sieć neuronowa
Autorzy: Jakub Ochnik, Adam Karabiniewicz, Marcel Bieniek
___


Importing necessary libraries and packages

In [None]:
import numpy as np
import math
import json
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import tensorflow as tf

Constants

In [None]:
# Forward slashes are accepted on Windows as well as Linux/macOS, and match the
# "Data/..." paths used for the custom songs further down.
DATA_PATH = "Data/data_full.json"

Loading dataset

In [None]:
def load_data(data_path):
    """Load network inputs, labels and the label mapping from a JSON dataset.

    Args:
        data_path: Path to a JSON file containing the keys "mfcc", "labels"
            and "mapping".

    Returns:
        A tuple ``(X, y, names)``: ``X`` is the "mfcc" entry as a NumPy array
        (inputs), ``y`` is the "labels" entry as a NumPy array (outputs) and
        ``names`` is the "mapping" entry exactly as stored in the file.

    Raises:
        KeyError: If one of the expected keys is missing from the file.
    """
    # JSON is UTF-8 by specification; an explicit encoding avoids depending on
    # the platform's locale default.
    with open(data_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    X = np.array(data["mfcc"])
    y = np.array(data["labels"])
    names = data["mapping"]

    print("Data succesfully loaded!")

    return X, y, names

In [None]:
# Load the dataset once: X holds the inputs, y the labels and names the label mapping.
X, y, names = load_data(DATA_PATH)

# Sanity check: print the number of input segments next to the number of labels.
print("Number of segments: {} \n Number of labels: {}".format(len(X), len(y)))

Splitting the data into train and test sets

In [None]:
# Hold out 30% of the samples as the test set.
# NOTE(review): no random_state is passed, so the split is presumably not
# reproducible between runs - confirm whether that is intended.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

In [None]:
# Show the dataset dimensions; X.shape[1] and X.shape[2] define the network's input shape.
print(X.shape)

Build the network

In [None]:
# Fully connected classifier: the input is flattened, passed through three
# L2-regularised ReLU layers (each followed by dropout) and finished with a
# softmax output layer.
hidden_layer_sizes = (512, 256, 64)

# input layer
network_layers = [keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2]))]

# hidden layers, from widest to narrowest
for units in hidden_layer_sizes:
    network_layers.append(
        keras.layers.Dense(
            units,
            activation="relu",
            kernel_regularizer=keras.regularizers.l2(0.001),
        )
    )
    network_layers.append(keras.layers.Dropout(0.3))

# output layer: 10 neurons = 10 genres
network_layers.append(keras.layers.Dense(10, activation="softmax"))

model = keras.Sequential(network_layers)

Compiling the network

In [None]:
# Adam - an extension of classic gradient descent; used here with a learning rate of 0.0001.
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
# Train against sparse categorical cross-entropy and track accuracy as the metric.
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
# Print an overview of the compiled network.
model.summary()

Training the network

In [None]:
# Train for 100 epochs with batches of 32 samples and keep the per-epoch metrics.
# NOTE(review): the held-out test split doubles as validation data here.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32)

Plot accuracy and error over epochs

In [None]:
def plot_history(history):
    """Plot training and test accuracy and error on two stacked subplots.

    Args:
        history: Object whose ``history`` attribute maps "accuracy",
            "val_accuracy", "loss" and "val_loss" to sequences of values
            (the object returned by ``model.fit``).
    """
    metrics = history.history
    _, (accuracy_ax, error_ax) = plt.subplots(2)

    # upper panel: accuracy
    accuracy_ax.plot(metrics["accuracy"], label="Training accuracy")
    accuracy_ax.plot(metrics["val_accuracy"], label="Test accuracy")
    accuracy_ax.set(ylabel="Accuracy", title="Accuracy")
    accuracy_ax.legend(loc="lower right")

    # lower panel: error (loss); only this panel carries the x-axis label
    error_ax.plot(metrics["loss"], label="Training error")
    error_ax.plot(metrics["val_loss"], label="Test error")
    error_ax.set(ylabel="Error", xlabel="Epoch", title="Error")
    error_ax.legend(loc="upper right")

    plt.show()

In [None]:
# Draw the accuracy and error curves recorded during training.
plot_history(history)

In [None]:
# Evaluate the trained model on the test split; evaluate() returns the loss and the accuracy metric.
test_error, test_accuracy = model.evaluate(X_test, y_test, verbose = 1)
print("Model accuracy: {}".format(test_accuracy))

Generating heatmap

In [None]:
import seaborn as sn

# Class scores for every test sample; the predicted class is the index of the
# highest score in each row.
predictions = model.predict(X_test)
pred_argmax = list(np.argmax(predictions, axis=1))

# Rows correspond to actual labels, columns to predicted labels.
conf_matrix = tf.math.confusion_matrix(labels=y_test, predictions=pred_argmax)

# Normalise every ROW by its own total so each cell is the share of an actual
# genre that received a given prediction. keepdims=True keeps the totals as a
# column vector; dividing by a flat vector would broadcast along the columns
# and scale cell (i, j) by the total of row j instead of row i.
conf_counts = conf_matrix.numpy().astype(float)
row_totals = conf_counts.sum(axis=1, keepdims=True)
# A genre absent from the test split has a zero total; clamp it so that row
# shows 0% instead of NaN.
conf_matrix_norm = conf_counts / np.maximum(row_totals, 1.0)

# NOTE(review): assumes the label indices follow this order - confirm against `names`.
cat = ['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']
fig, ax = plt.subplots(figsize=(12,10), facecolor='white')
sn.heatmap(conf_matrix_norm, annot=True, fmt='.2%', xticklabels=cat, yticklabels=cat, cmap='Blues', ax=ax)
ax.set(xlabel="Predicted labels", ylabel="Actual labels")


### Testing the network on custom examples

Predicting the genre of a specific sample

In [None]:
def predict_sample(model, X):
    """Return the index of the class the model scores highest for one sample.

    Args:
        model: Object exposing ``predict(batch)``.
        X: A single sample as a NumPy array without a batch axis (the
            original note describes it as a 2D array of shape (130, 13)).

    Returns:
        Index of the highest-scoring class for the sample.
    """
    # The model predicts on batches, so wrap the sample in a batch of one.
    batch = X[np.newaxis]
    class_scores = model.predict(batch)
    return np.argmax(class_scores, axis=1)[0]

In [None]:
# The example below is wrapped in a string literal, so none of it is executed.
'''
# testing a specific sample
x_n = X_test[103]
y_n = y_test[103]

predict_sample(model, x_n)
'''

Predicting the genre of a custom imported song (30 s)

In [None]:
import librosa
import math
import os
import json

# custom song prediction function
def predict_song(model, data, orig_genre, names):
    """Classify a song by majority vote over its segments and print the result.

    Args:
        model: Trained model, forwarded to ``predict_sample``.
        data: Sequence of per-segment inputs (for example the list returned
            by ``load_song``).
        orig_genre: Expected genre; only used in the printed message.
        names: Sequence mapping a predicted class index to its genre name.

    Raises:
        ValueError: If ``data`` contains no segments.
    """
    data = np.array(data)
    # load_song drops segments of unexpected length, so a short recording can
    # yield nothing at all; fail with a clear message instead of an opaque
    # NumPy error from the vote below.
    if data.size == 0:
        raise ValueError(
            "No segments to classify: the song produced no usable segments."
        )

    preds = [predict_sample(model, segment) for segment in data]
    # Majority vote: the class index predicted for the most segments wins.
    predicted = np.bincount(preds).argmax()
    print("Expected genre: {}, Predicted genre: {}".format(orig_genre,names[predicted]))

# function to convert a song into data for the neural network
def load_song(filename, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=10):
    """Convert the first 30 seconds of an audio file into per-segment MFCC data.

    The file is loaded at 22050 Hz, the first 30 seconds' worth of samples is
    cut into ``num_segments`` equally long segments and MFCCs are computed
    for every segment.

    Args:
        filename: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients, forwarded to librosa.
        n_fft: FFT window size, forwarded to librosa.
        hop_length: Hop length between frames, forwarded to librosa.
        num_segments: Number of segments the 30-second window is split into.

    Returns:
        A list with one entry per kept segment; each entry is the transposed
        MFCC matrix converted to nested lists. Segments whose frame count
        differs from the expected one are dropped, so the list can be shorter
        than ``num_segments`` (or empty for a short recording).
    """
    sample_rate = 22050
    duration = 30  # seconds
    signal, sr = librosa.load(filename, sr=sample_rate)

    samples_per_track = sample_rate * duration
    num_samples_per_segment = samples_per_track // num_segments
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length)

    data = []
    # division into segments
    for s in range(num_segments):
        start_sample = num_samples_per_segment * s
        finish_sample = start_sample + num_samples_per_segment

        # The audio must be passed as the keyword argument `y`: newer librosa
        # releases reject positional arguments here.
        mfcc = librosa.feature.mfcc(
            y=signal[start_sample:finish_sample],
            sr=sr,
            n_mfcc=n_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        )
        mfcc = mfcc.T

        # Keep only segments with the expected number of MFCC vectors.
        if len(mfcc) == expected_num_mfcc_vectors_per_segment:
            data.append(mfcc.tolist())
    return data

In [None]:
# songs are not included in the project files

# (audio file, expected genre) pairs, processed in the listed order
custom_songs = [
    ("Data/jazz_piano.wav", "jazz"),
    ("Data/pop_rock.wav", "pop/rock"),
    ("Data/classic_piano.wav", "classical"),
    ("Data/classic_symphony.wav", "classical"),
    ("Data/blues.wav", "blues"),
    ("Data/blues_2.wav", "blues"),
    ("Data/classic_piano2.wav", "classical"),
    ("Data/mozart.wav", "classical"),
    ("Data/rock_metal.wav", "rock/metal"),
]

for song_path, expected_genre in custom_songs:
    x_to_pred = load_song(song_path)
    predict_song(model, x_to_pred, expected_genre, names)