# MusicAI
## Sztuczna Inteligencja - projekt 
### Część 5 - Klasyfikator K-Nearest Neighbours
Autorzy: Jakub Ochnik, Adam Karabiniewicz, Marcel Bieniek
___


Importing necessary libraries and packages

In [None]:
import json
import math
import numpy as np
import seaborn as sn
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score 

Defining constants

In [None]:
# Location of the JSON dataset that is passed to load_data.
# A forward slash is accepted by Python on Windows as well as on POSIX
# systems, whereas a backslash-separated path only resolves on Windows.
DATA_PATH = "Data/data.json"
# Genre names, used as tick labels of the confusion-matrix plots.
# NOTE(review): presumably ordered to match the integer labels in the dataset — confirm.
GENRE_LIST = ['blues','classical','country','disco','hiphop','jazz','metal','pop','reggae','rock']

Loading dataset

In [None]:
def load_data(data_path):
    """Load MFCC features and labels from a JSON dataset file.

    The file must contain an "mfcc" entry (one array of coefficients per
    sample) and a "labels" entry. Each sample's MFCC array is flattened
    into a 1-D vector so the result can be used as a feature matrix.

    Args:
        data_path: Path to the JSON dataset file.

    Returns:
        A tuple ``(X, y)`` where ``X`` is a numpy array holding one
        flattened feature vector per sample and ``y`` is a numpy array
        built from the "labels" entry.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(data_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # convert lists to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    # Flatten every sample's MFCC array into a single feature vector
    np_X = np.array([sample.flatten() for sample in X])

    print("X len =", len(np_X))
    # Report the label count itself so a features/labels mismatch is visible
    print("y len =", len(y))
    print("Data succesfully loaded!")

    return np_X, y

In [None]:
# Read the dataset at DATA_PATH: X holds one flattened feature vector per
# sample, y holds the labels.
X, y = load_data(DATA_PATH)

In [None]:
# Show the dimensions of the loaded feature matrix.
print(X.shape)

Splitting the data into train and test sets

In [None]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# NOTE(review): disabled code that reshaped 3-D feature arrays to 2-D.
# load_data already flattens every sample, so this step looks obsolete —
# confirm before deleting.
# nsamples_test, nx_test, ny_test = X_test.shape
# d2_X_test = X_test.reshape((nsamples_test,nx_test*ny_test))
# print(X_test.shape)
# print(d2_X_test.shape)

# nsamples_train, nx_train, ny_train = X_train.shape
# d2_X_train = X_train.reshape((nsamples_train,nx_train*ny_train))
# print(X_train.shape)
# print(d2_X_train.shape)

Scaling

In [None]:
# Learn the scaling statistics from the training set only, then apply the
# very same transformation to both splits.
X_sc = StandardScaler().fit(X_train)
X_train = X_sc.transform(X_train)
X_test = X_sc.transform(X_test)

Calculating the K parameter (integer square root of the average number of test samples per genre)

In [None]:
# Heuristic for K: integer square root of the average number of test
# samples per genre. The intermediate and the final value are both printed.
samples_per_genre = int(len(y_test) / len(GENRE_LIST))
print(samples_per_genre)
k = math.isqrt(samples_per_genre)
print(k)

Creating and training the model

In [None]:
# Distance-weighted nearest-neighbour classifier trained on the scaled
# training set.
# NOTE(review): n_neighbors is fixed at 3; the k computed in the previous
# cell is not used here — confirm this is intentional.
knn_classifier = KNeighborsClassifier(weights='distance', n_neighbors=3).fit(
    X_train, y_train
)

# Predicted labels for the held-out test samples.
knn_pred = knn_classifier.predict(X_test)

In [None]:
# Accuracy on the test set, expressed in percent.
knn_accuracy = accuracy_score(y_test, knn_pred) * 100
print('Accuracy : ', knn_accuracy, '%')

Generating heatmap

In [None]:
# Confusion matrix of true test labels against the KNN predictions,
# printed under a heading line.
knn_confusion_matrix = confusion_matrix(y_true=y_test, y_pred=knn_pred)
print('KNN confusion_matrix:', knn_confusion_matrix, sep='\n')

In [None]:
# Raw-count confusion matrix rendered as an image, with the genre names as
# tick labels on both axes.
plt.figure(figsize=(12, 10))
plt.imshow(knn_confusion_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title("Confusion matrix for KNN classification")
plt.colorbar()
tick_marks = np.arange(len(GENRE_LIST))
plt.xticks(tick_marks, GENRE_LIST, rotation=45)
plt.yticks(tick_marks, GENRE_LIST)
plt.ylabel('True label')
plt.xlabel('Predicted label')
# Compute the layout only after every label exists, so the axis labels are
# taken into account instead of being left out of the padding.
plt.tight_layout()
plt.show()

In [None]:
# Row-normalised confusion matrix: every cell shows which share of the
# samples with a given actual label (row) received each predicted label
# (column).
fig, ax = plt.subplots(figsize=(12, 10), facecolor='white')
# keepdims=True keeps the row totals as a column vector, so broadcasting
# divides each row by its own total. A flat (n,) vector would instead be
# aligned with the columns and divide cell [i, j] by the total of row j.
row_totals = np.sum(knn_confusion_matrix, axis=1, keepdims=True)
sn.heatmap(
    knn_confusion_matrix / row_totals,
    annot=True,
    fmt='.2%',
    xticklabels=GENRE_LIST,
    yticklabels=GENRE_LIST,
    cmap='Blues',
    ax=ax,
)
ax.set(xlabel="Predicted labels", ylabel="Actual labels")
# ravel() restores the flat per-genre vector for printing.
print("Number of songs for each genre:", row_totals.ravel())