### EVALUATING THE MODELS

* ### CNN
* ### XGB
* ### NN
* ### XGB+NN hard vote
* ### XGB+NN soft vote



***


In [1]:
from sklearn.preprocessing import scale, LabelEncoder, StandardScaler, minmax_scale


import numpy as np
import pandas as pd
import librosa
from librosa import cqt
from librosa.feature import *
import librosa.display
import IPython.display as ipd
from PIL import Image

from datetime import datetime
import ffmpeg
import os
import string

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings(action='ignore', category=UserWarning)


In [2]:
from xgboost import XGBClassifier
import pandas as pd

from sklearn.preprocessing import LabelEncoder, StandardScaler 

from sklearn.metrics import precision_score, recall_score, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score, roc_curve, auc, SCORERS,  ConfusionMatrixDisplay
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.model_selection import GridSearchCV, train_test_split
from xgboost import XGBClassifier
from xgboost import plot_importance
import xgboost
import joblib

from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.layers import BatchNormalization
import pickle

### Loading and splitting Features for XGB and NN

In [3]:
# Tabular feature set consumed by the XGB and NN models.
features_path = '../../features/features_new_14genres_5sec.csv'
df = pd.read_csv(features_path)
df.shape

(27870, 30)

In [4]:
# Columns excluded from the feature matrix (the target plus a non-feature column).
drop_features = ['genre', 'file_name']

X = df.drop(drop_features, axis=1)
y = df['genre']

# Train/test split for XGB and NN (fixed seed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise with statistics estimated on the training rows only, then apply
# the same transform to the held-out rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(np.array(X_train, dtype=float))
X_test_scaled = scaler.transform(np.array(X_test, dtype=float))

# Fit the label encoder on the TRAINING labels and only transform the test
# labels. Fitting on the test split would make the class -> integer mapping
# depend on which genres happen to land in the held-out rows, and transform()
# would raise on any training label that is absent from it.
encoder = LabelEncoder()
y_train_num = encoder.fit_transform(y_train)
y_test_num = encoder.transform(y_test)

print(X_test.shape)

(5574, 28)


### Loading image data for CNN

In [5]:
# Pickled image data used by the CNN.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
file_name = "../../data_images/set_2x2-8-1-1_5sec.pkl"
with open(file_name, "rb") as pkl_file:
    saved_images = pickle.load(pkl_file)
     

In [6]:
def _flatten_rows(images):
    """Collapse every axis after the first, leaving one flat row per sample."""
    return images.reshape(images.shape[0], -1)


def _argmax_column(labels, n_rows):
    """Take the argmax of each label entry and reshape the result to n_rows rows."""
    return np.array([np.argmax(entry) for entry in labels]).reshape(n_rows, -1)


# saved_images holds (images, labels) pairs at positions 0, 1, 2,
# unpacked here as train, test and validation respectively.
train_images, train_labels = saved_images[0]
test_images, test_labels = saved_images[1]
val_images, val_labels = saved_images[2]

# Reshaping: one flattened row per image.
train_img = _flatten_rows(train_images)
test_img = _flatten_rows(test_images)
val_img = _flatten_rows(val_images)

# Integer class ids derived from the label arrays (presumably one-hot encoded — TODO confirm).
train_y = _argmax_column(train_labels, train_images.shape[0])
test_y = _argmax_column(test_labels, test_images.shape[0])
val_y = _argmax_column(val_labels, val_images.shape[0])

###  Loading Models

In [7]:
# Gradient-boosted model: create an empty classifier, then restore its saved state.
modelXGB = XGBClassifier()
modelXGB.load_model("../../models/XGB_model.json")

# Keras networks restored from their .h5 files.
modelNN = load_model('../../models/NN_model_5sec_14genres.h5')
modelCNN = load_model('../../models/CNN_best_model_5sec(94).h5')

# The joblib file is unpacked into two objects. This rebinds the `encoder` and
# `scaler` names, replacing the objects fitted in the feature-splitting cell.
# NOTE(review): the file name says "6sec" while the features/models are "5sec" —
# confirm this is the intended transformer file.
with open('../../models/NN_transformers_6sec_14genres_new.joblib', 'rb') as transformers_file:
    encoder, scaler = joblib.load(transformers_file)

# Class labels known to the loaded encoder.
genres = encoder.classes_

## Combining Predictions

### XGB and NN Soft Vote predictions

In [8]:
# SOFT VOTE
#
# Step 1 - class probabilities from each model. The NN is fed the standardised
# features, XGB the unscaled feature frame, and the CNN its own image test set.
CNN_prob = modelCNN.predict(test_images)
NN_prob = modelNN.predict(X_test_scaled)
XGB_prob = modelXGB.predict_proba(X_test)

# Step 2 - each model's own predicted class.
CNN_pred = CNN_prob.argmax(axis=1)
NN_pred = NN_prob.argmax(axis=1)
XGB_pred = modelXGB.predict(X_test)

# Step 3 - soft vote: average the NN and XGB probabilities, then pick the
# class with the highest averaged probability.
sum_prob = (NN_prob + XGB_prob) / 2
sum_pred = sum_prob.argmax(axis=1)



### XGB and NN Hard Vote predictions

In [9]:
# Hard vote between NN and XGB. When the two models agree, their common
# prediction is kept. When they disagree, the winner is the model whose
# predicted class has the higher per-class precision (ties go to the NN).
#
# NOTE(review): the precisions are measured against y_test_num, the same
# labels the ensemble is scored on afterwards, so the hard-vote accuracy is
# optimistic. Consider estimating them on training/validation data instead.

# Pass the full list of class ids explicitly. Without `labels=`,
# precision_score returns one entry per label that actually occurs in the
# data, so position i of the result is only guaranteed to be class i when
# every class is present.
class_ids = np.arange(NN_prob.shape[1])

NN_prec = precision_score(y_test_num, NN_pred, labels=class_ids, average=None)
XGB_prec = precision_score(y_test_num, XGB_pred, labels=class_ids, average=None)

# class id -> precision lookup tables.
NN_prec_dic = dict(enumerate(NN_prec))
XGB_prec_dic = dict(enumerate(XGB_prec))

NN_and_XGB_hard_vote = []
for nn_cls, xgb_cls in zip(NN_pred, XGB_pred):
    keep_nn = nn_cls == xgb_cls or NN_prec_dic[nn_cls] >= XGB_prec_dic[xgb_cls]
    NN_and_XGB_hard_vote.append(nn_cls if keep_nn else xgb_cls)



### Comparing Scores:

In [10]:
# Individual models. For the Keras models, element [1] of the evaluate() result
# is reported as accuracy (assumes accuracy is the first compiled metric — TODO confirm).
CNN_score = modelCNN.evaluate(test_images, test_y, verbose=0)
NN_score = modelNN.evaluate(X_test_scaled, y_test_num, verbose=0)
XGB_score = modelXGB.score(X_test, y_test_num)

# NN+XGB ensembles, scored against the same encoded test labels.
NN_and_XGB_hard_score = accuracy_score(y_test_num, NN_and_XGB_hard_vote)
NN_and_XGB_soft_score = accuracy_score(y_test_num, sum_pred)

accuracy_report = (
    ("CNN Testing Accuracy: ", CNN_score[1]),
    ("XGB Testing Accuracy: ", XGB_score),
    ("NN Testing Accuracy: ", NN_score[1]),
    ("NN+XGB hard vote accuracy: ", NN_and_XGB_hard_score),
    ("NN+XGB soft vote accuracy: ", NN_and_XGB_soft_score),
)
for caption, value in accuracy_report:
    print(caption, value)

CNN Testing Accuracy:  0.9420807957649231
XGB Testing Accuracy:  0.9467168998923574
NN Testing Accuracy:  0.9655543565750122
NN+XGB hard vote accuracy:  0.9635809113742375
NN+XGB soft vote accuracy:  0.9702188733405095


## Conclusion:

* #### The NN+XGB soft-vote ensemble performs best

In the classifier I also combine a soft vote of CNN+NN+XGB; however, additional testing is required to obtain its accuracy score.