#### GTI770 - TP4

In [1]:
%matplotlib inline   
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import graphviz
import decimal
import pandas as pd
from openpyxl import load_workbook
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import optimizers as opt

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
# from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
# from mdlp.discretization import MDLP
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

In [30]:
#Utility methods
def SplitVectorData_Holdout(primitives_vector, train_portion):
    
    size = len(primitives_vector)
    subsize = len(primitives_vector[0])
    
    val_portion = (1 - train_portion) / 2
    test_portion = (1 - train_portion) / 2

    nbTrain = int(size * train_portion)
    nbVal = int(size * val_portion)
    nbTest = int(size * test_portion)

    array_train = np.zeros((nbTrain, subsize), dtype=np.float64)
    array_val = np.zeros((nbVal, subsize), dtype=np.float64)
    array_test = np.zeros((nbTest, subsize), dtype=np.float64)

    array_train = primitives_vector[:nbTrain]
    array_val = primitives_vector[nbTrain : nbTrain + nbVal]
    array_test = primitives_vector[-nbTest:]
    return array_train, array_val, array_test

def SplitLabels_Holdout(primitives_vector, train_portion):
    
    size = len(primitives_vector)
    subsize = 1
    
    val_portion = (1 - train_portion) / 2
    test_portion = (1 - train_portion) / 2

    nbTrain = int(size * train_portion)
    nbVal = int(size * val_portion)
    nbTest = int(size * test_portion)

    array_train = np.zeros((nbTrain, subsize), dtype=np.float64)
    array_val = np.zeros((nbVal, subsize), dtype=np.float64)
    array_test = np.zeros((nbTest, subsize), dtype=np.float64)

    array_train = primitives_vector[:nbTrain]
    array_val = primitives_vector[nbTrain : nbTrain + nbVal]
    array_test = primitives_vector[-nbTest:]
    return array_train, array_val, array_test

def SplitVectorData_KFold(primitives_vector, k, test_portion):
    
    size = len(primitives_vector)
    subsize = len(primitives_vector[0])
    subsetNb = int(size * (1 - test_portion) / k)
    testNb = int(size * test_portion)
    
    array_kfold_train = np.zeros((k, subsetNb, subsize), dtype=np.float64)
    array_kfold_test = np.zeros((testNb, subsize), dtype=np.float64)
    
    for i in range(0, k):
        if i == 0 :
            array_kfold_train[i] = primitives_vector[i * subsetNb]
        else :
            array_kfold_train[i] = primitives_vector[(i-1) * subsetNb : i * subsetNb]
    
    array_kfold_test = primitives_vector[-testNb:]

    return array_kfold_train, array_kfold_test

def SplitVectorData_NoVal(primitives_vector, train_portion):
    
    size = len(primitives_vector)
    subsize = len(primitives_vector[0])
    
    test_portion = (1 - train_portion)

    nbTrain = int(size * train_portion)
    nbTest = int(size * test_portion)

    array_train = np.zeros((nbTrain, subsize), dtype=np.float64)
    array_test = np.zeros((nbTest, subsize), dtype=np.float64)

    array_train = primitives_vector[:nbTrain]
    array_test = primitives_vector[-nbTest:]
    return array_train, array_test

def concatenateWithoutTestPortion(full_array, index):
    result_array = []
    for i in range(0, len(full_array)):
        if len(result_array) == 0:
            result_array = full_array[i]
        elif i != index :
            result_array = np.concatenate((result_array, full_array[i]), axis=0)
    return result_array

def scores_mean(array_scores):
    
    size = len(array_scores)
    subsize = len(array_scores[0])
    array_mean = np.zeros(subsize, dtype=float)
    
    for i in range(0, size):
        for j in range(0, subsize):
            array_mean[j] += array_scores[i][j]
            
    for j in range(0, subsize):
            array_mean[j] = array_mean[j] / size
    
    return array_mean

def UseModelOnTestData(array, model, transformer=None):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)
    if transformer is not None:
        transform_train, transform_val, transform_test, Y_train, Y_val, Y_test = TransformData(array_train, array_val, array_test, len(array_train[0]), transformer)
    array_model_result, array_prediction_train_result, array_prediction_val_result, array_prediction_test_result, array_train_result, array_val_result, array_test_result = GenerateModelDataFromTransform(transform_train, transform_val, transform_test, Y_train, Y_val, Y_test, len(array[0]), model)
    result = [[array_test_result, array_prediction_test_result]]
    return GenerateScores(result) 

# ----- For debug -----
#Filter_train, Filter_val, Filter_test = SplitVectorDataTrainValTest(Filter, 0.6)
#print(len(Filter_train))
#print(len(Filter_val))
#print(len(Filter_test))
#print(len(Filter))
#----------------------

In [31]:
#Transforms and Models generation methods

def GenerateModelDataFromVector(x, y, chosen_model):
    
    X_train, X_val, X_test = SplitVectorData_Holdout(x, 0.8)
    Y_train, Y_val, Y_test = SplitLabels_Holdout(y, 0.8)
    
    model = chosen_model
    model = model.fit(X_train, Y_train)
    prediction_train = model.predict(X_train)
    prediction_val = model.predict(X_val)
    prediction_test = model.predict(X_test)
    
    validations = [[Y_train, prediction_train], [Y_val, prediction_val], [Y_test, prediction_test]]
    
    return GenerateScores(validations)

def TransformData(array_train, array_val, array_test, num_features, chosen_transformer):
    
    num_features = num_features - 1
    
    data_train = array_train
    X_train  = data_train[:,0:num_features]
    Y_train  = data_train[:,num_features]
    
    data_val = array_val
    X_val  = data_val[:,0:num_features]
    Y_val  = data_val[:,num_features]
    
    data_test = array_test
    X_test  = data_test[:,0:num_features]
    Y_test  = data_test[:,num_features]
    
    transformer_train = chosen_transformer
    transformer_train = transformer_train.fit_transform(X_train, Y_train)
    
    transformer_val = chosen_transformer
    transformer_val = transformer_val.fit_transform(X_val, Y_val)
    
    transformer_test = chosen_transformer
    transformer_test = transformer_test.fit_transform(X_test, Y_test)
    return transformer_train, transformer_val, transformer_test, Y_train, Y_val, Y_test

def GenerateModelDataFromTransform(array_train, array_val, array_test, y_train, y_val, y_test, num_features, chosen_model):
    
    num_features = num_features - 1
    
    X_train  = array_train
    Y_train = y_train
    
    X_val = array_val
    Y_val = y_val
    
    X_test  = array_test
    Y_test = y_test
    
    model = chosen_model
    model = model.fit(X_train, Y_train)
    prediction_train = model.predict(X_train)
    prediction_val = model.predict(X_val)
    prediction_test = model.predict(X_test)
    return model, prediction_train, prediction_val, prediction_test, Y_train, Y_val, Y_test

In [4]:
#display methods
def ExportTree(model):
    dot_data = tree.export_graphviz(model, out_file=None, 
                         #feature_names = ['', '', ''],  
                         class_names = ['spam', 'mail'],
                         filled=True, rounded=True,  
                         special_characters=True)  
    graph = graphviz.Source(dot_data)
    graph.format = 'png'
    graph.render("Filter_data") 
    return graph

def GenerateScores(array):
    AccScores = np.zeros(len(array), dtype=float)
    F1Scores = np.zeros(len(array), dtype=float)

    for i in range(0, len(array)):
        AccScores[i] = accuracy_score(array[i][0], array[i][1])
        F1Scores[i] = f1_score(array[i][0], array[i][1], average='weighted', labels=np.unique(array[i][1]))
    
    return AccScores, F1Scores

def GenerateAccScores(array):
    AccScores = np.zeros(len(array), dtype=float)

    for i in range(0, len(array)):
        AccScores[i] = accuracy_score(array[i][0], array[i][1])
    
    return AccScores

#Extraction de primitive
def TracePlot(array_acc_X, array_acc_Y, array_f1_X, array_f1_Y, titre, titre_x, titre_y):        
    
    plt.plot(array_acc_X, array_acc_Y, 'ro')
    plt.plot(array_f1_X, array_f1_Y, 'g*')
    plt.xlabel(titre_x)
    plt.ylabel(titre_y)
    plt.legend(['Accuracy Score','F1 Score'])
    plt.title(titre)
    plt.grid(True)
    plt.show()

In [5]:
#model creations methods
def CreateDecisionTreeModel(depth):
    return tree.DecisionTreeClassifier(criterion='entropy', max_depth=depth, min_samples_leaf=1)

def CreateKNNModel(k, weight):
    return KNeighborsClassifier(n_neighbors=k, weights=weight)

In [6]:
#Decision tree methods
def DecisionTree_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)

    array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(None))
    array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(3))
    array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(5))
    array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(10))

    validations = [[array_val_none, array_prediction_val_none],
    [array_val_3, array_prediction_val_3],
    [array_val_5, array_prediction_val_5],
    [array_val_10, array_prediction_val_10]]

    return GenerateScores(validations)

def DecisionTree_KFold(array,k):

    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    all_accScores = np.zeros((k, 4), dtype=float)
    all_f1Scores = np.zeros((k, 4), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test

        array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(None))
        array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(3))
        array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_tree_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(5))
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_tree_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(10))

        validations = [[array_val_none, array_prediction_val_none],
        [array_val_3, array_prediction_val_3],
        [array_val_5, array_prediction_val_5],
        [array_val_10, array_prediction_val_10]]

        Array_AccScores, Array_F1Scores = GenerateScores(validations)

        all_accScores[i] = Array_AccScores
        all_f1Scores[i] = Array_F1Scores

    accScores_mean = scores_mean(all_accScores)
    f1Scores_mean = scores_mean(all_f1Scores)
    
    return accScores_mean, f1Scores_mean

In [7]:
#Bayes Methods
def Bayes_Holdout(array, array_prob ,array_transform=None):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)
    if array_transform is not None:
        array_train_tr, array_val_tr, array_test_tr = SplitVectorData_Holdout(array_transform, 0.6)

    transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp = TransformData(array_train, array_val, array_test, len(array_train[0]), MDLP())
    transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax = TransformData(array_train, array_val, array_test, len(array_train[0]), MinMaxScaler())

    array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), GaussianNB(priors=array_prob))
    if array_transform is not None:
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train_tr, array_val_tr, array_test_tr, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))
    else:
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))

    array_model_mdlp, array_prediction_train_mdlp, array_prediction_val_mdlp, array_prediction_test_mdlp, array_train_mdlp, array_val_mdlp, array_test_mdlp = GenerateModelDataFromTransform(transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp, len(array[0]), MultinomialNB())
    array_model_minmax, array_prediction_train_minmax, array_prediction_val_minmax, array_prediction_test_minmax, array_train_minmax, array_val_minmax, array_test_minmax = GenerateModelDataFromTransform(transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax, len(array[0]), MultinomialNB())


    validations = [[array_val_5, array_prediction_val_5],
    [array_val_10, array_prediction_val_10],
    [array_val_mdlp, array_prediction_val_mdlp],
    [array_val_minmax, array_prediction_val_minmax]]

    return GenerateScores(validations)  
    
def Bayes_KFold(array, array_prob, k, array_transform=None):
    
    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    if array_transform is not None:
        array_kfold_train_tr, array_kfold_test_tr = SplitVectorData_KFold(array_transform, k, 0.2)
    all_accScores = np.zeros((k, 4), dtype=float)
    all_f1Scores = np.zeros((k, 4), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test
        
        if array_transform is not None:
            array_train_tr = concatenateWithoutTestPortion(array_kfold_train_tr, i)
            array_val_tr = array_kfold_train_tr[i]
            array_test_tr = array_kfold_test_tr

        transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp = TransformData(array_train, array_val, array_test, len(array_train[0]), MDLP())
        transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax = TransformData(array_train, array_val, array_test, len(array_train[0]), MinMaxScaler())

        array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), GaussianNB(priors=array_prob))
        if array_transform is not None:
            array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train_tr, array_val_tr, array_test_tr, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))
        else:
            array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))
        array_model_mdlp, array_prediction_train_mdlp, array_prediction_val_mdlp, array_prediction_test_mdlp, array_train_mdlp, array_val_mdlp, array_test_mdlp = GenerateModelDataFromTransform(transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp, len(array[0]), MultinomialNB())
        array_model_minmax, array_prediction_train_minmax, array_prediction_val_minmax, array_prediction_test_minmax, array_train_minmax, array_val_minmax, array_test_minmax = GenerateModelDataFromTransform(transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax, len(array[0]), MultinomialNB())


        validations = [[array_val_5, array_prediction_val_5],
                     [array_val_10, array_prediction_val_10],
                     [array_val_mdlp, array_prediction_val_mdlp],
                     [array_val_minmax, array_prediction_val_minmax]]

        Array_AccScores, Array_F1Scores = GenerateScores(validations)

        all_accScores[i] = Array_AccScores
        all_f1Scores[i] = Array_F1Scores 
        
    accScores_mean = scores_mean(all_accScores)
    f1Scores_mean = scores_mean(all_f1Scores)

    return accScores_mean, f1Scores_mean

In [8]:
#KNN Methods
def KNN_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)

    array_model_3u, array_prediction_train_3u, array_prediction_val_3u, array_prediction_test_3u, array_train_3u, array_val_3u, array_test_3u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'uniform'))
    array_model_5u, array_prediction_train_5u, array_prediction_val_5u, array_prediction_test_5u, array_train_5u, array_val_5u, array_test_5u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'uniform'))
    array_model_10u, array_prediction_train_10u, array_prediction_val_10u, array_prediction_test_10u, array_train_10u, array_val_10u, array_test_10u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'uniform'))
    array_model_3d, array_prediction_train_3d, array_prediction_val_3d, array_prediction_test_3d, array_train_3d, array_val_3d, array_test_3d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'distance'))
    array_model_5d, array_prediction_train_5d, array_prediction_val_5d, array_prediction_test_5d, array_train_5d, array_val_5d, array_test_5d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'distance'))
    array_model_10d, array_prediction_train_10d, array_prediction_val_10d, array_prediction_test_10d, array_train_10d, array_val_10d, array_test_10d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'distance'))

    validations_uniform = [[array_val_3u, array_prediction_val_3u],
                        [array_val_5u, array_prediction_val_5u],
                        [array_val_10u, array_prediction_val_10u]]

    validations_distance = [[array_val_3d, array_prediction_val_3d],
                        [array_val_5d, array_prediction_val_5d],
                        [array_val_10d, array_prediction_val_10d]]

    accScores_uniform, f1Scores_uniform = GenerateScores(validations_uniform)
    accScores_distance, f1Scores_dsitance = GenerateScores(validations_distance)

    return accScores_uniform, f1Scores_uniform, accScores_distance, f1Scores_dsitance
    
def KNN_KFold(array, k):

    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    all_accScores_uniform = np.zeros((k, 3), dtype=float)
    all_f1Scores_uniform = np.zeros((k, 3), dtype=float)
    all_accScores_distance = np.zeros((k, 3), dtype=float)
    all_f1Scores_distance = np.zeros((k, 3), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test

        array_model_3u, array_prediction_train_3u, array_prediction_val_3u, array_prediction_test_3u, array_train_3u, array_val_3u, array_test_3u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'uniform'))
        array_model_5u, array_prediction_train_5u, array_prediction_val_5u, array_prediction_test_5u, array_train_5u, array_val_5u, array_test_5u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'uniform'))
        array_model_10u, array_prediction_train_10u, array_prediction_val_10u, array_prediction_test_10u, array_train_10u, array_val_10u, array_test_10u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'uniform'))
        array_model_3d, array_prediction_train_3d, array_prediction_val_3d, array_prediction_test_3d, array_train_3d, array_val_3d, array_test_3d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'distance'))
        array_model_5d, array_prediction_train_5d, array_prediction_val_5d, array_prediction_test_5d, array_train_5d, array_val_5d, array_test_5d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'distance'))
        array_model_10d, array_prediction_train_10d, array_prediction_val_10d, array_prediction_test_10d, array_train_10d, array_val_10d, array_test_10d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'distance'))

        validations_uniform = [[array_val_3u, array_prediction_val_3u],
        [array_val_5u, array_prediction_val_5u],
        [array_val_10u, array_prediction_val_10u]]

        validations_distance = [[array_val_3d, array_prediction_val_3d],
        [array_val_5d, array_prediction_val_5d],
        [array_val_10d, array_prediction_val_10d]]

        Array_AccScores_uniform, Array_F1Scores_uniform = GenerateScores(validations_uniform)
        Array_AccScores_distance, Array_F1Scores_distance = GenerateScores(validations_distance)

        all_accScores_uniform[i] = Array_AccScores_uniform
        all_f1Scores_uniform[i] = Array_F1Scores_uniform
        all_accScores_distance[i] = Array_AccScores_distance
        all_f1Scores_distance[i] = Array_F1Scores_distance

    accScores_mean_uniform = scores_mean(all_accScores_uniform)
    f1Scores_mean_uniform = scores_mean(all_f1Scores_uniform)
    accScores_mean_distance = scores_mean(all_accScores_distance)
    f1Scores_mean_distance = scores_mean(all_f1Scores_distance)

    return accScores_mean_uniform, f1Scores_mean_uniform, accScores_mean_distance, f1Scores_mean_distance

In [9]:
#Random forest methods
def RandomForest_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)

    array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(None))
    array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(3))
    array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(5))
    array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(10))

    validations = [[array_val_none, array_prediction_val_none],
    [array_val_3, array_prediction_val_3],
    [array_val_5, array_prediction_val_5],
    [array_val_10, array_prediction_val_10]]

    return GenerateScores(validations)

def RandomForest_KFold(array,k):

    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    all_accScores = np.zeros((k, 4), dtype=float)
    all_f1Scores = np.zeros((k, 4), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test

        array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(None))
        array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(3))
        array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_tree_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(5))
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_tree_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(10))

        validations = [[array_val_none, array_prediction_val_none],
        [array_val_3, array_prediction_val_3],
        [array_val_5, array_prediction_val_5],
        [array_val_10, array_prediction_val_10]]

        Array_AccScores, Array_F1Scores = GenerateScores(validations)

        all_accScores[i] = Array_AccScores
        all_f1Scores[i] = Array_F1Scores

    accScores_mean = scores_mean(all_accScores)
    f1Scores_mean = scores_mean(all_f1Scores)
    
    return accScores_mean, f1Scores_mean

In [10]:
def NormalizeData(data):
    return MinMaxScaler().fit_transform(data)

def GetGalaxiesClassProbabilities():
    count_smooth = 0
    count_spiral = 0
    
    fid = open('galaxy_feature_vectors.csv', 'r') 
    for line in fid:
        element = line.rstrip('\n').split(',')

        label = float(element[75])

        if label == 0.0:
            count_smooth += 1     
        elif label == 1.0:
            count_spiral += 1
    return [count_smooth/(count_smooth+count_spiral), count_spiral/(count_smooth+count_spiral)]

In [11]:
def SplitLabelsFromPrimitives(data_train, data_val, data_test):
    num_features = len(data_train[0]) - 1
    
    X_train  = data_train[:,0:num_features]
    Y_train  = data_train[:,num_features]

    X_val  = data_val[:,0:num_features]
    Y_val  = data_val[:,num_features]

    X_test  = data_test[:,0:num_features]
    Y_test  = data_test[:,num_features]
    return X_train, Y_train, X_val, Y_val, X_test, Y_test

def GenerateModel(data_train, data_val, data_test, layers, perceptrons, epochs, learnRate, name):
    
    #Split labels from the primitives
    X_train, Y_train, X_val, Y_val, X_test, Y_test = SplitLabelsFromPrimitives(data_train, data_val, data_test)
    
    #Create model
    model = Sequential()

    #Add layers
    model.add(Dense(units=perceptrons, activation='sigmoid', input_shape=(len(X_train[0]),)))
    model.add(Dropout(0.2))
    for i in range(0, layers - 2):
        model.add(Dense(units=perceptrons, activation='sigmoid'))
        model.add(Dropout(0.2))
    model.add(Dense(units=2, activation='sigmoid'))
    
    #Set optimizers and compile
    sgd = opt.SGD(lr=learnRate, decay=0, momentum=0, nesterov=False)
    model.compile(loss='sparse_categorical_crossentropy', 
              optimizer= sgd,
              metrics=['accuracy'])
    
    #Use TensorBoard
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs/'+name)
    
    #Train
    model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=epochs, batch_size=100, callbacks=[tb_callback])
    
    model.evaluate(X_test, Y_test)
    
    Y_probs = model.predict(X_test)
    
    Y_pred = Y_probs.argmax(axis=1)
    
    #plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_pred)
    #plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_test)

    return Y_test, Y_pred

def Neurone_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)
    
    AccScores = np.zeros(13, dtype=float)
    
    #base
    y_true_base, y_pred_base = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 0.5, 'base')
    
    #perceptrons
    y_true_perc1, y_pred_perc1 = GenerateModel(array_train, array_val, array_test, 4, 50, 60, 0.5, 'perc1')
    y_true_perc2, y_pred_perc2 = GenerateModel(array_train, array_val, array_test, 4, 300, 60, 0.5, 'perc2')
    y_true_perc3, y_pred_perc3 = GenerateModel(array_train, array_val, array_test, 4, 600, 60, 0.5, 'perc3')

    #epochs
    y_true_epoch1, y_pred_epoch1 = GenerateModel(array_train, array_val, array_test, 4, 100, 30, 0.5, 'epoch1')
    y_true_epoch2, y_pred_epoch2 = GenerateModel(array_train, array_val, array_test, 4, 100, 120, 0.5, 'epoch2')
    y_true_epoch3, y_pred_epoch3 = GenerateModel(array_train, array_val, array_test, 4, 100, 240, 0.5, 'epoch3')
    
    #layers
    y_true_layer1, y_pred_layer1 = GenerateModel(array_train, array_val, array_test, 6, 100, 60, 0.5, 'layer1')
    y_true_layer2, y_pred_layer2 = GenerateModel(array_train, array_val, array_test, 12, 100, 60, 0.5, 'layer2')
    y_true_layer3, y_pred_layer3 = GenerateModel(array_train, array_val, array_test, 24, 100, 60, 0.5, 'layer3')
    
    #learning rate
    y_true_lr1, y_pred_lr1 = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 0.1, 'lr1')
    y_true_lr2, y_pred_lr2 = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 0.5, 'lr2')
    y_true_lr3, y_pred_lr3 = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 1, 'lr3')

    validations = [[y_true_base, y_pred_base],
    [y_true_perc1, y_pred_perc1],
    [y_true_perc2, y_pred_perc2],
    [y_true_perc3, y_pred_perc3],
    [y_true_epoch1, y_pred_epoch1],
    [y_true_epoch2, y_pred_epoch2],
    [y_true_epoch3, y_pred_epoch3],
    [y_true_layer1, y_pred_layer1],
    [y_true_layer2, y_pred_layer2],
    [y_true_layer3, y_pred_layer3],
    [y_true_lr1, y_pred_lr1],
    [y_true_lr2, y_pred_lr2],
    [y_true_lr3, y_pred_lr3]]
    
    return GenerateScores(validations)

In [12]:
def SVM_linear(X_train, Y_train, X_val, Y_val, X_test, Y_test, C, gamma):   
    print("svm_linear with C = "+str(C)+" and gamma = "+str(gamma))
    clf = SVC(C=C, gamma=gamma, kernel='linear')
    clf.fit(X_train, np.ravel(Y_train))
    
    print("Start predictions...")
    pred_train = clf.predict(X_train)
    pred_val = clf.predict(X_val)
    pred_test = clf.predict(X_test)
    
    validations = [[Y_train, pred_train], [Y_val, pred_val], [Y_test, pred_test]]
    a1, f1 = GenerateScores(validations)
    print("acc = " + str(a1) + ", f1 = " + str(f1))
    return GenerateScores(validations)

def SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, C, gamma):   
    print("svm_rbf with C = "+str(C)+" and gamma = "+str(gamma))
    clf = SVC(C=C, gamma=gamma, kernel='rbf')
    clf.fit(X_train, np.ravel(Y_train))
    
    print("Start predictions...")
    pred_train = clf.predict(X_train)
    pred_val = clf.predict(X_val)
    pred_test = clf.predict(X_test)
    
    validations = [[Y_train, pred_train], [Y_val, pred_val], [Y_test, pred_test]]
    a1, f1 = GenerateScores(validations)
    print("acc = " + str(a1) + ", f1 = " + str(f1))
    return GenerateScores(validations)

def SVM(x, y):
    X_train, X_test = SplitVectorData_NoVal(x, 0.8)
    Y_train, Y_test = SplitVectorData_NoVal(y, 0.8)
    
    Y_pred_lin03= SVM_linear(X_train, Y_train, X_test, 1e-03)
    Y_pred_lin01= SVM_linear(X_train, Y_train, X_test, 1e-01)
    Y_pred_lin1= SVM_linear(X_train, Y_train, X_test, 1.0)
    Y_pred_lin10= SVM_linear(X_train, Y_train, X_test, 10.0)
    Y_pred_rbf03_g03= SVM_rbf(X_train, Y_train, X_test, 1e-03, 1e-03)
    Y_pred_rbf01_g03= SVM_rbf(X_train, Y_train, X_test, 1e-01, 1e-03)
    Y_pred_rbf1_g03= SVM_rbf(X_train, Y_train, X_test, 1.0, 1e-03)
    Y_pred_rbf10_g03= SVM_rbf(X_train, Y_train, X_test, 10.0, 1e-03)
    Y_pred_rbf03_g01= SVM_rbf(X_train, Y_train, X_test, 1e-03, 1e-01)
    Y_pred_rbf01_g01= SVM_rbf(X_train, Y_train, X_test, 1e-01, 1e-01)
    Y_pred_rbf1_g01= SVM_rbf(X_train, Y_train, X_test, 1.0, 1e-01)
    Y_pred_rbf10_g01= SVM_rbf(X_train, Y_train, X_test, 10.0, 1e-01)
    Y_pred_rbf03_g1= SVM_rbf(X_train, Y_train, X_test, 1e-03, 1.0)
    Y_pred_rbf01_g1= SVM_rbf(X_train, Y_train, X_test, 1e-01, 1.0)
    Y_pred_rbf1_g1= SVM_rbf(X_train, Y_train, X_test, 1.0, 1.0)
    Y_pred_rbf10_g1= SVM_rbf(X_train, Y_train, X_test, 10.0, 1.0)
    Y_pred_rbf03_g10= SVM_rbf(X_train, Y_train, X_test, 1e-03, 10.0)
    Y_pred_rbf01_g10= SVM_rbf(X_train, Y_train, X_test, 1e-01, 10.0)
    Y_pred_rbf1_g10= SVM_rbf(X_train, Y_train, X_test, 1.0, 10.0)
    Y_pred_rbf10_g10= SVM_rbf(X_train, Y_train, X_test, 10.0, 10.0)
    
    validations = [[Y_test, Y_pred_lin03],
    [Y_test, Y_pred_lin01],
    [Y_test, Y_pred_lin1],
    [Y_test, Y_pred_lin10],             
    [Y_test, Y_pred_rbf03_g03],
    [Y_test, Y_pred_rbf01_g03],
    [Y_test, Y_pred_rbf1_g03],
    [Y_test, Y_pred_rbf10_g03],            
    [Y_test, Y_pred_rbf03_g01],
    [Y_test, Y_pred_rbf01_g01],
    [Y_test, Y_pred_rbf1_g01],
    [Y_test, Y_pred_rbf10_g01],              
    [Y_test, Y_pred_rbf03_g1],
    [Y_test, Y_pred_rbf01_g1],
    [Y_test, Y_pred_rbf1_g1],
    [Y_test, Y_pred_rbf10_g1],            
    [Y_test, Y_pred_rbf03_g10],
    [Y_test, Y_pred_rbf01_g10],
    [Y_test, Y_pred_rbf1_g10],
    [Y_test, Y_pred_rbf10_g10]]
    
    return GenerateScores(validations)

    

In [13]:
def AddAccAndF1ToExcel(name, acc, f1, c_acc, c_f1):
    acc_new = pd.DataFrame({'acc': acc})
    f1_new = pd.DataFrame({'f1': f1})
    wb = load_workbook(name)

    ws = wb['Feuil1']

    for index, row in acc_new.iterrows():
        cell =  c_acc+'%d'  % (index + 2)
        ws[cell] = row[0]
        
    for index, row in f1_new.iterrows():
        cell = c_f1 + '%d'  % (index + 2)
        ws[cell] = row[0]

    wb.save(name)


    return pd.read_excel(name, index_col=0)

#svm = AddAccAndF1ToExcel('svm.xlsx', acc_svm, f1_svm, 'D', 'E')
#rn_holdout = AddAccAndF1ToExcel('rn_holdout.xlsx', acc_neurone_holdout, f1_neurone_holdout, 'F', 'G')

# For Debugging
#print(acc_svm)
#print(f1_svm)
#print(acc_neurone_holdout)
#print(f1_neurone_holdout)

## Musique

In [18]:
Labels = ['BIG_BAND', 'BLUES_CONTEMPORARY', 'COUNTRY_TRADITIONAL', 'DANCE', 'ELECTRONICA', 'EXPERIMENTAL',
          'FOLK_INTERNATIONAL', 'GOSPEL', 'GRUNGE_EMO', 'HIP_HOP_RAP', 'JAZZ_CLASSIC', 'METAL_ALTERNATIVE', 
          'METAL_DEATH', 'METAL_HEAVY', 'POP_CONTEMPORARY', 'POP_INDIE', 'POP_LATIN', 'PUNK', 'REGGAE','RNB_SOUL',
          'ROCK_ALTERNATIVE', 'ROCK_COLLEGE', 'ROCK_CONTEMPORARY', 'ROCK_HARD', 'ROCK_NEO_PSYCHEDELIA']

encoder = LabelEncoder()
encoder.fit(Labels)

def LabelsStrToInt(array):
    return encoder.transform(array)

def LabelsIntToStr(array):
    return encoder.inverse_transform(array)

def GetDataSet(name, nb, length):
    fid = open(os.path.normpath('music/tagged_feature_sets/' + name + '/' + name + '.csv'), 'r') 

    ids = np.zeros((nb, 2), dtype=np.object)
    primitives = np.zeros((nb, length), dtype=np.float64)
    labels = np.zeros((nb, 1), dtype=np.object)

    count = 0
    
    for line in fid:
        element = line.rstrip('\n').split(',')
        
        ids[count] = element[:2]
        primitives[count] = element[2:-1]
        labels[count] = element[-1:]

        count += 1
        if count >= nb:
            break

    fid.close()
    
    normalizedPrimitives = NormalizeData(primitives)
#     pca = PCA(min())
#     normAndReducedDimPrimitves = pca.fit_transform(normalizedPrimitives)
    print(len(labels))
    return ids, normalizedPrimitives, LabelsStrToInt(labels)

# ----- For debug -----
#print(count_smooth)
#print(count_spiral)
#print(count)
#print(Galaxies)
#----------------------

In [70]:
nbData = 50000

#-----------Premade with the right length-----------
# GetDataSet('msd-mvd_dev', nbData, 420)
# GetDataSet('msd-trh_dev', nbData, 420)
# GetDataSet('msd-ssd_dev', nbData, 168)
# GetDataSet('msd-marsyas_dev_new', nbData, 124)
# GetDataSet('msd-jmirderivatives_dev', nbData, 96)
# id1, x1, y1 = GetDataSet('msd-rh_dev_new', nbData, 60)
# id2, x2, y2 = GetDataSet('msd-jmirmfccs_dev', nbData, 26)
# id3, x3, y3 = GetDataSet('msd-jmirlpc_dev', nbData, 20)
# GetDataSet('msd-jmirspectral_dev', nbData, 16)
# GetDataSet('msd-jmirmoments_dev', nbData, 10)
#---------------------------------------------------

id1, x1, y1 = GetDataSet('msd-jmirmoments_dev', nbData, 10)
id2, x2, y2 = GetDataSet('msd-jmirspectral_dev', nbData, 16)
id3, x3, y3 = GetDataSet('msd-jmirlpc_dev', nbData, 20)
id4, x4, y4 = GetDataSet('msd-jmirmfccs_dev', nbData, 26)
id5, x5, y5 = GetDataSet('msd-rh_dev_new', nbData, 60)
id6, x6, y6 = GetDataSet('msd-jmirderivatives_dev', nbData, 96)

#----------For Debug-------------
print(id1[0])
print(x1[0])
print(y1[0])
#--------------------------------

50000


  y = column_or_1d(y, warn=True)


50000
50000
50000
50000
50000
['1' 'TRAAAAK128F9318786']
[0.34364904 0.19015385 0.13235502 0.10477413 0.08307906 0.33207739
 0.34794747 0.33937687 0.63612821 0.43030456]
11


In [32]:
a1, f1 = GenerateModelDataFromVector(x1, y1, GaussianNB())
a2, f2 = GenerateModelDataFromVector(x2, y2, GaussianNB())
a3, f3 = GenerateModelDataFromVector(x3, y3, GaussianNB())

print(a1)
print(f1)
print(a2)
print(f2)
print(a3)
print(f3)

[0.132275   0.13422685 0.13042609]
[0.09832604 0.09934578 0.09195797]
[0.146275   0.13702741 0.13802761]
[0.10558446 0.09844179 0.10405672]
[0.13045    0.12062412 0.13682737]
[0.09391715 0.08773675 0.0985063 ]


In [33]:
a1, f1 = GenerateModelDataFromVector(x1, y1, MultinomialNB())
a2, f2 = GenerateModelDataFromVector(x2, y2, MultinomialNB())
a3, f3 = GenerateModelDataFromVector(x3, y3, MultinomialNB())

print(a1)
print(f1)
print(a2)
print(f2)
print(a3)
print(f3)

[0.07565    0.07361472 0.07161432]
[0.07189764 0.07190003 0.070499  ]
[0.097525   0.09741948 0.090018  ]
[0.08461024 0.09908441 0.08933336]
[0.088      0.08221644 0.08541708]
[0.08953356 0.10079168 0.10819885]


In [34]:
a1, f1 = GenerateModelDataFromVector(x1, y1, CreateDecisionTreeModel(15))
a2, f2 = GenerateModelDataFromVector(x2, y2, CreateDecisionTreeModel(15))
a3, f3 = GenerateModelDataFromVector(x3, y3, CreateDecisionTreeModel(15))

print(a1)
print(f1)
print(a2)
print(f2)
print(a3)
print(f3)

[0.643125   0.11642328 0.11382276]
[0.64467294 0.11275546 0.11072412]
[0.706725   0.11422284 0.11182236]
[0.70673831 0.11311131 0.11135751]
[0.706875   0.11642328 0.12762553]
[0.70755849 0.11322631 0.12447688]


In [46]:
def CreateRandomForestModel():
    return RandomForestClassifier(n_estimators=25, max_depth=13, min_samples_split=20, random_state=0, criterion='entropy', n_jobs=3)

# a1, f1 = GenerateModelDataFromVector(x1, np.ravel(y1), CreateRandomForestRegModel(14))
# a2, f2 = GenerateModelDataFromVector(x2, np.ravel(y2), CreateRandomForestRegModel(14))
# a3, f3 = GenerateModelDataFromVector(x3, np.ravel(y3), CreateRandomForestRegModel(14))

# print(a1)
# print(f1)
# print(a2)
# print(f2)
# print(a3)
# print(f3)

In [71]:
X1_train, X1_val, X1_test = SplitVectorData_Holdout(x1, 0.8)
Y1_train, Y1_val, Y1_test = SplitLabels_Holdout(y1, 0.8)

X2_train, X2_val, X2_test = SplitVectorData_Holdout(x2, 0.8)
Y2_train, Y2_val, Y2_test = SplitLabels_Holdout(y2, 0.8)

X3_train, X3_val, X3_test = SplitVectorData_Holdout(x3, 0.8)
Y3_train, Y3_val, Y3_test = SplitLabels_Holdout(y3, 0.8)

X4_train, X4_val, X4_test = SplitVectorData_Holdout(x4, 0.8)
Y4_train, Y4_val, Y4_test = SplitLabels_Holdout(y4, 0.8)

X5_train, X5_val, X5_test = SplitVectorData_Holdout(x5, 0.8)
Y5_train, Y5_val, Y5_test = SplitLabels_Holdout(y5, 0.8)

X6_train, X6_val, X6_test = SplitVectorData_Holdout(x6, 0.8)
Y6_train, Y6_val, Y6_test = SplitLabels_Holdout(y6, 0.8)

In [36]:
#Y1_pred = SVM_linear(X1_train, Y1_train, X1_val, 1.0, 1e-01)
# 1e-03
# 1e-01
# 1.0
# 10.0
def svm(X_train, Y_train, X_val, Y_val, X_test, Y_test):
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 1e-01, 1e-01)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 1.0, 1e-01)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 10.0, 1e-01)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 1e-01, 1.0)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 1.0, 1.0)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 10.0, 1.0)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 1e-01, 10.0)
#     a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 1.0, 10.0)
    a1, f1 = SVM_rbf(X_train, Y_train, X_val, Y_val, X_test, Y_test, 10.0, 10.0)

# svm(X1_train, Y1_train, X1_val, Y1_val, X1_test, Y1_test)
# svm(X2_train, Y2_train, X2_val, Y2_val, X2_test, Y2_test)
# svm(X3_train, Y3_train, X3_val, Y3_val, X3_test, Y3_test)
svm(X4_train, Y4_train, X4_val, Y4_val, X4_test, Y4_test)
# svm(X5_train, Y5_train, X5_val, Y5_val, X5_test, Y5_test)

svm_rbf with C = 10.0 and gamma = 10.0
Start predictions...
acc = [0.364075   0.25025005 0.25485097], f1 = [0.34914461 0.22817566 0.23620963]


In [37]:
def convertToCol(array):
    a = np.zeros((len(array), 1), dtype=object)
    for i in range(0, len(array)-1):
        a[i] = array[i]
    return a

def svm_model():
    model = SVC(C=10.0, gamma=10.0, kernel='rbf')
    return model

def AdaBoost_model():
    model = AdaBoostClassifier(base_estimator = CreateDecisionTreeModel(15), n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=None)
    return model

In [53]:
def CNN_model(X_train, Y_train, X_val, Y_val, layers, perceptrons, epochs, learnRate):
    
    hiddenLayers = np.zeros((layers,), dtype=int)
    for i in range(0, len(hiddenLayers)):
        hiddenLayers[i] = perceptrons
    
    #Create model
    model = MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=hiddenLayers, learning_rate_init=learnRate, 
                          max_iter=epochs, random_state=1)
    
    #Train
    model.fit(X_train, Y_train)

    return model

In [62]:
# def Ensemble_model():
#     
#     model = VotingClassifier(estimators = [ ('DT', CreateRandomForestModel().fit(X2_train, np.ravel(Y2_train))),
#                                            ('Ada', AdaBoost_model().fit(X3_train, np.ravel(Y3_train))), 
#                                            ('SVM', svm_model().fit(X4_train, np.ravel(Y4_train))),
#                                            ('CNN', CNN_model(X5_train, Y5_train, X5_val, Y5_val, 3, 120, 60, 0.001))], 
#                              voting = 'hard', n_jobs=3) 
#     return model

def Ensemble_model():
    model = VotingClassifier(estimators = [ ('DT', CreateRandomForestModel()),
                                           ('Ada', AdaBoost_model()), 
                                           ('SVM', svm_model()),
                                           ('CNN', MLPClassifier(solver='sgd', alpha=1e-5, hidden_layer_sizes=(120, 120, 120), 
                                                    learning_rate_init=0.001, max_iter=60, random_state=1))], 
                                            voting = 'hard', n_jobs=3) 
    return model

# acc = [0.653925   0.65193039 0.65253051], f1 = [0.65487319 0.65407308 0.65322085]

In [63]:
ensemble_model = Ensemble_model()

In [55]:
ensemble_model.fit(x4, np.ravel(y4))

VotingClassifier(estimators=[('DT', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=13, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=20,
            min_...
       solver='sgd', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False))],
         flatten_transform=None, n_jobs=3, voting='hard', weights=None)

In [56]:
pred4_train = ensemble_model.predict(X4_train)
pred4_val = ensemble_model.predict(X4_val)
pred4_test = ensemble_model.predict(X4_test)

  if diff:
  if diff:
  if diff:


In [57]:
a1, f1 = GenerateScores([[Y4_train, pred4_train], [Y4_val, pred4_val], [Y4_test, pred4_test]])
print("acc = " + str(a1) + ", f1 = " + str(f1))

acc = [0.653925   0.65193039 0.65253051], f1 = [0.65487319 0.65407308 0.65322085]


In [45]:
cnn = CNN_model(X2_train, Y2_train, X2_val, Y2_val, 3, 120, 60, 0.001)

Y_train_pred = cnn.predict(X2_train)   
Y_val_pred = cnn.predict(X2_val)   
Y_test_pred = cnn.predict(X2_test)   

a1, f1 = GenerateScores([[Y2_train, Y_train_pred], [Y2_val, Y_val_pred], [Y2_test, Y_test_pred]])
print("acc = " + str(a1) + ", f1 = " + str(f1))

acc = [0.1528     0.15443089 0.14682937], f1 = [0.16181572 0.16279614 0.16012514]




In [68]:
def VoteClassifier(x, y, X_train, Y_train, X_val, Y_val, Y_test, X_test, name):
    ensemble_model = Ensemble_model()

    ensemble_model.fit(x, np.ravel(y))

    pred_train = ensemble_model.predict(X_train)
    pred_val = ensemble_model.predict(X_val)
    pred_test = ensemble_model.predict(X_test)

    a1, f1 = GenerateScores([[Y_train, pred_train], [Y_val, pred_val], [Y_test, pred_test]])
    print("Results for set " + name + " : ")
    print("acc = " + str(a1) + ", f1 = " + str(f1) + "\n")

In [69]:
VoteClassifier(x1, y1, X1_train, Y1_train, X1_val, Y1_val, Y1_test, X1_test, "1 with 10 feature")
VoteClassifier(x2, y2, X2_train, Y2_train, X2_val, Y2_val, Y2_test, X2_test, "2 with 16 feature")
VoteClassifier(x3, y3, X3_train, Y3_train, X3_val, Y3_val, Y3_test, X3_test, "3 with 20 feature")
VoteClassifier(x4, y4, X4_train, Y4_train, X4_val, Y4_val, Y4_test, X4_test, "4 with 26 feature")
VoteClassifier(x5, y5, X5_train, Y5_train, X5_val, Y5_val, Y5_test, X5_test, "5 with 60 feature")

  if diff:
  if diff:
  if diff:


Results for set 1 with 10 feature : 
acc = [0.45775    0.46189238 0.44428886], f1 = [0.44789647 0.45390956 0.43319022]



  if diff:
  if diff:
  if diff:


Results for set 2 with 16 feature : 
acc = [0.53665    0.54230846 0.5425085 ], f1 = [0.53403494 0.5402461  0.5380138 ]



  if diff:
  if diff:
  if diff:


Results for set 3 with 20 feature : 
acc = [0.594475   0.59531906 0.58891778], f1 = [0.59298286 0.59467295 0.58792654]



  if diff:
  if diff:
  if diff:


Results for set 4 with 26 feature : 
acc = [0.653925   0.65193039 0.65253051], f1 = [0.65487319 0.65407308 0.65322085]



  if diff:
  if diff:


Results for set 5 with 60 feature : 
acc = [0.801425   0.80636127 0.78955791], f1 = [0.80643288 0.81097645 0.79502129]



  if diff:


In [72]:
VoteClassifier(x6, y6, X6_train, Y6_train, X6_val, Y6_val, Y6_test, X6_test, "6 with 96 feature")

  if diff:
  if diff:


Results for set 6 with 96 feature : 
acc = [0.7974     0.80656131 0.79615923], f1 = [0.79905731 0.80818421 0.79849798]



  if diff:


In [None]:
# print(convertToCol(Y1_pred)[0])
# print(Y1_val[2])
# validations = [[Y1_test, Y1_pred]]
# a1, f1 = GenerateScores(validations)



# print(a1)
# print(f1)

In [None]:
# Galaxies_norm = NormalizeData(Galaxies)
#acc_neurone_holdout, f1_neurone_holdout = Neurone_Holdout(Galaxies)

#acc_neurone_holdout

#acc_svm, f1_svm = SVM(Galaxies)

# Rapport