# GTI770 - TP3

In [19]:
%matplotlib inline   
import matplotlib.pyplot as plt
import numpy as np
import os
import cv2
import graphviz
import decimal
import pandas as pd
from openpyxl import load_workbook
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import optimizers as opt

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# from sklearn import tree
# from sklearn.naive_bayes import GaussianNB
# from sklearn.naive_bayes import MultinomialNB
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.ensemble import RandomForestClassifier
# from mdlp.discretization import MDLP
from sklearn.preprocessing import MinMaxScaler

In [2]:
#Utility methods
def SplitVectorData_Holdout(primitives_vector, train_portion):
    
    size = len(primitives_vector)
    subsize = len(primitives_vector[0])
    
    val_portion = (1 - train_portion) / 2
    test_portion = (1 - train_portion) / 2

    nbTrain = int(size * train_portion)
    nbVal = int(size * val_portion)
    nbTest = int(size * test_portion)

    array_train = np.zeros((nbTrain, subsize), dtype=np.float64)
    array_val = np.zeros((nbVal, subsize), dtype=np.float64)
    array_test = np.zeros((nbTest, subsize), dtype=np.float64)

    array_train = primitives_vector[:nbTrain]
    array_val = primitives_vector[nbTrain : nbTrain + nbVal]
    array_test = primitives_vector[-nbTest:]
    return array_train, array_val, array_test

def SplitVectorData_KFold(primitives_vector, k, test_portion):
    
    size = len(primitives_vector)
    subsize = len(primitives_vector[0])
    subsetNb = int(size * (1 - test_portion) / k)
    testNb = int(size * test_portion)
    
    array_kfold_train = np.zeros((k, subsetNb, subsize), dtype=np.float64)
    array_kfold_test = np.zeros((testNb, subsize), dtype=np.float64)
    
    for i in range(0, k):
        if i == 0 :
            array_kfold_train[i] = primitives_vector[i * subsetNb]
        else :
            array_kfold_train[i] = primitives_vector[(i-1) * subsetNb : i * subsetNb]
    
    array_kfold_test = primitives_vector[-testNb:]

    return array_kfold_train, array_kfold_test

def SplitVectorData_NoVal(primitives_vector, train_portion):
    
    size = len(primitives_vector)
    subsize = len(primitives_vector[0])
    
    test_portion = (1 - train_portion)

    nbTrain = int(size * train_portion)
    nbTest = int(size * test_portion)

    array_train = np.zeros((nbTrain, subsize), dtype=np.float64)
    array_test = np.zeros((nbTest, subsize), dtype=np.float64)

    array_train = primitives_vector[:nbTrain]
    array_test = primitives_vector[-nbTest:]
    return array_train, array_test

def concatenateWithoutTestPortion(full_array, index):
    result_array = []
    for i in range(0, len(full_array)):
        if len(result_array) == 0:
            result_array = full_array[i]
        elif i != index :
            result_array = np.concatenate((result_array, full_array[i]), axis=0)
    return result_array

def scores_mean(array_scores):
    
    size = len(array_scores)
    subsize = len(array_scores[0])
    array_mean = np.zeros(subsize, dtype=float)
    
    for i in range(0, size):
        for j in range(0, subsize):
            array_mean[j] += array_scores[i][j]
            
    for j in range(0, subsize):
            array_mean[j] = array_mean[j] / size
    
    return array_mean

def UseModelOnTestData(array, model, transformer=None):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)
    if transformer is not None:
        transform_train, transform_val, transform_test, Y_train, Y_val, Y_test = TransformData(array_train, array_val, array_test, len(array_train[0]), transformer)
    array_model_result, array_prediction_train_result, array_prediction_val_result, array_prediction_test_result, array_train_result, array_val_result, array_test_result = GenerateModelDataFromTransform(transform_train, transform_val, transform_test, Y_train, Y_val, Y_test, len(array[0]), model)
    result = [[array_test_result, array_prediction_test_result]]
    return GenerateScores(result) 

# ----- For debug -----
#Filter_train, Filter_val, Filter_test = SplitVectorDataTrainValTest(Filter, 0.6)
#print(len(Filter_train))
#print(len(Filter_val))
#print(len(Filter_test))
#print(len(Filter))
#----------------------

In [3]:
#Transforms and Models generation methods
def GenerateModelDataFromVector(array_train, array_val, array_test, num_features, chosen_model):
    
    num_features = num_features - 1
    
    data_train = array_train
    X_train  = data_train[:,0:num_features]
    Y_train  = data_train[:,num_features]
    
    data_val = array_val
    X_val = data_val[:,0:num_features]
    Y_val = data_val[:,num_features]
    
    data_test = array_test
    X_test = data_test[:,0:num_features]
    Y_test = data_test[:,num_features]
    
    
    model = chosen_model
    model = model.fit(X_train, Y_train)
    prediction_train = model.predict(X_train)
    prediction_val = model.predict(X_val)
    prediction_test = model.predict(X_test)
    return model, prediction_train, prediction_val, prediction_test, Y_train, Y_val, Y_test

def TransformData(array_train, array_val, array_test, num_features, chosen_transformer):
    
    num_features = num_features - 1
    
    data_train = array_train
    X_train  = data_train[:,0:num_features]
    Y_train  = data_train[:,num_features]
    
    data_val = array_val
    X_val  = data_val[:,0:num_features]
    Y_val  = data_val[:,num_features]
    
    data_test = array_test
    X_test  = data_test[:,0:num_features]
    Y_test  = data_test[:,num_features]
    
    transformer_train = chosen_transformer
    transformer_train = transformer_train.fit_transform(X_train, Y_train)
    
    transformer_val = chosen_transformer
    transformer_val = transformer_val.fit_transform(X_val, Y_val)
    
    transformer_test = chosen_transformer
    transformer_test = transformer_test.fit_transform(X_test, Y_test)
    return transformer_train, transformer_val, transformer_test, Y_train, Y_val, Y_test

def GenerateModelDataFromTransform(array_train, array_val, array_test, y_train, y_val, y_test, num_features, chosen_model):
    
    num_features = num_features - 1
    
    X_train  = array_train
    Y_train = y_train
    
    X_val = array_val
    Y_val = y_val
    
    X_test  = array_test
    Y_test = y_test
    
    model = chosen_model
    model = model.fit(X_train, Y_train)
    prediction_train = model.predict(X_train)
    prediction_val = model.predict(X_val)
    prediction_test = model.predict(X_test)
    return model, prediction_train, prediction_val, prediction_test, Y_train, Y_val, Y_test

In [4]:
#display methods
def ExportTree(model):
    dot_data = tree.export_graphviz(model, out_file=None, 
                         #feature_names = ['', '', ''],  
                         class_names = ['spam', 'mail'],
                         filled=True, rounded=True,  
                         special_characters=True)  
    graph = graphviz.Source(dot_data)
    graph.format = 'png'
    graph.render("Filter_data") 
    return graph

def GenerateScores(array):
    AccScores = np.zeros(len(array), dtype=float)
    F1Scores = np.zeros(len(array), dtype=float)

    for i in range(0, len(array)):
        AccScores[i] = accuracy_score(array[i][0], array[i][1])
        F1Scores[i] = f1_score(array[i][0], array[i][1], average='weighted', labels=np.unique(array[i][1]))
    
    return AccScores, F1Scores

def GenerateAccScores(array):
    AccScores = np.zeros(len(array), dtype=float)

    for i in range(0, len(array)):
        AccScores[i] = accuracy_score(array[i][0], array[i][1])
    
    return AccScores

#Extraction de primitive
def TracePlot(array_acc_X, array_acc_Y, array_f1_X, array_f1_Y, titre, titre_x, titre_y):        
    
    plt.plot(array_acc_X, array_acc_Y, 'ro')
    plt.plot(array_f1_X, array_f1_Y, 'g*')
    plt.xlabel(titre_x)
    plt.ylabel(titre_y)
    plt.legend(['Accuracy Score','F1 Score'])
    plt.title(titre)
    plt.grid(True)
    plt.show()

In [5]:
#model creations methods
def CreateDecisionTreeModel(depth):
    return tree.DecisionTreeClassifier(criterion='entropy', max_depth=depth, min_samples_leaf=1)

def CreateKNNModel(k, weight):
    return KNeighborsClassifier(n_neighbors=k, weights=weight)

def CreateRandomForestModel(depth):
    return RandomForestClassifier(criterion='entropy', max_depth=depth, random_state=0)

In [6]:
#Decision tree methods
def DecisionTree_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)

    array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(None))
    array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(3))
    array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(5))
    array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(10))

    validations = [[array_val_none, array_prediction_val_none],
    [array_val_3, array_prediction_val_3],
    [array_val_5, array_prediction_val_5],
    [array_val_10, array_prediction_val_10]]

    return GenerateScores(validations)

def DecisionTree_KFold(array,k):

    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    all_accScores = np.zeros((k, 4), dtype=float)
    all_f1Scores = np.zeros((k, 4), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test

        array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(None))
        array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(3))
        array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_tree_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(5))
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_tree_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateDecisionTreeModel(10))

        validations = [[array_val_none, array_prediction_val_none],
        [array_val_3, array_prediction_val_3],
        [array_val_5, array_prediction_val_5],
        [array_val_10, array_prediction_val_10]]

        Array_AccScores, Array_F1Scores = GenerateScores(validations)

        all_accScores[i] = Array_AccScores
        all_f1Scores[i] = Array_F1Scores

    accScores_mean = scores_mean(all_accScores)
    f1Scores_mean = scores_mean(all_f1Scores)
    
    return accScores_mean, f1Scores_mean

In [7]:
#Bayes Methods
def Bayes_Holdout(array, array_prob ,array_transform=None):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)
    if array_transform is not None:
        array_train_tr, array_val_tr, array_test_tr = SplitVectorData_Holdout(array_transform, 0.6)

    transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp = TransformData(array_train, array_val, array_test, len(array_train[0]), MDLP())
    transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax = TransformData(array_train, array_val, array_test, len(array_train[0]), MinMaxScaler())

    array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), GaussianNB(priors=array_prob))
    if array_transform is not None:
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train_tr, array_val_tr, array_test_tr, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))
    else:
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))

    array_model_mdlp, array_prediction_train_mdlp, array_prediction_val_mdlp, array_prediction_test_mdlp, array_train_mdlp, array_val_mdlp, array_test_mdlp = GenerateModelDataFromTransform(transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp, len(array[0]), MultinomialNB())
    array_model_minmax, array_prediction_train_minmax, array_prediction_val_minmax, array_prediction_test_minmax, array_train_minmax, array_val_minmax, array_test_minmax = GenerateModelDataFromTransform(transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax, len(array[0]), MultinomialNB())


    validations = [[array_val_5, array_prediction_val_5],
    [array_val_10, array_prediction_val_10],
    [array_val_mdlp, array_prediction_val_mdlp],
    [array_val_minmax, array_prediction_val_minmax]]

    return GenerateScores(validations)  
    
def Bayes_KFold(array, array_prob, k, array_transform=None):
    
    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    if array_transform is not None:
        array_kfold_train_tr, array_kfold_test_tr = SplitVectorData_KFold(array_transform, k, 0.2)
    all_accScores = np.zeros((k, 4), dtype=float)
    all_f1Scores = np.zeros((k, 4), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test
        
        if array_transform is not None:
            array_train_tr = concatenateWithoutTestPortion(array_kfold_train_tr, i)
            array_val_tr = array_kfold_train_tr[i]
            array_test_tr = array_kfold_test_tr

        transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp = TransformData(array_train, array_val, array_test, len(array_train[0]), MDLP())
        transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax = TransformData(array_train, array_val, array_test, len(array_train[0]), MinMaxScaler())

        array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), GaussianNB(priors=array_prob))
        if array_transform is not None:
            array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train_tr, array_val_tr, array_test_tr, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))
        else:
            array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), MultinomialNB(fit_prior=True, class_prior=array_prob))
        array_model_mdlp, array_prediction_train_mdlp, array_prediction_val_mdlp, array_prediction_test_mdlp, array_train_mdlp, array_val_mdlp, array_test_mdlp = GenerateModelDataFromTransform(transform_train_mdlp, transform_val_mdlp, transform_test_mdlp, Y_train_mdlp, Y_val_mdlp, Y_test_mdlp, len(array[0]), MultinomialNB())
        array_model_minmax, array_prediction_train_minmax, array_prediction_val_minmax, array_prediction_test_minmax, array_train_minmax, array_val_minmax, array_test_minmax = GenerateModelDataFromTransform(transform_train_minmax, transform_val_minmax, transform_test_minmax, Y_train_minmax, Y_val_minmax, Y_test_minmax, len(array[0]), MultinomialNB())


        validations = [[array_val_5, array_prediction_val_5],
                     [array_val_10, array_prediction_val_10],
                     [array_val_mdlp, array_prediction_val_mdlp],
                     [array_val_minmax, array_prediction_val_minmax]]

        Array_AccScores, Array_F1Scores = GenerateScores(validations)

        all_accScores[i] = Array_AccScores
        all_f1Scores[i] = Array_F1Scores 
        
    accScores_mean = scores_mean(all_accScores)
    f1Scores_mean = scores_mean(all_f1Scores)

    return accScores_mean, f1Scores_mean

In [8]:
#KNN Methods
def KNN_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)

    array_model_3u, array_prediction_train_3u, array_prediction_val_3u, array_prediction_test_3u, array_train_3u, array_val_3u, array_test_3u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'uniform'))
    array_model_5u, array_prediction_train_5u, array_prediction_val_5u, array_prediction_test_5u, array_train_5u, array_val_5u, array_test_5u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'uniform'))
    array_model_10u, array_prediction_train_10u, array_prediction_val_10u, array_prediction_test_10u, array_train_10u, array_val_10u, array_test_10u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'uniform'))
    array_model_3d, array_prediction_train_3d, array_prediction_val_3d, array_prediction_test_3d, array_train_3d, array_val_3d, array_test_3d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'distance'))
    array_model_5d, array_prediction_train_5d, array_prediction_val_5d, array_prediction_test_5d, array_train_5d, array_val_5d, array_test_5d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'distance'))
    array_model_10d, array_prediction_train_10d, array_prediction_val_10d, array_prediction_test_10d, array_train_10d, array_val_10d, array_test_10d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'distance'))

    validations_uniform = [[array_val_3u, array_prediction_val_3u],
                        [array_val_5u, array_prediction_val_5u],
                        [array_val_10u, array_prediction_val_10u]]

    validations_distance = [[array_val_3d, array_prediction_val_3d],
                        [array_val_5d, array_prediction_val_5d],
                        [array_val_10d, array_prediction_val_10d]]

    accScores_uniform, f1Scores_uniform = GenerateScores(validations_uniform)
    accScores_distance, f1Scores_dsitance = GenerateScores(validations_distance)

    return accScores_uniform, f1Scores_uniform, accScores_distance, f1Scores_dsitance
    
def KNN_KFold(array, k):

    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    all_accScores_uniform = np.zeros((k, 3), dtype=float)
    all_f1Scores_uniform = np.zeros((k, 3), dtype=float)
    all_accScores_distance = np.zeros((k, 3), dtype=float)
    all_f1Scores_distance = np.zeros((k, 3), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test

        array_model_3u, array_prediction_train_3u, array_prediction_val_3u, array_prediction_test_3u, array_train_3u, array_val_3u, array_test_3u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'uniform'))
        array_model_5u, array_prediction_train_5u, array_prediction_val_5u, array_prediction_test_5u, array_train_5u, array_val_5u, array_test_5u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'uniform'))
        array_model_10u, array_prediction_train_10u, array_prediction_val_10u, array_prediction_test_10u, array_train_10u, array_val_10u, array_test_10u = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'uniform'))
        array_model_3d, array_prediction_train_3d, array_prediction_val_3d, array_prediction_test_3d, array_train_3d, array_val_3d, array_test_3d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(3, 'distance'))
        array_model_5d, array_prediction_train_5d, array_prediction_val_5d, array_prediction_test_5d, array_train_5d, array_val_5d, array_test_5d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(5, 'distance'))
        array_model_10d, array_prediction_train_10d, array_prediction_val_10d, array_prediction_test_10d, array_train_10d, array_val_10d, array_test_10d = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateKNNModel(10, 'distance'))

        validations_uniform = [[array_val_3u, array_prediction_val_3u],
        [array_val_5u, array_prediction_val_5u],
        [array_val_10u, array_prediction_val_10u]]

        validations_distance = [[array_val_3d, array_prediction_val_3d],
        [array_val_5d, array_prediction_val_5d],
        [array_val_10d, array_prediction_val_10d]]

        Array_AccScores_uniform, Array_F1Scores_uniform = GenerateScores(validations_uniform)
        Array_AccScores_distance, Array_F1Scores_distance = GenerateScores(validations_distance)

        all_accScores_uniform[i] = Array_AccScores_uniform
        all_f1Scores_uniform[i] = Array_F1Scores_uniform
        all_accScores_distance[i] = Array_AccScores_distance
        all_f1Scores_distance[i] = Array_F1Scores_distance

    accScores_mean_uniform = scores_mean(all_accScores_uniform)
    f1Scores_mean_uniform = scores_mean(all_f1Scores_uniform)
    accScores_mean_distance = scores_mean(all_accScores_distance)
    f1Scores_mean_distance = scores_mean(all_f1Scores_distance)

    return accScores_mean_uniform, f1Scores_mean_uniform, accScores_mean_distance, f1Scores_mean_distance

In [9]:
#Random forest methods
def RandomForest_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)

    array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(None))
    array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(3))
    array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(5))
    array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(10))

    validations = [[array_val_none, array_prediction_val_none],
    [array_val_3, array_prediction_val_3],
    [array_val_5, array_prediction_val_5],
    [array_val_10, array_prediction_val_10]]

    return GenerateScores(validations)

def RandomForest_KFold(array,k):

    array_kfold_train, array_kfold_test = SplitVectorData_KFold(array, k, 0.2)
    all_accScores = np.zeros((k, 4), dtype=float)
    all_f1Scores = np.zeros((k, 4), dtype=float)

    for i in range(1, k):

        array_train = concatenateWithoutTestPortion(array_kfold_train, i)
        array_val = array_kfold_train[i]
        array_test = array_kfold_test

        array_model_none, array_prediction_train_none, array_prediction_val_none, array_prediction_test_none, array_train_none, array_val_none, array_test_none = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(None))
        array_model_3, array_prediction_train_3, array_prediction_val_3, array_prediction_test_3, array_train_3, array_val_3, array_test_3 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(3))
        array_model_5, array_prediction_train_5, array_prediction_val_5, array_prediction_test_tree_5, array_train_5, array_val_5, array_test_5 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(5))
        array_model_10, array_prediction_train_10, array_prediction_val_10, array_prediction_test_tree_10, array_train_10, array_val_10, array_test_10 = GenerateModelDataFromVector(array_train, array_val, array_test, len(array[0]), CreateRandomForestModel(10))

        validations = [[array_val_none, array_prediction_val_none],
        [array_val_3, array_prediction_val_3],
        [array_val_5, array_prediction_val_5],
        [array_val_10, array_prediction_val_10]]

        Array_AccScores, Array_F1Scores = GenerateScores(validations)

        all_accScores[i] = Array_AccScores
        all_f1Scores[i] = Array_F1Scores

    accScores_mean = scores_mean(all_accScores)
    f1Scores_mean = scores_mean(all_f1Scores)
    
    return accScores_mean, f1Scores_mean

In [10]:
# accScores_filter_tree_holdout, f1Scores_filter_tree_holdout = DecisionTree_Holdout(Filter)
# accScores_filter_tree_kfold, f1Scores_filter_tree_kfold = DecisionTree_KFold(Filter,10)
# accScores_filter_bayes_holdout, f1Scores_filter_bayes_holdout = Bayes_Holdout(Filter, [0.4003,0.5997])
# accScores_filter_bayes_kfold, f1Scores_filter_bayes_kfold = Bayes_KFold(Filter, [0.4003,0.5997], 10)
# accScores_filter_knn_holdout_uniform, f1Scores_filter_knn_holdout_uniform, accScores_filter_knn_holdout_distance, f1Scores_filter_knn_holdout_distance = KNN_Holdout(Filter)
# accScores_filter_knn_kfold_uniform, f1Scores_filter_knn_kfold_uniform, accScores_filter_knn_kfold_distance, f1Scores_filter_knn_kfold_distance = KNN_KFold(Filter, 10) 
# accScores_filter_test, f1Scores_filter_test = UseModelOnTestData(Filter, MultinomialNB(), MinMaxScaler())
# accScores_filter_forest_holdout, f1Scores_filter_forest_holdout = RandomForest_Holdout(Filter)
# accScores_filter_forest_kfold, f1Scores_filter_forest_kfold = RandomForest_KFold(Filter,10)

### Galaxies

In [11]:
# Get some galaxies
fid = open('galaxy_feature_vectors.csv', 'r') 

NbGalaxy = 16000 #16000 total
Galaxies = np.zeros((NbGalaxy, 7), dtype=np.float64)

count = 0
count_smooth = 0
count_spiral = 0

for line in fid:
    element = line.rstrip('\n').split(',')
    
    label = float(element[75])
    
    if label == 0.0 and count_smooth < NbGalaxy/2:
        count_smooth += 1     
    elif label == 1.0 and count_spiral < NbGalaxy/2:
        count_spiral += 1
    else:
        continue

    Galaxies[count] = [element[4], element[5], element[6], element[17], element[23], element[24], element[75]]
        
    count += 1
    if count >= NbGalaxy:
        break

fid.close() 

# ----- For debug -----
#print(count_smooth)
#print(count_spiral)
#print(count)
#print(Galaxies)
#----------------------

In [12]:
def NormalizeData(data):
    transform = MinMaxScaler()
    return transform.fit_transform(data, data[:,len(Galaxies[0])-1])

def GetGalaxiesClassProbabilities():
    count_smooth = 0
    count_spiral = 0
    
    fid = open('galaxy_feature_vectors.csv', 'r') 
    for line in fid:
        element = line.rstrip('\n').split(',')

        label = float(element[75])

        if label == 0.0:
            count_smooth += 1     
        elif label == 1.0:
            count_spiral += 1
    return [count_smooth/(count_smooth+count_spiral), count_spiral/(count_smooth+count_spiral)]

In [31]:
def SplitLabelsFromPrimitives(data_train, data_val, data_test):
    num_features = len(data_train[0]) - 1
    
    X_train  = data_train[:,0:num_features]
    Y_train  = data_train[:,num_features]

    X_val  = data_val[:,0:num_features]
    Y_val  = data_val[:,num_features]

    X_test  = data_test[:,0:num_features]
    Y_test  = data_test[:,num_features]
    return X_train, Y_train, X_val, Y_val, X_test, Y_test

def GenerateModel(data_train, data_val, data_test, layers, perceptrons, epochs, learnRate, name):
    
    #Split labels from the primitives
    X_train, Y_train, X_val, Y_val, X_test, Y_test = SplitLabelsFromPrimitives(data_train, data_val, data_test)
    
    #Create model
    model = Sequential()

    #Add layers
    model.add(Dense(units=perceptrons, activation='sigmoid', input_shape=(len(X_train[0]),)))
    model.add(Dropout(0.2))
    for i in range(0, layers - 2):
        model.add(Dense(units=perceptrons, activation='sigmoid'))
        model.add(Dropout(0.2))
    model.add(Dense(units=2, activation='sigmoid'))
    
    #Set optimizers and compile
    sgd = opt.SGD(lr=learnRate, decay=0, momentum=0, nesterov=False)
    model.compile(loss='sparse_categorical_crossentropy', 
              optimizer= sgd,
              metrics=['accuracy'])
    
    #Use TensorBoard
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir='./logs/'+name)
    
    #Train
    model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=epochs, batch_size=100, callbacks=[tb_callback])
    
    model.evaluate(X_test, Y_test)
    
    Y_probs = model.predict(X_test)
    
    Y_pred = Y_probs.argmax(axis=1)
    
    #plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_pred)
    #plt.scatter(X_test[:, 0], X_test[:, 1], c=Y_test)

    return Y_test, Y_pred

def Neurone_Holdout(array):
    array_train, array_val, array_test = SplitVectorData_Holdout(array, 0.6)
    
    AccScores = np.zeros(13, dtype=float)
    
    #base
    y_true_base, y_pred_base = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 0.5, 'base')
    
    #perceptrons
    y_true_perc1, y_pred_perc1 = GenerateModel(array_train, array_val, array_test, 4, 50, 60, 0.5, 'perc1')
    y_true_perc2, y_pred_perc2 = GenerateModel(array_train, array_val, array_test, 4, 300, 60, 0.5, 'perc2')
    y_true_perc3, y_pred_perc3 = GenerateModel(array_train, array_val, array_test, 4, 600, 60, 0.5, 'perc3')

    #epochs
    y_true_epoch1, y_pred_epoch1 = GenerateModel(array_train, array_val, array_test, 4, 100, 30, 0.5, 'epoch1')
    y_true_epoch2, y_pred_epoch2 = GenerateModel(array_train, array_val, array_test, 4, 100, 120, 0.5, 'epoch2')
    y_true_epoch3, y_pred_epoch3 = GenerateModel(array_train, array_val, array_test, 4, 100, 240, 0.5, 'epoch3')
    
    #layers
    y_true_layer1, y_pred_layer1 = GenerateModel(array_train, array_val, array_test, 6, 100, 60, 0.5, 'layer1')
    y_true_layer2, y_pred_layer2 = GenerateModel(array_train, array_val, array_test, 12, 100, 60, 0.5, 'layer2')
    y_true_layer3, y_pred_layer3 = GenerateModel(array_train, array_val, array_test, 24, 100, 60, 0.5, 'layer3')
    
    #learning rate
    y_true_lr1, y_pred_lr1 = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 0.1, 'lr1')
    y_true_lr2, y_pred_lr2 = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 0.5, 'lr2')
    y_true_lr3, y_pred_lr3 = GenerateModel(array_train, array_val, array_test, 4, 100, 60, 1, 'lr3')

    validations = [[y_true_base, y_pred_base],
    [y_true_perc1, y_pred_perc1],
    [y_true_perc2, y_pred_perc2],
    [y_true_perc3, y_pred_perc3],
    [y_true_epoch1, y_pred_epoch1],
    [y_true_epoch2, y_pred_epoch2],
    [y_true_epoch3, y_pred_epoch3],
    [y_true_layer1, y_pred_layer1],
    [y_true_layer2, y_pred_layer2],
    [y_true_layer3, y_pred_layer3],
    [y_true_lr1, y_pred_lr1],
    [y_true_lr2, y_pred_lr2],
    [y_true_lr3, y_pred_lr3]]
    
    return GenerateScores(validations)

In [36]:
# Galaxies_norm = NormalizeData(Galaxies)
acc_neurone_holdout, f1_neurone_holdout = Neurone_Holdout(Galaxies)

Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/6

Epoch 60/60
Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Train on 9600 samples, validate on 3200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/3

Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78/120
Epoch 79/120
Epoch 80/120
Epoch 81/120
Epoch 82/120
Epoch 83/120
Epoch 84/120
Epoch 85/120


Epoch 86/120
Epoch 87/120
Epoch 88/120
Epoch 89/120
Epoch 90/120
Epoch 91/120
Epoch 92/120
Epoch 93/120
Epoch 94/120
Epoch 95/120
Epoch 96/120
Epoch 97/120
Epoch 98/120
Epoch 99/120
Epoch 100/120
Epoch 101/120
Epoch 102/120
Epoch 103/120
Epoch 104/120
Epoch 105/120
Epoch 106/120
Epoch 107/120
Epoch 108/120
Epoch 109/120
Epoch 110/120
Epoch 111/120
Epoch 112/120
Epoch 113/120
Epoch 114/120
Epoch 115/120
Epoch 116/120
Epoch 117/120
Epoch 118/120
Epoch 119/120
Epoch 120/120
Train on 9600 samples, validate on 3200 samples
Epoch 1/240
Epoch 2/240
Epoch 3/240
Epoch 4/240
Epoch 5/240
Epoch 6/240
Epoch 7/240
Epoch 8/240
Epoch 9/240
Epoch 10/240
Epoch 11/240
Epoch 12/240
Epoch 13/240
Epoch 14/240
Epoch 15/240
Epoch 16/240
Epoch 17/240
Epoch 18/240
Epoch 19/240
Epoch 20/240
Epoch 21/240
Epoch 22/240
Epoch 23/240
Epoch 24/240
Epoch 25/240
Epoch 26/240
Epoch 27/240
Epoch 28/240
Epoch 29/240
Epoch 30/240
Epoch 31/240
Epoch 32/240
Epoch 33/240
Epoch 34/240
Epoch 35/240
Epoch 36/240
Epoch 37/240
Epoc

Epoch 84/240
Epoch 85/240
Epoch 86/240
Epoch 87/240
Epoch 88/240
Epoch 89/240
Epoch 90/240
Epoch 91/240
Epoch 92/240
Epoch 93/240
Epoch 94/240
Epoch 95/240
Epoch 96/240
Epoch 97/240
Epoch 98/240
Epoch 99/240
Epoch 100/240
Epoch 101/240
Epoch 102/240
Epoch 103/240
Epoch 104/240
Epoch 105/240
Epoch 106/240
Epoch 107/240
Epoch 108/240
Epoch 109/240
Epoch 110/240
Epoch 111/240
Epoch 112/240
Epoch 113/240
Epoch 114/240
Epoch 115/240
Epoch 116/240
Epoch 117/240
Epoch 118/240
Epoch 119/240
Epoch 120/240
Epoch 121/240
Epoch 122/240
Epoch 123/240
Epoch 124/240
Epoch 125/240
Epoch 126/240
Epoch 127/240
Epoch 128/240
Epoch 129/240
Epoch 130/240
Epoch 131/240
Epoch 132/240
Epoch 133/240
Epoch 134/240
Epoch 135/240
Epoch 136/240
Epoch 137/240
Epoch 138/240
Epoch 139/240
Epoch 140/240
Epoch 141/240
Epoch 142/240
Epoch 143/240
Epoch 144/240
Epoch 145/240
Epoch 146/240
Epoch 147/240
Epoch 148/240
Epoch 149/240
Epoch 150/240
Epoch 151/240
Epoch 152/240
Epoch 153/240
Epoch 154/240
Epoch 155/240
Epoch 15

Epoch 203/240
Epoch 204/240
Epoch 205/240
Epoch 206/240
Epoch 207/240
Epoch 208/240
Epoch 209/240
Epoch 210/240
Epoch 211/240
Epoch 212/240
Epoch 213/240
Epoch 214/240
Epoch 215/240
Epoch 216/240
Epoch 217/240
Epoch 218/240
Epoch 219/240
Epoch 220/240
Epoch 221/240
Epoch 222/240
Epoch 223/240
Epoch 224/240
Epoch 225/240
Epoch 226/240
Epoch 227/240
Epoch 228/240
Epoch 229/240
Epoch 230/240
Epoch 231/240
Epoch 232/240
Epoch 233/240
Epoch 234/240
Epoch 235/240
Epoch 236/240
Epoch 237/240
Epoch 238/240
Epoch 239/240
Epoch 240/240
Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60


Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
E

Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
E

Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Train on 9600 samples, validate on 3200 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
E

In [37]:
acc_neurone_holdout

array([0.7678125, 0.776875 , 0.778125 , 0.579375 , 0.7603125, 0.7884375,
       0.7865625, 0.420625 , 0.420625 , 0.420625 , 0.720625 , 0.7940625,
       0.7925   ])

In [15]:
def SVM_linear(X_train, Y_train, X_test, C):
    gamma_range = np.logspace(-9, 3, 13)
    param_grid = dict(gamma=gamma_range)
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(cache_size=2048), param_grid=param_grid, cv=cv, n_jobs=4)
    grid.fit(X_train, Y_train)

    print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

    clf = SVC(C=C, gamma=grid.best_params_['gamma'], class_weight='balanced', kernel='linear')
    clf.fit(X_train, Y_train)
    
    Y_pred = clf.predict(X_test)
    return Y_pred

def SVM_rbf(X_train, Y_train, X_test, C, gamma):

    clf = SVC(C=C, gamma=gamma, kernel='rbf')
    clf.fit(X_train, Y_train)
    
    Y_pred = clf.predict(X_test)
    return Y_pred

def SVM(array):
    array_train, array_test = SplitVectorData_NoVal(array, 0.8)
    X_train, Y_train, X_val, Y_val, X_test, Y_test = SplitLabelsFromPrimitives(array_train, array_test, array_test) #Val N,est pas utilisé, mais la méthode èa besoin du paremèetre. On lui passe donc les données tests.
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    Y_pred_lin03= SVM_linear(X_train, Y_train, X_test, 1e-03)
    Y_pred_lin01= SVM_linear(X_train, Y_train, X_test, 1e-01)
    Y_pred_lin1= SVM_linear(X_train, Y_train, X_test, 1.0)
    Y_pred_lin10= SVM_linear(X_train, Y_train, X_test, 10.0)
    Y_pred_rbf03_g03= SVM_rbf(X_train, Y_train, X_test, 1e-03, 1e-03)
    Y_pred_rbf01_g03= SVM_rbf(X_train, Y_train, X_test, 1e-01, 1e-03)
    Y_pred_rbf1_g03= SVM_rbf(X_train, Y_train, X_test, 1.0, 1e-03)
    Y_pred_rbf10_g03= SVM_rbf(X_train, Y_train, X_test, 10.0, 1e-03)
    Y_pred_rbf03_g01= SVM_rbf(X_train, Y_train, X_test, 1e-03, 1e-01)
    Y_pred_rbf01_g01= SVM_rbf(X_train, Y_train, X_test, 1e-01, 1e-01)
    Y_pred_rbf1_g01= SVM_rbf(X_train, Y_train, X_test, 1.0, 1e-01)
    Y_pred_rbf10_g01= SVM_rbf(X_train, Y_train, X_test, 10.0, 1e-01)
    Y_pred_rbf03_g1= SVM_rbf(X_train, Y_train, X_test, 1e-03, 1.0)
    Y_pred_rbf01_g1= SVM_rbf(X_train, Y_train, X_test, 1e-01, 1.0)
    Y_pred_rbf1_g1= SVM_rbf(X_train, Y_train, X_test, 1.0, 1.0)
    Y_pred_rbf10_g1= SVM_rbf(X_train, Y_train, X_test, 10.0, 1.0)
    Y_pred_rbf03_g10= SVM_rbf(X_train, Y_train, X_test, 1e-03, 10.0)
    Y_pred_rbf01_g10= SVM_rbf(X_train, Y_train, X_test, 1e-01, 10.0)
    Y_pred_rbf1_g10= SVM_rbf(X_train, Y_train, X_test, 1.0, 10.0)
    Y_pred_rbf10_g10= SVM_rbf(X_train, Y_train, X_test, 10.0, 10.0)
    
    validations = [[Y_test, Y_pred_lin03],
    [Y_test, Y_pred_lin01],
    [Y_test, Y_pred_lin1],
    [Y_test, Y_pred_lin10],             
    [Y_test, Y_pred_rbf03_g03],
    [Y_test, Y_pred_rbf01_g03],
    [Y_test, Y_pred_rbf1_g03],
    [Y_test, Y_pred_rbf10_g03],            
    [Y_test, Y_pred_rbf03_g01],
    [Y_test, Y_pred_rbf01_g01],
    [Y_test, Y_pred_rbf1_g01],
    [Y_test, Y_pred_rbf10_g01],              
    [Y_test, Y_pred_rbf03_g1],
    [Y_test, Y_pred_rbf01_g1],
    [Y_test, Y_pred_rbf1_g1],
    [Y_test, Y_pred_rbf10_g1],            
    [Y_test, Y_pred_rbf03_g10],
    [Y_test, Y_pred_rbf01_g10],
    [Y_test, Y_pred_rbf1_g10],
    [Y_test, Y_pred_rbf10_g10]]
    
    return GenerateScores(validations)

    

In [16]:
acc_svm, f1_svm = SVM(Galaxies)

The best parameters are {'gamma': 0.1} with a score of 0.83
The best parameters are {'gamma': 0.1} with a score of 0.83
The best parameters are {'gamma': 0.1} with a score of 0.83
The best parameters are {'gamma': 0.1} with a score of 0.83


In [41]:
# For Debugging
#print(acc_svm)
#print(f1_svm)
#print(acc_neurone_holdout)
#print(f1_neurone_holdout)

In [42]:
def AddAccAndF1ToExcel(name, acc, f1, c_acc, c_f1):
    acc_new = pd.DataFrame({'acc': acc})
    f1_new = pd.DataFrame({'f1': f1})
    wb = load_workbook(name)

    ws = wb['Feuil1']

    for index, row in acc_new.iterrows():
        cell =  c_acc+'%d'  % (index + 2)
        ws[cell] = row[0]
        
    for index, row in f1_new.iterrows():
        cell = c_f1 + '%d'  % (index + 2)
        ws[cell] = row[0]

    wb.save(name)


    return pd.read_excel(name, index_col=0)

svm = AddAccAndF1ToExcel('svm.xlsx', acc_svm, f1_svm, 'D', 'E')
rn_holdout = AddAccAndF1ToExcel('rn_holdout.xlsx', acc_neurone_holdout, f1_neurone_holdout, 'F', 'G')

# Rapport

1

2

3

4

5

6

7

8

9

# Annexe

## Réseau neuronaux en modifiant le nombre de couches total

<img src="rn_layer.png">

## Réseau neuronaux en modifiant le nombre de perceptrons dans couche cachée

<img src="rn_perc.png">

## Réseau neuronaux en modifiant le nombre d'itérations

<img src="rn_epoch.png">

## Réseau neuronaux en modifiant le taux d'apprentissage

<img src="rn_lr.png">

###  Tableau 1: Réseau neuronaux avec validation Holdout

In [43]:
rn_holdout.head(100)

Unnamed: 0_level_0,Layers,Perceptrons,Epochs,Learning Rate,Accuracy,F1
Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
base,4,100,60,0.5,0.767813,0.768813
perc1,4,50,60,0.5,0.776875,0.77819
perc2,4,300,60,0.5,0.778125,0.779427
perc3,4,600,60,0.5,0.579375,0.733676
epoch1,4,100,30,0.5,0.760312,0.761186
epoch2,4,100,120,0.5,0.788438,0.789776
epoch3,4,100,240,0.5,0.786563,0.787738
layer1,6,100,60,0.5,0.420625,0.592169
layer2,12,100,60,0.5,0.420625,0.592169
layer3,24,100,60,0.5,0.420625,0.592169


### Tableau 2: SVM

In [90]:
svm.head(100)

Unnamed: 0_level_0,C,Gamma,Accuracy,F1
Kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
linear,0.001,0.1,0.733354,0.734472
linear,0.1,0.1,0.796186,0.797327
linear,1.0,0.1,0.796186,0.797365
linear,10.0,0.1,0.795874,0.797057
rbf,0.001,0.001,0.420756,0.592299
rbf,0.1,0.001,0.663332,0.658722
rbf,1.0,0.001,0.750234,0.751792
rbf,10.0,0.001,0.799937,0.801157
rbf,0.001,0.1,0.551422,0.496336
rbf,0.1,0.1,0.81588,0.816874
