<a href="https://colab.research.google.com/github/lauraabrante/TFT/blob/main/MLP/Final_TFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Primer paso:** Importar las librerías necesarias

In [None]:
# Mount Google Drive so the notebook can access the data stored there
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Execute the shared init notebook from Drive.
# NOTE(review): presumably this is where training(), table(), min_mean() and
# metrics() come from, since they are used below but never defined here — confirm.
%run "/content/drive/My Drive/TFG_Final/init.ipynb"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
#Import libraries
import numpy as np
import pandas as pd
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

# **Segundo paso:** Preparación del Dataset

Los datos que se van a usar como base para el análisis son las puntuaciones de los constructos obtenidas mediante el análisis PLS-SEM.

Los datos se van a dividir de la siguiente forma:
*   Datos entrenamiento: 70% del total de la muestra.
*   Datos para validación: 20% de los datos de entrenamiento.
*   Datos para el test: 30% del total de la muestra.

A su vez es necesario separar de la matriz la variable que se quiere predecir, es decir, la variable dependiente.


In [None]:
# Load the construct scores; the first CSV column becomes the row index
data = pd.read_csv(
    '/content/drive/My Drive/TFG_Final/Data/scores2.csv',
    sep=',',
    encoding='utf-8',
    index_col=0,
)

# Display five random rows as a quick sanity check
data.sample(5)

Unnamed: 0,Norma Subjetiva,Ausencia Burnout,Sensación de Logro,Ausencia Desvalorización,Soporte del Club,Actitud,Control Conductual Percibido,Liga,Intención
85,-0.313367,1.125799,0.658339,0.492846,0.732172,-0.699036,0.071997,-1.241467,0.350333
10,-1.789127,-1.672408,-0.961361,-0.875785,-1.413442,-1.002492,-1.795952,0.800056,-1.162587
60,-5.495224,-1.271147,-1.191469,-0.895985,-0.395055,-5.101549,-3.363602,-1.241467,-2.171662
29,-0.375776,0.4357,-1.026458,-2.004767,0.960601,-2.611295,-0.086645,0.800056,-2.171662
68,0.001204,-2.017729,-1.466711,-1.75992,-0.429947,-0.973573,-0.683081,0.800056,-1.162018


In [None]:
# Separate the dependent variable ('Intención') from the predictors
Y = data[['Intención']]              # kept as a one-column DataFrame
X = data.drop(columns='Intención')   # every remaining column is a predictor

In [None]:
# Hold out 30% of the sample as the test set (fixed seed, so the split is repeatable)
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Carve a tuning (validation) set out of the training data: 20% of the train split
X_train_v, X_tune, Y_train_v, Y_tune = train_test_split(
    X_train,
    Y_train,
    test_size=0.2,
    random_state=1,
)

# **Tercer paso:** Creamos la tabla de resultados y de métricas

Esta tabla va a contener los resultados de predicción obtenidos a través de una red neuronal MLP sin modificar a la que llamaremos "Blind_MLP", la red finalmente modificada que se llamará "Tuned_MLP" y como última columna se encontrará el resultado real o actual el cual fue nombrado como "Real_Value".

Por otro lado, también se creará una tabla de métricas que contendrá, para cada modelo, los valores de error RMSE y MAE, así como el coeficiente R².

In [None]:
# Predictions table: one row per test sample, one column per model plus the real value
result = pd.DataFrame(
    columns=['Blind_MLP_Predict', 'Tuned_MLP_Predict', 'Real_Value'],
    index=X_test.index,
)
result['Real_Value'] = Y_test

# Metrics table: one row per error measure, one column per model
metric = pd.DataFrame(
    columns=['Blind MLP', 'Tune MLP'],
    index=['RMSE', 'MAE', 'R^2'],
)

# **Cuarto paso:** Creamos la red neuronal MLP a ciegas

Esto nos sirve para hacer una comparación con la red que finalmente hayamos tuneado y ver si nuestro proceso de prueba y error para conseguir la mejor combinación de hiperparámetros ha beneficiado a la red neuronal o no.

* Se usan los hiperparámetros que hay por defecto.

In [None]:
# Baseline hyperparameters for the untuned ("blind") network.
# Later cells overwrite these one by one as the tuning progresses.

# Network shape and activation
hidden_layer_sizes = 5
activation = 'relu'

# Optimiser and learning-rate settings
solver = 'adam'
learning_rate = 'constant'
learning_rate_init = 0.001
max_iter = 2000

# Remaining settings
alpha = 0.0001
shuffle = True
random_state = None  # no fixed seed is handed to training()

In [None]:
# Baseline model: uses the untuned hyperparameters and the full training split
modelo = 'Blind MLP'

# training() receives the train data plus X_test; its return value is stored
# as this model's predictions for the test samples
Y_predict = training(
    hidden_layer_sizes, max_iter, activation, learning_rate,
    learning_rate_init, alpha, solver, shuffle,
    X_train, Y_train, X_test, random_state,
)
result['Blind_MLP_Predict'] = Y_predict

# Absolute error of each prediction against the real value
result['|Real_Value-Blind_MLP|'] = (result['Real_Value'] - result['Blind_MLP_Predict']).abs()

# Record this model's errors in the metrics table (the cell shows the call's result)
metrics(metric, modelo, Y_predict, Y_test)

Unnamed: 0,Blind MLP,Tune MLP
RMSE,0.692838,
MAE,0.482892,
R^2,0.52162,


# **Quinto paso:** Función de Activación y Optimizador

En este paso llega la hora de empezar a tunear la red neuronal. Vamos a comenzar por elegir la combinación entre función de activación y optimizador que nos dé un mejor resultado. El resto de hiperparámetros serán los mismos utilizados en la creación del modelo MLP a ciegas.

In [None]:
# Number of repetitions per hyperparameter combination (passed to table() and
# min_mean() below); more repetitions give a more reliable comparison
num_repetition = 10

In [None]:
# Candidate values for the activation function / optimiser search
solver_list = ['lbfgs', 'sgd', 'adam']
activation_list = ['identity', 'logistic', 'tanh', 'relu']

# One row per (activation, solver) pair; table() builds the results placeholder
my_index = pd.MultiIndex.from_product(
    [activation_list, solver_list],
    names=('Activación', 'Optimizador'),
)
tune = table(my_index, num_repetition)

In [None]:
# Try every (activation, solver) combination.
# Each combination is trained once per column of `tune` on the reduced training
# split, and the RMSE against the validation targets is stored in the matching cell.
for activation_option in activation_list:
    for solver_option in solver_list:
        for rep in tune.columns:
            Y_predict = training(hidden_layer_sizes, max_iter, activation_option,
                                 learning_rate, learning_rate_init, alpha, solver_option,
                                 shuffle, X_train_v, Y_train_v, X_tune, random_state)

            # Validation RMSE for this run
            MSE = mean_squared_error(Y_tune, Y_predict)
            RMSE = math.sqrt(MSE)
            tune.at[(activation_option, solver_option), rep] = RMSE

# Summarise the repetitions (the displayed table carries Mean and Min columns)
min_mean(tune, num_repetition)

Unnamed: 0_level_0,Unnamed: 1_level_0,R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,Mean,Min
Activación,Optimizador,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
identity,adam,0.487579,0.388207,0.431175,0.450111,0.472783,0.413468,0.438721,0.436758,0.424795,0.421127,0.436472,0.388207
identity,sgd,0.447742,0.417132,0.413085,0.446602,0.443512,0.396288,0.45726,0.493475,0.413286,0.42003,0.434841,0.396288
relu,sgd,0.399534,0.494404,0.494184,0.541368,0.642808,0.593848,0.430039,0.552067,0.477957,0.581309,0.520752,0.399534
logistic,adam,0.419959,0.445917,0.405059,0.429543,0.445816,0.436681,0.404598,0.427151,0.424235,0.444899,0.428386,0.404598
identity,lbfgs,0.412311,0.412407,0.412315,0.412378,0.412327,0.412324,0.412326,0.412346,0.412341,0.412323,0.41234,0.412311
tanh,adam,0.522959,0.458349,0.519764,0.547768,0.4536,0.431476,0.464063,0.525398,0.481996,0.445838,0.485121,0.431476
relu,adam,0.603315,0.528946,0.537139,0.623096,0.569811,0.587416,0.602991,0.433862,0.504456,0.514662,0.550569,0.433862
tanh,sgd,0.509024,0.591365,0.494111,0.457815,0.491791,0.454132,0.507078,0.529144,0.48244,0.468308,0.498521,0.454132
logistic,sgd,0.509724,0.502881,0.546559,0.496803,0.484435,0.52824,0.499376,0.466848,0.463322,0.485764,0.498395,0.463322
relu,lbfgs,0.618031,1.097971,0.648526,0.937319,0.79857,1.047052,0.80148,1.251654,4.205034,0.894812,1.230045,0.618031


Observando el resultado, la combinación (identity, adam) es la que ha dado el mejor resultado. Nuestro objetivo es encontrar qué combinación puede reportar un menor error, es decir, cuál es la más prometedora.

In [None]:
# Update the hyperparameters with the activation/solver pair selected above
activation = 'identity'
solver = 'adam'

# **Sexto paso:** Modificar el número de neuronas en las capas ocultas

Como tenemos un total de 8 nodos de entrada, se va a imponer que este sea el número máximo de neuronas ocultas en total (sumando todas las capas ocultas). En cuanto al número de capas ocultas, se va a imponer un máximo de dos, pues las situaciones en las que el rendimiento mejora con una segunda o más capas ocultas son muy pocas.

In [None]:
# Upper bound on the total number of hidden neurons across all hidden layers
# (the notebook text sets it to the number of input nodes).
MAX_NEURONS = 8

# One hidden layer: [1] ... [MAX_NEURONS]
one_layer = [[i] for i in range(1, MAX_NEURONS + 1)]

# Two hidden layers: every [i, j] with i, j >= 1 and i + j <= MAX_NEURONS
two_layers = [[i, j]
              for i in range(1, MAX_NEURONS)
              for j in range(1, MAX_NEURONS - i + 1)]

# Candidate structures and their string labels (used as row names of the results table)
PossibleNetStruct = one_layer + two_layers
PossibleNetString = [str(netStruct) for netStruct in PossibleNetStruct]

for netStruct in PossibleNetStruct:
    print('Network Structure: ', netStruct)

Network Structure:  [1]
Network Structure:  [2]
Network Structure:  [3]
Network Structure:  [4]
Network Structure:  [5]
Network Structure:  [6]
Network Structure:  [7]
Network Structure:  [8]
Network Structure:  [1, 1]
Network Structure:  [1, 2]
Network Structure:  [1, 3]
Network Structure:  [1, 4]
Network Structure:  [1, 5]
Network Structure:  [1, 6]
Network Structure:  [1, 7]
Network Structure:  [2, 1]
Network Structure:  [2, 2]
Network Structure:  [2, 3]
Network Structure:  [2, 4]
Network Structure:  [2, 5]
Network Structure:  [2, 6]
Network Structure:  [3, 1]
Network Structure:  [3, 2]
Network Structure:  [3, 3]
Network Structure:  [3, 4]
Network Structure:  [3, 5]
Network Structure:  [4, 1]
Network Structure:  [4, 2]
Network Structure:  [4, 3]
Network Structure:  [4, 4]
Network Structure:  [5, 1]
Network Structure:  [5, 2]
Network Structure:  [5, 3]
Network Structure:  [6, 1]
Network Structure:  [6, 2]
Network Structure:  [7, 1]


In [None]:
# Search over the candidate hidden-layer structures, one table row per structure
tune = table(PossibleNetString, num_repetition)

# Walk each structure together with its string label (the row name in `tune`)
for netStr, RowName in zip(PossibleNetStruct, PossibleNetString):
    for rep in tune.columns:
        Y_predict = training(
            netStr, max_iter, activation,
            learning_rate, learning_rate_init, alpha, solver,
            shuffle, X_train_v, Y_train_v, X_tune, random_state,
        )

        # Validation RMSE for this run
        MSE = mean_squared_error(Y_tune, Y_predict)
        tune.at[RowName, rep] = math.sqrt(MSE)

# Summarise the repetitions (the displayed table carries Mean and Min columns)
min_mean(tune, num_repetition)



Unnamed: 0,R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,Mean,Min
[3],0.490248,0.365155,0.435086,0.497692,0.523758,0.490023,0.464659,0.424656,0.500819,0.467036,0.465913,0.365155
[4],0.425793,0.516173,0.399399,0.45583,0.443468,0.369088,0.420617,0.435233,0.4379,0.473721,0.437722,0.369088
"[1, 1]",0.371624,0.434309,0.404454,0.597461,0.467296,0.44677,0.510417,0.471475,0.459325,0.54215,0.470528,0.371624
"[2, 6]",0.387829,0.508665,0.407651,0.460549,0.475138,0.376077,0.429879,0.60592,0.455711,0.401471,0.450889,0.376077
"[3, 1]",0.414342,0.420355,0.428306,0.396837,0.381015,0.556237,0.486124,0.467124,0.398484,0.397611,0.434643,0.381015
"[5, 1]",0.40135,0.414241,0.415945,0.381448,0.425593,0.452831,0.405218,0.439164,0.425311,0.384821,0.414592,0.381448
[2],0.431076,0.444655,0.417326,0.386282,0.39308,0.402572,0.384883,0.428644,0.433022,0.449819,0.417136,0.384883
"[2, 3]",0.52531,0.386725,0.422784,0.455314,0.586524,0.538955,0.409697,0.485021,0.494242,0.478385,0.478296,0.386725
"[2, 2]",0.387677,0.417279,0.453237,0.476957,0.47677,0.39973,0.485651,0.463133,0.468993,0.528803,0.455823,0.387677
[6],0.421687,0.459557,0.419394,0.448091,0.432873,0.522516,0.429303,0.387892,0.411514,0.447662,0.438049,0.387892


En este caso la combinación de neuronas y capas con la que se ha obtenido un resultado más prometedor es: (4)

In [None]:
# Update the hyperparameters with the structure selected above: a single entry of 4 neurons
hidden_layer_sizes = (4,)

# **Séptimo paso:** Modificar la tasa de aprendizaje y la tasa de aprendizaje inicial

In [None]:
# Candidate learning-rate schedules and initial learning rates
LR_list = ['constant', 'invscaling', 'adaptive']
LRI_list = [0.1, 0.01, 0.001, 0.00001, 0.05, 0.005]

# One row per (schedule, initial rate) pair; table() builds the results placeholder
my_index = pd.MultiIndex.from_product(
    [LR_list, LRI_list],
    names=('Learning rate', 'Rate'),
)
tune = table(my_index, num_repetition)

In [None]:
# Try every (schedule, initial rate) pair, in the same order as the nested lists
for LR_option, LRI_option in [(lr, lri) for lr in LR_list for lri in LRI_list]:
    for rep in tune.columns:
        Y_predict = training(
            hidden_layer_sizes, max_iter, activation,
            LR_option, LRI_option, alpha, solver,
            shuffle, X_train_v, Y_train_v, X_tune, random_state,
        )

        # Validation RMSE for this run
        MSE = mean_squared_error(Y_tune, Y_predict)
        tune.at[(LR_option, LRI_option), rep] = math.sqrt(MSE)

# Summarise the repetitions (the displayed table carries Mean and Min columns)
min_mean(tune, num_repetition)



Unnamed: 0_level_0,Unnamed: 1_level_0,R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,Mean,Min
Learning rate,Rate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
constant,0.05,0.414003,0.425872,0.411929,0.406086,0.382844,0.408033,0.410416,0.410438,0.453688,0.418065,0.414138,0.382844
invscaling,0.01,0.40954,0.383229,0.419925,0.422909,0.407144,0.419587,0.415449,0.400097,0.416607,0.416555,0.411104,0.383229
invscaling,0.005,0.428138,0.412652,0.403698,0.398724,0.41617,0.413622,0.443571,0.396257,0.411729,0.410797,0.413536,0.396257
invscaling,0.001,0.444429,0.399679,0.396487,0.39809,0.560845,0.411262,0.470953,0.421615,0.492366,0.427909,0.442363,0.396487
adaptive,0.05,0.415108,0.397098,0.427587,0.412355,0.409268,0.416888,0.408681,0.412273,0.40912,0.422489,0.413087,0.397098
constant,0.005,0.416432,0.410882,0.417723,0.416983,0.437237,0.397773,0.409815,0.424228,0.429576,0.407397,0.416805,0.397773
adaptive,0.001,0.508818,0.449457,0.415218,0.451221,0.486283,0.450247,0.509942,0.41972,0.399955,0.400515,0.449138,0.399955
constant,0.001,0.542543,0.426071,0.49884,0.408189,0.409589,0.483641,0.438074,0.400526,0.440015,0.463171,0.451066,0.400526
adaptive,0.01,0.411896,0.402155,0.427973,0.418416,0.417076,0.403412,0.41186,0.418459,0.411764,0.41784,0.414085,0.402155
adaptive,0.1,0.408718,0.410001,0.414,0.415768,0.421322,0.403608,0.407226,0.40524,0.427417,0.414712,0.412801,0.403608


In [None]:
# Update the hyperparameters with the schedule and initial rate selected above
learning_rate = 'constant'
learning_rate_init = 0.05

# **Octavo paso:** Modificar el número máximo de iteraciones y el parámetro shuffle

In [None]:
# Candidate iteration budgets and shuffle settings
shuffle_options = [True, False]
iter_list = [1000, 2000, 5000, 10000]

# One row per (iterations, shuffle) pair; table() builds the results placeholder
my_index = pd.MultiIndex.from_product(
    [iter_list, shuffle_options],
    names=('Iteraciones', 'Shuffle'),
)
tune = table(my_index, num_repetition)

In [None]:
# Try every (iteration budget, shuffle) pair on the validation split
for max_iteration in iter_list:
    for shuffle_option in shuffle_options:
        row_key = (max_iteration, shuffle_option)
        for rep in tune.columns:
            Y_predict = training(
                hidden_layer_sizes, max_iteration, activation,
                learning_rate, learning_rate_init, alpha, solver,
                shuffle_option, X_train_v, Y_train_v, X_tune, random_state,
            )

            # Validation RMSE for this run
            MSE = mean_squared_error(Y_tune, Y_predict)
            tune.at[row_key, rep] = math.sqrt(MSE)

# Summarise the repetitions (the displayed table carries Mean and Min columns)
min_mean(tune, num_repetition)

Unnamed: 0_level_0,Unnamed: 1_level_0,R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,Mean,Min
Iteraciones,Shuffle,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000,True,0.409883,0.422648,0.406735,0.387431,0.470908,0.413022,0.41907,0.414082,0.414032,0.389561,0.414737,0.387431
5000,True,0.416653,0.412161,0.412386,0.437679,0.412031,0.404788,0.465752,0.40595,0.46063,0.394546,0.422258,0.394546
1000,True,0.412754,0.405004,0.409275,0.422809,0.415209,0.398453,0.413153,0.422358,0.407876,0.476433,0.418332,0.398453
10000,True,0.41267,0.416407,0.438609,0.411988,0.417927,0.404827,0.412072,0.561562,0.476123,0.403397,0.435558,0.403397
2000,False,0.588984,0.416511,0.408761,0.409302,0.438874,0.412401,0.412417,0.429408,0.403451,0.416948,0.433706,0.403451
5000,False,0.410927,0.447069,0.41135,0.412972,0.409627,0.411719,0.4132,0.403735,0.421986,0.42297,0.416555,0.403735
1000,False,0.415632,0.419232,0.405383,0.417172,0.415277,0.407715,0.416739,0.408195,0.411341,0.417453,0.413414,0.405383
10000,False,0.409904,0.414039,0.409263,0.409039,0.417947,0.496798,0.406521,0.407226,0.429918,0.415253,0.421591,0.406521


In [None]:
# Update the hyperparameters with the iteration budget and shuffle setting selected above.
# The training() calls in this notebook read `max_iter`, so that is the name
# that must be assigned (`max_iteration` is only the search loop's variable).
max_iter = 2000
shuffle = True

# **Noveno paso:** Modificar el coeficiente de alpha

In [None]:
# Candidate values for the alpha coefficient; table() builds one results row per value
alpha_list = [0.1,0.01,0.001, 0.0001]
tune = table(alpha_list, num_repetition)

In [None]:
# Try every candidate alpha on the validation split
for alpha_option in alpha_list:
    for rep in tune.columns:
        Y_predict = training(
            hidden_layer_sizes, max_iter, activation,
            learning_rate, learning_rate_init, alpha_option, solver,
            shuffle, X_train_v, Y_train_v, X_tune, random_state,
        )

        # Validation RMSE for this run
        MSE = mean_squared_error(Y_tune, Y_predict)
        tune.at[alpha_option, rep] = math.sqrt(MSE)

# Summarise the repetitions (the displayed table carries Mean and Min columns)
min_mean(tune, num_repetition)

Unnamed: 0,R0,R1,R2,R3,R4,R5,R6,R7,R8,R9,Mean,Min
0.1,0.414177,0.403339,0.438758,0.388165,0.49699,0.412315,0.502074,0.412394,0.410075,0.421787,0.430007,0.388165
0.01,0.399544,0.426668,0.491714,0.418298,0.402511,0.526882,0.647762,0.41936,0.419531,0.411304,0.456357,0.399544
0.001,0.411405,0.458507,0.40687,0.41204,0.411659,0.405163,0.409438,0.447804,0.411525,0.413032,0.418744,0.405163
0.0001,0.412891,0.509955,0.416795,0.409322,0.413455,0.413079,0.627506,0.431165,0.457569,0.415935,0.450767,0.409322


In [None]:
# Update the hyperparameters with the alpha value selected above
alpha = 0.1

# **Décimo paso:** Entrenar el modelo MLP final con el set de entrenamiento

In [None]:
# Final model: uses the tuned hyperparameters and the full training split
modelo = 'Tune MLP'

# training() receives the train data plus X_test; its return value is stored
# as this model's predictions for the test samples
Y_predict = training(hidden_layer_sizes, max_iter, activation, learning_rate,
                     learning_rate_init, alpha, solver, shuffle, X_train, Y_train,
                     X_test, random_state)
result['Tuned_MLP_Predict'] = Y_predict

# Absolute error of the tuned model. This must be computed from the tuned
# predictions; it was previously computed from Blind_MLP_Predict.
result['|Real_Value-Tuned_MLP|'] = abs(result.Real_Value - result.Tuned_MLP_Predict)

# Record this model's errors in the metrics table (the cell shows the call's result)
metrics(metric, modelo, Y_predict, Y_test)

Unnamed: 0,Blind MLP,Tune MLP
RMSE,0.692838,0.583747
MAE,0.482892,0.414051
R^2,0.52162,0.660407
