# GS CV MLP

Explorando `GridSearchCV` a fim de reduzir a aleatoriedade dos pesos e vieses iniciais dos modelos de classificação.

## Importando bibliotecas

In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

## Preparando dados

In [2]:
# Load the Iris features and labels as plain arrays
X, y = load_iris(return_X_y=True)

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_pre_train, X_pre_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=6,
)

# Fit the standardizer on the training portion only
sc = StandardScaler()
sc.fit(X_pre_train)

# Standardize the training features with the fitted scaler
X_train = sc.transform(X_pre_train)

# Candidate hidden-layer sizes: 40, 43, ..., 70 (stop value 73 is exclusive)
intervalo = np.arange(40, 73, 3)

## Grid search com pesos aleatórios

In [3]:
# First unseeded run: no random_state is given to MLPClassifier, so this search
# is one of several repetitions meant to be compared against each other.
gs1 = GridSearchCV(
    MLPClassifier(),
    {'hidden_layer_sizes': intervalo, 'solver': ['lbfgs']},
    cv=7,  # 7-fold cross-validation
)
gs1.fit(X_train, y_train)

# Candidates ordered by rank, restricted to the columns of interest
(
    pd.DataFrame(gs1.cv_results_)
    .sort_values(by='rank_test_score')
    .loc[:, ['param_hidden_layer_sizes', 'mean_test_score', 'std_test_score', 'rank_test_score']]
)

Unnamed: 0,param_hidden_layer_sizes,mean_test_score,std_test_score,rank_test_score
0,40,0.952381,0.046657,1
1,43,0.942857,0.042592,2
2,46,0.942857,0.042592,2
3,49,0.942857,0.042592,2
4,52,0.942857,0.042592,2
5,55,0.942857,0.042592,2
6,58,0.942857,0.042592,2
7,61,0.942857,0.042592,2
8,64,0.942857,0.042592,2
9,67,0.942857,0.042592,2


In [4]:
# Second unseeded run: identical configuration to the previous search, repeated
# to observe how the ranking changes when no random_state is fixed.
gs2 = GridSearchCV(
    MLPClassifier(),
    {'hidden_layer_sizes': intervalo, 'solver': ['lbfgs']},
    cv=7,  # 7-fold cross-validation
)
gs2.fit(X_train, y_train)

# Candidates ordered by rank, restricted to the columns of interest
(
    pd.DataFrame(gs2.cv_results_)
    .sort_values(by='rank_test_score')
    .loc[:, ['param_hidden_layer_sizes', 'mean_test_score', 'std_test_score', 'rank_test_score']]
)

Unnamed: 0,param_hidden_layer_sizes,mean_test_score,std_test_score,rank_test_score
0,40,0.952381,0.030117,1
10,70,0.952381,0.046657,2
1,43,0.942857,0.042592,3
2,46,0.942857,0.042592,3
3,49,0.942857,0.042592,3
4,52,0.942857,0.042592,3
5,55,0.942857,0.042592,3
6,58,0.942857,0.042592,3
7,61,0.942857,0.042592,3
8,64,0.942857,0.042592,3


In [5]:
# Third unseeded run: same grid and estimator settings, still without a fixed
# random_state on MLPClassifier.
gs3 = GridSearchCV(
    MLPClassifier(),
    {'hidden_layer_sizes': intervalo, 'solver': ['lbfgs']},
    cv=7,  # 7-fold cross-validation
)
gs3.fit(X_train, y_train)

# Candidates ordered by rank, restricted to the columns of interest
(
    pd.DataFrame(gs3.cv_results_)
    .sort_values(by='rank_test_score')
    .loc[:, ['param_hidden_layer_sizes', 'mean_test_score', 'std_test_score', 'rank_test_score']]
)

Unnamed: 0,param_hidden_layer_sizes,mean_test_score,std_test_score,rank_test_score
0,40,0.942857,0.042592,1
1,43,0.942857,0.042592,1
2,46,0.942857,0.042592,1
3,49,0.942857,0.042592,1
4,52,0.942857,0.042592,1
5,55,0.942857,0.042592,1
6,58,0.942857,0.042592,1
7,61,0.942857,0.042592,1
8,64,0.942857,0.042592,1
9,67,0.942857,0.042592,1


## Grid Search com pesos um pouco mais determinísticos

In [6]:
# First seeded run: random_state is pinned to 42 through the grid (a single
# candidate value), so every MLPClassifier is built with the same seed.
gs4 = GridSearchCV(
    MLPClassifier(),
    {
        'hidden_layer_sizes': intervalo,
        'random_state': [42],
        'solver': ['lbfgs'],
    },
    cv=7,  # 7-fold cross-validation
)
gs4.fit(X_train, y_train)

# Candidates ordered by rank, restricted to the columns of interest
(
    pd.DataFrame(gs4.cv_results_)
    .sort_values(by='rank_test_score')
    .loc[:, ['param_hidden_layer_sizes', 'mean_test_score', 'std_test_score', 'rank_test_score']]
)

Unnamed: 0,param_hidden_layer_sizes,mean_test_score,std_test_score,rank_test_score
0,40,0.942857,0.042592,1
1,43,0.942857,0.042592,1
2,46,0.942857,0.042592,1
3,49,0.942857,0.042592,1
4,52,0.942857,0.042592,1
5,55,0.942857,0.042592,1
6,58,0.942857,0.042592,1
7,61,0.942857,0.042592,1
8,64,0.942857,0.042592,1
9,67,0.942857,0.042592,1


In [7]:
# Second seeded run: same grid with random_state pinned to 42, repeated to
# check that the results match the previous seeded search.
gs5 = GridSearchCV(
    MLPClassifier(),
    {
        'hidden_layer_sizes': intervalo,
        'random_state': [42],
        'solver': ['lbfgs'],
    },
    cv=7,  # 7-fold cross-validation
)
gs5.fit(X_train, y_train)

# Candidates ordered by rank, restricted to the columns of interest
(
    pd.DataFrame(gs5.cv_results_)
    .sort_values(by='rank_test_score')
    .loc[:, ['param_hidden_layer_sizes', 'mean_test_score', 'std_test_score', 'rank_test_score']]
)

Unnamed: 0,param_hidden_layer_sizes,mean_test_score,std_test_score,rank_test_score
0,40,0.942857,0.042592,1
1,43,0.942857,0.042592,1
2,46,0.942857,0.042592,1
3,49,0.942857,0.042592,1
4,52,0.942857,0.042592,1
5,55,0.942857,0.042592,1
6,58,0.942857,0.042592,1
7,61,0.942857,0.042592,1
8,64,0.942857,0.042592,1
9,67,0.942857,0.042592,1


In [8]:
# Third seeded run: identical configuration again (random_state fixed at 42)
# as a further repetition of the seeded search.
gs6 = GridSearchCV(
    MLPClassifier(),
    {
        'hidden_layer_sizes': intervalo,
        'random_state': [42],
        'solver': ['lbfgs'],
    },
    cv=7,  # 7-fold cross-validation
)
gs6.fit(X_train, y_train)

# Candidates ordered by rank, restricted to the columns of interest
(
    pd.DataFrame(gs6.cv_results_)
    .sort_values(by='rank_test_score')
    .loc[:, ['param_hidden_layer_sizes', 'mean_test_score', 'std_test_score', 'rank_test_score']]
)

Unnamed: 0,param_hidden_layer_sizes,mean_test_score,std_test_score,rank_test_score
0,40,0.942857,0.042592,1
1,43,0.942857,0.042592,1
2,46,0.942857,0.042592,1
3,49,0.942857,0.042592,1
4,52,0.942857,0.042592,1
5,55,0.942857,0.042592,1
6,58,0.942857,0.042592,1
7,61,0.942857,0.042592,1
8,64,0.942857,0.042592,1
9,67,0.942857,0.042592,1
