#### Carregar o dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import RobustScaler

# Load the pipe-delimited preprocessed dataset. Twenty feature columns are read
# as float32 and the 'Scale' column as int8, instead of pandas' wider defaults.
df = pd.read_csv(
    '../dataset/preprocessed-sam-dataset.csv',
    sep='|',
    dtype={
        **dict.fromkeys(
            ['CZ', 'FZ', 'Fp1', 'F3', 'FC1', 'FC5', 'FT9', 'T7', 'CP5', 'P3',
             'P7', 'PO9', 'PZ', 'O2', 'P4', 'CP6', 'FT10', 'FC6', 'F8', 'Fp2'],
            'float32',
        ),
        'Scale': 'int8',
    },
)


#### Tamanho do dataset

In [2]:
# Show the DataFrame's dimensions as a (rows, columns) tuple.
df.shape

(140800, 21)

#### Treinar o modelo e exibir métricas

In [3]:
# Hold-out evaluation of an MLP with one hidden layer of 100 units.
# Features are every column except 'Scale'; 'Scale' is the class label.
X = df.drop('Scale', axis = 1)
y = df['Scale']

# 70/30 train/test split with a fixed seed so the partition is repeatable.
# NOTE(review): this split is not stratified, unlike the StratifiedKFold used by
# the cross-validation cells of this notebook; consider stratify = y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

# Fit the scaler on the training portion only, then apply the same fitted
# transform to the test portion so test data never influences preprocessing.
scaler = RobustScaler()
scaled_trainX = scaler.fit_transform(X_train)
scaled_testX = scaler.transform(X_test)

# verbose = True prints the per-iteration loss; with early_stopping = True the
# log also includes a validation score for each iteration.
model = MLPClassifier(random_state = 42, hidden_layer_sizes = (100,),
                        max_iter = 1000, activation = 'relu',
                        solver = 'adam', batch_size = 1000,
                        tol = 0.00001, learning_rate_init = 0.001,
                        learning_rate = 'adaptive',
                        early_stopping = True, alpha = 0.0001,
                        verbose = True)

model.fit(scaled_trainX, y_train)
y_pred = model.predict(scaled_testX)

# Report accuracy on both partitions, followed by per-class metrics on the test set.
print("Train Accuracy: {:.2f} %".format(model.score(scaled_trainX, y_train) * 100))
print("Test Accuracy: {:.2f} %".format(accuracy_score(y_test, y_pred) * 100))
print('\n')
# Heading previously misspelled as "Classifiction Report".
print("Classification Report")
# zero_division = 0 reports 0 for metrics whose denominator is zero
# (e.g. a class that is never predicted) instead of emitting a warning.
print(classification_report(y_test, y_pred, zero_division = 0))


Iteration 1, loss = 2.40572601
Validation score: 0.104505
Iteration 2, loss = 2.38591257
Validation score: 0.116883
Iteration 3, loss = 2.37792181
Validation score: 0.118304
Iteration 4, loss = 2.37139138
Validation score: 0.125507
Iteration 5, loss = 2.36541311
Validation score: 0.130885
Iteration 6, loss = 2.35994408
Validation score: 0.136871
Iteration 7, loss = 2.35455095
Validation score: 0.139813
Iteration 8, loss = 2.34973415
Validation score: 0.142959
Iteration 9, loss = 2.34555481
Validation score: 0.145597
Iteration 10, loss = 2.34143192
Validation score: 0.145191
Iteration 11, loss = 2.33756597
Validation score: 0.147524
Iteration 12, loss = 2.33421531
Validation score: 0.152090
Iteration 13, loss = 2.33137323
Validation score: 0.150771
Iteration 14, loss = 2.32849232
Validation score: 0.151684
Iteration 15, loss = 2.32578475
Validation score: 0.150568
Iteration 16, loss = 2.32367694
Validation score: 0.151684
Iteration 17, loss = 2.32130899
Validation score: 0.153003
Iterat

#### Treinar o modelo utilizando cross validation (3 fold)

In [4]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline

# Cross-validated accuracy estimate for an MLP with one hidden layer of 100 units.
X, y = df.drop(columns = 'Scale'), df['Scale']

model = MLPClassifier(
    hidden_layer_sizes = (100,),
    activation = 'relu',
    solver = 'adam',
    alpha = 1e-4,
    batch_size = 1000,
    learning_rate = 'adaptive',
    learning_rate_init = 0.001,
    max_iter = 1000,
    tol = 1e-5,
    early_stopping = True,
    random_state = 42,
)

# The scaler is a pipeline step, so it is fitted together with the model on
# whatever data the pipeline is trained on in each fold.
pipeline = Pipeline([
    ("scaler", RobustScaler()),
    ("model", model),
])

# Three shuffled, stratified folds with a fixed seed.
strat_k_fold = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)
scores = cross_validate(
    pipeline, X, y,
    cv = strat_k_fold,
    n_jobs = 4,
    return_train_score = True,
)

# Mean score across folds, shown as a percentage.
print("Train Accuracy: {:.2f} %".format(100 * scores['train_score'].mean()))
print("Test Accuracy: {:.2f} %".format(100 * scores['test_score'].mean()))


Train Accuracy: 17.64 %
Test Accuracy: 16.32 %


In [21]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Grid search over hidden-layer layouts: one layer, two layers with a halved
# second layer, and two equal layers, at five different widths.
X, y = df.drop(columns = 'Scale'), df['Scale']

# hidden_layer_sizes is left unset here; the grid below supplies it.
model = MLPClassifier(
    activation = 'relu',
    solver = 'adam',
    alpha = 1e-4,
    batch_size = 1000,
    learning_rate = 'adaptive',
    learning_rate_init = 0.001,
    max_iter = 1000,
    tol = 1e-5,
    early_stopping = True,
    random_state = 42,
)

pipeline = Pipeline([
    ("scaler", RobustScaler()),
    ("model", model),
])

# The 'model__' prefix routes the parameter to the pipeline step named "model".
params = {
    'model__hidden_layer_sizes': [
        (15,), (100,), (300,), (1200,), (1500,),
        (15, 7), (100, 50), (300, 150), (1200, 600), (1500, 750),
        (15, 15), (100, 100), (300, 300), (1200, 1200), (1500, 1500),
    ],
}

strat_k_fold = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)
grid_search_cv = GridSearchCV(
    pipeline,
    params,
    cv = strat_k_fold,
    n_jobs = 4,
    verbose = 5,
)

grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)


Fitting 3 folds for each of 15 candidates, totalling 45 fits
{'model__hidden_layer_sizes': (1500, 1500)}


In [7]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Grid search over wider networks: widths 1500, 2000 and 3000, each tried with
# one, two and three equal hidden layers.
X, y = df.drop(columns = 'Scale'), df['Scale']

# hidden_layer_sizes is left unset here; the grid below supplies it.
model = MLPClassifier(
    activation = 'relu',
    solver = 'adam',
    alpha = 1e-4,
    batch_size = 1000,
    learning_rate = 'adaptive',
    learning_rate_init = 0.001,
    max_iter = 1000,
    tol = 1e-5,
    early_stopping = True,
    random_state = 42,
)

pipeline = Pipeline([
    ("scaler", RobustScaler()),
    ("model", model),
])

# The 'model__' prefix routes the parameter to the pipeline step named "model".
params = {
    'model__hidden_layer_sizes': [
        (1500,), (1500, 1500), (1500, 1500, 1500),
        (2000,), (2000, 2000), (2000, 2000, 2000),
        (3000,), (3000, 3000), (3000, 3000, 3000),
    ],
}

strat_k_fold = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)
grid_search_cv = GridSearchCV(
    pipeline,
    params,
    cv = strat_k_fold,
    n_jobs = 4,
    verbose = 5,
)

grid_search_cv.fit(X, y)
print(grid_search_cv.best_params_)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
{'model__hidden_layer_sizes': (3000, 3000)}


In [3]:
# Hold-out evaluation of a five-hidden-layer MLP (100-300-300-300-100 units)
# with a very small L2 penalty (alpha) and at most 100 training iterations.
# Features are every column except 'Scale'; 'Scale' is the class label.
X = df.drop('Scale', axis = 1)
y = df['Scale']

# 70/30 train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

# Fit the scaler on the training portion only, then apply the same fitted
# transform to the test portion so test data never influences preprocessing.
scaler = RobustScaler()
scaled_trainX = scaler.fit_transform(X_train)
scaled_testX = scaler.transform(X_test)

# verbose = True prints the per-iteration loss; with early_stopping = True the
# log also includes a validation score for each iteration.
model = MLPClassifier(random_state = 42, hidden_layer_sizes = (100, 300, 300, 300, 100),
                        max_iter = 100, activation = 'relu',
                        solver = 'adam', batch_size = 1000,
                        tol = 0.00001, learning_rate_init = 0.001,
                        learning_rate = 'adaptive',
                        early_stopping = True, alpha = 0.0000001,
                        verbose = True)

model.fit(scaled_trainX, y_train)
y_pred = model.predict(scaled_testX)

# Report accuracy on both partitions, followed by per-class metrics on the test set.
print("Train Accuracy: {:.2f} %".format(model.score(scaled_trainX, y_train) * 100))
print("Test Accuracy: {:.2f} %".format(accuracy_score(y_test, y_pred) * 100))
print('\n')
# Heading previously misspelled as "Classifiction Report".
print("Classification Report")
# zero_division = 0 reports 0 for metrics whose denominator is zero
# (e.g. a class that is never predicted) instead of emitting a warning.
print(classification_report(y_test, y_pred, zero_division = 0))


Iteration 1, loss = 2.38411058
Validation score: 0.132711
Iteration 2, loss = 2.35021620
Validation score: 0.144683
Iteration 3, loss = 2.32579089
Validation score: 0.156656
Iteration 4, loss = 2.30506838
Validation score: 0.162946
Iteration 5, loss = 2.29100954
Validation score: 0.160613
Iteration 6, loss = 2.27718846
Validation score: 0.163758
Iteration 7, loss = 2.26260153
Validation score: 0.163048
Iteration 8, loss = 2.24918019
Validation score: 0.164570
Iteration 9, loss = 2.23587368
Validation score: 0.165077
Iteration 10, loss = 2.21902683
Validation score: 0.170455
Iteration 11, loss = 2.20202731
Validation score: 0.171266
Iteration 12, loss = 2.18574860
Validation score: 0.168628
Iteration 13, loss = 2.16808501
Validation score: 0.166903
Iteration 14, loss = 2.14778482
Validation score: 0.167411
Iteration 15, loss = 2.12910789
Validation score: 0.167106
Iteration 16, loss = 2.10474867
Validation score: 0.164671
Iteration 17, loss = 2.07852662
Validation score: 0.161830
Iterat