#### Carregar o dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import RobustScaler
from sklearn.utils import resample

df = pd.read_csv('../dataset/preprocessed-sam-dataset.csv', sep='|',
                 dtype = {'CZ': 'float32', 'FZ': 'float32', 'Fp1': 'float32', 'F3': 'float32',
                          'FC1': 'float32', 'FC5': 'float32', 'FT9': 'float32', 'T7': 'float32',
                          'CP5': 'float32', 'P3': 'float32', 'P7': 'float32', 'PO9': 'float32',
                          'PZ': 'float32', 'O2': 'float32', 'P4': 'float32', 'CP6': 'float32',
                          'FT10': 'float32', 'FC6': 'float32', 'F8': 'float32', 'Fp2': 'float32',
                          'Scale': 'int8'})


#### Size do dataset

In [2]:
df.shape

(140800, 21)

#### Executar data augmentation no dataset

In [3]:

df_list = []

for count in range(0, 11) :
    df_list.append(df[df.iloc[:, 20] == count])

df_result = pd.DataFrame(columns = ['CZ', 'FZ', 'Fp1', 'F3', 'FC1', 'FC5', 'FT9', 'T7',
                          'CP5', 'P3', 'P7', 'PO9', 'PZ', 'O2', 'P4', 'CP6',
                          'FT10', 'FC6', 'F8', 'Fp2', 'Scale'])

for count in range(0, 11) :
    df_upsampled = resample(df_list[count], replace = True, n_samples = 1000000,
                            stratify = df_list[count])
    df_result = pd.concat([df_result, df_upsampled])

print("Dataset shape: {}".format(df_result.shape))

print(df_result.Scale.value_counts())


Dataset shape: (11000000, 21)
0     1000000
1     1000000
2     1000000
3     1000000
4     1000000
5     1000000
6     1000000
7     1000000
8     1000000
9     1000000
10    1000000
Name: Scale, dtype: int64


#### Treinar o modelo e exibir métricas

In [11]:
X = df_result.drop('Scale', axis = 1)
y = df_result['Scale'].astype('int8') 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

scaler = RobustScaler()
scaled_trainX = scaler.fit_transform(X_train)
scaled_testX = scaler.transform(X_test)

model = MLPClassifier(random_state = 42, hidden_layer_sizes = (100,),
                        max_iter = 100, activation = 'relu',
                        solver = 'adam', batch_size = 100,
                        tol = 0.00001, learning_rate_init = 0.001,
                        learning_rate = 'constant',
                        early_stopping = True, alpha = 0.0000001,
                        verbose = True)

model.fit(scaled_trainX, y_train)
y_pred = model.predict(scaled_testX)

print("Train Accuracy: {:.2f} %".format(model.score(scaled_trainX, y_train) * 100))
print("Test Accuracy: {:.2f} %".format(accuracy_score(y_test, y_pred) * 100))
print('\n')
print("Classifiction Report")
print(classification_report(y_test, y_pred, zero_division = 0))


Iteration 1, loss = 2.29378419
Validation score: 0.180595
Iteration 2, loss = 2.27215570
Validation score: 0.183006
Iteration 3, loss = 2.26679263
Validation score: 0.183538
Iteration 4, loss = 2.26411209
Validation score: 0.184887
Iteration 5, loss = 2.26251455
Validation score: 0.187421
Iteration 6, loss = 2.26147135
Validation score: 0.188327
Iteration 7, loss = 2.26076799
Validation score: 0.187970
Iteration 8, loss = 2.26011404
Validation score: 0.187557
Iteration 9, loss = 2.25965360
Validation score: 0.186535
Iteration 10, loss = 2.25925333
Validation score: 0.189538
Iteration 11, loss = 2.25881073
Validation score: 0.188352
Iteration 12, loss = 2.25849534
Validation score: 0.189135
Iteration 13, loss = 2.25825464
Validation score: 0.189739
Iteration 14, loss = 2.25810439
Validation score: 0.188799
Iteration 15, loss = 2.25792420
Validation score: 0.190117
Iteration 16, loss = 2.25775272
Validation score: 0.188290
Iteration 17, loss = 2.25764129
Validation score: 0.190825
Iterat

#### Treinar o modelo utilizando cross validation (10 fold)

In [13]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline

X = df_result.drop('Scale', axis = 1)
y = df_result['Scale'].astype('int8') 

model = MLPClassifier(random_state = 42, hidden_layer_sizes = (100,),
                        max_iter = 100, activation = 'relu',
                        solver = 'adam', batch_size = 100,
                        tol = 0.00001, learning_rate_init = 0.001,
                        learning_rate = 'adaptive',
                        early_stopping = True, alpha = 0.0000001,
                        verbose = True)

pipeline = Pipeline(steps = [
  ("scaler", RobustScaler()),  
  ("model", model)
])

strat_k_fold = StratifiedKFold(n_splits = 3, shuffle = True, random_state = 42)
scores = cross_validate(pipeline, X, y, cv = strat_k_fold, n_jobs = 6, return_train_score = True)

print("Train Accuracy: {:.2f} %".format(scores['train_score'].mean() * 100))
print("Test Accuracy: {:.2f} %".format(scores['test_score'].mean() * 100))


Train Accuracy: 19.07 %
Test Accuracy: 19.05 %


#### Treinar o modelo e exibir métricas

In [4]:
X = df_result.drop('Scale', axis = 1)
y = df_result['Scale'].astype('int8') 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

scaler = RobustScaler()
scaled_trainX = scaler.fit_transform(X_train)
scaled_testX = scaler.transform(X_test)

model = MLPClassifier(random_state = 42, hidden_layer_sizes = (100, 300, 300, 300, 100),
                        max_iter = 20, activation = 'relu',
                        solver = 'adam', batch_size = 100,
                        tol = 0.00001, learning_rate_init = 0.001,
                        learning_rate = 'constant',
                        early_stopping = True, alpha = 0.0000001,
                        verbose = True)

model.fit(scaled_trainX, y_train)
y_pred = model.predict(scaled_testX)

print("Train Accuracy: {:.2f} %".format(model.score(scaled_trainX, y_train) * 100))
print("Test Accuracy: {:.2f} %".format(accuracy_score(y_test, y_pred) * 100))
print('\n')
print("Classifiction Report")
print(classification_report(y_test, y_pred, zero_division = 0))


Iteration 1, loss = 1.66081497
Validation score: 0.615521
Iteration 2, loss = 0.91553486
Validation score: 0.731934
Iteration 3, loss = 0.68395756
Validation score: 0.791449
Iteration 4, loss = 0.56919546
Validation score: 0.812395
Iteration 5, loss = 0.50320048
Validation score: 0.819009
Iteration 6, loss = 0.46212103
Validation score: 0.839023
Iteration 7, loss = 0.43539686
Validation score: 0.853694
Iteration 8, loss = 0.41591609
Validation score: 0.866001
Iteration 9, loss = 0.40266611
Validation score: 0.867931
Iteration 10, loss = 0.39287366
Validation score: 0.874804
Iteration 11, loss = 0.38528790
Validation score: 0.881238
Iteration 12, loss = 0.37835373
Validation score: 0.870162
Iteration 13, loss = 0.37411638
Validation score: 0.893905
Iteration 14, loss = 0.36983117
Validation score: 0.888092
Iteration 15, loss = 0.36727278
Validation score: 0.882906
Iteration 16, loss = 0.36510958
Validation score: 0.881997
Iteration 17, loss = 0.36313828
Validation score: 0.879690
Iterat



Train Accuracy: 89.89 %
Test Accuracy: 89.62 %


Classifiction Report
              precision    recall  f1-score   support

           0       0.93      0.85      0.89    300010
           1       0.90      0.90      0.90    301279
           2       0.86      0.92      0.89    299508
           3       0.90      0.89      0.89    300146
           4       0.88      0.90      0.89    299687
           5       0.89      0.89      0.89    298892
           6       0.93      0.86      0.89    300353
           7       0.91      0.91      0.91    300178
           8       0.87      0.91      0.89    300161
           9       0.91      0.89      0.90    300213
          10       0.89      0.95      0.92    299573

    accuracy                           0.90   3300000
   macro avg       0.90      0.90      0.90   3300000
weighted avg       0.90      0.90      0.90   3300000



#### Treinar o modelo e exibir métricas

In [4]:
X = df_result.drop('Scale', axis = 1)
y = df_result['Scale'].astype('int8') 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

scaler = RobustScaler()
scaled_trainX = scaler.fit_transform(X_train)
scaled_testX = scaler.transform(X_test)

model = MLPClassifier(random_state = 42, hidden_layer_sizes = (100, 300, 500, 300, 100),
                        max_iter = 20, activation = 'relu',
                        solver = 'adam', batch_size = 100,
                        tol = 0.00001, learning_rate_init = 0.001,
                        learning_rate = 'constant',
                        early_stopping = True, alpha = 0.0000001,
                        verbose = True)

model.fit(scaled_trainX, y_train)
y_pred = model.predict(scaled_testX)

print("Train Accuracy: {:.2f} %".format(model.score(scaled_trainX, y_train) * 100))
print("Test Accuracy: {:.2f} %".format(accuracy_score(y_test, y_pred) * 100))
print('\n')
print("Classifiction Report")
print(classification_report(y_test, y_pred, zero_division = 0))


Iteration 1, loss = 1.40404738
Validation score: 0.756571
Iteration 2, loss = 0.51701186
Validation score: 0.863340
Iteration 3, loss = 0.32961934
Validation score: 0.904308
Iteration 4, loss = 0.25709660
Validation score: 0.921905
Iteration 5, loss = 0.22370169
Validation score: 0.931749
Iteration 6, loss = 0.20613778
Validation score: 0.934939
Iteration 7, loss = 0.19646341
Validation score: 0.934761
Iteration 8, loss = 0.19009077
Validation score: 0.939513
Iteration 9, loss = 0.18649616
Validation score: 0.950226
Iteration 10, loss = 0.18386348
Validation score: 0.943948
Iteration 11, loss = 0.18217685
Validation score: 0.947909
Iteration 12, loss = 0.18175873
Validation score: 0.945912
Iteration 13, loss = 0.18106357
Validation score: 0.951079
Iteration 14, loss = 0.18124807
Validation score: 0.955466
Iteration 15, loss = 0.18185097
Validation score: 0.947378
Iteration 16, loss = 0.18150321
Validation score: 0.952586
Iteration 17, loss = 0.18233813
Validation score: 0.956762
Iterat



Train Accuracy: 95.93 %
Test Accuracy: 95.83 %


Classifiction Report
              precision    recall  f1-score   support

           0       0.97      0.95      0.96    300010
           1       0.96      0.97      0.96    301279
           2       0.95      0.97      0.96    299508
           3       0.96      0.95      0.96    300146
           4       0.97      0.94      0.96    299687
           5       0.95      0.96      0.95    298892
           6       0.97      0.95      0.96    300353
           7       0.96      0.96      0.96    300178
           8       0.96      0.95      0.96    300161
           9       0.95      0.97      0.96    300213
          10       0.95      0.98      0.96    299573

    accuracy                           0.96   3300000
   macro avg       0.96      0.96      0.96   3300000
weighted avg       0.96      0.96      0.96   3300000

