In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

import os
import sys
# Make project-local packages importable: os.path.abspath('') resolves to the
# current working directory, and the directory two levels above it is appended
# to the module search path (presumably the repository root that contains
# `src` -- verify against the project layout). This must run before the
# `src.preprocess` import below.
sys.path.append(os.path.join(
  os.path.abspath(''), '..', '..')
)

from src.preprocess import Preprocess

import warnings
# Blanket filter: from here on, warnings of every category are ignored.
# NOTE(review): this may also hide scikit-learn ConvergenceWarning, so an MLP
# that stops at max_iter without converging would go unreported -- consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [None]:
# Load the raw splits; the first CSV column becomes the index of each frame.
x_train: pd.DataFrame = pd.read_csv('../../data/train.csv', index_col=0)
x_test: pd.DataFrame = pd.read_csv('../../data/test.csv', index_col=0)

# Pull the target column out of the raw training frame before preprocessing.
y_train: pd.Series = x_train['Survived']

# Run the project's preprocessing step, configured with a StandardScaler.
# Its return value is unpacked back into x_train / x_test, replacing the raw
# frames loaded above.
preprocessor = Preprocess(scaler=StandardScaler())
x_train, x_test = preprocessor.apply(x_train, x_test)

# Search space: a single hidden layer whose width goes from 1 to 14 units.
params = {'hidden_layer_sizes': [(n_units,) for n_units in range(1, 15)]}

# Seeded base estimator so repeated runs of the search are comparable.
base_mlp = MLPClassifier(max_iter=400, random_state=3)

# 5-fold grid search scored by balanced accuracy, using all available cores.
# Train scores are kept so train and held-out curves can be compared later.
# NOTE(review): preprocessing is applied before this search, so whatever it
# fits (e.g. the scaler) presumably saw the full training set rather than each
# CV training fold -- confirm whether that is acceptable.
mlp = GridSearchCV(
  estimator=base_mlp,
  param_grid=params,
  cv=5,
  scoring='balanced_accuracy',
  return_train_score=True,
  n_jobs=-1,
)
mlp.fit(x_train, y_train)

# Keep only the columns needed for the overfit analysis and persist them.
results = pd.DataFrame(mlp.cv_results_).loc[:, [
  'param_hidden_layer_sizes',
  'mean_test_score',
  'rank_test_score',
  'mean_train_score',
]]
results.to_csv('mlp_results.csv', index=False)

In [None]:
# Overfitting diagnostic: mean train score vs. mean cross-validated ("test")
# score for each candidate in the grid search results.
#
# A fresh, explicit figure is created so that re-running this cell never draws
# on top of a figure left over from an earlier run.
fig, ax = plt.subplots()

# The parameter values are tuples such as (3,); their string form is used so
# each candidate becomes one categorical tick on the x-axis.
layer_sizes = results['param_hidden_layer_sizes'].astype('str')
ax.plot(layer_sizes, results['mean_test_score'], label='test')
ax.plot(layer_sizes, results['mean_train_score'], label='train')

# The swept quantity is the number of units in a single hidden layer, not the
# number of hidden layers, so the axis and title are labelled accordingly.
ax.set_xlabel('Hidden Layer Size (units)')
ax.set_ylabel('Balanced Accuracy')
ax.set_title('Hidden Layer Size Overfit Analysis')
ax.legend()
fig.savefig('layers.svg', format='svg')