# Chapter 4: Machine Learning state of the art
## Figure 4.17-19
Multilayer Perceptron on the aging problem.

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

plt.style.use('seaborn-paper')
plt.rc('text', usetex=False)

# 0. Load data
see `figure_4.5-13.ipynb`

In [22]:
# Load the data (drop NaNs)
df = pd.read_csv('../data/aging_data.csv', header=0, index_col=0).dropna()
columns = df.columns.drop(['age'])

# Learning data
dfx = df[columns]

# Learning labels
dfy = df['age']

# Convert gender [m/f] in [0,1]
dfx.loc[:, 'gender'] = dfx['gender'].apply(lambda x: int(x == 'f'))

# Preprocess data
from sklearn.preprocessing import StandardScaler

dfx.iloc[:,1:] = StandardScaler().fit_transform(dfx.values[:,1:])

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(dfx, dfy, test_size=0.33, random_state=42)

print('{} variables'.format(x_train.shape[1]))
print('{} training samples'.format(x_train.shape[0]))
print('{} test samples'.format(x_test.shape[0]))

12 variables
74 training samples
37 test samples


# 1. Fit `MLPRegressor`

In [26]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

np.random.seed(0)
model = GridSearchCV(MLPRegressor(early_stopping=True, max_iter=1000,
                                  hidden_layer_sizes=[1024], activation='relu'),
                     param_grid={'alpha': np.logspace(-5, 2, 20)}, n_jobs=-1)
model.fit(x_train.values, y_train.values)

y_pred = model.predict(x_test)
mae = metrics.mean_absolute_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)
evs = metrics.explained_variance_score(y_test, y_pred)

print('n hidden units: {}'.format(model.best_estimator_.hidden_layer_sizes))
print('alpha: {}'.format(model.best_estimator_.alpha))
print('activation: {}'.format(model.best_estimator_.activation))
print('n_iter_: {}'.format(model.best_estimator_.n_iter_))
print('EVS: {:2.5f}'.format(evs))
print('R2: {:2.3f}'.format(r2))
print('MAE: {:2.3f}'.format(mae))

n hidden units: [1024]
alpha: 0.0088586679041
activation: relu
n_iter_: 329
EVS: 0.78841
R2: 0.775
MAE: 9.139
