# Ex 5. 
Train a Linear Regression model, an MLP Regressor with 2 hidden layers of 10 \
neurons each and no activation functions, and another MLP Regressor with 2 hidden \
layers of 10 neurons each using ReLU activation functions. (Use `random_state=0` on the \
MLPs, regardless of the run). Plot a boxplot of the test MAE of each model.

> Average the performance of the models over 10 separate runs. In each \
> run, use a different 80-20 train-test split by setting `random_state=i`, with i=1..10.

In [None]:
# Code for ex5
import pandas as pd
import numpy as np
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.exceptions import ConvergenceWarning
import matplotlib.pyplot as plt
import seaborn as sns

# Convergence-warning suppression is currently disabled; uncomment the next
# line to silence ConvergenceWarning messages during fitting.
#warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Load the data: 'target' is the regression output, every other column is a feature.
df = pd.read_csv('parkinsons.csv')
y = df['target']
X = df.drop(columns=['target'])

# Test MAE of each model, one entry per train/test split.
mae_linear, mae_mlp_no_activation, mae_mlp_relu = [], [], []

# Ten runs, each on its own 80-20 split (random_state=i for i in 1..10).
for i in range(1, 11):
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=i
    )

    # Linear regression baseline.
    linear_model = LinearRegression().fit(X_train, y_train)
    y_pred_linear = linear_model.predict(X_test)
    mae_linear.append(mean_absolute_error(y_test, y_pred_linear))

    # MLP with two hidden layers of 10 units and identity (i.e. no) activation.
    # random_state is fixed at 0 so only the data split changes between runs.
    mlp_no_activation = MLPRegressor(
        hidden_layer_sizes=(10, 10),
        activation='identity',
        random_state=0,
    ).fit(X_train, y_train)
    y_pred_no_activation = mlp_no_activation.predict(X_test)
    mae_mlp_no_activation.append(mean_absolute_error(y_test, y_pred_no_activation))

    # Same architecture, but with ReLU activations in the hidden layers.
    mlp_relu = MLPRegressor(
        hidden_layer_sizes=(10, 10),
        activation='relu',
        random_state=0,
    ).fit(X_train, y_train)
    y_pred_relu = mlp_relu.predict(X_test)
    mae_mlp_relu.append(mean_absolute_error(y_test, y_pred_relu))

In [None]:
# Box plot of the per-split test MAE, one box per model.
labels = ['Linear Regression', 'MLP No Activation', 'MLP ReLU']
mae_per_model = [mae_linear, mae_mlp_no_activation, mae_mlp_relu]

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=mae_per_model, ax=ax)

# The boxes are drawn at positions 0, 1, 2; name each position after its model.
ax.set_xticks(np.arange(len(labels)))
ax.set_xticklabels(labels)

ax.set_ylabel('Test MAE')
ax.set_title('Comparison of Test MAE Across Models')
ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.5)
plt.show()

# Ex 6.
Compare a Linear Regression with an MLP with no activations, and explain the impact \
and the importance of using activation functions in an MLP. Support your reasoning with the \
results from the boxplots.

> Refer to the report for the explanation

# Ex 7.
Using a 80-20 train-test split with `random_state=0`, use a Grid Search to tune the \
hyperparameters of an MLP regressor with two hidden layers (size 10 each). The \
parameters to search over are: (i) L2 penalty, with the values $\{0.0001, 0.001, 0.01\}$; (ii) \
learning rate, with the values $\{0.001, 0.01, 0.1\}$; and (iii) batch size, with the values \
$\{32, 64, 128\}$. Plot the test MAE for each combination of hyperparameters, report the \
best combination, and discuss the trade-offs between the combinations.

> Refer to the report for the explanation

In [None]:
# Code for ex7
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
df = pd.read_csv('parkinsons.csv')

# Split into the target column (y) and the predictor columns (X)
y = df['target']
X = df.drop(columns=['target'])

# Single 80-20 train/test split used for every hyperparameter combination
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Hyperparameter values to search over
param_grid = {
    'alpha': [0.0001, 0.001, 0.01],            # L2 penalty
    'learning_rate_init': [0.001, 0.01, 0.1],  # initial learning rate
    'batch_size': [32, 64, 128],               # mini-batch size
}

# Every (alpha, learning rate, batch size) triple, with alpha varying slowest
# and batch size fastest.
hyperparameter_combos = (
    (alpha, learning_rate_init, batch_size)
    for alpha in param_grid['alpha']
    for learning_rate_init in param_grid['learning_rate_init']
    for batch_size in param_grid['batch_size']
)

# Manual grid search: fit one MLP per combination and record its test MAE
results = []
for alpha, learning_rate_init, batch_size in hyperparameter_combos:
    model = MLPRegressor(
        hidden_layer_sizes=(10, 10),
        alpha=alpha,
        learning_rate_init=learning_rate_init,
        batch_size=batch_size,
        random_state=0,
    )
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)

    results.append(
        dict(
            alpha=alpha,
            learning_rate_init=learning_rate_init,
            batch_size=batch_size,
            mae=mae,
        )
    )

# One row per combination: the three hyperparameters plus the test MAE
results_df = pd.DataFrame(results)

In [None]:
# Plot and show results
# One heatmap per batch size (alpha on the rows, learning rate on the columns).
# All panels share the same colour limits so cells are comparable across panels.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
vmin = results_df['mae'].min()
vmax = results_df['mae'].max()

for ax, batch_size in zip(axes, param_grid['batch_size']):
    subset = results_df[results_df['batch_size'] == batch_size]

    # Reshape the long-format results into an alpha x learning-rate grid
    heatmap_data = subset.pivot(index='alpha', columns='learning_rate_init', values='mae')

    # Per-panel colour bars are disabled; a single shared one is added below
    sns.heatmap(
        heatmap_data,
        annot=True,
        cmap='RdYlGn_r',
        fmt=".5f",
        linewidths=0.5,
        ax=ax,
        vmin=vmin,
        vmax=vmax,
        cbar=False,
    )

    ax.set_title(f'Batch Size = {batch_size}')
    ax.set_xlabel('Learning Rate Init')
    ax.set_ylabel('Alpha')

# Shared colour bar; any panel's mesh works because they all use vmin/vmax
cbar = fig.colorbar(axes[0].collections[0], ax=axes, orientation='vertical', fraction=0.02, pad=0.04)
cbar.set_label('Test MAE Score')

plt.show()

# Selecting a single row across int and float columns upcasts it to float64,
# so batch_size would be reported as e.g. 64.0. Cast each hyperparameter back
# to its proper type before printing.
best_row = results_df.loc[results_df['mae'].idxmin()]
best_params = {
    'alpha': float(best_row['alpha']),
    'learning_rate_init': float(best_row['learning_rate_init']),
    'batch_size': int(best_row['batch_size']),
}

print(f"Best Hyperparameters: {best_params}")
print(f"Best Test MAE: {vmin}")