# 0.0 Import libraries

In [1]:
import pandas                    as pd
import numpy                     as np

from sklearn import linear_model as lm
from sklearn import metrics      as mt

# 0.1 Load dataset

In [2]:
# Load the pre-split regression data. Each partition is read as a
# (features, target) pair, in the order: training, validation, test.
X_train, y_train = (
    pd.read_csv( csv_path ) for csv_path in (
        '../../dataset/regression/X_training.csv',
        '../../dataset/regression/y_training.csv',
    )
)

# Note: the validation target file is named y_val.csv, unlike X_validation.csv.
X_val, y_val = (
    pd.read_csv( csv_path ) for csv_path in (
        '../../dataset/regression/X_validation.csv',
        '../../dataset/regression/y_val.csv',
    )
)

X_test, y_test = (
    pd.read_csv( csv_path ) for csv_path in (
        '../../dataset/regression/X_test.csv',
        '../../dataset/regression/y_test.csv',
    )
)

# 1.0 Training model

## Dados de treino


In [23]:
# Grid search over ElasticNet hyperparameters, scored on the training data.
#
# l1_ratio values 0 and 1 are deliberately left out to focus on the
# intermediate mixes: LASSO and Ridge regressions were already fitted in
# separate notebooks, so ElasticNet with l1_ratio 0 or 1 would not add
# relevant information here.
a = np.arange( 1, 4, 1 )
l1_ratios = np.arange( 0.1, 1.0, 0.1 )

# Training R2 of every fit, appended in loop order (alpha outer, l1_ratio
# inner): entry k belongs to a[k // len(l1_ratios)] and
# l1_ratios[k % len(l1_ratios)].
# NOTE(review): these are in-sample scores; consider scoring on X_val when
# the list is used to pick hyperparameters.
r2_list = []

for alpha in a:
    for l1_value in l1_ratios:
        # define model
        elastic_net = lm.ElasticNet( alpha=alpha, l1_ratio=l1_value, max_iter=1000 )

        # train model
        elastic_net.fit( X_train, y_train )

        # in-sample predictions
        yhat_elastic_net = elastic_net.predict( X_train )

        # R squared
        r2 = np.round( mt.r2_score( y_train, yhat_elastic_net ), 3 )
        r2_list.append( r2 )

        # MSE: keep the unrounded value so RMSE is not derived from an
        # already-rounded number
        mse_raw = mt.mean_squared_error( y_train, yhat_elastic_net )
        mse = np.round( mse_raw, 3 )
        # RMSE
        rmse = np.round( np.sqrt( mse_raw ), 3 )
        # MAE
        mae = np.round( mt.mean_absolute_error( y_train, yhat_elastic_net ), 3 )
        # MAPE
        mape = np.round( mt.mean_absolute_percentage_error( y_train, yhat_elastic_net ), 3 )

        # Round l1_ratio for display only (np.arange float steps can carry
        # representation noise); the loop variable itself is left untouched.
        l1_label = np.round( l1_value, 1 )
        print('Alpha: {} | l1_ratio: {} | R2: {} | MSE: {} | RMSE: {} | MAE: {} | MAPE: {}'.format(alpha, l1_label, r2, mse, rmse, mae, mape))

Alpha: 1 | l1_ratio: 0.1 | R2: 0.01 | MSE: 473.211 | RMSE: 21.753 | MAE: 17.282 | MAPE: 8.722
Alpha: 1 | l1_ratio: 0.2 | R2: 0.009 | MSE: 473.518 | RMSE: 21.76 | MAE: 17.286 | MAPE: 8.725
Alpha: 1 | l1_ratio: 0.3 | R2: 0.009 | MSE: 473.833 | RMSE: 21.768 | MAE: 17.291 | MAPE: 8.728
Alpha: 1 | l1_ratio: 0.4 | R2: 0.008 | MSE: 474.077 | RMSE: 21.773 | MAE: 17.295 | MAPE: 8.73
Alpha: 1 | l1_ratio: 0.5 | R2: 0.008 | MSE: 474.269 | RMSE: 21.778 | MAE: 17.3 | MAPE: 8.732
Alpha: 1 | l1_ratio: 0.6 | R2: 0.008 | MSE: 474.39 | RMSE: 21.78 | MAE: 17.302 | MAPE: 8.734
Alpha: 1 | l1_ratio: 0.7 | R2: 0.007 | MSE: 474.481 | RMSE: 21.783 | MAE: 17.305 | MAPE: 8.736
Alpha: 1 | l1_ratio: 0.8 | R2: 0.007 | MSE: 474.519 | RMSE: 21.783 | MAE: 17.306 | MAPE: 8.737
Alpha: 1 | l1_ratio: 0.9 | R2: 0.007 | MSE: 474.499 | RMSE: 21.783 | MAE: 17.306 | MAPE: 8.737
Alpha: 2 | l1_ratio: 0.1 | R2: 0.006 | MSE: 474.952 | RMSE: 21.793 | MAE: 17.308 | MAPE: 8.732
Alpha: 2 | l1_ratio: 0.2 | R2: 0.006 | MSE: 475.36 | RMSE

## Dados de validação

In [54]:
# Refit the model with the best hyperparameters found in the training grid
# search (fit on the training data) and measure it on the validation data.
#
# r2_list is flat: one entry per (alpha, l1_ratio) pair, with alpha in the
# outer loop. Its position must therefore be split into a row (alpha) and a
# column (l1_ratio) index; using it directly on `a` / `l1_ratios` only works
# when the best entry happens to be at position 0.
best_alpha = r2_list.index( max( r2_list ) )  # flat position in r2_list
alpha_idx, l1_idx = divmod( best_alpha, len( l1_ratios ) )
best_alpha_value = a[alpha_idx]
best_l1_ratio = l1_ratios[l1_idx]

# define model
elastic_net = lm.ElasticNet( alpha=best_alpha_value, l1_ratio=best_l1_ratio, max_iter=1000 )

# model training
elastic_net.fit( X_train, y_train )

# performance
yhat_val = elastic_net.predict( X_val )

# R squared
r2 = np.round( mt.r2_score( y_val, yhat_val ), 3 )
# MSE: keep the unrounded value so RMSE is not derived from a rounded number
mse_raw = mt.mean_squared_error( y_val, yhat_val )
mse = np.round( mse_raw, 3 )
# RMSE
rmse = np.round( np.sqrt( mse_raw ), 3 )
# MAE
mae = np.round( mt.mean_absolute_error( y_val, yhat_val ), 3 )
# MAPE
mape = np.round( mt.mean_absolute_percentage_error( y_val, yhat_val ), 3 )

# Metrics (l1_ratio is rounded for display only)
print('alpha {} | l1_ratio {} | R2: {} | MSE: {} | RMSE: {} | MAE: {} | MAPE: {}'.format(best_alpha_value, np.round( best_l1_ratio, 1 ), r2, mse, rmse, mae, mape) )


alpha 1 | l1_ratio 0.1 | R2: 0.01 | MSE: 472.682 | RMSE: 21.741 | MAE: 17.249 | MAPE: 8.687


## Dados de teste

In [55]:
# Refit the model with the best hyperparameters found in the training grid
# search on training + validation data, then measure it on the test data.
#
# r2_list is flat: one entry per (alpha, l1_ratio) pair, with alpha in the
# outer loop. Its position must therefore be split into a row (alpha) and a
# column (l1_ratio) index; using it directly on `a` / `l1_ratios` only works
# when the best entry happens to be at position 0.
best_alpha = r2_list.index( max( r2_list ) )  # flat position in r2_list
alpha_idx, l1_idx = divmod( best_alpha, len( l1_ratios ) )
best_alpha_value = a[alpha_idx]
best_l1_ratio = l1_ratios[l1_idx]

# define model
elastic_net = lm.ElasticNet( alpha=best_alpha_value, l1_ratio=best_l1_ratio, max_iter=1000 )

# model training on the stacked training and validation partitions
elastic_net.fit( np.concatenate( ( X_train, X_val ) ),
                 np.concatenate( ( y_train, y_val ) ) )

# performance
yhat_test = elastic_net.predict( X_test )

# R squared
r2 = np.round( mt.r2_score( y_test, yhat_test ), 3 )
# MSE: keep the unrounded value so RMSE is not derived from a rounded number
mse_raw = mt.mean_squared_error( y_test, yhat_test )
mse = np.round( mse_raw, 3 )
# RMSE
rmse = np.round( np.sqrt( mse_raw ), 3 )
# MAE
mae = np.round( mt.mean_absolute_error( y_test, yhat_test ), 3 )
# MAPE
mape = np.round( mt.mean_absolute_percentage_error( y_test, yhat_test ), 3 )

# Metrics (l1_ratio is rounded for display only)
print('alpha {} | l1_ratio {} | R2: {} | MSE: {} | RMSE: {} | MAE: {} | MAPE: {}'.format(best_alpha_value, np.round( best_l1_ratio, 1 ), r2, mse, rmse, mae, mape) )


alpha 1 | l1_ratio 0.1 | R2: 0.01 | MSE: 481.995 | RMSE: 21.954 | MAE: 17.452 | MAPE: 8.737


