# 0.0 Import library

In [1]:
import pandas                       as pd
import numpy                        as np

from sklearn import linear_model    as lm
from sklearn import metrics         as mt

# 0.1 Load dataset

In [2]:
# Each split is stored as a pair of CSV files: a feature matrix (X_*) and
# its target (y_*). Read every pair in one step, features first.

# Training split.
X_train, y_train = (
    pd.read_csv( csv_path )
    for csv_path in ( '../../dataset/regression/X_training.csv',
                      '../../dataset/regression/y_training.csv' )
)

# Validation split.
# NOTE(review): the target file is named 'y_val.csv' while the features use
# 'X_validation.csv' -- confirm this matches the files on disk.
X_val, y_val = (
    pd.read_csv( csv_path )
    for csv_path in ( '../../dataset/regression/X_validation.csv',
                      '../../dataset/regression/y_val.csv' )
)

# Test split.
X_test, y_test = (
    pd.read_csv( csv_path )
    for csv_path in ( '../../dataset/regression/X_test.csv',
                      '../../dataset/regression/y_test.csv' )
)

# 1.0 Model training

## Dados de treino

In [14]:
# Lasso (L1-regularised linear regression): sweep the regularisation strength
# and report how well each fitted model reproduces the training data.
#
# NOTE(review): these are in-sample scores. Later cells pick the alpha with
# the highest value in r2_list, i.e. the highest *training* R2, which tends to
# favour the weakest penalty -- consider ranking alphas on X_val / y_val.
a = np.arange( 1, 20, 1 )   # candidate alphas: 1, 2, ..., 19
r2_list = []                # rounded training R2 per alpha, aligned with `a`

for alpha in a:
    # define model
    lasso = lm.Lasso( alpha=alpha, max_iter=1000 )

    # model training
    lasso.fit( X_train, y_train )

    # performance (predictions on the same data the model was fitted on)
    yhat_lasso = lasso.predict( X_train )

    # R squared
    r2 = np.round( mt.r2_score( y_train, yhat_lasso ), 3 )
    r2_list.append( r2 )

    # MSE -- keep the unrounded value so RMSE is not derived from a figure
    # that has already been rounded to 3 decimals
    mse_raw = mt.mean_squared_error( y_train, yhat_lasso )
    mse = np.round( mse_raw, 3 )
    # RMSE
    rmse = np.round( np.sqrt( mse_raw ), 3 )
    # MAE
    mae = np.round( mt.mean_absolute_error( y_train, yhat_lasso ), 3 )
    # MAPE (a fraction, not a percentage: 1.0 means 100 %)
    mape = np.round( mt.mean_absolute_percentage_error( y_train, yhat_lasso ), 3 )

    # Metrics
    print('Alpha {} | R2: {} | MSE: {} | RMSE: {} | MAE: {} | MAPE: {}'.format(alpha, r2, mse, rmse, mae, mape) )


Alpha 1 | R2: 0.007 | MSE: 474.475 | RMSE: 21.782 | MAE: 17.305 | MAPE: 8.737
Alpha 2 | R2: 0.001 | MSE: 477.475 | RMSE: 21.851 | MAE: 17.355 | MAPE: 8.742
Alpha 3 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 4 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 5 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 6 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 7 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 8 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 9 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 10 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 11 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 12 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alpha 13 | R2: 0.0 | MSE: 478.013 | RMSE: 21.864 | MAE: 17.365 | MAPE: 8.742
Alph

## Dados de validação


In [15]:
# Refit on the training data with the alpha that scored the highest training
# R2 in the sweep, then measure performance on the validation data.
# Despite its name, best_alpha is a *position* in `a` / `r2_list` (the first
# one holding the maximum R2); the alpha value itself is a[best_alpha].
best_alpha = r2_list.index( max( r2_list ) )

# define model
lasso = lm.Lasso( alpha=a[best_alpha], max_iter=1000 )

# model training (training split only; validation data is never fitted on)
lasso.fit( X_train, y_train )

# performance
yhat_val = lasso.predict( X_val )

# R squared
r2 = np.round( mt.r2_score( y_val, yhat_val ), 3 )
# MSE -- keep the unrounded value so RMSE is not derived from a figure that
# has already been rounded to 3 decimals
mse_raw = mt.mean_squared_error( y_val, yhat_val )
mse = np.round( mse_raw, 3 )
# RMSE
rmse = np.round( np.sqrt( mse_raw ), 3 )
# MAE
mae = np.round( mt.mean_absolute_error( y_val, yhat_val ), 3 )
# MAPE (a fraction, not a percentage: 1.0 means 100 %)
mape = np.round( mt.mean_absolute_percentage_error( y_val, yhat_val ), 3 )

# Metrics
print('Alpha {} | R2: {} | MSE: {} | RMSE: {} | MAE: {} | MAPE: {}'.format(a[best_alpha], r2, mse, rmse, mae, mape) )


Alpha 1 | R2: 0.008 | MSE: 473.747 | RMSE: 21.766 | MAE: 17.265 | MAPE: 8.696


## Dados de teste


In [16]:
# Final model: refit on training + validation data with the alpha that scored
# the highest training R2 in the sweep, then measure performance on the test data.
# Despite its name, best_alpha is a *position* in `a` / `r2_list` (the first
# one holding the maximum R2); the alpha value itself is a[best_alpha].
best_alpha = r2_list.index( max( r2_list ) )

# define model
lasso = lm.Lasso( alpha=a[best_alpha], max_iter=1000 )

# model training
# Stack the two splits with pd.concat rather than np.concatenate: the result
# stays a DataFrame, so the estimator is fitted with column names (matching
# the DataFrame passed to predict below) and the columns of both splits are
# aligned by name instead of by position.
X_train_val = pd.concat( [ X_train, X_val ], ignore_index=True )
y_train_val = pd.concat( [ y_train, y_val ], ignore_index=True )
lasso.fit( X_train_val, y_train_val )

# performance
yhat_test = lasso.predict( X_test )

# R squared
r2 = np.round( mt.r2_score( y_test, yhat_test ), 3 )
# MSE -- keep the unrounded value so RMSE is not derived from a figure that
# has already been rounded to 3 decimals
mse_raw = mt.mean_squared_error( y_test, yhat_test )
mse = np.round( mse_raw, 3 )
# RMSE
rmse = np.round( np.sqrt( mse_raw ), 3 )
# MAE
mae = np.round( mt.mean_absolute_error( y_test, yhat_test ), 3 )
# MAPE (a fraction, not a percentage: 1.0 means 100 %)
mape = np.round( mt.mean_absolute_percentage_error( y_test, yhat_test ), 3 )

# Metrics
print('Alpha {} | R2: {} | MSE: {} | RMSE: {} | MAE: {} | MAPE: {}'.format(a[best_alpha], r2, mse, rmse, mae, mape) )

Alpha 1 | R2: 0.008 | MSE: 483.096 | RMSE: 21.979 | MAE: 17.472 | MAPE: 8.753


