# 0.0 Import library

In [1]:
import pandas                       as pd
import numpy as np

from sklearn import preprocessing   as pp
from sklearn import linear_model    as lm
from sklearn import metrics         as mt


# 0.1 Load dataset

In [2]:
X_train = pd.read_csv( '../../dataset/regression/X_training.csv')
y_train = pd.read_csv( '../../dataset/regression/y_training.csv')
X_val = pd.read_csv( '../../dataset/regression/X_validation.csv')
y_val = pd.read_csv( '../../dataset/regression/y_val.csv')
X_test = pd.read_csv( '../../dataset/regression/X_test.csv')
y_test = pd.read_csv( '../../dataset/regression/y_test.csv')

# 1.0 Training model

## Dados de treino

In [3]:
# Grau do polinômio
d = np.arange( 1, 4)
a = np.arange( 1, 5)
l1_ratios = np.arange(0.1, 1.0, 0.1) # não coloquei 0 e 1 porque fiz Lasso e Ridge separados
rmse_list = []
max_r2 = 0
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in d:
    for alpha in a:
        for l1_value in l1_ratios:
            # Define Polynomial Elastic Net
            poly = pp.PolynomialFeatures( degree=i )
            X_poly_train = poly.fit_transform( X_train )

            # training
            model = lm.ElasticNet( alpha=alpha, l1_ratio=l1_value, max_iter=1000 )
            model.fit( X_poly_train , y_train )

            # performance
            yhat_train = model.predict( X_poly_train )

            # R squared
            r_squared = np.round( mt.r2_score( y_train, yhat_train ), 3 )
            if r_squared > max_r2:
                max_r2 = r_squared
            # MSE
            mse = np.round(mt.mean_squared_error( y_train, yhat_train), 3)
            if mse < min_mse:
                min_mse = mse
            # RMSE
            rmse = np.round(np.sqrt( mse ), 3)
            rmse_list.append( rmse )
            if rmse < min_rmse:
                min_rmse = rmse

            # MAE
            mae = np.round(mt.mean_absolute_error( y_train, yhat_train), 3)
            if mae < min_mae:
                min_mae = mae
            # MAPE
            mape = np.round(mt.mean_absolute_percentage_error( y_train, yhat_train), 3) 
            if mape < min_mape:
                min_mape = mape
    
print('degree {} | alhpa {} | MAX R2: {} | MIN MSE: {} | MIN RMSE: {} | MIN MAE: {} | MIN MAPE: {} |'.format(i, alpha, max_r2, min_mse, min_rmse, min_mae, min_mape ) )

degree 3 | alhpa 4 | MAX R2: 0.036 | MIN MSE: 460.786 | MIN RMSE: 21.466 | MIN MAE: 17.014 | MIN MAPE: 8.549 |


## Dados de validação

In [4]:
# Grau do polinômio
d = np.arange( 1, 4)
a = np.arange( 1, 5)
l1_ratios = np.arange(0.1, 1.0, 0.1) # não coloquei 0 e 1 porque fiz Lasso e Ridge separados
rmse_list = []
max_r2 = 0
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')
best_degree = 0
best_alpha = 0
best_l1_ratio = 0

for i in d:
    for alpha in a:
        for l1_value in l1_ratios:
            # Define Polynomial Elastic Net
            poly = pp.PolynomialFeatures( degree=i )
            X_poly_train = poly.fit_transform( X_train )
            X_poly_val  = poly.transform( X_val)

            # training
            model = lm.ElasticNet( alpha=alpha, l1_ratio=l1_value, max_iter=1000 )
            model.fit( X_poly_train , y_train )

            # performance
            yhat_val = model.predict( X_poly_val )

            # R squared
            r_squared = np.round( mt.r2_score( y_val, yhat_val ), 3 )
            if r_squared > max_r2:
                max_r2 = r_squared
                best_degree = i
                best_alpha = alpha
                best_l1_ratio = l1_value
            # MSE
            mse = np.round(mt.mean_squared_error( y_val, yhat_val), 3)
            if mse < min_mse:
                min_mse = mse
            # RMSE
            rmse = np.round(np.sqrt( mse ), 3)
            rmse_list.append( rmse )
            if rmse < min_rmse:
                min_rmse = rmse

            # MAE
            mae = np.round(mt.mean_absolute_error( y_val, yhat_val), 3)
            if mae < min_mae:
                min_mae = mae
            # MAPE
            mape = np.round(mt.mean_absolute_percentage_error( y_val, yhat_val), 3) 
            if mape < min_mape:
                min_mape = mape
    
print('best degree: {} | best alpha: {} | best l1_ratio: {} | MAX R2: {} | MIN MSE: {} | MIN RMSE: {} | MIN MAE: {} | MIN MAPE: {} |'.format(
    best_degree, best_alpha, best_l1_ratio, max_r2, min_mse, min_rmse, min_mae, min_mape) )

best degree: 3 | best alpha: 1 | best l1_ratio: 0.1 | MAX R2: 0.028 | MIN MSE: 464.255 | MIN RMSE: 21.547 | MIN MAE: 17.059 | MIN MAPE: 8.645 |


## Dados de teste

In [5]:
# Retreinando o modelo com os melhores parâmetros sobre os dados de teste

# Define Polynomial Elastic Net
poly = pp.PolynomialFeatures( degree=best_degree )
X_poly_train = poly.fit_transform( X_train )
X_poly_val  = poly.transform( X_val)
X_poly_test = poly.transform( X_test)

# Concatenando os dados de treino e validação
X_train_val = np.concatenate((X_poly_train, X_poly_val))
y_train_val = np.concatenate((y_train, y_val))

# training
model = lm.ElasticNet( alpha=best_alpha, l1_ratio=best_l1_ratio, max_iter=1000 )
model.fit( X_train_val , y_train_val )

# performance
y_pred_test = model.predict( X_poly_test )

# R squared
r2 = np.round( mt.r2_score( y_test, y_pred_test ), 3 )
# MSE
mse = np.round(mt.mean_squared_error( y_test, y_pred_test), 3)
# RMSE
rmse = np.round(np.sqrt( mse ), 3)
# MAE
mae = np.round(mt.mean_absolute_error( y_test, y_pred_test), 3)
# MAPE
mape = np.round(mt.mean_absolute_percentage_error( y_test, y_pred_test ), 3) 

# Metrics
print('best degree: {} | best alpha: {} | best l1_ratio: {} | MAX R2: {} | MIN MSE: {} | MIN RMSE: {} | MIN MAE: {} | MIN MAPE: {} |'.format(
    best_degree, best_alpha, best_l1_ratio, r2, mse, rmse, mae, mape) )


best degree: 3 | best alpha: 1 | best l1_ratio: 0.1 | MAX R2: 0.026 | MIN MSE: 474.417 | MIN RMSE: 21.781 | MIN MAE: 17.243 | MIN MAPE: 8.686 |
