# 0.0 Import Libraries

In [1]:
import pandas                       as pd
import numpy as np

from sklearn import preprocessing   as pp
from sklearn import linear_model    as lm
from sklearn import metrics         as mt


# 0.1 Load dataset

In [2]:
# Read the regression data from CSV: one feature frame (X_*) and one target
# frame (y_*) for each of the training, validation and test partitions.
X_train, y_train = (
    pd.read_csv('../../dataset/regression/X_training.csv'),
    pd.read_csv('../../dataset/regression/y_training.csv'),
)
X_val, y_val = (
    pd.read_csv('../../dataset/regression/X_validation.csv'),
    pd.read_csv('../../dataset/regression/y_val.csv'),
)
X_test, y_test = (
    pd.read_csv('../../dataset/regression/X_test.csv'),
    pd.read_csv('../../dataset/regression/y_test.csv'),
)

# 1.0 Training Polynomial regression

## Dados de treino

In [25]:
# Polynomial degrees to evaluate (1 through 4)
d = np.arange( 1, 5 )

# Training RMSE of each degree, in the same order as `d`
rmse_list = []

# Best value of every metric seen across the degrees. Each tracker starts at
# the worst possible value so the first fitted model always replaces it; R2
# in particular can be negative, so it has to start at -inf rather than 0.
# The metrics are tracked independently of each other, so the reported bests
# are not guaranteed to come from the same degree.
max_r2 = float('-inf')
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in d:
    # Expand the features into polynomial terms up to degree i
    poly = pp.PolynomialFeatures( degree=i )
    X_poly_train = poly.fit_transform( X_train )

    # Fit a linear regression on the expanded training features
    model = lm.LinearRegression()
    model.fit( X_poly_train, y_train )

    # Predict on the same data the model was fitted on (training performance)
    yhat_train = model.predict( X_poly_train )

    # R squared
    r_squared = np.round( mt.r2_score( y_train, yhat_train ), 3 )
    max_r2 = max( max_r2, r_squared )

    # MSE (the unrounded value is kept so RMSE is not rounded twice)
    mse_raw = mt.mean_squared_error( y_train, yhat_train )
    mse = np.round( mse_raw, 3 )
    min_mse = min( min_mse, mse )

    # RMSE, derived from the unrounded MSE
    rmse = np.round( np.sqrt( mse_raw ), 3 )
    rmse_list.append( rmse )
    min_rmse = min( min_rmse, rmse )

    # MAE
    mae = np.round( mt.mean_absolute_error( y_train, yhat_train ), 3 )
    min_mae = min( min_mae, mae )

    # MAPE
    mape = np.round( mt.mean_absolute_percentage_error( y_train, yhat_train ), 3 )
    min_mape = min( min_mape, mape )

print('MAX R2: {}'.format( max_r2 ) )
print('MIN MSE: {}'.format( min_mse ) )
print('MIN RMSE: {}'.format( min_rmse ) )
print('MIN MAE: {}'.format( min_mae ) )
print('MIN MAPE: {}'.format( min_mape ) )

MAX R2: 0.334
MIN MSE: 318.377
MIN RMSE: 17.843
MIN MAE: 13.614
MIN MAPE: 5.913


## Dados de validação

In [42]:
# Model selection: for every polynomial degree, fit on the training data and
# score the predictions on the validation data.
d = np.arange( 1, 5 )

for i in d:
    # Learn the polynomial expansion on the training features and apply the
    # same expansion to the validation features
    poly = pp.PolynomialFeatures( degree=i )
    X_poly_train = poly.fit_transform( X_train )
    X_poly_val = poly.transform( X_val )

    # Fit on the training data only
    model = lm.LinearRegression()
    model.fit( X_poly_train, y_train )

    # Predict the validation data
    yhat_val = model.predict( X_poly_val )

    # R squared
    r2 = np.round( mt.r2_score( y_val, yhat_val ), 3 )
    # MSE (the unrounded value is kept so RMSE is not rounded twice)
    mse_raw = mt.mean_squared_error( y_val, yhat_val )
    mse = np.round( mse_raw, 3 )
    # RMSE, derived from the unrounded MSE
    rmse = np.round( np.sqrt( mse_raw ), 3 )
    # MAE
    mae = np.round( mt.mean_absolute_error( y_val, yhat_val ), 3 )
    # MAPE
    mape = np.round( mt.mean_absolute_percentage_error( y_val, yhat_val ), 3 )

    # One summary line per degree
    print('Degree {} R2: {} MSE: {} RMSE: {} MAE: {} MAPE: {}'.format(i, r2, mse, rmse, mae, mape) )

   

Degree 1 R2: 0.04 MSE: 458.447 RMSE: 21.411 MAE: 17.04 MAPE: 8.683
Degree 2 R2: 0.066 MSE: 445.768 RMSE: 21.113 MAE: 16.75 MAPE: 8.548
Degree 3 R2: -0.048 MSE: 500.326 RMSE: 22.368 MAE: 17.087 MAPE: 8.678
Degree 4 R2: -102.924 MSE: 49624.741 RMSE: 222.766 MAE: 36.104 MAPE: 10.185


## Dados de teste

In [43]:
# Final evaluation: for every polynomial degree, refit on the training and
# validation data combined and score the predictions on the test data.
d = np.arange( 1, 5 )

for i in d:
    # Learn the polynomial expansion on the training features and apply the
    # same expansion to the validation and test features
    poly = pp.PolynomialFeatures( degree=i )
    X_poly_train = poly.fit_transform( X_train )
    X_poly_val = poly.transform( X_val )
    X_poly_test = poly.transform( X_test )

    # Fit on training + validation rows stacked together
    model = lm.LinearRegression()
    model.fit( np.concatenate( ( X_poly_train, X_poly_val ) ),
               np.concatenate( ( y_train, y_val ) ) )

    # Predict the test data
    yhat_test = model.predict( X_poly_test )

    # R squared
    r2 = np.round( mt.r2_score( y_test, yhat_test ), 3 )
    # MSE (the unrounded value is kept so RMSE is not rounded twice)
    mse_raw = mt.mean_squared_error( y_test, yhat_test )
    mse = np.round( mse_raw, 3 )
    # RMSE, derived from the unrounded MSE
    rmse = np.round( np.sqrt( mse_raw ), 3 )
    # MAE
    mae = np.round( mt.mean_absolute_error( y_test, yhat_test ), 3 )
    # MAPE
    mape = np.round( mt.mean_absolute_percentage_error( y_test, yhat_test ), 3 )

    # One summary line per degree
    print('Degree {} R2: {} MSE: {} RMSE: {} MAE: {} MAPE: {}'.format(i, r2, mse, rmse, mae, mape) )

Degree 1 R2: 0.051 MSE: 461.988 RMSE: 21.494 MAE: 17.144 MAPE: 8.531
Degree 2 R2: 0.091 MSE: 442.641 RMSE: 21.039 MAE: 16.736 MAPE: 8.277
Degree 3 R2: 0.021 MSE: 476.492 RMSE: 21.829 MAE: 16.858 MAPE: 7.976
Degree 4 R2: -124.595 MSE: 61152.315 RMSE: 247.29 MAE: 23.394 MAPE: 7.845
