In [24]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor

from sklearn import tree          as tr
from sklearn import linear_model  as lm
from sklearn import metrics       as mt
from sklearn import datasets      as dt
from sklearn import preprocessing as pp


In [25]:
X_train = pd.read_csv('datasets/X_training (1).csv')
y_train = pd.read_csv('datasets/y_training (1).csv').values.ravel()

X_val = pd.read_csv('datasets/X_validation (1).csv')
y_val = pd.read_csv('datasets/y_val.csv').values.ravel()

X_test = pd.read_csv('datasets/X_test (1).csv')
y_test = pd.read_csv('datasets/y_test (1).csv').values.ravel()


In [26]:
# Incluir termos polinomiais de grau 2
poly = pp.PolynomialFeatures(degree=2, include_bias=False)
P_train = poly.fit_transform( X_train )

poly_val = pp.PolynomialFeatures(degree=2, include_bias=False)
P_val = poly.fit_transform( X_val )

poly_test = pp.PolynomialFeatures(degree=2, include_bias=False)
P_test = poly.fit_transform( X_test )

# 1.0 Model training

In [27]:
deci_tree = tr.DecisionTreeRegressor( max_depth=32 )
deci_tree.fit( X_train, y_train )

rand_fore = RandomForestRegressor( n_estimators=100, max_depth=38 )
rand_fore.fit( X_train, y_train )

# Linear Model
lr = lm.LinearRegression( )
lr.fit(X_train, y_train)

# Lasso
lasso_lin = lm.Lasso(alpha=0.9)
lasso_lin.fit(X_train, y_train)

# Ridge
ridge_lin = lm.Ridge(alpha=0.1)
ridge_lin.fit(X_train, y_train)

# ElasticNet
elasticNet_lin = lm.ElasticNet()
elasticNet_lin.fit(X_train, y_train)

# *****************************************

# Linear Model
lr_poli = lm.LinearRegression( )
lr_poli.fit(P_train, y_train)

# Lasso
lasso_poli = lm.Lasso(alpha=1, max_iter = 1000)
lasso_poli.fit(P_train, y_train)

# Ridge
ridge_poli = lm.Ridge(alpha=1, max_iter = 1000)
ridge_poli.fit(P_train, y_train)

# ElasticNet
elasticNet_poli = lm.ElasticNet(alpha=1, l1_ratio=0.5, max_iter = 1000)
elasticNet_poli.fit(P_train, y_train)

# 1.1 Predict whith training dataset

In [31]:
y_deci_tree_train = deci_tree.predict( X_train )
y_rand_fore_train = rand_fore.predict( X_train )

y_pred_lin_train = lr.predict( X_train )
y_lasso_lin_train = lasso_lin.predict( X_train )
y_ridge_lin_train = ridge_lin.predict( X_train )
y_elasticNet_lin_train = elasticNet_lin.predict( X_train )

y_lr_poli_train = lr_poli.predict( P_train )
y_lasso_poli_train = lasso_poli.predict( P_train )
y_ridge_poli_train = ridge_poli.predict( P_train )
y_elasticNet_poli_train = elasticNet_poli.predict( P_train )


## 1.1.1 Metrics

In [34]:
print( '##    Algoritmo                          R2       MSE       RMSE      MAE      MAPE   ' )
print( ' 1    Decision Tree Regressor            {:.2f}    {:.2f}     {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_deci_tree_train),
                                                                                                         mt.mean_squared_error(y_train, y_deci_tree_train),
                                                                                                         mt.mean_squared_error(y_train, y_deci_tree_train),
                                                                                                         mt.mean_absolute_error(y_train, y_deci_tree_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_deci_tree_train)
                                                                                                         ))
print( ' 2    Random Forest Regresso             {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_rand_fore_train),
                                                                                                         mt.mean_squared_error(y_train, y_rand_fore_train),
                                                                                                         mt.mean_squared_error(y_train, y_rand_fore_train),
                                                                                                         mt.mean_absolute_error(y_train, y_rand_fore_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_rand_fore_train)
                                                                                                        ))
print( ' 3    Linear Regression                  {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_pred_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_pred_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_pred_lin_train),
                                                                                                         mt.mean_absolute_error(y_train, y_pred_lin_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_pred_lin_train)      
                                                                                                        ))
print( ' 4    Linear Regression Lasso            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_lasso_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_lasso_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_lasso_lin_train),
                                                                                                         mt.mean_absolute_error(y_train, y_lasso_lin_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_lasso_lin_train)
                                                                                                        ))
print( ' 5    Linear Regression Ridge            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_ridge_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_ridge_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_ridge_lin_train),
                                                                                                         mt.mean_absolute_error(y_train, y_ridge_lin_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_ridge_lin_train)                                                                                                         
                                                                                                        ))
print( ' 6    Linear Regression Elastic Net      {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_elasticNet_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_elasticNet_lin_train),
                                                                                                         mt.mean_squared_error(y_train, y_elasticNet_lin_train),
                                                                                                         mt.mean_absolute_error(y_train, y_elasticNet_lin_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_elasticNet_lin_train)
                                                                                                        ))
print( ' 7    Polinomial Regression              {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_lr_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_lr_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_lr_poli_train),
                                                                                                         mt.mean_absolute_error(y_train, y_lr_poli_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_lr_poli_train),
                                                                                                        ))
print( ' 8    Polinomial Regression Lasso        {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_lasso_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_lasso_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_lasso_poli_train),
                                                                                                         mt.mean_absolute_error(y_train, y_lasso_poli_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_lasso_poli_train)
                                                                                                        )) 
print( ' 9    Polinomial Regression Ridge        {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_ridge_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_ridge_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_ridge_poli_train),
                                                                                                         mt.mean_absolute_error(y_train, y_ridge_lin_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_ridge_lin_train)                                                                                                         
                                                                                                        ))
print( ' 10   Polinomial Regression Elastic Net  {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_train, y_elasticNet_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_elasticNet_poli_train),
                                                                                                         mt.mean_squared_error(y_train, y_elasticNet_poli_train),
                                                                                                         mt.mean_absolute_error(y_train, y_elasticNet_poli_train),
                                                                                                         mt.mean_absolute_percentage_error(y_train, y_elasticNet_poli_train)
                                                                                                        ))


##    Algoritmo                          R2       MSE       RMSE      MAE      MAPE   
 1    Decision Tree Regressor            0.99    3.94     3.94    0.21    0.08 
 2    Random Forest Regresso             0.90    45.90    45.90    4.85    2.58 
 3    Linear Regression                  0.05    456.00    456.00    17.00    8.65 
 4    Linear Regression Lasso            0.01    474.28    474.28    17.30    8.74 
 5    Linear Regression Ridge            0.05    456.00    456.00    17.00    8.65 
 6    Linear Regression Elastic Net      0.01    474.27    474.27    17.30    8.73 
 7    Polinomial Regression              0.09    432.99    432.99    16.46    8.35 
 8    Polinomial Regression Lasso        0.01    473.64    473.64    17.29    8.70 
 9    Polinomial Regression Ridge        0.09    433.48    433.48    17.00    8.65 
 10   Polinomial Regression Elastic Net  0.01    471.88    471.88    17.24    8.68 


## 1.2 Predict whith validetion dataset

In [42]:
y_deci_tree_val = deci_tree.predict( X_val )
y_rand_fore_val = rand_fore.predict( X_val )

y_pred_lin_val = lr.predict( X_val )
y_lasso_lin_val = lasso_lin.predict( X_val )
y_ridge_lin_val = ridge_lin.predict( X_val )
y_elasticNet_lin_val = elasticNet_lin.predict( X_val )

y_lr_poli_val = lr_poli.predict( P_val )
y_lasso_poli_val = lasso_poli.predict( P_val )
y_ridge_poli_val = ridge_poli.predict( P_val )
y_elasticNet_poli_val = elasticNet_poli.predict( P_val)

In [52]:
print( '##    Algoritmo                          R2         MSE       RMSE      MAE       MAPE ' )
print( ' 1    Decision Tree Regressor            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_deci_tree_val),
                                                                                                         mt.mean_squared_error(y_val, y_deci_tree_val),
                                                                                                         mt.mean_squared_error(y_val, y_deci_tree_val),
                                                                                                         mt.mean_absolute_error(y_val, y_deci_tree_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_deci_tree_val)
                                                                                                         ))
print( ' 2    Random Forest Regresso             {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_rand_fore_val),
                                                                                                         mt.mean_squared_error(y_val, y_rand_fore_val),
                                                                                                         mt.mean_squared_error(y_val, y_rand_fore_val),
                                                                                                         mt.mean_absolute_error(y_val, y_rand_fore_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_rand_fore_val)
                                                                                                         ))
print( ' 3    Linear Regression                  {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_pred_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_pred_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_pred_lin_val),
                                                                                                         mt.mean_absolute_error(y_val, y_pred_lin_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_pred_lin_val)      
                                                                                                        ))
print( ' 4    Linear Regression Lasso            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_lasso_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_lasso_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_lasso_lin_val),
                                                                                                         mt.mean_absolute_error(y_val, y_lasso_lin_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_lasso_lin_val)
                                                                                                        ))
print( ' 5    Linear Regression Ridge            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_ridge_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_ridge_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_ridge_lin_val),
                                                                                                         mt.mean_absolute_error(y_val, y_ridge_lin_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_ridge_lin_val)                                                                                                         
                                                                                                        ))
print( ' 6    Linear Regression Elastic Net      {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_elasticNet_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_elasticNet_lin_val),
                                                                                                         mt.mean_squared_error(y_val, y_elasticNet_lin_val),
                                                                                                         mt.mean_absolute_error(y_val, y_elasticNet_lin_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_elasticNet_lin_val)
                                                                                                        ))
print( ' 7    Polinomial Regression              {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_lr_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_lr_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_lr_poli_val),
                                                                                                         mt.mean_absolute_error(y_val, y_lr_poli_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_lr_poli_val),
                                                                                                        ))
print( ' 8    Polinomial Regression Lasso        {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_lasso_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_lasso_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_lasso_poli_val),
                                                                                                         mt.mean_absolute_error(y_val, y_lasso_poli_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_lasso_poli_val)
                                                                                                        )) 
print( ' 9    Polinomial Regression Ridge        {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_ridge_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_ridge_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_ridge_poli_val),
                                                                                                         mt.mean_absolute_error(y_val, y_ridge_lin_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_ridge_lin_val)                                                                                                         
                                                                                                        ))
print( ' 10   Polinomial Regression Elastic Net  {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_val, y_elasticNet_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_elasticNet_poli_val),
                                                                                                         mt.mean_squared_error(y_val, y_elasticNet_poli_val),
                                                                                                         mt.mean_absolute_error(y_val, y_elasticNet_poli_val),
                                                                                                         mt.mean_absolute_percentage_error(y_val, y_elasticNet_poli_val)
                                                                                                        ))


##    Algoritmo                          R2         MSE       RMSE      MAE       MAPE 
 1    Decision Tree Regressor            -0.31    623.81    623.81    17.14    7.03 
 2    Random Forest Regresso             0.33    317.80    317.80    12.98    7.01 
 3    Linear Regression                  0.04    458.45    458.45    17.04    8.68 
 4    Linear Regression Lasso            0.01    473.53    473.53    17.26    8.70 
 5    Linear Regression Ridge            0.04    458.45    458.45    17.04    8.68 
 6    Linear Regression Elastic Net      0.01    473.64    473.64    17.26    8.69 
 7    Polinomial Regression              0.07    445.77    445.77    16.75    8.55 
 8    Polinomial Regression Lasso        0.01    472.91    472.91    17.24    8.68 
 9    Polinomial Regression Ridge        0.07    445.18    445.18    17.04    8.68 
 10   Polinomial Regression Elastic Net  0.01    471.41    471.41    17.20    8.68 


In [44]:
## 1.3 Predict whith test dataset

In [50]:
y_deci_tree_test = deci_tree.predict( X_test )
y_rand_fore_test = rand_fore.predict( X_test )

y_pred_lin_test= lr.predict( X_test )
y_lasso_lin_test = lasso_lin.predict( X_test )
y_ridge_lin_test = ridge_lin.predict( X_test )
y_elasticNet_lin_test = elasticNet_lin.predict( X_test ) 

y_lr_poli_test = lr_poli.predict( P_test )
y_lasso_poli_test = lasso_poli.predict( P_test )
y_ridge_poli_test = ridge_poli.predict( P_test )
y_elasticNet_poli_test = elasticNet_poli.predict( P_test ) 

In [51]:
print( '##    Algoritmo                          R2         MSE       RMSE      MAE       MAPE ' )
print( ' 1    Decision Tree Regressor            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_deci_tree_test),
                                                                                                         mt.mean_squared_error(y_test, y_deci_tree_test),
                                                                                                         mt.mean_squared_error(y_test, y_deci_tree_test),
                                                                                                         mt.mean_absolute_error(y_test, y_deci_tree_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_deci_tree_test)
                                                                                                         ))
print( ' 2    Random Forest Regresso             {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_rand_fore_test),
                                                                                                         mt.mean_squared_error(y_test, y_rand_fore_test),
                                                                                                         mt.mean_squared_error(y_test, y_rand_fore_test),
                                                                                                         mt.mean_absolute_error(y_test, y_rand_fore_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_rand_fore_test)
                                                                                                         ))
print( ' 3    Linear Regression                  {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_pred_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_pred_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_pred_lin_test),
                                                                                                         mt.mean_absolute_error(y_test, y_pred_lin_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_pred_lin_test)      
                                                                                                        ))
print( ' 4    Linear Regression Lasso            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_lasso_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_lasso_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_lasso_lin_test),
                                                                                                         mt.mean_absolute_error(y_test, y_lasso_lin_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_lasso_lin_test)
                                                                                                        ))
print( ' 5    Linear Regression Ridge            {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_ridge_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_ridge_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_ridge_lin_test),
                                                                                                         mt.mean_absolute_error(y_test, y_ridge_lin_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_ridge_lin_test)                                                                                                         
                                                                                                        ))
print( ' 6    Linear Regression Elastic Net      {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_elasticNet_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_elasticNet_lin_test),
                                                                                                         mt.mean_squared_error(y_test, y_elasticNet_lin_test),
                                                                                                         mt.mean_absolute_error(y_test, y_elasticNet_lin_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_elasticNet_lin_test)
                                                                                                        ))
print( ' 7    Polinomial Regression              {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_lr_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_lr_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_lr_poli_test),
                                                                                                         mt.mean_absolute_error(y_test, y_lr_poli_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_lr_poli_test),
                                                                                                        ))
print( ' 8    Polinomial Regression Lasso        {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_lasso_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_lasso_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_lasso_poli_test),
                                                                                                         mt.mean_absolute_error(y_test, y_lasso_poli_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_lasso_poli_test)
                                                                                                        )) 
print( ' 9    Polinomial Regression Ridge        {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_ridge_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_ridge_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_ridge_poli_test),
                                                                                                         mt.mean_absolute_error(y_test, y_ridge_lin_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_ridge_lin_test)                                                                                                         
                                                                                                        ))
print( ' 10   Polinomial Regression Elastic Net  {:.2f}    {:.2f}    {:.2f}    {:.2f}    {:.2f} '.format(mt.r2_score(y_test, y_elasticNet_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_elasticNet_poli_test),
                                                                                                         mt.mean_squared_error(y_test, y_elasticNet_poli_test),
                                                                                                         mt.mean_absolute_error(y_test, y_elasticNet_poli_test),
                                                                                                         mt.mean_absolute_percentage_error(y_test, y_elasticNet_poli_test)
                                                                                                        ))


##    Algoritmo                          R2         MSE       RMSE      MAE       MAPE 
 1    Decision Tree Regressor            -0.24    602.13    602.13    16.82    6.06 
 2    Random Forest Regresso             0.36    312.88    312.88    12.95    6.42 
 3    Linear Regression                  0.05    461.43    461.43    17.13    8.52 
 4    Linear Regression Lasso            0.01    482.97    482.97    17.47    8.76 
 5    Linear Regression Ridge            0.05    461.43    461.43    17.13    8.52 
 6    Linear Regression Elastic Net      0.01    483.03    483.03    17.47    8.74 
 7    Polinomial Regression              0.09    443.04    443.04    16.72    8.24 
 8    Polinomial Regression Lasso        0.01    482.82    482.82    17.46    8.76 
 9    Polinomial Regression Ridge        0.09    443.49    443.49    17.13    8.52 
 10   Polinomial Regression Elastic Net  0.01    481.70    481.70    17.43    8.75 
