# Regressão

## Imports

In [52]:
import pandas as pd
import numpy as np
import warnings

from sklearn import tree as tr
from sklearn import metrics as mt
from sklearn import linear_model as lm
from sklearn import ensemble as en
from sklearn import preprocessing as pp

In [53]:
# dados treino
X_training = pd.read_csv('X_training.csv')
y_training = pd.read_csv('y_training.csv')

In [78]:
# dados validação
X_val = pd.read_csv('X_validation.csv')
y_val = pd.read_csv('y_val.csv')

In [79]:
# dados teste
X_test = pd.read_csv('X_test.csv')
y_test = pd.read_csv('y_test.csv')

# Dados Treino

## Decision Tree Regressor

In [54]:
max_depth = np.arange(1, 101, 1)
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in max_depth:
    print(f'Max-Depth: {i}')
    warnings.filterwarnings('ignore')
    # model definition
    tree_reg = tr.DecisionTreeRegressor(max_depth=i)

    # model training
    tree_reg.fit(X_training, y_training)

    # predict
    y_pred = tree_reg.predict(X_training)

    # metrics
    # r2
    r2 = mt.r2_score(y_training, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_training, y_pred)
    print(mse)
    mse_list.append(mse)
    
    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)
    
    # MAE
    mae = mt.mean_absolute_error(y_training, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_training, y_pred)
    print(mape)
    mape_list.append(mape)

df = {'Max-Depth': max_depth, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df = pd.DataFrame(data=df, index=max_depth).reset_index(drop=True)
df

Max-Depth: 1
0.025572193556739342
465.78873027346157
21.582139149617714
17.15881928380362
8.615677611006534
Max-Depth: 2
0.043333993887537336
457.2983666274865
21.384535688845023
16.98650904108474
8.500932012052024
Max-Depth: 3
0.06300499407565918
447.89538147019886
21.163538963750813
16.833242998780406
8.415119469628657
Max-Depth: 4
0.08664883385036748
436.5933290917379
20.894815842493994
16.618122772004238
8.268742828867644
Max-Depth: 5
0.11352273212445407
423.7472681811045
20.585122496140375
16.368766312461414
7.869536027810435
Max-Depth: 6
0.1446084343887135
408.88791206299544
20.22097703037604
16.026871964877472
7.307613166759983
Max-Depth: 7
0.19156500638901441
386.442080873255
19.658130146920257
15.505404530267514
6.535122540002132
Max-Depth: 8
0.24564930826220654
360.58910528012916
18.989183902425328
14.842361497200049
6.1502640215879225
Max-Depth: 9
0.31089148445900916
329.4025256176412
18.149449733191393
13.964752592137774
5.608745076814365
Max-Depth: 10
0.3846242438711218
29

Unnamed: 0,Max-Depth,R2,MSE,RMSE,MAE,MAPE
0,1,0.025572,465.788730,21.582139,17.158819,8.615678
1,2,0.043334,457.298367,21.384536,16.986509,8.500932
2,3,0.063005,447.895381,21.163539,16.833243,8.415119
3,4,0.086649,436.593329,20.894816,16.618123,8.268743
4,5,0.113523,423.747268,20.585122,16.368766,7.869536
...,...,...,...,...,...,...
95,96,0.991757,3.940403,1.985045,0.214099,0.082628
96,97,0.991757,3.940403,1.985045,0.214099,0.082628
97,98,0.991757,3.940403,1.985045,0.214099,0.082628
98,99,0.991757,3.940403,1.985045,0.214099,0.082628


In [75]:
tree_reg = tr.DecisionTreeRegressor()

# model training
tree_reg.fit(X_training, y_training)

# predict
y_pred = tree_reg.predict(X_training)

# metrics
# r2
r2 = mt.r2_score( y_training, y_pred )
mse = mt.mean_squared_error(y_training, y_pred)
rmse = np.sqrt(mse)
mae = mt.mean_absolute_error(y_training, y_pred)
mape = mt.mean_absolute_percentage_error(y_training, y_pred)


df_tr_regressor = {'Algoritmo': 'Decision Tree Regressor', 'R2': round(r2, 3), 'MSE': round(mse,3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_tr_regressor = pd.DataFrame(data=df_tr_regressor, index=[0])
df_tr_regressor

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Decision Tree Regressor,0.992,3.94,1.985,0.214,0.083


## Linear Regression

In [56]:
# model definition
linear_reg = lm.LinearRegression()

# model training
linear_reg.fit(X_training, y_training)

# predict
y_pred = linear_reg.predict(X_training)

# metrics
# r2
r2 = mt.r2_score(y_training, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_training, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_training, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_training, y_pred)
print(mape)


df_linear = {'Algoritmo': 'Linear Regression', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear = pd.DataFrame(data=df_linear, index=[0]).reset_index(drop=True)
df_linear

0.04605830473391903
455.99611182562677
21.35406546364478
16.998249066011095
8.653185943804514


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.046058,455.996112,21.354065,16.998249,8.653186


## Random Forest Regressor


In [57]:
max_depth = np.arange(60, 86, 2)
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in max_depth:
    print(f'Max-Depth: {i}')
    # model definition
    tree_reg = en.RandomForestRegressor(max_depth=i, n_estimators=i)

    # model training
    tree_reg.fit(X_training, y_training)

    # predict
    y_pred = tree_reg.predict(X_training)

    # metrics
    # r2
    r2 = mt.r2_score(y_training, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_training, y_pred)
    print(mse)
    mse_list.append(mse)
    
    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)
    
    # MAE
    mae = mt.mean_absolute_error(y_training, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_training, y_pred)
    print(mape)
    mape_list.append(mape)

df_random_forest = {'Max-Depth': max_depth, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_random_forest = pd.DataFrame(data=df_random_forest, index=max_depth).reset_index(drop=True)
df_random_forest

Max-Depth: 60
0.9005864319008027
47.52093418382529
6.893542934067017
4.910461513658324
2.6025975081178814
Max-Depth: 62
0.899777873076856
47.90743545715429
6.921519736095123
4.906270816646519
2.6069630014342073
Max-Depth: 64
0.9001451606530795
47.731867382594906
6.908825325812986
4.909685541183701
2.604324208153793
Max-Depth: 66
0.8995284159964684
48.02667907475549
6.930128359183219
4.908284994890416
2.5940688479678173
Max-Depth: 68
0.9000111742370996
47.795914572299985
6.913458944139322
4.91811671653763
2.6115192977152626
Max-Depth: 70
0.9016243234222671
47.02480900066874
6.857463744028745
4.902863588691051
2.5682095349653915
Max-Depth: 72
0.9014110053142945
47.126777735550725
6.864894590272361
4.896216684283621
2.598512972876942
Max-Depth: 74
0.90124009160148
47.20847663694887
6.870842498336639
4.892195199128451
2.629955688088221
Max-Depth: 76
0.9023424505696658
46.68153520454967
6.832388689510402
4.874792940368886
2.5920166628569556
Max-Depth: 78
0.900774552141791
47.43101034584641


Unnamed: 0,Max-Depth,R2,MSE,RMSE,MAE,MAPE
0,60,0.900586,47.520934,6.893543,4.910462,2.602598
1,62,0.899778,47.907435,6.92152,4.906271,2.606963
2,64,0.900145,47.731867,6.908825,4.909686,2.604324
3,66,0.899528,48.026679,6.930128,4.908285,2.594069
4,68,0.900011,47.795915,6.913459,4.918117,2.611519
5,70,0.901624,47.024809,6.857464,4.902864,2.56821
6,72,0.901411,47.126778,6.864895,4.896217,2.598513
7,74,0.90124,47.208477,6.870842,4.892195,2.629956
8,76,0.902342,46.681535,6.832389,4.874793,2.592017
9,78,0.900775,47.43101,6.887018,4.892126,2.649706


In [58]:
tree_reg = en.RandomForestRegressor()

# model training
tree_reg.fit(X_training, y_training)

# predict
y_pred = tree_reg.predict(X_training)

# metrics
# r2
r2 = mt.r2_score(y_training, y_pred)
print(r2)
#r2_list.append(r2)

# MSE
mse = mt.mean_squared_error(y_training, y_pred)
print(mse)
#mse_list.append(mse)

# RMSE
rmse = np.sqrt(mse)
print(rmse)
#rmse_list.append(rmse)

# MAE
mae = mt.mean_absolute_error(y_training, y_pred)
print(mae)
#mae_list.append(mae)

# MAPE
mape = mt.mean_absolute_percentage_error(y_training, y_pred)
print(mape)
#mape_list.append(mape)

df_random_regressor = {'Algoritmo': 'Random Forest Regressor','R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape} 
df_random_regressor = pd.DataFrame(data=df_random_regressor, index=[0])
df_random_regressor

0.9028871240323574
46.42117444829539
6.813308627113217
4.862691200389898
2.6368203842199494


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Random Forest Regressor,0.902887,46.421174,6.813309,4.862691,2.63682


## Linear Regression Lasso

In [59]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(1, 10, 1)

for i in alpha:
    print(f'Alpha: {i}')
    # model definition
    linear_lasso = lm.Lasso(alpha=i)

    # model training
    linear_lasso.fit(X_training, y_training)

    # predict
    y_pred = linear_lasso.predict(X_training)

    # metrics
    # r2
    r2 = mt.r2_score(y_training, y_pred)
    print(r2)
    r2_list.append(r2)


    # MSE
    mse = mt.mean_squared_error(y_training, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_training, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_training, y_pred)
    print(mape)
    mape_list.append(mape)

df_lasso = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_lasso = pd.DataFrame(data=df_lasso, index=alpha).reset_index(drop=True)
df_lasso

Alpha: 1
0.007400905609184383
474.47483414340275
21.782443254681112
17.305483795260546
8.736697313910927
Alpha: 2
0.0011249198900505908
477.4748341434027
21.85119754483499
17.355394644672483
8.741522478804352
Alpha: 3
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 4
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 5
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 6
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 7
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 8
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 9
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.007401,474.474834,21.782443,17.305484,8.736697
1,Linear Regression Lasso,0.001125,477.474834,21.851198,17.355395,8.741522
2,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213
3,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213
4,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213
5,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213
6,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213
7,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213
8,Linear Regression Lasso,0.0,478.01256,21.863498,17.36509,8.742213


In [60]:
# model definition
linear_lasso = lm.Lasso()

# model training
linear_lasso.fit(X_training, y_training)

# predict
y_pred = linear_lasso.predict(X_training)

# metrics
# r2
r2 = mt.r2_score(y_training, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_training, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_training, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_training, y_pred)
print(mape)


df_linear_lasso = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_lasso = pd.DataFrame(data=df_linear_lasso, index=[0]).reset_index(drop=True)
df_linear_lasso

0.007400905609184383
474.47483414340275
21.782443254681112
17.305483795260546
8.736697313910927


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.007401,474.474834,21.782443,17.305484,8.736697


## Linear Regression Ridge

In [61]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(1, 51, 1)

for i in alpha:
    print(f'Alpha: {i}')
    # model definition
    linear_ridge = lm.Ridge(alpha=i)

    # model training
    linear_ridge.fit(X_training, y_training)

    # predict
    y_pred = linear_ridge.predict(X_training)

    # metrics
    # r2
    r2 = mt.r2_score(y_training, y_pred)
    print(r2)
    r2_list.append(r2)


    # MSE
    mse = mt.mean_squared_error(y_training, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_training, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_training, y_pred)
    print(mape)
    mape_list.append(mape)

df_ridge = {'Alpha': alpha, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_ridge = pd.DataFrame(data=df_ridge, index=alpha).reset_index(drop=True)
df_ridge

Alpha: 1
0.04605770063554282
455.99640059223805
21.354072225040312
16.998307602320097
8.653414734181558
Alpha: 2
0.046055949199324764
455.9972378007483
21.354091828048983
16.998366445644404
8.653637747762023
Alpha: 3
0.04605313327960969
455.99858384574
21.354123345287203
16.998440543417765
8.653855531763995
Alpha: 4
0.04604932586101396
456.00040383964983
21.354165959822684
16.998512737210483
8.654067956392042
Alpha: 5
0.04604459152971019
456.00266690947615
21.354218948710724
16.998596061744703
8.654275460997084
Alpha: 6


0.04603898769562864
456.0053456125512
21.354281669317544
16.998694945686683
8.654478347171253
Alpha: 7
0.046032565612157406
456.0084154491117
21.354353547909422
16.998801355744888
8.654676679237895
Alpha: 8
0.04602537123043404
456.0118544539367
21.35443407009272
16.99891234521389
8.6548705607647
Alpha: 9
0.0460174459179058
456.01564285286696
21.35452277277268
16.99902162876753
8.655060040426836
Alpha: 10
0.04600882706503018
456.0197627727942
21.35461923736394
16.999134783661344
8.65524538830986
Alpha: 11
0.04599954859941202
456.02419799589694
21.354723084036866
16.999247639473882
8.655426675062708
Alpha: 12
0.0459896414230313
456.0289337506409
21.354833966824486
16.99936516208705
8.655604180934978
Alpha: 13
0.04597913378533214
456.0339565334368
21.354951569447234
16.999491705608037
8.655778108579067
Alpha: 14
0.04596805160262718
456.0392539559618
21.355075601738378
16.999624953240374
8.655948531658808
Alpha: 15
0.04595641873240508
456.04481461403657
21.355205796574207
16.99976073087330

Unnamed: 0,Alpha,R2,MSE,RMSE,MAE,MAPE
0,1,0.046058,455.996401,21.354072,16.998308,8.653415
1,2,0.046056,455.997238,21.354092,16.998366,8.653638
2,3,0.046053,455.998584,21.354123,16.998441,8.653856
3,4,0.046049,456.000404,21.354166,16.998513,8.654068
4,5,0.046045,456.002667,21.354219,16.998596,8.654275
5,6,0.046039,456.005346,21.354282,16.998695,8.654478
6,7,0.046033,456.008415,21.354354,16.998801,8.654677
7,8,0.046025,456.011854,21.354434,16.998912,8.654871
8,9,0.046017,456.015643,21.354523,16.999022,8.65506
9,10,0.046009,456.019763,21.354619,16.999135,8.655245


In [62]:
# model definition
linear_ridge = lm.Ridge()

# model training
linear_ridge.fit(X_training, y_training)

# predict
y_pred = linear_ridge.predict(X_training)

# metrics
# r2
r2 = mt.r2_score(y_training, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_training, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_training, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_training, y_pred)
print(mape)


df_linear_ridge = {'Algoritmo': 'Linear Regression Ridge', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_ridge = pd.DataFrame(data=df_linear_ridge, index=[0]).reset_index(drop=True)
df_linear_ridge

0.04605770063554282
455.99640059223805
21.354072225040312
16.998307602320097
8.653414734181558


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Ridge,0.046058,455.996401,21.354072,16.998308,8.653415


## Linear Regression Elastic Net

In [63]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(1, 21, 1)

for i in alpha:
    print(f'Alpha: {i}')
    # model definition
    linear_net = lm.ElasticNet(alpha=i)

    # model training
    linear_net.fit(X_training, y_training)

    # predict
    y_pred = linear_net.predict(X_training)

    # metrics
    # r2
    r2 = mt.r2_score(y_training, y_pred)
    print(r2)
    r2_list.append(r2)


    # MSE
    mse = mt.mean_squared_error(y_training, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_training, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_training, y_pred)
    print(mape)
    mape_list.append(mape)

df_net = {'Alpha': alpha, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_net = pd.DataFrame(data=df_net, index=alpha).reset_index(drop=True)
df_net

Alpha: 1
0.007831741118630053
474.2688893586024
21.777715430196125
17.299506969671594
8.732300419042266
Alpha: 2
0.0043684991303437615
475.9243625272527
21.81569074146525
17.32681955890861
8.739289218005991
Alpha: 3
0.0021137623741809275
477.00215501614673
21.84037900349137
17.346838500436725
8.74088603142842
Alpha: 4
0.00038285334924781367
477.8295512702471
21.85931268979533
17.361632156535563
8.741978452044464
Alpha: 5


0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 6
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 7
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 8
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 9
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 10
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 11
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 12
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 13
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 14
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 15
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.742212844953096
Alpha: 16
0.0
478.012559979818
21.863498347241183
17.3650904457618
8.7422128449

Unnamed: 0,Alpha,R2,MSE,RMSE,MAE,MAPE
0,1,0.007832,474.268889,21.777715,17.299507,8.7323
1,2,0.004368,475.924363,21.815691,17.32682,8.739289
2,3,0.002114,477.002155,21.840379,17.346839,8.740886
3,4,0.000383,477.829551,21.859313,17.361632,8.741978
4,5,0.0,478.01256,21.863498,17.36509,8.742213
5,6,0.0,478.01256,21.863498,17.36509,8.742213
6,7,0.0,478.01256,21.863498,17.36509,8.742213
7,8,0.0,478.01256,21.863498,17.36509,8.742213
8,9,0.0,478.01256,21.863498,17.36509,8.742213
9,10,0.0,478.01256,21.863498,17.36509,8.742213


In [64]:
# model definition
linear_net = lm.ElasticNet()

# model training
linear_net.fit(X_training, y_training)

# predict
y_pred = linear_net.predict(X_training)

# metrics
# r2
r2 = mt.r2_score(y_training, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_training, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_training, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_training, y_pred)
print(mape)


df_linear_net = {'Algoritmo': 'Linear Regression Elastic Net', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_net = pd.DataFrame(data=df_linear_net, index=[0]).reset_index(drop=True)
df_linear_net

0.007831741118630053
474.2688893586024
21.777715430196125
17.299506969671594
8.732300419042266


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Elastic Net,0.007832,474.268889,21.777715,17.299507,8.7323


## Polinomial Regression

In [65]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(X_training)

# model training
model = lm.LinearRegression()
model.fit(X_poly, y_training)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_training, yhat)
mse = mt.mean_squared_error(y_training, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_training, yhat))
mae = mt.mean_absolute_error(y_training, yhat)
mape = mt.mean_absolute_percentage_error(y_training, yhat)


df_poli_train = {'Algoritmo': 'Polinomial Regression', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_train = pd.DataFrame(data=df_poli_train, index=[0])
df_poli_train



Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression,0.094,432.986,20.808,16.458,8.351


## Polinomial Regression Lasso

In [66]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(X_training)

# model training
model = lm.Lasso()
model.fit(X_poly, y_training)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_training, yhat)
mse = mt.mean_squared_error(y_training, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_training, yhat))
mae = mt.mean_absolute_error(y_training, yhat)
mape = mt.mean_absolute_percentage_error(y_training, yhat)


df_poli_lasso_train = {'Algoritmo': 'Polinomial Regression Lasso', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_lasso_train = pd.DataFrame(data=df_poli_lasso_train, index=[0])
df_poli_lasso_train

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Lasso,0.009,473.639,21.763,17.285,8.7


## Polinomial Regressio Ridge

In [67]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(X_training)

# model training
model = lm.Ridge()
model.fit(X_poly, y_training)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_training, yhat)
mse = mt.mean_squared_error(y_training, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_training, yhat))
mae = mt.mean_absolute_error(y_training, yhat)
mape = mt.mean_absolute_percentage_error(y_training, yhat)


df_poli_ridge_train = {'Algoritmo': 'Polinomial Regression Ridge', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_ridge_train = pd.DataFrame(data=df_poli_ridge_train, index=[0])
df_poli_ridge_train

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Ridge,0.093,433.475,20.82,16.472,8.373


## Polinomial Regression Elastic Net

In [68]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(X_training)

# model training
model = lm.ElasticNet()
model.fit(X_poly, y_training)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_training, yhat)
mse = mt.mean_squared_error(y_training, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_training, yhat))
mae = mt.mean_absolute_error(y_training, yhat)
mape = mt.mean_absolute_percentage_error(y_training, yhat)


df_poli_elastic_train = {'Algoritmo': 'Polinomial Regression Elastic Net', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_elastic_train = pd.DataFrame(data=df_poli_elastic_train, index=[0])
df_poli_elastic_train


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Elastic Net,0.013,471.878,21.723,17.244,8.679


## Resultado Final Dados Treino

In [76]:
df_final = pd.concat([df_tr_regressor, df_random_regressor, df_linear, df_linear_lasso, df_linear_ridge, df_linear_net, df_poli_train, df_poli_lasso_train, df_poli_ridge_train, df_poli_elastic_train]).reset_index(drop=True)

In [77]:
df_final

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Decision Tree Regressor,0.992,3.94,1.985,0.214,0.083
1,Random Forest Regressor,0.902887,46.421174,6.813309,4.862691,2.63682
2,Linear Regression,0.046058,455.996112,21.354065,16.998249,8.653186
3,Linear Regression Lasso,0.007401,474.474834,21.782443,17.305484,8.736697
4,Linear Regression Ridge,0.046058,455.996401,21.354072,16.998308,8.653415
5,Linear Regression Elastic Net,0.007832,474.268889,21.777715,17.299507,8.7323
6,Polinomial Regression,0.094,432.986,20.808,16.458,8.351
7,Polinomial Regression Lasso,0.009,473.639,21.763,17.285,8.7
8,Polinomial Regression Ridge,0.093,433.475,20.82,16.472,8.373
9,Polinomial Regression Elastic Net,0.013,471.878,21.723,17.244,8.679


# Dados Validação

## Decision Tree

In [121]:
max_depth = np.arange(1, 21, 1)
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in max_depth:
    print(f'Max-Depth: {i}')
    warnings.filterwarnings('ignore')
    # model definition
    tree_reg = tr.DecisionTreeRegressor(max_depth=i, random_state=0)

    # model training
    tree_reg.fit(X_training, y_training)

    # predict
    y_pred = tree_reg.predict(X_val)

    # metrics
    # r2
    r2 = mt.r2_score(y_val, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_val, y_pred)
    print(mse)
    mse_list.append(mse)
    
    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)
    
    # MAE
    mae = mt.mean_absolute_error(y_val, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_val, y_pred)
    print(mape)
    mape_list.append(mape)

df = {'Max-Depth': max_depth, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df = pd.DataFrame(data=df, index=max_depth).reset_index(drop=True)
df

Max-Depth: 1
0.025732879578441392
465.22386314872665
21.56904873073281
17.12234003742936
8.549934464397861
Max-Depth: 2
0.0376087797489999
459.55298291484866
21.437186917010557
16.985101578238307
8.480124627460382
Max-Depth: 3
0.05481161922406197
451.33842730688264
21.244727047125895
16.896549887233135
8.469921603015896
Max-Depth: 4
0.06216761663185322
447.82585312714036
21.161896255466814
16.849288338389197
8.536141474557681
Max-Depth: 5
0.06355927779643322
447.1613187611445
21.146189225511637
16.843451865846713
8.395778483246199
Max-Depth: 6
0.0632964916805624
447.28680218293374
21.14915606313722
16.74808058302347
8.324422504826854
Max-Depth: 7
0.055961294654844784
450.7894439387028
21.231802654007097
16.739333173248106
7.99756080976211
Max-Depth: 8
0.03489558088963318
460.8485668757434
21.46738379206333
16.890528274001444
7.950884793697328
Max-Depth: 9
0.02349614110222864
466.29193174402815
21.593793824708712
16.863629936119775
7.860503054447145
Max-Depth: 10
-0.005483155423004149
4

Unnamed: 0,Max-Depth,R2,MSE,RMSE,MAE,MAPE
0,1,0.025733,465.223863,21.569049,17.12234,8.549934
1,2,0.037609,459.552983,21.437187,16.985102,8.480125
2,3,0.054812,451.338427,21.244727,16.89655,8.469922
3,4,0.062168,447.825853,21.161896,16.849288,8.536141
4,5,0.063559,447.161319,21.146189,16.843452,8.395778
5,6,0.063296,447.286802,21.149156,16.748081,8.324423
6,7,0.055961,450.789444,21.231803,16.739333,7.997561
7,8,0.034896,460.848567,21.467384,16.890528,7.950885
8,9,0.023496,466.291932,21.593794,16.86363,7.860503
9,10,-0.005483,480.129882,21.911866,16.879492,7.894165


In [122]:
tree_reg = tr.DecisionTreeRegressor(max_depth=19, random_state=0)

# model training
tree_reg.fit(X_training, y_training)

# predict
y_pred = tree_reg.predict(X_val)

# metrics
# r2
r2 = mt.r2_score(y_val, y_pred)
print(r2)
#r2_list.append(r2)

# MSE
mse = mt.mean_squared_error(y_val, y_pred)
print(mse)
#mse_list.append(mse)

# RMSE
rmse = np.sqrt(mse)
print(rmse)
#rmse_list.append(rmse)

# MAE
mae = mt.mean_absolute_error(y_val, y_pred)
print(mae)
#mae_list.append(mae)

# MAPE
mape = mt.mean_absolute_percentage_error(y_val, y_pred)
print(mape)
#mape_list.append(mape)

df_tree_val = {'Algoritmo': 'Decision Tree Regressor', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_tree_val= pd.DataFrame(data=df_tree_val, index=[0]).reset_index(drop=True)
df_tree_val

-0.2469807495367824
595.4477877900472
24.401798863814264
17.09721363172685
6.948472206618067


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Decision Tree Regressor,-0.247,595.448,24.402,17.097,6.948


## Random Forest Regressor

In [123]:
max_depth = np.arange(198, 206, 1)
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in max_depth:
    print(f'Max-Depth: {i}')
    # model definition
    tree_reg = en.RandomForestRegressor(n_estimators=i)

    # model training
    tree_reg.fit(X_training, y_training)

    # predict
    y_pred = tree_reg.predict(X_val)

    # metrics
    # r2
    r2 = mt.r2_score(y_val, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_val, y_pred)
    print(mse)
    mse_list.append(mse)
    
    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)
    
    # MAE
    mae = mt.mean_absolute_error(y_val, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_val, y_pred)
    print(mape)
    mape_list.append(mape)

df_random_forest_val = {'Max-Depth': max_depth, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_random_forest_val = pd.DataFrame(data=df_random_forest_val, index=max_depth).reset_index(drop=True)
df_random_forest_val

Max-Depth: 198
0.33597367100286557
317.08028274098615
17.806748236019573
12.980333693676661
7.029299474442764
Max-Depth: 199
0.33727435730329547
316.4591899290181
17.789299871805472
12.98162607110632
6.999489280960111
Max-Depth: 200
0.3373683034717537
316.4143295426805
17.788038945951307
12.980101347638104
7.017761991401926
Max-Depth: 201
0.3397001370996029
315.30085190823183
17.75671286889079
12.962604518694715
7.0567122961204
Max-Depth: 202
0.33685217730109795
316.66078578240325
17.79496518070219
12.97210194479443
7.064688992763393
Max-Depth: 203
0.33771272512192396
316.24986420531496
17.783415425764392
12.948707826909667
7.096916558836165
Max-Depth: 204
0.33774109687027676
316.236316366025
17.783034509498794
12.99020156729614
7.064187734209317
Max-Depth: 205
0.3366975652676678
316.73461482371437
17.797039496043
12.972170751394518
7.046848327709086


Unnamed: 0,Max-Depth,R2,MSE,RMSE,MAE,MAPE
0,198,0.335974,317.080283,17.806748,12.980334,7.029299
1,199,0.337274,316.45919,17.7893,12.981626,6.999489
2,200,0.337368,316.41433,17.788039,12.980101,7.017762
3,201,0.3397,315.300852,17.756713,12.962605,7.056712
4,202,0.336852,316.660786,17.794965,12.972102,7.064689
5,203,0.337713,316.249864,17.783415,12.948708,7.096917
6,204,0.337741,316.236316,17.783035,12.990202,7.064188
7,205,0.336698,316.734615,17.797039,12.972171,7.046848


In [124]:
# model definition
tree_reg = en.RandomForestRegressor(n_estimators=201)

# model training
tree_reg.fit(X_training, y_training)

# predict
y_pred = tree_reg.predict(X_val)

# metrics
# r2
r2 = mt.r2_score(y_val, y_pred)
print(r2)
#r2_list.append(r2)

# MSE
mse = mt.mean_squared_error(y_val, y_pred)
print(mse)
#mse_list.append(mse)

# RMSE
rmse = np.sqrt(mse)
print(rmse)
#rmse_list.append(rmse)

# MAE
mae = mt.mean_absolute_error(y_val, y_pred)
print(mae)
#mae_list.append(mae)

# MAPE
mape = mt.mean_absolute_percentage_error(y_val, y_pred)
print(mape)
#mape_list.append(mape)

df_random_forest_val = {'Algoritmo': 'Random Forest Regressor', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_random_forest_val = pd.DataFrame(data=df_random_forest_val, index=[0])
df_random_forest_val

0.339010039861536
315.6303813527879
17.765989456058673
12.953523817822239
7.031280569734367


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Random Forest Regressor,0.33901,315.630381,17.765989,12.953524,7.031281


## Linear Regression

In [125]:
# model definition
linear_reg = lm.LinearRegression()

# model training
linear_reg.fit(X_training, y_training)

# predict
y_pred = linear_reg.predict(X_val)

# metrics
# r2
r2 = mt.r2_score(y_val, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_val, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_val, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_val, y_pred)
print(mape)


df_linear_val = {'Algoritmo': 'Linear Regression', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_val = pd.DataFrame(data=df_linear_val, index=[0]).reset_index(drop=True)
df_linear_val

0.03992483038154071
458.4470418439312
21.41137645841414
17.039753759960327
8.682541883735297


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.039925,458.447042,21.411376,17.039754,8.682542


## Linear Regression Lasso

In [126]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(0, 1, 0.1)

for i in alpha:
    print(f'Alpha: {i}')
    # model definition
    linear_lasso = lm.Lasso(alpha=i)

    # model training
    linear_lasso.fit(X_training, y_training)

    # predict
    y_pred = linear_lasso.predict(X_val)

    # metrics
    # r2
    r2 = mt.r2_score(y_val, y_pred)
    print(r2)
    r2_list.append(r2)


    # MSE
    mse = mt.mean_squared_error(y_val, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_val, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_val, y_pred)
    print(mape)
    mape_list.append(mape)

df_lasso_val = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_lasso_val = pd.DataFrame(data=df_lasso_val, index=alpha).reset_index(drop=True)
df_lasso_val

Alpha: 0.0
0.03992483038154071
458.4470418439312
21.41137645841414
17.039753759960327
8.682541883735295
Alpha: 0.1
0.037195327225104546
459.7504112960014
21.441791233383498
17.047447666076568
8.686903183048353
Alpha: 0.2
0.029583161694889615
463.3853087288141
21.52638633697756
17.09331744231932
8.677304619776228
Alpha: 0.30000000000000004
0.019805912592760766
468.05405870807203
21.63455704903782
17.157928915777752
8.673914038027862
Alpha: 0.4
0.013252924563716184
471.1831865849528
21.706754400069872
17.21262911195358
8.689271514129404
Alpha: 0.5
0.010364154127209924
472.56260801266126
21.73850519269118
17.238258520312712
8.702693454287592
Alpha: 0.6000000000000001
0.009890122594862105
472.78896357379085
21.743710897033903
17.243672540383805
8.701466404528825
Alpha: 0.7000000000000001
0.009322425596106743
473.0600454827694
21.749943574243346
17.249911732627282
8.70025508910937
Alpha: 0.8
0.008737858787954855
473.3391829216263
21.756359597175862
17.25604433771147
8.698961409916425
Alpha:

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.039925,458.447042,21.411376,17.039754,8.682542
1,Linear Regression Lasso,0.037195,459.750411,21.441791,17.047448,8.686903
2,Linear Regression Lasso,0.029583,463.385309,21.526386,17.093317,8.677305
3,Linear Regression Lasso,0.019806,468.054059,21.634557,17.157929,8.673914
4,Linear Regression Lasso,0.013253,471.183187,21.706754,17.212629,8.689272
5,Linear Regression Lasso,0.010364,472.562608,21.738505,17.238259,8.702693
6,Linear Regression Lasso,0.00989,472.788964,21.743711,17.243673,8.701466
7,Linear Regression Lasso,0.009322,473.060045,21.749944,17.249912,8.700255
8,Linear Regression Lasso,0.008738,473.339183,21.75636,17.256044,8.698961
9,Linear Regression Lasso,0.008331,473.533421,21.760823,17.260311,8.697382


In [127]:
# model definition
linear_lasso = lm.Lasso()

# model training
linear_lasso.fit(X_training, y_training)

# predict
y_pred = linear_lasso.predict(X_val)

# metrics
# r2
r2 = mt.r2_score(y_val, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_val, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_val, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_val, y_pred)
print(mape)


df_linear_lasso_val = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_lasso_val = pd.DataFrame(data=df_linear_lasso_val, index=[0]).reset_index(drop=True)
df_linear_lasso_val

0.007883642924698453
473.7470809154487
21.76573180289256
17.26492176467576
8.695808367641687


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.007884,473.747081,21.765732,17.264922,8.695808


## Linear Regression Ridge

In [128]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(1, 51, 1)

for i in alpha:
    print(f'Alpha: {i}')
    # model definition
    linear_ridge = lm.Ridge(alpha=i)

    # model training
    linear_ridge.fit(X_training, y_training)

    # predict
    y_pred = linear_ridge.predict(X_val)

    # metrics
    # r2
    r2 = mt.r2_score(y_val, y_pred)
    print(r2)
    r2_list.append(r2)


    # MSE
    mse = mt.mean_squared_error(y_val, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_val, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_val, y_pred)
    print(mape)
    mape_list.append(mape)

df_ridge_val = {'Alpha': alpha, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_ridge_val = pd.DataFrame(data=df_ridge_val, index=alpha).reset_index(drop=True)
df_ridge_val

Alpha: 1
0.03992810824954418
458.4454766238971
21.41133990725235
17.0394715713122
8.682412484475345
Alpha: 2
0.03993088130292455
458.4441524587074
21.411308985176674
17.039200923825153
8.682285435973139
Alpha: 3
0.03993317490430126
458.4430572374168
21.411283409394606
17.03896783269642
8.682161123596494
Alpha: 4
0.039935010539959515
458.44218070007463
21.411262940332936
17.038733196783852
8.682038643417766
Alpha: 5
0.03993640654155428
458.44151409310285
21.411247373590893
17.03853245269251
8.681918460909491
Alpha: 6
0.03993737866788938
458.44104989148957
21.4112365334534
17.038336547103107
8.681800028083694
Alpha: 7
0.039937940575305086
458.4407815741737
21.411230267646314
17.038143933463243
8.681683241726917
Alpha: 8
0.03993810419903987
458.44070344194034
21.41122844308426
17.037961094116433
8.681568144225457
Alpha: 9
0.03993788006316945
458.4408104694211
21.411230942414804
17.037783485198283
8.681454584234332
Alpha: 10
0.03993727753306131
458.4410981845443
21.41123766120362
17.037610

8.681231812808406
Alpha: 12
0.03993497015658665
458.44219998360416
21.411263390645686
17.037283395303948
8.681122484364954
Alpha: 13
0.039933279920647546
458.44300709089197
21.41128223836424
17.037131033141375
8.68101455531247
Alpha: 14
0.03993124076219512
458.44398081273164
21.411304976874522
17.036977414603975
8.680907791130478
Alpha: 15
0.03992885868914131
458.4451182802753
21.41133153917045
17.0368248901679
8.680802205774887
Alpha: 16
0.039926139344781064
458.4464167987843
21.411361862310027
17.036683867412023
8.680698002632779
Alpha: 17
0.03992308807026923
458.4478738177949
21.41139588671871
17.03654210838566
8.680594877385849
Alpha: 18
0.0399197099559232
458.4494869066217
21.411433555617467
17.036399439703587
8.680492798393274
Alpha: 19
0.03991600988333621
458.4512537342471
21.411474814553227
17.03626740625131
8.680391933476571
Alpha: 20
0.0399119925599295
458.45317205282277
21.411519611013667
17.036138842965627
8.680292140637556
Alpha: 21
0.03990766254726519
458.4552396841499
21

Unnamed: 0,Alpha,R2,MSE,RMSE,MAE,MAPE
0,1,0.039928,458.445477,21.41134,17.039472,8.682412
1,2,0.039931,458.444152,21.411309,17.039201,8.682285
2,3,0.039933,458.443057,21.411283,17.038968,8.682161
3,4,0.039935,458.442181,21.411263,17.038733,8.682039
4,5,0.039936,458.441514,21.411247,17.038532,8.681918
5,6,0.039937,458.44105,21.411237,17.038337,8.6818
6,7,0.039938,458.440782,21.41123,17.038144,8.681683
7,8,0.039938,458.440703,21.411228,17.037961,8.681568
8,9,0.039938,458.44081,21.411231,17.037783,8.681455
9,10,0.039937,458.441098,21.411238,17.03761,8.681342


In [129]:
# model definition
linear_ridge = lm.Ridge()

# model training
linear_ridge.fit(X_training, y_training)

# predict
y_pred = linear_ridge.predict(X_val)

# metrics
# r2
r2 = mt.r2_score(y_val, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_val, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_val, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_val, y_pred)
print(mape)


df_linear_ridge_val = {'Algoritmo': 'Linear Regression Ridge', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_ridge_val = pd.DataFrame(data=df_linear_ridge_val, index=[0]).reset_index(drop=True)
df_linear_ridge_val

0.03992810824954418
458.4454766238971
21.41133990725235
17.0394715713122
8.682412484475345


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Ridge,0.039928,458.445477,21.41134,17.039472,8.682412


## Linear Regression Elastic Net

In [130]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(0, 1, 0.1)

for i in alpha:
    print(f'Alpha: {i}')
    # model definition
    linear_net = lm.ElasticNet(alpha=i)

    # model training
    linear_net.fit(X_training, y_training)

    # predict
    y_pred = linear_net.predict(X_val)

    # metrics
    # r2
    r2 = mt.r2_score(y_val, y_pred)
    print(r2)
    r2_list.append(r2)


    # MSE
    mse = mt.mean_squared_error(y_val, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_val, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_val, y_pred)
    print(mape)
    mape_list.append(mape)

df_net_val = {'Alpha': alpha, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_net_val = pd.DataFrame(data=df_net_val, index=alpha).reset_index(drop=True)
df_net_val

Alpha: 0.0
0.03992483038154071
458.4470418439312
21.41137645841414
17.039753759960327
8.682541883735295
Alpha: 0.1
0.028384589234635604
463.9576409963567
21.539675972408606
17.098323180943396
8.676790690797697
Alpha: 0.2
0.021103753479594145
467.43432461413715
21.620229522697883
17.153466370058407
8.684987692689914
Alpha: 0.30000000000000004
0.01699850022084526
469.39463071521203
21.665517088572155
17.18650139421548
8.687840491068222
Alpha: 0.4
0.014355314990021961
470.6567823554821
21.69462565603477
17.2073861405573
8.689825191586888
Alpha: 0.5
0.012531761788559193
471.52755018420106
21.714685127447762
17.22225465029233
8.691629091278477
Alpha: 0.6000000000000001
0.01112530456355787
472.1991498408593
21.73014380626275
17.23405160758103
8.693083712417067
Alpha: 0.7000000000000001
0.010024119145518817
472.72497866489016
21.742239504358565
17.243585533969753
8.694295598418366
Alpha: 0.8
0.009317856112062706
473.06222746446105
21.749993734814293
17.25073656639926
8.69430789720519
Alpha: 0

Unnamed: 0,Alpha,R2,MSE,RMSE,MAE,MAPE
0,0.0,0.039925,458.447042,21.411376,17.039754,8.682542
1,0.1,0.028385,463.957641,21.539676,17.098323,8.676791
2,0.2,0.021104,467.434325,21.62023,17.153466,8.684988
3,0.3,0.016999,469.394631,21.665517,17.186501,8.68784
4,0.4,0.014355,470.656782,21.694626,17.207386,8.689825
5,0.5,0.012532,471.52755,21.714685,17.222255,8.691629
6,0.6,0.011125,472.19915,21.730144,17.234052,8.693084
7,0.7,0.010024,472.724979,21.74224,17.243586,8.694296
8,0.8,0.009318,473.062227,21.749994,17.250737,8.694308
9,0.9,0.008703,473.355887,21.756743,17.25708,8.694221


In [131]:
# model definition
linear_net = lm.ElasticNet()

# model training
linear_net.fit(X_training, y_training)

# predict
y_pred = linear_net.predict(X_val)

# metrics
# r2
r2 = mt.r2_score(y_val, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_val, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_val, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_val, y_pred)
print(mape)


df_linear_net_val = {'Algoritmo': 'Linear Regression Elastic Net', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_net_val = pd.DataFrame(data=df_linear_net_val, index=[0]).reset_index(drop=True)
df_linear_net_val

0.00811707101546788
473.6356162915494
21.76317109916543
17.26290253848956
8.694035085887595


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Elastic Net,0.008117,473.635616,21.763171,17.262903,8.694035


## Polinomial Regression 

In [132]:
# model definition
poly = pp.PolynomialFeatures()

# transform
X_poly = poly.fit_transform(X_val)

# model training
model = lm.LinearRegression()
model.fit(X_poly, y_val)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_val, yhat)
mse = mt.mean_squared_error(y_val, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_val, yhat))
mae = mt.mean_absolute_error(y_val, yhat)
mape = mt.mean_absolute_percentage_error(y_val, yhat)


df_poli_val = {'Algoritmo': 'Polinomial Regression', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_val = pd.DataFrame(data=df_poli_val, index=[0])
df_poli_val

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression,0.094,432.806,20.804,16.515,8.415


## Polinomial Regression Lasso

In [133]:
# model definition
poly = pp.PolynomialFeatures()

# transform
X_poly = poly.fit_transform(X_val)

# model training
model = lm.Lasso()
model.fit(X_poly, y_val)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_val, yhat)
mse = mt.mean_squared_error(y_val, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_val, yhat))
mae = mt.mean_absolute_error(y_val, yhat)
mape = mt.mean_absolute_percentage_error(y_val, yhat)


df_poli_lasso_val = {'Algoritmo': 'Polinomial Regression Lasso', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_lasso_val = pd.DataFrame(data=df_poli_lasso_val, index=[0])
df_poli_lasso_val

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Lasso,0.01,472.907,21.746,17.245,8.683


## Polinomial Regression Ridge

In [134]:
# model definition
poly = pp.PolynomialFeatures()

# transform
X_poly = poly.fit_transform(X_val)

# model training
model = lm.Ridge()
model.fit(X_poly, y_val)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_val, yhat)
mse = mt.mean_squared_error(y_val, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_val, yhat))
mae = mt.mean_absolute_error(y_val, yhat)
mape = mt.mean_absolute_percentage_error(y_val, yhat)


df_poli_ridge_val = {'Algoritmo': 'Polinomial Regression Lasso', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_ridge_val = pd.DataFrame(data=df_poli_ridge_val, index=[0])
df_poli_ridge_val

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Lasso,0.092,433.767,20.827,16.538,8.445


## Polinomial Regression Elastic Net

In [135]:
# model definition
poly = pp.PolynomialFeatures()

# transform
X_poly = poly.fit_transform(X_val)

# model training
model = lm.ElasticNet()
model.fit(X_poly, y_val)

# predict
yhat = model.predict(X_poly)


# metrics
r2 = mt.r2_score(y_val, yhat)
mse = mt.mean_squared_error(y_val, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_val, yhat))
mae = mt.mean_absolute_error(y_val, yhat)
mape = mt.mean_absolute_percentage_error(y_val, yhat)


df_poli_elastic_val = {'Algoritmo': 'Polinomial Regression Lasso', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_elastic_val  = pd.DataFrame(data=df_poli_elastic_val , index=[0])
df_poli_elastic_val 

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Lasso,0.013,471.379,21.711,17.203,8.681


## Resultado Final Validação

In [137]:
df_final_val = pd.concat([df_tree_val, df_random_forest_val, df_linear_val, df_linear_lasso_val, df_linear_ridge_val, df_linear_net_val, df_poli_val, df_poli_lasso_val, df_poli_ridge_val, df_poli_elastic_val])

In [138]:
df_final_val

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Decision Tree Regressor,-0.247,595.448,24.402,17.097,6.948
0,Random Forest Regressor,0.33901,315.630381,17.765989,12.953524,7.031281
0,Linear Regression,0.039925,458.447042,21.411376,17.039754,8.682542
0,Linear Regression Lasso,0.007884,473.747081,21.765732,17.264922,8.695808
0,Linear Regression Ridge,0.039928,458.445477,21.41134,17.039472,8.682412
0,Linear Regression Elastic Net,0.008117,473.635616,21.763171,17.262903,8.694035
0,Polinomial Regression,0.094,432.806,20.804,16.515,8.415
0,Polinomial Regression Lasso,0.01,472.907,21.746,17.245,8.683
0,Polinomial Regression Lasso,0.092,433.767,20.827,16.538,8.445
0,Polinomial Regression Lasso,0.013,471.379,21.711,17.203,8.681


# Dados Teste

## Decision Tree Regressor

In [140]:
max_depth = np.arange(1, 31, 1)
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in max_depth:
    print(f'Max-Depth: {i}')
    warnings.filterwarnings('ignore')
    # model definition
    tree_reg = tr.DecisionTreeRegressor(max_depth=i, random_state=0)

    # model training
    tree_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

    # predict
    y_pred = tree_reg.predict(X_test)

    # metrics
    # r2
    r2 = mt.r2_score(y_test, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_test, y_pred)
    print(mse)
    mse_list.append(mse)
    
    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)
    
    # MAE
    mae = mt.mean_absolute_error(y_test, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_test, y_pred)
    print(mape)
    mape_list.append(mape)

df = {'Max-Depth': max_depth, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df = pd.DataFrame(data=df, index=max_depth).reset_index(drop=True)
df

Max-Depth: 1
0.028487135934400998
473.03056160041297
21.749265771524634
17.354521375858198
8.518003646594408
Max-Depth: 2
0.0457159228590206
464.64184843346476
21.55555261257444
17.198523782134753
8.351765611647364
Max-Depth: 3
0.05544674253776816
459.90390283570116
21.44537019581852
17.111420626803348
8.172098241156206
Max-Depth: 4
0.06504830500131586
455.2288927021588
21.336093660793647
17.033428968872123
8.129477577701318
Max-Depth: 5
0.09047547149696622
442.84838052138286
21.043963042197703
16.82978094998882
7.883226145162679
Max-Depth: 6
0.1005962244916242
437.9205760115716
20.92655193794648
16.688255206946074
7.730551046690193
Max-Depth: 7
0.09705582275302604
439.6444010731274
20.967698993287925
16.67856027032464
7.602002451364004
Max-Depth: 8
0.09344823051253048
441.40094125562746
21.009544051588257
16.61214121124867
7.31192271401795
Max-Depth: 9
0.09295162538981527
441.642738774508
21.01529773223563
16.547587925496536
7.148835204487813
Max-Depth: 10
0.09557171482065563
440.3670

Unnamed: 0,Max-Depth,R2,MSE,RMSE,MAE,MAPE
0,1,0.028487,473.030562,21.749266,17.354521,8.518004
1,2,0.045716,464.641848,21.555553,17.198524,8.351766
2,3,0.055447,459.903903,21.44537,17.111421,8.172098
3,4,0.065048,455.228893,21.336094,17.033429,8.129478
4,5,0.090475,442.848381,21.043963,16.829781,7.883226
5,6,0.100596,437.920576,20.926552,16.688255,7.730551
6,7,0.097056,439.644401,20.967699,16.67856,7.602002
7,8,0.093448,441.400941,21.009544,16.612141,7.311923
8,9,0.092952,441.642739,21.015298,16.547588,7.148835
9,10,0.095572,440.367015,20.984924,16.390288,6.924732


In [142]:
# model definition
tree_reg = tr.DecisionTreeRegressor(max_depth=29, random_state=0)

# model training
tree_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

# predict
y_pred = tree_reg.predict(X_test)

# metrics
# r2
r2 = mt.r2_score(y_test, y_pred)
print(r2)
#r2_list.append(r2)

# MSE
mse = mt.mean_squared_error(y_test, y_pred)
print(mse)
#mse_list.append(mse)

# RMSE
rmse = np.sqrt(mse)
print(rmse)
#rmse_list.append(rmse)

# MAE
mae = mt.mean_absolute_error(y_test, y_pred)
print(mae)
#mae_list.append(mae)

# MAPE
mape = mt.mean_absolute_percentage_error(y_test, y_pred)
print(mape)
#mape_list.append(mape)

df_decision_test = {'Algoritmo': 'Decision Tree Regressor', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_decision_test = pd.DataFrame(data=df_decision_test, index=[0]).reset_index(drop=True)
df_decision_test

-0.15562130543546093
562.6731413724519
23.720732311049165
15.702064805014764
6.126925311446299


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Decision Tree Regressor,-0.155621,562.673141,23.720732,15.702065,6.126925


## Random Forest Regressor

In [144]:
max_depth = np.arange(20, 36, 1)
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []

for i in max_depth:
    print(f'Max-Depth: {i}')
    # model definition
    tree_reg = en.RandomForestRegressor(n_estimators=i)

    # model training
    tree_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

    # predict
    y_pred = tree_reg.predict(X_test)

    # metrics
    # r2
    r2 = mt.r2_score(y_test, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_test, y_pred)
    print(mse)
    mse_list.append(mse)
    
    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)
    
    # MAE
    mae = mt.mean_absolute_error(y_test, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_test, y_pred)
    print(mape)
    mape_list.append(mape)

df_random_forest_test = {'Max-Depth': max_depth, 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_random_forest_test = pd.DataFrame(data=df_random_forest_test, index=max_depth).reset_index(drop=True)
df_random_forest_test

Max-Depth: 20
0.3758597111013301
303.89451575521974
17.432570543532005
12.465829848072257
6.2821480176756115
Max-Depth: 21
0.37615617839150806
303.7501655423135
17.428429807137345
12.515588191710806
6.451498875457264
Max-Depth: 22
0.3763049809135188
303.677713449132
17.426351122628397
12.47418925357388
6.3368200433608655
Max-Depth: 23
0.3819590170769602
300.9247577234514
17.347182991006104
12.492735239506862
6.224377473668085
Max-Depth: 24
0.38558664867505066
299.1584603581823
17.296197858436468
12.410022585762926
6.297753526136134
Max-Depth: 25
0.38069918195144403
301.5381726755221
17.364854525031937
12.486605072103183
6.2899272243163535
Max-Depth: 26
0.39078100980032093
296.62932085717813
17.222930089191507
12.352463998329126
6.130299127880817
Max-Depth: 27
0.38272541423608974
300.55159819875416
17.336424031464915
12.380722189005734
6.440850042868748
Max-Depth: 28
0.37978942359068757
301.9811349091959
17.377604406511153
12.473249125793556
6.316768542613421
Max-Depth: 29
0.38924703020

Unnamed: 0,Max-Depth,R2,MSE,RMSE,MAE,MAPE
0,20,0.37586,303.894516,17.432571,12.46583,6.282148
1,21,0.376156,303.750166,17.42843,12.515588,6.451499
2,22,0.376305,303.677713,17.426351,12.474189,6.33682
3,23,0.381959,300.924758,17.347183,12.492735,6.224377
4,24,0.385587,299.15846,17.296198,12.410023,6.297754
5,25,0.380699,301.538173,17.364855,12.486605,6.289927
6,26,0.390781,296.629321,17.22293,12.352464,6.130299
7,27,0.382725,300.551598,17.336424,12.380722,6.44085
8,28,0.379789,301.981135,17.377604,12.473249,6.316769
9,29,0.389247,297.376217,17.2446,12.389731,6.167868


In [147]:
tree_reg = en.RandomForestRegressor(n_estimators=900)

# model training
tree_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

# predict
y_pred = tree_reg.predict(X_test)

# metrics
# r2
r2 = mt.r2_score(y_test, y_pred)
print(r2)
#r2_list.append(r2)

# MSE
mse = mt.mean_squared_error(y_test, y_pred)
print(mse)
#mse_list.append(mse)

# RMSE
rmse = np.sqrt(mse)
print(rmse)
#rmse_list.append(rmse)

# MAE
mae = mt.mean_absolute_error(y_test, y_pred)
print(mae)
#mae_list.append(mae)

# MAPE
mape = mt.mean_absolute_percentage_error(y_test, y_pred)
print(mape)
#mape_list.append(mape)

df_random_forest_test = {'Algoritmo': 'Random Forest Regressor', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_random_forest_test = pd.DataFrame(data=df_random_forest_test, index=[0]).reset_index(drop=True)
df_random_forest_test

0.40864728456462607
287.9302142389709
16.968506541206594
12.17295684222357
6.278072418101334


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Random Forest Regressor,0.408647,287.930214,16.968507,12.172957,6.278072


## Linear Regression 

In [148]:
# model definition
linear_reg = lm.LinearRegression()

# model training
linear_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

# predict
y_pred = linear_reg.predict(X_test)

# metrics
# r2
r2 = mt.r2_score(y_test, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_test, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_test, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_test, y_pred)
print(mape)


df_linear_test = {'Algoritmo': 'Linear Regression', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_test = pd.DataFrame(data=df_linear_test, index=[0]).reset_index(drop=True)
df_linear_test

0.05116551777115064
461.9884353525593
21.4939162404751
17.144197086987703
8.531355027820084


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.051166,461.988435,21.493916,17.144197,8.531355


## Linear Regression Lasso

In [153]:
r2_list = []
mse_list = []
rmse_list = []
mae_list = []
mape_list = []
alpha = np.arange(0.00001,0.00005,0.00001)

for i in alpha:

    # model definition
    linear_reg = lm.Lasso(alpha=i)

    # model training
    linear_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

    # predict
    y_pred = linear_reg.predict(X_test)

    # metrics
    # r2
    r2 = mt.r2_score(y_test, y_pred)
    print(r2)
    r2_list.append(r2)

    # MSE
    mse = mt.mean_squared_error(y_test, y_pred)
    print(mse)
    mse_list.append(mse)

    # RMSE
    rmse = np.sqrt(mse)
    print(rmse)
    rmse_list.append(rmse)

    # MAE
    mae = mt.mean_absolute_error(y_test, y_pred)
    print(mae)
    mae_list.append(mae)

    # MAPE
    mape = mt.mean_absolute_percentage_error(y_test, y_pred)
    print(mape)
    mape_list.append(mape)

df_linear_lasso_test = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2_list, 'MSE': mse_list, 'RMSE': rmse_list, 'MAE': mae_list, 'MAPE': mape_list}
df_linear_lasso_test = pd.DataFrame(data=df_linear_lasso_test, index=alpha).reset_index(drop=True)
df_linear_lasso_test

0.05116526728528481
461.9885573143718
21.493919077598942
17.144196475086336
8.531372696518448
0.05116501891381031
461.9886782466851
21.493921890773798
17.144195836136312
8.531390365605768
0.05116476883856391
461.9888000085666
21.49392472324602
17.144195213993452
8.531408034448182
0.05116451627486118
461.98892298207994
21.493927583903318
17.144194617669324
8.531425702913102


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.051165,461.988557,21.493919,17.144196,8.531373
1,Linear Regression Lasso,0.051165,461.988678,21.493922,17.144196,8.53139
2,Linear Regression Lasso,0.051165,461.9888,21.493925,17.144195,8.531408
3,Linear Regression Lasso,0.051165,461.988923,21.493928,17.144195,8.531426


In [154]:
# model definition
linear_reg = lm.Lasso(alpha=0.00001)

# model training
linear_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

# predict
y_pred = linear_reg.predict(X_test)

# metrics
# r2
r2 = mt.r2_score(y_test, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_test, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_test, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_test, y_pred)
print(mape)


df_linear_lasso_test = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_lasso_test = pd.DataFrame(data=df_linear_lasso_test, index=[0]).reset_index(drop=True)
df_linear_lasso_test

0.05116526728528481
461.9885573143718
21.493919077598942
17.144196475086336
8.531372696518448


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.051165,461.988557,21.493919,17.144196,8.531373


## Linear Regression Ridge

In [156]:
# model definition
linear_reg = lm.Ridge(alpha=0.00001)

# model training
linear_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

# predict
y_pred = linear_reg.predict(X_test)

# metrics
# r2
r2 = mt.r2_score(y_test, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_test, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_test, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_test, y_pred)
print(mape)


df_linear_ridge_test = {'Algoritmo': 'Linear Regression Ridge', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_ridge_test = pd.DataFrame(data=df_linear_ridge_test, index=[0]).reset_index(drop=True)
df_linear_ridge_test

0.05116551779163181
461.9884353425869
21.49391624024312
17.144197084514566
8.531355034786472


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Ridge,0.051166,461.988435,21.493916,17.144197,8.531355


## Linear Regression Elastic Net

In [157]:
# model definition
linear_reg = lm.ElasticNet(alpha=0.00001)

# model training
linear_reg.fit(pd.concat([X_training, X_val]), pd.concat([y_training, y_val]))

# predict
y_pred = linear_reg.predict(X_test)

# metrics
# r2
r2 = mt.r2_score(y_test, y_pred)
print(r2)


# MSE
mse = mt.mean_squared_error(y_test, y_pred)
print(mse)


# RMSE
rmse = np.sqrt(mse)
print(rmse)


# MAE
mae = mt.mean_absolute_error(y_test, y_pred)
print(mae)


# MAPE
mape = mt.mean_absolute_percentage_error(y_test, y_pred)
print(mape)


df_linear_elastic_test = {'Algoritmo': 'Linear Regression Lasso', 'R2': r2, 'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'MAPE': mape}
df_linear_elastic_test = pd.DataFrame(data=df_linear_elastic_test, index=[0]).reset_index(drop=True)
df_linear_elastic_test

0.051165542713248136
461.98842320822763
21.49391595796884
17.144178170622304
8.531416308696343


Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression Lasso,0.051166,461.988423,21.493916,17.144178,8.531416


## Polinomial Regression

In [158]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(pd.concat([X_training, X_val]))
X_poly_test = poly.fit_transform(X_test)

# model training
model = lm.LinearRegression()
model.fit(X_poly, pd.concat([y_training, y_val]))

# predict
yhat = model.predict(X_poly_test)


# metrics
r2 = mt.r2_score(y_test, yhat)
mse = mt.mean_squared_error(y_test, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_test, yhat))
mae = mt.mean_absolute_error(y_test, yhat)
mape = mt.mean_absolute_percentage_error(y_test, yhat)


df_poli_test = {'Algoritmo': 'Polinomial Regression', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_test = pd.DataFrame(data=df_poli_test, index=[0])
df_poli_test

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression,0.091,442.641,21.039,16.736,8.277


## Polinomial Regression Lasso


In [162]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(pd.concat([X_training, X_val]))
X_poly_test = poly.fit_transform(X_test)

# model training
model = lm.Lasso(alpha=0.01)
model.fit(X_poly, pd.concat([y_training, y_val]))

# predict
yhat = model.predict(X_poly_test)


# metrics
r2 = mt.r2_score(y_test, yhat)
mse = mt.mean_squared_error(y_test, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_test, yhat))
mae = mt.mean_absolute_error(y_test, yhat)
mape = mt.mean_absolute_percentage_error(y_test, yhat)


df_poli_lasso_test = {'Algoritmo': 'Polinomial Regression Lasso', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_lasso_test = pd.DataFrame(data=df_poli_lasso_test, index=[0])
df_poli_lasso_test

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Lasso,0.085,445.332,21.103,16.786,8.342


## Polinomial Regression Ridge

In [164]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(pd.concat([X_training, X_val]))
X_poly_test = poly.fit_transform(X_test)

# model training
model = lm.Ridge()
model.fit(X_poly, pd.concat([y_training, y_val]))

# predict
yhat = model.predict(X_poly_test)


# metrics
r2 = mt.r2_score(y_test, yhat)
mse = mt.mean_squared_error(y_test, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_test, yhat))
mae = mt.mean_absolute_error(y_test, yhat)
mape = mt.mean_absolute_percentage_error(y_test, yhat)


df_poli_ridge_test = {'Algoritmo': 'Polinomial Regression Ridge', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_ridge_test = pd.DataFrame(data=df_poli_ridge_test, index=[0])
df_poli_ridge_test

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Ridge,0.09,442.967,21.047,16.742,8.309


## Polinomial Regression Elastic Net

In [169]:
# model definition
poly = pp.PolynomialFeatures(degree=2)

# transform
X_poly = poly.fit_transform(pd.concat([X_training, X_val]))
X_poly_test = poly.fit_transform(X_test)

# model training
model = lm.ElasticNet(l1_ratio=0.1, alpha=0.2)
model.fit(X_poly, pd.concat([y_training, y_val]))

# predict
yhat = model.predict(X_poly_test)


# metrics
r2 = mt.r2_score(y_test, yhat)
mse = mt.mean_squared_error(y_test, yhat)
rmse = np.sqrt(mt.mean_squared_error(y_test, yhat))
mae = mt.mean_absolute_error(y_test, yhat)
mape = mt.mean_absolute_percentage_error(y_test, yhat)


df_poli_elastic_test = {'Algoritmo': 'Polinomial Regression Elastic Net', 'R2': round(r2, 3), 'MSE': round(mse, 3), 'RMSE': round(rmse, 3), 'MAE': round(mae, 3), 'MAPE': round(mape, 3)}
df_poli_elastic_test = pd.DataFrame(data=df_poli_elastic_test, index=[0])
df_poli_elastic_test

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Polinomial Regression Elastic Net,0.045,465.132,21.567,17.116,8.628


## Resultado Final Teste

In [170]:
df_final_test = pd.concat([df_decision_test, df_random_forest_test, df_linear_test, df_linear_lasso_test, df_linear_ridge_test, df_linear_elastic_test, df_poli_test, df_poli_lasso_test, df_poli_ridge_test, df_poli_elastic_test])

In [171]:
df_final_test

Unnamed: 0,Algoritmo,R2,MSE,RMSE,MAE,MAPE
0,Decision Tree Regressor,-0.155621,562.673141,23.720732,15.702065,6.126925
0,Random Forest Regressor,0.408647,287.930214,16.968507,12.172957,6.278072
0,Linear Regression,0.051166,461.988435,21.493916,17.144197,8.531355
0,Linear Regression Lasso,0.051165,461.988557,21.493919,17.144196,8.531373
0,Linear Regression Ridge,0.051166,461.988435,21.493916,17.144197,8.531355
0,Linear Regression Lasso,0.051166,461.988423,21.493916,17.144178,8.531416
0,Polinomial Regression,0.091,442.641,21.039,16.736,8.277
0,Polinomial Regression Lasso,0.085,445.332,21.103,16.786,8.342
0,Polinomial Regression Ridge,0.09,442.967,21.047,16.742,8.309
0,Polinomial Regression Elastic Net,0.045,465.132,21.567,17.116,8.628
