# Import of libraries

In [1]:
from sklearn.preprocessing import PolynomialFeatures

In [2]:
from sklearn.model_selection import cross_val_score

In [3]:
from sklearn.ensemble import RandomForestRegressor

In [4]:
from sklearn.linear_model import LinearRegression

In [5]:
from sklearn.model_selection import GridSearchCV

In [6]:
from sklearn.tree import DecisionTreeRegressor

In [7]:
from sklearn.linear_model import ElasticNet

In [8]:
from sklearn.linear_model import Lasso

In [9]:
from sklearn.linear_model import Ridge

In [10]:
from sklearn import metrics as mt

In [11]:
import matplotlib.pyplot as plt

In [12]:
import pandas as pd

In [13]:
import numpy as np

# Function definition

In [14]:
def join_data(x_df1, x_df2, y_df1, y_df2):
    
    """
    This function is responsible for joining the dataframes.

    Args:
        parameter1 (DataFrame): Receives a dataframe that will join with the x_df2.
        parameter2 (DataFrame): Receives a dataframe that will join with the x_df1.
        parameter3 (DataFrame): Receives a dataframe that will join with the y_df2.
        parameter4 (DataFrame): Receives a dataframe that will join with the y_df1.

    Returns:
        returns two dataframes.

    Example:
        df1
        df2
        df3
        df4
        x, y = join_data(df1, df2, df3, df4)
    """
    # Combining the data from x_df1 and x_df2, the DataFrames vertically, resetting the indices
    new_x = pd.concat([x_df1, x_df2], axis=0)
    new_x.reset_index(drop=True, inplace=True)

    # Combining the data from y_df1 and y_df2, the DataFrames vertically, resetting the indices
    new_y = pd.concat([y_df1, y_df2], axis=0)
    new_y.reset_index(drop=True, inplace=True)

    # Returns two dataframes
    return new_x, new_y

In [15]:
def get_metrics(y_real, y_pred, list_R2, list_MSE, list_RMSE, list_MAE, list_MAPE):

    """
    This function is responsible for calculating the metrics: R2, MSE, RMSE, MAE e MAPE.

    Args:
        parameter1 (DataFrame): represents the actual values.
        parameter2 (numpy.ndarray): represents the predicted values.
        parameter3 (list): location where the R2 metrics will be stored
        parameter4 (list): location where the MSE metrics will be stored
        parameter5 (list): location where the RMSE metrics will be stored
        parameter6 (list): location where the MAE metrics will be stored
        parameter7 (list): location where the MAPE metrics will be stored

    Returns:
        There is no return.

    Example:
        get_metrics(y_real, y_pred, list_R2, list_MSE, list_RMSE, list_MAE, list_MAPE)
    """
    #R2
    r2 = mt.r2_score(y_real, y_pred)
    list_R2.append(r2)

    #MSE
    mse = mt.mean_squared_error(y_real, y_pred)
    list_MSE.append(mse)

    #RMSE
    rmse = np.sqrt(mse)
    list_RMSE.append(rmse)

    #MAE
    mae = mt.mean_absolute_error(y_real, y_pred)
    list_MAE.append(mae)

    #MAPE

    y_real_array = y_real.values
    
    mape = np.mean(np.abs((y_real_array - y_pred) / y_real_array)) * 100
    list_MAPE.append(mape)

In [16]:
def show_table(list_names_algorithm, list_names_metrics, list_values_metrics):

    """
    This function is responsible for creating a table and displaying it.

    Args:
        parameter1 (list): Receives a list with the names of the algorithms worked on.
        parameter2 (list): Receives a list with the names of the defined metrics.
        parameter3 (list): Receives a list of lists with the resulting metric values. 
                           The size of that list must be equal to the size of the list of the first parameter, 
                           Otherwise, an error will occur.

    Returns:
        returns a table in dataframe format.

    Example:
        algorithm = ['A','B', 'C']
        metrics = ['aa', 'bb', 'cc']
        values = [[1], [2], [3]]
        show_table(algorithm, metrics, values)
    """
    
    # Create a dictionary = table
    tab = {}

    # Add elements to the dictionary by assigning a value to a specific key
    tab['Algorithm Name'] = list_names_algorithm

    # Error handling
    try:
        # Scrolling through the list
        for names in list_names_metrics:
            # Add elements to the dictionary by assigning a value to a specific key
            tab[names] = list_values_metrics[list_names_metrics.index(names)]

        # Create a DataFrame from data
        df_tab = pd.DataFrame(tab)
            
    except ValueError:
        
        print('Unable to execute this command! Check the size and type of the third parameter.')
        
    else:
        # Returns a dataframe
        return df_tab

# Loading data

In [17]:
x_train = pd.read_csv('../Datasets/ensaio_regressao/X_train.csv')

In [18]:
y_train = pd.read_csv('../Datasets/ensaio_regressao/Y_train.csv')

In [19]:
x_vld = pd.read_csv('../Datasets/ensaio_regressao/X_vld.csv')

In [20]:
y_vld = pd.read_csv('../Datasets/ensaio_regressao/Y_vld.csv')

In [21]:
x_test = pd.read_csv('../Datasets/ensaio_regressao/X_test.csv')

In [22]:
y_test = pd.read_csv('../Datasets/ensaio_regressao/Y_test.csv')

In [23]:
# Juntando os dados de treinamento com os dados de validação
x_train_vld, y_train_vld = join_data(x_train, x_vld, y_train, y_vld)

In [24]:
# Juntando os dados de teste com os dados juntados anteriormente
x, y = join_data(x_test, x_train_vld, y_test, y_train_vld)

# Training supervised regression learning algorithms

### Linear Regressionn

#### Para dados de treinamento

In [25]:
model_lr = LinearRegression()

In [26]:
lr_train = model_lr.fit(x_train, y_train)

#### Para dados de validação

In [27]:
lr_vld = model_lr.fit(x_train_vld, y_train_vld)

#### Para dados de teste

In [28]:
lr_test = model_lr.fit(x, y)

### Decision Tree Regressor

#### Para dados de treinamento

In [29]:
model_tree_r = DecisionTreeRegressor()
tree_r_train = model_tree_r.fit(x_train, y_train)

#### Para dados de validação

In [30]:
model_tree_r = DecisionTreeRegressor(max_depth=20)

In [31]:
tree_r_vld = model_tree_r.fit(x_train_vld, y_train_vld)

##### Obtendo o melhor parametro

In [None]:
# Valores de d
d_values = list(range(1, 40))

# Lista para armazenar os valores da R2
list_r2_tree_r = []

# Lista para armazenar os valores da MSE
list_mse_tree_r = []

# Lista para armazenar os valores da RMSE
list_rmse_tree_r = []

# Lista para armazenar os valores da MAE
list_mae_tree_r = []

# Lista para armazenar os valores da MAPE
list_mape_tree_r = []

for d in d_values:
    # Treinando o algoritmo
    tree_r = DecisionTreeRegressor(max_depth=d).fit(x_train, y_train)
    
    # Previsão
    y_pred = tree_r.predict(x_vld)
    
    # Performance  
    r2_tree_r = mt.r2_score(y_vld, y_pred)
    list_r2_tree_r.append(r2_tree_r)

    mse_tree_r = mt.mean_squared_error(y_vld, y_pred)
    list_mse_tree_r.append(mse_tree_r)

    rmse_tree_r = np.sqrt(mse_tree_r)
    list_rmse_tree_r.append(rmse_tree_r)

    mae_tree_r = mt.mean_absolute_error(y_vld, y_pred)
    list_mae_tree_r.append(mae_tree_r)

    y_vld_array = y_vld.values # Convertendo o DataFrame em um numpy.ndarray

    mape_tree_r = np.mean(np.abs((y_vld_array - y_pred) / y_vld_array)) * 100
    list_mape_tree_r.append(mape_tree_r)

In [None]:
# Plot the results
plt.plot(d_values, list_r2_tree_r, marker='o', linestyle='-', label='R2')
plt.plot(d_values, list_mse_tree_r, marker='o', linestyle='-', label='MSE')
plt.plot(d_values, list_rmse_tree_r, marker='o', linestyle='-', label='RMSE')
plt.plot(d_values, list_mae_tree_r, marker='o', linestyle='-', label='MAE')
plt.plot(d_values, list_mape_tree_r, marker='o', linestyle='-', label='MAPE')

plt.xlabel('Valor de max_depth')
plt.ylabel('Performance')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Plot the results
plt.scatter(x_train.loc[:, ['song_duration_ms']], y_train, edgecolor="black", c="darkorange", label="dado de treinamento")
plt.plot(x_vld, y_pred, color="cornflowerblue", label="max_depth=2", linewidth=2)
# plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("feature")
plt.ylabel("target = label - é o que estamos tentando prever")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()

#### Para dados de teste

In [32]:
tree_r_test = model_tree_r.fit(x, y)

### Random Forest Regressor

#### Para dados de treinamento

In [34]:
rf_regressor = RandomForestRegressor()
rf_regressor_train = rf_regressor.fit(x_train, y_train.values.ravel())

#### Para dados de validação

In [35]:
rf_regressor = RandomForestRegressor(n_estimators=200, max_depth=30)
rf_regressor_vld = rf_regressor.fit(x_train_vld, y_train_vld.values.ravel())

##### Obtendo o melhor parâmetro

In [None]:
# Crie um dicionário de hiperparâmetros que deseja ajustar
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2']
}

# Crie o regressor Random Forest
rf_regressor = RandomForestRegressor()

# Crie o objeto GridSearchCV para encontrar os melhores hiperparâmetros
grid_search = GridSearchCV(estimator=rf_regressor, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Realize a busca em grade nos dados de treinamento
grid_search.fit(x_train, y_train.values.ravel())

# Exiba os melhores hiperparâmetros encontrados
best_params = grid_search.best_params_
print("Melhores hiperparâmetros encontrados:")
print(best_params)

# Treine um novo modelo com os melhores hiperparâmetros no conjunto de treinamento completo
best_rf_regressor = RandomForestRegressor(random_state=42, **best_params)
best_rf_regressor.fit(x_train, y_train)

# Avalie o modelo final no conjunto de teste
y_pred = best_rf_regressor.predict(x_vld)
mse = mt.mean_squared_error(y_vld, y_pred)
print(f"Erro médio quadrático no conjunto de teste: {mse}")

In [None]:
# Avalie o modelo final no conjunto de teste
y_pred = best_rf_regressor.predict(x_train)
mse = mt.mean_squared_error(y_train, y_pred)
print(f"Erro médio quadrático no conjunto de teste: {mse}")

#### Para dados de teste

In [36]:
rf_regressor_test = rf_regressor.fit(x, y.values.ravel())

### Polinomial Regression

#### Para dados de treinamento

In [37]:
poly_features_pr_train = PolynomialFeatures()
x_poly_train = poly_features_pr_train.fit_transform(x_train)

In [38]:
model_pr = LinearRegression()
pr_train = model_pr.fit(x_poly_train, y_train)

#### Para dados de validação

In [39]:
poly_features_pr_vld = PolynomialFeatures(degree=2)
x_poly_train_vld = poly_features_pr_vld.fit_transform(x_train_vld)

In [40]:
model_pr = LinearRegression()
pr_vld = model_pr.fit(x_poly_train_vld, y_train_vld)

##### Obtendo o melhor valor para os parametros

In [None]:
# Transformar os recursos em recursos polinomiais
poly_features = PolynomialFeatures(degree=2)  # Grau do polinômio
X_poly = poly_features.fit_transform(x_train)

In [None]:
# Treinar o modelo de regressão linear
model = LinearRegression()
model.fit(X_poly, y_train)

In [None]:
# Fazer previsões
X_vld_poly = poly_features.transform(x_vld)  # Transformar os dados de teste em recursos polinomiais
y_pred = model.predict(X_vld_poly)  # Fazer previsões

In [None]:
# Plotar os dados originais
plt.scatter(x_train.iloc[:, 0], y_train, label='Dados originais')

# Plotar as previsões do modelo
plt.plot(x_vld, y_pred, color='red', label='Previsões')

plt.xlabel('x_train')
plt.ylabel('y_train')
plt.legend()
plt.show()

In [None]:
# Lista para armazenar os escores de validação cruzada
cross_val_scores = []

# Range de graus de polinômio para tentar
degrees = list(range(4, 6))

for degree in degrees:
    # Transformar os recursos em recursos polinomiais
    poly_features = PolynomialFeatures(degree=degree)
    X_poly = poly_features.fit_transform(x_train)

    # Criar e ajustar o modelo de regressão linear
    model = LinearRegression()
    
    # Calcular o erro médio quadrático usando validação cruzada
    mse_scores = -cross_val_score(model, X_poly, y_train, cv=5, scoring='neg_mean_squared_error')
    
    # Calcular a média dos erros quadráticos
    mean_mse = mse_scores.mean()
    
    # Armazenar a pontuação média de validação cruzada para este grau do polinômio
    cross_val_scores.append(mean_mse)

# Encontrar o grau do polinômio com o menor erro médio quadrático
best_degree = degrees[np.argmin(cross_val_scores)]
print(f"O melhor grau do polinômio é {best_degree}")

In [None]:
# Plotar os resultados
plt.plot(degrees, cross_val_scores, marker='o')
plt.xlabel('Grau do Polinômio')
plt.ylabel('Erro Médio Quadrático (MSE)')
plt.title('Erro Médio Quadrático vs. Grau do Polinômio')
plt.show()

#### Para dados de teste

In [41]:
poly_features_pr_test = PolynomialFeatures(degree=2)
x_poly_test = poly_features_pr_test.fit_transform(x)

In [42]:
model_pr = LinearRegression()
pr_test = model_pr.fit(x_poly_test, y)

### Linear Regression Lasso

#### Para dados de treinamento

In [43]:
model_lr_lasso = Lasso()
lr_lasso_train = model_lr_lasso.fit(x_train, y_train)

#### Para dados de validação

In [44]:
model_lr_lasso = Lasso(alpha=0.001, max_iter=1000)
lr_lasso_vld = model_lr_lasso.fit(x_train_vld, y_train_vld)

##### Obtendo o melhor valor para os parametros

In [None]:
# Defina uma grade de valores de hiperparâmetros para testar
param_grid = {'alpha': [0.001, 0.01, 0.1, 1.0, 10.0]}

# Crie o modelo Lasso
lasso = Lasso()

# Use GridSearchCV para encontrar os melhores parâmetros
grid_search = GridSearchCV(lasso, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(x_train, y_train)

# Obtenha os melhores parâmetros
best_alpha = grid_search.best_params_['alpha']

In [None]:
# Crie o modelo Lasso com os melhores parâmetros
lasso_model = Lasso(alpha=best_alpha)

# Treine o modelo com os dados de treinamento
lasso_model.fit(x_train, y_train)

# Faça previsões no conjunto de validação
y_pred = lasso_model.predict(x_vld)

# Avalie o desempenho do modelo
mse = mt.mean_squared_error(y_vld, y_pred)
r2 = mt.r2_score(y_vld, y_pred)

print(f'Melhor valor de alpha: {best_alpha}')
print(f'MSE: {mse}')
print(f'R-squared: {r2}')

#### Para dados de teste

In [45]:
model_lr_lasso = Lasso(alpha=0.001, max_iter=1000)
lr_lasso_test = model_lr_lasso.fit(x, y)

### Linear Regression Ridge

#### Para dados de treinamento

In [48]:
alpha = 1
max_iter = None
model_lr_ridge = Ridge(alpha=alpha, max_iter=max_iter)
lr_ridge_train = model_lr_ridge.fit(x_train, y_train)

#### Para dados de validação

In [50]:
alpha = 10
max_iter = 1000
model_lr_ridge = Ridge(alpha=alpha, max_iter=max_iter)
lr_ridge_vld = model_lr_ridge.fit(x_train_vld, y_train_vld)

##### Obtendo o melhor parametro

In [None]:
# Defina uma grade de valores de hiperparâmetros para testar
param_grid = {'alpha': [0.001, 0.01, 0.1, 1.0, 10.0]}

# Crie um objeto de regressão Ridge
ridge_reg = Ridge()  # O parâmetro alpha controla a força da regularização

# Use GridSearchCV para encontrar os melhores parâmetros
grid_search = GridSearchCV(ridge_reg, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(x_train, y_train)

# Obtenha os melhores parâmetros
best_alpha = grid_search.best_params_['alpha']

In [None]:
ridge_reg = Ridge(alpha=best_alpha)

# Treine o modelo com os dados de treinamento
ridge_reg.fit(x_train, y_train)

# Faça previsões no conjunto de validação
y_pred = ridge_reg.predict(x_vld)

# Avalie o desempenho do modelo
mse = mt.mean_squared_error(y_vld, y_pred)
r2 = mt.r2_score(y_vld, y_pred)

print(f'Melhor valor de alpha: {best_alpha}')
print(f'MSE: {mse}')
print(f'R-squared: {r2}')

#### Para dados de teste

In [51]:
alpha = 10
max_iter = 1000
model_lr_ridge = Ridge(alpha=alpha, max_iter=max_iter)
lr_ridge_test = model_lr_ridge.fit(x, y)

### Linear Regression Elastic Net

#### Para dados de treinamento

In [54]:
alpha = 1 
l1_ratio = 0.5
max_iterint = 1000
model_elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iterint)
lr_elastic_net_train = model_elastic_net.fit(x_train, y_train)

#### Para dados de validação

In [55]:
alpha=0.001
l1_ratio=0.5
max_iterint=1000
model_elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iterint)
lr_elastic_net_vld = model_elastic_net.fit(x_train, y_train)

##### Obtendo os melhores valores para o parametro

In [None]:
# Crie um modelo Elastic Net
max_iter = 1000
alpha = 0.001 # Parâmetro de regularização (alpha)
l1_ratio = 0.5 # Proporção de regularização L1 (0.5 significa que é uma combinação igual de L1 e L2)
elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iter)

# Treine o modelo
elastic_net.fit(x_train, y_train)

# Faça previsões no conjunto de teste
y_pred = elastic_net.predict(x_vld)

# Avalie o desempenho do modelo
mse = mt.mean_squared_error(y_vld, y_pred)
r2 = mt.r2_score(y_vld, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

#### Para dados de teste

In [56]:
alpha=0.001
l1_ratio=0.5
max_iterint=1000
model_elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iterint)
lr_elastic_net_test = model_elastic_net.fit(x, y)

### Polinomial Regression Lasso

#### Para dados de treinamento

In [57]:
poly_features_pr_lasso_train = PolynomialFeatures()
x_train_poly = poly_features_pr_lasso_train.fit_transform(x_train)

model_pr_lasso = Lasso()
pr_lasso_train = model_pr_lasso.fit(x_train_poly, y_train)

#### Para dados de validação

In [58]:
poly_features_pr_lasso_vld = PolynomialFeatures(degree=2)
x_train_vld_poly = poly_features_pr_lasso_vld.fit_transform(x_train_vld)

model_pr_lasso = Lasso(alpha=0.01, max_iter=1000)
pr_lasso_vld = model_pr_lasso.fit(x_train_vld_poly, y_train_vld)

##### Obtendo os mehores valores para o parametro

In [None]:
degree = 2
alpha= 0.01
max_iter= 1000

poly_features = PolynomialFeatures(degree=degree)
X_poly = poly_features.fit_transform(x_train)

# Aplicando a regressão polinomial Lasso
lasso_reg = Lasso(alpha=alpha, max_iter=max_iter)  # Alpha é o hiperparâmetro de regularização
lasso_reg.fit(X_poly, y_train)

# Predição
X_new_poly = poly_features.transform(x_vld)
y_pred = lasso_reg.predict(X_new_poly)

mse = mt.mean_squared_error(y_vld, y_pred)

print(f"Mean Squared Error: {mse:.2f}")

#### Para dados de teste

In [60]:
poly_features_pr_lasso_test = PolynomialFeatures(degree=2)
x_poly = poly_features_pr_lasso_test.fit_transform(x)

model_pr_lasso = Lasso(alpha=0.01, max_iter=1000)
pr_lasso_test = model_pr_lasso.fit(x_poly, y)

### Polinomial Regression Ridge

#### Para dados de treinamento

In [61]:
poly_features_pr_ridge_train = PolynomialFeatures()
x_train_poly = poly_features_pr_ridge_train.fit_transform(x_train)

model_pr_ridge = Ridge()
pr_ridge_train = model_pr_ridge.fit(x_train_poly, y_train)

#### Para dados de validação

In [62]:
poly_features_pr_ridge_vld = PolynomialFeatures(degree=2)
x_train_vld_poly = poly_features_pr_ridge_vld.fit_transform(x_train_vld)

model_pr_ridge = Ridge(alpha=0.001, max_iter=1000)
pr_ridge_vld = model_pr_ridge.fit(x_train_vld_poly, y_train_vld)

##### Obtendo os melhores valores para os parametros

In [None]:
alpha = 0.001
degree = 2
max_iter = 1000
poly = PolynomialFeatures(degree=degree)
X_poly = poly.fit_transform(x_train)

ridge = Ridge(alpha=alpha, max_iter=max_iter)
ridge.fit(X_poly, y_train)

X_test_poly = poly.transform(x_vld)
y_pred = ridge.predict(X_test_poly)

# Calcular o erro médio quadrático
mse = mt.mean_squared_error(y_vld, y_pred)
print(f"Mean Squared Error: {mse}")

#### Para dados de teste

In [63]:
poly_features_pr_ridge_test = PolynomialFeatures(degree=2)
x_poly = poly_features_pr_ridge_test.fit_transform(x)

model_pr_ridge = Ridge(alpha=0.001, max_iter=1000)
pr_ridge_test = model_pr_ridge.fit(x_poly, y)

### Polinomial Regression Elastic Net

#### Para dados de treinamento

In [64]:
poly_features_pr_elastic_net_train = PolynomialFeatures()
x_train_poly = poly_features_pr_elastic_net_train.fit_transform(x_train)

model_pr_elastic_net = ElasticNet()
pr_elastic_net_train = model_pr_elastic_net.fit(x_train_poly, y_train)

#### Para dados de validação

In [65]:
poly_features_pr_elastic_net_vld = PolynomialFeatures(degree=2)
x_train_vld_poly = poly_features_pr_elastic_net_vld.fit_transform(x_train_vld)

model_pr_elastic_net = ElasticNet(alpha=0.0001, l1_ratio=0.5, max_iter=100000)
pr_elastic_net_vld = model_pr_elastic_net.fit(x_train_vld_poly, y_train_vld)

##### Obtendo os melhores valores para o parametro

In [None]:
degree = 2  # Grau do polinômio
alpha = 0.0001  # Parâmetro de regularização L1 (lasso)
l1_ratio = 0.5  # Proporção de L1 em relação a L2 (0.5 para Elastic Net)
max_iter = 100000

poly = PolynomialFeatures(degree=degree)
X_train_poly = poly.fit_transform(x_train)

elastic_net = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iter)
elastic_net.fit(X_train_poly, y_train)

x_vld_poly = poly.transform(x_vld)

y_pred = elastic_net.predict(x_vld_poly)
mse = mt.mean_squared_error(y_vld, y_pred)
print(f"Erro médio quadrático: {mse}")

#### Para dados de teste

In [66]:
poly_features_pr_elastic_net_test = PolynomialFeatures(degree=2)
x_poly = poly_features_pr_elastic_net_test.fit_transform(x)

model_pr_elastic_net = ElasticNet(alpha=0.0001, l1_ratio=0.5, max_iter=100000)
pr_elastic_net_test = model_pr_elastic_net.fit(x_poly, y)

# Previsão das observações

## Linear Regression

### Para dados de treinamento

In [67]:
y_pred_lr_train = lr_train.predict(x_vld)

### Para dados de validação

In [68]:
y_pred_lr_vld = lr_vld.predict(x_test)

### Para dados de teste

In [69]:
y_pred_lr_test = lr_test.predict(x_test)

## Decision Tree Regressor

### Para dados de treinamento

In [70]:
y_pred_tree_r_train = tree_r_train.predict(x_vld)

### Para dados de validação

In [71]:
y_pred_tree_r_vld = tree_r_vld.predict(x_test)

### Para dados de teste

In [72]:
y_pred_tree_r_test = tree_r_test.predict(x_test)

## Random Forest Regressor

### Para dados de treinamento

In [73]:
y_pred_rf_regressor_train = rf_regressor_train.predict(x_vld)

### Para dados de validação

In [74]:
y_pred_rf_regressor_vld = rf_regressor_vld.predict(x_test)

### Para dados de teste

In [75]:
y_pred_rf_regressor_test = rf_regressor_test.predict(x_test)

## Polinomial Regression

### Para dados de treinamento

In [76]:
x_vld_poly = poly_features_pr_train.transform(x_vld)
y_pred_pr_train = pr_train.predict(x_vld_poly)

### Para dados de validação

In [77]:
x_test_poly = poly_features_pr_vld.transform(x_test)
y_pred_pr_vld = pr_vld.predict(x_test_poly)

### Para dados de teste

In [78]:
x_test_poly = poly_features_pr_test.transform(x_test)
y_pred_pr_test = pr_test.predict(x_test_poly)

## Linear Regression Lasso

### Para dados de treinamento

In [79]:
y_pred_lr_lasso_train = lr_lasso_train.predict(x_vld)

### Para dados de validação

In [80]:
y_pred_lr_lasso_vld = lr_lasso_vld.predict(x_test)

### Para dados de teste

In [81]:
y_pred_lr_lasso_test = lr_lasso_test.predict(x_test)

## Linear Regression Ridge

### Para dados de treinamento

In [82]:
y_pred_lr_ridge_train = lr_ridge_train.predict(x_vld)

### Para dados de validação

In [83]:
y_pred_lr_ridge_vld = lr_ridge_vld.predict(x_test)

### Para dados de teste

In [84]:
y_pred_lr_ridge_test = lr_ridge_test.predict(x_test)

## Linear Regression Elastic Net

### Para dados de treinamento

In [85]:
y_pred_lr_elastic_net_train = lr_elastic_net_train.predict(x_vld)

### Para dados de validação

In [86]:
y_pred_lr_elastic_net_vld = lr_elastic_net_vld.predict(x_test)

### Para dados de teste

In [87]:
y_pred_lr_elastic_net_test = lr_elastic_net_test.predict(x_test)

## Polinomial Regression Lasso

### Para dados de treinamento

In [88]:
x_vld_poly = poly_features_pr_lasso_train.transform(x_vld)
y_pred_pr_lasso_train = pr_lasso_train.predict(x_vld_poly)

### Para dados de validação

In [89]:
x_test_poly = poly_features_pr_lasso_vld.transform(x_test)
y_pred_pr_lasso_vld = pr_lasso_vld.predict(x_test_poly)

### Para dados de teste

In [90]:
x_test_poly = poly_features_pr_lasso_test.transform(x_test)
y_pred_pr_lasso_test = pr_lasso_test.predict(x_test_poly)

## Polinomial Regression Ridge

### Para dados de treinamento

In [91]:
x_vld_poly = poly_features_pr_ridge_train.transform(x_vld)
y_pred_pr_ridge_train = pr_ridge_train.predict(x_vld_poly)

### Para dados de validação

In [92]:
x_test_poly = poly_features_pr_ridge_vld.transform(x_test)
y_pred_pr_ridge_vld = pr_ridge_vld.predict(x_test_poly)

### Para dados de teste

In [93]:
x_test_poly = poly_features_pr_ridge_test.transform(x_test)
y_pred_pr_ridge_test = pr_ridge_test.predict(x_test_poly)

## Polinomial Regression Elastic Net

### Para dados de treinamento

In [94]:
x_vld_poly = poly_features_pr_elastic_net_train.transform(x_vld)
y_pred_pr_elastic_net_train = pr_elastic_net_train.predict(x_vld_poly)

### Para dados de validação

In [95]:
x_test_poly = poly_features_pr_elastic_net_vld.transform(x_test)
y_pred_pr_elastic_net_vld = pr_elastic_net_vld.predict(x_test_poly)

### Para dados de teste

In [96]:
x_test_poly = poly_features_pr_elastic_net_test.transform(x_test)
y_pred_pr_elastic_net_test = pr_elastic_net_test.predict(x_test_poly)

# Performance

In [97]:
names_algorithm = ['Linear Regression', 'Decision Tree Regressor', 'Random Forest Regressor', 'Polinomial Regression', 'Linear Regression Lasso', 'Linear Regression Ridge', 'Linear Regression Elastic Net',
'Polinomial Regression Lasso', 'Polinomial Regression Ridge', 'Polinomial Regression Elastic Net']

In [98]:
names_metrics = ['R2', 'MSE', 'RMSE', 'MAE', 'MAPE']

In [99]:
list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train = [], [], [], [], []

In [100]:
list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld = [], [], [], [], []

In [101]:
list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test = [], [], [], [], []

## Linear Regression

### Para dados de treinamento

In [102]:
get_metrics(y_vld, y_pred_lr_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [103]:
get_metrics(y_test, y_pred_lr_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [104]:
get_metrics(y_test, y_pred_lr_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Decision Tree Regressor

### Para dados de treinamento

In [105]:
get_metrics(y_vld, y_pred_tree_r_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [106]:
get_metrics(y_test, y_pred_tree_r_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [107]:
get_metrics(y_test, y_pred_tree_r_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Random Forest Regressor

### Para dados de treinamento

In [108]:
get_metrics(y_vld, y_pred_rf_regressor_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [109]:
get_metrics(y_test, y_pred_rf_regressor_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [110]:
get_metrics(y_test, y_pred_rf_regressor_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Polinomial Regression

### Para dados de treinamento

In [111]:
get_metrics(y_vld, y_pred_pr_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [112]:
get_metrics(y_test, y_pred_pr_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [113]:
get_metrics(y_test, y_pred_pr_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Linear Regression Lasso

### Para dados de treinamento

In [114]:
get_metrics(y_vld, y_pred_lr_lasso_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [115]:
get_metrics(y_test, y_pred_lr_lasso_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [116]:
get_metrics(y_test, y_pred_lr_lasso_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Linear Regression Ridge

### Para dados de treinamento

In [117]:
get_metrics(y_vld, y_pred_lr_ridge_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [118]:
get_metrics(y_test, y_pred_lr_ridge_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [119]:
get_metrics(y_test, y_pred_lr_ridge_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Linear Regression Elastic Net

### Para dados de treinamento

In [120]:
get_metrics(y_vld, y_pred_lr_elastic_net_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [121]:
get_metrics(y_test, y_pred_lr_elastic_net_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [122]:
get_metrics(y_test, y_pred_lr_elastic_net_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Polinomial Regression Lasso

### Para dados de treinamento

In [123]:
get_metrics(y_vld, y_pred_pr_lasso_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [124]:
get_metrics(y_test, y_pred_pr_lasso_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [125]:
get_metrics(y_test, y_pred_pr_lasso_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Polinomial Regression Ridge

### Para dados de treinamento

In [126]:
get_metrics(y_vld, y_pred_pr_ridge_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [127]:
get_metrics(y_test, y_pred_pr_ridge_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [128]:
get_metrics(y_test, y_pred_pr_ridge_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Polinomial Regression Elastic Net

### Para dados de treinamento

In [129]:
get_metrics(y_vld, y_pred_pr_elastic_net_train, list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train)

### Para dados de validação

In [130]:
get_metrics(y_test, y_pred_pr_elastic_net_vld, list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld)

### Para dados de teste

In [131]:
get_metrics(y_test, y_pred_pr_elastic_net_test, list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test)

## Tabela de Performance sobre os dados de treinamento

In [132]:
values_metrics_train = [list_R2_train, list_MSE_train, list_RMSE_train, list_MAE_train, list_MAPE_train]
df_tab_train = show_table(names_algorithm, names_metrics, values_metrics_train)
df_tab_train

Unnamed: 0,Algorithm Name,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.040218,458.307113,21.408109,17.041686,866.476822
1,Decision Tree Regressor,-0.293172,617.504423,24.849636,17.11012,900.505733
2,Random Forest Regressor,0.334636,317.718948,17.824672,12.998754,885.288592
3,Polinomial Regression,0.066477,445.768223,21.113224,16.749939,854.793103
4,Linear Regression Lasso,0.007884,473.747081,21.765732,17.264922,868.184976
5,Linear Regression Ridge,0.039928,458.445477,21.41134,17.039472,868.241248
6,Linear Regression Elastic Net,0.008117,473.635616,21.763171,17.262903,868.111874
7,Polinomial Regression Lasso,0.009631,472.912694,21.746556,17.238379,867.966682
8,Polinomial Regression Ridge,0.067699,445.18441,21.099394,16.738741,856.899247
9,Polinomial Regression Elastic Net,0.012782,471.408166,21.711936,17.199966,867.938764


## Tabela de Performance sobre os dados de validação

In [133]:
values_metrics_vld = [list_R2_vld, list_MSE_vld, list_RMSE_vld, list_MAE_vld, list_MAPE_vld]
df_tab_vld = show_table(names_algorithm, names_metrics, values_metrics_vld)
df_tab_vld

Unnamed: 0,Algorithm Name,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.054306,460.459387,21.458317,17.115537,849.986898
1,Decision Tree Regressor,0.872488,62.085563,7.879439,3.529543,880.101747
2,Random Forest Regressor,0.913614,42.061652,6.485496,4.459579,884.179048
3,Polinomial Regression,0.090901,442.641386,21.039044,16.736414,827.697169
4,Linear Regression Lasso,0.051139,462.001242,21.494214,17.144137,873.097036
5,Linear Regression Ridge,0.051128,462.006491,21.494336,17.14243,853.781591
6,Linear Regression Elastic Net,0.052246,461.462471,21.481678,17.128797,873.675494
7,Polinomial Regression Lasso,0.085374,445.332233,21.102896,16.785714,873.899765
8,Polinomial Regression Ridge,0.090902,442.640901,21.039033,16.736387,827.703142
9,Polinomial Regression Elastic Net,0.090399,442.885409,21.044843,16.74036,873.849209


## Tabela de Performance sobre os dados de Teste

In [134]:
values_metrics_test = [list_R2_test, list_MSE_test, list_RMSE_test, list_MAE_test, list_MAPE_test]
df_tab_test = show_table(names_algorithm, names_metrics, values_metrics_test)
df_tab_test

Unnamed: 0,Algorithm Name,R2,MSE,RMSE,MAE,MAPE
0,Linear Regression,0.054306,460.459387,21.458317,17.115537,849.986898
1,Decision Tree Regressor,0.872488,62.085563,7.879439,3.529543,880.101747
2,Random Forest Regressor,0.913614,42.061652,6.485496,4.459579,884.179048
3,Polinomial Regression,0.104011,436.257712,20.886783,16.618534,816.738614
4,Linear Regression Lasso,0.054275,460.47451,21.45867,17.11559,872.24867
5,Linear Regression Ridge,0.054202,460.509779,21.459492,17.114989,850.551244
6,Linear Regression Elastic Net,0.05419,460.515904,21.459634,17.115095,872.222672
7,Polinomial Regression Lasso,0.09345,441.400317,21.009529,16.723153,872.90872
8,Polinomial Regression Ridge,0.10401,436.258326,20.886798,16.618534,816.743978
9,Polinomial Regression Elastic Net,0.102593,436.948243,20.903307,16.631439,872.926053
