# Imports

In [1]:
import pandas   as pd
import numpy    as np

from sklearn            import metrics          as mt
from sklearn            import ensemble         as en
from sklearn            import model_selection  as ms

# Load Dataset

In [2]:
#Lendo arquivo CSV de Treino
x_train = pd.read_csv('../../dataset/reg/X_training.csv')
y_train = pd.read_csv('../../dataset/reg/y_training.csv')

#Lendo arquivo CSV de teste
x_test = pd.read_csv('../../dataset/reg/X_test.csv')
y_test = pd.read_csv('../../dataset/reg/y_test.csv')

#Lendo arquivo CSV de Validação
x_val = pd.read_csv('../../dataset/reg/X_validation.csv')
y_val = pd.read_csv('../../dataset/reg/y_val.csv')

In [3]:
#Preparação dos dados

y_train = y_train.values.ravel()
y_val = y_val.values.ravel()

# Model Training - RandomForestRegressor (Training Data)

## Definindo os melhores parametros da RandomForest

In [None]:
# Definindo os hiperparâmetros para o GridSearch
param_grid = {
    'n_estimators': [50, 100, 200, 500],
    'max_depth': [None, 10, 20, 30, 50]
}

# Configuração do GridSearchCV
grid_search = ms.GridSearchCV(
    estimator = en.RandomForestRegressor(),
    param_grid = param_grid,
    cv = 5,  # Validação cruzada com 5 folds
    scoring = 'neg_root_mean_squared_error',  # Métrica de avaliação
    n_jobs = -1  # Paralelismo total para acelerar
)

# Ajuste aos dados
grid_search.fit(x_train, y_train)

# Melhores parâmetros
print("Melhores parâmetros:", grid_search.best_params_)

## Model Training

In [5]:
#define
model = en.RandomForestRegressor(   n_estimators=grid_search.best_params_['n_estimators'],
                                    max_depth=grid_search.best_params_['max_depth'],
                                    random_state=0  )

#fit
model.fit(x_train,y_train)
yhat_train = model.predict(x_train)

#performance
r2_train = mt.r2_score(y_train,yhat_train)
print(f'R2 Score: {r2_train:.3f}')

mse_train = mt.mean_squared_error(y_train,yhat_train)
print(f'MSE: {mse_train:.3f}')

rmse_train = np.sqrt(mse_train)
print(f'RMSE: {rmse_train:.3f}')

mae_train = mt.mean_absolute_error(y_train,yhat_train)
print(f'MAE: {mae_train:.3f}')

mape_train = mt.mean_absolute_percentage_error(y_train,yhat_train)
print(f'MAPE: {mape_train:.2f}%')

R2 Score: 0.906
MSE: 44.755
RMSE: 6.690
MAE: 4.797
MAPE: 2.63%


# Model Training - RandomForestRegressor (Validation Data)

In [6]:
#define
model = en.RandomForestRegressor(   n_estimators=grid_search.best_params_['n_estimators'],
                                    max_depth=grid_search.best_params_['max_depth'],
                                    random_state=0  )

#fit
model.fit(x_train,y_train)
yhat_val = model.predict(x_val)

#performance
r2_val = mt.r2_score(y_val,yhat_val)
print(f'R2 Score: {r2_val:.3f}')

mse_val = mt.mean_squared_error(y_val,yhat_val)
print(f'MSE: {mse_val:.3f}')

rmse_val = np.sqrt(mse_val)
print(f'RMSE: {rmse_val:.3f}')

mae_val = mt.mean_absolute_error(y_val,yhat_val)
print(f'MAE: {mae_val:.3f}')

mape_val = mt.mean_absolute_percentage_error(y_val,yhat_val)
print(f'MAPE: {mape_val:.2f}%')

R2 Score: 0.341
MSE: 314.827
RMSE: 17.743
MAE: 12.929
MAPE: 7.04%


# Model Training - RandomForestRegressor (Test Data)

In [7]:
#define
model = en.RandomForestRegressor(   n_estimators=grid_search.best_params_['n_estimators'],
                                    max_depth=grid_search.best_params_['max_depth'],
                                    random_state=0  )

#fit
model.fit(np.concatenate((x_train,x_val)),
          np.concatenate((y_train,y_val)))
yhat_test = model.predict(x_test)

#performance
r2_test = mt.r2_score(y_test,yhat_test)
print(f'R2 Score: {r2_test:.3f}')

mse_test = mt.mean_squared_error(y_test,yhat_test)
print(f'MSE: {mse_test:.3f}')

rmse_test = np.sqrt(mse_test)
print(f'RMSE: {rmse_test:.3f}')

mae_test = mt.mean_absolute_error(y_test,yhat_test)
print(f'MAE: {mae_test:.3f}')

mape_test = mt.mean_absolute_percentage_error(y_test,yhat_test)
print(f'MAPE: {mape_test:.2f}%')



R2 Score: 0.409
MSE: 287.964
RMSE: 16.969
MAE: 12.190
MAPE: 6.33%
