# Imports

In [1]:
import pandas   as pd
import numpy    as np

from sklearn            import metrics          as mt
from sklearn            import linear_model     as lm
from sklearn            import preprocessing    as pp
from sklearn            import model_selection  as ms


# Load Dataset

In [2]:
#Lendo arquivo CSV de Treino
x_train = pd.read_csv('../../dataset/reg/X_training.csv')
y_train = pd.read_csv('../../dataset/reg/y_training.csv')

#Lendo arquivo CSV de teste
x_test = pd.read_csv('../../dataset/reg/X_test.csv')
y_test = pd.read_csv('../../dataset/reg/y_test.csv')

#Lendo arquivo CSV de Validação
x_val = pd.read_csv('../../dataset/reg/X_validation.csv')
y_val = pd.read_csv('../../dataset/reg/y_val.csv')

In [3]:
#Preparação dos dados

y_train = y_train.values.ravel()
y_val = y_val.values.ravel()

# Model Training - ElasticNet - Polynomial (Training Data)

## Definindo os melhores parametros para o ElasticNet

In [4]:
# features
poly_features = pp.PolynomialFeatures( degree=2, include_bias=False )
x_poly = poly_features.fit_transform( x_train )

In [None]:
# Definindo os hiperparâmetros para o GridSearch
param_grid = {
    'alpha': np.logspace(-4, 4, 9),  # Testar valores de alpha entre 10^-4 e 10^4
    'max_iter': [1000, 5000, 10000, 20000],  # Valores típicos para max_iter
    'l1_ratio': np.linspace(0, 1, 11)
}

# Configurar a busca em grade
grid_search = ms.GridSearchCV(estimator=lm.ElasticNet(), 
                              param_grid=param_grid, 
                              cv=5, 
                              scoring='neg_mean_squared_error', 
                              n_jobs=-1)

# Ajustar o modelo
grid_search.fit(x_poly, y_train)

# Exibir os melhores parâmetros e o desempenho
print(f"Melhores parâmetros: {grid_search.best_params_}")
print(f"Melhor erro quadrático médio (MSE): {-grid_search.best_score_}")


## Model Training

In [6]:
# define
ElasticNet = lm.ElasticNet( alpha=grid_search.best_params_['alpha'],
                            max_iter=grid_search.best_params_['max_iter'],
                            l1_ratio=grid_search.best_params_['l1_ratio'])

# fit
ElasticNet.fit( x_poly, y_train )
yhat_train = ElasticNet.predict( x_poly )

# performance
r2_train = mt.r2_score(y_train,yhat_train)
print(f'R2 Score: {r2_train:.3f}')

mse_train = mt.mean_squared_error(y_train,yhat_train)
print(f'MSE: {mse_train:.3f}')

rmse_train = np.sqrt(mse_train)
print(f'RMSE: {rmse_train:.3f}')

mae_train = mt.mean_absolute_error(y_train,yhat_train)
print(f'MAE: {mae_train:.3f}')

mape_train = mt.mean_absolute_percentage_error(y_train,yhat_train)
print(f'MAPE: {mape_train:.2f}%')

R2 Score: 0.092
MSE: 434.037
RMSE: 20.834
MAE: 16.485
MAPE: 8.39%


  model = cd_fast.enet_coordinate_descent(


# Model Training - ElasticNet - Polynomial (Validation Data)

In [7]:
# features
poly_features = pp.PolynomialFeatures( degree=2, include_bias=False )
x_polytrain = poly_features.fit_transform( x_train )
x_polyval = poly_features.fit_transform(x_val)

# define
ElasticNet = lm.ElasticNet( alpha=grid_search.best_params_['alpha'],
                            max_iter=grid_search.best_params_['max_iter'],
                            l1_ratio=grid_search.best_params_['l1_ratio'])

# fit
ElasticNet.fit( x_polytrain, y_train )
yhat_val = ElasticNet.predict( x_polyval )

# performance
r2_val = mt.r2_score(y_val,yhat_val)
print(f'R2 Score: {r2_val:.3f}')

mse_val = mt.mean_squared_error(y_val,yhat_val)
print(f'MSE: {mse_val:.3f}')

rmse_val = np.sqrt(mse_val)
print(f'RMSE: {rmse_val:.3f}')

mae_val = mt.mean_absolute_error(y_val,yhat_val)
print(f'MAE: {mae_val:.3f}')

mape_val = mt.mean_absolute_percentage_error(y_val,yhat_val)
print(f'MAPE: {mape_val:.2f}%')

R2 Score: 0.068
MSE: 445.115
RMSE: 21.098
MAE: 16.738
MAPE: 8.58%


  model = cd_fast.enet_coordinate_descent(


# Model Training - ElasticNet - Polynomial (Test Data)

In [8]:
# features
poly_features = pp.PolynomialFeatures( degree=2, include_bias=False )
x_polytrain = poly_features.fit_transform( np.concatenate(( x_train,x_val )))
x_polytest = poly_features.fit_transform(x_test)

# define
ElasticNet = lm.ElasticNet( alpha=grid_search.best_params_['alpha'],
                            max_iter=grid_search.best_params_['max_iter'],
                            l1_ratio=grid_search.best_params_['l1_ratio'])

# fit
ElasticNet.fit( x_polytrain, np.concatenate((y_train,y_val)) )
yhat_test = ElasticNet.predict( x_polytest )

# performance
r2_test = mt.r2_score(y_test,yhat_test)
print(f'R2 Score: {r2_val:.3f}')

mse_test = mt.mean_squared_error(y_test,yhat_test)
print(f'MSE: {mse_test:.3f}')

rmse_test = np.sqrt(mse_val)
print(f'RMSE: {rmse_test:.3f}')

mae_test = mt.mean_absolute_error(y_test,yhat_test)
print(f'MAE: {mae_test:.3f}')

mape_test = mt.mean_absolute_percentage_error(y_test,yhat_test)
print(f'MAPE: {mape_test:.2f}%')

R2 Score: 0.068
MSE: 443.763
RMSE: 21.098
MAE: 16.759
MAPE: 8.33%


  model = cd_fast.enet_coordinate_descent(


# Save Results

In [9]:
train_metrics = {
    "Algorithm": "Polynomial Regression - ElasticNet",
    "R-Squared": np.round(r2_train, 3),
    "MSE": np.round(mse_train, 3),
    "RMSE": np.round(rmse_train, 3),
    "MAE": np.round(mae_train, 3),
    "MAPE": np.round(mape_train, 3),
}
validation_metrics = {
    "Algorithm": "Polynomial Regression - ElasticNet",
    "R-Squared": np.round(r2_val, 3),
    "MSE": np.round(mse_val, 3),
    "RMSE": np.round(rmse_val, 3),
    "MAE": np.round(mae_val, 3),
    "MAPE": np.round(mape_val, 3),
}
test_metrics = {
    "Algorithm": "Polynomial Regression - ElasticNet",
    "R-Squared": np.round(r2_test, 3),
    "MSE": np.round(mse_test, 3),
    "RMSE": np.round(rmse_test, 3),
    "MAE": np.round(mae_test, 3),
    "MAPE": np.round(mape_test, 3),
}

pd.DataFrame(train_metrics, index=[0]).to_csv(
    "./reg_train_metrics.csv", mode="a", header=False
)
pd.DataFrame(validation_metrics, index=[0]).to_csv(
    "./reg_validation_metrics.csv", mode="a", header=False
)
pd.DataFrame(test_metrics, index=[0]).to_csv(
    "./reg_test_metrics.csv", mode="a", header=False)