# 1 - Import das bibliotecas

In [None]:
pip install scikit-learn


In [1]:
import numpy                as np
import pandas               as pd
from sklearn.linear_model   import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.tree           import DecisionTreeRegressor
from sklearn.ensemble       import RandomForestRegressor
from sklearn.preprocessing  import PolynomialFeatures
from sklearn.metrics        import r2_score, mean_squared_error, mean_absolute_error
from math                   import sqrt

# 2 - Import dos datasets

In [4]:
x_training   = pd.read_csv('x_training.csv')
y_training   = pd.read_csv('y_training.csv')
x_test      = pd.read_csv('x_test.csv')
y_test      = pd.read_csv('y_test.csv')
x_validation = pd.read_csv('x_validation.csv')
y_validation = pd.read_csv('y_val.csv') 

# 3 - Regressão - Base Treinamento / Base Teste / Base Validação

In [7]:
# Hiperparâmetros com valores padrão
max_depth_dt = None  # Profundidade máxima da Decision Tree Regressor (None significa que não há limite)
degree_poly = 2     # Grau do polinômio para regressões polinomiais (grafo 2 como padrão)
alpha_lasso = 1.0   # Alpha para Lasso Regression
alpha_ridge = 1.0   # Alpha para Ridge Regression
alpha_en = 1.0      # Alpha para Elastic Net
l1_ratio_en = 0.5   # L1 Ratio para Elastic Net
max_iter_en = 1000  # Número máximo de iterações para modelos Lasso, Ridge e Elastic Net
n_estimators_rf = 100  # Número de estimadores para o Random Forest Regressor

# Lista de modelos e seus nomes
models = [
    ("Decision Tree Regressor", DecisionTreeRegressor(max_depth=max_depth_dt)),
    ("Polynomial Regression", LinearRegression()),
    ("Lasso Regression", Lasso(alpha=alpha_lasso, max_iter=max_iter_en)),
    ("Ridge Regression", Ridge(alpha=alpha_ridge, max_iter=max_iter_en)),
    ("Elastic Net Regression", ElasticNet(alpha=alpha_en, l1_ratio=l1_ratio_en, max_iter=max_iter_en)),
    ("Random Forest Regressor", RandomForestRegressor(n_estimators=n_estimators_rf, max_depth=max_depth_dt))
]

# Resultados em um DataFrame
results_regression = pd.DataFrame(columns=["Model", "R2 Score Train", "MSE Train", "RMSE Train", "MAE Train",
                                "R2 Score Validation", "MSE Validation", "RMSE Validation", "MAE Validation",
                                "R2 Score Test", "MSE Test", "RMSE Test", "MAE Test"])

for model_name, model in models:
    # Treinamento
    model.fit(x_training, y_training)
    
    # Validação
    predictions_validation = model.predict(x_validation)
    r2_validation = r2_score(y_validation, predictions_validation)
    mse_validation = mean_squared_error(y_validation, predictions_validation)
    rmse_validation = np.sqrt(mse_validation)
    mae_validation = mean_absolute_error(y_validation, predictions_validation)
    
    # Teste
    predictions_test = model.predict(x_test)
    r2_test = r2_score(y_test, predictions_test)
    mse_test = mean_squared_error(y_test, predictions_test)
    rmse_test = np.sqrt(mse_test)
    mae_test = mean_absolute_error(y_test, predictions_test)
    
    # Resultados de Treinamento
    predictions_train = model.predict(x_training)
    r2_train = r2_score(y_training, predictions_train)
    mse_train = mean_squared_error(y_training, predictions_train)
    rmse_train = np.sqrt(mse_train)
    mae_train = mean_absolute_error(y_training, predictions_train)
    
    # Adiciona os resultados aos DataFrames
    results_regression = pd.concat([results_regression, pd.DataFrame([[model_name, r2_train, mse_train, rmse_train, mae_train,
                                                r2_validation, mse_validation, rmse_validation, mae_validation,
                                                r2_test, mse_test, rmse_test, mae_test]], columns=results_regression.columns)], ignore_index=True)

# Exibir o DataFrame com os resultados
results_regression


  model.fit(x_training, y_training)


Unnamed: 0,Model,R2 Score Train,MSE Train,RMSE Train,MAE Train,R2 Score Validation,MSE Validation,RMSE Validation,MAE Validation,R2 Score Test,MSE Test,RMSE Test,MAE Test
0,Decision Tree Regressor,0.991757,3.940403,1.985045,0.214099,-0.306531,623.883902,24.977668,17.243365,-0.235303,601.470127,24.524888,16.827024
1,Polynomial Regression,0.046058,455.996112,21.354065,16.998249,0.039925,458.447042,21.411376,17.039754,0.052317,461.427719,21.480869,17.129965
2,Lasso Regression,0.007401,474.474834,21.782443,17.305484,0.007884,473.747081,21.765732,17.264922,0.007646,483.17797,21.98131,17.472989
3,Ridge Regression,0.046058,455.996401,21.354072,16.998308,0.039928,458.445477,21.41134,17.039472,0.05231,461.431102,21.480947,17.129678
4,Elastic Net Regression,0.007832,474.268889,21.777715,17.299507,0.008117,473.635616,21.763171,17.262903,0.00794,483.03485,21.978054,17.470259
5,Random Forest Regressor,0.903107,46.316088,6.805592,4.859678,0.33132,319.302495,17.869037,13.020717,0.350171,316.402145,17.787696,13.048014


In [10]:
# Salvar o DataFrame results_regression em um arquivo CSV
results_regression.to_csv("resultados_regressao.csv", index=False)