# Imports

In [14]:
import pandas   as pd
import numpy    as np

from sklearn            import metrics          as mt
from sklearn            import linear_model     as lm

# Load Dataset

In [15]:
#Lendo arquivo CSV de Treino
x_train = pd.read_csv('../../dataset/reg/X_training.csv')
y_train = pd.read_csv('../../dataset/reg/y_training.csv')

#Lendo arquivo CSV de teste
x_test = pd.read_csv('../../dataset/reg/X_test.csv')
y_test = pd.read_csv('../../dataset/reg/y_test.csv')

#Lendo arquivo CSV de Validação
x_val = pd.read_csv('../../dataset/reg/X_validation.csv')
y_val = pd.read_csv('../../dataset/reg/y_val.csv')

In [16]:
#Preparação dos dados

y_train = y_train.values.ravel()
y_val = y_val.values.ravel()

# Model Training - LinearRegression (Training Data)

In [17]:
#define
model = lm.LinearRegression()

#fit
model.fit(x_train,y_train)
yhat_train = model.predict(x_train)

#performance
r2_train = mt.r2_score(y_train,yhat_train)
print(f'R2 Score: {r2_train:.3f}')

mse_train = mt.mean_squared_error(y_train,yhat_train)
print(f'MSE: {mse_train:.3f}')

rmse_train = np.sqrt(mse_train)
print(f'RMSE: {rmse_train:.3f}')

mae_train = mt.mean_absolute_error(y_train,yhat_train)
print(f'MAE: {mae_train:.3f}')

mape_train = mt.mean_absolute_percentage_error(y_train,yhat_train)
print(f'MAPE: {mape_train:.2f}%')

R2 Score: 0.046
MSE: 455.996
RMSE: 21.354
MAE: 16.998
MAPE: 8.65%


# Model Training - LinearRegression (Validation Data)

In [18]:
#define
model = lm.LinearRegression()

#fit
model.fit(x_train,y_train)
yhat_val = model.predict(x_val)

#performance
r2_val = mt.r2_score(y_val,yhat_val)
print(f'R2 Score: {r2_val:.3f}')

mse_val = mt.mean_squared_error(y_val,yhat_val)
print(f'MSE: {mse_val:.3f}')

rmse_val = np.sqrt(mse_val)
print(f'RMSE: {rmse_val:.3f}')

mae_val = mt.mean_absolute_error(y_val,yhat_val)
print(f'MAE: {mae_val:.3f}')

mape_val = mt.mean_absolute_percentage_error(y_val,yhat_val)
print(f'MAPE: {mape_val:.2f}%')

R2 Score: 0.040
MSE: 458.447
RMSE: 21.411
MAE: 17.040
MAPE: 8.68%


# Model Training - LinearRegression (Test Data)

In [19]:
#define
model = lm.LinearRegression()

#fit
model.fit(np.concatenate((x_train,x_val)),
          np.concatenate((y_train,y_val)))
yhat_test = model.predict(x_test)

#performance
r2_test = mt.r2_score(y_test,yhat_test)
print(f'R2 Score: {r2_test:.3f}')

mse_test = mt.mean_squared_error(y_test,yhat_test)
print(f'MSE: {mse_test:.3f}')

rmse_test = np.sqrt(mse_test)
print(f'RMSE: {rmse_test:.3f}')

mae_test = mt.mean_absolute_error(y_test,yhat_test)
print(f'MAE: {mae_test:.3f}')

mape_test = mt.mean_absolute_percentage_error(y_test,yhat_test)
print(f'MAPE: {mape_test:.2f}%')

R2 Score: 0.051
MSE: 461.988
RMSE: 21.494
MAE: 17.144
MAPE: 8.53%




# Save Results

In [20]:
train_metrics = {
    "Algorithm": "Linear Regression",
    "R-Squared": np.round(r2_train, 3),
    "MSE": np.round(mse_train, 3),
    "RMSE": np.round(rmse_train, 3),
    "MAE": np.round(mae_train, 3),
    "MAPE": np.round(mape_train, 3),
}
validation_metrics = {
    "Algorithm": "Linear Regression",
    "R-Squared": np.round(r2_val, 3),
    "MSE": np.round(mse_val, 3),
    "RMSE": np.round(rmse_val, 3),
    "MAE": np.round(mae_val, 3),
    "MAPE": np.round(mape_val, 3),
}
test_metrics = {
    "Algorithm": "Linear Regression",
    "R-Squared": np.round(r2_test, 3),
    "MSE": np.round(mse_test, 3),
    "RMSE": np.round(rmse_test, 3),
    "MAE": np.round(mae_test, 3),
    "MAPE": np.round(mape_test, 3),
}

pd.DataFrame(train_metrics, index=[0]).to_csv(
    "./reg_train_metrics.csv", mode="a", header=False
)
pd.DataFrame(validation_metrics, index=[0]).to_csv(
    "./reg_validation_metrics.csv", mode="a", header=False
)
pd.DataFrame(test_metrics, index=[0]).to_csv(
    "./reg_test_metrics.csv", mode="a", header=False)