# LINEAR REGRESSION

## 0. Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn import metrics as mt
from sklearn import linear_model as lm

## 1. Load Datasets

In [2]:
# Train Datasets
dataset_path = '../../datasets/regression/X_training.csv'
x_train = pd.read_csv(dataset_path)
dataset_path2 = '../../datasets/regression/y_training.csv'
y_train = pd.read_csv(dataset_path2)

# Test Datasets
dataset_path3 = '../../datasets/regression/X_test.csv'
x_test = pd.read_csv(dataset_path3)
dataset_path4 = '../../datasets/regression/y_test.csv'
y_test = pd.read_csv(dataset_path4)

# Validation Datasets
dataset_path5 = '../../datasets/regression/X_validation.csv'
x_val = pd.read_csv(dataset_path5)
dataset_path6 = '../../datasets/regression/y_val.csv'
y_val = pd.read_csv(dataset_path6)

## 2. Linear Regression

### 2.1 Model Definition and Training

In [3]:
linear_regression = lm.LinearRegression()
linear_regression.fit(x_train, y_train)

### 2.2 Model Predict and Performance

In [4]:
# Predict Train
y_pred_train = linear_regression.predict(x_train)  

# Performance Metrics
r2_train = mt.r2_score(y_train, y_pred_train)
mse_train = mt.mean_squared_error(y_train, y_pred_train)
rmse_train = np.sqrt(mse_train)
mae_train = mt.mean_absolute_error(y_train, y_pred_train)
mape_train = mt.mean_absolute_percentage_error(y_train, y_pred_train)

print(f"Train R2 Square: {r2_train}")
print(f"Train MSE: {mse_train}")
print(f"Train RMSE: {rmse_train}")
print(f"Train MAE: {mae_train}")
print(f"Train MAPE: {mape_train}%")

Train R2 Square: 0.04605830473391903
Train MSE: 455.99611182562677
Train RMSE: 21.35406546364478
Train MAE: 16.998249066011095
Train MAPE: 8.653185943804512%


In [5]:
# Predict Validation
y_pred_val = linear_regression.predict(x_val)  

# Performance Metrics
r2_val = mt.r2_score(y_val, y_pred_val)
mse_val = mt.mean_squared_error(y_val, y_pred_val)
rmse_val = np.sqrt(mse_val)
mae_val = mt.mean_absolute_error(y_val, y_pred_val)
mape_val = mt.mean_absolute_percentage_error(y_val, y_pred_val)

print(f"Validation R2 Square: {r2_val}")
print(f"Validation MSE: {mse_val}")
print(f"Validation RMSE: {rmse_val}")
print(f"Validation MAE: {mae_val}")
print(f"Validation MAPE: {mape_val}%")

Validation R2 Square: 0.03992483038154071
Validation MSE: 458.4470418439312
Validation RMSE: 21.41137645841414
Validation MAE: 17.039753759960327
Validation MAPE: 8.682541883735295%


In [6]:
# Predict Test 
y_pred_test = linear_regression.predict(x_test)  

# Performance Metrics
r2_test = mt.r2_score(y_test, y_pred_test)
mse_test = mt.mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)
mae_test = mt.mean_absolute_error(y_test, y_pred_test)
mape_test = mt.mean_absolute_percentage_error(y_test, y_pred_test)

print(f"Test R2 Square: {r2_test}")
print(f"Test MSE: {mse_test}")
print(f"Test RMSE: {rmse_test}")
print(f"Test MAE: {mae_test}")
print(f"Test MAPE: {mape_test}%")

Test R2 Square: 0.052317119521687494
Test MSE: 461.42771933638954
Test RMSE: 21.480868682071254
Test MAE: 17.12996509166612
Test MAPE: 8.521859277849835%
