In [3]:
# Step 1: Import libraries
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Step 2: Load the saved training and testing data
train_data = pd.read_csv("train_multi_output.csv")
test_data = pd.read_csv("test_multi_output.csv")

# Step 3: Separate features (X) and target variables (y)
# The column lists are defined once and reused for both splits so the train
# and test frames always select the same columns in the same order.
FEATURE_COLUMNS = [
    'Season', 'Day_of_the_week', 'DHI', 'DNI', 'GHI',
    'Wind_speed', 'Humidity', 'Temperature',
]
TARGET_COLUMNS = ['PV_production', 'Wind_production']

X_train = train_data[FEATURE_COLUMNS]
y_train = train_data[TARGET_COLUMNS]

X_test = test_data[FEATURE_COLUMNS]
y_test = test_data[TARGET_COLUMNS]

# Step 4: Initialize and train the model
# A single estimator is fitted against both target columns of y_train at once;
# fit() hands back the fitted estimator, so creation and training are chained.
lr_model = LinearRegression().fit(X_train, y_train)

# Step 5: Make predictions
# The result is indexed below as a 2-D array with one column per target.
y_pred = lr_model.predict(X_test)

def _adjusted_r2(r2, n_samples, n_features):
    """Return the adjusted R² for a fit scored on ``n_samples`` rows.

    Args:
        r2: Plain coefficient of determination.
        n_samples: Number of rows the score was computed on.
        n_features: Number of predictor columns.

    Returns:
        ``1 - (1 - r2) * (n_samples - 1) / (n_samples - n_features - 1)``,
        or NaN when ``n_samples - n_features - 1`` is not positive, because
        the statistic is undefined without residual degrees of freedom.
    """
    residual_dof = n_samples - n_features - 1
    if residual_dof <= 0:
        return float("nan")
    return 1 - ((1 - r2) * (n_samples - 1) / residual_dof)


# Step 6: Calculate Metrics for PV Production
# Column 0 of y_pred is scored against 'PV_production'.
pv_true = y_test['PV_production']
pv_pred = y_pred[:, 0]
mae_pv = mean_absolute_error(pv_true, pv_pred)
mse_pv = mean_squared_error(pv_true, pv_pred)
rmse_pv = np.sqrt(mse_pv)  # reuse the MSE instead of computing it twice
r2_pv = r2_score(pv_true, pv_pred)

# Adjusted R² for PV
n_pv = len(y_test)  # number of samples
p_pv = X_test.shape[1]  # number of features
adj_r2_pv = _adjusted_r2(r2_pv, n_pv, p_pv)

# Step 7: Calculate Metrics for Wind Production
# Column 1 of y_pred is scored against 'Wind_production'.
wind_true = y_test['Wind_production']
wind_pred = y_pred[:, 1]
mae_wind = mean_absolute_error(wind_true, wind_pred)
mse_wind = mean_squared_error(wind_true, wind_pred)
rmse_wind = np.sqrt(mse_wind)  # reuse the MSE instead of computing it twice
r2_wind = r2_score(wind_true, wind_pred)

# Adjusted R² for Wind
# Both targets share the same test rows and feature matrix, so the sample and
# feature counts are the same values as for PV.
n_wind = n_pv
p_wind = p_pv
adj_r2_wind = _adjusted_r2(r2_wind, n_wind, p_wind)

# Step 8: Print results
# One print call with a newline separator emits the banner and the two
# per-target summary lines, each on its own line.
print(
    "----- Linear Regression Baseline Results -----",
    f"PV Production -> MAE: {mae_pv:.3f}, RMSE: {rmse_pv:.3f}, R²: {r2_pv:.3f}, Adjusted R²: {adj_r2_pv:.3f}, MSE: {mse_pv:.3f}",
    f"Wind Production -> MAE: {mae_wind:.3f}, RMSE: {rmse_wind:.3f}, R²: {r2_wind:.3f}, Adjusted R²: {adj_r2_wind:.3f}, MSE: {mse_wind:.3f}",
    sep="\n",
)

----- Linear Regression Baseline Results -----
PV Production -> MAE: 0.069, RMSE: 0.102, R²: 0.903, Adjusted R²: 0.903, MSE: 0.010
Wind Production -> MAE: 0.133, RMSE: 0.165, R²: 0.481, Adjusted R²: 0.481, MSE: 0.027
