In [108]:
#Import necessary libraries
!pip install scikit-learn

import numpy as np
import pandas as pd



In [109]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [110]:
#Load the dataset
df = pd.read_csv('/content/energy_iter13c.csv')

In [111]:
#Extract day from date
df['Date']=pd.to_datetime(df['Date'])
df['Day']=df['Date'].dt.day

In [112]:
df.head(5)

Unnamed: 0,Date,Demand(MW),Generation(MW),Temp(C),Year,Month,Season,IsHoliday,DemandGenGap(MW),Day
0,2024-12-31,-0.855166,0.537407,-2.304651,2024,12,0,0,1.588629,31
1,2024-12-30,-0.754021,0.884273,-1.472501,2024,12,0,0,1.794999,30
2,2024-12-29,-0.754021,0.39233,-1.633562,2024,12,0,0,1.322754,29
3,2024-12-28,-1.234458,0.236042,-1.633562,2024,12,0,0,1.774109,28
4,2024-12-27,-0.855166,0.187903,-1.579875,2024,12,0,1,1.253119,27


# **Random Forest**

In [113]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from scipy.stats import randint

In [114]:
#Separate features and target for train-test split
#Drop the Date features beause we cant use date as datestamp format in Random Forest model
#Our Target is Energy Demand

X = df.drop(columns=['Demand(MW)','Date','DemandGenGap(MW)'])
y = df['Demand(MW)']

#Split the dataset: 20% for Testing and 80% for Training
#For the consistency in result we use random_state= 42
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=42)

#Define Forest classifier
rf_reg= RandomForestRegressor()

#Use different hyperparameter combinations
param_dist = {
    'n_estimators':randint(50,200,10),
    'max_depth':[None,10,20, 30],
    'min_samples_split':randint(2,10),
    'min_samples_leaf':randint(1,4)
    }

random_search = RandomizedSearchCV(estimator=rf_reg, param_distributions=param_dist, n_iter=10, cv=5, random_state=42)
random_search.fit(X_train, y_train)

#Find the best hyperparameters combinations
best_params = random_search.best_params_
print("Best Hyperparameters:",  best_params)

# Train the model with the best hyperparameters
rf_reg= RandomForestRegressor(**best_params)
rf_reg.fit(X_train, y_train)

#Prediction
y_pred = rf_reg.predict(X_test)

#Different types of evaluation parameter
r2=r2_score(y_test,y_pred)
mae=mean_absolute_error(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mse)


print(f"R Squared: {r2:.5f}")
print(f'Mean absolute error: {mae:.5f}')
print(f'Mean Squared error: {mse:.5f}')
print(f'Root Mean Squared error: {rmse:.5f}')


Best Hyperparameters: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 163}
R Squared: 0.89776
Mean absolute error: 0.22607
Mean Squared error: 0.09781
Root Mean Squared error: 0.31274


# **Linear Regression**

In [115]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

In [116]:
#Scaling(Day,Year,Month)features just for Linear regression model
#Use Standard Scaler

features=["Day","Month","Year"]
scaler=StandardScaler()
df[features]=scaler.fit_transform(df[features])

print("scaling done")

scaling done


In [117]:
#Separate features and target for train-test split
#Drop the Date features beause we cant use date as datestamp format in Linear Regression
#Our Target is Energy Demand

X = df.drop(columns=['Demand(MW)','Date','DemandGenGap(MW)'])
y = df['Demand(MW)']

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=42)

#Train the model
lr_model=LinearRegression()
lr_model.fit(X_train,y_train)

#prediction
y_pred=lr_model.predict(X_test)

#evaluation parameter
r2=r2_score(y_test,y_pred)
mae=mean_absolute_error(y_test,y_pred)
mse=mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mse)

#Show result with different evaluation parameter
print(f"R Squared:{r2:.5f}")
print(f'Mean absolute error:{mae:.5f}')
print(f'Mean Squared error:{mse:.5f}')
print(f'Root Mean Squared error:{rmse:.5f}')

R Squared:0.85597
Mean absolute error:0.28180
Mean Squared error:0.13778
Root Mean Squared error:0.37119
