In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df=pd.read_csv('dataset/24-hours Delhi Power Consumption dataset.csv')

In [3]:
df['timestamp']=pd.to_datetime(df['timestamp'])

In [4]:
def assign_compensation_method(row):
    if row['is_holiday'] == 1:
        return 'Lower Power Supply'
    elif row['solar_generation'] > 200:
        return 'Increased Renewable Energy Integration'
    elif row['hour_of_day'] >= 18 and row['hour_of_day'] <= 22:
        return 'Peak Load Shifting'
    elif row['temperature'] > 35:
        return 'Increased Power Supply'
    else:
        return 'Normal Operation'

df['compensation_method'] = df.apply(assign_compensation_method, axis=1)

print(df[['timestamp', 'load', 'compensation_method']].head(10))


            timestamp   load                     compensation_method
0 2023-01-01 00:00:00  18493                        Normal Operation
1 2023-01-01 01:00:00  17090                        Normal Operation
2 2023-01-01 02:00:00  16052                        Normal Operation
3 2023-01-01 03:00:00  15356                        Normal Operation
4 2023-01-01 04:00:00  14936                        Normal Operation
5 2023-01-01 05:00:00  14884                        Normal Operation
6 2023-01-01 06:00:00  15314  Increased Renewable Energy Integration
7 2023-01-01 07:00:00  16591  Increased Renewable Energy Integration
8 2023-01-01 08:00:00  18271                        Normal Operation
9 2023-01-01 09:00:00  19470  Increased Renewable Energy Integration


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [6]:
X = df[['temperature', 'solar_generation', 'hour_of_day', 'load', 'is_holiday']]
y=df['compensation_method']
X_reg = df[['temperature', 'solar_generation', 'hour_of_day', 'is_holiday']]
y_reg = df['load']
le = LabelEncoder()
y = le.fit_transform(y)

In [7]:
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.3, random_state=42)
X_reg_train, X_reg_test, y_reg_train, y_reg_test = train_test_split(X_reg, y_reg, test_size=0.3, random_state=42)

In [8]:
from sklearn.preprocessing import StandardScaler

# from keras.models import Sequential
# from keras.layers import LSTM, Dense, Dropout, Conv1D, MaxPooling1D, Flatten
# from keras.optimizers import Adam
# from keras.layers import BatchNormalization
# from keras.layers import Activation
#ann

# Normalize the data
scaler_reg = StandardScaler()
scaler_cls = StandardScaler()

X_reg_train_scaled = scaler_reg.fit_transform(X_reg_train)
X_reg_test_scaled = scaler_reg.transform(X_reg_test)
X_train_scaled = scaler_cls.fit_transform(X_train)
X_test_scaled = scaler_cls.transform(X_test)



In [13]:
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, LSTM, Dense, Dropout
from tensorflow.keras.layers import Bidirectional
from xgboost import XGBRegressor
import numpy as np

# Define k-fold cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Placeholder for results
results = []

for train_index, val_index in kf.split(X_reg_train_scaled):
    X_train_fold, X_val_fold = X_reg_train_scaled[train_index], X_reg_train_scaled[val_index]
    y_train_fold, y_val_fold = y_reg_train.to_numpy()[train_index], y_reg_train.to_numpy()[val_index]

    # Reshape input data to 3D for GRU and BiLSTM models
    X_train_fold_reshaped = X_train_fold.reshape((X_train_fold.shape[0], 1, X_train_fold.shape[1]))
    X_val_fold_reshaped = X_val_fold.reshape((X_val_fold.shape[0], 1, X_val_fold.shape[1]))

    # GRU Model
    gru_model = Sequential([
        GRU(64, return_sequences=True, input_shape=(X_train_fold_reshaped.shape[1], X_train_fold_reshaped.shape[2])),
        Dropout(0.2),
        GRU(32, return_sequences=False),
        Dropout(0.2),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    gru_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    gru_model.fit(X_train_fold_reshaped, y_train_fold, epochs=100, batch_size=32, verbose=0)
    gru_preds = gru_model.predict(X_val_fold_reshaped)

    # BiLSTM Model
    bilstm_model = Sequential([
        Bidirectional(LSTM(64, return_sequences=True), input_shape=(X_train_fold_reshaped.shape[1], X_train_fold_reshaped.shape[2])),
        Dropout(0.2),
        Bidirectional(LSTM(32, return_sequences=False)),
        Dropout(0.2),
        Dense(1, activation='linear')  # Linear activation for regression
    ])
    bilstm_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    bilstm_model.fit(X_train_fold_reshaped, y_train_fold, epochs=100, batch_size=32, verbose=0)
    bilstm_preds = bilstm_model.predict(X_val_fold_reshaped)

    # XGBoost Model
    xgb_model = XGBRegressor()
    xgb_model.fit(X_train_fold, y_train_fold)
    xgb_preds = xgb_model.predict(X_val_fold)

    # Combine predictions (e.g., weighted average)
    final_preds = (gru_preds.flatten() + bilstm_preds.flatten() + xgb_preds) / 3

    # Evaluate
    mse = mean_squared_error(y_val_fold, final_preds)
    results.append(mse)

print("Average MSE:", np.mean(results))

Average MSE: 141783831.96064487


In [15]:
from sklearn.metrics import mean_absolute_error, r2_score

# Generate predictions for the test set
X_reg_test_reshaped = X_reg_test_scaled.reshape((X_reg_test_scaled.shape[0], 1, X_reg_test_scaled.shape[1]))

gru_test_preds = gru_model.predict(X_reg_test_reshaped)
bilstm_test_preds = bilstm_model.predict(X_reg_test_reshaped)
xgb_test_preds = xgb_model.predict(X_reg_test_scaled)

# Combine predictions (e.g., weighted average)
final_test_preds = (gru_test_preds.flatten() + bilstm_test_preds.flatten() + xgb_test_preds) / 3

# Calculate metrics for the ensemble model
ensemble_mse = mean_squared_error(y_reg_test, final_test_preds)
ensemble_mae = mean_absolute_error(y_reg_test, final_test_preds)
ensemble_r2 = r2_score(y_reg_test, final_test_preds)
ensemble_rmse = np.sqrt(ensemble_mse)
ensemble_mape = np.mean(np.abs((y_reg_test - final_test_preds) / y_reg_test)) * 100

print("Ensemble Model Metrics:")
print(f"MSE: {ensemble_mse}")
print(f"MAE: {ensemble_mae}")
print(f"R2 Score: {ensemble_r2}")
print(f"RMSE: {ensemble_rmse}")
print(f"MAPE: {ensemble_mape}")


Ensemble Model Metrics:
MSE: 144617217.44311732
MAE: 11672.322280709204
R2 Score: -11.002860955254743
RMSE: 12025.689894684518
MAPE: 63.87721671893654
