In [1]:
import os
import joblib

import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.optimizers import Adam # type:ignore
from tensorflow.keras.regularizers import l2 # type:ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.callbacks import EarlyStopping # type:ignore
from tensorflow.keras.layers import Input, Dense, GRU, Dropout, BatchNormalization, Bidirectional # type:ignore
from tensorflow.keras.callbacks import ReduceLROnPlateau # type:ignore

In [2]:
print(f"Pandas Version: {pd.__version__}")
print(f"Sklearn (Scikit) Version: {sklearn.__version__}")
print(f"TensorFlow Version: {tf.__version__}")

Pandas Version: 3.0.1
Sklearn (Scikit) Version: 1.8.0
TensorFlow Version: 2.20.0


In [3]:
PROCESSED_DATA ='../data/processed/'


TEST_DATA = '../data/processed/test/'
TRAIN_DATA = '../data/processed/train/'


REPORT = '../reports/figures/'


MODEL_PATH = '../models/'

In [4]:
SCALER = MinMaxScaler(feature_range=(0, 1))

In [5]:
TRAIN_DATA_PATH = os.path.join(TRAIN_DATA, 'dataset.csv')
TEST_DATA_PATH = os.path.join(TEST_DATA, 'dataset.csv')

In [6]:
TRAINING_DATASET = pd.read_csv(TRAIN_DATA_PATH, index_col='Timestamp', parse_dates=True)

TESTING_DATASET = pd.read_csv(TEST_DATA_PATH, index_col='Timestamp', parse_dates=True)

In [7]:
TRAINING_DATASET.drop(columns=['Predicted Load (kW)', 'Transformer Fault'])
TESTING_DATASET.drop(columns=['Predicted Load (kW)', 'Transformer Fault'])

Unnamed: 0_level_0,Current (A),Electricity Price (USD/kWh),Grid Supply (kW),Humidity (%),Overload Condition,Power Consumption (kW),Power Factor,Reactive Power (kVAR),Solar Power (kW),Temperature (°C),Voltage (V),Voltage Fluctuation (%),Wind Power (kW),Hours,Day of Week,Weekend
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2024-12-30 14:00:00,40.535635,0.497850,0.0,35.860080,0,9.508464,0.992804,2.559293,21.504700,33.635491,234.570489,2.441752,23.672970,14,0,0
2024-12-30 14:15:00,21.064044,0.464954,0.0,20.979286,1,5.015821,0.958999,0.672476,49.452665,26.578696,238.122436,-2.152105,19.208487,14,0,0
2024-12-30 14:30:00,16.555170,0.215821,0.0,78.859669,0,3.836290,0.901580,0.447534,17.776861,13.983671,231.727586,-3.058982,17.152905,14,0,0
2024-12-30 14:45:00,29.787593,0.417138,0.0,61.827096,0,6.813149,0.895525,2.724099,31.973194,12.981277,228.724395,-2.810414,8.162543,14,0,0
2024-12-30 15:00:00,44.225248,0.261347,0.0,23.721667,1,9.935926,0.838068,1.874352,15.073820,20.613128,224.666376,-0.535226,7.584563,15,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-18 15:45:00,8.779311,0.371212,0.0,67.423775,0,2.027413,0.831051,0.653013,30.803952,21.216514,230.930715,4.499369,10.681204,15,1,0
2025-03-18 16:00:00,7.064872,0.281913,0.0,72.609169,0,1.613722,0.865908,0.496339,47.806129,19.630265,228.414844,0.901988,22.605573,16,1,0
2025-03-18 16:15:00,47.244731,0.366633,0.0,58.052527,0,11.122062,0.842867,4.334285,16.571503,33.818902,235.413808,-2.480346,8.402725,16,1,0
2025-03-18 16:30:00,13.309820,0.278386,0.0,26.327605,0,3.143181,0.907185,1.116186,33.559473,11.055751,236.155047,-1.996958,23.978541,16,1,0


In [8]:
SCALING_COLUMNS = ['Voltage (V)', 'Current (A)', 'Reactive Power (kVAR)', 'Voltage Fluctuation (%)', 'Grid Supply (kW)', 'Temperature (°C)', 'Humidity (%)', 'Power Consumption (kW)', 'Solar Power (kW)', 'Wind Power (kW)']

In [9]:
TRAINING_DATASET[SCALING_COLUMNS] = SCALER.fit_transform(TRAINING_DATASET[SCALING_COLUMNS])

In [10]:
TESTING_DATASET[SCALING_COLUMNS] = SCALER.transform(TESTING_DATASET[SCALING_COLUMNS])

In [11]:
SCALER_PATH = os.path.join(MODEL_PATH, 'scaler/load.pkl')

joblib.dump(SCALER, SCALER_PATH)

['../models/scaler/load.pkl']

In [None]:
# ── Lag features (autoregressive signal) ────────────────────────────
# Most impactful fix: gives the model 'memory' of recent values.
# Without this, the model sees no autocorrelation → predicts mean.
for lag in [1, 4, 8, 16, 24]:   # 15 min, 1 hr, 2 hr, 4 hr, 6 hr ago
    for col in ['Solar Power (kW)', 'Wind Power (kW)', 'Power Consumption (kW)']:
        TRAINING_DATASET[f'{col}_lag{lag}'] = TRAINING_DATASET[col].shift(lag)
        TESTING_DATASET[f'{col}_lag{lag}']  = TESTING_DATASET[col].shift(lag)

TRAINING_DATASET.dropna(inplace=True)
TESTING_DATASET.dropna(inplace=True)
print(f'Train shape after lags: {TRAINING_DATASET.shape}')
print(f'Test  shape after lags: {TESTING_DATASET.shape}')

In [12]:
def getSequence(data, idx, window):
    X, Y = [], []
    array = data.values.astype('float32')
    
    for i in range(len(array) - window):
        X.append(array[i:i + window])
        Y.append(array[i + window, idx])
    return np.array(X), np.array(Y)

In [13]:
TARGET_COLUMNS = ['Power Consumption (kW)', 'Solar Power (kW)', 'Wind Power (kW)']

# Recalculate indices AFTER lag columns are added
TARGET = [TRAINING_DATASET.columns.get_loc(col) for col in TARGET_COLUMNS]

In [14]:
print(f"Col: {TARGET_COLUMNS} | Index: {TARGET}")

Col: ['Power Consumption (kW)', 'Solar Power (kW)', 'Wind Power (kW)'] | Index: [5, 9, 14]


In [15]:
WINDOW = 24

X_TRAIN, Y_TRAIN = getSequence(TRAINING_DATASET, TARGET, window=WINDOW)

X_TEST, Y_TEST = getSequence(TESTING_DATASET, TARGET, window=WINDOW)

In [16]:
print(f"X_TRAIN Shape: {X_TRAIN.shape} | Y_TRAIN Shape: {Y_TRAIN.shape}")

X_TRAIN Shape: (34976, 24, 18) | Y_TRAIN Shape: (34976, 3)


In [17]:
STEPS = X_TRAIN.shape[1]
FEATURES = X_TRAIN.shape[2]
OUTPUT = Y_TRAIN.shape[1]

In [18]:
REGULARIZATION = 1e-4

In [19]:
DROP = 0.1

In [20]:
model = Sequential([
    Input(shape=(STEPS, FEATURES)),


    GRU(128, activation='tanh', kernel_regularizer=l2(REGULARIZATION), return_sequences=True), 
    BatchNormalization(), 
    Dropout(DROP),


    GRU(64, activation='tanh'),
    BatchNormalization(),
    Dropout(DROP),


    Dense(32, activation='relu', kernel_initializer='he_uniform'),


    Dense(OUTPUT) 
])

In [21]:
model.summary()

In [22]:
OPTIMIZER = Adam(learning_rate=1e-3)

In [23]:
model.compile(optimizer=OPTIMIZER, loss='log_cosh', metrics=['mae', 'mse', tf.keras.metrics.RootMeanSquaredError(name='rmse')])

In [24]:
LR_SCHEDULER = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=7, min_lr=1e-6, verbose=1)

In [25]:
EARLY_STOP = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)

In [None]:
history = model.fit(
    X_TRAIN, Y_TRAIN,
    epochs=100,
    batch_size=16,
    validation_data=(X_TEST, Y_TEST),
    callbacks=[EARLY_STOP, LR_SCHEDULER],
    verbose=1
)

Epoch 1/100
[1m2186/2186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 20ms/step - loss: 0.0569 - mae: 0.2788 - mse: 0.1183 - rmse: 0.3439 - val_loss: 0.0412 - val_mae: 0.2481 - val_mse: 0.0831 - val_rmse: 0.2883 - learning_rate: 0.0010
Epoch 2/100
[1m2186/2186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 23ms/step - loss: 0.0412 - mae: 0.2489 - mse: 0.0838 - rmse: 0.2895 - val_loss: 0.0400 - val_mae: 0.2463 - val_mse: 0.0815 - val_rmse: 0.2854 - learning_rate: 0.0010
Epoch 3/100
[1m2186/2186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 20ms/step - loss: 0.0404 - mae: 0.2476 - mse: 0.0824 - rmse: 0.2871 - val_loss: 0.0395 - val_mae: 0.2452 - val_mse: 0.0805 - val_rmse: 0.2837 - learning_rate: 0.0010
Epoch 4/100
[1m2186/2186[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 21ms/step - loss: 0.0399 - mae: 0.2464 - mse: 0.0814 - rmse: 0.2854 - val_loss: 0.0406 - val_mae: 0.2480 - val_mse: 0.0831 - val_rmse: 0.2883 - learning_rate: 0.0010
Epoch 5/100


In [None]:
model.save(os.path.join(MODEL_PATH, 'core/load.keras'))

In [None]:
LOADED_MODEL = tf.keras.models.load_model('../models/core/load.keras')

In [None]:
PREDICTIONS = LOADED_MODEL.predict(X_TEST)

In [None]:
print(f"PREDICTIONS:\n\n {PREDICTIONS[:10]}")

In [None]:
LABELS = ['Power Consumption (kW)', 'Solar Power (kW)', 'Wind Power (kW)']

In [None]:
def getInverseScale(data, scaler, idx, cols):
    dummy = np.zeros((len(data), cols))
    for i, t in enumerate(idx):
        dummy[:, t] = data[:, i]
    
    unscaled = scaler.inverse_transform(dummy)
    return unscaled[:, idx]

In [None]:
PRED = getInverseScale(PREDICTIONS, SCALER, TARGET, X_TEST.shape[2])
TRUE = getInverseScale(Y_TEST, SCALER, TARGET, X_TEST.shape[2])

In [None]:
for i, col in enumerate(LABELS):
    mae = mean_absolute_error(TRUE[:, i], PRED[:, i])
    rmse = np.sqrt(mean_squared_error(TRUE[:, i], PRED[:, i]))
    print(f"{col} -> MAE: {mae:.2f} kW | RMSE: {rmse:.2f} kW")

In [None]:
plt.figure(figsize=(15, 10))
for i, col in enumerate(LABELS):
    plt.subplot(3, 1, i+1)
    plt.plot(TRUE[:100, i], label='Actual', color='blue', alpha=0.7)
    plt.plot(PRED[:100, i], label='Predicted', color='red', linestyle='--')
    plt.title(f"Actual vs Predicted: {col}")
    plt.legend()
    plt.ylabel("kW")

In [None]:
REPORT_PATH = os.path.join(REPORT, 'result.png')

In [None]:
plt.tight_layout()
plt.savefig(REPORT_PATH)