In [25]:
# importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels

import os
import random

In [26]:
import sklearn
import tensorflow 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [27]:
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch

In [28]:
os.environ['PYTHONHASHSEED'] = '42'  

random.seed(42)          
np.random.seed(42)       
tensorflow.random.set_seed(42)  

In [29]:
# load the data into dataframe
df=pd.read_csv('feature_engineered_data.csv')
print(df.head())

   current_value  lights         T1       RH_1         T2       RH_2  \
0          430.0      30  20.133333  48.000000  19.566667  44.400000   
1          250.0      30  20.260000  52.726667  19.730000  45.100000   
2          100.0      10  20.426667  55.893333  19.856667  45.833333   
3          100.0      10  20.566667  53.893333  20.033333  46.756667   
4           90.0      10  20.730000  52.660000  20.166667  47.223333   

          T3       RH_3         T4       RH_4  ...  is_weekend    nsm  lag_1  \
0  19.890000  44.900000  19.000000  46.363333  ...           0  68400  576.6   
1  19.890000  45.493333  19.000000  47.223333  ...           0  69000  430.0   
2  20.033333  47.526667  19.000000  48.696667  ...           0  69600  250.0   
3  20.100000  48.466667  19.000000  48.490000  ...           0  70200  100.0   
4  20.200000  48.530000  18.926667  48.156667  ...           0  70800  100.0   

   lag_2  lag_6  lag_12  hour_sin  hour_cos  rolling_mean_12  rolling_std_12  
0  230.

In [30]:
import pickle

# Load the selected features for 10-minute forecasting
with open("selected_features_10min.pkl", "rb") as f:
    selected_features = pickle.load(f)

print("Selected Features:", selected_features)

Selected Features: ['current_value', 'RH_5', 'T6', 'T8', 'RH_8', 'Press_mm_hg', 'nsm', 'lag_1', 'rolling_mean_12', 'rolling_std_12']


In [31]:
df['target_10min']= df['current_value'].shift(-1)

In [32]:
df = df.dropna().reset_index(drop=True)

In [33]:
df = df[selected_features + ['target_10min']]

In [34]:
train_size = int(0.8 * len(df))
df_train, df_test = df[:train_size], df[train_size:]

In [35]:
scaler = MinMaxScaler()
df_train_scaled = pd.DataFrame(scaler.fit_transform(df_train), columns=df.columns, index=df_train.index)
df_test_scaled = pd.DataFrame(scaler.transform(df_test), columns=df.columns, index=df_test.index)


In [36]:
def create_sequences(data, target_column, window_size):
    X, y = [], []
    for i in range(window_size, len(data)):
        X.append(data.iloc[i-window_size:i].values)
        y.append(data.iloc[i][target_column])
    return np.array(X), np.array(y)

window_size = 24  # use previous 24 time steps
#X, y = create_sequences(scaled_df, target_column='target_10min', window_size=window_size)
X_train, y_train = create_sequences(df_train_scaled, target_column='target_10min', window_size=window_size)
X_test, y_test = create_sequences(df_test_scaled, target_column='target_10min', window_size=window_size)

In [37]:
val_size = int(len(X_train) * 0.125)

X_train_final = X_train[:-val_size]
y_train_final = y_train[:-val_size]

X_val = X_train[-val_size:]
y_val = y_train[-val_size:]

In [38]:
# Invert scaling for target only
target_scaler = MinMaxScaler()
target_scaler.fit(df[['target_10min']])  # fit only on original (unscaled) appliances column

# LSTM

In [39]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, GRU
from keras.optimizers import Adam, RMSprop
from kerastuner import HyperModel
from kerastuner.tuners import BayesianOptimization

def build_lstm_model(hp):
    model = Sequential()
    model.add(LSTM(
        units=hp.Int('units_1', min_value=32, max_value=256, step=32),
        input_shape=(X_train_final.shape[1], X_train_final.shape[2]),
        return_sequences=False
    ))
    
    model.add(Dropout(hp.Float('dropout_1', 0.1, 0.5, step=0.1)))
    model.add(Dense(1))

    model.compile(
        loss='mse',
        optimizer=hp.Choice('optimizer', ['adam']),
        metrics=['mae']
    )
    return model



In [40]:
tuner = BayesianOptimization(
    build_lstm_model,
    objective='val_mae',
    max_trials=15,  # Number of models to try
    directory='tuning_dir',
    project_name='lstm_bayes'
)


  super().__init__(**kwargs)


In [41]:
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

tuner.search(
    X_train_final, y_train_final,
    validation_data=(X_val, y_val),
    epochs=20,
    callbacks=[early_stop],
    verbose=1,
    shuffle=False
)



Trial 15 Complete [00h 00m 54s]
val_mae: 0.04378615319728851

Best val_mae So Far: 0.04353388026356697
Total elapsed time: 00h 24m 37s


In [42]:
best_hp = tuner.get_best_hyperparameters(1)[0]

print("Best Hyperparameters:")
print(f"Units: {best_hp.get('units_1')}")
print(f"Dropout: {best_hp.get('dropout_1')}")
print(f"Optimizer: {best_hp.get('optimizer')}")

# Build and train the best model
best_lstm_model = tuner.hypermodel.build(best_hp)
history = best_lstm_model.fit(X_train, y_train, validation_data=(X_val, y_val),
                         epochs=20, batch_size=32,
                         callbacks=[early_stop])


Best Hyperparameters:
Units: 224
Dropout: 0.4
Optimizer: adam
Epoch 1/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - loss: 0.0211 - mae: 0.0869 - val_loss: 0.0113 - val_mae: 0.0543
Epoch 2/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0136 - mae: 0.0632 - val_loss: 0.0101 - val_mae: 0.0458
Epoch 3/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0126 - mae: 0.0590 - val_loss: 0.0099 - val_mae: 0.0452
Epoch 4/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - loss: 0.0124 - mae: 0.0577 - val_loss: 0.0098 - val_mae: 0.0449
Epoch 5/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.0124 - mae: 0.0564 - val_loss: 0.0098 - val_mae: 0.0449
Epoch 6/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 14ms/step - loss: 0.0122 - mae: 0.0558 - val_loss: 0.0098 - val_mae: 0.0453
Epoch 7/20
[1m493/4

In [43]:
# Predict
y_pred_lstm = best_lstm_model.predict(X_test)

y_test_actual = target_scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_actual_lstm = target_scaler.inverse_transform(y_pred_lstm)

# Evaluation metrics
mae_LSTM = mean_absolute_error(y_test_actual, y_pred_actual_lstm)
r2_LSTM = r2_score(y_test_actual, y_pred_actual_lstm)
rmse_LSTM = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual_lstm))

print(f"MAE: {mae_LSTM:.2f}")
print(f"R² Score: {r2_LSTM:.4f}")
print(f"RMSE: {rmse_LSTM:.4f}")


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step
MAE: 25.69
R² Score: 0.5744
RMSE: 55.3721


# GRU

In [44]:
def build_gru_model(hp):
    model = Sequential()
    model.add(GRU(
        units=hp.Int('units_1', min_value=32, max_value=256, step=32),
        input_shape=(X_train_final.shape[1], X_train_final.shape[2]),
        return_sequences=False
    ))
    
    model.add(Dropout(hp.Float('dropout_1', 0.1, 0.5, step=0.1)))
    model.add(Dense(1))

    model.compile(
        loss='mse',
        optimizer=hp.Choice('optimizer', ['adam']),
        metrics=['mae']
    )
    return model



In [45]:
tuner = BayesianOptimization(
    build_gru_model,
    objective='val_mae',
    max_trials=15,  # Number of models to try
    directory='tuning_dir',
    project_name='gru_bayes'
)


  super().__init__(**kwargs)


In [46]:
from keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

tuner.search(
    X_train_final, y_train_final,
    validation_data=(X_val, y_val),
    epochs=20,
    callbacks=[early_stop],
    verbose=1,
    shuffle=False
)

Trial 15 Complete [00h 02m 53s]
val_mae: 0.043975889682769775

Best val_mae So Far: 0.043975889682769775
Total elapsed time: 00h 32m 26s


In [47]:
best_hp = tuner.get_best_hyperparameters(1)[0]

print("Best Hyperparameters:")
print(f"Units: {best_hp.get('units_1')}")
print(f"Dropout: {best_hp.get('dropout_1')}")
print(f"Optimizer: {best_hp.get('optimizer')}")

# Build and train the best model
best_gru_model = tuner.hypermodel.build(best_hp)
history = best_gru_model.fit(X_train, y_train, validation_data=(X_val, y_val),
                         epochs=20, batch_size=32,
                         callbacks=[early_stop])

Best Hyperparameters:
Units: 160
Dropout: 0.4
Optimizer: adam
Epoch 1/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0214 - mae: 0.0908 - val_loss: 0.0103 - val_mae: 0.0502
Epoch 2/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 0.0134 - mae: 0.0629 - val_loss: 0.0101 - val_mae: 0.0493
Epoch 3/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 14ms/step - loss: 0.0127 - mae: 0.0595 - val_loss: 0.0100 - val_mae: 0.0464
Epoch 4/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 0.0126 - mae: 0.0582 - val_loss: 0.0100 - val_mae: 0.0463
Epoch 5/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 23ms/step - loss: 0.0124 - mae: 0.0570 - val_loss: 0.0098 - val_mae: 0.0446
Epoch 6/20
[1m493/493[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 22ms/step - loss: 0.0123 - mae: 0.0569 - val_loss: 0.0099 - val_mae: 0.0460
Epoch 7/20
[1m49

In [48]:
# Predict
y_pred_gru = best_gru_model.predict(X_test)


y_test_actual = target_scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_actual_gru = target_scaler.inverse_transform(y_pred_gru)

# Evaluation metrics
mae_GRU = mean_absolute_error(y_test_actual, y_pred_actual_gru)
r2_GRU = r2_score(y_test_actual, y_pred_actual_gru)
rmse_GRU = np.sqrt(mean_squared_error(y_test_actual, y_pred_actual_gru))

print(f"MAE: {mae_GRU:.2f}")
print(f"R² Score: {r2_GRU:.4f}")
print(f"RMSE: {rmse_GRU:.4f}")

[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
MAE: 26.14
R² Score: 0.5608
RMSE: 56.2493
