In [41]:
# importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels

import os
import random

In [42]:
import sklearn
import tensorflow 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [43]:
from tensorflow import keras
from tensorflow.keras import layers
from keras_tuner.tuners import RandomSearch

In [44]:
os.environ['PYTHONHASHSEED'] = '42'  

random.seed(42)          
np.random.seed(42)       
tensorflow.random.set_seed(42)  

In [45]:
# load the data into dataframe
df=pd.read_csv('feature_engineered_data.csv')
print(df.head())

   current_value  lights         T1       RH_1         T2       RH_2  \
0          430.0      30  20.133333  48.000000  19.566667  44.400000   
1          250.0      30  20.260000  52.726667  19.730000  45.100000   
2          100.0      10  20.426667  55.893333  19.856667  45.833333   
3          100.0      10  20.566667  53.893333  20.033333  46.756667   
4           90.0      10  20.730000  52.660000  20.166667  47.223333   

          T3       RH_3         T4       RH_4  ...  is_weekend    nsm  lag_1  \
0  19.890000  44.900000  19.000000  46.363333  ...           0  68400  576.6   
1  19.890000  45.493333  19.000000  47.223333  ...           0  69000  430.0   
2  20.033333  47.526667  19.000000  48.696667  ...           0  69600  250.0   
3  20.100000  48.466667  19.000000  48.490000  ...           0  70200  100.0   
4  20.200000  48.530000  18.926667  48.156667  ...           0  70800  100.0   

   lag_2  lag_6  lag_12  hour_sin  hour_cos  rolling_mean_12  rolling_std_12  
0  230.

In [46]:
import pickle

# Load the selected features for 10-minute forecasting
with open("selected_features_60min.pkl", "rb") as f:
    selected_features = pickle.load(f)

print("Selected Features:", selected_features)

Selected Features: ['current_value', 'T3', 'RH_5', 'RH_8', 'T_out', 'Press_mm_hg', 'nsm', 'hour_cos', 'rolling_mean_12', 'rolling_std_12']


In [47]:
df['target_60min']= df['current_value'].shift(-6)

In [48]:
df = df.dropna().reset_index(drop=True)

In [49]:
df = df[selected_features + ['target_60min']]

In [50]:
train_size = int(0.8 * len(df))
df_train, df_test = df[:train_size], df[train_size:]

In [51]:
scaler = MinMaxScaler()
df_train_scaled = pd.DataFrame(scaler.fit_transform(df_train), columns=df.columns, index=df_train.index)
df_test_scaled = pd.DataFrame(scaler.transform(df_test), columns=df.columns, index=df_test.index)


In [52]:
def create_sequences(data, target_column, window_size):
    X, y = [], []
    for i in range(window_size, len(data)):
        X.append(data.iloc[i-window_size:i].values)
        y.append(data.iloc[i][target_column])
    return np.array(X), np.array(y)

window_size = 24  # use previous 24 time steps
#X, y = create_sequences(scaled_df, target_column='target_10min', window_size=window_size)
X_train, y_train = create_sequences(df_train_scaled, target_column='target_60min', window_size=window_size)
X_test, y_test = create_sequences(df_test_scaled, target_column='target_60min', window_size=window_size)

In [53]:
val_size = int(len(X_train) * 0.125)

X_train_final = X_train[:-val_size]
y_train_final = y_train[:-val_size]

X_val = X_train[-val_size:]
y_val = y_train[-val_size:]

In [54]:
# Invert scaling for target only
target_scaler = MinMaxScaler()
target_scaler.fit(df[['target_60min']])  # fit only on original (unscaled) appliances column

In [55]:
param_space = {
    'nb_filters': [32, 64, 96],
    'kernel_size': [2, 3, 4],
    'dropout_rate': [0.0, 0.1, 0.2],
    'learning_rate': [1e-3, 1e-4],
}


In [56]:
from keras_tuner.tuners import BayesianOptimization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tcn import TCN
from tensorflow.keras.callbacks import EarlyStopping

def build_tcn_model(hp):
    model = Sequential()
    model.add(
        TCN(
            input_shape=(X_train.shape[1], X_train.shape[2]),
            nb_filters=hp.Choice('nb_filters', [32, 64, 96]),
            kernel_size=hp.Choice('kernel_size', [2, 3, 4]),
            dilations=[1, 2, 4, 8],  # fixed
            nb_stacks=1,             # fixed
            dropout_rate=hp.Choice('dropout_rate', [0.0, 0.1, 0.2]),
            return_sequences=False
        )
    )
    model.add(Dense(1))
    
    lr = hp.Choice('learning_rate', [1e-3, 1e-4])
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse', metrics=['mae'])
    return model

tuner = BayesianOptimization(
    build_tcn_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    directory='bayes_tcn_limited',
    project_name='tcn_appliance',
    overwrite=True
)

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

tuner.search(
    X_train_final, y_train_final,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
    shuffle=False
)


Trial 10 Complete [00h 01m 23s]
val_loss: 0.010490464977920055

Best val_loss So Far: 0.010490464977920055
Total elapsed time: 00h 08m 16s


In [57]:
best_model = tuner.get_best_models(num_models=1)[0]
val_loss, val_mae = best_model.evaluate(X_val, y_val)
print(f"Validation Loss: {val_loss:.4f} | Validation MAE: {val_mae:.4f}")


  saveable.load_own_variables(weights_store.get(inner_path))


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0105 - mae: 0.0497
Validation Loss: 0.0105 | Validation MAE: 0.0496


In [58]:
y_pred_tcn = best_model.predict(X_test)
y_pred_tcn_inv = target_scaler.inverse_transform(y_pred_tcn)
y_test_inv = target_scaler.inverse_transform(y_test.reshape(-1, 1))

from sklearn.metrics import mean_absolute_error, r2_score
print("TCN MAE:", mean_absolute_error(y_test_inv, y_pred_tcn_inv))
print("TCN R²:", r2_score(y_test_inv, y_pred_tcn_inv))
print("TCN RMSE",np.sqrt(mean_squared_error(y_test_inv, y_pred_tcn_inv)))


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step
TCN MAE: 30.17717346210868
TCN R²: 0.5301833570560526
TCN RMSE 58.13008290508683


In [59]:
from keras_tuner.tuners import BayesianOptimization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

def build_cnn_model(hp):
    model = Sequential()
    model.add(
        Conv1D(
            filters=hp.Choice('filters', [32, 64, 96]),
            kernel_size=hp.Choice('kernel_size', [2, 3, 4]),
            activation='relu',
            input_shape=(X_train.shape[1], X_train.shape[2])
        )
    )
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(
        Dense(
            hp.Choice('dense_units', [32, 50, 64]),
            activation='relu'
        )
    )
    model.add(Dense(1))

    lr = hp.Choice('learning_rate', [1e-3, 5e-4])
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse', metrics=['mae'])
    return model

tuner = BayesianOptimization(
    build_cnn_model,
    objective='val_loss',
    max_trials=10,
    executions_per_trial=1,
    directory='cnn_tuning',
    project_name='cnn_appliance_forecast'
)

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=64,
    callbacks=[early_stop],
    verbose=1,
    shuffle=False
)


Trial 10 Complete [00h 00m 10s]
val_loss: 0.015670208260416985

Best val_loss So Far: 0.009494373574852943
Total elapsed time: 00h 01m 45s


In [60]:
best_model_cnn = tuner.get_best_models(1)[0]
loss, mae = best_model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")


[1m  1/123[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m17s[0m 140ms/step - loss: 0.0622 - mae: 0.1933

  saveable.load_own_variables(weights_store.get(inner_path))


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.0110 - mae: 0.0548
Test Loss: 0.0109, Test MAE: 0.0542


In [61]:
best_model_cnn.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.1, verbose=1,shuffle=False)

Epoch 1/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.0151 - mae: 0.0672 - val_loss: 0.0094 - val_mae: 0.0463
Epoch 2/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0146 - mae: 0.0657 - val_loss: 0.0096 - val_mae: 0.0482
Epoch 3/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0145 - mae: 0.0649 - val_loss: 0.0102 - val_mae: 0.0507
Epoch 4/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0144 - mae: 0.0645 - val_loss: 0.0098 - val_mae: 0.0468
Epoch 5/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0140 - mae: 0.0641 - val_loss: 0.0098 - val_mae: 0.0465
Epoch 6/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0139 - mae: 0.0636 - val_loss: 0.0096 - val_mae: 0.0471
Epoch 7/10
[1m222/222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - 

<keras.src.callbacks.history.History at 0x2dc861d27e0>

In [62]:
y_pred_cnn = best_model_cnn.predict(X_test)
y_pred_cnn_inv = target_scaler.inverse_transform(y_pred_cnn)
y_test_inv = target_scaler.inverse_transform(y_test.reshape(-1, 1))

from sklearn.metrics import mean_absolute_error, r2_score
print("CNN MAE:", mean_absolute_error(y_test_inv, y_pred_cnn_inv))
print("CNN R²:", r2_score(y_test_inv, y_pred_cnn_inv))
print("CNN RMSE",np.sqrt(mean_squared_error(y_test_inv, y_pred_cnn_inv)))


[1m123/123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
CNN MAE: 32.526037504901836
CNN R²: 0.48883975337399943
CNN RMSE 60.633868732505206
