In [None]:
# ===============================
# 📦 Import Required Libraries
# ===============================

import os                # File and directory operations
import random            # For generating random numbers
import numpy as np       # Numerical operations
import pandas as pd      # Data manipulation and analysis
from datetime import datetime  # Handling date and time data

# Deep learning imports (TensorFlow + Keras)
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

# Machine learning utilities (data scaling, metrics, splitting)
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

import joblib  # For saving and loading preprocessing models (e.g., scalers)

# ===============================
# 🎲 Reproducibility: seed every random number generator
# ===============================

SEED = 42  # One shared seed for Python, NumPy and TensorFlow
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Ask TensorFlow to run its ops deterministically
tf.config.experimental.enable_op_determinism()

# ===============================
# ⚙️ Runtime environment report
# ===============================

# TensorFlow version in use
print(tf.__version__)

# GPU discovery is best-effort: report the error instead of aborting the cell
try:
    print('GPU:', tf.config.list_physical_devices('GPU'))
except Exception as e:
    print('GPU info error:', e)


2.19.0
GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# ===============================
# 🖥️ Pandas Display Options
# ===============================

# option name -> value:
#   max_columns / max_rows = None  -> never truncate columns / rows
#   width = 1000                   -> wide console lines for wide frames
#   max_colwidth = 1000            -> do not cut long cell text
for _option_name, _option_value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.width', 1000),
    ('display.max_colwidth', 1000),
):
    pd.set_option(_option_name, _option_value)


In [None]:
# ===============================
# 📁 Define File Paths & Directories
# ===============================

# Base directory for the dataset and models.
# An empty string means "relative to the current working directory".
BASE_DIR = ''

# Paths to CSV files (the file names are placeholders: fill them in before running)
TRAIN_CSV = os.path.join(BASE_DIR, '')  # Training data CSV path
TEST_CSV = os.path.join(BASE_DIR, '')   # Test data CSV path
PREDICTIONS_CSV = os.path.join('predictions.csv')  # Where to save predictions

# Directory to save trained models
MODEL_DIR = os.path.join(BASE_DIR, '')
# os.makedirs('') raises FileNotFoundError, so only create the directory when
# a non-empty path has been configured (an empty path is the working directory,
# which already exists).
if MODEL_DIR:
    os.makedirs(MODEL_DIR, exist_ok=True)

# ===============================
# 🕒 Column Names in Dataset
# ===============================

TIME_COL = 'Timestamp_Local'            # Column containing timestamp of each record
SITE_COL = 'Site'                        # Column indicating the building/site
FLAG_COL = 'Demand_Response_Flag'       # Demand response event flag (-1,0,1); used as a feature and to mask predictions
TARGET_COL = 'Demand_Response_Capacity_kW'  # Target numeric column (capacity in kW)


In [None]:
# ===============================
# 📂 Load Training and Test Data
# ===============================

# Read both CSV files (training first, then test) into DataFrames
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (TRAIN_CSV, TEST_CSV))


In [None]:
# ===============================
# 📊 Inspect Data Shapes and Columns
# ===============================

# Shapes (rows, columns) followed by the column names of both datasets
for _message, _value in (
    ('[INFO] Train shape:', train_df.shape),
    ('[INFO] Test shape:', test_df.shape),
    ('[INFO] Train columns:', list(train_df.columns)),
    ('[INFO] Test columns:', list(test_df.columns)),
):
    print(_message, _value)

# Quick look at the first 10 rows of each dataset
for _heading, _frame in (
    ('[INFO] Train head (first 10 rows):', train_df),
    ('[INFO] Test head (first 10 rows):', test_df),
):
    print(_heading)
    print(_frame.head(10))


In [None]:
# ===============================
# 🛠️ Feature Engineering: Create Enhanced Features
# ===============================

def create_enhanced_features(df):
    """Return a copy of ``df`` enriched with calendar features.

    The ``Timestamp_Local`` column is parsed to datetime and used to derive:

    * sine/cosine encodings of hour (period 24), month (period 12, January
      mapped to angle 0) and weekday (period 7, Monday = 0);
    * ``is_allowed_month`` (1 for months 1, 2, 6, 7, 8, 12);
    * one 0/1 flag per hour ``is_7`` ... ``is_19``;
    * one 0/1 flag for each of the six months listed above;
    * ``is_morning`` (hours 10-11) and ``is_afternoon`` (hours 12-17).

    The intermediate ``hour``, ``month`` and ``weekday`` columns are removed
    before returning. The input frame itself is not modified.
    """
    enriched = df.copy()

    # Parse the timestamp once and reuse the datetime accessor
    enriched['Timestamp_Local'] = pd.to_datetime(enriched['Timestamp_Local'])
    stamp = enriched['Timestamp_Local'].dt

    # Temporary integer columns (dropped at the end)
    enriched['hour'] = stamp.hour
    enriched['month'] = stamp.month
    enriched['weekday'] = stamp.weekday  # 0=Monday, 6=Sunday

    # Cyclic encodings: (output prefix, values, period)
    cyclic_inputs = (
        ('hour', enriched['hour'], 24),
        ('month', enriched['month'] - 1, 12),
        ('weekday', enriched['weekday'], 7),
    )
    for name, values, period in cyclic_inputs:
        angle = 2 * np.pi * values / period
        enriched[f'{name}_sin'] = np.sin(angle)
        enriched[f'{name}_cos'] = np.cos(angle)

    # Months that receive a dedicated flag (flag suffix -> month number)
    month_numbers = {
        'january': 1,
        'february': 2,
        'june': 6,
        'july': 7,
        'august': 8,
        'december': 12,
    }
    enriched['is_allowed_month'] = (
        enriched['month'].isin(set(month_numbers.values())).astype(int)
    )

    # One-hot style flags for hours 7 through 19 (inclusive)
    for hour_value in range(7, 20):
        enriched[f'is_{hour_value}'] = enriched['hour'].eq(hour_value).astype(int)

    # Per-month flags
    for month_name, number in month_numbers.items():
        enriched[f'is_{month_name}'] = enriched['month'].eq(number).astype(int)

    # Part-of-day flags (both bounds inclusive)
    enriched['is_morning'] = enriched['hour'].between(10, 11).astype(int)
    enriched['is_afternoon'] = enriched['hour'].between(12, 17).astype(int)

    # Raw calendar integers are no longer needed once encoded
    return enriched.drop(columns=['hour', 'weekday', 'month'])

# Add the calendar features to the training set first, then to the test set
train_df, test_df = (
    create_enhanced_features(frame) for frame in (train_df, test_df)
)


In [None]:
# ======================================================
# 🕰️ Feature Engineering: Trailing, Rolling & Past-Days Features
# ======================================================

# (rows to look back, label) for the trailing-delta features.
# The labels assume 15-minute sampling (4 rows per hour): 1 row = 15m, 8 rows = 2 hours, ...
_DELTA_LAGS = (
    [(1, '15m'), (2, '30m'), (3, '45m'), (4, '60m')]
    + [(rows, f'{rows / 4:g}_hours') for rows in range(5, 25)]      # 1.25h ... 6h in 15-minute steps
    + [(4 * hours, f'{hours}_hours') for hours in range(7, 13)]     # 7h ... 12h in 1-hour steps
)

# (window in rows, label) for rolling mean/min/max.
# The temperature list has no 3h entry; this asymmetry is kept on purpose so the
# set of generated columns stays the same as before.
_POWER_ROLLING_WINDOWS = [(2, '0.5'), (4, '1h'), (6, '1.5h'), (8, '2h'), (10, '2.5h'),
                          (12, '3h'), (16, '4h'), (48, '12h')]
_TEMP_ROLLING_WINDOWS = [(2, '0.5'), (4, '1h'), (6, '1.5h'), (8, '2h'), (10, '2.5h'),
                         (16, '4h'), (48, '12h')]

# (window in rows, label) for rolling skewness / kurtosis.
# 5h is 20 rows; it previously used 24 rows and therefore duplicated the 6h columns.
_MOMENT_WINDOWS = [(8, '2h'), (9, '2.25h'), (10, '2.5h'), (11, '2.75h'),
                   (12, '3h'), (13, '3.25h'), (14, '3.5h'), (15, '3.75h'),
                   (16, '4h'), (20, '5h'), (24, '6h'), (28, '7h'), (32, '8h'),
                   (36, '9h'), (40, '10h'), (44, '11h'), (48, '12h')]


def _trailing_features(series, tag, rolling_windows):
    """Build the trailing-delta, rolling mean/min/max and rolling skew/kurt columns for one series."""
    feats = {}
    # Difference between the current value and the value `lag` rows earlier
    for lag, label in _DELTA_LAGS:
        feats[f'trailing_{label}_{tag}_delta'] = series - series.shift(lag)
    # min_periods=1 lets the first rows produce a value from a partial window
    for window, label in rolling_windows:
        roll = series.rolling(window, min_periods=1)
        feats[f'rolling_{label}_avg_{tag}'] = roll.mean()
        feats[f'rolling_{label}_min_{tag}'] = roll.min()
        feats[f'rolling_{label}_max_{tag}'] = roll.max()
    for window, label in _MOMENT_WINDOWS:
        roll = series.rolling(window=window, min_periods=1)
        feats[f'trailing_{label}_skew_{tag}'] = roll.skew()
        feats[f'trailing_{label}_kurt_{tag}'] = roll.kurt()
    return feats


def _prior_days_stat(grouped, window, min_periods, stat):
    """Rolling `stat` over the previous `window` days of each group (shift(1) excludes the current day)."""
    return grouped.transform(
        lambda s: getattr(s.shift(1).rolling(window=window, min_periods=min_periods), stat)()
    )


def create_timeofday_pastdays_features(
    df,
    timestamp_col='Timestamp_Local',
    power_col='Building_Power_kW',
    temp_col='Dry_Bulb_Temperature_C',
    windows=(1,2,3,4,5,6,7,8,9,10,11,12,13,14),
    min_periods=1,
    time_format='%H:%M:%S'
    ):
    """
    Add short-term trailing features and "same time on previous days" features.

    Two families of features are produced for both ``power_col`` and ``temp_col``:

    1. Row-based features computed in the frame's row order: trailing deltas,
       rolling mean/min/max and rolling skew/kurtosis. Their labels assume
       regular 15-minute sampling.
    2. Day-based features: for each clock time, the mean (and for ``w > 1`` also
       the exact lag, min and max) of the values observed at that same clock
       time over the previous ``w`` days, the current day excluded.

    Parameters
    ----------
    df : DataFrame holding ``timestamp_col``, ``power_col`` and ``temp_col``.
    timestamp_col : timestamp column name; converted with ``pd.to_datetime``
        when it is not already a datetime column.
    power_col, temp_col : names of the power and temperature columns.
    windows : look-back lengths in days for the day-based features.
    min_periods : minimum number of past days needed by the day-based rolling
        statistics (the row-based features always use ``min_periods=1``).
    time_format : ``strftime`` format used to build the clock-time key.

    Returns
    -------
    (df_with_features, daily_agg_df) : the input rows (same order, new default
    integer index) with the feature columns appended, and the per
    (date, time_of_day) table that the day-based features were computed on.

    NOTE(review): all row-based features and the (date, time_of_day) averaging
    ignore any site column. If the frame stacks several sites, shifts/rolling
    windows cross site boundaries and the daily table averages the sites
    together — confirm whether per-site grouping is needed.
    """
    # work on a copy so the caller's dataframe is left untouched
    df = df.copy()

    # ----------------------
    # helper columns
    # ----------------------
    timestamps = df[timestamp_col]
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        # tolerate string timestamps instead of failing on the .dt accessor
        timestamps = pd.to_datetime(timestamps)
    # 'date' is the midnight-normalized timestamp used to group by calendar day
    df['date'] = timestamps.dt.normalize()
    # 'time_of_day' aligns observations taken at the same clock time on different days
    df['time_of_day'] = timestamps.dt.strftime(time_format)

    # ----------------------
    # row-based features: built in one go and attached with a single concat,
    # which avoids fragmenting the frame with hundreds of column inserts
    # ----------------------
    new_cols = {}
    new_cols.update(_trailing_features(df[power_col], 'pow', _POWER_ROLLING_WINDOWS))
    new_cols.update(_trailing_features(df[temp_col], 'temp', _TEMP_ROLLING_WINDOWS))
    df = pd.concat([df, pd.concat(new_cols, axis=1)], axis=1)

    # ----------------------
    # 1) Build daily-series: one row per (date, time_of_day)
    # ----------------------
    agg_cols = [power_col, temp_col]
    daily = (
        df.groupby(['date', 'time_of_day'])[agg_cols]
          .mean()            # several rows for the same (date,time) are averaged
          .reset_index()
          .sort_values(['time_of_day', 'date'])
    )

    # short names used inside the generated column names
    col_map = {power_col: 'power', temp_col: 'temp'}

    # ----------------------
    # 2) For each time_of_day group, compute statistics over the previous w days
    # ----------------------
    grouped = {col: daily.groupby('time_of_day')[col] for col in agg_cols}
    daily_new = {}
    for w in windows:
        prefix = f'prev{w}d'   # e.g. prev3d_mean_power, prev7d_max_temp...
        for col in agg_cols:
            short = col_map[col]
            grp = grouped[col]

            if w == 1:
                # a 1-day window only gets the mean (min/max/lag would be identical to it)
                daily_new[f'{prefix}_mean_{short}'] = _prior_days_stat(grp, w, min_periods, 'mean')
            else:
                # exact value w days ago at the same time_of_day
                daily_new[f'lag_{w}d_{short}'] = grp.transform(lambda s, _w=w: s.shift(_w))
                for stat in ('mean', 'min', 'max'):
                    daily_new[f'{prefix}_{stat}_{short}'] = _prior_days_stat(grp, w, min_periods, stat)

    if daily_new:
        daily = pd.concat([daily, pd.concat(daily_new, axis=1)], axis=1)

    # ----------------------
    # 3) Merge daily-level features back to original rows by (date, time_of_day)
    # ----------------------
    daily_feats = daily.drop(columns=[power_col, temp_col])
    df = df.merge(daily_feats, on=['date', 'time_of_day'], how='left', validate='m:1')
    # cleanup helper columns
    df = df.drop(columns=['date', 'time_of_day'])

    return df, daily


# Build the trailing / past-days features for both datasets; the second return
# value (the per-day aggregate table) is not needed here and is discarded
train_df = create_timeofday_pastdays_features(train_df)[0]
test_df = create_timeofday_pastdays_features(test_df)[0]


In [None]:
def preprocess_nan_values(df):
    """Return a copy of ``df`` where +/-inf and NaN values are replaced by 0.

    Infinite values are first turned into NaN, then every NaN is filled with 0
    (other strategies such as the median could be used depending on the data).
    The input frame is left unmodified, so re-running a cell that calls this
    function never changes data held by other variables.
    """
    return df.replace([np.inf, -np.inf], np.nan).fillna(0)

# Clean inf/NaN values in the training data, then in the test data
train_df, test_df = (
    preprocess_nan_values(frame) for frame in (train_df, test_df)
)

In [None]:
# (rows, columns) of the training set, then of the test set, one per line
print(train_df.shape, test_df.shape, sep='\n')

(35040, 314)
(105120, 313)


In [None]:
# Keep only the numeric columns of each dataset (as independent copies)
num_train = train_df.select_dtypes(include=[np.number]).copy()
num_test = test_df.select_dtypes(include=[np.number]).copy()

# Training target as an (n, 1) column vector, and the remaining numeric columns as features
y_train_raw = num_train[TARGET_COL].to_numpy().reshape(-1, 1)
X_train_num = num_train.drop(columns=[TARGET_COL])

# Any numeric training column absent from the test set is added as a constant 0.0
# column. This includes the target column when the test set has none, in which
# case y_test_raw below is all zeros.
for _missing_col in [c for c in num_train.columns if c not in num_test.columns]:
    num_test[_missing_col] = 0.0

# Show the test columns to verify the alignment
print(num_test.columns)

# Test target as an (n, 1) column vector
y_test_raw = num_test[TARGET_COL].to_numpy().reshape(-1, 1)

# Test features: drop the target, then put the columns in the training order
X_test_num = num_test.drop(columns=[TARGET_COL]).loc[:, X_train_num.columns]

Index(['Dry_Bulb_Temperature_C', 'Global_Horizontal_Radiation_W/m2', 'Building_Power_kW', 'Demand_Response_Flag', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'weekday_sin', 'weekday_cos',
       ...
       'prev13d_max_temp', 'lag_14d_power', 'prev14d_mean_power', 'prev14d_min_power', 'prev14d_max_power', 'lag_14d_temp', 'prev14d_mean_temp', 'prev14d_min_temp', 'prev14d_max_temp', 'Demand_Response_Capacity_kW'], dtype='object', length=312)


In [None]:
# Two independent MinMaxScalers mapping the training range of the features and
# of the target onto [-1, 1]
X_scaler = MinMaxScaler(feature_range=(-1, 1))
y_scaler = MinMaxScaler(feature_range=(-1, 1))

# Features: fit on the training set only, apply to both sets, store as float32.
# Test values outside the training range can fall outside [-1, 1].
X_train_scaled = X_scaler.fit_transform(X_train_num.values).astype('float32')
X_test_scaled = X_scaler.transform(X_test_num.values).astype('float32')

# Target: same scheme, fitted on the training target only
y_train_scaled = y_scaler.fit_transform(y_train_raw)
y_test_scaled = y_scaler.transform(y_test_raw)

# Report the shapes of the scaled training arrays
print('[INFO] Scaling done. X_train_scaled:', X_train_scaled.shape, 'y_train_scaled:', y_train_scaled.shape)

[INFO] Scaling done. X_train_scaled: (35040, 311) y_train_scaled: (35040, 1)


In [None]:
# Number of timesteps per input sequence. Per the authors, one day contributes
# 48 rows (07:00 to 19:00), so 96 steps cover the last two days; other window
# sizes were tried and this one worked best.
WINDOW_SIZE = 96


def make_sequences_by_site(df_raw, X_scaled_all, y_scaled_all, window_size, site_col=SITE_COL, time_col=TIME_COL, have_target=True):
    """Cut the scaled feature matrix into overlapping windows, site by site.

    For every group of rows (one group per value of ``site_col``, or a single
    group when that column is absent) each run of ``window_size`` consecutive
    rows becomes one sequence. The target of a sequence is the target of its
    last row.

    Parameters
    ----------
    df_raw : DataFrame whose rows correspond one-to-one, in order, to the rows
        of ``X_scaled_all`` (and ``y_scaled_all``).
    X_scaled_all : array of shape (n_rows, n_features).
    y_scaled_all : array with n_rows entries, or None.
    window_size : number of rows per sequence.
    site_col : grouping column; sequences never span two sites.
    time_col : accepted for interface compatibility; not used. Rows are taken
        in their existing order, so the frame must already be time-ordered
        within each site.
    have_target : when False no targets are collected.

    Returns
    -------
    (X_seq, y_seq, end_indices) : the stacked sequences, their targets (None if
    no target was collected) and, for each sequence, the 0-based row position
    in ``df_raw`` of its last element.

    Raises
    ------
    ValueError : if ``X_scaled_all`` and ``df_raw`` have different lengths.
    """
    if len(X_scaled_all) != len(df_raw):
        raise ValueError(
            f'X_scaled_all has {len(X_scaled_all)} rows but df_raw has {len(df_raw)} rows'
        )

    use_target = have_target and y_scaled_all is not None

    # Work with row POSITIONS, not index labels: the NumPy arrays are indexed
    # positionally, so labels are only correct for a default 0..n-1 index.
    if site_col in df_raw.columns:
        site_keys = df_raw[site_col].reset_index(drop=True)
        # groups are visited in sorted site order, rows keep their original order
        row_groups = [
            grp.index.to_numpy()
            for _, grp in site_keys.groupby(site_keys.to_numpy())
        ]
    else:
        # no site column: the whole dataset is one group
        row_groups = [np.arange(len(df_raw))]

    X_seq_list, y_seq_list, end_indices = [], [], []
    for rows in row_groups:
        Xg = X_scaled_all[rows]
        yg = y_scaled_all[rows] if use_target else None

        # a group shorter than window_size yields no sequence
        for start in range(len(rows) - window_size + 1):
            last = start + window_size - 1
            X_seq_list.append(Xg[start:last + 1])
            end_indices.append(rows[last])
            if use_target:
                y_seq_list.append(yg[last])

    X_seq = np.array(X_seq_list)
    y_seq = np.array(y_seq_list) if have_target and y_seq_list else None

    return X_seq, y_seq, np.array(end_indices)

# Training windows together with the target of each window's last row
X_train_seq, y_train_seq, train_end_idx = make_sequences_by_site(
    train_df,
    X_train_scaled,
    y_train_scaled,
    WINDOW_SIZE,
    have_target=True,
)
print('[INFO] Train sequences:', X_train_seq.shape, y_train_seq.shape)

# Test windows: no target is passed, so the second return value is discarded
X_test_seq, _, test_end_idx = make_sequences_by_site(
    test_df,
    X_test_scaled,
    None,
    WINDOW_SIZE,
    have_target=False,
)
print('[INFO] Test sequences:', X_test_seq.shape)

[INFO] Train sequences: (34849, 192, 311) (34849, 1)
[INFO] Test sequences: (104547, 192, 311)


In [None]:
# Stacked-LSTM regressor built with the Keras functional API.
# The statements are kept in this exact order: the layers are created and
# called one after another, and with the global seed set earlier the initial
# weights may depend on that order.

# Number of features per timestep (last axis of the training sequences)
n_features = X_train_seq.shape[-1]

# Input: one sequence of WINDOW_SIZE timesteps with n_features values each
inputs = keras.Input(shape=(WINDOW_SIZE, n_features))

# First LSTM layer, 128 units.
# return_sequences=True emits one vector per timestep so the next LSTM layer
# receives a full sequence.
x = layers.LSTM(128, return_sequences=True)(inputs)

# Second (last) LSTM layer, 64 units.
# return_sequences=False emits a single vector summarising the whole sequence.
x = layers.LSTM(64, return_sequences=False)(x)

# Dropout (rate 0.3): regularisation against overfitting, active during training only
x = layers.Dropout(0.3)(x)

# Fully connected layer, 64 units with ReLU non-linearity
x = layers.Dense(64, activation='relu')(x)

# Dropout (rate 0.2): additional regularisation
x = layers.Dropout(0.2)(x)

# Output layer: a single linear unit, i.e. one continuous value (regression)
outputs = layers.Dense(1)(x)

# Assemble the model from its input and output tensors
model = keras.Model(inputs, outputs)


In [None]:
# Adam optimizer with a fixed initial learning rate of 5e-4
optimizer = Adam(learning_rate=5e-4)

# Train on mean squared error; additionally track MAE and RMSE
# (all computed on the scaled target)
model.compile(
    loss='mse',
    optimizer=optimizer,
    metrics=['mae', keras.metrics.RootMeanSquaredError()],
)

# Layer-by-layer overview: output shapes and parameter counts
model.summary()


In [None]:
# Define callbacks for training
callbacks = [
    # EarlyStopping: stop when val_loss has not improved for `patience` epochs and
    # roll the model back to the weights of the best epoch.
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    ),

    # ReduceLROnPlateau: lower the learning rate when val_loss stops improving.
    # Its patience must be SMALLER than the EarlyStopping patience: with equal
    # values the reduction typically fires in the very epoch in which training
    # is stopped, so the smaller learning rate never gets a chance to help.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        patience=5
    ),

    # ModelCheckpoint: keep on disk the model with the best validation loss
    keras.callbacks.ModelCheckpoint(
        filepath='best_lstm_model.keras',
        monitor='val_loss',
        save_best_only=True
    )
]

# Train the model.
# Note: Keras takes the validation split from the LAST 20% of the samples
# (before shuffling), i.e. from the tail of the stacked sequences.
history = model.fit(
    X_train_seq,            # Input training sequences
    y_train_seq,            # Target value of each sequence
    epochs=50,              # Maximum number of epochs
    batch_size=128,         # Samples per gradient update
    validation_split=0.2,   # Fraction of the training data held out for validation
    callbacks=callbacks,    # Callbacks defined above
    verbose=1               # Print training progress per epoch
)


In [None]:
# Rebuild the model from the existing input/output tensors and load the best
# checkpoint. The path is the same relative path the ModelCheckpoint callback
# writes to, so this works from any working directory (not only /content).
model = keras.Model(inputs, outputs)
model.load_weights('best_lstm_model.keras')

print('[LOADED] Model loaded successfully using weights.')

# Ensure that the timestamp column is in datetime format
if TIME_COL in test_df.columns:
    test_df[TIME_COL] = pd.to_datetime(test_df[TIME_COL])

# Predict on the test sequences (values are in the scaled target space)
y_test_pred_scaled = model.predict(X_test_seq, verbose=0)

# Bring the predictions back to the original target scale
y_test_pred = y_scaler.inverse_transform(y_test_pred_scaled)

# One slot per test row; rows without a full window of history stay NaN
pred_capacity = np.full((len(test_df),), np.nan, dtype=float)

# Each prediction belongs to the row at which its sequence ends.
# Vectorised write: avoids assigning 1-element arrays to scalars one by one.
if len(test_end_idx):
    pred_capacity[np.asarray(test_end_idx, dtype=int)] = y_test_pred.reshape(-1)

# Retrieve flags if available, otherwise assume zeros
flags = test_df[FLAG_COL].fillna(0).values if FLAG_COL in test_df.columns else np.zeros(len(test_df))

# Keep a prediction only where the flag is non-zero and a prediction exists;
# every other row is reported as 0
non_zero_mask = (flags != 0) & ~np.isnan(pred_capacity)
signed_pred = np.where(non_zero_mask, pred_capacity, 0.0)

# Create the output DataFrame
out_df = pd.DataFrame({
    SITE_COL: test_df[SITE_COL].values if SITE_COL in test_df.columns else 'NA',
    TIME_COL: pd.to_datetime(test_df[TIME_COL]).dt.strftime('%Y-%m-%d %H:%M:%S')
              if TIME_COL in test_df.columns else np.arange(len(test_df)),
    FLAG_COL: flags,
    'Demand_Response_Capacity_kW': signed_pred
})

# Sort by site and timestamp (the fixed-width timestamp strings sort
# chronologically) and reset the index
out_df = out_df.sort_values([SITE_COL, TIME_COL]).reset_index(drop=True)

# Save predictions to CSV
out_df.to_csv(PREDICTIONS_CSV, index=False)
print('[OK] Predictions saved to:', PREDICTIONS_CSV)


In [None]:
# Output files, all written to the current working directory
weights_path = 'LSTM_capacity_weights.weights.h5'   # model weights only
features_scaler_path = 'features_scaler.joblib'     # fitted input-feature scaler
target_scaler_path = 'target_scaler.joblib'         # fitted target scaler

# Persist the weights first, then the two preprocessing scalers
model.save_weights(weights_path)
joblib.dump(X_scaler, features_scaler_path)
joblib.dump(y_scaler, target_scaler_path)

# Tell the user where each artefact was written
for _label, _saved_path in (
    ('[SAVED] Model weights:', weights_path),
    ('[SAVED] Feature scaler:', features_scaler_path),
    ('[SAVED] Target scaler:', target_scaler_path),
):
    print(_label, _saved_path)
