In [None]:
import os
import random
import numpy as np
import pandas as pd
from datetime import datetime
from tensorflow.keras.models import load_model

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import joblib

# One seed shared by every random number generator used in this notebook.
SEED = 42

# Request deterministic TensorFlow kernels, then seed TensorFlow, NumPy and
# the Python standard library generators with the same value.
tf.config.experimental.enable_op_determinism()
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Report the runtime: TensorFlow version and any visible GPU devices.
print(tf.__version__)
try:
    print('GPU:', tf.config.list_physical_devices('GPU'))
except Exception as err:
    print('GPU info error:', err)

2.19.0
GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# --- Paths ---------------------------------------------------------------
# TODO: fill in BASE_DIR and the CSV file names before running; they are
# empty placeholders here.
BASE_DIR = ''
TRAIN_CSV = os.path.join(BASE_DIR, '')
TEST_CSV = os.path.join(BASE_DIR, '')
PREDICTIONS_CSV = 'predictions.csv'
MODEL_DIR = os.path.join(BASE_DIR, '')
# os.makedirs('') raises FileNotFoundError, so only create the directory when
# a non-empty path has actually been configured.
if MODEL_DIR:
    os.makedirs(MODEL_DIR, exist_ok=True)

# --- Column names used throughout the notebook -----------------------------
TIME_COL = 'Timestamp_Local'
SITE_COL = 'Site'
FLAG_COL = 'Demand_Response_Flag'
TARGET_COL = 'Demand_Response_Capacity_kW'

In [4]:
# Show every column and row, and allow wide lines / long cell contents, when
# frames are printed.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.width = 1000
pd.options.display.max_colwidth = 1000

In [None]:
# Read the raw training and test tables from the configured CSV paths.
train_df = pd.read_csv(filepath_or_buffer=TRAIN_CSV)
test_df = pd.read_csv(filepath_or_buffer=TEST_CSV)

In [None]:
# Quick sanity report on the freshly loaded frames.
print('[INFO] Train shape:', train_df.shape)
print('[INFO] Test shape:', test_df.shape)

print('[INFO] Train columns:', list(train_df.columns))
print('[INFO] Test columns:', list(test_df.columns))

# These two lines print the first rows, so label them as such (they were
# previously labelled "shape").
print('[INFO] Train head:', train_df.head(10))
print('[INFO] Test head:', test_df.head(10))

In [None]:
# First five rows of each frame, one frame after the other.
print(train_df.head(), test_df.head(), sep='\n')

In [10]:
def create_enhanced_features(df):
    """Return a copy of ``df`` extended with calendar features.

    ``Timestamp_Local`` is parsed with ``pd.to_datetime`` (on the copy only) and
    the following columns are appended, in this order:

    * ``hour_sin/cos``, ``month_sin/cos``, ``weekday_sin/cos`` - cyclic encodings
      (hour over 24, zero-based month over 12, weekday over 7);
    * ``is_allowed_month`` - 1 when the month is one of {1, 2, 6, 7, 8, 12};
    * ``is_7`` ... ``is_19`` - one indicator per hour of day from 7 to 19;
    * ``is_january``, ``is_february``, ``is_june``, ``is_july``, ``is_august``,
      ``is_december`` - one indicator per month in the set above;
    * ``is_morning`` (hours 10-11) and ``is_afternoon`` (hours 12-17).

    The intermediate ``hour``, ``weekday`` and ``month`` columns are dropped
    before returning. The input frame is not modified.
    """
    out = df.copy()
    out['Timestamp_Local'] = pd.to_datetime(out['Timestamp_Local'])
    stamp = out['Timestamp_Local'].dt
    two_pi = 2 * np.pi

    out['hour'] = stamp.hour
    out['month'] = stamp.month

    hour_angle = two_pi * out['hour'] / 24
    out['hour_sin'] = np.sin(hour_angle)
    out['hour_cos'] = np.cos(hour_angle)

    # Months are 1-based, so shift to 0..11 before mapping onto the circle.
    month_angle = two_pi * (out['month'] - 1) / 12
    out['month_sin'] = np.sin(month_angle)
    out['month_cos'] = np.cos(month_angle)

    out['weekday'] = stamp.weekday
    weekday_angle = two_pi * out['weekday'] / 7
    out['weekday_sin'] = np.sin(weekday_angle)
    out['weekday_cos'] = np.cos(weekday_angle)

    # Months that get both the combined flag and an individual indicator.
    flagged_months = {
        'january': 1, 'february': 2, 'june': 6,
        'july': 7, 'august': 8, 'december': 12,
    }
    out['is_allowed_month'] = out['month'].isin(list(flagged_months.values())).astype(int)

    for hour in range(7, 20):
        out[f'is_{hour}'] = out['hour'].eq(hour).astype(int)

    for month_name, month_number in flagged_months.items():
        out[f'is_{month_name}'] = out['month'].eq(month_number).astype(int)

    # Series.between is inclusive on both ends by default.
    out['is_morning'] = out['hour'].between(10, 11).astype(int)
    out['is_afternoon'] = out['hour'].between(12, 17).astype(int)

    return out.drop(columns=['hour', 'weekday', 'month'])


# Add the calendar features to both splits.
train_df = create_enhanced_features(df=train_df)
test_df = create_enhanced_features(df=test_df)

In [None]:
# Row lags used for the trailing-delta features: every row up to 24, then every
# 4th row up to 48. The feature names ('15m', '1.25_hours', ...) treat one row
# as 15 minutes.
_TRAILING_DELTA_STEPS = tuple(range(1, 25)) + tuple(range(28, 49, 4))

# Row windows for rolling skew/kurtosis: 8..16 rows (2 h..4 h in 15-minute
# steps), then every 4th row from 20 to 48 (5 h..12 h).
_TRAILING_MOMENT_WINDOWS = tuple(range(8, 17)) + tuple(range(20, 49, 4))

# (window in rows, label) pairs for the rolling mean/min/max features. The
# labels, including the unit-less '0.5', are kept verbatim so the output column
# names do not change. The temperature set has no 3 h window, as before.
_POWER_ROLLING_WINDOWS = (
    (2, '0.5'), (4, '1h'), (6, '1.5h'), (8, '2h'),
    (10, '2.5h'), (12, '3h'), (16, '4h'), (48, '12h'),
)
_TEMP_ROLLING_WINDOWS = (
    (2, '0.5'), (4, '1h'), (6, '1.5h'), (8, '2h'),
    (10, '2.5h'), (16, '4h'), (48, '12h'),
)


def _trailing_window_features(series, tag, rolling_windows):
    """Build the row-based delta / rolling / skew / kurtosis features of one series."""
    feats = {}
    for step in _TRAILING_DELTA_STEPS:
        span = f'{step * 15}m' if step <= 4 else f'{step / 4:g}_hours'
        feats[f'trailing_{span}_{tag}_delta'] = series - series.shift(step)

    for window, label in rolling_windows:
        rolled = series.rolling(window, min_periods=1)
        feats[f'rolling_{label}_avg_{tag}'] = rolled.mean()
        feats[f'rolling_{label}_min_{tag}'] = rolled.min()
        feats[f'rolling_{label}_max_{tag}'] = rolled.max()

    for window in _TRAILING_MOMENT_WINDOWS:
        rolled = series.rolling(window=window, min_periods=1)
        label = f'{window / 4:g}h'
        feats[f'trailing_{label}_skew_{tag}'] = rolled.skew()
        feats[f'trailing_{label}_kurt_{tag}'] = rolled.kurt()
    return feats


def _prev_days_stat(grouped, n_days, min_periods, stat):
    """Rolling ``stat`` over the previous ``n_days`` values of each group (current day excluded)."""
    return grouped.transform(
        lambda s: getattr(s.shift(1).rolling(window=n_days, min_periods=min_periods), stat)()
    )


def _timeofday_features_one_series(df, timestamp_col, power_col, temp_col,
                                   windows, min_periods, time_format):
    """Compute all features for rows that belong to one continuous series."""
    # The final merge yields a fresh RangeIndex anyway, so normalise it up front.
    base = df.reset_index(drop=True)
    stamps = pd.to_datetime(base[timestamp_col])  # no-op when already datetime
    base['date'] = stamps.dt.normalize()          # midnight timestamp for the date
    base['time_of_day'] = stamps.dt.strftime(time_format)

    # Row-based trailing features, built in one go to avoid fragmenting the
    # frame with hundreds of single-column inserts.
    window_feats = _trailing_window_features(base[power_col], 'pow', _POWER_ROLLING_WINDOWS)
    window_feats.update(_trailing_window_features(base[temp_col], 'temp', _TEMP_ROLLING_WINDOWS))
    # If the frame was already featurised (cell re-run), replace the old columns
    # instead of creating duplicate column names.
    stale = [name for name in window_feats if name in base.columns]
    if stale:
        base = base.drop(columns=stale)
    base = pd.concat([base, pd.DataFrame(window_feats, index=base.index)], axis=1)

    # 1) Daily series: one row per (date, time_of_day); duplicates are averaged.
    agg_cols = [power_col, temp_col]
    daily = (
        base.groupby(['date', 'time_of_day'])[agg_cols]
            .mean()
            .reset_index()
            .sort_values(['time_of_day', 'date'])
    )

    # 2) Per time-of-day, statistics over the previous w days (current excluded).
    short_names = {power_col: 'power', temp_col: 'temp'}
    daily_feats = {}
    for w in windows:
        for col in agg_cols:
            short = short_names[col]
            by_time = daily.groupby('time_of_day')[col]
            if w != 1:
                daily_feats[f'lag_{w}d_{short}'] = by_time.shift(w)
            daily_feats[f'prev{w}d_mean_{short}'] = _prev_days_stat(by_time, w, min_periods, 'mean')
            if w != 1:
                # For w == 1 min/max/lag would all equal the mean, so they are skipped.
                daily_feats[f'prev{w}d_min_{short}'] = _prev_days_stat(by_time, w, min_periods, 'min')
                daily_feats[f'prev{w}d_max_{short}'] = _prev_days_stat(by_time, w, min_periods, 'max')
    daily = pd.concat([daily, pd.DataFrame(daily_feats, index=daily.index)], axis=1)

    # 3) Merge the daily-level features back onto the original rows.
    merged = base.merge(
        daily.drop(columns=agg_cols),
        on=['date', 'time_of_day'], how='left', validate='m:1',
    )
    return merged.drop(columns=['date', 'time_of_day']), daily


def create_timeofday_pastdays_features(
    df,
    timestamp_col='Timestamp_Local',
    power_col='Building_Power_kW',
    temp_col='Dry_Bulb_Temperature_C',
    windows=(1,2,3,4,5,6,7,8,9,10,11,12,13,14),
    min_periods=1,
    time_format='%H:%M:%S',
    site_col='Site'
    ):
    """
    Add trailing-window and same-time-of-day history features for power and temperature.

    Two families of features are produced for both ``power_col`` and ``temp_col``:

    * row-based trailing features: differences against earlier rows, rolling
      mean/min/max, and rolling skew/kurtosis (always with ``min_periods=1``);
    * day-based features: for each time of day, the value ``w`` days earlier and
      the mean/min/max over the previous ``w`` days, current day excluded.

    Changes relative to the previous implementation:

    * ``power_col`` / ``temp_col`` are honoured (they used to be overwritten by
      hard-coded names inside the function);
    * the ``5h`` skew/kurtosis features use a 20-row window (they used 24 rows,
      duplicating the ``6h`` features);
    * when ``site_col`` is present and holds more than one distinct value, every
      site is processed on its own, so shifts, rolling windows and the daily
      aggregates no longer mix rows of different sites;
    * ``timestamp_col`` is parsed with ``pd.to_datetime`` internally, so string
      timestamps are accepted (the column itself is returned unchanged).

    Parameters
    ----------
    df : DataFrame
        Input rows. Within each site they are used in their existing order;
        nothing is sorted here. The frame is not modified.
    timestamp_col, power_col, temp_col : str
        Column names of the timestamp and the two measured series.
    windows : iterable of int
        Day windows ``w`` for the day-based features.
    min_periods : int
        ``min_periods`` of the day-based rolling statistics.
    time_format : str
        ``strftime`` format used to build the time-of-day key.
    site_col : str or None
        Column that identifies independent series. ``None`` or a missing column
        disables per-site processing.

    Returns
    -------
    (df_with_features, daily_agg_df)
        ``df_with_features`` has the input rows in their original order with a
        fresh ``RangeIndex``. ``daily_agg_df`` is the per-(date, time_of_day)
        table; when several sites are processed it is the concatenation of the
        per-site tables with ``site_col`` inserted as the first column.
    """
    args = (timestamp_col, power_col, temp_col, windows, min_periods, time_format)

    per_site = (
        site_col is not None
        and site_col in df.columns
        and df[site_col].nunique(dropna=False) > 1
    )
    if not per_site:
        return _timeofday_features_one_series(df, *args)

    # Rows with a missing site get code -1 and form their own group.
    site_codes, site_values = pd.factorize(df[site_col])
    parts, dailies, positions = [], [], []
    for code in np.unique(site_codes):
        pos = np.flatnonzero(site_codes == code)
        feats, daily = _timeofday_features_one_series(df.iloc[pos], *args)
        daily.insert(0, site_col, site_values[code] if code >= 0 else np.nan)
        parts.append(feats)
        dailies.append(daily)
        positions.append(pos)

    # Put the per-site blocks back into the caller's original row order.
    restore = np.argsort(np.concatenate(positions), kind='stable')
    out = pd.concat(parts, ignore_index=True).iloc[restore].reset_index(drop=True)
    return out, pd.concat(dailies, ignore_index=True)

# Add the trailing / past-day features to both splits; the daily aggregate
# table returned as the second value is not used.
train_df, _ = create_timeofday_pastdays_features(df=train_df)
test_df, _ = create_timeofday_pastdays_features(df=test_df)


In [16]:
def preprocess_nan_values(df):
  """Replace +/-inf and NaN with 0, modifying ``df`` in place, and return it.

  Infinite values are first turned into NaN, then every NaN is filled with
  the constant 0 (not a column statistic).
  """
  non_finite = [np.inf, -np.inf]
  df.replace(to_replace=non_finite, value=np.nan, inplace=True)
  df.fillna(value=0, inplace=True)
  return df

# Zero-fill non-finite and missing values in both splits.
train_df = preprocess_nan_values(df=train_df)
test_df = preprocess_nan_values(df=test_df)

In [19]:
# Shapes after feature engineering, train first and test second.
print(train_df.shape, test_df.shape, sep='\n')

(35040, 314)
(105120, 313)


In [None]:
# Keep only the numeric columns of each split.
num_train = train_df.select_dtypes(include=[np.number]).copy()
num_test = test_df.select_dtypes(include=[np.number]).copy()

# Separate the target (as a column vector) from the training features.
X_train_num = num_train.drop(columns=[TARGET_COL])
y_train_raw = num_train[TARGET_COL].to_numpy().reshape(-1, 1)

# Any numeric training column that the test split lacks is added as zeros.
missing_in_test = [name for name in num_train.columns if name not in num_test.columns]
for col in missing_in_test:
    num_test[col] = 0.0

print(num_test.columns)
y_test_raw = num_test[TARGET_COL].to_numpy().reshape(-1, 1)

# Align the column order (and column set) of the test features with training.
X_test_num = num_test.drop(columns=[TARGET_COL])[X_train_num.columns]

Index(['Dry_Bulb_Temperature_C', 'Global_Horizontal_Radiation_W/m2', 'Building_Power_kW', 'Demand_Response_Flag', 'hour_sin', 'hour_cos', 'month_sin', 'month_cos', 'weekday_sin', 'weekday_cos',
       ...
       'prev13d_max_temp', 'lag_14d_power', 'prev14d_mean_power', 'prev14d_min_power', 'prev14d_max_power', 'lag_14d_temp', 'prev14d_mean_temp', 'prev14d_min_temp', 'prev14d_max_temp', 'Demand_Response_Capacity_kW'], dtype='object', length=312)


In [None]:
# Separate scalers for features and target, both mapping to [-1, 1].
X_scaler = MinMaxScaler(feature_range=(-1, 1))
y_scaler = MinMaxScaler(feature_range=(-1, 1))

# Fit on the training features only, reuse the fitted ranges for the test
# features, and store both as float32.
X_train_scaled = X_scaler.fit_transform(X_train_num.values).astype(np.float32)
X_test_scaled = X_scaler.transform(X_test_num.values).astype(np.float32)

# Same for the target.
y_train_scaled = y_scaler.fit_transform(y_train_raw)
y_test_scaled = y_scaler.transform(y_test_raw)

print('[INFO] Scaling done. X_train_scaled:', X_train_scaled.shape, 'y_train_scaled:', y_train_scaled.shape)

[INFO] Scaling done. X_train_scaled: (35040, 311) y_train_scaled: (35040, 1)


In [None]:
# Number of consecutive rows fed to the model per sample.
WINDOW_SIZE = 192

def make_sequences_by_site(df_raw, X_scaled_all, y_scaled_all, window_size, site_col=SITE_COL, time_col=TIME_COL, have_target=True):
    """Cut per-site sliding windows out of the scaled feature matrix.

    Rows of ``X_scaled_all`` / ``y_scaled_all`` must line up positionally with
    the rows of ``df_raw``. When ``site_col`` exists in ``df_raw`` the rows are
    grouped by site (sites in sorted order, rows with a missing site skipped)
    and windows never span two sites; otherwise all rows form one group. Rows
    are used in their existing order within each group.

    Parameters
    ----------
    df_raw : DataFrame
        Only its length and ``site_col`` are used.
    X_scaled_all : ndarray, shape (n_rows, n_features)
    y_scaled_all : ndarray or None
        Target values aligned with ``X_scaled_all``.
    window_size : int
        Number of rows per window.
    site_col : str
        Name of the grouping column.
    time_col : str
        Accepted for interface compatibility; currently unused (no sorting is
        performed here).
    have_target : bool
        Whether to build ``y_seq``.

    Returns
    -------
    X_seq : ndarray, shape (n_windows, window_size, n_features)
    y_seq : ndarray or None
        Target at the last row of each window; ``None`` when no target was
        requested/provided or no window could be built.
    end_indices : ndarray of int
        Row position (0-based, within ``df_raw``) of the last row of each window.
    """
    n_features = X_scaled_all.shape[1]
    use_target = have_target and y_scaled_all is not None

    # Work with row positions rather than index labels, so the result is
    # correct even when df_raw does not carry a default RangeIndex.
    row_positions = pd.Series(np.arange(len(df_raw)))
    if site_col in df_raw.columns:
        groups = [
            pos.to_numpy()
            for _, pos in row_positions.groupby(df_raw[site_col].to_numpy())
        ]
    else:
        groups = [row_positions.to_numpy()]

    X_parts, y_parts, end_parts = [], [], []
    for pos in groups:
        if len(pos) < window_size:
            continue  # not enough rows for a single window
        # sliding_window_view yields (n_windows, n_features, window_size) views;
        # move the window axis to the middle to get (n_windows, window, features).
        views = np.lib.stride_tricks.sliding_window_view(X_scaled_all[pos], window_size, axis=0)
        X_parts.append(np.moveaxis(views, -1, 1))
        end_parts.append(pos[window_size - 1:])
        if use_target:
            y_parts.append(y_scaled_all[pos][window_size - 1:])

    if not X_parts:
        # Keep the rank consistent so downstream shape checks still work.
        empty_X = np.empty((0, window_size, n_features), dtype=X_scaled_all.dtype)
        return empty_X, None, np.empty(0, dtype=int)

    X_seq = np.concatenate(X_parts, axis=0)
    y_seq = np.concatenate(y_parts, axis=0) if use_target else None
    return X_seq, y_seq, np.concatenate(end_parts)

# Training windows with their targets.
X_train_seq, y_train_seq, train_end_idx = make_sequences_by_site(
    df_raw=train_df,
    X_scaled_all=X_train_scaled,
    y_scaled_all=y_train_scaled,
    window_size=WINDOW_SIZE,
    have_target=True,
)
print('[INFO] Train sequences:', X_train_seq.shape, y_train_seq.shape)

# Test windows; no target is passed, so the second return value is discarded.
X_test_seq, _, test_end_idx = make_sequences_by_site(
    df_raw=test_df,
    X_scaled_all=X_test_scaled,
    y_scaled_all=None,
    window_size=WINDOW_SIZE,
    have_target=False,
)
print('[INFO] Test sequences:', X_test_seq.shape)

[INFO] Train sequences: (34849, 192, 311) (34849, 1)
[INFO] Test sequences: (104547, 192, 311)


In [None]:
# Build the LSTM regressor with the Keras functional API.
# Feature count comes from the last axis of the training windows.
n_features = X_train_seq.shape[-1]

# One sample is a (WINDOW_SIZE, n_features) window.
inputs = keras.Input(
    shape=( WINDOW_SIZE, n_features)
)

# Two stacked LSTMs: the first returns the full sequence for the second,
# the second returns only its final output.
x = layers.LSTM(128, return_sequences=True)(inputs)
x = layers.LSTM(64, return_sequences=False)(x)

# Dense head with dropout, ending in a single linear output unit.
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(1)(x)

# `inputs` and `outputs` are reused later to rebuild the model before loading weights.
model = keras.Model(inputs, outputs)

In [None]:
# Adam optimiser, MSE loss, MAE and RMSE reported as metrics.
# TODO (note translated from Turkish): change the loss.
optimizer = Adam(learning_rate=5e-4)
model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae', tf.keras.metrics.RootMeanSquaredError()],
)

model.summary()

In [None]:
# Training callbacks, all driven by the validation loss.
callbacks = [
    # Stop after 10 epochs without improvement and roll back to the best weights.
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
    # The LR reduction must use a shorter patience than early stopping. With
    # the same patience (10) the reduction would normally fire on the epoch
    # where training stops, so the lower learning rate would hardly be used.
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=4),
    # Keep the best model on disk (path relative to the working directory).
    keras.callbacks.ModelCheckpoint(
          filepath='best_lstm_model.keras',
          monitor='val_loss',
          save_best_only=True
          )
]

# validation_split takes the validation samples from the end of the arrays.
history = model.fit(
      X_train_seq, y_train_seq,
      epochs=50,
      batch_size=128,
      validation_split=0.2,
      callbacks=callbacks,
      verbose=1
    )

In [None]:
# Rebuild the model from the same input/output tensors and load the best
# checkpoint. The path is the one ModelCheckpoint wrote to (relative to the
# working directory), not a hard-coded absolute location.
model = keras.Model(inputs, outputs)
model.load_weights('best_lstm_model.keras')

print('[LOADED] Model loaded successfully using weights.')

# Convert 'Timestamp_Local' to datetime objects in test_df
if TIME_COL in test_df.columns:
    test_df[TIME_COL] = pd.to_datetime(test_df[TIME_COL])

# Predict in scaled space, then map back to the original target units with the
# in-memory y_scaler fitted earlier in the notebook.
y_test_pred_scaled = model.predict(X_test_seq, verbose=0)
y_test_pred = y_scaler.inverse_transform(y_test_pred_scaled)

# One slot per test row. Rows that never end a window (the first
# WINDOW_SIZE - 1 rows of each site) keep NaN here and are written as 0 below.
pred_capacity = np.full((len(test_df),), np.nan, dtype=float)
# Vectorised scatter of the (n, 1) predictions onto the window-end rows; this
# avoids assigning a length-1 array to a scalar element, which NumPy deprecates.
pred_capacity[np.asarray(test_end_idx, dtype=int)] = y_test_pred.ravel()

flags = test_df[FLAG_COL].fillna(0).values if FLAG_COL in test_df.columns else np.zeros(len(test_df))

# Report a capacity only where the flag is non-zero and a prediction exists;
# every other row is 0.
non_zero_mask = (flags != 0) & ~np.isnan(pred_capacity)
signed_pred = np.where(non_zero_mask, pred_capacity, 0.0)

# DataFrame
out_df = pd.DataFrame({
    SITE_COL: test_df[SITE_COL].values if SITE_COL in test_df.columns else 'NA',
    TIME_COL: pd.to_datetime(test_df[TIME_COL]).dt.strftime('%Y-%m-%d %H:%M:%S') if TIME_COL in test_df.columns else np.arange(len(test_df)),
    FLAG_COL: flags,
    'Demand_Response_Capacity_kW': signed_pred
})

out_df = out_df.sort_values([SITE_COL, TIME_COL]).reset_index(drop=True)
out_df.to_csv(PREDICTIONS_CSV, index=False)
print('[OK] Predictions saved to:', PREDICTIONS_CSV)

In [None]:
# 10) Save the model weights and scalers (artifacts for later reloading)
# Artifacts go into MODEL_DIR, which the config cell creates; with an empty
# MODEL_DIR these resolve to the working directory, as before.
weights_path = os.path.join(MODEL_DIR, 'LSTM_capacity_weights.weights.h5')
features_scaler_path = os.path.join(MODEL_DIR, 'features_scaler.joblib')
target_scaler_path = os.path.join(MODEL_DIR, 'target_scaler.joblib')

# Save only the model weights
model.save_weights(weights_path)

# Save the scalers
joblib.dump(X_scaler, features_scaler_path)
joblib.dump(y_scaler, target_scaler_path)


print('[SAVED] Model weights:', weights_path)
print('[SAVED] Feature scaler:', features_scaler_path)
print('[SAVED] Target scaler:', target_scaler_path)