In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Attention, concatenate
from tensorflow.keras.optimizers import Adam
from xgboost import XGBRegressor
from pyswarm import pso
from datetime import datetime

In [2]:
# 1. Data Loading and Preprocessing
def load_and_preprocess_data(file_path):
    # Load data
    data = pd.read_csv(file_path, parse_dates=['Date'], index_col='Date')
    data = data.dropna()
    
    # Scale demand
    scaler = MinMaxScaler()
    data['Peak Demand'] = scaler.fit_transform(data[['Peak Demand']])

    return data, scaler

In [3]:
# 2. Feature Engineering (Adjusted)
def create_features(data):
    # data['day_of_week'] = data.index.dayofweek   #Removing as it can take only discrete value
    # data['month'] = data.index.month #Removing as it can take only discrete value
    # data['year'] = data.index.year  #Removing as it can take only discrete value
    return data

In [4]:
# 3. Prepare LSTM Data (Adjusted for Continuous Data)
def prepare_lstm_data(data, time_steps=30):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data.iloc[i:(i + time_steps)].values)
        y.append(data['Peak Demand'].iloc[i + time_steps])
    return np.array(X), np.array(y)

In [6]:
# 4. Hybrid Model Building Blocks
def build_lstm_attention(input_shape):
    lstm_input = Input(shape=input_shape)
    lstm_out = LSTM(64, return_sequences=True)(lstm_input)
    attention_out = Attention()([lstm_out, lstm_out])
    lstm_output = LSTM(32)(attention_out)
    return lstm_input, lstm_output

def build_xgboost(n_estimators=100):
    xgb = XGBRegressor(objective='reg:squarederror', n_estimators=n_estimators)
    return xgb


In [7]:
# 5. Hybrid Model Creation
def create_hybrid_model(lstm_input_shape, num_xgb_features, lstm_units=32, learning_rate=0.001, xgboost_estimators=100):
    # LSTM branch
    lstm_input, lstm_output = build_lstm_attention(lstm_input_shape)

    # XGBoost branch
    xgb_input = Input(shape=(num_xgb_features,))
    xgb_output = Dense(16)(xgb_input)

    # Combine
    combined = concatenate([lstm_output, xgb_output])
    dense1 = Dense(32, activation='relu')(combined)
    output = Dense(1, activation='linear')(dense1)

    model = Model(inputs=[lstm_input, xgb_input], outputs=output)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')
    return model

In [8]:
# 6. PSO Optimization (Adjusted Objective Function)
def objective_function(params, X_lstm_train, X_xgb_train, y_train, lstm_input_shape, num_xgb_features):
    # Example: params = [lstm_units, learning_rate, xgboost_estimators]
    lstm_units = int(params[0])  # Ensure integer
    learning_rate = params[1]
    xgboost_estimators = int(params[2])
    
    # Create and train model
    model = create_hybrid_model(lstm_input_shape, num_xgb_features, lstm_units, learning_rate, xgboost_estimators)
    model.fit([X_lstm_train, X_xgb_train], y_train, epochs=2, verbose=0)
    loss = model.evaluate([X_lstm_train, X_xgb_train], y_train, verbose=0)
    
    return loss

In [9]:
# 7. Main Execution
if __name__ == "__main__":
    # Load and preprocess
    file_path = 'final_dataset.csv'
    data, demand_scaler = load_and_preprocess_data(file_path)

    # Feature creation
    data = create_features(data)

    # Prepare LSTM data
    time_steps = 30
    X_lstm, y = prepare_lstm_data(data[['Peak Demand', 'Temperature', 'Relative Humidity']], time_steps)

    # Prepare XGBoost data (Use last values from LSTM sequence - adjust features as needed)
    X_xgb = data[['Temperature', 'Relative Humidity']][time_steps:].values
    
    # Split data
    X_lstm_train, X_lstm_test, X_xgb_train, X_xgb_test, y_train, y_test = train_test_split(
        X_lstm, X_xgb, y, test_size=0.2, shuffle=False
    )

    # PSO Optimization
    lb = [32, 0.001, 50]  # Lower bounds [lstm_units, learning_rate, xgboost_estimators]
    ub = [128, 0.1, 200] # Upper bounds [lstm_units, learning_rate, xgboost_estimators]

    lstm_input_shape = (X_lstm_train.shape[1], X_lstm_train.shape[2])
    num_xgb_features = X_xgb_train.shape[1]
    

    # Ensure correct arguments are passed
    def pso_objective(params):
        return objective_function(params, X_lstm_train, X_xgb_train, y_train, lstm_input_shape, num_xgb_features)

    best_params, _ = pso(pso_objective, lb, ub, swarmsize=10, maxiter=5)

    # Create and train the final model with optimized parameters
    lstm_units = int(best_params[0])
    learning_rate = best_params[1]
    xgboost_estimators = int(best_params[2])

    final_model = create_hybrid_model(lstm_input_shape, num_xgb_features, lstm_units, learning_rate, xgboost_estimators)
    final_model.fit([X_lstm_train, X_xgb_train], y_train, epochs=10, batch_size=32, verbose=1)

Stopping search: maximum iterations reached --> 5
Epoch 1/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 19ms/step - loss: 917.0349
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 6.3906
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.5525
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.4189
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.2839
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.1991
Epoch 7/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - loss: 0.1270
Epoch 8/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.0823
Epoch 9/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 0.0465
Epoch 10/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━

In [18]:
# -- Make prediction for a specific date --
while True:
    input_date_str = input("Enter date to predict (YYYY-MM-DD, or type 'exit'): ")
    if input_date_str.lower() == 'exit':
        break

    try:
        input_date = datetime.strptime(input_date_str, '%Y-%m-%d').date()  # Parse to date only

        # Find data for the entered date or use available date to estimate
        if input_date_str in data.index:
            input_data = data[['Temperature', 'Relative Humidity']].loc[input_date_str] #Data is collected from already made data
        else:
            raise ValueError("Provided date does not exist in the list of dates in dataset. Choose new date or type exit.")

        # Get the last 'time_steps' days of data before this date
        #print(data.index)

        #Prepare dataframes for input and check to see whether they are empty
        last_data = data[['Peak Demand', 'Temperature', 'Relative Humidity']].loc[data.index < input_date_str].iloc[-time_steps:]

        if last_data.empty:
            raise ValueError("Not enough historical data for the given date.")

        # Create the input for LSTM
        X_lstm_next_day = np.array([last_data.values])

        # Create the input for XGBoost:
        X_xgb_next_day = np.array([input_data.values])

        # Make the prediction:
        predicted_peak_demand = final_model.predict([X_lstm_next_day, X_xgb_next_day.reshape(1,-1)])

        # Inverse Transform for scaling
        predicted_peak_demand = demand_scaler.inverse_transform(predicted_peak_demand)

        # Print results
        print("Predicted Peak Demand for", input_date_str, ":", predicted_peak_demand[0][0], "MW")

    except ValueError as e:
        print("Error:", e)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Predicted Peak Demand for 2024-04-20 : 211.8589 MW
Error: time data '2024-13-12' does not match format '%Y-%m-%d'


In [None]:
"""
# Make prediction for the next day
# Prepare input data for the next day forecast:

# Get the last 'time_steps' days of data
last_data = data[['Peak Demand', 'Temperature', 'Relative Humidity']].iloc[-time_steps:]

# Create the input for LSTM
X_lstm_next_day = np.array([last_data.values])

# Create the input for XGBoost (using last values from LSTM sequence)
X_xgb_next_day = data[['Temperature', 'Relative Humidity']].iloc[[-1]].values
# Make the prediction:
predicted_peak_demand = final_model.predict([X_lstm_next_day, X_xgb_next_day])

# Inverse Transform for scaling
predicted_peak_demand = demand_scaler.inverse_transform(predicted_peak_demand)

# print results
print("Predicted Peak Demand for Tomorrow:", predicted_peak_demand[0][0], "MW")
"""

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted Peak Demand for Tomorrow: 244.2282 MW
