In [9]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dropout, Attention, Dense, BatchNormalization, Bidirectional
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import tensorflow_probability as tfp

# Load and preprocess the new dataset
path = "/content/AEP_hourly.csv"  # Update the path to the new dataset
data = pd.read_csv(path, parse_dates=['Datetime'])

# Set Datetime as index
data.set_index('Datetime', inplace=True)

# Handle missing values
data.fillna(method='ffill', inplace=True)  # Forward fill for simplicity; adjust as needed

# Normalize features
scalers = {}
features = ['Temperature', 'Relative Humidity', 'Wind Speed', 'Precipitation', 'Is_Weekend_Holiday']
for feature in features:
    scalers[feature] = MinMaxScaler()
    data[feature] = scalers[feature].fit_transform(data[[feature]])

# Normalize AEP_MW separately
scaler_aep = MinMaxScaler()
data['AEP_MW'] = scaler_aep.fit_transform(data[['AEP_MW']])

# Define time window and reshape data
n_timesteps = 24  # Using 24 hours as the time window
features_list = features + ['AEP_MW']
data_reshaped = data[features_list]

# Reshape the data for LSTM
n_samples = data_reshaped.shape[0] // n_timesteps
reshaped_data = data_reshaped.values[:n_samples * n_timesteps].reshape(n_samples, n_timesteps, len(features_list))

# Split data into features and target
X = reshaped_data[:, :-1, :]
Y = reshaped_data[:, -1, -1]  # Only target is the last feature ('AEP_MW')

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

print(f"The training set has {X_train.shape}")
print(f"The test set has {X_test.shape}")

The training set has (4042, 23, 6)
The test set has (1011, 23, 6)


  data.fillna(method='ffill', inplace=True)  # Forward fill for simplicity; adjust as needed


In [12]:
def build_model_with_attention(units=50, dropout_rate=0.2, learning_rate=0.001):
    inputs = tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2]))

    # LSTM layer with attention
    lstm_out = Bidirectional(LSTM(units, return_sequences=True))(inputs)
    attention_out = Attention()([lstm_out, lstm_out])
    dropout_out = Dropout(dropout_rate)(attention_out)

    # Additional LSTM layers
    lstm_out = Bidirectional(LSTM(units, return_sequences=True))(dropout_out)
    dropout_out = Dropout(dropout_rate)(lstm_out)
    lstm_out = Bidirectional(LSTM(units))(dropout_out)

    # Dense layers
    dense_out = Dense(units, activation='leaky_relu')(lstm_out)
    outputs = Dense(1)(dense_out)  # Predicting AEP_MW

    model = tf.keras.Model(inputs, outputs)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    return model

In [13]:
# Function to train and evaluate the attention model
def train_and_evaluate_attention_model(units, dropout_rate, learning_rate):
    model = build_model_with_attention(units, dropout_rate, learning_rate)

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    history = model.fit(
        X_train, Y_train,
        validation_data=(X_test, Y_test),
        epochs=100,
        batch_size=64,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )
    return model, history

# Train and evaluate the model with attention
print("Training model with attention mechanism")
model, history = train_and_evaluate_attention_model(
    best_params['units'],
    best_params['dropout_rate'],
    best_params['learning_rate']
)

val_rmse = min(history.history['val_loss'])
if val_rmse < best_rmse:
    best_rmse = val_rmse
    best_params['attention'] = True

print(f"Best RMSE: {best_rmse} with params: {best_params}")

Training model with attention mechanism
Epoch 1/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - loss: 0.0377 - mae: 0.1447 - val_loss: 0.0148 - val_mae: 0.0942 - learning_rate: 0.0010
Epoch 2/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 0.0146 - mae: 0.0927 - val_loss: 0.0141 - val_mae: 0.0876 - learning_rate: 0.0010
Epoch 3/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 0.0127 - mae: 0.0852 - val_loss: 0.0099 - val_mae: 0.0749 - learning_rate: 0.0010
Epoch 4/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0092 - mae: 0.0722 - val_loss: 0.0071 - val_mae: 0.0648 - learning_rate: 0.0010
Epoch 5/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0072 - mae: 0.0653 - val_loss: 0.0073 - val_mae: 0.0660 - learning_rate: 0.0010
Epoch 6/100
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms

In [33]:
def predict(model, data):
    # Ensure data is in the same shape as training data
    # The input data should have the shape (1, n_timesteps - 1, number of features)
    prediction = model.predict(data)
    return scaler_aep.inverse_transform(prediction.reshape(-1, 1))

# Define a function to prepare the input data
def prepare_input_data(datetime, temperature, humidity, wind_speed, precipitation, is_weekend_holiday):
    # Create a DataFrame with the new input values
    input_data = pd.DataFrame({
        'Datetime': [datetime],
        'Temperature': [temperature],
        'Relative Humidity': [humidity],
        'Wind Speed': [wind_speed],
        'Precipitation': [precipitation],
        'Is_Weekend_Holiday': [is_weekend_holiday],
        'AEP_MW': [0]
    })

    # Set Datetime as index
    input_data.set_index('Datetime', inplace=True)

    # Normalize the features
    for feature in ['Temperature', 'Relative Humidity', 'Wind Speed', 'Precipitation', 'Is_Weekend_Holiday']:
        input_data[feature] = scalers[feature].transform(input_data[[feature]])

    # Create a time window (e.g., last 24 hours)
    # Note: You need historical data for a valid time window, here we just repeat the values for demonstration
    input_data = pd.concat([input_data] * (n_timesteps - 1), ignore_index=True)

    # Normalize the target feature
    input_data['AEP_MW'] = scaler_aep.transform(input_data[['AEP_MW']])

    # Prepare data for prediction
    input_array = input_data[features_list].values.reshape(1, n_timesteps - 1, len(features_list))
    return input_array

# Example usage
datetime = pd.to_datetime('2009-09-13 18:00:00')  # Replace with actual datetime
temperature = 2.78  # Example temperature
humidity = 72.24  # Example relative humidity
wind_speed = 16.1  # Example wind speed
precipitation = 0  # Example precipitation
is_weekend_holiday = True  # Example: 0 for False, 1 for True

# Prepare input data
input_data = prepare_input_data(datetime, temperature, humidity, wind_speed, precipitation, is_weekend_holiday)

# Ensure the model is trained before prediction
model = build_model_with_attention(best_params['units'], best_params['dropout_rate'], best_params['learning_rate'])

# Predict the value
predicted_value = predict(model, input_data)
print(f"Predicted AEP_MW: {predicted_value}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 392ms/step
Predicted AEP_MW: [[10702.376]]
