### LSTM Model with AEP_Hourly dataset


In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import json
from datetime import timedelta
import matplotlib.pyplot as plt

# Load the dataset. Forward slashes replace the original backslashes, which
# contained the invalid escape sequence "\A" and only resolved on Windows;
# this spelling works on Windows and POSIX alike.
data = pd.read_csv('Datasets/AEP_hourly.csv/AEP_hourly.csv')

# Convert 'Datetime' to datetime format for time-series data.
# errors='coerce' turns unparseable entries into NaT instead of raising.
data['Datetime'] = pd.to_datetime(data['Datetime'], format='%Y-%m-%d %H:%M:%S', errors='coerce')

# Drop rows whose timestamp could not be parsed: they cannot be placed on the
# timeline, and a NaT would otherwise end up as the last index entry after
# sorting and break the timestamp arithmetic in the forecasting step.
data = data.dropna(subset=['Datetime'])

# Set 'Datetime' as the index and put the rows in chronological order, so that
# "the last rows" really are the most recent readings and the sliding windows
# built later contain consecutive hours.
data = data.set_index('Datetime').sort_index()

# Ensure 'AEP_MW' column is numeric; non-numeric entries become NaN
data['AEP_MW'] = pd.to_numeric(data['AEP_MW'], errors='coerce')

# Handle missing values by forward filling (DataFrame.ffill replaces the
# deprecated fillna(method='ffill') spelling).
data = data.ffill()

# Normalize the data using MinMaxScaler to keep values between 0 and 1.
# The scaler expects a 2-D (n_samples, 1) array; ravel() flattens the result
# back to 1-D before it is stored as a column.
# NOTE: the scaler is fitted on the whole series, including the rows that are
# later held out for testing, so the test data influences the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
data['AEP_MW'] = scaler.fit_transform(data['AEP_MW'].values.reshape(-1, 1)).ravel()

# Function to create dataset in the form of time steps for LSTM
def create_dataset(data, time_step=1):
    """Build sliding-window samples and next-step targets from a sequence.

    Sample ``k`` is ``data[k:k + time_step]`` and its target is
    ``data[k + time_step]``. Every position that has a following value is
    used, so a sequence of length ``n`` yields ``n - time_step`` samples.

    Args:
        data: Array-like sequence of observations (first axis is time).
        time_step: Number of consecutive observations per input window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X`` has shape
        ``(n_samples, time_step, ...)`` and ``Y`` has shape
        ``(n_samples, ...)``. When the sequence is too short to form a single
        window, both arrays are empty but keep those shapes.
    """
    values = np.asarray(data)
    # The last usable window ends at index len - 2 so that its target
    # (index len - 1) still exists; that gives len - time_step windows.
    n_samples = max(len(values) - time_step, 0)

    # Row k of the index matrix is [k, k + 1, ..., k + time_step - 1];
    # fancy indexing gathers all windows in one vectorized step.
    window_index = np.arange(n_samples)[:, None] + np.arange(time_step)
    X = values[window_index]

    # Targets are the observations immediately after each window.
    Y = values[time_step:time_step + n_samples].copy()
    return X, Y

# Specify the time step (window size): each sample holds this many consecutive
# values and is paired with the value that follows them as its target.
time_step = 10
X, y = create_dataset(data['AEP_MW'].values, time_step)

# Reshape the data to be compatible with LSTM: [samples, timesteps, features]
X = X.reshape(X.shape[0], X.shape[1], 1)

# Split the data into training and testing sets (80% training, 20% testing).
# shuffle=False keeps the rows in their original order, so the test set is the
# final 20% of the windows. A shuffled split would scatter heavily overlapping
# windows across both sets and leak test values into the training data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Assemble the network: one 50-unit LSTM layer with ReLU activation that reads
# windows of X_train.shape[1] steps with a single feature, followed by a
# one-unit dense layer producing the AEP_MW prediction.
recurrent_layer = LSTM(50, input_shape=(X_train.shape[1], 1), activation='relu')
output_layer = Dense(1)
model = Sequential([recurrent_layer, output_layer])

# Mean squared error loss, optimized with Adam
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit for 10 epochs in batches of 32, evaluating on the held-out set after
# every epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

# Forecast the next 31 days of hourly values recursively: each prediction is
# appended to the input window and used to predict the following hour.
num_hours = 31 * 24  # Predicting 31 days of hourly data

# Seed the window with the last `time_step` (scaled) values in the dataset
last_sequence = data['AEP_MW'].values[-time_step:]

# Forecast timestamps continue from the last timestamp in the dataset
last_timestamp = data.index[-1]

predictions = []
for _ in range(num_hours):
    # Reshape the window to the LSTM input layout: [samples, timesteps, features]
    last_sequence_reshaped = last_sequence.reshape(1, time_step, 1)

    # Predict the next step; verbose=0 suppresses the progress bar that
    # predict() would otherwise print on every one of the iterations.
    predicted_value = model.predict(last_sequence_reshaped, verbose=0)

    # Store the predicted (still scaled) value
    predictions.append(predicted_value[0][0])

    # Slide the window: drop the oldest value and append the new prediction
    last_sequence = np.append(last_sequence[1:], predicted_value.flatten(), axis=0)

# One timestamp per prediction, spaced one hour apart and starting one hour
# after the last timestamp in the dataset
future_timestamps = [
    last_timestamp + timedelta(hours=offset)
    for offset in range(1, num_hours + 1)
]

# Undo the min-max scaling so the predictions are back on the original scale;
# the scaler works on a 2-D column, hence the reshape.
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Pair every prediction with its timestamp as a JSON-serializable record
output_data = [
    {
        "timestamp": str(future_timestamps[idx]),  # Timestamp of prediction
        "AEP_MW": float(row[0]),  # Predicted electricity consumption (MW)
    }
    for idx, row in enumerate(predictions)
]

# Destination of the JSON output
output_file = 'future_predictions.json'

# Serialize the records with 4-space indentation
with open(output_file, 'w') as f:
    json.dump(output_data, f, indent=4)

# Report where the predictions were written
print(f"Future predictions saved to {output_file}")

# Draw the forecast as a Seaborn line chart on a 12x6 figure
plt.figure(figsize=(12, 6))

# One marked point per predicted hour; predictions is flattened to 1-D for the y-axis
predicted_mw = predictions.flatten()
sns.lineplot(
    x=future_timestamps,
    y=predicted_mw,
    label='Predicted Power Consumption (MW)',
    marker='o',
)

# Title and axis labels, with units
plt.title('Hourly Power Consumption Prediction for Next Month', fontsize=16)
plt.xlabel('Time (Hourly Data)', fontsize=12)
plt.ylabel('Power Consumption (MW)', fontsize=12)

# Slant the x-axis tick labels so the timestamps stay readable
plt.xticks(rotation=45, ha='right')

# Legend identifying the prediction series
plt.legend(loc='upper right')

# Fit everything inside the figure area, then render the plot
plt.tight_layout()
plt.show()
