In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

In [2]:
# Simulated dataset
data = pd.read_csv("C:/Users/Owner/Desktop/Tulas/power_load_forecasting_dataset.csv")

In [3]:
# Feature Engineering (assuming the dataset has timestamp and other features)
data['hour'] = pd.to_datetime(data['Timestamp']).dt.hour
data['day_of_week'] = pd.to_datetime(data['Timestamp']).dt.dayofweek
data['month'] = pd.to_datetime(data['Timestamp']).dt.month

In [4]:
# Select features (including Region) and target
X = data[['Temperature (°C)','Humidity (%)','Wind Speed (km/h)','Rain (mm)','Public Holiday (0/1)','Weekday/Weekend (0/1)',
          'Solar Generation (MW)','Region','Population Growth Rate (%)','hour','day_of_week','month']]
y = data['Power Load (MW)']

In [5]:
# List of numerical features to be scaled
numerical_features = ['Temperature (°C)', 'Humidity (%)', 'Wind Speed (km/h)', 
                      'Rain (mm)', 'Public Holiday (0/1)', 'Weekday/Weekend (0/1)',
                      'Solar Generation (MW)', 'Population Growth Rate (%)', 
                      'hour', 'day_of_week', 'month']

In [6]:
# Initialize the scaler
scaler = MinMaxScaler()

In [7]:
# Apply scaling to the numerical features
data[numerical_features] = scaler.fit_transform(data[numerical_features])

In [8]:
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Apply label encoding to the 'Region' column
label_encoder = LabelEncoder()
X_train['Region'] = label_encoder.fit_transform(X_train['Region'])
X_test['Region'] = label_encoder.transform(X_test['Region'])

In [10]:
# Convert DataFrame to NumPy array
X_train = X_train.values
X_test= X_test.values

In [11]:
# Reshape to 3D: (samples, time steps, features)
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

In [12]:
# Define LSTM model
lstm_model = Sequential()

In [13]:
# Add LSTM layers
lstm_model.add(LSTM(100, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
lstm_model.add(Dense(50, activation='relu'))
lstm_model.add(Dense(1))


  super().__init__(**kwargs)


In [14]:
# Compile the model
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

In [15]:
# Train the model
lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 5465182.0000 - val_loss: 1069423.3750
Epoch 2/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 1072116.1250 - val_loss: 1017439.1250
Epoch 3/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 1048680.1250 - val_loss: 1008818.3125
Epoch 4/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 1020330.0000 - val_loss: 951782.3750
Epoch 5/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 982365.0000 - val_loss: 964587.7500
Epoch 6/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 975685.0625 - val_loss: 930310.6250
Epoch 7/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 951469.3125 - val_loss: 893201.5625
Epoch 8/20
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/st

<keras.src.callbacks.history.History at 0x1acffa81550>

In [16]:
# Predict and evaluate
lstm_y_pred = lstm_model.predict(X_test)
lstm_rmse = np.sqrt(np.mean((y_test - lstm_y_pred.reshape(-1))**2))
print(f'LSTM RMSE: {lstm_rmse}')

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
LSTM RMSE: 893.5433869738987


In [17]:
# Example input factors (reshaped to match LSTM's input requirements)
new_data =np.array([[
    50, # Temperature
    65,  # Humidity
    15,  # Wind Speed
    0,   # Rain (mm)
    0,   # Public Holiday (0 or 1)
    1,   # Weekday/Weekend (0 or 1)
    120, # Solar Generation (MW)
    2,    # Region (encoded)
    2.8, # Population Growth Rate
    24,  # Hour of the day
    2,   # Day of the week
    9   # Month
]])

In [18]:
# Reshape to (samples, time_steps, features)
new_data = new_data.reshape((1, 1, new_data.shape[1]))  # (1 sample, 1 time step, 12 features)

In [19]:
# Make prediction using the model
predicted_power_load = lstm_model.predict(new_data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 419ms/step


In [20]:
# Print predicted power load
print(f"Predicted Power Load (MW): {predicted_power_load[0][0]}")

Predicted Power Load (MW): 3816.4833984375


In [21]:
from joblib import dump

In [22]:
dump(lstm_model, './../savedModels/models.joblib')

['./../savedModels/models.joblib']