# BTP - 1

DAM and RTM Forecasting using LSTM

In [1]:
import pandas as pd
import torch
import numpy as np

# Preprocessing

Below function is applied to a csv file having the following columns:
1. Date
2. Hour: 00:00, 00:15, 00:30, 00:45 are considered as 1
3. MCP (Rs/MWh)

In [2]:
def convert_to_datetime(df):
    ndf = df.copy(deep = True)
    ndf['Date'] = pd.to_datetime(df['Date'], format = '%d-%m-%Y')
    ndf['Hour'] = df['Hour'].apply(lambda x: x-1)

    # Combine into datetime
    ndf['datetime'] = ndf['Date'] + pd.to_timedelta(ndf['Hour'], unit='h')

    # Add 15-minute increments (4 rows per hour → 00:00, 00:15, 00:30, 00:45)
    ndf['datetime'] = ndf.groupby(['Date', 'Hour'])['datetime'].transform(
    lambda x: x + pd.to_timedelta((x.groupby(x).cumcount()) * 15, unit='m'))

    ndf.drop(columns= ['Hour', 'Date'])
    ndf = ndf[['datetime', 'MCP (Rs/MWh)']]
    return ndf

In [4]:
DAM = pd.read_csv('/content/DAM_Price.csv')
DAM.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50496 entries, 0 to 50495
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          50496 non-null  object 
 1   Hour          50496 non-null  int64  
 2   MCP (Rs/MWh)  50496 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 1.2+ MB


In [5]:
RTM = pd.read_csv('/content/RTM_Price.csv')
RTM.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50400 entries, 0 to 50399
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          50400 non-null  object 
 1   Hour          50400 non-null  int64  
 2   MCP (Rs/MWh)  50400 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 1.2+ MB


No null values exist. Atleast 525 days data is present for each. **DAM** has extra one days prediction as it sold day before the given day. So extra 96 entries.

No IQR or Z-score is used to remove outliers as its removal we lead to us losing the continuity of time series data.

In [6]:
dam1 = convert_to_datetime(DAM)
dam1.head()

Unnamed: 0,datetime,MCP (Rs/MWh)
0,2023-09-01 00:00:00,10000.0
1,2023-09-01 00:15:00,10000.0
2,2023-09-01 00:30:00,10000.0
3,2023-09-01 00:45:00,10000.0
4,2023-09-01 01:00:00,10000.0


In [7]:
rtm1 = convert_to_datetime(RTM)
rtm1.head()

Unnamed: 0,datetime,MCP (Rs/MWh)
0,2023-09-01 00:00:00,10000.0
1,2023-09-01 00:15:00,10000.0
2,2023-09-01 00:30:00,10000.0
3,2023-09-01 00:45:00,10000.0
4,2023-09-01 01:00:00,10000.0


In [None]:
# from pandas.plotting import autocorrelation_plot
# import matplotlib.pyplot as plt

# autocorrelation_plot(dam_check)
# plt.show()


# Spliiting Data

In [8]:
total_len = len(dam1)
train_end = int(0.7 * total_len)
val_end = int(0.85 * total_len)

In [9]:
dam1_train = dam1.iloc[:train_end]
dam1_valid = dam1.iloc[train_end:val_end]
dam1_test = dam1.iloc[val_end:]

In [10]:
rtm1_train = rtm1.iloc[:train_end]
rtm1_valid = rtm1.iloc[train_end:val_end]
rtm1_test = rtm1.iloc[val_end:]

In [11]:
dam_t_mcp = dam1_train['MCP (Rs/MWh)']
rtm_t_mcp = rtm1_train['MCP (Rs/MWh)']

# Spectral Density of Data
Using Periodogram we get the spectral power density of the data.

In [12]:
from scipy.signal import periodogram

def SpectralPower(series, top_n = 10):
    # Periodogram: Spectral power density
    frequencies, power = periodogram(series.dropna(), fs=1.0)

    # Sort frequencies by power in descending order
    sorted_indices = np.argsort(power)[::-1]  # Get indices that would sort power in descending order
    sorted_frequencies = frequencies[sorted_indices]
    sorted_power = power[sorted_indices]

    # Top N periods having maximum power
    top_frequencies = sorted_frequencies[:top_n]
    top_powers = sorted_power[:top_n]
    top_periods = 1 / top_frequencies  # Convert frequencies to periods

    for i, (freq, pow, period) in enumerate(zip(top_frequencies, top_powers, top_periods), 1):
        print(f"{i}. Frequency: {freq:.6f} (power: {pow:.3f}) → Period: {period:.1f} timesteps")

In [13]:
SpectralPower(dam_t_mcp)

1. Frequency: 0.010411 (power: 40128173978.864) → Period: 96.1 timesteps
2. Frequency: 0.020822 (power: 29630946992.222) → Period: 48.0 timesteps
3. Frequency: 0.010439 (power: 19918299209.690) → Period: 95.8 timesteps
4. Frequency: 0.010383 (power: 19330737368.016) → Period: 96.3 timesteps
5. Frequency: 0.000085 (power: 13182876794.380) → Period: 11782.3 timesteps
6. Frequency: 0.010468 (power: 10377418071.950) → Period: 95.5 timesteps
7. Frequency: 0.000113 (power: 6189827571.453) → Period: 8836.8 timesteps
8. Frequency: 0.031261 (power: 5205747102.592) → Period: 32.0 timesteps
9. Frequency: 0.020879 (power: 4790573657.411) → Period: 47.9 timesteps
10. Frequency: 0.000538 (power: 4085173248.550) → Period: 1860.4 timesteps


In [14]:
SpectralPower(rtm_t_mcp)

1. Frequency: 0.010411 (power: 27133204066.506) → Period: 96.1 timesteps
2. Frequency: 0.020822 (power: 21494073145.662) → Period: 48.0 timesteps
3. Frequency: 0.010439 (power: 15665110391.039) → Period: 95.8 timesteps
4. Frequency: 0.010383 (power: 13430619429.507) → Period: 96.3 timesteps
5. Frequency: 0.000085 (power: 10317458504.231) → Period: 11782.3 timesteps
6. Frequency: 0.010468 (power: 7651354099.901) → Period: 95.5 timesteps
7. Frequency: 0.000113 (power: 7150281174.400) → Period: 8836.8 timesteps
8. Frequency: 0.001499 (power: 4769675738.714) → Period: 666.9 timesteps
9. Frequency: 0.031261 (power: 4439684091.911) → Period: 32.0 timesteps
10. Frequency: 0.000538 (power: 4233599237.475) → Period: 1860.4 timesteps


48, 96 (2*48) timesteps or 12, 24 hours have the highest spectral density or power among all periods. The dominant period (in our case, 96 time steps preferrred as they will also take care of 48 steps part) is used as the window size in an LSTM (or any time-series model) because it directly encodes the strongest seasonal pattern in the data.

# LSTM model
![A LSTM Cell](https://media.geeksforgeeks.org/wp-content/uploads/20250404172141987003/gate_of_lstm.webp)

![LSTM cells in series](https://dezyre.gumlet.io/images/blog/lstm-model/LSTM_cells_are_chained_together,_with_the_input_sequence_and_output_sequence_shown.png?w=376&dpr=2.6)



In [15]:
import torch.nn as nn
# -------- LSTM with Residuals (something like ResNet) --------
class LSTMResidualModel(nn.Module):
    def __init__(self, input_size=1, hidden_size=64, num_layers=2, dropout=0.3, target_len = 96):
        super(LSTMResidualModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            dropout=dropout, batch_first=True)
        self.linear = nn.Linear(hidden_size, target_len)
        self.residual = nn.Linear(input_size, target_len)  # 1: MCP input

    def forward(self, x):
        out, _ = self.lstm(x)       # lstm outputs: output y, (hidden state, cell state)
        last_hidden = out[:, -1, :] # to get the last hidden state
        prediction = self.linear(last_hidden)

        # Residual connection from last input time step
        residual = self.residual(x[:, -1, :])
        return prediction + residual

## DAM

In [16]:
from torch.utils.data import Dataset
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import torch

class PriceDataset(Dataset):
    def __init__(self, df, seq_len=96, fit_scaler=False, scaler=None, target_len = 96):
        """
            df: DataFrame containing at least a 'MCP (Rs/MWh)' column.
            seq_len: number of past time steps used for input.
            fit_scaler: if True, fit the MinMaxScaler to the current df.
            scaler: optional externally fitted scaler to apply. They are applied to reduce overfitting
        """
        self.df = df.reset_index(drop=True)
        self.seq_len = seq_len
        self.scaler = scaler or MinMaxScaler()
        self.target_len = target_len

        if fit_scaler:     # if True then MinMaxScaler() will be used
            self.scaled_mcp = self.scaler.fit_transform(self.df[['MCP (Rs/MWh)']])
        else:
            if self.scaler is None:
                raise ValueError("Must provide a fitted scaler if fit_scaler is False.")
            self.scaled_mcp = self.scaler.transform(self.df[['MCP (Rs/MWh)']]) # inputed scaler is used

        self.scaled_mcp = self.scaled_mcp.astype(np.float32)

    def __len__(self):
        return len(self.scaled_mcp) - 2*self.seq_len - self.target_len

    def __getitem__(self, idx):

        # last 96 time steps
        x = self.scaled_mcp[idx:idx + self.seq_len]

        # next day's 96 steps
        y = self.scaled_mcp[idx + 2*self.seq_len : idx + 2*self.seq_len + self.target_len]

        return torch.tensor(x, dtype = torch.float32), torch.tensor(y, dtype=torch.float32)

    def inverse_transform(self, scaled_array): # MinMaxScaled to normal values
        if isinstance(scaled_array, torch.Tensor):
            scaled_array = scaled_array.detach().cpu().numpy()
        scaled_array = np.array(scaled_array).reshape(-1, 1)
        return self.scaler.inverse_transform(scaled_array).flatten()

    def get_scaler(self): # Same scaler has to be used while validating and testing etc.
        return self.scaler


In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [17]:
def train_model(model, train_loader, val_loader, model_instance_name="model",
                num_epochs=100, lr=1e-3, device='cuda', target_len = 96):

    model = model.to(device)

    # Adam optimiser with learning rate lr
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # mean square error
    criterion = nn.MSELoss()

    best_val_loss = float('inf')      #initialisation of val_loss, inf is infinity

    best_model_path = f"best_lstm_{model_instance_name}.pt"

    best_model_state = None  # to store best weights

    for epoch in range(1, num_epochs + 1):

        # training mode
        model.train()
        train_losses = []

        for x_batch, y_batch in train_loader:

            # x_batch and y_batch is better of in GPU if present as batches can be computed parrllely
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            # to clear previous gradients as gradients are accumulative in PyTorch
            optimizer.zero_grad()

            #predictions
            preds = model(x_batch).squeeze()

            # loss calculation
            loss = criterion(preds, y_batch.squeeze())

            #backward propagation
            loss.backward()

            # updates the parameters
            optimizer.step()

            train_losses.append(loss.item())

        # evaluation mode: no backpropagation,
        model.eval()

        val_losses = []

        # no
        with torch.no_grad():     # disables gradient calculation
            for x_val, y_val in val_loader:
                x_val = x_val.to(device)
                y_val = y_val.to(device)
                val_preds = model(x_val).squeeze()
                val_loss = criterion(val_preds, y_val.squeeze())
                val_losses.append(val_loss.item())

        avg_train_loss = sum(train_losses) / (len(train_losses))
        avg_val_loss = sum(val_losses) / (len(val_losses))

        print(f"Epoch {epoch}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict()            # save better weights
            torch.save(best_model_state, best_model_path)    # old ones are replaced with better ones


    # return best model weights
    print(f"Saved best model as {best_model_path}")
    return best_model_state

In [20]:
from torch.utils.data import DataLoader
# datasets
dam_train_dataset = PriceDataset(dam1_train, fit_scaler=True)  # MinMaxScaler is fitted

# gets the fitted MinMaxScaler model, uses the same scaler while validating and testing
dam_scaler = dam_train_dataset.get_scaler()

dam_val_dataset = PriceDataset(dam1_valid, scaler=dam_scaler)

"""DataLoaders is used for efficient batching, shuffles and to avoid GPU idle time.
It is provided by PyTorch. Classes of Dataset (from Pytorch) can only be used to load the data here"""
dam_train_loader = DataLoader(dam_train_dataset, batch_size=64, shuffle=False)
dam_val_loader = DataLoader(dam_val_dataset, batch_size=64)

Scaler is fitted and extracted only from the train dataset not the whole as it might lead to information extraction of validation and testing dataset.

In [30]:
dam_model_1 = LSTMResidualModel(target_len = 96, num_layers = 1)
dam_lstm1_weights = train_model(dam_model_1, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm1_weights")



Epoch 1/100 | Train Loss: 0.1246 | Val Loss: 0.0603
Epoch 2/100 | Train Loss: 0.0777 | Val Loss: 0.0507
Epoch 3/100 | Train Loss: 0.0614 | Val Loss: 0.0445
Epoch 4/100 | Train Loss: 0.0569 | Val Loss: 0.0402
Epoch 5/100 | Train Loss: 0.0515 | Val Loss: 0.0352
Epoch 6/100 | Train Loss: 0.0482 | Val Loss: 0.0339
Epoch 7/100 | Train Loss: 0.0452 | Val Loss: 0.0321
Epoch 8/100 | Train Loss: 0.0429 | Val Loss: 0.0317
Epoch 9/100 | Train Loss: 0.0402 | Val Loss: 0.0323
Epoch 10/100 | Train Loss: 0.0386 | Val Loss: 0.0329
Epoch 11/100 | Train Loss: 0.0369 | Val Loss: 0.0302
Epoch 12/100 | Train Loss: 0.0351 | Val Loss: 0.0286
Epoch 13/100 | Train Loss: 0.0338 | Val Loss: 0.0275
Epoch 14/100 | Train Loss: 0.0330 | Val Loss: 0.0266
Epoch 15/100 | Train Loss: 0.0323 | Val Loss: 0.0259
Epoch 16/100 | Train Loss: 0.0315 | Val Loss: 0.0255
Epoch 17/100 | Train Loss: 0.0306 | Val Loss: 0.0253
Epoch 18/100 | Train Loss: 0.0300 | Val Loss: 0.0250
Epoch 19/100 | Train Loss: 0.0297 | Val Loss: 0.0247
Ep

In [31]:
dam_lstm1_200_weights = train_model(dam_model_1, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm1_200_weights", num_epochs = 100)

Epoch 1/100 | Train Loss: 0.0218 | Val Loss: 0.0198
Epoch 2/100 | Train Loss: 0.0218 | Val Loss: 0.0202
Epoch 3/100 | Train Loss: 0.0217 | Val Loss: 0.0202
Epoch 4/100 | Train Loss: 0.0217 | Val Loss: 0.0203
Epoch 5/100 | Train Loss: 0.0216 | Val Loss: 0.0203
Epoch 6/100 | Train Loss: 0.0215 | Val Loss: 0.0203
Epoch 7/100 | Train Loss: 0.0214 | Val Loss: 0.0206
Epoch 8/100 | Train Loss: 0.0214 | Val Loss: 0.0205
Epoch 9/100 | Train Loss: 0.0214 | Val Loss: 0.0203
Epoch 10/100 | Train Loss: 0.0212 | Val Loss: 0.0202
Epoch 11/100 | Train Loss: 0.0213 | Val Loss: 0.0200
Epoch 12/100 | Train Loss: 0.0212 | Val Loss: 0.0199
Epoch 13/100 | Train Loss: 0.0212 | Val Loss: 0.0196
Epoch 14/100 | Train Loss: 0.0212 | Val Loss: 0.0194
Epoch 15/100 | Train Loss: 0.0214 | Val Loss: 0.0198
Epoch 16/100 | Train Loss: 0.0215 | Val Loss: 0.0186
Epoch 17/100 | Train Loss: 0.0215 | Val Loss: 0.0194
Epoch 18/100 | Train Loss: 0.0212 | Val Loss: 0.0195
Epoch 19/100 | Train Loss: 0.0209 | Val Loss: 0.0197
Ep

In [29]:
dam_model_2 = LSTMResidualModel(target_len = 96, num_layers = 2)
dam_lstm2_weights = train_model(dam_model_2, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm2_weights")

Epoch 1/100 | Train Loss: 0.1033 | Val Loss: 0.0536
Epoch 2/100 | Train Loss: 0.0603 | Val Loss: 0.0419
Epoch 3/100 | Train Loss: 0.0572 | Val Loss: 0.0414
Epoch 4/100 | Train Loss: 0.0521 | Val Loss: 0.0394
Epoch 5/100 | Train Loss: 0.0506 | Val Loss: 0.0346
Epoch 6/100 | Train Loss: 0.0437 | Val Loss: 0.0337
Epoch 7/100 | Train Loss: 0.0388 | Val Loss: 0.0326
Epoch 8/100 | Train Loss: 0.0362 | Val Loss: 0.0320
Epoch 9/100 | Train Loss: 0.0341 | Val Loss: 0.0310
Epoch 10/100 | Train Loss: 0.0333 | Val Loss: 0.0306
Epoch 11/100 | Train Loss: 0.0320 | Val Loss: 0.0300
Epoch 12/100 | Train Loss: 0.0313 | Val Loss: 0.0298
Epoch 13/100 | Train Loss: 0.0309 | Val Loss: 0.0287
Epoch 14/100 | Train Loss: 0.0327 | Val Loss: 0.0264
Epoch 15/100 | Train Loss: 0.0299 | Val Loss: 0.0260
Epoch 16/100 | Train Loss: 0.0292 | Val Loss: 0.0255
Epoch 17/100 | Train Loss: 0.0289 | Val Loss: 0.0251
Epoch 18/100 | Train Loss: 0.0287 | Val Loss: 0.0248
Epoch 19/100 | Train Loss: 0.0281 | Val Loss: 0.0241
Ep

In [32]:
dam_lstm2_150_weights = train_model(dam_model_2, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm2_150_weights")

Epoch 1/100 | Train Loss: 0.0198 | Val Loss: 0.0193
Epoch 2/100 | Train Loss: 0.0195 | Val Loss: 0.0190
Epoch 3/100 | Train Loss: 0.0195 | Val Loss: 0.0195
Epoch 4/100 | Train Loss: 0.0197 | Val Loss: 0.0199
Epoch 5/100 | Train Loss: 0.0198 | Val Loss: 0.0201
Epoch 6/100 | Train Loss: 0.0194 | Val Loss: 0.0205
Epoch 7/100 | Train Loss: 0.0195 | Val Loss: 0.0208
Epoch 8/100 | Train Loss: 0.0191 | Val Loss: 0.0209
Epoch 9/100 | Train Loss: 0.0190 | Val Loss: 0.0204
Epoch 10/100 | Train Loss: 0.0189 | Val Loss: 0.0201
Epoch 11/100 | Train Loss: 0.0190 | Val Loss: 0.0211
Epoch 12/100 | Train Loss: 0.0189 | Val Loss: 0.0213
Epoch 13/100 | Train Loss: 0.0186 | Val Loss: 0.0206
Epoch 14/100 | Train Loss: 0.0186 | Val Loss: 0.0198
Epoch 15/100 | Train Loss: 0.0187 | Val Loss: 0.0204
Epoch 16/100 | Train Loss: 0.0186 | Val Loss: 0.0212
Epoch 17/100 | Train Loss: 0.0185 | Val Loss: 0.0204
Epoch 18/100 | Train Loss: 0.0183 | Val Loss: 0.0203
Epoch 19/100 | Train Loss: 0.0190 | Val Loss: 0.0219
Ep

In [None]:
# set learning rate to 0.01
dam_model_2_01 = LSTMResidualModel(target_len = 96, num_layers = 2)
dam_lstm2_01_weights = train_model(dam_model_2_01, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm2_01_weights", lr = 1e-2)

Epoch 1/100 | Train Loss: 0.0913 | Val Loss: 0.0751
Epoch 2/100 | Train Loss: 0.0740 | Val Loss: 0.0713
Epoch 3/100 | Train Loss: 0.0733 | Val Loss: 0.0708
Epoch 4/100 | Train Loss: 0.0732 | Val Loss: 0.0707
Epoch 5/100 | Train Loss: 0.0732 | Val Loss: 0.0707
Epoch 6/100 | Train Loss: 0.0732 | Val Loss: 0.0707
Epoch 7/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 8/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 9/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 10/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 11/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 12/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 13/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 14/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 15/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 16/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 17/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 18/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Epoch 19/100 | Train Loss: 0.0732 | Val Loss: 0.0706
Ep

In [None]:
dam_model_2_05 = LSTMResidualModel(target_len = 96, num_layers = 2, dropout=0.05)
dam_lstm2_05_weights = train_model(dam_model_2_05, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm2_05_weights")

Epoch 1/100 | Train Loss: 0.1184 | Val Loss: 0.0510
Epoch 2/100 | Train Loss: 0.0652 | Val Loss: 0.0463
Epoch 3/100 | Train Loss: 0.0575 | Val Loss: 0.0402
Epoch 4/100 | Train Loss: 0.0490 | Val Loss: 0.0372
Epoch 5/100 | Train Loss: 0.0442 | Val Loss: 0.0376
Epoch 6/100 | Train Loss: 0.0389 | Val Loss: 0.0307
Epoch 7/100 | Train Loss: 0.0361 | Val Loss: 0.0293
Epoch 8/100 | Train Loss: 0.0345 | Val Loss: 0.0300
Epoch 9/100 | Train Loss: 0.0339 | Val Loss: 0.0294
Epoch 10/100 | Train Loss: 0.0327 | Val Loss: 0.0291
Epoch 11/100 | Train Loss: 0.0324 | Val Loss: 0.0283
Epoch 12/100 | Train Loss: 0.0316 | Val Loss: 0.0278
Epoch 13/100 | Train Loss: 0.0308 | Val Loss: 0.0273
Epoch 14/100 | Train Loss: 0.0303 | Val Loss: 0.0271
Epoch 15/100 | Train Loss: 0.0296 | Val Loss: 0.0266
Epoch 16/100 | Train Loss: 0.0291 | Val Loss: 0.0262
Epoch 17/100 | Train Loss: 0.0286 | Val Loss: 0.0257
Epoch 18/100 | Train Loss: 0.0282 | Val Loss: 0.0251
Epoch 19/100 | Train Loss: 0.0279 | Val Loss: 0.0246
Ep

In [None]:
# num_layers = 3
dam_model_3 = LSTMResidualModel(target_len = 96, num_layers = 3)
dam_lstm3_weights = train_model(dam_model_3, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm3_weights")

Epoch 1/100 | Train Loss: 0.1191 | Val Loss: 0.0686
Epoch 2/100 | Train Loss: 0.0822 | Val Loss: 0.0521
Epoch 3/100 | Train Loss: 0.0648 | Val Loss: 0.0459
Epoch 4/100 | Train Loss: 0.0601 | Val Loss: 0.0426
Epoch 5/100 | Train Loss: 0.0525 | Val Loss: 0.0407
Epoch 6/100 | Train Loss: 0.0471 | Val Loss: 0.0338
Epoch 7/100 | Train Loss: 0.0441 | Val Loss: 0.0339
Epoch 8/100 | Train Loss: 0.0405 | Val Loss: 0.0327
Epoch 9/100 | Train Loss: 0.0382 | Val Loss: 0.0326
Epoch 10/100 | Train Loss: 0.0366 | Val Loss: 0.0457
Epoch 11/100 | Train Loss: 0.0388 | Val Loss: 0.0317
Epoch 12/100 | Train Loss: 0.0362 | Val Loss: 0.0292
Epoch 13/100 | Train Loss: 0.0332 | Val Loss: 0.0289
Epoch 14/100 | Train Loss: 0.0326 | Val Loss: 0.0292
Epoch 15/100 | Train Loss: 0.0319 | Val Loss: 0.0283
Epoch 16/100 | Train Loss: 0.0311 | Val Loss: 0.0274
Epoch 17/100 | Train Loss: 0.0307 | Val Loss: 0.0272
Epoch 18/100 | Train Loss: 0.0304 | Val Loss: 0.0258
Epoch 19/100 | Train Loss: 0.0301 | Val Loss: 0.0262
Ep

In [None]:
# num_layers = 4
dam_model_4 = LSTMResidualModel(target_len = 96, num_layers = 4)
dam_lstm4_weights = train_model(dam_model_4, dam_train_loader, dam_val_loader, device = device, model_instance_name = "dam_lstm4_weights")

Epoch 1/100 | Train Loss: 0.1213 | Val Loss: 0.0695
Epoch 2/100 | Train Loss: 0.0886 | Val Loss: 0.0611
Epoch 3/100 | Train Loss: 0.0816 | Val Loss: 0.0565
Epoch 4/100 | Train Loss: 0.0772 | Val Loss: 0.0540
Epoch 5/100 | Train Loss: 0.0746 | Val Loss: 0.0525
Epoch 6/100 | Train Loss: 0.0729 | Val Loss: 0.0512
Epoch 7/100 | Train Loss: 0.0719 | Val Loss: 0.0506
Epoch 8/100 | Train Loss: 0.0713 | Val Loss: 0.0504
Epoch 9/100 | Train Loss: 0.0710 | Val Loss: 0.0502
Epoch 10/100 | Train Loss: 0.0708 | Val Loss: 0.0496
Epoch 11/100 | Train Loss: 0.0706 | Val Loss: 0.0495
Epoch 12/100 | Train Loss: 0.0705 | Val Loss: 0.0495
Epoch 13/100 | Train Loss: 0.0705 | Val Loss: 0.0495
Epoch 14/100 | Train Loss: 0.0704 | Val Loss: 0.0495
Epoch 15/100 | Train Loss: 0.0704 | Val Loss: 0.0495
Epoch 16/100 | Train Loss: 0.0704 | Val Loss: 0.0495
Epoch 17/100 | Train Loss: 0.0704 | Val Loss: 0.0494
Epoch 18/100 | Train Loss: 0.0704 | Val Loss: 0.0492
Epoch 19/100 | Train Loss: 0.0703 | Val Loss: 0.0492
Ep

1 LSTM is showing better results than two LSTMs in series.

Validation Loss is higher than training loss might feel counterintutive but this is mainly due to drop connection (with probability = drop rate).

## RTM

RTM market is opened atmost 7 timesteps (6 for odd timestep, 7 for even timestep) before the delivery of transacted power. An image from CERC illustrates this:
![RTM Schedule](https://etn.news/images/easyblog_articles/54/b2ap3_large_gate-closure.jpg)

We are training the model to predict price based on last 24 hours prices from auction start, that is, t - 15 min, t - 20 min so on where t is the time where auction for t + 7 delivery. RTMDataset is designed for this setup.

In [21]:
class RTMDataset(Dataset):
    def __init__(self, df, seq_len=96, fit_scaler=False, scaler=None):
        """
            df: DataFrame containing at least a 'MCP (Rs/MWh)' column.
            seq_len: number of past time steps used for input.
            fit_scaler: if True, fit the MinMaxScaler to the current df.
            scaler: optional externally fitted scaler to apply. They are applied to reduce overfitting
        """
        self.df = df.reset_index(drop=True)
        self.seq_len = seq_len
        self.scaler = scaler or MinMaxScaler()

        if fit_scaler:     # if True then MinMaxScaler() will be used
            self.scaled_mcp = self.scaler.fit_transform(self.df[['MCP (Rs/MWh)']])
        else:
            if self.scaler is None:
                raise ValueError("Must provide a fitted scaler if fit_scaler is False.")
            self.scaled_mcp = self.scaler.transform(self.df[['MCP (Rs/MWh)']]) # inputed scaler is used

        self.scaled_mcp = self.scaled_mcp.astype(np.float32)

    def __len__(self):
        return len(self.scaled_mcp) - self.seq_len - 7

    def __getitem__(self, idx):

        # last 96 time steps
        x = self.scaled_mcp[idx:idx + self.seq_len]

        # RTM transactions are applied after atmost 105 minutes or 7 x 15 minutes
        # or 7 timesteps from the begining of auction for a time slot
        y = self.scaled_mcp[idx + self.seq_len + 7]

        return torch.tensor(x, dtype = torch.float32), torch.tensor(y, dtype=torch.float32)

    def inverse_transform(self, scaled_array): # MinMaxScaled to normal values
        if isinstance(scaled_array, torch.Tensor):
            scaled_array = scaled_array.detach().cpu().numpy()
        scaled_array = np.array(scaled_array).reshape(-1, 1)
        return self.scaler.inverse_transform(scaled_array).flatten()

    def get_scaler(self): # Same scaler has to be used while validating and testing etc.
        return self.scaler


In [22]:
rtm_train_dataset = RTMDataset(rtm1_train, fit_scaler=True)  # MinMaxScaler is fitted

rtm_scaler = rtm_train_dataset.get_scaler()

rtm_val_dataset = RTMDataset(rtm1_valid, scaler=rtm_scaler)

rtm_train_loader = DataLoader(rtm_train_dataset, batch_size=64, shuffle=False)
rtm_val_loader = DataLoader(rtm_val_dataset, batch_size=64, shuffle = False)

In [23]:
rtm_model_1 = LSTMResidualModel(num_layers=1, dropout=0.3, target_len = 1)
rtm_lstm1_weights = train_model(rtm_model_1, rtm_train_loader, rtm_val_loader, device = device, model_instance_name = "rtm_lstm1_weights", target_len = 1)



Epoch 1/100 | Train Loss: 0.0950 | Val Loss: 0.0308
Epoch 2/100 | Train Loss: 0.0468 | Val Loss: 0.0261
Epoch 3/100 | Train Loss: 0.0422 | Val Loss: 0.0247
Epoch 4/100 | Train Loss: 0.0400 | Val Loss: 0.0241
Epoch 5/100 | Train Loss: 0.0389 | Val Loss: 0.0252
Epoch 6/100 | Train Loss: 0.0384 | Val Loss: 0.0246
Epoch 7/100 | Train Loss: 0.0380 | Val Loss: 0.0245
Epoch 8/100 | Train Loss: 0.0372 | Val Loss: 0.0248
Epoch 9/100 | Train Loss: 0.0366 | Val Loss: 0.0248
Epoch 10/100 | Train Loss: 0.0368 | Val Loss: 0.0213
Epoch 11/100 | Train Loss: 0.0291 | Val Loss: 0.0179
Epoch 12/100 | Train Loss: 0.0265 | Val Loss: 0.0175
Epoch 13/100 | Train Loss: 0.0262 | Val Loss: 0.0177
Epoch 14/100 | Train Loss: 0.0261 | Val Loss: 0.0166
Epoch 15/100 | Train Loss: 0.0255 | Val Loss: 0.0157
Epoch 16/100 | Train Loss: 0.0247 | Val Loss: 0.0168
Epoch 17/100 | Train Loss: 0.0253 | Val Loss: 0.0191
Epoch 18/100 | Train Loss: 0.0242 | Val Loss: 0.0160
Epoch 19/100 | Train Loss: 0.0244 | Val Loss: 0.0153
Ep

In [28]:
rtm_model_2 = LSTMResidualModel(num_layers=2, dropout=0.3, target_len = 1)
rtm_lstm2_weights = train_model(rtm_model_2, rtm_train_loader, rtm_val_loader, device = device, model_instance_name = "rtm_lstm2_weights", target_len = 1)

Epoch 1/100 | Train Loss: 0.1443 | Val Loss: 0.0300
Epoch 2/100 | Train Loss: 0.0502 | Val Loss: 0.0265
Epoch 3/100 | Train Loss: 0.0435 | Val Loss: 0.0249
Epoch 4/100 | Train Loss: 0.0412 | Val Loss: 0.0245
Epoch 5/100 | Train Loss: 0.0401 | Val Loss: 0.0242
Epoch 6/100 | Train Loss: 0.0391 | Val Loss: 0.0241
Epoch 7/100 | Train Loss: 0.0387 | Val Loss: 0.0238
Epoch 8/100 | Train Loss: 0.0382 | Val Loss: 0.0239
Epoch 9/100 | Train Loss: 0.0378 | Val Loss: 0.0236
Epoch 10/100 | Train Loss: 0.0373 | Val Loss: 0.0251
Epoch 11/100 | Train Loss: 0.0377 | Val Loss: 0.0246
Epoch 12/100 | Train Loss: 0.0371 | Val Loss: 0.0258
Epoch 13/100 | Train Loss: 0.0380 | Val Loss: 0.0255
Epoch 14/100 | Train Loss: 0.0356 | Val Loss: 0.0247
Epoch 15/100 | Train Loss: 0.0370 | Val Loss: 0.0263
Epoch 16/100 | Train Loss: 0.0366 | Val Loss: 0.0256
Epoch 17/100 | Train Loss: 0.0393 | Val Loss: 0.0252
Epoch 18/100 | Train Loss: 0.0364 | Val Loss: 0.0239
Epoch 19/100 | Train Loss: 0.0368 | Val Loss: 0.0280
Ep

In [None]:
rtm_model_3 = LSTMResidualModel(num_layers=3, dropout=0.3, target_len = 1, lr = 0.01)
rtm_lstm3_weights = train_model(rtm_model_3, rtm_train_loader, rtm_val_loader, device = device, model_instance_name = "rtm_lstm3_weights", target_len = 1)

Epoch 1/100 | Train Loss: 0.0587 | Val Loss: 0.0248
Epoch 2/100 | Train Loss: 0.0414 | Val Loss: 0.0251
Epoch 3/100 | Train Loss: 0.0401 | Val Loss: 0.0245
Epoch 4/100 | Train Loss: 0.0390 | Val Loss: 0.0241
Epoch 5/100 | Train Loss: 0.0389 | Val Loss: 0.0277
Epoch 6/100 | Train Loss: 0.0462 | Val Loss: 0.0249
Epoch 7/100 | Train Loss: 0.0427 | Val Loss: 0.0242
Epoch 8/100 | Train Loss: 0.0423 | Val Loss: 0.0238
Epoch 9/100 | Train Loss: 0.0406 | Val Loss: 0.0235
Epoch 10/100 | Train Loss: 0.0405 | Val Loss: 0.0235
Epoch 11/100 | Train Loss: 0.0404 | Val Loss: 0.0235
Epoch 12/100 | Train Loss: 0.0405 | Val Loss: 0.0234
Epoch 13/100 | Train Loss: 0.0394 | Val Loss: 0.0225
Epoch 14/100 | Train Loss: 0.0386 | Val Loss: 0.0224
Epoch 15/100 | Train Loss: 0.0384 | Val Loss: 0.0225
Epoch 16/100 | Train Loss: 0.0384 | Val Loss: 0.0225
Epoch 17/100 | Train Loss: 0.0382 | Val Loss: 0.0226
Epoch 18/100 | Train Loss: 0.0385 | Val Loss: 0.0226
Epoch 19/100 | Train Loss: 0.0384 | Val Loss: 0.0225
Ep

2 layers gives the best generalization performance (lowest validation loss), while 3 layers lead to overfitting or optimization issues.

### RTM tommorow using yesterday's MCP

In [24]:
rtm_d_train_dataset = PriceDataset(rtm1_train, fit_scaler=True)  # MinMaxScaler is fitted

rtm_d_scaler = rtm_d_train_dataset.get_scaler()

rtm_d_val_dataset = PriceDataset(rtm1_valid, scaler=rtm_scaler)

rtm_d_train_loader = DataLoader(rtm_d_train_dataset, batch_size=64, shuffle=False)
rtm_d_val_loader = DataLoader(rtm_d_val_dataset, batch_size=64, shuffle = False)

In [26]:
rtm_d_model_1 = LSTMResidualModel(num_layers=1, dropout=0.3, target_len = 96)
rtm_d_lstm1_weights = train_model(rtm_d_model_1, rtm_d_train_loader, rtm_d_val_loader, device = device, model_instance_name = "rtm_d_lstm1_weights", target_len = 96)



Epoch 1/100 | Train Loss: 0.1122 | Val Loss: 0.0386
Epoch 2/100 | Train Loss: 0.0679 | Val Loss: 0.0360
Epoch 3/100 | Train Loss: 0.0649 | Val Loss: 0.0354
Epoch 4/100 | Train Loss: 0.0629 | Val Loss: 0.0347
Epoch 5/100 | Train Loss: 0.0610 | Val Loss: 0.0345
Epoch 6/100 | Train Loss: 0.0591 | Val Loss: 0.0308
Epoch 7/100 | Train Loss: 0.0535 | Val Loss: 0.0293
Epoch 8/100 | Train Loss: 0.0506 | Val Loss: 0.0291
Epoch 9/100 | Train Loss: 0.0489 | Val Loss: 0.0283
Epoch 10/100 | Train Loss: 0.0476 | Val Loss: 0.0276
Epoch 11/100 | Train Loss: 0.0464 | Val Loss: 0.0271
Epoch 12/100 | Train Loss: 0.0451 | Val Loss: 0.0264
Epoch 13/100 | Train Loss: 0.0437 | Val Loss: 0.0257
Epoch 14/100 | Train Loss: 0.0419 | Val Loss: 0.0248
Epoch 15/100 | Train Loss: 0.0405 | Val Loss: 0.0243
Epoch 16/100 | Train Loss: 0.0398 | Val Loss: 0.0240
Epoch 17/100 | Train Loss: 0.0391 | Val Loss: 0.0239
Epoch 18/100 | Train Loss: 0.0386 | Val Loss: 0.0237
Epoch 19/100 | Train Loss: 0.0381 | Val Loss: 0.0236
Ep

In [27]:
rtm_d_model_1_1 = LSTMResidualModel(num_layers=1, dropout=0.1, target_len = 96)
rtm_d_lstm1_1_weights = train_model(rtm_d_model_1_1, rtm_d_train_loader, rtm_d_val_loader, device = device, model_instance_name = "rtm_d_lstm1_1_weights", target_len = 96)



Epoch 1/100 | Train Loss: 0.1051 | Val Loss: 0.0387
Epoch 2/100 | Train Loss: 0.0682 | Val Loss: 0.0359
Epoch 3/100 | Train Loss: 0.0646 | Val Loss: 0.0343
Epoch 4/100 | Train Loss: 0.0626 | Val Loss: 0.0349
Epoch 5/100 | Train Loss: 0.0601 | Val Loss: 0.0321
Epoch 6/100 | Train Loss: 0.0572 | Val Loss: 0.0311
Epoch 7/100 | Train Loss: 0.0535 | Val Loss: 0.0308
Epoch 8/100 | Train Loss: 0.0511 | Val Loss: 0.0287
Epoch 9/100 | Train Loss: 0.0494 | Val Loss: 0.0281
Epoch 10/100 | Train Loss: 0.0483 | Val Loss: 0.0276
Epoch 11/100 | Train Loss: 0.0473 | Val Loss: 0.0272
Epoch 12/100 | Train Loss: 0.0460 | Val Loss: 0.0265
Epoch 13/100 | Train Loss: 0.0437 | Val Loss: 0.0250
Epoch 14/100 | Train Loss: 0.0417 | Val Loss: 0.0245
Epoch 15/100 | Train Loss: 0.0406 | Val Loss: 0.0243
Epoch 16/100 | Train Loss: 0.0397 | Val Loss: 0.0242
Epoch 17/100 | Train Loss: 0.0387 | Val Loss: 0.0240
Epoch 18/100 | Train Loss: 0.0381 | Val Loss: 0.0238
Epoch 19/100 | Train Loss: 0.0376 | Val Loss: 0.0235
Ep

In [None]:
rtm_d_model_2 = LSTMResidualModel(num_layers=2, dropout=0.3, target_len = 96)
rtm_d_lstm2_weights = train_model(rtm_d_model_2, rtm_d_train_loader, rtm_d_val_loader, device = device, model_instance_name = "rtm_d_lstm2_weights", target_len = 96)

Epoch 1/100 | Train Loss: 0.1127 | Val Loss: 0.0398
Epoch 2/100 | Train Loss: 0.0678 | Val Loss: 0.0365
Epoch 3/100 | Train Loss: 0.0648 | Val Loss: 0.0350
Epoch 4/100 | Train Loss: 0.0623 | Val Loss: 0.0338
Epoch 5/100 | Train Loss: 0.0595 | Val Loss: 0.0324
Epoch 6/100 | Train Loss: 0.0545 | Val Loss: 0.0308
Epoch 7/100 | Train Loss: 0.0519 | Val Loss: 0.0298
Epoch 8/100 | Train Loss: 0.0489 | Val Loss: 0.0283
Epoch 9/100 | Train Loss: 0.0461 | Val Loss: 0.0276
Epoch 10/100 | Train Loss: 0.0432 | Val Loss: 0.0263
Epoch 11/100 | Train Loss: 0.0414 | Val Loss: 0.0254
Epoch 12/100 | Train Loss: 0.0402 | Val Loss: 0.0253
Epoch 13/100 | Train Loss: 0.0394 | Val Loss: 0.0249
Epoch 14/100 | Train Loss: 0.0388 | Val Loss: 0.0250
Epoch 15/100 | Train Loss: 0.0382 | Val Loss: 0.0253
Epoch 16/100 | Train Loss: 0.0377 | Val Loss: 0.0252
Epoch 17/100 | Train Loss: 0.0375 | Val Loss: 0.0254
Epoch 18/100 | Train Loss: 0.0372 | Val Loss: 0.0252
Epoch 19/100 | Train Loss: 0.0370 | Val Loss: 0.0252
Ep

In [None]:
rtm_d_model_3 = LSTMResidualModel(num_layers=3, dropout=0.3, target_len = 96)
rtm_d_lstm3_weights = train_model(rtm_d_model_3, rtm_d_train_loader, rtm_d_val_loader, device = device, model_instance_name = "rtm_d_lstm3_weights", target_len = 96)

Epoch 1/100 | Train Loss: 0.1049 | Val Loss: 0.0462
Epoch 2/100 | Train Loss: 0.0789 | Val Loss: 0.0386
Epoch 3/100 | Train Loss: 0.0670 | Val Loss: 0.0381
Epoch 4/100 | Train Loss: 0.0607 | Val Loss: 0.0338
Epoch 5/100 | Train Loss: 0.0592 | Val Loss: 0.0322
Epoch 6/100 | Train Loss: 0.0529 | Val Loss: 0.0341
Epoch 7/100 | Train Loss: 0.0507 | Val Loss: 0.0295
Epoch 8/100 | Train Loss: 0.0480 | Val Loss: 0.0303
Epoch 9/100 | Train Loss: 0.0480 | Val Loss: 0.0289
Epoch 10/100 | Train Loss: 0.0447 | Val Loss: 0.0294
Epoch 11/100 | Train Loss: 0.0436 | Val Loss: 0.0273
Epoch 12/100 | Train Loss: 0.0420 | Val Loss: 0.0269
Epoch 13/100 | Train Loss: 0.0406 | Val Loss: 0.0262
Epoch 14/100 | Train Loss: 0.0405 | Val Loss: 0.0260
Epoch 15/100 | Train Loss: 0.0394 | Val Loss: 0.0257
Epoch 16/100 | Train Loss: 0.0389 | Val Loss: 0.0258
Epoch 17/100 | Train Loss: 0.0391 | Val Loss: 0.0269
Epoch 18/100 | Train Loss: 0.0384 | Val Loss: 0.0259
Epoch 19/100 | Train Loss: 0.0382 | Val Loss: 0.0251
Ep

Trained rtm_d_model1_1 (dropout = 0.1, num_layer = 1) has the least val loss

## Store Parameters of Model and Scaler

In [33]:
import joblib
joblib.dump(dam_scaler, 'dam_scaler.save')

joblib.dump(rtm_scaler, 'rtm_scaler.save')

joblib.dump(rtm_d_scaler, 'rtm_d_scaler.save')

['rtm_d_scaler.save']

In [34]:
dam1_test.head()

Unnamed: 0,datetime,MCP (Rs/MWh)
42921,2024-11-21 02:15:00,2400.21
42922,2024-11-21 02:30:00,2393.83
42923,2024-11-21 02:45:00,2400.39
42924,2024-11-21 03:00:00,2393.61
42925,2024-11-21 03:15:00,2393.35


In [35]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

Using device: cuda


In [38]:
import joblib
from model_definition import LSTMResidualModel

"""
Change the num_layers and target_len as per the .pt file
best_lstm_dam_lstm1_weights.pt: num_layers = 1, target_len = 96
best_lstm_dam_lstm1_200_weights.pt: num_layers = 1, target_len = 96. 200 epochs
best_lstm_dam_lstm2_weights.pt: num_layers = 2, target_len = 96
best_lstm_dam_lstm2_150_weights.pt: num_layers = 2, target_len = 96. 150 epochs
best_lstm_dam_lstm3_weights.pt: num_layers = 3, target_len = 96
best_lstm_rtm_lstm2_weights.pt: num_layers = 2, target_len = 1
best_lstm_rtm_lstm3_weights.pt: num_layers = 3, target_len = 1
best_lstm_rtm_d_lstm1_1_weights.pt: num_layers = 1, target_len = 96, droput = 0.1
best_lstm_rtm_d_lstm2_weights.pt: num_layers = 2, target_len = 96
best_lstm_rtm_d_lstm3_weights.pt: num_layers = 3, target_len = 96
"""
model = LSTMResidualModel(num_layers=1, dropout=0.1, target_len = 96)
model.load_state_dict(torch.load('/content/best_lstm_dam_lstm1_weights.pt', map_location=device))

model.to(device)
"""
Load the scaler used while training the model
best_lstm_dam_...: 'dam_scaler.save'
best_lstm_rtm_...: 'rtm_scaler.save'
best_lstm_rtm_d_...: 'rtm_d_scaler.save'
"""
dam_scaler = joblib.load('/content/dam_scaler.save')




## How to use model parameters and scaler.save

### Yesterday's MCP to get Tommorow's MCP

In [39]:
import torch
import pandas as pd
import numpy as np

"""
Predict next day's 96 prices using the previous day's prices
Using df containing datetime and MCP columns, trained model and their
respective scalers
"""

def rolling_day_predictions(df, model, scaler, device):

    df = df.copy()
    df['date'] = df['datetime'].dt.date

    result = []

    unique_dates = sorted(df['date'].unique())

    for i in range(1, len(unique_dates) - 1):
        prev_day = unique_dates[i - 1]
        next_day = unique_dates[i + 1]

        prev_day_data = df[df['date'] == prev_day]['MCP (Rs/MWh)'].values
        next_day_times = df[df['date'] == next_day]['datetime'].values
        true_next_day_data = df[df['date'] == next_day]['MCP (Rs/MWh)'].values

        if len(prev_day_data) != 96 or len(next_day_times) != 96:
            continue  # skip incomplete days

        # Scale the input
        x = scaler.transform(prev_day_data.reshape(-1, 1)).astype(np.float32)  # (96, 1)
        x_tensor = torch.tensor(x).unsqueeze(0).to(device)  # (1, 96, 1)

        with torch.no_grad():
            pred_scaled = model(x_tensor).cpu().numpy().flatten()

        # unscale it back
        pred = scaler.inverse_transform(pred_scaled.reshape(-1, 1)).flatten()

        # Store predictions with corresponding datetime
        for dt, p, t in zip(next_day_times, pred, true_next_day_data):
            result.append({'datetime': dt, 'predicted': p, 'target': t})

    return pd.DataFrame(result)


In [40]:
test_dam = rolling_day_predictions(dam1_test, model, dam_scaler, device)



In [41]:
test_dam

Unnamed: 0,datetime,predicted,target
0,2024-11-24 00:00:00,2571.542236,2730.12
1,2024-11-24 00:15:00,2550.286133,2649.94
2,2024-11-24 00:30:00,2479.662109,2649.25
3,2024-11-24 00:45:00,2483.994141,2649.06
4,2024-11-24 01:00:00,2461.412109,2590.22
...,...,...,...
7291,2025-02-07 22:45:00,2907.379639,3264.65
7292,2025-02-07 23:00:00,2890.103760,3120.54
7293,2025-02-07 23:15:00,2933.112549,3119.11
7294,2025-02-07 23:30:00,2976.169922,3119.74


In [42]:
rtm_d_model  = LSTMResidualModel(num_layers=1, dropout=0.1, target_len = 96)
rtm_d_model.load_state_dict(torch.load('/content/best_lstm_rtm_d_lstm1_1_weights.pt', map_location=device))

rtm_d_model.to(device)
rtm_d_scaler = joblib.load('/content/rtm_d_scaler.save')



In [43]:
test_rtm_d = rolling_day_predictions(rtm1_test, rtm_d_model, rtm_d_scaler, device)



In [44]:
test_rtm_d

Unnamed: 0,datetime,predicted,target
0,2024-11-24 00:00:00,1824.916626,2778.00
1,2024-11-24 00:15:00,2000.451050,2900.14
2,2024-11-24 00:30:00,1926.638062,2900.41
3,2024-11-24 00:45:00,1977.558716,2843.49
4,2024-11-24 01:00:00,1988.781128,2765.59
...,...,...,...
7195,2025-02-06 22:45:00,2772.193848,3067.61
7196,2025-02-06 23:00:00,2660.985840,3067.47
7197,2025-02-06 23:15:00,2605.918701,3067.31
7198,2025-02-06 23:30:00,2620.921143,3067.49


### RTM
Get RTM MCP using 24 hours MCP data before RTM auction

In [59]:
import torch
import pandas as pd
import numpy as np

"""
Predict rtm after 7 prices using the last 96 prices
Using df containing datetime and MCP columns, trained model and their
respective scalers
"""

def rtm_rolling_day_predictions(df, model, scaler, device):
  ndf = df.copy()
  result = []
  for i in range(96, len(df) - 7):
    last_96_data = ndf[['MCP (Rs/MWh)']].iloc[i - 96:i]
    target = df['MCP (Rs/MWh)'].iloc[i + 7]
    datetime = df['datetime'].iloc[i + 7]

    # Scale the input
    x = scaler.transform(last_96_data).astype(np.float32)  # (96, 1)
    x_tensor = torch.tensor(x).unsqueeze(0).to(device)  # (1, 96, 1)

    with torch.no_grad():
      pred_scaled = model(x_tensor).cpu().numpy().flatten()

      # unscale it back
      pred = scaler.inverse_transform(pred_scaled.reshape(-1, 1)).flatten()
      pred = pred.squeeze()

      # Store predictions with corresponding datetime
      result.append({
            'datetime': datetime,
            'predicted': pred,
            'target': target
        })
  return pd.DataFrame(result)


In [51]:
rtm_model  = LSTMResidualModel(num_layers=2, dropout=0.3, target_len = 1)
rtm_model.load_state_dict(torch.load('/content/best_lstm_rtm_lstm2_weights.pt', map_location=device))

rtm_model.to(device)
rtm_scaler = joblib.load('/content/rtm_scaler.save')

In [60]:
test_rtm = rtm_rolling_day_predictions(rtm1_test, rtm_model, rtm_scaler, device)

In [62]:
from sklearn.metrics import mean_squared_error

mse = mean_squared_error(test_rtm['target'], test_rtm['predicted'])
print(f"MSE: {mse:.4f}")

MSE: 3053535.4118


In [61]:
test_rtm

Unnamed: 0,datetime,predicted,target
0,2024-11-22 04:00:00,2575.7446,2751.99
1,2024-11-22 04:15:00,2417.4275,2954.06
2,2024-11-22 04:30:00,2602.4854,3082.16
3,2024-11-22 04:45:00,2594.5679,3097.28
4,2024-11-22 05:00:00,2510.7915,3176.62
...,...,...,...
7371,2025-02-06 22:45:00,3303.0327,3067.61
7372,2025-02-06 23:00:00,3208.7878,3067.47
7373,2025-02-06 23:15:00,2935.1433,3067.31
7374,2025-02-06 23:30:00,2991.9402,3067.49


# More Possible Works
Temporal Fusion Transformer supported by pytorch_forecasting. Tutorial: [forecasting with the Temporal Fusion Transformer](https://pytorch-forecasting.readthedocs.io/en/stable/tutorials/stallion.html)

*   They are more advanced, can notice both short trends and long trends
*   Uses attension to get even the small trends.

