In [1]:
# How to improve long-term tendency and preserve information till the last stave.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
from tqdm import tqdm


import warnings
warnings.filterwarnings('ignore')

import os
from matplotlib.colors import LinearSegmentedColormap

sns.set_theme(style="darkgrid")

# Seed every RNG this notebook draws from. NumPy alone is not enough: the LSTM
# weight initialisation and dropout masks come from torch's generator, so
# without torch.manual_seed two "Restart & Run All" passes train different models.
SEED = 40
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the first CUDA device when one is visible, otherwise fall back to CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

print(f"Device : {device}")


Device : cuda:0


In [2]:
#Functions

class SequenceDataset(Dataset):
    """Sliding-window view over a 1-D series for next-step prediction.

    Sample ``i`` is the pair ``(data[i:i + seq_length], data[i + seq_length])``:
    a window of ``seq_length`` consecutive values and the value that follows it.

    Args:
        data: Indexable, sliceable 1-D sequence of numbers (e.g. a NumPy array).
        seq_length: Number of consecutive values in each input window.
    """

    def __init__(self, data, seq_length):
        self.data = data
        self.seq_length = seq_length

    def __len__(self):
        # Clamp at zero: a series shorter than one window holds no samples, and
        # a negative value would make len() raise ValueError.
        return max(0, len(self.data) - self.seq_length)

    def __getitem__(self, idx):
        """Return ``(window, target)`` as float32 tensors.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Without this, a negative start would slice an empty or misaligned window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"sample index out of range for {n_samples} samples")

        stop = idx + self.seq_length
        window = self.data[idx:stop]
        target = self.data[stop]
        return (
            torch.tensor(window, dtype=torch.float32),
            torch.tensor(target, dtype=torch.float32),
        )




# Define the LSTM model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
        dropout: Dropout probability between LSTM layers (default 0.3). It is
            only passed on when ``num_layers > 1``; PyTorch applies inter-layer
            dropout only between layers and warns when it is set on a
            single-layer LSTM.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.3):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_size) to (batch, output_size)."""
        # No explicit (h0, c0): nn.LSTM starts from zero states when none are
        # given, and those follow the input's device and dtype. Hand-built
        # float32 zeros would break a model converted with .double()/.half().
        out, _ = self.lstm(x)

        # Only the hidden state of the final time step feeds the linear head.
        return self.fc(out[:, -1, :])


In [3]:
def model_training_and_evaluation(data, current_run_number_list, seq_length=256, batch_size=16,
                                  hidden_size=128, num_layers=2, num_epochs=1000,
                                  output_root='./Outputs'):
    """Train an LSTM on the pedestal series of the given runs and dump the results.

    The rows of ``data`` belonging to ``current_run_number_list`` whose ``Sector``
    starts with ``'UTaX'`` are averaged per (RunNumber, Staves, Rows) into one
    series. The last ``2 * seq_length`` points of that series form the test
    split; everything before them is the training split.

    Files written to ``{output_root}/{str(current_run_number_list)}``:
        * ``loss.csv`` - mean training loss per epoch,
        * ``staves_prediction.csv`` - test inputs, targets, predictions and their
          difference, all in the scaled value space,
        * ``train_sample.csv`` - the scaled training split.

    Side effect: the module-level ``scaler`` is re-fitted on the training split.

    Args:
        data: DataFrame with columns ``RunNumber``, ``Sector``, ``Staves``,
            ``Rows`` and ``PedestalValue``.
        current_run_number_list: Run numbers to include; also names the output folder.
        seq_length: Input window length; the test split spans twice this many points.
        batch_size: Mini-batch size for both loaders.
        hidden_size: Width of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_epochs: Number of passes over the training split.
        output_root: Directory under which the per-window folder is created.

    Raises:
        ValueError: If the aggregated series has too few points to provide at
            least one training sample next to the test split.
    """
    identifier = str(current_run_number_list)
    path = f'{output_root}/{identifier}'
    os.makedirs(path, exist_ok=True)

    # ---- Build the pedestal series ------------------------------------------------
    # Work on an explicit copy so the column assignment below never targets a
    # view of the caller's frame.
    selected = data.loc[data['RunNumber'].isin(current_run_number_list)].copy()
    selected['RunNumber'] = selected['RunNumber'].astype('category')
    # na=False: a missing Sector counts as "not UTaX" instead of breaking the mask.
    is_utax = selected['Sector'].str.startswith('UTaX', na=False)
    selected = selected.loc[is_utax, ['RunNumber', 'Staves', 'Rows', 'PedestalValue']]

    # observed=False is spelled out because RunNumber is categorical and the
    # implicit default changes between pandas versions. Unobserved key
    # combinations presumably appear as NaN means, which the forward fill
    # below patches - confirm against the data.
    pedestal = selected.groupby(
        ['RunNumber', 'Staves', 'Rows'], as_index=False, observed=False
    )['PedestalValue'].mean()
    pedestal['PedestalValue'] = pedestal['PedestalValue'].ffill()

    # ---- Train / test split -------------------------------------------------------
    test_span = 2 * seq_length
    if len(pedestal) <= test_span + seq_length:
        raise ValueError(
            f"Need more than {test_span + seq_length} aggregated points for "
            f"seq_length={seq_length}, got {len(pedestal)}"
        )

    train_data = pedestal.iloc[:-test_span].copy()
    test_data = pedestal.iloc[-test_span:].copy()

    # Fit the scaling on the training split only, then apply it to the test split.
    train_data['PedestalValue'] = scaler.fit_transform(train_data[['PedestalValue']]).ravel()
    test_data['PedestalValue'] = scaler.transform(test_data[['PedestalValue']]).ravel()

    train_dataset = SequenceDataset(train_data['PedestalValue'].to_numpy(), seq_length)
    test_dataset = SequenceDataset(test_data['PedestalValue'].to_numpy(), seq_length)

    # Batches are served in series order for both splits.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # ---- Model, loss and optimiser ------------------------------------------------
    input_size = 1
    output_size = 1
    model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device=device)
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters())

    # ---- Training loop ------------------------------------------------------------
    n_batches = len(train_loader)
    losses = []
    for epoch in tqdm(range(num_epochs)):
        model.train()
        epoch_loss = 0.0
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.unsqueeze(-1).to(device)  # (batch, seq_len) -> (batch, seq_len, 1)
            y_batch = y_batch.unsqueeze(-1).to(device)  # (batch,) -> (batch, 1), matching the output
            optimizer.zero_grad()
            loss = criterion(model(x_batch), y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        mean_loss = epoch_loss / n_batches
        losses.append(mean_loss)
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}')

    # ---- Dump the loss curve ------------------------------------------------------
    pd.DataFrame({'LossValues': losses}).to_csv(f'{path}/loss.csv')

    # ---- Evaluate on the test split -----------------------------------------------
    model.eval()
    true_values = []
    predictions = []
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            preds = model(x_batch.unsqueeze(-1).to(device))
            # Flatten to 1-D so each list concatenates into one plain column.
            predictions.append(preds.cpu().numpy().ravel())
            true_values.append(y_batch.numpy().ravel())

    true_values = np.concatenate(true_values)
    predictions = np.concatenate(predictions)

    # The test split holds 2 * seq_length points: the first seq_length seed the
    # first window, the remaining seq_length are the targets, one per sample.
    analysis = test_data.iloc[seq_length:][['Staves', 'Rows']].reset_index(drop=True)
    analysis['Sequence'] = test_data.iloc[:seq_length]['PedestalValue'].to_numpy()
    analysis['TrueValues'] = true_values
    analysis['Predictions'] = predictions
    analysis['Difference'] = analysis['TrueValues'] - analysis['Predictions']
    analysis.to_csv(f'{path}/staves_prediction.csv')

    # ---- Training split, kept for the forecasting plot ----------------------------
    train_data.to_csv(f'{path}/train_sample.csv')

    return

In [4]:
# Load the full dataset, then collect its distinct run numbers in ascending order.
data = pd.read_parquet(
    '/home/hashmi/FileDrive/UpstreamTrackerCalibration/00Data/Inputs/FullData.parquet'
)
all_runs = sorted(data['RunNumber'].drop_duplicates().tolist())

In [5]:
# Slide a fixed-size window of consecutive run numbers over `all_runs` and
# train/evaluate one model per window position.
N_WINDOWS = 10    # number of window positions to process
WINDOW_SIZE = 10  # run numbers per window
for i in range(N_WINDOWS):
    current_run_number_list = all_runs[i:i + WINDOW_SIZE]
    print(f"Current Runs : {current_run_number_list}")
    model_training_and_evaluation(data=data, current_run_number_list=current_run_number_list)
    print("Run Model Dump Completed", "---", sep="\n")

Current Runs : [293528, 293530, 293532, 293533, 297288, 298593, 298594, 298595, 298596, 302935]


 10%|█         | 100/1000 [00:48<07:16,  2.06it/s]

Epoch [100/1000], Loss: 0.0257


 20%|██        | 200/1000 [01:36<06:26,  2.07it/s]

Epoch [200/1000], Loss: 0.0246


 30%|███       | 300/1000 [02:24<05:41,  2.05it/s]

Epoch [300/1000], Loss: 0.0301


 40%|████      | 400/1000 [03:12<04:55,  2.03it/s]

Epoch [400/1000], Loss: 0.0264


 50%|█████     | 500/1000 [04:02<04:09,  2.00it/s]

Epoch [500/1000], Loss: 0.0248


 60%|██████    | 600/1000 [04:51<03:16,  2.04it/s]

Epoch [600/1000], Loss: 0.0216


 70%|███████   | 700/1000 [05:41<02:28,  2.02it/s]

Epoch [700/1000], Loss: 0.0185


 80%|████████  | 800/1000 [06:30<01:38,  2.02it/s]

Epoch [800/1000], Loss: 0.0154


 90%|█████████ | 900/1000 [07:19<00:49,  2.03it/s]

Epoch [900/1000], Loss: 0.0125


100%|██████████| 1000/1000 [08:07<00:00,  2.05it/s]

Epoch [1000/1000], Loss: 0.0112
Run Model Dump Completed
---
Current Runs : [293530, 293532, 293533, 297288, 298593, 298594, 298595, 298596, 302935, 303245]



 10%|█         | 100/1000 [00:48<07:16,  2.06it/s]

Epoch [100/1000], Loss: 0.0206


 20%|██        | 200/1000 [01:37<06:32,  2.04it/s]

Epoch [200/1000], Loss: 0.0197


 30%|███       | 300/1000 [02:25<05:44,  2.03it/s]

Epoch [300/1000], Loss: 0.0172


 40%|████      | 400/1000 [03:14<04:54,  2.03it/s]

Epoch [400/1000], Loss: 0.0143


 50%|█████     | 500/1000 [04:03<04:05,  2.04it/s]

Epoch [500/1000], Loss: 0.0104


 60%|██████    | 600/1000 [04:52<03:16,  2.03it/s]

Epoch [600/1000], Loss: 0.0081


 70%|███████   | 700/1000 [05:42<02:26,  2.05it/s]

Epoch [700/1000], Loss: 0.0067


 80%|████████  | 800/1000 [06:31<01:37,  2.04it/s]

Epoch [800/1000], Loss: 0.0059


 90%|█████████ | 900/1000 [07:19<00:49,  2.04it/s]

Epoch [900/1000], Loss: 0.0057


100%|██████████| 1000/1000 [08:08<00:00,  2.05it/s]

Epoch [1000/1000], Loss: 0.0048
Run Model Dump Completed
---
Current Runs : [293532, 293533, 297288, 298593, 298594, 298595, 298596, 302935, 303245, 303246]



 10%|█         | 100/1000 [00:49<07:24,  2.02it/s]

Epoch [100/1000], Loss: 0.0210


 20%|██        | 200/1000 [01:38<06:34,  2.03it/s]

Epoch [200/1000], Loss: 0.0198


 30%|███       | 300/1000 [02:26<05:43,  2.04it/s]

Epoch [300/1000], Loss: 0.0173


 40%|████      | 400/1000 [03:16<04:59,  2.00it/s]

Epoch [400/1000], Loss: 0.0254


 50%|█████     | 500/1000 [04:05<04:09,  2.01it/s]

Epoch [500/1000], Loss: 0.0218


 60%|██████    | 600/1000 [04:55<03:17,  2.03it/s]

Epoch [600/1000], Loss: 0.0216


 70%|███████   | 700/1000 [05:44<02:27,  2.03it/s]

Epoch [700/1000], Loss: 0.0216


 80%|████████  | 800/1000 [06:34<01:35,  2.08it/s]

Epoch [800/1000], Loss: 0.0215


 90%|█████████ | 900/1000 [07:23<00:50,  1.99it/s]

Epoch [900/1000], Loss: 0.0215


100%|██████████| 1000/1000 [08:13<00:00,  2.03it/s]

Epoch [1000/1000], Loss: 0.0215
Run Model Dump Completed
---
Current Runs : [293533, 297288, 298593, 298594, 298595, 298596, 302935, 303245, 303246, 303247]



 10%|█         | 100/1000 [00:48<07:20,  2.05it/s]

Epoch [100/1000], Loss: 0.0720


 20%|██        | 200/1000 [01:37<06:33,  2.03it/s]

Epoch [200/1000], Loss: 0.0417


 30%|███       | 300/1000 [02:26<05:44,  2.03it/s]

Epoch [300/1000], Loss: 0.0248


 40%|████      | 400/1000 [03:14<04:39,  2.15it/s]

Epoch [400/1000], Loss: 0.0189


 50%|█████     | 500/1000 [04:02<04:03,  2.05it/s]

Epoch [500/1000], Loss: 0.0131


 60%|██████    | 600/1000 [04:51<03:16,  2.04it/s]

Epoch [600/1000], Loss: 0.0108


 70%|███████   | 700/1000 [05:40<02:28,  2.01it/s]

Epoch [700/1000], Loss: 0.0096


 80%|████████  | 800/1000 [06:29<01:32,  2.17it/s]

Epoch [800/1000], Loss: 0.0088


 90%|█████████ | 900/1000 [07:18<00:49,  2.03it/s]

Epoch [900/1000], Loss: 0.0083


100%|██████████| 1000/1000 [08:07<00:00,  2.05it/s]

Epoch [1000/1000], Loss: 0.0074





Run Model Dump Completed
---
Current Runs : [297288, 298593, 298594, 298595, 298596, 302935, 303245, 303246, 303247, 303248]


 10%|█         | 100/1000 [00:48<07:22,  2.04it/s]

Epoch [100/1000], Loss: 0.0748


 20%|██        | 200/1000 [01:37<06:33,  2.03it/s]

Epoch [200/1000], Loss: 0.0381


 30%|███       | 300/1000 [02:26<05:43,  2.04it/s]

Epoch [300/1000], Loss: 0.0231


 40%|████      | 400/1000 [03:15<04:52,  2.05it/s]

Epoch [400/1000], Loss: 0.0169


 50%|█████     | 500/1000 [04:04<04:09,  2.00it/s]

Epoch [500/1000], Loss: 0.0125


 60%|██████    | 600/1000 [04:54<03:17,  2.02it/s]

Epoch [600/1000], Loss: 0.0122


 70%|███████   | 700/1000 [05:43<02:28,  2.02it/s]

Epoch [700/1000], Loss: 0.0119


 80%|████████  | 800/1000 [06:32<01:37,  2.06it/s]

Epoch [800/1000], Loss: 0.0091


 90%|█████████ | 900/1000 [07:21<00:49,  2.04it/s]

Epoch [900/1000], Loss: 0.0101


100%|██████████| 1000/1000 [08:10<00:00,  2.04it/s]

Epoch [1000/1000], Loss: 0.0092
Run Model Dump Completed
---
Current Runs : [298593, 298594, 298595, 298596, 302935, 303245, 303246, 303247, 303248, 306325]



 10%|█         | 100/1000 [00:48<07:14,  2.07it/s]

Epoch [100/1000], Loss: 0.0782


 20%|██        | 200/1000 [01:35<06:05,  2.19it/s]

Epoch [200/1000], Loss: 0.0444


 30%|███       | 300/1000 [02:22<05:29,  2.12it/s]

Epoch [300/1000], Loss: 0.0249


 40%|████      | 400/1000 [03:10<04:35,  2.18it/s]

Epoch [400/1000], Loss: 0.0184


 50%|█████     | 500/1000 [03:57<04:02,  2.06it/s]

Epoch [500/1000], Loss: 0.0138


 60%|██████    | 600/1000 [04:45<03:13,  2.07it/s]

Epoch [600/1000], Loss: 0.0116


 70%|███████   | 700/1000 [05:33<02:26,  2.05it/s]

Epoch [700/1000], Loss: 0.0101


 80%|████████  | 800/1000 [06:22<01:37,  2.05it/s]

Epoch [800/1000], Loss: 0.0085


 90%|█████████ | 900/1000 [07:10<00:49,  2.03it/s]

Epoch [900/1000], Loss: 0.0089


100%|██████████| 1000/1000 [07:59<00:00,  2.08it/s]

Epoch [1000/1000], Loss: 0.0087
Run Model Dump Completed
---
Current Runs : [298594, 298595, 298596, 302935, 303245, 303246, 303247, 303248, 306325, 306326]



 10%|█         | 100/1000 [00:48<07:24,  2.03it/s]

Epoch [100/1000], Loss: 0.0682


 20%|██        | 200/1000 [01:37<06:28,  2.06it/s]

Epoch [200/1000], Loss: 0.0350


 30%|███       | 300/1000 [02:25<05:40,  2.06it/s]

Epoch [300/1000], Loss: 0.0222


 40%|████      | 400/1000 [03:14<04:51,  2.06it/s]

Epoch [400/1000], Loss: 0.0142


 50%|█████     | 500/1000 [04:03<04:02,  2.06it/s]

Epoch [500/1000], Loss: 0.0116


 60%|██████    | 600/1000 [04:51<03:14,  2.06it/s]

Epoch [600/1000], Loss: 0.0103


 70%|███████   | 700/1000 [05:40<02:25,  2.06it/s]

Epoch [700/1000], Loss: 0.0092


 80%|████████  | 800/1000 [06:28<01:37,  2.05it/s]

Epoch [800/1000], Loss: 0.0085


 90%|█████████ | 900/1000 [07:17<00:48,  2.06it/s]

Epoch [900/1000], Loss: 0.0081


100%|██████████| 1000/1000 [08:06<00:00,  2.06it/s]

Epoch [1000/1000], Loss: 0.0829





Run Model Dump Completed
---
Current Runs : [298595, 298596, 302935, 303245, 303246, 303247, 303248, 306325, 306326, 306327]


 10%|█         | 100/1000 [00:48<07:21,  2.04it/s]

Epoch [100/1000], Loss: 0.0760


 20%|██        | 200/1000 [01:37<06:33,  2.04it/s]

Epoch [200/1000], Loss: 0.0819


 30%|███       | 300/1000 [02:26<05:43,  2.04it/s]

Epoch [300/1000], Loss: 0.0807


 40%|████      | 400/1000 [03:15<04:54,  2.04it/s]

Epoch [400/1000], Loss: 0.0802


 50%|█████     | 500/1000 [04:05<04:06,  2.03it/s]

Epoch [500/1000], Loss: 0.0798


 60%|██████    | 600/1000 [04:54<03:15,  2.05it/s]

Epoch [600/1000], Loss: 0.0799


 70%|███████   | 700/1000 [05:42<02:17,  2.17it/s]

Epoch [700/1000], Loss: 0.0762


 80%|████████  | 800/1000 [06:28<01:36,  2.06it/s]

Epoch [800/1000], Loss: 0.0496


 90%|█████████ | 900/1000 [07:17<00:48,  2.06it/s]

Epoch [900/1000], Loss: 0.0292


100%|██████████| 1000/1000 [08:05<00:00,  2.06it/s]

Epoch [1000/1000], Loss: 0.0203
Run Model Dump Completed
---
Current Runs : [298596, 302935, 303245, 303246, 303247, 303248, 306325, 306326, 306327, 306328]



 10%|█         | 100/1000 [00:48<07:20,  2.04it/s]

Epoch [100/1000], Loss: 0.0727


 20%|██        | 200/1000 [01:37<06:27,  2.06it/s]

Epoch [200/1000], Loss: 0.0575


 30%|███       | 300/1000 [02:25<05:39,  2.06it/s]

Epoch [300/1000], Loss: 0.0301


 40%|████      | 400/1000 [03:13<04:51,  2.06it/s]

Epoch [400/1000], Loss: 0.0206


 50%|█████     | 500/1000 [04:02<04:03,  2.06it/s]

Epoch [500/1000], Loss: 0.0140


 60%|██████    | 600/1000 [04:50<03:14,  2.06it/s]

Epoch [600/1000], Loss: 0.0122


 70%|███████   | 700/1000 [05:39<02:25,  2.06it/s]

Epoch [700/1000], Loss: 0.0105


 80%|████████  | 800/1000 [06:27<01:37,  2.05it/s]

Epoch [800/1000], Loss: 0.0106


 90%|█████████ | 900/1000 [07:16<00:48,  2.06it/s]

Epoch [900/1000], Loss: 0.0093


100%|██████████| 1000/1000 [08:05<00:00,  2.06it/s]

Epoch [1000/1000], Loss: 0.0093
Run Model Dump Completed
---
Current Runs : [302935, 303245, 303246, 303247, 303248, 306325, 306326, 306327, 306328, 306787]



 10%|█         | 100/1000 [00:48<07:13,  2.08it/s]

Epoch [100/1000], Loss: 0.0758


 20%|██        | 200/1000 [01:36<06:28,  2.06it/s]

Epoch [200/1000], Loss: 0.0440


 30%|███       | 300/1000 [02:24<05:22,  2.17it/s]

Epoch [300/1000], Loss: 0.0202


 40%|████      | 400/1000 [03:11<04:33,  2.19it/s]

Epoch [400/1000], Loss: 0.0146


 50%|█████     | 500/1000 [03:59<04:00,  2.08it/s]

Epoch [500/1000], Loss: 0.0126


 60%|██████    | 600/1000 [04:47<03:13,  2.07it/s]

Epoch [600/1000], Loss: 0.0113


 70%|███████   | 700/1000 [05:35<02:25,  2.07it/s]

Epoch [700/1000], Loss: 0.0102


 80%|████████  | 800/1000 [06:23<01:36,  2.07it/s]

Epoch [800/1000], Loss: 0.0100


 90%|█████████ | 900/1000 [07:10<00:46,  2.17it/s]

Epoch [900/1000], Loss: 0.0097


100%|██████████| 1000/1000 [07:59<00:00,  2.09it/s]

Epoch [1000/1000], Loss: 0.0092





Run Model Dump Completed
---
