# DATA PREPERATION

In [None]:
# --------------------- Imports ---------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
from google.colab import drive

# --------------------- Matplotlib Setup ---------------------
mpl.rcParams.update({
    'font.size': 14,
    'axes.titlesize': 15,
    'axes.labelsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 11,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'figure.autolayout': True,
})

# --------------------- Load Data ---------------------
print("Mounting Google Drive and loading dataset...")
drive.mount('/content/drive')
total_capture_7k = pd.read_csv('drive/My Drive/correlation_wide.csv')
print(f"Loaded dataset with shape: {total_capture_7k.shape}")

# --------------------- Identify Unique Static Parameter Sets ---------------------
static_cols = [
    'MikeSorghum', 'Quartz', 'Plagioclase', 'Apatite', 'Ilmenite',
    'Diopside_Mn', 'Diopside', 'Olivine', 'Alkali-feldspar',
    'Montmorillonite', 'Glass', 'temp', 'shift', 'year'
]

# Add timestep count per file_id
file_lengths = total_capture_7k.groupby('file_id').size().rename("num_timesteps").reset_index()
static_rows = total_capture_7k.groupby('file_id')[static_cols].first().reset_index()
static_rows = static_rows.merge(file_lengths, on='file_id')

# Filter only unique static parameter sets
unique_static_rows = static_rows.drop_duplicates(subset=static_cols)
unique_file_ids = unique_static_rows['file_id'].tolist()

# --------------------- Extract Time Series Data ---------------------
filtered_df = total_capture_7k[total_capture_7k['file_id'].isin(unique_file_ids)].copy()

# Truncate each group to 101 timesteps
filtered_df = filtered_df.groupby('file_id').head(101).reset_index(drop=True)

# --------------------- Static Feature Table ---------------------
Input_Link_Table = filtered_df.groupby('file_id').agg({col: 'first' for col in static_cols}).reset_index()
print(f"Static feature table created: Input_Link_Table.shape = {Input_Link_Table.shape}")

# --------------------- Time Series Structuring ---------------------
result = filtered_df[['Total_CO2_capture', 'year', 'file_id']]
file_ids = result['file_id'].unique()
num_file_ids = len(file_ids)
max_timesteps = 101
relevant_data = np.zeros((num_file_ids, max_timesteps))
file_id_order = np.zeros(num_file_ids)

for i, file_id in enumerate(file_ids):
    file_data = result[result['file_id'] == file_id]['Total_CO2_capture'].values
    relevant_data[i, :len(file_data)] = file_data
    file_id_order[i] = file_id
print(f"Time series matrix constructed: relevant_data.shape = {relevant_data.shape}")

# --------------------- Clustering ---------------------
scaler = StandardScaler()
normalized_data = scaler.fit_transform(relevant_data)
kmeans = KMeans(n_clusters=8, random_state=42)
clusters = kmeans.fit_predict(normalized_data)
print("Performed KMeans clustering into 8 clusters")

# Compute boundary stats
cluster_boundaries = []
for cluster_id in range(8):
    cluster_data = normalized_data[clusters == cluster_id]
    min_v = scaler.inverse_transform(np.min(cluster_data, axis=0).reshape(1, -1)).flatten()
    median_v = scaler.inverse_transform(np.median(cluster_data, axis=0).reshape(1, -1)).flatten()
    mean_v = scaler.inverse_transform(np.mean(cluster_data, axis=0).reshape(1, -1)).flatten()
    max_v = scaler.inverse_transform(np.max(cluster_data, axis=0).reshape(1, -1)).flatten()
    cluster_boundaries.append((min_v, median_v, mean_v, max_v))
cluster_boundaries = np.array(cluster_boundaries)
print(f"Cluster boundary stats calculated: cluster_boundaries.shape = {cluster_boundaries.shape}")

# --------------------- Merge Static Features with Clusters ---------------------
Clustering_link_table = pd.DataFrame({'file_id': file_id_order.astype(int), 'cluster': clusters})
Clustering_link_table = Clustering_link_table.sort_values(by='file_id').reset_index(drop=True)
merged_df = pd.merge(Input_Link_Table, Clustering_link_table, on='file_id')
print(f"Final input features (static + cluster): merged_df.shape = {merged_df.shape}")

# --------------------- Create Output Time Series DataFrame ---------------------
data = [[file_id_order[i].astype(int), t, relevant_data[i, t]] for i in range(len(file_id_order)) for t in range(max_timesteps)]
df_output = pd.DataFrame(data, columns=['file_id', 'timestep', 'CO2']).sort_values(by=['file_id', 'timestep'])
print(f"Final output time series: df_output.shape = {df_output.shape}")

# --------------------- Summary ---------------------
print("Data Preparation Summary:")
print(f"Static Input Table: merged_df [{merged_df.shape[0]} rows × {merged_df.shape[1]} columns]")
print(f"Time Series Output: df_output [{df_output.shape[0]} rows × 3 columns]")
print(f"Cluster Boundaries: cluster_boundaries [{cluster_boundaries.shape}]")


Mounting Google Drive and loading dataset...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded dataset with shape: (1192157, 17)
Static feature table created: Input_Link_Table.shape = (2703, 15)
Time series matrix constructed: relevant_data.shape = (2703, 101)
Performed KMeans clustering into 8 clusters
Cluster boundary stats calculated: cluster_boundaries.shape = (8, 4, 101)
Final input features (static + cluster): merged_df.shape = (2703, 16)
Final output time series: df_output.shape = (273003, 3)
Data Preparation Summary:
Static Input Table: merged_df [2703 rows × 16 columns]
Time Series Output: df_output [273003 rows × 3 columns]
Cluster Boundaries: cluster_boundaries [(8, 4, 101)]


# MODELS

**Advanced DSSM model**

In [None]:
class AdvancedDSSMDeepState(nn.Module):
    def __init__(self, input_dim, static_dim, hidden_dim, output_dim):
        super(AdvancedDSSMDeepState, self).__init__()

        # Static Data Path (Fully connected layers for static features)
        self.fc_static1 = nn.Linear(static_dim, 512)
        self.fc_static2 = nn.Linear(512, 256)
        self.fc_static3 = nn.Linear(256, 128)
        self.fc_static4 = nn.Linear(128, 64)

        # Time-series Path (Conv1D for feature extraction)
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=hidden_dim, kernel_size=3, padding=1)
        self.relu = nn.ReLU()

        # Deep State Dynamics (LSTM for latent state transitions)
        self.lstm_state = nn.LSTM(hidden_dim + 64, hidden_dim, batch_first=True)

        # Observation Model (Mapping latent states to outputs)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, time_series_input, static_input):
        # Static Data Path
        static_out = self.relu(self.fc_static1(static_input))
        static_out = self.relu(self.fc_static2(static_out))
        static_out = self.relu(self.fc_static3(static_out))
        static_out = self.relu(self.fc_static4(static_out))  # Shape: [batch_size, 64]

        # Time-Series Data Path
        if len(time_series_input.shape) == 2:  # [batch_size, seq_len]
            time_series_input = time_series_input.unsqueeze(1)  # Add channel dimension: [batch_size, 1, seq_len]

        conv_out = self.conv1(time_series_input)  # Conv1D layer
        conv_out = self.relu(conv_out)
        conv_out = conv_out.transpose(1, 2)  # Shape: [batch_size, seq_len, hidden_dim]

        # Expand static features to match the sequence length
        static_expanded = static_out.unsqueeze(1).expand(-1, conv_out.size(1), -1)  # Shape: [batch_size, seq_len, 64]

        # Combine Conv1D features and static features
        lstm_input = torch.cat([conv_out, static_expanded], dim=2)  # Shape: [batch_size, seq_len, hidden_dim + 64]

        # Latent State Dynamics (LSTM for state transitions)
        lstm_out, _ = self.lstm_state(lstm_input)  # Shape: [batch_size, seq_len, hidden_dim]

        # Observation Model
        lstm_out_final = lstm_out[:, -1, :]  # Use the last state for prediction
        x = self.fc1(lstm_out_final)
        x = self.relu(x)
        output = self.fc2(x)  # Final prediction

        return output

def plot_boundary_cases_with_input(inputs, Boundary_case_actuals, Boundary_case_predicted, model_name, input_length):
    case_names = ["Best", "Average", "Worst"]
    x_range = input_length
    y_range = Boundary_case_actuals.shape[1]
    total_timesteps = x_range + y_range

    for i in range(3):
        plt.figure(figsize=(7.5, 3.2))

        # Plot input (X)
        plt.plot(range(x_range), inputs[i], color='black', alpha=0.5, label='Input')

        # Plot output actual vs predicted (Y)
        plt.plot(range(x_range, total_timesteps), Boundary_case_actuals[i], color='blue', alpha=0.8, label='Actual')
        plt.plot(range(x_range, total_timesteps), Boundary_case_predicted[i], color='red', alpha=0.8, label='Predicted')

        plt.xlabel("Time Steps")
        plt.ylabel("CO₂ Sequestration")
        plt.title(f"{case_names[i]} Case – {model_name}")
        plt.legend()
        plt.tight_layout(pad=2.5)

        filename = f"drive/My Drive/DSSM-Figures1/{model_name}_{case_names[i]}.pdf"
        plt.savefig(filename, format='pdf', bbox_inches='tight')
        plt.close()

**DSSM basic**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np

# Define the Deep State Space Model
class DeepStateSpaceModel(nn.Module):
    def __init__(self, input_dim, static_dim, hidden_dim, output_dim):
        super(DeepStateSpaceModel, self).__init__()

        # State Transition Model (LSTM for latent state evolution)
        self.state_transition = nn.LSTM(input_dim + static_dim, hidden_dim, batch_first=True)

        # Observation Model (Fully connected layers for mapping latent state to output)
        self.fc_obs1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc_obs2 = nn.Linear(hidden_dim, output_dim)

        # ReLU activation for nonlinearity
        self.relu = nn.ReLU()

    def forward(self, time_series_input, static_input):
        # Validate and adjust time_series_input shape if needed
        if len(time_series_input.shape) == 2:  # [batch_size, seq_len]
            time_series_input = time_series_input.unsqueeze(-1)  # Add feature dimension: [batch_size, seq_len, 1]

        # Expand static features to match sequence length
        static_expanded = static_input.unsqueeze(1).expand(-1, time_series_input.size(1), -1)  # [batch_size, seq_len, static_dim]

        # Concatenate static and time-series inputs
        lstm_input = torch.cat([time_series_input, static_expanded], dim=2)  # [batch_size, seq_len, input_dim + static_dim]

        # State Transition Dynamics
        state_output, _ = self.state_transition(lstm_input)  # [batch_size, seq_len, hidden_dim]

        # Observation Model (map latent states to predictions)
        obs_out = self.relu(self.fc_obs1(state_output))  # [batch_size, seq_len, hidden_dim]
        predictions = self.fc_obs2(obs_out)  # [batch_size, seq_len, output_dim]

        # Return only the prediction for the last time step
        return predictions[:, -1, :]  # [batch_size, output_dim]

**TFT**

In [None]:
import torch
import torch.nn as nn

class GatedResidualNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim=None, dropout=0.1):
        super().__init__()
        if output_dim is None:
            output_dim = input_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.elu = nn.ELU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.gate = nn.Linear(output_dim, output_dim)
        self.sigmoid = nn.Sigmoid()
        self.skip = nn.Linear(input_dim, output_dim) if input_dim != output_dim else None
        self.norm = nn.LayerNorm(output_dim)

    def forward(self, x):
        residual = x if self.skip is None else self.skip(x)
        x = self.fc1(x)
        x = self.elu(x)
        x = self.fc2(x)
        x = self.dropout(x)
        gate = self.sigmoid(self.gate(x))
        x = gate * x
        x = x + residual
        x = self.norm(x)
        return x


class TemporalFusionTransformer(nn.Module):
    def __init__(self, input_size, output_size, static_size, hidden_size=64, num_heads=4, dropout=0.1, num_lstm_layers=1):
        super().__init__()
        self.input_proj = nn.Linear(1, hidden_size)
        self.static_encoder = nn.Sequential(
            nn.Linear(static_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )

        self.lstm = nn.LSTM(input_size=hidden_size + hidden_size,
                            hidden_size=hidden_size,
                            num_layers=num_lstm_layers,
                            batch_first=True)

        self.post_lstm_grn = GatedResidualNetwork(hidden_size, hidden_size, hidden_size, dropout)
        self.attention = nn.MultiheadAttention(embed_dim=hidden_size, num_heads=num_heads, dropout=dropout, batch_first=True)
        self.attn_grn = GatedResidualNetwork(hidden_size, hidden_size, hidden_size, dropout)
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x_seq, x_static):
        # x_seq: [B, T_in, 1]
        # x_static: [B, static_size]
        batch_size, seq_len, _ = x_seq.size()

        x_seq_emb = self.input_proj(x_seq)  # [B, T_in, H]
        x_static_emb = self.static_encoder(x_static)  # [B, H]
        x_static_expanded = x_static_emb.unsqueeze(1).expand(-1, seq_len, -1)  # [B, T_in, H]

        x_combined = torch.cat([x_seq_emb, x_static_expanded], dim=-1)  # [B, T_in, 2H]
        lstm_out, _ = self.lstm(x_combined)  # [B, T_in, H]
        lstm_out = self.post_lstm_grn(lstm_out)  # GRN layer

        # Self-attention
        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out)
        attn_out = self.attn_grn(attn_out)  # GRN layer again

        final_out = self.output_layer(attn_out[:, -1, :])  # final timestep only
        return final_out


**NLinear**

In [None]:
class NLinear(nn.Module):
    def __init__(self, seq_len, pred_len, individual=False):
        super(NLinear, self).__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.individual = individual

        if self.individual:
            self.Linear = nn.ModuleList()
            for i in range(1):  # Univariate case
                self.Linear.append(nn.Linear(self.seq_len, self.pred_len))
        else:
            self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        if len(x.shape) == 3:
            x = x.squeeze(-1)

        seq_last = x[:, -1:].detach()
        x = x - seq_last

        if self.individual:
            out = torch.zeros([x.size(0), self.pred_len], dtype=x.dtype).to(x.device)
            for i in range(1):
                out[:, :] = self.Linear[i](x)
        else:
            out = self.Linear(x)

        out = out + seq_last
        return out.unsqueeze(-1)

**TCN**

In [None]:
import torch
import torch.nn as nn

class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()

class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, dilation, padding, dropout):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               stride=stride, padding=padding, dilation=dilation)
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               stride=stride, padding=padding, dilation=dilation)
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TCN_MIMO(nn.Module):
    def __init__(self, input_len, output_len, static_dim, num_channels, kernel_size=3, dropout=0.2):
        super().__init__()
        self.static_fc = nn.Sequential(
            nn.Linear(static_dim, num_channels[0]),
            nn.ReLU(),
            nn.Linear(num_channels[0], num_channels[0])
        )

        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_ch = 1 if i == 0 else num_channels[i-1]
            out_ch = num_channels[i]
            layers += [TemporalBlock(in_ch, out_ch, kernel_size, stride=1,
                                     dilation=dilation_size, padding=(kernel_size-1)*dilation_size, dropout=dropout)]
        self.tcn = nn.Sequential(*layers)

        self.output_layer = nn.Sequential(
            nn.Linear(num_channels[-1] + num_channels[0], 128),
            nn.ReLU(),
            nn.Linear(128, output_len)
        )

    def forward(self, x_seq, x_static):
        # x_seq: [B, T, 1], x_static: [B, static_dim]
        x_seq = x_seq.transpose(1, 2)  # [B, 1, T]
        tcn_out = self.tcn(x_seq)     # [B, C, T]
        tcn_out = tcn_out[:, :, -1]   # [B, C] final time step only

        static_out = self.static_fc(x_static)  # [B, C]
        combined = torch.cat([tcn_out, static_out], dim=1)  # [B, 2C]
        return self.output_layer(combined)  # [B, output_len]


**LSTM**

In [None]:
import torch
import torch.nn as nn

class LSTM_MIMO(nn.Module):
    def __init__(self, input_len, output_len, static_dim, hidden_dim=128, num_layers=2):
        super(LSTM_MIMO, self).__init__()
        self.input_len = input_len
        self.output_len = output_len

        self.static_fc = nn.Sequential(
            nn.Linear(static_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim)
        )

        self.lstm = nn.LSTM(input_size=1 + hidden_dim, hidden_size=hidden_dim,
                            num_layers=num_layers, batch_first=True)

        self.output_fc = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_len)
        )

    def forward(self, x_seq, x_static):
        # x_seq: [B, T, 1]
        # x_static: [B, static_dim]
        batch_size, seq_len, _ = x_seq.size()
        static_encoded = self.static_fc(x_static)  # [B, H]
        static_expanded = static_encoded.unsqueeze(1).expand(-1, seq_len, -1)  # [B, T, H]
        lstm_input = torch.cat([x_seq, static_expanded], dim=-1)  # [B, T, 1+H]
        lstm_out, _ = self.lstm(lstm_input)  # [B, T, H]
        last_hidden = lstm_out[:, -1, :]  # [B, H]
        out = self.output_fc(last_hidden)  # [B, output_len]
        return out


# Experiments - Overnight duration experimentation

**Advanced-DSSM-MSE-experiment (best model, saved model, hyperparameter listing, training status report)**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import os

# Define fixed hyperparameters (excluding input/output dimensions)
dssm_advanced_config = {
    'hidden_dim': 101,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 0.001,
    'model_name': 'DSSM_Advanced'
}

# DataFrame to store results
DSSM_Advanced_mse = pd.DataFrame(columns=['Split', 'Test_MSE'])

# Output path
drive_path = '/content/drive/MyDrive/DSSM-Figures'
os.makedirs(drive_path, exist_ok=True)

# Define splits
splits = [ (80, 20),(60, 40), (50, 50), (40, 60), (20, 80), (10, 90), (5, 95), (3, 97), (1, 99)]

for train_pct, test_pct in splits:
    split_name = f"{train_pct}_{test_pct}"
    print(f"\n==== Running Split: {split_name} ====")

    file_ids = df_output['file_id'].unique()
    trainval_ids, test_ids = train_test_split(file_ids, test_size=0.2, random_state=42)
    train_ids, val_ids = train_test_split(trainval_ids, test_size=0.2, random_state=42)

    def extract_X_Y(ids, pct):
        df_subset = df_output[df_output['file_id'].isin(ids)]
        pivoted = df_subset.pivot(index='file_id', columns='timestep', values='CO2').values
        split_idx = int(pct / 100 * 101)
        X = pivoted[:, :split_idx]
        Y = pivoted[:, split_idx:]
        return X, Y

    X_train, Y_train = extract_X_Y(train_ids, train_pct)
    X_val, Y_val = extract_X_Y(val_ids, train_pct)
    X_test, Y_test = extract_X_Y(test_ids, train_pct)

    static_train = merged_df[merged_df['file_id'].isin(train_ids)].drop(columns=['file_id', 'cluster']).values
    static_val = merged_df[merged_df['file_id'].isin(val_ids)].drop(columns=['file_id', 'cluster']).values
    static_test = merged_df[merged_df['file_id'].isin(test_ids)].drop(columns=['file_id', 'cluster']).values

    print(f"Split {split_name} — X (INPUT): {X_train.shape[1]}, Y (OUTPUT): {Y_train.shape[1]}")
    print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

    # Convert to tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
    static_train_tensor = torch.tensor(static_train, dtype=torch.float32)

    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)
    static_val_tensor = torch.tensor(static_val, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
    static_test_tensor = torch.tensor(static_test, dtype=torch.float32)

    # DataLoaders
    train_loader = DataLoader(TensorDataset(X_train_tensor, static_train_tensor, Y_train_tensor),
                              batch_size=dssm_advanced_config['batch_size'], shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val_tensor, static_val_tensor, Y_val_tensor),
                            batch_size=dssm_advanced_config['batch_size'])

    # Model setup
    model = AdvancedDSSMDeepState(
        input_dim=X_train.shape[1],
        static_dim=static_train.shape[1],
        hidden_dim=dssm_advanced_config['hidden_dim'],
        output_dim=Y_train.shape[1]
    )

    optimizer = optim.Adam(model.parameters(), lr=dssm_advanced_config['learning_rate'])
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    best_model_state = None

    print("Training started...")
    for epoch in range(dssm_advanced_config['epochs']):
        model.train()
        for X_batch, static_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch, static_batch)
            loss = criterion(preds, Y_batch)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, static_batch, Y_batch in val_loader:
                preds = model(X_batch, static_batch)
                val_loss += criterion(preds, Y_batch).item()
            val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}: Train Loss = {loss.item():.6f}, Val Loss = {val_loss:.6f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            #print(f"New best model found at epoch {epoch + 1} with val loss {val_loss:.6f}")
            torch.save(best_model_state, os.path.join(drive_path, f"best_model_{split_name}.pt"))

    # Load best model
    model.load_state_dict(torch.load(os.path.join(drive_path, f"best_model_{split_name}.pt")))
    model.eval()

    # Final test loss
    with torch.no_grad():
        test_preds = model(X_test_tensor, static_test_tensor)
        test_loss = criterion(test_preds, Y_test_tensor).item()

    DSSM_Advanced_mse.loc[len(DSSM_Advanced_mse)] = [split_name, test_loss]
    print(f"Split {split_name} — Final Test MSE: {test_loss:.6f}")



==== Running Split: 80_20 ====
Split 80_20 — X (INPUT): 80, Y (OUTPUT): 21
Train: (1729, 80), Val: (433, 80), Test: (541, 80)
Training started...
Epoch 10: Train Loss = 0.000259, Val Loss = 0.003496
Epoch 20: Train Loss = 0.002615, Val Loss = 0.007881
Epoch 30: Train Loss = 0.000061, Val Loss = 0.000755
Epoch 40: Train Loss = 0.000161, Val Loss = 0.000293
Epoch 50: Train Loss = 0.000015, Val Loss = 0.000746
Epoch 60: Train Loss = 0.000051, Val Loss = 0.000164
Epoch 70: Train Loss = 0.000059, Val Loss = 0.000160
Epoch 80: Train Loss = 0.000551, Val Loss = 0.001038
Epoch 90: Train Loss = 0.000009, Val Loss = 0.000293
Epoch 100: Train Loss = 0.000015, Val Loss = 0.000111
Epoch 110: Train Loss = 0.000047, Val Loss = 0.000075
Epoch 120: Train Loss = 0.000028, Val Loss = 0.000098
Epoch 130: Train Loss = 0.000366, Val Loss = 0.000365
Epoch 140: Train Loss = 0.000185, Val Loss = 0.000829
Epoch 150: Train Loss = 0.000041, Val Loss = 0.000240
Epoch 160: Train Loss = 0.000002, Val Loss = 0.00006

**New TFT experiment**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

# === TFT Hyperparameter Configuration ===
tft_config = {
    'hidden_size': 64,
    'num_heads': 4,
    'dropout': 0.1,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 0.001,
    'model_name': 'TFT'
}

# Ensure drive path exists
drive_path = '/content/drive/MyDrive/DSSM-Figures'
os.makedirs(drive_path, exist_ok=True)

# Initialize results container
TFT_mse = pd.DataFrame(columns=['Split', 'Test_MSE'])

# All input-output splits
splits = [(80, 20), (60, 40), (50, 50), (40, 60), (20, 80), (10, 90), (5, 95), (3, 97), (1, 99)]

np.random.seed(42)
torch.manual_seed(42)

for train_pct, test_pct in splits:
    split_name = f"{train_pct}_{test_pct}"
    print(f"\n==== Running Split: {split_name} ====")

    # File ID splits
    file_ids = df_output['file_id'].unique()
    trainval_ids, test_ids = train_test_split(file_ids, test_size=0.2, random_state=42)
    train_ids, val_ids = train_test_split(trainval_ids, test_size=0.2, random_state=42)

    def extract_X_Y(ids, pct):
        df_subset = df_output[df_output['file_id'].isin(ids)]
        pivoted = df_subset.pivot(index='file_id', columns='timestep', values='CO2').values
        split_idx = int(pct / 100 * 101)
        X = pivoted[:, :split_idx]
        Y = pivoted[:, split_idx:]
        return X, Y

    X_train, Y_train = extract_X_Y(train_ids, train_pct)
    X_val, Y_val = extract_X_Y(val_ids, train_pct)
    X_test, Y_test = extract_X_Y(test_ids, train_pct)

    static_train = merged_df[merged_df['file_id'].isin(train_ids)].drop(columns=['file_id', 'cluster']).values
    static_val = merged_df[merged_df['file_id'].isin(val_ids)].drop(columns=['file_id', 'cluster']).values
    static_test = merged_df[merged_df['file_id'].isin(test_ids)].drop(columns=['file_id', 'cluster']).values

    print(f"Split {split_name} — X (INPUT): {X_train.shape[1]}, Y (OUTPUT): {Y_train.shape[1]}")
    print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

    X_train_tensor = torch.tensor(X_train[:, :, None], dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
    static_train_tensor = torch.tensor(static_train, dtype=torch.float32)

    X_val_tensor = torch.tensor(X_val[:, :, None], dtype=torch.float32)
    Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)
    static_val_tensor = torch.tensor(static_val, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test[:, :, None], dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
    static_test_tensor = torch.tensor(static_test, dtype=torch.float32)

    train_loader = DataLoader(TensorDataset(X_train_tensor, static_train_tensor, Y_train_tensor), batch_size=tft_config['batch_size'], shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val_tensor, static_val_tensor, Y_val_tensor), batch_size=tft_config['batch_size'])

    model = TemporalFusionTransformer(
        input_size=1,
        output_size=Y_train.shape[1],
        static_size=static_train.shape[1],
        hidden_size=tft_config['hidden_size'],
        num_heads=tft_config['num_heads'],
        dropout=tft_config['dropout']
    )

    optimizer = optim.Adam(model.parameters(), lr=tft_config['learning_rate'])
    criterion = nn.MSELoss()
    best_val_loss = float('inf')
    best_model_state = None

    print("Training started...")
    for epoch in range(tft_config['epochs']):
        model.train()
        for X_batch, static_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch, static_batch)
            loss = criterion(preds, Y_batch)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, static_batch, Y_batch in val_loader:
                preds = model(X_batch, static_batch)
                val_loss += criterion(preds, Y_batch).item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}: Train Loss = {loss.item():.6f}, Val Loss = {val_loss:.6f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            #print(f"New best model found at epoch {epoch+1} with val loss {val_loss:.6f}")
            torch.save(best_model_state, os.path.join(drive_path, f"TFT_best_model_{split_name}.pt"))

    model.load_state_dict(torch.load(os.path.join(drive_path, f"TFT_best_model_{split_name}.pt")))
    model.eval()

    with torch.no_grad():
        preds = model(X_test_tensor, static_test_tensor)
        test_loss = criterion(preds, Y_test_tensor).item()

    TFT_mse.loc[len(TFT_mse)] = [split_name, test_loss]
    print(f"Split {split_name} — Final Test MSE: {test_loss:.6f}")


==== Running Split: 80_20 ====
Split 80_20 — X (INPUT): 80, Y (OUTPUT): 21
Train: (1729, 80), Val: (433, 80), Test: (541, 80)
Training started...
Epoch 10: Train Loss = 0.039389, Val Loss = 0.092938
Epoch 20: Train Loss = 0.041222, Val Loss = 0.051265
Epoch 30: Train Loss = 0.000654, Val Loss = 0.013818
Epoch 40: Train Loss = 0.000836, Val Loss = 0.003894
Epoch 50: Train Loss = 0.000830, Val Loss = 0.007091
Epoch 60: Train Loss = 0.007971, Val Loss = 0.024487
Epoch 70: Train Loss = 0.001138, Val Loss = 0.003994
Epoch 80: Train Loss = 0.016205, Val Loss = 0.056298
Epoch 90: Train Loss = 0.001833, Val Loss = 0.004595
Epoch 100: Train Loss = 0.000128, Val Loss = 0.001589
Epoch 110: Train Loss = 0.000026, Val Loss = 0.003082
Epoch 120: Train Loss = 0.000091, Val Loss = 0.001999
Epoch 130: Train Loss = 0.005087, Val Loss = 0.004575
Epoch 140: Train Loss = 0.002832, Val Loss = 0.002473
Epoch 150: Train Loss = 0.000482, Val Loss = 0.002220
Epoch 160: Train Loss = 0.005423, Val Loss = 0.01656

**New Nlinear experiment**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

# Model Definition
class NLinear(nn.Module):
    def __init__(self, seq_len, pred_len, individual=False):
        super(NLinear, self).__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len
        self.individual = individual
        if self.individual:
            self.Linear = nn.ModuleList([nn.Linear(self.seq_len, self.pred_len)])
        else:
            self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        if len(x.shape) == 3:
            x = x.squeeze(-1)
        seq_last = x[:, -1:].detach()
        x = x - seq_last
        if self.individual:
            out = torch.zeros([x.size(0), self.pred_len], dtype=x.dtype).to(x.device)
            for i in range(1):
                out[:, :] = self.Linear[i](x)
        else:
            out = self.Linear(x)
        out = out + seq_last
        return out.unsqueeze(-1)

# Hyperparameters and Output Storage
nlinear_config = {
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 0.001,
    'individual': False,
    'model_name': 'NLinear'
}

NLinear_mse = pd.DataFrame(columns=['Split', 'Test_MSE'])
splits = [(80, 20), (60, 40), (50, 50), (40, 60), (20, 80), (10, 90), (5, 95), (3, 97), (1, 99)]
drive_path = '/content/drive/MyDrive/DSSM-Figures'
os.makedirs(drive_path, exist_ok=True)

for train_pct, test_pct in splits:
    split_name = f"{train_pct}_{test_pct}"
    print(f"\n==== Running Split: {split_name} ====")

    # Split file_ids
    file_ids = df_output['file_id'].unique()
    trainval_ids, test_ids = train_test_split(file_ids, test_size=0.2, random_state=42)
    train_ids, val_ids = train_test_split(trainval_ids, test_size=0.2, random_state=42)

    def extract_X_Y(ids, pct):
        df_subset = df_output[df_output['file_id'].isin(ids)]
        pivoted = df_subset.pivot(index='file_id', columns='timestep', values='CO2').values
        split_idx = int(pct / 100 * 101)
        X = pivoted[:, :split_idx]
        Y = pivoted[:, split_idx:]
        return X, Y

    X_train, Y_train = extract_X_Y(train_ids, train_pct)
    X_val, Y_val = extract_X_Y(val_ids, train_pct)
    X_test, Y_test = extract_X_Y(test_ids, train_pct)

    X_train_tensor = torch.tensor(X_train[:, :, None], dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_train[:, :, None], dtype=torch.float32)
    X_val_tensor = torch.tensor(X_val[:, :, None], dtype=torch.float32)
    Y_val_tensor = torch.tensor(Y_val[:, :, None], dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test[:, :, None], dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test[:, :, None], dtype=torch.float32)

    print(f"Split {split_name} — X (INPUT): {X_train.shape[1]}, Y (OUTPUT): {Y_train.shape[1]}")
    print(f"Train: {X_train_tensor.shape}, Val: {X_val_tensor.shape}, Test: {X_test_tensor.shape}")

    train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, Y_val_tensor)
    test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=nlinear_config['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=nlinear_config['batch_size'])
    test_loader = DataLoader(test_dataset, batch_size=nlinear_config['batch_size'])

    model = NLinear(seq_len=X_train.shape[1], pred_len=Y_train.shape[1], individual=nlinear_config['individual'])
    optimizer = optim.Adam(model.parameters(), lr=nlinear_config['learning_rate'])
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    best_model_state = None

    print("Training started...")
    for epoch in range(nlinear_config['epochs']):
        model.train()
        for X_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, Y_batch)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, Y_batch in val_loader:
                preds = model(X_batch)
                val_loss += criterion(preds, Y_batch).item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}: Train Loss = {loss.item():.6f}, Val Loss = {val_loss:.6f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = model.state_dict()
            #print(f"New best model found at epoch {epoch+1} with val loss {val_loss:.6f}")
            torch.save(best_model_state, os.path.join(drive_path, f"best_model_{split_name}.pt"))

    model.load_state_dict(torch.load(os.path.join(drive_path, f"best_model_{split_name}.pt")))
    model.eval()

    total_mse = 0.0
    total_samples = 0
    with torch.no_grad():
        for X_batch, Y_batch in test_loader:
            outputs = model(X_batch)
            batch_mse = criterion(outputs, Y_batch).item()
            total_mse += batch_mse * X_batch.size(0)
            total_samples += X_batch.size(0)

    avg_mse = total_mse / total_samples
    print(f"Final Test MSE ({split_name}): {avg_mse:.6f}")
    NLinear_mse.loc[len(NLinear_mse)] = [split_name, avg_mse]


==== Running Split: 80_20 ====
Split 80_20 — X (INPUT): 80, Y (OUTPUT): 21
Train: torch.Size([1729, 80, 1]), Val: torch.Size([433, 80, 1]), Test: torch.Size([541, 80, 1])
Training started...
Epoch 10: Train Loss = 0.000252, Val Loss = 0.000861
Epoch 20: Train Loss = 0.000139, Val Loss = 0.000282
Epoch 30: Train Loss = 0.000080, Val Loss = 0.000136
Epoch 40: Train Loss = 0.000057, Val Loss = 0.000084
Epoch 50: Train Loss = 0.000020, Val Loss = 0.000071
Epoch 60: Train Loss = 0.000014, Val Loss = 0.000048
Epoch 70: Train Loss = 0.000070, Val Loss = 0.000042
Epoch 80: Train Loss = 0.000024, Val Loss = 0.000035
Epoch 90: Train Loss = 0.000076, Val Loss = 0.000053
Epoch 100: Train Loss = 0.000030, Val Loss = 0.000031
Epoch 110: Train Loss = 0.000014, Val Loss = 0.000035
Epoch 120: Train Loss = 0.000027, Val Loss = 0.000030
Epoch 130: Train Loss = 0.000064, Val Loss = 0.000019
Epoch 140: Train Loss = 0.000017, Val Loss = 0.000017
Epoch 150: Train Loss = 0.000003, Val Loss = 0.000017
Epoch 1

**New BASIC-DSSM EXPERIMENT**

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import os

# Set seeds
np.random.seed(42)
torch.manual_seed(42)

# Output path
drive_path = '/content/drive/MyDrive/DSSM-Figures'
os.makedirs(drive_path, exist_ok=True)

# Config
config = {
    'hidden_dim': 101,
    'epochs': 500,
    'batch_size': 64,
    'learning_rate': 0.001,
    'model_name': 'DSSM_Basic'
}

# Store results
DSSM_BASIC_MSE = pd.DataFrame(columns=['Split', 'Test_MSE'])

# Loop through splits
splits = [(80, 20), (60, 40), (50, 50), (40, 60), (20, 80), (10, 90), (5, 95), (3, 97), (1, 99)]
for train_pct, test_pct in splits:
    split_name = f"{train_pct}_{test_pct}"
    print(f"\n==== Running Split: {split_name} ====")

    file_ids = df_output['file_id'].unique()
    train_ids, test_ids = train_test_split(file_ids, test_size=0.2, random_state=42)

    df_train = df_output[df_output['file_id'].isin(train_ids)]
    df_test = df_output[df_output['file_id'].isin(test_ids)]

    train_timestep = int(train_pct / 100 * 101)
    print(f"X (INPUT): {train_timestep}, Y (OUTPUT): {101 - train_timestep}")

    X_train = df_train.pivot(index='file_id', columns='timestep', values='CO2').values[:, :train_timestep]
    Y_train = df_train.pivot(index='file_id', columns='timestep', values='CO2').values[:, train_timestep:]
    X_test = df_test.pivot(index='file_id', columns='timestep', values='CO2').values[:, :train_timestep]
    Y_test = df_test.pivot(index='file_id', columns='timestep', values='CO2').values[:, train_timestep:]

    static_train = merged_df[merged_df['file_id'].isin(train_ids)].drop(columns=['file_id', 'cluster']).values
    static_test = merged_df[merged_df['file_id'].isin(test_ids)].drop(columns=['file_id', 'cluster']).values

    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
    static_train_tensor = torch.tensor(static_train, dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
    static_test_tensor = torch.tensor(static_test, dtype=torch.float32)

    print(f"Train X shape: {X_train_tensor.shape}, Y: {Y_train_tensor.shape}, Static: {static_train_tensor.shape}")
    print(f"Test  X shape: {X_test_tensor.shape}, Y: {Y_test_tensor.shape}, Static: {static_test_tensor.shape}")

    train_dataset = TensorDataset(X_train_tensor, static_train_tensor, Y_train_tensor)
    test_dataset = TensorDataset(X_test_tensor, static_test_tensor, Y_test_tensor)
    train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=config['batch_size'])

    model = DeepStateSpaceModel(
        input_dim=1,
        static_dim=static_train.shape[1],
        hidden_dim=config['hidden_dim'],
        output_dim=Y_train.shape[1]
    )

    optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'])
    criterion = nn.MSELoss()

    best_loss = float('inf')
    best_model_path = os.path.join(drive_path, f"{config['model_name']}_best_model_{split_name}.pt")

    print("Training started...")
    for epoch in range(config['epochs']):
        model.train()
        epoch_loss = 0.0
        for X_batch, static_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch, static_batch)
            loss = criterion(preds, Y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        epoch_loss /= len(train_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}: Train Loss = {epoch_loss:.6f}")

        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), best_model_path)
            print(f"New best model saved at epoch {epoch+1} with loss {epoch_loss:.6f}")

    model.load_state_dict(torch.load(best_model_path))
    model.eval()

    with torch.no_grad():
        total_mse = 0.0
        total_samples = 0
        for X_batch, static_batch, Y_batch in test_loader:
            preds = model(X_batch, static_batch)
            batch_mse = criterion(preds, Y_batch).item()
            total_mse += batch_mse * X_batch.size(0)
            total_samples += X_batch.size(0)

    avg_mse = total_mse / total_samples
    print(f"Final Test MSE for DSSM-Basic ({split_name}): {avg_mse:.6f}")
    DSSM_BASIC_MSE.loc[len(DSSM_BASIC_MSE)] = [split_name, avg_mse]


==== Running Split: 80_20 ====
X (INPUT): 80, Y (OUTPUT): 21
Train X shape: torch.Size([2162, 80]), Y: torch.Size([2162, 21]), Static: torch.Size([2162, 14])
Test  X shape: torch.Size([541, 80]), Y: torch.Size([541, 21]), Static: torch.Size([541, 14])
Training started...
New best model saved at epoch 1 with loss 2.632255
New best model saved at epoch 2 with loss 0.206199
New best model saved at epoch 3 with loss 0.158680
New best model saved at epoch 4 with loss 0.153687
New best model saved at epoch 5 with loss 0.149631
New best model saved at epoch 6 with loss 0.147199
New best model saved at epoch 7 with loss 0.142990
New best model saved at epoch 8 with loss 0.138572
New best model saved at epoch 9 with loss 0.132981
Epoch 10: Train Loss = 0.128052
New best model saved at epoch 10 with loss 0.128052
New best model saved at epoch 11 with loss 0.124268
New best model saved at epoch 12 with loss 0.120602
New best model saved at epoch 13 with loss 0.114972
New best model saved at epoc

**New LSTM experiment-**

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.model_selection import train_test_split
import pandas as pd
import os

# Setup
splits = [(80, 20), (60, 40), (50, 50), (40, 60), (20, 80), (10, 90), (5, 95), (3, 97), (1, 99)]
LSTM_mse = []

# For saving models
os.makedirs("drive/My Drive/DSSM-Models", exist_ok=True)

np.random.seed(42)
torch.manual_seed(42)

for train_pct, test_pct in splits:
    split_name = f"{train_pct}_{test_pct}"
    train_ids, test_ids = train_test_split(df_output['file_id'].unique(), test_size=0.2, random_state=42)
    df_train = df_output[df_output['file_id'].isin(train_ids)]
    df_test = df_output[df_output['file_id'].isin(test_ids)]

    train_timestep = int(train_pct / 100 * 101)
    pred_timestep = 101 - train_timestep
    print(f"\n====== Split: {split_name} | X (input): {train_timestep} AND Y (output): {pred_timestep} ======")

    # Pivot time series
    X_train = df_train.pivot(index='file_id', columns='timestep', values='CO2').values[:, :train_timestep]
    Y_train = df_train.pivot(index='file_id', columns='timestep', values='CO2').values[:, train_timestep:]
    X_test = df_test.pivot(index='file_id', columns='timestep', values='CO2').values[:, :train_timestep]
    Y_test = df_test.pivot(index='file_id', columns='timestep', values='CO2').values[:, train_timestep:]

    static_train = merged_df[merged_df['file_id'].isin(train_ids)].drop(columns=['file_id', 'cluster']).values
    static_test = merged_df[merged_df['file_id'].isin(test_ids)].drop(columns=['file_id', 'cluster']).values

    # Tensors
    X_train_tensor = torch.tensor(X_train[:, :, None], dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
    static_train_tensor = torch.tensor(static_train, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test[:, :, None], dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
    static_test_tensor = torch.tensor(static_test, dtype=torch.float32)

    print(f"Train X shape: {X_train_tensor.shape}, Train Y shape: {Y_train_tensor.shape}, Static: {static_train_tensor.shape}")
    print(f"Test  X shape: {X_test_tensor.shape}, Test  Y shape: {Y_test_tensor.shape}, Static: {static_test_tensor.shape}")

    # Dataset and loaders
    full_train_dataset = TensorDataset(X_train_tensor, static_train_tensor, Y_train_tensor)
    val_size = int(0.1 * len(full_train_dataset))
    train_size = len(full_train_dataset) - val_size
    train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))
    test_dataset = TensorDataset(X_test_tensor, static_test_tensor, Y_test_tensor)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=64)
    test_loader = DataLoader(test_dataset, batch_size=64)

    # ------------------ Model and Config ------------------
    lstm_config = {
        "static_dim": static_train.shape[1],
        "hidden_dim": 128,
        "num_layers": 2,
        "dropout": 0.0,
        "lr": 0.001,
        "batch_size": 64,
        "epochs": 500
    }

    model = LSTM_MIMO(
        input_len=train_timestep,
        output_len=pred_timestep,
        static_dim=lstm_config["static_dim"],
        hidden_dim=lstm_config["hidden_dim"],
        num_layers=lstm_config["num_layers"]
    )

    optimizer = optim.Adam(model.parameters(), lr=lstm_config["lr"])
    criterion = nn.MSELoss()

    # ------------------ Training ------------------
    best_val_loss = float('inf')
    best_model_path = f"drive/My Drive/DSSM-Models/LSTM_best_{split_name}.pt"
    print("Training started...")

    for epoch in range(1, lstm_config["epochs"] + 1):
        model.train()
        for X_batch, static_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch, static_batch)
            loss = criterion(preds, Y_batch)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        val_samples = 0
        with torch.no_grad():
            for X_batch, static_batch, Y_batch in val_loader:
                preds = model(X_batch, static_batch)
                batch_loss = criterion(preds, Y_batch).item()
                val_loss += batch_loss * X_batch.size(0)
                val_samples += X_batch.size(0)
        avg_val_loss = val_loss / val_samples

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), best_model_path)

        if epoch % 10 == 0:
            print(f"[Epoch {epoch}] Training Loss: {loss.item():.6f} | Val Loss: {avg_val_loss:.6f}")

    # ------------------ Evaluation ------------------
    model.load_state_dict(torch.load(best_model_path))
    model.eval()
    total_mse = 0.0
    total_samples = 0
    with torch.no_grad():
        for X_batch, static_batch, Y_batch in test_loader:
            outputs = model(X_batch, static_batch)
            batch_mse = criterion(outputs, Y_batch).item()
            total_mse += batch_mse * X_batch.size(0)
            total_samples += X_batch.size(0)

    avg_mse = total_mse / total_samples
    print(f"Final Test MSE for LSTM ({split_name}): {avg_mse:.6f}")
    LSTM_mse.append({'Split': split_name, 'Test_MSE': avg_mse})

# Save result as DataFrame
LSTM_mse = pd.DataFrame(LSTM_mse)


Train X shape: torch.Size([2162, 80, 1]), Train Y shape: torch.Size([2162, 21]), Static: torch.Size([2162, 14])
Test  X shape: torch.Size([541, 80, 1]), Test  Y shape: torch.Size([541, 21]), Static: torch.Size([541, 14])
Training started...
[Epoch 10] Training Loss: 0.130054 | Val Loss: 0.182844
[Epoch 20] Training Loss: 0.168539 | Val Loss: 0.188108
[Epoch 30] Training Loss: 0.147180 | Val Loss: 0.194307
[Epoch 40] Training Loss: 0.136666 | Val Loss: 0.180678
[Epoch 50] Training Loss: 0.131765 | Val Loss: 0.180540
[Epoch 60] Training Loss: 0.113578 | Val Loss: 0.194123
[Epoch 70] Training Loss: 0.176367 | Val Loss: 0.181501
[Epoch 80] Training Loss: 0.118859 | Val Loss: 0.185078
[Epoch 90] Training Loss: 0.191583 | Val Loss: 0.181211
[Epoch 100] Training Loss: 0.150244 | Val Loss: 0.215887
[Epoch 110] Training Loss: 0.180040 | Val Loss: 0.181872
[Epoch 120] Training Loss: 0.152387 | Val Loss: 0.206184
[Epoch 130] Training Loss: 0.168289 | Val Loss: 0.183208
[Epoch 140] Training Loss:

**New TCN model**

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import os

# ========== TCN-MIMO Hyperparameters ==========
tcn_mimo_config = {
    'num_channels': [64, 64, 64],
    'kernel_size': 3,
    'dropout': 0.2,
    'batch_size': 64,
    'learning_rate': 0.001,
    'epochs': 500,
    'model_name': 'TCN_MIMO'
}

# ========== MSE Storage ==========
TCN_MIMO_mse = pd.DataFrame(columns=['Split', 'Test_MSE'])

# ========== Model Save Path ==========
drive_path = '/content/drive/MyDrive/DSSM-Figures'
os.makedirs(drive_path, exist_ok=True)

# ========== Define Splits ==========
splits = [(80, 20), (60, 40), (50, 50), (40, 60), (20, 80), (10, 90), (5, 95), (3, 97), (1, 99)]

# ========== Seed ==========
np.random.seed(42)
torch.manual_seed(42)

# ========== Main Loop ==========
for train_pct, test_pct in splits:
    split_name = f"{train_pct}_{test_pct}"
    train_ids, test_ids = train_test_split(df_output['file_id'].unique(), test_size=0.2, random_state=42)
    df_train = df_output[df_output['file_id'].isin(train_ids)]
    df_val = df_output[df_output['file_id'].isin(test_ids[:len(test_ids)//2])]
    df_test = df_output[df_output['file_id'].isin(test_ids[len(test_ids)//2:])]

    train_timestep = int(train_pct / 100 * 101)
    pred_timestep = 101 - train_timestep
    print(f"\n====== Split: {split_name} | X (input): {train_timestep} AND Y (output): {pred_timestep} ======")

    def get_xy_static(df, ids):
        df_ = df[df['file_id'].isin(ids)]
        X = df_.pivot(index='file_id', columns='timestep', values='CO2').values[:, :train_timestep]
        Y = df_.pivot(index='file_id', columns='timestep', values='CO2').values[:, train_timestep:]
        static = merged_df[merged_df['file_id'].isin(ids)].drop(columns=['file_id', 'cluster']).values
        return X[:, :, None], Y, static

    X_train, Y_train, static_train = get_xy_static(df_train, train_ids)
    X_val, Y_val, static_val = get_xy_static(df_val, test_ids[:len(test_ids)//2])
    X_test, Y_test, static_test = get_xy_static(df_test, test_ids[len(test_ids)//2:])

    # Tensor conversion
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
    static_train_tensor = torch.tensor(static_train, dtype=torch.float32)

    X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
    Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32)
    static_val_tensor = torch.tensor(static_val, dtype=torch.float32)

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)
    static_test_tensor = torch.tensor(static_test, dtype=torch.float32)

    # DataLoaders
    train_loader = DataLoader(TensorDataset(X_train_tensor, static_train_tensor, Y_train_tensor), batch_size=tcn_mimo_config['batch_size'], shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val_tensor, static_val_tensor, Y_val_tensor), batch_size=tcn_mimo_config['batch_size'])
    test_loader = DataLoader(TensorDataset(X_test_tensor, static_test_tensor, Y_test_tensor), batch_size=tcn_mimo_config['batch_size'])

    # Model
    model = TCN_MIMO(
        input_len=train_timestep,
        output_len=pred_timestep,
        static_dim=static_train.shape[1],
        num_channels=tcn_mimo_config['num_channels'],
        kernel_size=tcn_mimo_config['kernel_size'],
        dropout=tcn_mimo_config['dropout']
    )

    optimizer = optim.Adam(model.parameters(), lr=tcn_mimo_config['learning_rate'])
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    best_model_path = os.path.join(drive_path, f"best_model_TCN_MIMO_{split_name}.pt")

    print("Training started...")
    for epoch in range(tcn_mimo_config['epochs']):
        model.train()
        for X_batch, static_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            preds = model(X_batch, static_batch)
            loss = criterion(preds, Y_batch)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for X_batch, static_batch, Y_batch in val_loader:
                preds = model(X_batch, static_batch)
                val_loss += criterion(preds, Y_batch).item()
        val_loss /= len(val_loader)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}: Train Loss = {loss.item():.6f}, Val Loss = {val_loss:.6f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_model_path)
            #print(f"Best model updated at epoch {epoch+1} with val loss {val_loss:.6f}")

    # Load and evaluate
    model.load_state_dict(torch.load(best_model_path))
    model.eval()
    total_mse = 0.0
    total_samples = 0

    with torch.no_grad():
        for X_batch, static_batch, Y_batch in test_loader:
            outputs = model(X_batch, static_batch)
            batch_mse = criterion(outputs, Y_batch).item()
            total_mse += batch_mse * X_batch.size(0)
            total_samples += X_batch.size(0)

    avg_mse = total_mse / total_samples
    print(f"✅ TCN_MIMO Final Test MSE for split {split_name}: {avg_mse:.6f}")
    TCN_MIMO_mse.loc[len(TCN_MIMO_mse)] = [split_name, avg_mse]



Training started...
Epoch 10: Train Loss = 0.008023, Val Loss = 0.006117
Epoch 20: Train Loss = 0.003023, Val Loss = 0.002582
Epoch 30: Train Loss = 0.002007, Val Loss = 0.001840
Epoch 40: Train Loss = 0.003558, Val Loss = 0.004436
Epoch 50: Train Loss = 0.002107, Val Loss = 0.001406
Epoch 60: Train Loss = 0.001418, Val Loss = 0.001573
Epoch 70: Train Loss = 0.003597, Val Loss = 0.001043
Epoch 80: Train Loss = 0.000921, Val Loss = 0.000836
Epoch 90: Train Loss = 0.002337, Val Loss = 0.001455
Epoch 100: Train Loss = 0.000890, Val Loss = 0.000964
Epoch 110: Train Loss = 0.002451, Val Loss = 0.003616
Epoch 120: Train Loss = 0.000463, Val Loss = 0.001024
Epoch 130: Train Loss = 0.001080, Val Loss = 0.000867
Epoch 140: Train Loss = 0.001432, Val Loss = 0.000715
Epoch 150: Train Loss = 0.000584, Val Loss = 0.000574
Epoch 160: Train Loss = 0.000661, Val Loss = 0.000500
Epoch 170: Train Loss = 0.000351, Val Loss = 0.000496
Epoch 180: Train Loss = 0.000488, Val Loss = 0.000353
Epoch 190: Train

# Visualization

**BASELINE VISUALIZATION - MSE Bar and line**

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

# ---------------- SETUP ----------------
mpl.rcParams.update({
    'font.size': 20,
    'axes.titlesize': 24,
    'axes.labelsize': 22,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'legend.fontsize': 20,
    'figure.dpi': 300,
    'savefig.dpi': 300,
    'figure.autolayout': True,
})

# ---------------- FIX COLUMN NAMES ----------------
DSSM_Advanced_mse.rename(columns={'Test_MSE': 'MSE'}, inplace=True)
DSSM_BASIC_MSE.rename(columns={'Test_MSE': 'MSE'}, inplace=True)
TFT_mse.rename(columns={'Test_MSE': 'MSE'}, inplace=True)
NLinear_mse.rename(columns={'Test_MSE': 'MSE'}, inplace=True)
TCN_MIMO_mse.rename(columns={'Test_MSE': 'MSE'}, inplace=True)

# ---------------- ASSIGN MODEL NAMES ----------------
DSSM_Advanced_mse['Model'] = 'DSSM-advanced'
DSSM_BASIC_MSE['Model'] = 'DSSM-basic'
TFT_mse['Model'] = 'TFT'
NLinear_mse['Model'] = 'NLinear'
TCN_MIMO_mse['Model'] = 'TCN'
LSTM_mse['Model'] = 'BiLSTM'

# ---------------- COMBINE MODEL MSE ----------------
mse_df = pd.concat([
    DSSM_Advanced_mse,
    DSSM_BASIC_MSE,
    TFT_mse,
    NLinear_mse,
    TCN_MIMO_mse,
    LSTM_mse
], ignore_index=True)

# ---------------- PIVOT ----------------
splits = ['80_20', '60_40', '50_50', '40_60', '20_80', '10_90', '5_95', '3_97']
mse_pivot = mse_df.pivot(index='Model', columns='Split', values='MSE').reset_index()
mse_pivot = mse_pivot[['Model'] + [s for s in splits if s in mse_pivot.columns]]

# ---------------- PLOTTING ----------------
output_folder = 'drive/My Drive/DSSM-Figures'
os.makedirs(output_folder, exist_ok=True)

# --- BAR PLOT ---
x = range(len(mse_pivot['Model']))
width = 0.09
plt.figure(figsize=(16, 10))
for i, split in enumerate(mse_pivot.columns[1:]):
    plt.bar(
        [pos + i * width for pos in x],
        mse_pivot[split],
        width=width,
        label=split.replace('_', ':')
    )

plt.xticks([pos + (len(mse_pivot.columns[1:]) // 2) * width for pos in x],
           mse_pivot['Model'], rotation=30, ha='right')
plt.xlabel("Model")
plt.ylabel("MSE Score")
plt.title("MSE Scores across Models and X-Y Splits")
plt.legend(title="X-Y Splits")
plt.tight_layout(pad=3.0)
plt.savefig(f'{output_folder}/Figure_X_MSE_Barplot.pdf', format='pdf', bbox_inches='tight')
plt.close()

# --- LINE PLOT ---
plt.figure(figsize=(16, 10))
for idx, row in mse_pivot.iterrows():
    model = row['Model']
    plt.plot(
        [s.replace('_', ':') for s in mse_pivot.columns[1:]],
        row[mse_pivot.columns[1:]].values,
        marker='o',
        label=model
    )

plt.xlabel("X-Y Split")
plt.ylabel("MSE Score")
plt.title("MSE Scores across X-Y Splits for Each Model")
plt.legend(title="Model", loc='upper left')
plt.tight_layout(pad=3.0)
plt.savefig(f'{output_folder}/Figure_Y_MSE_Lineplot.pdf', format='pdf', bbox_inches='tight')
plt.close()

**Log (MSE) - Bar and Line**

In [None]:
# --- BAR PLOT with log scale ---
x = range(len(mse_pivot['Model']))
width = 0.09
plt.figure(figsize=(16, 10))
for i, split in enumerate(mse_pivot.columns[1:]):
    plt.bar(
        [pos + i * width for pos in x],
        mse_pivot[split],
        width=width,
        label=split.replace('_', ':')
    )

plt.xticks([pos + (len(mse_pivot.columns[1:]) // 2) * width for pos in x],
           mse_pivot['Model'], rotation=30, ha='right')
plt.xlabel("Model")
plt.ylabel("MSE Score (log scale)")
plt.title("Log-Scaled MSE Scores across Models and X-Y Splits")
plt.yscale('log')  # <--- Apply log scale here
plt.legend(title="X-Y Splits")
plt.tight_layout(pad=3.0)
plt.savefig(f'{output_folder}/Figure_X_MSE_Barplot_log-1.0.pdf', format='pdf', bbox_inches='tight')
plt.close()

# --- LINE PLOT with log scale ---
plt.figure(figsize=(16, 10))
for idx, row in mse_pivot.iterrows():
    model = row['Model']
    plt.plot(
        [s.replace('_', ':') for s in mse_pivot.columns[1:]],
        row[mse_pivot.columns[1:]].values,
        marker='o',
        label=model
    )

plt.xlabel("X-Y Split")
plt.ylabel("MSE Score (log scale)")
plt.title("Log-Scaled MSE Scores across X-Y Splits for Each Model")
plt.yscale('log')  # <--- Apply log scale here
plt.legend(title="Model", loc='upper left')
plt.tight_layout(pad=3.0)
plt.savefig(f'{output_folder}/Figure_Y_MSE_Lineplot_log-1.0.pdf', format='pdf', bbox_inches='tight')
plt.close()

Saving dataframe as csv

In [None]:
# ---------------- SAVE MSE TABLE TO CSV ----------------
csv_output_path = 'drive/My Drive/DSSM-Figures/All_Model_MSE_Table-1.0.csv'
mse_pivot.to_csv(csv_output_path, index=False)
print(f"MSE table saved to: {csv_output_path}")

MSE table saved to: drive/My Drive/DSSM-Figures/All_Model_MSE_Table-1.0.csv


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import matplotlib as mpl
from google.colab import drive
# --------------------- Load Data ---------------------
print("Mounting Google Drive and loading dataset...")
drive.mount('/content/drive')

# ---------------- LOAD MSE TABLE ----------------
csv_input_path = 'drive/My Drive/DSSM-Figures/All_Model_MSE_Table-1.0.csv'
mse_pivot = pd.read_csv(csv_input_path)

Mounting Google Drive and loading dataset...
Mounted at /content/drive


In [3]:
mse_pivot

Unnamed: 0,Model,80_20,60_40,50_50,40_60,20_80,10_90,5_95
0,DSSM-advanced,4.5e-05,0.000172,0.000832,0.000721,0.002107,0.004962,0.007876
1,DSSM-basic,0.001168,0.002288,0.003769,0.0059,0.010188,0.016807,0.021219
2,NLinear,1e-05,0.00014,0.00034,0.001077,0.009894,0.024607,0.03383
3,TFT,0.000209,0.000446,0.00059,0.00243,0.003448,0.01068,0.016687
4,LSTM,0.001343,0.005442,0.008847,0.017022,0.054216,0.112788,0.118881


In [6]:
# --- LINE PLOT ---
output_folder = 'drive/My Drive/DSSM-Figures'
plt.figure(figsize=(16, 10))
for idx, row in mse_pivot.iterrows():
    model = row['Model']
    plt.plot(
        [s.replace('_', ':') for s in mse_pivot.columns[1:]],
        row[mse_pivot.columns[1:]].values,
        marker='o',
        label=model
    )

plt.xlabel("X-Y Split")
plt.ylabel("MSE Score")
plt.title("MSE Scores across X-Y Splits for Each Model")
plt.legend(title="Model", loc='upper left')
plt.tight_layout(pad=3.0)
plt.savefig(f'{output_folder}/Figure_Y_MSE_Lineplot-final.pdf', format='pdf', bbox_inches='tight')
plt.close()