In [75]:
import sys
# Put the project checkout on the import path (presumably this is where the
# local `models` package lives — TODO confirm). The membership check keeps the
# cell idempotent: re-running it no longer appends duplicate sys.path entries.
_project_root = '/home/gddaslab/mxp140/ca_signaling_surrogate_model'
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [76]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.amp import GradScaler, autocast
import numpy as np
import pickle
from models import GRUModel
from sklearn.preprocessing import MinMaxScaler
# ----------------------------
#        DEVICE SETUP
# ----------------------------
# Ask cuDNN to auto-tune its kernels; this pays off when input sizes stay fixed.
torch.backends.cudnn.benchmark = True
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [77]:
# ----------------------------
#       DATA LOADING
# ----------------------------
# Problem-size constants. NUM_FEATURES drives the reshape used for
# normalisation and the model input width; the other two are not referenced
# by the cells shown here (presumably trajectory count and time steps per
# trajectory — TODO confirm against the data files).
NUM_FEATURES, NUM_TRAJECTORIES, NUM_T_INPUTS = 1, 100, 50
all_input = NUM_TRAJECTORIES
# Model inputs, loaded as stored on disk.
input_arr = np.load("improper_solutions.npy")
# Regression targets, forced into a single column: shape (-1, 1).
output_arr = np.reshape(np.loadtxt("improper_k_samples.txt"), (-1, 1))

In [78]:
# ----------------------------
#       NORMALIZATION
# ----------------------------
# Flatten the inputs to rows of NUM_FEATURES values so a single min-max range
# is learned per feature, scale them, then restore the original array layout.
# NOTE(review): both scalers are fit on the full arrays, i.e. before the
# train/validation split is made — confirm this is intended.
input_2d = input_arr.reshape(-1, NUM_FEATURES)
input_scaler = MinMaxScaler().fit(input_2d)
input_scaled = input_scaler.transform(input_2d)
norm_in_arr = input_scaled.reshape(input_arr.shape)
# Targets are a single column (one value per row) and get their own scaler.
output_scaler = MinMaxScaler().fit(output_arr)
norm_out_arr = output_scaler.transform(output_arr)
# Persist both fitted scalers so predictions can be (de)normalised later.
for scaler_path, fitted_scaler in (
    ("improper_input_scaler.pkl", input_scaler),
    ("improper_output_scaler.pkl", output_scaler),
):
    with open(scaler_path, "wb") as f:
        pickle.dump(fitted_scaler, f)

In [79]:
# ----------------------------
#       DATASET PREP
# ----------------------------
# Convert the normalised arrays to float32 tensors; layouts are unchanged.
input_tensor = torch.from_numpy(norm_in_arr).float()  # same shape as norm_in_arr
output_tensor = torch.from_numpy(norm_out_arr).float()  # same shape as norm_out_arr

# One dataset yielding (input, target) pairs
dataset = TensorDataset(input_tensor, output_tensor)

# Train/Val Split: 90% training, remainder validation
dataset_size = len(dataset)
train_size = int(0.9 * dataset_size)
val_size = dataset_size - train_size

# Seed the split so the same samples land in validation on every
# Restart & Run All; otherwise validation losses are not comparable across runs.
SPLIT_SEED = 42
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
batch_size = 10
# Pinned host memory only helps (and only avoids a warning) when CUDA is present.
use_pinned_memory = torch.cuda.is_available()
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=use_pinned_memory,
    num_workers=0,
    drop_last=True,  # keep training batches a fixed size
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=use_pinned_memory,
    num_workers=0,
    # Evaluate every validation sample: dropping the last partial batch would
    # silently discard data and leave the loader empty when val_size < batch_size.
    drop_last=False,
)

In [80]:
# ----------------------------
#       MODEL SETUP
# ----------------------------
input_dim = NUM_FEATURES
hidden_dim = 128
output_dim = 1
num_layers = 1  # number of GRU layers
dropout = (
    0.1 if num_layers > 1 else 0.0
)  # dropout is only switched on when more than one layer is stacked
bidirectional = False

model = GRUModel(
    input_dim, hidden_dim, output_dim, num_layers, bidirectional, dropout
).to(device)
# Optional: Compile model (requires PyTorch 2.0+)
model = torch.compile(model)

# Single regression objective: mean squared error on the normalised target
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001)
# Tie the gradient scaler to the device actually in use. Loss scaling is only
# enabled on CUDA; on CPU the scaler becomes a pass-through instead of being
# created for "cuda" and disabling itself with a warning.
scaler = GradScaler(device=device.type, enabled=(device.type == "cuda"))

In [81]:
# ----------------------------
#      TRAINING LOOP
# ----------------------------
# Trains for a fixed number of epochs, evaluates on the validation loader after
# each one, and checkpoints the weights whenever the validation loss improves.
num_epochs = 100
best_val_loss = float("inf")

# Mixed precision is only used on CUDA; elsewhere autocast is disabled
# explicitly rather than requested for "cuda" and rejected with a warning.
use_amp = device.type == "cuda"

# Fail early with a clear message instead of a ZeroDivisionError when a loader
# yields no batches (e.g. fewer samples than batch_size with drop_last=True).
if len(train_loader) == 0 or len(val_loader) == 0:
    raise ValueError(
        "train_loader and val_loader must each yield at least one batch; "
        "check the dataset size, batch_size and drop_last settings."
    )

for epoch in range(num_epochs):
    model.train()
    running_train_loss = 0.0

    for batch_X, batch_y in train_loader:
        # Append a trailing feature axis, e.g. [batch, T] -> [batch, T, 1]
        batch_X = batch_X.unsqueeze(-1).to(device)
        batch_y = batch_y.to(device)  # targets, one column per sample

        optimizer.zero_grad(set_to_none=True)

        with autocast(device_type=device.type, enabled=use_amp):
            # Forward pass
            pred_output = model(batch_X)

            # MSE between prediction and normalised target
            loss = criterion(pred_output, batch_y)

        # Backward and optimize (scaled to avoid fp16 gradient underflow)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_train_loss += loss.item()

    # Training batches are fixed-size in this notebook, so the mean of batch
    # means is the per-sample mean.
    train_loss = running_train_loss / len(train_loader)

    # Validation
    model.eval()
    running_val_loss = 0.0
    num_val_samples = 0
    with torch.no_grad():
        for val_X, val_y in val_loader:
            # Same trailing feature axis as in training
            val_X = val_X.unsqueeze(-1).to(device)
            val_y = val_y.to(device)

            with autocast(device_type=device.type, enabled=use_amp):
                pred_output_val = model(val_X)
                batch_val_loss = criterion(pred_output_val, val_y)

            # Weight each batch by its size so a smaller final batch does not
            # skew the epoch average (identical result when all batches match).
            running_val_loss += batch_val_loss.item() * val_X.size(0)
            num_val_samples += val_X.size(0)

    val_loss = running_val_loss / num_val_samples

    # Save best model
    # NOTE(review): if `model` is the torch.compile wrapper, the saved keys are
    # likely prefixed with "_orig_mod." — confirm the loading code expects that.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "toy_model_improper.pth")
        print(
            f"Epoch [{epoch+1}/{num_epochs}], "
            f"Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f} -- Model saved"
        )
    else:
        print(
            f"Epoch [{epoch+1}/{num_epochs}], "
            f"Train Loss: {train_loss:.6f}, Validation Loss: {val_loss:.6f}"
        )

Epoch [1/100], Train Loss: 0.5165, Validation Loss: 0.3266 -- Model saved
Epoch [2/100], Train Loss: 0.4739, Validation Loss: 0.2932 -- Model saved
Epoch [3/100], Train Loss: 0.4339, Validation Loss: 0.2625 -- Model saved
Epoch [4/100], Train Loss: 0.3961, Validation Loss: 0.2339 -- Model saved
Epoch [5/100], Train Loss: 0.3613, Validation Loss: 0.2064 -- Model saved
Epoch [6/100], Train Loss: 0.3251, Validation Loss: 0.1804 -- Model saved
Epoch [7/100], Train Loss: 0.2911, Validation Loss: 0.1547 -- Model saved
Epoch [8/100], Train Loss: 0.2566, Validation Loss: 0.1294 -- Model saved
Epoch [9/100], Train Loss: 0.2199, Validation Loss: 0.1051 -- Model saved
Epoch [10/100], Train Loss: 0.1843, Validation Loss: 0.0818 -- Model saved
Epoch [11/100], Train Loss: 0.1471, Validation Loss: 0.0626 -- Model saved
Epoch [12/100], Train Loss: 0.1095, Validation Loss: 0.0530 -- Model saved
Epoch [13/100], Train Loss: 0.088471, Validation Loss: 0.060854
Epoch [14/100], Train Loss: 0.079038, Validat