In [1]:
%load_ext autoreload
%autoreload 2

import subprocess
import sys
import torch
from pathlib import Path


In [2]:
import torch

from regular import RegularTransformer, RegularFeedForward
from projected import ProjectedTransformer, ProjectedFeedForward
from exponential import ExponentialTransformer, ExponentialFeedForward
from train_constrained import train_model
import projections
import exponentials

In [5]:
# Location of the serialized CS dataset, relative to the notebook's working directory.
CS_DATASET_PATH = Path("./../Data/cs_dataset.pt")
# Entries of the loaded mapping that are checked for NaN/Inf below.
SPLIT_NAMES = ["X_train", "Y_train", "X_val", "Y_val", "X_test", "Y_test"]

# NOTE(review): weights_only=False makes torch.load fall back to full pickle
# deserialization, which can execute arbitrary code. Only load files you trust.
loaded = torch.load(CS_DATASET_PATH, map_location="cpu", weights_only=False)

# Show what the file contains: the shape of each entry, or its type when the
# entry has no `.shape` attribute.
for k, v in loaded.items():
    try:
        print(k, v.shape)
    except AttributeError:
        print(k, type(v))

# Validate every split *before* building the training tensors. as_tensor avoids
# the extra copy that torch.tensor would make just to run the check.
for name in SPLIT_NAMES:
    assert torch.isfinite(torch.as_tensor(loaded[name])).all(), f"Found NaN/Inf in {name}"

# torch.tensor copies, so the float32 training tensors never alias `loaded`.
X_train = torch.tensor(loaded['X_train'], dtype=torch.float32)
Y_train = torch.tensor(loaded['Y_train'], dtype=torch.float32)

X_train (4000, 10, 9)
Y_train (4000, 10, 9)
X_val (800, 10, 9)
Y_val (800, 10, 9)
X_test (1200, 10, 9)
Y_test (1200, 10, 9)


In [13]:
# Build the RegularTransformer used for the forward-pass check in the next cell.
# input_dim=9 matches the size of the last axis of X_train printed above.
model = RegularTransformer(
    input_dim=9, nhead=3,
    d_hid=128, nlayers=2,
    dropout=0, dt=1,
)

In [14]:
# Shape smoke test only: nothing is back-propagated here, so run the forward
# pass without recording an autograd graph for the whole training set.
with torch.no_grad():
    output_shape = model(X_train).shape

# Bare last expression so the notebook still displays the shape.
output_shape

torch.Size([4000, 10, 9])

In [5]:
# Training script launched once per model type, resolved against the working directory.
TRAIN_SCRIPT = "train_ff.py"
model_types = ["projected", "regular", "exponential", "probabilistic", "flow_matching"]
# Use internal projections/exponentials for these model types
INTERNAL_MODEL_TYPES = ("exponential", "projected", "flow_matching")


def build_train_command(model_type, device):
    """Return the argv list that launches train_ff.py for one model type.

    Args:
        model_type: Value passed to ``--model_type``.
        device: Value passed to ``--device`` (e.g. ``"cuda"`` or ``"cpu"``).

    Returns:
        A list of strings suitable for ``subprocess.run``. ``--use_internal`` is
        appended for the types in ``INTERNAL_MODEL_TYPES`` and
        ``--num_anchors 50`` for ``"probabilistic"``.
    """
    cmd = [
        sys.executable, TRAIN_SCRIPT,
        "--model_type", model_type,
        "--dataset", "cs",
        "--depth", "2",
        "--num_epochs", "10",
        "--batch_size", "100",
        "--eval_every", "1",
        "--lr", "1e-3",
        "--device", device,
        "--residual",
    ]
    if model_type in INTERNAL_MODEL_TYPES:
        cmd.append("--use_internal")
    if model_type == "probabilistic":
        cmd.extend(["--num_anchors", "50"])
    return cmd


# Loop-invariant settings, computed once instead of on every iteration.
device = "cuda" if torch.cuda.is_available() else "cpu"
workdir = Path.cwd()

# A missing script does NOT raise FileNotFoundError from subprocess.run: the
# child interpreter starts and exits non-zero. Check for the script explicitly
# so the "not found" message is reported for the condition it describes.
if not (workdir / TRAIN_SCRIPT).is_file():
    print("⚠ train_ff.py not found - please check the file path")
else:
    for model_type in model_types:
        print(f"\n{'='*60}")
        print(f"Testing {model_type} transformer on CS dataset (via train_ff.py)")
        print(f"{'='*60}\n")

        cmd = build_train_command(model_type, device)

        try:
            # check=True turns a non-zero exit status into CalledProcessError;
            # a failure is reported and the sweep moves on to the next model.
            result = subprocess.run(cmd, capture_output=True, text=True, check=True, cwd=workdir)
            print(result.stdout)
            if result.stderr:
                print("STDERR:", result.stderr)
            print(f"✓ {model_type} completed successfully")
        except subprocess.CalledProcessError as e:
            print(f"✗ {model_type} failed with return code {e.returncode}")
            print("STDOUT:", e.stdout)
            print("STDERR:", e.stderr)
        except FileNotFoundError as e:
            # Raised when the interpreter itself or the working directory is missing.
            print(f"✗ {model_type} could not be launched: {e}")



Testing projected transformer on CS dataset (via train_ff.py)

/projects/gtml/Constrained Networks/src/Data/cs_dataset.pt
torch.Size([4000, 10, 9])
Epoch      1 | train_loss=6.719520e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      2 | train_loss=6.719520e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      3 | train_loss=6.719520e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      4 | train_loss=6.719519e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      5 | train_loss=6.719519e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      6 | train_loss=6.719520e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      7 | train_loss=6.719519e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      8 | train_loss=6.719519e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch      9 | train_loss=6.719520e-01 | val_loss=6.743161e-01 | lr=1.000e-03
Epoch     10 | train_loss=6.719520e-01 | val_loss=6.743161e-01 | lr=1.000e-03

Saved best checkpoint (epoch=1, best_val=6.743161e-01) to: outputs/cs/