In [1]:
import torch
import scipy.io as sio
import glob
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from neuralop.models import FNO
from neuralop import Trainer
from neuralop.training import AdamW
from neuralop import LpLoss, H1Loss
from neuralop.utils import count_model_params
from neuralop.data.transforms.normalizers import UnitGaussianNormalizer
from abc import abstractmethod

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



In [2]:
# --- 1. Find and Load All Data Files ---
data_path = './FNO_Dataset/'
file_paths = glob.glob(f"{data_path}/FNO_dataset_run_*.mat")
file_paths.sort()

if not file_paths:
    raise FileNotFoundError(f"No .mat files found in {data_path}")

print(f"Found {len(file_paths)} data files.")

# --- 2. Split File Paths into Train and Test ---
train_split = int(0.8 * len(file_paths))
train_paths = file_paths[:train_split]
test_paths = file_paths[train_split:]

# --- 3. Load ALL Data into RAM for Normalization ---
# This is the workflow you prefer. It requires
# loading all training data into memory first.

def load_data_from_paths(paths, data_key='velocity_field_5D'):
    all_tensors = []
    for p in paths:
        try:
            mat_data = sio.loadmat(p)
            tensor_data = torch.tensor(mat_data[data_key]).float()
            all_tensors.append(tensor_data)
        except Exception as e:
            print(f"Warning: Error loading {p}: {e}")
    # Concatenate all runs along the time dimension (dim=0)
    return torch.cat(all_tensors, dim=0)

print("Loading training data into memory...")
train_data_sequence = load_data_from_paths(train_paths)
print(f"Full training sequence shape: {train_data_sequence.shape}")

print("Loading test data into memory...")
test_data_sequence = load_data_from_paths(test_paths)
print(f"Full test sequence shape: {test_data_sequence.shape}")

# --- 4. Fit and Transform (Your Method) ---
# Create the normalizer
normalizer = UnitGaussianNormalizer(dim=[0, 2, 3, 4]) 

# Fit ONLY on the training data
print("Fitting normalizer on training data...")
normalizer.fit(train_data_sequence)
print("Fit complete.")

# Transform both sets
print("Normalizing data...")
train_data = normalizer.transform(train_data_sequence)
test_data = normalizer.transform(test_data_sequence)

# --- ADD THIS SANITY CHECK ---
print(f"Normalized train data mean: {train_data.mean()}")
print(f"Normalized train data std: {train_data.std()}")
# -----------------------------

# Free up memory
del train_data_sequence
del test_data_sequence
print("Normalization complete. Raw data cleared from RAM.")

Found 1000 data files.
Loading training data into memory...


KeyboardInterrupt: 

In [3]:
# --- 5. Define Simple Dataset Class ---
class TimeSteppingDataset(Dataset):
    """
    A simple dataset that just returns the (t, t+1) pairs
    from a pre-normalized data sequence.
    """
    def __init__(self, data_sequence):
        self.data = data_sequence

    def __len__(self):
        return self.data.shape[0] - 1

    def __getitem__(self, idx):
        return {'x': self.data[idx], 'y': self.data[idx + 1]}

In [4]:
# --- 6. Create Datasets and DataLoaders ---
# Create the datasets from your NEW normalized tensors
train_dataset = TimeSteppingDataset(train_data)
test_dataset = TimeSteppingDataset(test_data)

# Create the DataLoaders
# Try a small batch size first due to memory
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

In [7]:
# --- 7. Define Model, Optimizer, Loss ---
model = FNO(
    n_modes=(16, 16, 16),
    hidden_channels=32,
    in_channels=3,
    out_channels=3,
    n_layers=4
).to(device) 

print(f"Model has {count_model_params(model)} parameters.")

optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4) # Using the lower 1e-4 lr
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)
l2loss = LpLoss(d=3, p=2)
h1loss = H1Loss(d=3)

# --- 8. Create Trainer (No Processor) ---
trainer = Trainer(model=model, n_epochs=1000,
                  device=device,
                  wandb_log=False,
                  eval_interval=10,
                  use_distributed=False,
                  verbose=True)

Model has 18887843 parameters.


In [8]:
# --- 9. Start Training ---
print("Starting training on full, normalized dataset...")
# Use the shape of one test sample as the key
test_key = test_data[0].shape[1] 
trainer.train(train_loader=train_loader,
              test_loaders={test_key: test_loader},
              optimizer=optimizer,
              scheduler=scheduler,
              training_loss=h1loss,
              eval_losses={'h1': h1loss, 'l2': l2loss},
                save_every=50,
                save_dir='./checkpoints/')

Starting training on full, normalized dataset...
Training on 807 samples
Testing on [201] samples         on resolutions [32].
Raw outputs of shape torch.Size([4, 3, 32, 32, 32])
[0] time=15.88, avg_loss=15059018.5891, train_err=60161524.7596
Eval: 32_h1=1997268.5000, 32_l2=807935.3750
[Rank 0]: saved training state to ./checkpoints/
[10] time=15.92, avg_loss=2591223.2009, train_err=10352064.9658
Eval: 32_h1=3431261.5000, 32_l2=2669713.5000
[20] time=15.92, avg_loss=1116687.0498, train_err=4461220.0454
Eval: 32_h1=626017.3125, 32_l2=362783.4062
[30] time=15.92, avg_loss=754442.1228, train_err=3014033.6292
Eval: 32_h1=950500.2500, 32_l2=951149.7500
[40] time=15.93, avg_loss=234416.5870, train_err=936505.8697
Eval: 32_h1=266810.7500, 32_l2=158060.8906
[50] time=15.92, avg_loss=135453.8060, train_err=541144.6605
Eval: 32_h1=129346.8047, 32_l2=5864.8750
[Rank 0]: saved training state to ./checkpoints/
[60] time=15.94, avg_loss=325914.5363, train_err=1302044.7070
Eval: 32_h1=275220.3438, 32

KeyboardInterrupt: 