In [1]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import matplotlib.pyplot as plt
import csv

# Import the FNO2d model from the neural operator library.
# (Adjust the import if necessary.)
from neuralop.models import FNO2d, TFNO2d, UNO, LocalFNO, FNO3d, TFNO3d
from spatioTemporalFNO import SpatioTemporalFNO

In [2]:
image_path = "/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00000.jpg"

# Open the image
img = Image.open(image_path)

# Get its width and height
width, height = img.size
print("Width:", width, "Height:", height)


Width: 1786 Height: 966


In [3]:
neural_operator = "fno"
sequence_length = 1

In [4]:
# Define the CSV file and write a header.
if sequence_length == 1:
    csv_file = "training_history_"+neural_operator+"_bull2.csv"
else:
    csv_file = "training_history_"+neural_operator+"_bull2_"+str(sequence_length)+".csv"
with open(csv_file, mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Epoch", "MSE Loss"])

In [4]:
torch.cuda.empty_cache()

In [5]:
import os
import glob
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image

class OpticalFlowDataset(Dataset):
    """
    Loads optical flow images for forecasting.
    
    This dataset can operate in two modes:
      - Original Mode (single-frame input): when sequence_length == 1.
        Returns:
           input  = optical flow at time t (shape: (C, H, W))
           target = optical flow at time t + forecast_offset (shape: (C, H, W))
      - Sequence Mode (multi-frame input): when sequence_length > 1.
        Returns:
           input  = sequence of optical flow frames of length `sequence_length` 
                    (shape: (T, C, H, W))
           target = optical flow at time t + sequence_length + forecast_offset - 1
                    (shape: (C, H, W))
                    
    The dataset length is computed appropriately based on the mode.
    """
    def __init__(self, data_dir, transform=None, sequence_length=1, forecast_offset=1):
        self.data_dir = data_dir
        self.transform = transform
        self.sequence_length = sequence_length  # Set to 1 for original behavior; >1 for multi-frame input.
        self.forecast_offset = forecast_offset
        
        self.samples = []
        flow_x_files = sorted(glob.glob(os.path.join(data_dir, 'flow_x_*.jpg')))
        print("Found flow_x files:", flow_x_files)
        for fx in flow_x_files:
            fy = fx.replace("flow_x_", "flow_y_")
            if os.path.exists(fy):
                self.samples.append((fx, fy))
            else:
                print(f"Warning: Matching flow_y image not found for {fx}")

    def __len__(self):
        if self.sequence_length == 1:
            # Original: need one frame for input plus forecast_offset for target.
            return len(self.samples) - self.forecast_offset
        else:
            # Sequence mode: need sequence_length frames for input plus forecast_offset.
            return len(self.samples) - (self.sequence_length - 1 + self.forecast_offset)

    def __getitem__(self, idx):
        if self.sequence_length == 1:
            # ----- Original Mode -----
            # Input frame at time t
            flow_x_file, flow_y_file = self.samples[idx]
            input_flow_x = np.array(Image.open(flow_x_file).convert('L'), dtype=np.float32) / 255.0
            input_flow_y = np.array(Image.open(flow_y_file).convert('L'), dtype=np.float32) / 255.0
            input_flow = np.stack([input_flow_x, input_flow_y], axis=-1)  # (H, W, 2)
            
            # Target frame at time t + forecast_offset
            target_flow_x_file, target_flow_y_file = self.samples[idx + self.forecast_offset]
            target_flow_x = np.array(Image.open(target_flow_x_file).convert('L'), dtype=np.float32) / 255.0
            target_flow_y = np.array(Image.open(target_flow_y_file).convert('L'), dtype=np.float32) / 255.0
            target_flow = np.stack([target_flow_x, target_flow_y], axis=-1)
            
            if self.transform is not None:
                input_flow = self.transform(input_flow)
                target_flow = self.transform(target_flow)
            
            # Convert to tensor and change order to (C, H, W)
            input_tensor = torch.tensor(input_flow, dtype=torch.float32).permute(2, 0, 1)
            target_tensor = torch.tensor(target_flow, dtype=torch.float32).permute(2, 0, 1)
            
            return input_tensor, target_tensor
        
        else:
            # ----- Sequence Mode -----
            # Build the input sequence from idx to idx + sequence_length - 1
            input_frames = []
            for i in range(self.sequence_length):
                flow_x_file, flow_y_file = self.samples[idx + i]
                input_flow_x = np.array(Image.open(flow_x_file).convert('L'), dtype=np.float32) / 255.0
                input_flow_y = np.array(Image.open(flow_y_file).convert('L'), dtype=np.float32) / 255.0
                flow = np.stack([input_flow_x, input_flow_y], axis=-1)  # (H, W, 2)
                if self.transform is not None:
                    flow = self.transform(flow)
                flow_tensor = torch.tensor(flow, dtype=torch.float32).permute(2, 0, 1)
                input_frames.append(flow_tensor)
            # Stack the sequence into a tensor: (T, C, H, W)
            input_tensor = torch.stack(input_frames, dim=0)
            
            # Target frame: located at index = idx + sequence_length + forecast_offset - 1
            target_idx = idx + self.sequence_length + self.forecast_offset - 1
            target_flow_x_file, target_flow_y_file = self.samples[target_idx]
            target_flow_x = np.array(Image.open(target_flow_x_file).convert('L'), dtype=np.float32) / 255.0
            target_flow_y = np.array(Image.open(target_flow_y_file).convert('L'), dtype=np.float32) / 255.0
            target_flow = np.stack([target_flow_x, target_flow_y], axis=-1)
            if self.transform is not None:
                target_flow = self.transform(target_flow)
            target_tensor = torch.tensor(target_flow, dtype=torch.float32).permute(2, 0, 1)
            
            return input_tensor, target_tensor


In [None]:

###########################################################################
# 2. Prepare the Data (Train/Test Split)
###########################################################################
seed = 42
torch.manual_seed(seed)

data_dir = '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/'  # Replace with the correct path

# Create dataset with forecast_offset=1 (predicting next frame)
dataset = OpticalFlowDataset(data_dir, forecast_offset=1, sequence_length=sequence_length)
print("Total available samples for forecasting:", len(dataset))  # Should be total_frames - 1

# Randomly split the dataset into training and testing
train_ratio = 0.8
train_size = int(train_ratio * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size], generator=torch.Generator().manual_seed(seed))

print("Train dataset size:", len(train_dataset))
print("Test dataset size:", len(test_dataset))

# Create DataLoaders
batch_size = 1  # Adjust as needed
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)



SyntaxError: cannot assign to function call (2928894085.py, line 17)

In [269]:
from torch.utils.data import Subset, DataLoader

# ------------------------------------------------------------------
# 1.  Build the full dataset (no shuffling here)
# ------------------------------------------------------------------
data_dir = "/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/"
full_ds  = OpticalFlowDataset(
            data_dir,
            forecast_offset=1,
            sequence_length=sequence_length
          )

N = len(full_ds)                 # total samples
train_ratio = 0               # or whatever you need
train_end   = int(train_ratio * N)

# ------------------------------------------------------------------
# 2.  Construct index lists *in order*
# ------------------------------------------------------------------
train_idx = list(range(train_end))          #  0 … train_end-1
test_idx  = list(range(train_end, N - 2))   #  train_end … N-3  (drop last 2)

# ------------------------------------------------------------------
# 3.  Wrap them in Subset objects
# ------------------------------------------------------------------
train_ds = Subset(full_ds, train_idx)
test_ds  = Subset(full_ds, test_idx)

print(f"Total  : {N}")
print(f"Train  : {len(train_ds)}  (indices 0 … {train_end-1})")
print(f"Test   : {len(test_ds)}   (indices {train_end} … {N-3})")

# ------------------------------------------------------------------
# 4.  DataLoaders (shuffle only the training loader if desired)
# ------------------------------------------------------------------
#train_loader = DataLoader(train_ds, batch_size=1, shuffle=True)
test_loader  = DataLoader(test_ds,  batch_size=1, shuffle=False)



Found flow_x files: ['/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00000.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00001.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00002.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00003.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00004.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00005.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00006.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00007.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00008.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00009.jpg', '/cs/student/projects1/2021/rstewart/denseflow/build/bull_festival1/flow_x_00010.jpg', '/cs/student/projects1

In [270]:
###########################################################################
# 3. Instantiate the Model
###########################################################################
if neural_operator == "fno" and sequence_length == 1:
    # Our optical flow images have 2 channels. The FNO2d model uses a positional grid.
    # When using in_channels=2, the model will add 2 grid channels, resulting in a total of 4 channels
    # (which is what we want since our data are 2-channel images).
    modes1 = 12  # number of Fourier modes along height
    modes2 = 12  # number of Fourier modes along width
    hidden_channels = 32

    model = FNO2d(
        n_modes_height=modes1,
        n_modes_width=modes2,
        hidden_channels=hidden_channels,
        in_channels=2,   # raw optical flow data have 2 channels
        out_channels=2   # predicting 2 channels: flow_x and flow_y
    )

if neural_operator == "fno" and sequence_length != 1:
    # Our optical flow images have 2 channels. The FNO2d model uses a positional grid.
    # When using in_channels=2, the model will add 2 grid channels, resulting in a total of 4 channels
    # (which is what we want since our data are 2-channel images).
    modes1 = 6  # number of Fourier modes along height
    modes2 = 6  # number of Fourier modes along width
    hidden_channels = 16
    

    model = FNO3d(
        n_modes_height=modes1,
        n_modes_width=modes2,
        n_modes_depth=4,
        hidden_channels=hidden_channels,
        in_channels=2,   # raw optical flow data have 2 channels
        out_channels=2   # predicting 2 channels: flow_x and flow_y
    )

if neural_operator == "tfno":
    # Our optical flow images have 2 channels. The FNO2d model uses a positional grid.
    # When using in_channels=2, the model will add 2 grid channels, resulting in a total of 4 channels
    # (which is what we want since our data are 2-channel images).
    modes1 = 12  # number of Fourier modes along height
    modes2 = 12  # number of Fourier modes along width
    hidden_channels = 32

    model = TFNO2d(
        n_modes_height=modes1,
        n_modes_width=modes2,
        hidden_channels=hidden_channels,
        in_channels=2,   # raw optical flow data have 2 channels
        out_channels=2   # predicting 2 channels: flow_x and flow_y
    )

if neural_operator == "uno":
    in_channels = 2
    out_channels = 2

    model = UNO(
        in_channels=in_channels,
        out_channels=out_channels,
        hidden_channels=16,  # base "width"
        n_layers=2,          # we have 2 scales in this example

        # Multi-scale parameters:
        uno_out_channels=[16, 32],
        uno_n_modes=[(12, 12), (12, 12)],
        # uno_scalings must be list of lists so that uno_scalings[0] is also a list
        uno_scalings=[[1, 1], [1, 1]]
    )

if neural_operator == "localfno":
    # Example parameters for a 2D local operator with 2 in-channels and 2 out-channels
    in_channels = 2   # e.g. optical flow with (flow_x, flow_y)
    out_channels = 2  # predicting (flow_x, flow_y) at next time step
    kernel_size = 3   # local receptive field size
    n_layers = 4      # number of operator layers
    hidden_channels = 32
    n_dim = 2         # 2D problem
    modes1 = 12  # number of Fourier modes along height
    modes2 = 12  # number of Fourier modes along width

    # Instantiate the LocalNO model
    
    model = LocalFNO(
        n_modes=(modes1, modes2),
        default_in_shape=(height, width-800),
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        n_layers=n_layers,
        hidden_channels=hidden_channels,
        n_dim=n_dim,
        # Optional parameters:
        # domain_padding=0,
        # domain_padding_mode="symmetric",
        # non_linearity=nn.ReLU(),
        # dropout=0.0,
        # rescaling=None,
    )


if neural_operator == "spatiotemporalfno":
     # Optical flow images have 2 channels.
    # SpatioTemporalFNO expects input in shape (B, T_in, in_channels, H, W) where T_in is the number of time steps.
    # Since you are training on pairs (t, t+1), T_in is set to 2.
    modes1 = 12  # Fourier modes along height
    modes2 = 12  # Fourier modes along width
    hidden_channels = 32
    T_in = 3    # Number of input frames (t and t+1)
    
    model = SpatioTemporalFNO(
        n_modes=(modes1, modes2),
        in_channels=2,        # raw optical flow has 2 channels
        out_channels=2,       # predicting 2 channels: flow_x and flow_y
        hidden_channels=hidden_channels,
        n_layers=4,           # number of spatio-temporal blocks
        T_in=T_in,            # number of input time steps
        temporal_kernel=3     # temporal convolution kernel size (default is 3)
    )

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
print("Operator: ",neural_operator)
print(device)



Operator:  fno
cuda


In [56]:

###########################################################################
# 4. Define Loss Function and Optimizer
###########################################################################

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [52]:

###########################################################################
# 5. Training Loop
###########################################################################

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)    # shape: (B, 2, H, W)
        targets = targets.to(device)  # shape: (B, 2, H, W)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] MSE Loss: {avg_loss:.6f}")

    # Append the current epoch and average loss to the CSV file.
    with open(csv_file, mode="a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([epoch+1, avg_loss])

# Define model save path
if sequence_length == 1:
    model_save_path = neural_operator+"_model.pth"
else:
    model_save_path = neural_operator+"_model_"+str(sequence_length)+".pth"

# After training is complete
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")



Epoch [1/50] MSE Loss: 0.009979
Epoch [2/50] MSE Loss: 0.001167
Epoch [3/50] MSE Loss: 0.001011
Epoch [4/50] MSE Loss: 0.000794
Epoch [5/50] MSE Loss: 0.000617
Epoch [6/50] MSE Loss: 0.000524
Epoch [7/50] MSE Loss: 0.000458
Epoch [8/50] MSE Loss: 0.000378
Epoch [9/50] MSE Loss: 0.000401
Epoch [10/50] MSE Loss: 0.000400
Epoch [11/50] MSE Loss: 0.000432
Epoch [12/50] MSE Loss: 0.000351
Epoch [13/50] MSE Loss: 0.000343
Epoch [14/50] MSE Loss: 0.000334
Epoch [15/50] MSE Loss: 0.000337
Epoch [16/50] MSE Loss: 0.000286
Epoch [17/50] MSE Loss: 0.000307
Epoch [18/50] MSE Loss: 0.000255
Epoch [19/50] MSE Loss: 0.000252
Epoch [20/50] MSE Loss: 0.000250
Epoch [21/50] MSE Loss: 0.000224
Epoch [22/50] MSE Loss: 0.000198
Epoch [23/50] MSE Loss: 0.000286
Epoch [24/50] MSE Loss: 0.000178
Epoch [25/50] MSE Loss: 0.000188
Epoch [26/50] MSE Loss: 0.000231
Epoch [27/50] MSE Loss: 0.000170
Epoch [28/50] MSE Loss: 0.000161
Epoch [29/50] MSE Loss: 0.000165
Epoch [30/50] MSE Loss: 0.000172
Epoch [31/50] MSE L

In [41]:
###########################################################################
# 5. Training Loop
###########################################################################

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, targets in train_loader:
        # inputs has shape (B, T=3, C=2, H, W).
        # We want (B, C=2, depth=3, H, W) for the FNO3d.
        inputs = inputs.permute(0, 2, 1, 3, 4)  # => (B, 2, 3, H, W)
          # add a singleton depth dimension so it becomes (B, 2, 1, H, W)
        targets = targets.unsqueeze(2).to(device)               # (B, 2, 1, H, W)

        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)  # now the second dimension is in_channels=2
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] MSE Loss: {avg_loss:.6f}")

    # Append the current epoch and average loss to the CSV file.
    with open(csv_file, mode="a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow([epoch+1, avg_loss])

# Define model save path
if sequence_length == 1:
    model_save_path = neural_operator+"_model.pth"
else:
    model_save_path = neural_operator+"_model_"+str(sequence_length)+".pth"

# After training is complete
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")



RuntimeError: permute(sparse_coo): number of dimensions in the tensor input does not match the length of the desired ordering of dimensions i.e. input.dim() = 4 is not equal to len(dims) = 5

In [227]:
###########################################################################
# 6. Testing (One-Step Forecasting)
###########################################################################
# 2) Load the saved weights
model_load_path = "/cs/student/projects1/2021/rstewart/code/models/love_parade_3_length/spatiotemporalfno_model_3.pth"
model.load_state_dict(torch.load(model_load_path, map_location="cpu"))

# 3) Move to device, set to eval
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

model.eval()
total_loss = 0
with torch.no_grad():
    for idx, (inputs, targets) in enumerate(test_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        loss_val = criterion(outputs, targets).item()
        total_loss += loss_val

        print(f"Test sample {idx} MSE: {loss_val:.6f}")

# Calculate average test loss (MSE)
avg_loss = total_loss / len(test_loader)
print(f"Average test MSE: {avg_loss:.6f}")
        


  model.load_state_dict(torch.load(model_load_path, map_location="cpu"))


RuntimeError: Error(s) in loading state_dict for SpatioTemporalFNO:
	size mismatch for lifting_conv.weight: copying a param with shape torch.Size([16, 2, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 2, 3, 3]).
	size mismatch for lifting_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for lifting_residual.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for lifting_residual.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for lifting_residual.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for lifting_residual.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.0.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.0.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.0.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.0.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.0.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.0.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.0.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.0.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.1.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.1.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.1.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.1.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.1.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.1.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.1.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.1.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.2.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.2.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.2.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.2.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.2.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.2.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.2.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.2.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.3.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.3.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.3.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.3.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.3.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.3.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.3.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.3.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for refinement.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for refinement.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for refinement.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for refinement.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for projection_conv.weight: copying a param with shape torch.Size([2, 16, 1, 1]) from checkpoint, the shape in current model is torch.Size([2, 32, 1, 1]).

In [None]:
model_load_path = "/cs/student/projects1/2021/rstewart/code/models/bull1_3_length/spatiotemporalfno_model_3.pth"
model.load_state_dict(torch.load(model_load_path, map_location="cpu"))

# 3) Move to device, set to eval
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

model.eval()
total_loss = 0.0

with torch.no_grad():
    for idx, (inputs, targets) in enumerate(test_loader):
        # 1 Arrange shapes exactly like training
        inputs  = inputs.permute(0, 2, 1, 3, 4).to(device)   # (B, 2, 3, H, W)
        targets = targets.unsqueeze(2).to(device)            # (B, 2, 1, H, W)

        # 2 Forward pass
        outputs = model(inputs)                              # (B, 2, 3, H, W)

        # 3 Same loss definition as training
        loss_val = criterion(outputs, targets).item()
        total_loss += loss_val
        print(f"Test sample {idx:02d} – MSE: {loss_val:.6f}")

avg_loss = total_loss / len(test_loader)
print(f"Average test MSE: {avg_loss:.6f}")

  model.load_state_dict(torch.load(model_load_path, map_location="cpu"))


RuntimeError: Error(s) in loading state_dict for SpatioTemporalFNO:
	size mismatch for lifting_conv.weight: copying a param with shape torch.Size([16, 2, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 2, 3, 3]).
	size mismatch for lifting_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for lifting_residual.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for lifting_residual.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for lifting_residual.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for lifting_residual.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.0.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.0.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.0.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.0.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.0.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.0.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.0.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.0.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.1.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.1.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.1.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.1.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.1.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.1.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.1.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.1.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.2.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.2.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.2.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.2.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.2.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.2.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.2.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.2.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.3.spectral_conv.bias: copying a param with shape torch.Size([16, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 1, 1]).
	size mismatch for blocks.3.spectral_conv.weight.tensor: copying a param with shape torch.Size([16, 16, 6, 4]) from checkpoint, the shape in current model is torch.Size([32, 32, 12, 7]).
	size mismatch for blocks.3.spatial_conv.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.3.spatial_conv.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.3.spatial_conv.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for blocks.3.spatial_conv.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for blocks.3.temporal_conv.weight: copying a param with shape torch.Size([16, 16, 3, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 1, 1]).
	size mismatch for blocks.3.temporal_conv.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for refinement.0.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for refinement.0.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for refinement.2.weight: copying a param with shape torch.Size([16, 16, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]).
	size mismatch for refinement.2.bias: copying a param with shape torch.Size([16]) from checkpoint, the shape in current model is torch.Size([32]).
	size mismatch for projection_conv.weight: copying a param with shape torch.Size([2, 16, 1, 1]) from checkpoint, the shape in current model is torch.Size([2, 32, 1, 1]).

In [271]:
mse_fno = []

In [256]:
mse_stfno = []

In [None]:
import torch
import numpy as np
from torch.nn import MSELoss

criterion = MSELoss(reduction="none")  # so we can compute per-sample losses
model_load_path = "/cs/student/projects1/2021/rstewart/code/models/bull1_1_length/fno_model.pth"
model.load_state_dict(torch.load(model_load_path, map_location="cpu"))

# 3) Move to device, set to eval
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs  = inputs.permute(0, 2, 1, 3, 4).to(device)   # (B, 2, 3, H, W)
        targets = targets.unsqueeze(2).to(device)            # (B, 2, 1, H, W)

        out_fno = model(inputs)
        

        # per-sample MSE (averaged over channels, H, W)
        # criterion gives [B, C, H, W]; we mean over (1,2,3)
        per_sample = criterion(out_fno, targets).mean(dim=(1, 2, 3, 4)) 
        mse_fno.append(per_sample.cpu().numpy())




  model.load_state_dict(torch.load(model_load_path, map_location="cpu"))
  return F.mse_loss(input, target, reduction=self.reduction)


In [272]:
import torch
import numpy as np
from torch.nn import MSELoss

criterion = MSELoss(reduction="none")  # so we can compute per-sample losses
model_load_path = "/cs/student/projects1/2021/rstewart/code/models/bull1_1_length/fno_model.pth"
model.load_state_dict(torch.load(model_load_path, map_location="cpu"))

# 3) Move to device, set to eval
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)


model.eval()
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        out_fno = model(inputs)
        

        # per-sample MSE (averaged over channels, H, W)
        # criterion gives [B, C, H, W]; we mean over (1,2,3)
        mse_fno.append(criterion(out_fno, targets).mean(dim=(1,2,3)).cpu().numpy())




  model.load_state_dict(torch.load(model_load_path, map_location="cpu"))


In [273]:
mse_fno = np.concatenate(mse_fno)   # shape (N,)

In [258]:
mse_stfno = np.concatenate(mse_stfno)   # shape (N,)

In [274]:
print(len(mse_fno), len(mse_stfno))

68 68


In [242]:
mse_fno   = np.asarray(mse_fno,   dtype=float).ravel()   # shape (Nf,)
mse_stfno = np.asarray(mse_stfno, dtype=float).ravel()   # shape (Ns,)

In [275]:
from scipy.stats import ttest_rel

t_stat, p_one = ttest_rel(mse_fno, mse_stfno, alternative="greater")
print(f"t = {t_stat:.3f}, one‑sided p = {p_one:.3e}")



t = 4.258, one‑sided p = 3.293e-05


In [32]:
import matplotlib.pyplot as plt

def tensor_to_numpy(tensor):
    # Ensure the tensor is dense and remove the batch dimension.
    if tensor.is_sparse:
        tensor = tensor.to_dense()
    # Remove the batch dimension (assuming batch size is 1) and change shape from (C, H, W) to (H, W, C)
    return tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()

model.eval()
with torch.no_grad():
    for idx, (inputs, targets) in enumerate(test_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        
        # Convert tensors to NumPy arrays.
        input_img  = tensor_to_numpy(inputs)    # shape: (H, W, 2)
        target_img = tensor_to_numpy(targets)    # shape: (H, W, 2)
        output_img = tensor_to_numpy(outputs)    # shape: (H, W, 2)
        
        # Compute the difference (target - prediction) for each channel.
        diff_img = target_img - output_img

        # Create a figure with 4 rows and 2 columns.
        fig, axs = plt.subplots(4, 2, figsize=(10, 20))
        
        # Row 1: Input
        axs[0, 0].imshow(input_img[..., 0], cmap='gray')
        axs[0, 0].set_title(f"Sample {idx}: Input Flow X")
        axs[0, 1].imshow(input_img[..., 1], cmap='gray')
        axs[0, 1].set_title(f"Sample {idx}: Input Flow Y")
        
        # Row 2: Target (Ground Truth)
        axs[1, 0].imshow(target_img[..., 0], cmap='gray')
        axs[1, 0].set_title(f"Sample {idx}: Target Flow X")
        axs[1, 1].imshow(target_img[..., 1], cmap='gray')
        axs[1, 1].set_title(f"Sample {idx}: Target Flow Y")
        
        # Row 3: Prediction (Model Output)
        axs[2, 0].imshow(output_img[..., 0], cmap='gray')
        axs[2, 0].set_title(f"Sample {idx}: Prediction Flow X")
        axs[2, 1].imshow(output_img[..., 1], cmap='gray')
        axs[2, 1].set_title(f"Sample {idx}: Prediction Flow Y")
        
        # Row 4: Difference (Target - Prediction) as heatmaps.
        # Using a diverging colormap like 'seismic' to show positive/negative differences.
        im0 = axs[3, 0].imshow(diff_img[..., 0], cmap='seismic')
        axs[3, 0].set_title(f"Sample {idx}: Diff Flow X")
        fig.colorbar(im0, ax=axs[3, 0])
        
        im1 = axs[3, 1].imshow(diff_img[..., 1], cmap='seismic')
        axs[3, 1].set_title(f"Sample {idx}: Diff Flow Y")
        fig.colorbar(im1, ax=axs[3, 1])
        
        # Remove axis ticks for clarity.
        for ax in axs.flat:
            ax.set_xticks([])
            ax.set_yticks([])
        
        plt.tight_layout()
        plt.show()


RuntimeError: Given groups=1, weight of size [32, 5, 1], expected input[1, 6, 6582960] to have 5 channels, but got 6 channels instead