In [1]:
import torch
import torch.nn as nn
import numpy as np
import sys
import os
import random
import matplotlib.pyplot as plt

# Resolve the `src` directory under the current working directory and put it
# on the import path (once) so the modules imported next can be found.
src_path = os.path.abspath('src')
if not (src_path in sys.path):
    sys.path.append(src_path)
    
from utils import MIMONetDataset, DeepONetDataset, ChannelScaler
from mimonet import MIMONet

In [2]:
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [3]:
# Root directory of the experiment (data and checkpoints live underneath it).
# Set the MIMONET_WORKING_DIR environment variable to run the notebook from a
# different location; when it is unset the original cluster path is used.
working_dir = os.environ.get(
    "MIMONET_WORKING_DIR",
    "/projects/bcnx/kazumak2/MIMONet/Subchannel/",
)
data_dir = os.path.join(working_dir, "data")

## Load datasets

### Load shared parameters/dataset

In [4]:
# Trunk input: the 'trunk' array of share/trunk_input.npz. The same array is
# passed to both the training and the test dataset further down.
trunk_path = os.path.join(data_dir, "share/trunk_input.npz")
trunk_input = np.load(trunk_path)['trunk']

### Training data

In [5]:
# Training branch inputs: 'func_params' and 'stat_params' are read from the
# same archive; the open archive itself stays available as `train_branch`.
train_branch = np.load(os.path.join(data_dir, "training/train_branch_input.npz"))
train_branch_1, train_branch_2 = (
    train_branch[key] for key in ('func_params', 'stat_params')
)

# Training targets are stored as [samples, channel, gridpoints]; move axis 1 to
# position 2 so they become [samples, gridpoints, channel].
train_target = np.moveaxis(
    np.load(os.path.join(data_dir, "training/train_target.npz"))['target'],
    1,
    2,
)

# Quick sanity check of the loaded array shapes.
print("train_branch_1 shape:", train_branch_1.shape)
print("train_branch_2 shape:", train_branch_2.shape)
print("train_target shape:", train_target.shape)

train_branch_1 shape: (4000, 100)
train_branch_2 shape: (4000, 2)
train_target shape: (4000, 1733, 3)


In [6]:
# Standardise the training branch inputs with the precomputed mean/std stored
# under share/.
#
# The raw arrays are re-read from `train_branch` (every key access on the
# archive returns a freshly loaded array), so re-running this cell always
# starts from unscaled data instead of standardising the values a second time.
# Each statistics archive is opened once and closed again by the `with` block.

# functional input: (x - mean) / std
with np.load(os.path.join(data_dir, "share/func_mean_std_params.npz")) as f_stats:
    f_mean = f_stats['mean']
    f_std = f_stats['std']

train_branch_1 = (train_branch['func_params'] - f_mean) / f_std

# static input: one mean/std entry per column, applied column by column
with np.load(os.path.join(data_dir, "share/stat_mean_std_params.npz")) as s_stats:
    s_mean = s_stats['mean']
    s_std = s_stats['std']

train_branch_2 = train_branch['stat_params']
for i in range(s_mean.shape[0]):
    train_branch_2[:, i] = (train_branch_2[:, i] - s_mean[i]) / s_std[i]

### Test data

In [7]:
# Test branch inputs; the open archive stays in `test_branch`, which is later
# handed to test_kfold_model.
test_branch = np.load(os.path.join(data_dir, "test/test_branch_input.npz"))
test_branch_1, test_branch_2 = (
    test_branch[key] for key in ('func_params', 'stat_params')
)

# Test targets get the same axis move (1 -> 2) as the training targets.
test_target = np.moveaxis(
    np.load(os.path.join(data_dir, "test/test_target.npz"))['target'],
    1,
    2,
)

print("test_branch_1 shape:", test_branch_1.shape)
print("test_branch_2 shape:", test_branch_2.shape)
print("test_target shape:", test_target.shape)

# Standardise the functional input with the same mean/std used for training.
test_branch_1 = (test_branch_1 - f_mean) / f_std
# Standardise the static input column by column, again with the shared mean/std.
for col in range(s_mean.shape[0]):
    test_branch_2[:, col] = (test_branch_2[:, col] - s_mean[col]) / s_std[col]

test_branch_1 shape: (1000, 100)
test_branch_2 shape: (1000, 2)
test_target shape: (1000, 1733, 3)


### Scaling the target data

In [8]:
# Scale the targets with ChannelScaler ('minmax', feature range (-1, 1)).
# The scaler is fitted on the training targets only and then applied to both
# the training and the test targets.
'''
Note: to undo the scaling of the target data, use
train_target = scaler.inverse_transform(train_target_scaled)
test_target = scaler.inverse_transform(test_target_scaled)
'''
scaler = ChannelScaler(method='minmax', feature_range=(-1, 1))
scaler.fit(train_target)
train_target_scaled, test_target_scaled = (
    scaler.transform(split) for split in (train_target, test_target)
)


## Torch Dataset and DataLoader

In [9]:
# Test dataset: [functional branch input, static branch input], the trunk
# input, and the scaled targets, in that positional order.
test_branches = [test_branch_1, test_branch_2]
test_dataset = MIMONetDataset(test_branches, trunk_input, test_target_scaled)

# One sample per batch, fixed order, loading in the main process.
test_loader_options = dict(batch_size=1, shuffle=False, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_dataset, **test_loader_options)

In [10]:
# Training dataset, built with the same argument order as the test dataset:
# branch input list, trunk input, scaled targets.
train_branches = [train_branch_1, train_branch_2]
train_dataset = MIMONetDataset(train_branches, trunk_input, train_target_scaled)

## MIMONet Model

In [11]:
# Architecture parameters
dim = 256                # output width shared by both branch nets and the trunk net
branch_input_dim1 = 100  # width of the functional branch input
branch_input_dim2 = 2    # width of the static branch input
trunk_input_dim = 2      # width of the trunk input

# Hidden layer widths: three layers of 512 per branch, three of 256 for the trunk.
branch_hidden = [512, 512, 512]
trunk_hidden = [256, 256, 256]

# Define MIMONet instance (no Fourier, no final linear) and move it to `device`.
model = MIMONet(
    branch_arch_list=[
        [branch_input_dim1, *branch_hidden, dim],
        [branch_input_dim2, *branch_hidden, dim],
    ],
    trunk_arch=[trunk_input_dim, *trunk_hidden, dim],
    num_outputs=3,
    activation_fn=nn.ReLU,
    merge_type='mul',  # or 'sum'
).to(device)

# Report the total number of parameters in the model.
num_params = sum(param.numel() for param in model.parameters())
print(f"Total number of parameters: {num_params:,}")

Total number of parameters: 1,696,259


In [12]:
from training import train_model

In [13]:
# Loss: mean squared error.
criterion = nn.MSELoss()
# Adam with a small weight decay on every model parameter.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-6)
# StepLR: multiply the learning rate by 0.5 after every 50 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

In [None]:
# Training model
# Training is opt-in: the evaluation cell loads checkpoints from disk, so the
# k-fold run is skipped unless RUN_TRAINING is set to True. The message printed
# at the end states what actually happened instead of always claiming that
# training completed.
RUN_TRAINING = False

if RUN_TRAINING:
    # NOTE(review): the arguments are kept exactly as in the previously
    # disabled call. Confirm before enabling:
    #   - scheduler=None means the StepLR scheduler defined earlier is unused
    #   - device='cuda' is hard-coded instead of using the detected `device`
    #   - working_dir="" differs from the `working_dir` used for evaluation
    train_model(
        model=model,
        dataset=train_dataset,
        optimizer=optimizer,
        scheduler=None,
        device='cuda',
        num_epochs=5,
        batch_size=4,
        criterion=criterion,
        patience=1000,
        k_fold=5,
        multi_gpu=False,
        working_dir="",
    )
    print("Training completed.")
else:
    print("Training skipped (RUN_TRAINING is False).")

Training completed.


## Evaluation

In [15]:
from train_utils import test_kfold_model

In [16]:
# Evaluation mode: 'k_fold' evaluates one checkpoint per fold; any other value
# loads the single checkpoint `best_model.pt`.
train_mode = 'k_fold'
n_hold = 5  # number of fold checkpoints to evaluate (folds 1..n_hold)


def _load_best_model(net, checkpoint_path, target_device, description):
    """Load a saved state dict into `net` and switch it to eval mode.

    Args:
        net: model whose weights are replaced in place.
        checkpoint_path: path of the state-dict file to read.
        target_device: device the tensors are mapped to while loading and the
            model is moved to afterwards.
        description: label used in the printed / raised messages.

    Raises:
        FileNotFoundError: if `checkpoint_path` does not exist. Raising (rather
            than printing and calling exit) stops the cell immediately, so
            evaluation never runs with weights left over from a previous load.
    """
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError(
            f"{description} not found. Please check the path: {checkpoint_path}"
        )
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    net.load_state_dict(torch.load(checkpoint_path, map_location=target_device))
    net.to(target_device)
    net.eval()
    print(f"{description} loaded.")


if train_mode == 'k_fold':
    for i in range(n_hold):
        best_model_path = os.path.join(working_dir, f"checkpoints/best_model_fold{i+1}.pt")
        _load_best_model(model, best_model_path, device, f"Best model for fold {i+1}")

        # Evaluate this fold's weights on the test set.
        test_kfold_model(
            fold_id=i+1,
            model=model,
            test_loader=test_loader,
            scaler=scaler,
            working_dir=working_dir,
            device=device,
            test_branch=test_branch,
            save_array=False
        )

else:
    # Load the best model (best_model.pt)
    best_model_path = os.path.join(working_dir, "checkpoints/best_model.pt")
    _load_best_model(model, best_model_path, device, "Best model")

    # Test the model
    #test_model()

Best model for fold 1 loaded.
Mean relative L2 errors: [0.08067496 0.00322554 0.06602502]
Standard deviation of relative L2 errors: [0.00085585 0.00086175 0.00069564]
Best model for fold 2 loaded.
Mean relative L2 errors: [0.08369818 0.00428187 0.07606768]
Standard deviation of relative L2 errors: [0.00100787 0.00147104 0.00078902]
Best model for fold 3 loaded.
Mean relative L2 errors: [0.06665401 0.00285575 0.05448814]
Standard deviation of relative L2 errors: [0.0003883  0.00087991 0.00051551]
Best model for fold 4 loaded.
Mean relative L2 errors: [0.15695732 0.00465049 0.09746448]
Standard deviation of relative L2 errors: [0.00054871 0.00050717 0.00284726]
Best model for fold 5 loaded.
Mean relative L2 errors: [0.24503203 0.00877593 0.38087257]
Standard deviation of relative L2 errors: [0.00063114 0.00344348 0.00393621]
