In [2]:
import torch
import torch.nn as nn
import numpy as np
import sys
import os
import random
import matplotlib.pyplot as plt

# Resolve the sibling `src` directory relative to the current working
# directory and register it once, so the local modules imported below
# (utils, mimonet) can be found.
src_path = os.path.abspath(os.path.join(os.getcwd(), '../src'))
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)
    
from utils import MIMONetDataset, DeepONetDataset, ChannelScaler
from mimonet import MIMONet

In [3]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [14]:
# set working directory
# "." is resolved against the kernel's current working directory, so every
# data path below is read from <cwd>/data.
working_dir = "."
data_dir = os.path.join(working_dir, "data")

## Load datasets

### Load shared parameters/dataset

In [17]:
# trunk dataset
# Read the 'trunk' array inside a `with` block so the .npz archive is closed
# as soon as the array has been loaded into memory, instead of leaving the
# file handle to the garbage collector.
with np.load(os.path.join(data_dir, "share/trunk_input.npz")) as trunk_file:
    trunk_input = trunk_file['trunk']

### Training data

In [18]:
# Training split: the branch archive holds two inputs, the functional
# parameters ('func_params') and the static parameters ('stat_params').
train_branch = np.load(os.path.join(data_dir, "training/train_branch_input.npz"))
train_branch_1, train_branch_2 = (
    train_branch[key] for key in ('func_params', 'stat_params')
)

# The target is stored as [samples, channel, gridpoints]; swapping the last
# two axes yields [samples, gridpoints, channel].
train_target = np.swapaxes(
    np.load(os.path.join(data_dir, "training/train_target.npz"))['target'], 1, 2
)

print("train_branch_1 shape:", train_branch_1.shape)
print("train_branch_2 shape:", train_branch_2.shape)
print("train_target shape:", train_target.shape)

train_branch_1 shape: (4000, 100)
train_branch_2 shape: (4000, 2)
train_target shape: (4000, 1733, 3)


In [19]:
# Standardize the training branch inputs with the predefined mean/std stored
# under data/share. Each statistics archive is opened once and closed as soon
# as its arrays have been read.
with np.load(os.path.join(data_dir, "share/func_mean_std_params.npz")) as func_stats:
    f_mean, f_std = func_stats['mean'], func_stats['std']
with np.load(os.path.join(data_dir, "share/stat_mean_std_params.npz")) as stat_stats:
    s_mean, s_std = stat_stats['mean'], stat_stats['std']

# Always start from the raw arrays held by `train_branch` (indexing the
# archive yields a freshly loaded array), so re-running this cell never
# standardizes data that has already been standardized.
train_branch_1 = (train_branch['func_params'] - f_mean) / f_std

# The static parameters are standardized column by column; only the first
# s_mean.shape[0] columns are touched, and the array keeps its original dtype.
train_branch_2 = train_branch['stat_params']
for i in range(s_mean.shape[0]):
    train_branch_2[:, i] = (train_branch_2[:, i] - s_mean[i]) / s_std[i]

### Test data

In [20]:
# Test split: same archive keys as the training data. `test_branch` itself is
# kept open because it is passed to test_model later in the notebook.
test_branch = np.load(os.path.join(data_dir, "test/test_branch_input.npz"))
test_branch_1, test_branch_2 = (
    test_branch[key] for key in ('func_params', 'stat_params')
)

# Swap axes 1 and 2 of the stored target array.
test_target = np.swapaxes(
    np.load(os.path.join(data_dir, "test/test_target.npz"))['target'], 1, 2
)

print("test_branch_1 shape:", test_branch_1.shape)
print("test_branch_2 shape:", test_branch_2.shape)
print("test_target shape:", test_target.shape)

# Standardize the functional input with the predefined mean and std.
test_branch_1 = (test_branch_1 - f_mean) / f_std
# Standardize the static input column by column, writing into the array in place.
for col in range(s_mean.shape[0]):
    test_branch_2[:, col] = (test_branch_2[:, col] - s_mean[col]) / s_std[col]

test_branch_1 shape: (1000, 100)
test_branch_2 shape: (1000, 2)
test_target shape: (1000, 1733, 3)


### Scaling the target data

In [21]:
# Scale the target data with a ChannelScaler configured for min-max scaling
# to the range (-1, 1). The scaler is fitted on the training targets only and
# then applied to both splits.
#
# To reverse the scaling later:
#     train_target = scaler.inverse_transform(train_target_scaled)
#     test_target = scaler.inverse_transform(test_target_scaled)
scaler = ChannelScaler(method='minmax', feature_range=(-1, 1))
scaler.fit(train_target)
train_target_scaled, test_target_scaled = (
    scaler.transform(split) for split in (train_target, test_target)
)


## Torch Dataset and DataLoader

In [22]:
# Test dataset: positional arguments are (branch inputs, trunk input, scaled target).
test_branch_inputs = [test_branch_1, test_branch_2]
test_dataset = MIMONetDataset(test_branch_inputs, trunk_input, test_target_scaled)

# One sample per batch, in file order, loaded in the main process.
test_loader_options = dict(batch_size=1, shuffle=False, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_dataset, **test_loader_options)

In [23]:
# Training dataset: positional arguments are (branch inputs, trunk input, scaled target).
train_branch_inputs = [train_branch_1, train_branch_2]
train_dataset = MIMONetDataset(train_branch_inputs, trunk_input, train_target_scaled)

## MIMONet Model

In [24]:
# Architecture parameters
dim = 256                 # output width shared by both branch nets and the trunk net
branch_input_dim1 = 100   # matches the 100 columns of the functional branch input
branch_input_dim2 = 2     # matches the 2 columns of the static branch input
trunk_input_dim = 2       # assumes the trunk input has 2 features per point — TODO confirm

# Both branch nets use the same hidden widths and differ only in input size.
branch_arch_list = [
    [in_dim, 512, 512, 512, dim]
    for in_dim in (branch_input_dim1, branch_input_dim2)
]
trunk_arch = [trunk_input_dim, 256, 256, 256, dim]

# Build the MIMONet instance and move it to the selected device.
model = MIMONet(
    branch_arch_list=branch_arch_list,
    trunk_arch=trunk_arch,
    num_outputs=3,
    activation_fn=nn.ReLU,
    merge_type='mul',  # or 'sum'
).to(device)

# Report the parameter count.
num_params = sum(param.numel() for param in model.parameters())
print(f"Total number of parameters: {num_params:,}")

Total number of parameters: 1,696,259


In [None]:
#from training import train_model

In [13]:
#optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1E-6)
#criterion = nn.MSELoss()
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

In [14]:
# Training model
# Training is disabled in this notebook: the evaluation section below loads
# an existing checkpoint instead. The call is kept here for reference only.
# Before re-enabling it, restore the `train_model` import and the
# optimizer/criterion definitions that are commented out in the cells above.
#
# train_model(
#     model=model,
#     dataset=train_dataset,
#     optimizer = optimizer,
#     scheduler = None,
#     device='cuda',
#     num_epochs=5,
#     batch_size=4,
#     criterion= criterion,
#     patience=1000,
#     k_fold=5,
#     multi_gpu=False,
#     working_dir=""
# )

# Report what actually happened: no training ran in this cell.
print("Training skipped: the train_model call is commented out.")

Training completed.


## Evaluation and Visualization

In [None]:
# load the trained model (checkpoints/best_model.pt)

Model file not found. Please check the path.


In [38]:
# Locate the checkpoint and print the resolved path, so a wrong working
# directory is easy to spot.
model_path = os.path.join(".", "checkpoints", "best_model.pt")
print("Model path:", os.path.abspath(model_path))
print("Exists:", os.path.exists(model_path))

# Load the model weights. map_location remaps tensors that were saved on a
# different device (e.g. a CUDA checkpoint opened on a CPU-only machine) onto
# the device selected at the top of the notebook.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to evaluation mode; as the cell's last expression this also displays
# the model architecture.
model.eval()

Model path: /home/kazuma/Desktop/MIMONet/Subchannel/checkpoints/best_model.pt
Exists: True


MIMONet(
  (branch_nets): ModuleList(
    (0): FCN(
      (network): Sequential(
        (0): Linear(in_features=100, out_features=512, bias=True)
        (1): ReLU()
        (2): Linear(in_features=512, out_features=512, bias=True)
        (3): ReLU()
        (4): Linear(in_features=512, out_features=512, bias=True)
        (5): ReLU()
        (6): Linear(in_features=512, out_features=256, bias=True)
      )
    )
    (1): FCN(
      (network): Sequential(
        (0): Linear(in_features=2, out_features=512, bias=True)
        (1): ReLU()
        (2): Linear(in_features=512, out_features=512, bias=True)
        (3): ReLU()
        (4): Linear(in_features=512, out_features=512, bias=True)
        (5): ReLU()
        (6): Linear(in_features=512, out_features=256, bias=True)
      )
    )
  )
  (trunk_net): FCN(
    (network): Sequential(
      (0): Linear(in_features=2, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=256, bias=True)
      (

In [39]:
from train_utils import test_model

In [43]:
# Evaluate the loaded model on the test loader via train_utils.test_model.
# NOTE(review): `scaler` is the ChannelScaler fitted on the training targets;
# presumably test_model uses it to undo the target scaling — confirm in train_utils.
# NOTE(review): `test_branch` is the archive object returned by np.load for
# test_branch_input.npz, not the standardized test_branch_1/test_branch_2
# arrays — confirm that this is what test_model expects.
# NOTE(review): save_array=True presumably makes test_model write its results
# to disk under working_dir — verify against the implementation.
test_model(
    model=model,
    test_loader=test_loader,
    scaler=scaler,
    working_dir='.',
    device=device,
    test_branch=test_branch,
    save_array=True
)

Saved test results to ./results/test_results.npz
Mean relative L2 errors: [0.02211111 0.00268162 0.04149628]
Standard deviation of relative L2 errors: [0.00022993 0.00116533 0.00037741]
