In [440]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler

import sys
import os

# Make the local `src/` directory (resolved against the current working
# directory) importable, without adding it to sys.path twice.
src_path = os.path.abspath('src')
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)
    
from utils import create_sliding_windows, SequentialMIONetDataset
from s_mionet import SequentialMIONet

In [441]:
# Prefer the GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [442]:
# Load the neutron-monitor inputs, the trunk grid points and the dose targets
input_data = np.load('data/neutron_data_22yrs.npy')
trunk = np.load('data/grid_points.npy')
target = np.load('data/dose_array.npy')

# Min-max scale the first two trunk columns, writing the result back in place
for col in range(2):
    col_min = np.min(trunk[:, col])
    col_max = np.max(trunk[:, col])
    trunk[:, col] = (trunk[:, col] - col_min) / (col_max - col_min)

In [443]:
def train_val_test_split(input_data, target, test_size=365, train_fraction=0.5):
    """Split time-ordered inputs and targets into train, validation and test sets.

    The last ``test_size`` rows form the test set. The remaining rows are cut
    chronologically (no shuffling): the first ``train_fraction`` of them,
    rounded down, is the training set and the rest is the validation set.
    With the defaults this is a 50% / 50% train/validation split of everything
    before the final 365 rows.

    Args:
        input_data: Sequence/array indexed by time along its first axis.
        target: Sequence/array with the same length as ``input_data``.
        test_size: Number of trailing rows held out for testing.
        train_fraction: Fraction (0..1) of the non-test rows used for training.

    Returns:
        Tuple ``(train_input, train_target, val_input, val_target,
        test_input, test_target)``.

    Raises:
        ValueError: If the two arrays differ in length, or ``test_size`` /
            ``train_fraction`` are out of range.
    """
    n_samples = len(input_data)
    if len(target) != n_samples:
        raise ValueError(
            f"input_data and target must have the same length, got {n_samples} and {len(target)}"
        )
    if not 0 <= test_size <= n_samples:
        raise ValueError(f"test_size must be between 0 and {n_samples}, got {test_size}")
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be between 0 and 1, got {train_fraction}")

    # Index from the front: a `[-test_size:]` slice would return *every* row
    # when test_size == 0 instead of an empty test set.
    test_start = n_samples - test_size
    train_size = int(test_start * train_fraction)

    # Training set: earliest rows
    train_input = input_data[:train_size]
    train_target = target[:train_size]

    # Validation set: rows between the training set and the test set
    val_input = input_data[train_size:test_start]
    val_target = target[train_size:test_start]

    # Test set: most recent rows
    test_input = input_data[test_start:]
    test_target = target[test_start:]

    # Final shapes check
    print("Train input shape:", train_input.shape)
    print("Validation input shape:", val_input.shape)
    print("Test input shape:", test_input.shape)

    return train_input, train_target, val_input, val_target, test_input, test_target

# Chronological train/validation/test split of the arrays loaded above
(
    train_input, train_target,
    val_input, val_target,
    test_input, test_target,
) = train_val_test_split(input_data, target)

Train input shape: (4017, 12)
Validation input shape: (4018, 12)
Test input shape: (365, 12)


In [444]:
# Min-max scale the inputs: the scaler is fitted on the training split and
# then applied unchanged to the validation and test splits
scaler = MinMaxScaler()
train_input = scaler.fit_transform(train_input)
val_input, test_input = (scaler.transform(split) for split in (val_input, test_input))

# Same procedure for the targets, followed by a trailing singleton axis
scaler_target = MinMaxScaler()
train_target = np.expand_dims(scaler_target.fit_transform(train_target), axis=-1)
val_target = np.expand_dims(scaler_target.transform(val_target), axis=-1)
test_target = np.expand_dims(scaler_target.transform(test_target), axis=-1)

In [445]:
window_size = 90
print("Window size:", window_size)

# Window each split with the shared window size (train, then test, then validation)
train_input_seq, train_target_seq = create_sliding_windows(train_input, train_target, window_size)
test_input_seq, test_target_seq = create_sliding_windows(test_input, test_target, window_size)
val_input_seq, val_target_seq = create_sliding_windows(val_input, val_target, window_size)

# Report the shape of every generated sequence
print("Check the shapes of the generated sequences\n-----------------------------------------")
for caption, seq in (
    ("Train input shape:", train_input_seq),
    ("Train target shape:", train_target_seq),
    ("Validation input shape:", val_input_seq),
    ("Validation target shape:", val_target_seq),
    ("Test input shape:", test_input_seq),
    ("Test target shape:", test_target_seq),
):
    print(caption, seq.shape)
print("-----------------------------------------")


Window size: 90
Check the shapes of the generated sequences
-----------------------------------------
Train input shape: torch.Size([3928, 90, 12])
Train target shape: torch.Size([3928, 65341, 1])
Validation input shape: torch.Size([3929, 90, 12])
Validation target shape: torch.Size([3929, 65341, 1])
Test input shape: torch.Size([276, 90, 12])
Test target shape: torch.Size([276, 65341, 1])
-----------------------------------------


In [446]:
# Wrap every split in a dataset and a DataLoader
print("Create DataLoaders for training and validation sets\n-----------------------------------------")
batch_size = 16
print("Batch size:", batch_size)

# Each dataset combines a split's windowed inputs, the shared trunk grid and the split's targets
train_dataset = SequentialMIONetDataset(train_input_seq, trunk, train_target_seq)
val_dataset = SequentialMIONetDataset(val_input_seq, trunk, val_target_seq)
test_dataset = SequentialMIONetDataset(test_input_seq, trunk, test_target_seq)

# Only the training loader shuffles and sets drop_last; evaluation loaders keep the data order
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, drop_last=True
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

Create DataLoaders for training and validation sets
-----------------------------------------
Batch size: 16


In [447]:
def init_model(num_sensors=12, dim=128):
    """Build a SequentialMIONet with one LSTM branch per sensor.

    Every branch uses the same hyper-parameters but receives its *own* copy of
    the configuration dict, so an in-place change made to one branch's config
    by the consumer cannot leak into the other branches.

    Args:
        num_sensors: Number of branches; they are keyed ``sensor1`` ..
            ``sensor{num_sensors}``. Defaults to 12.
        dim: Width of the branch outputs and of the last trunk layer.
            Defaults to 128.

    Returns:
        The instantiated ``SequentialMIONet`` (created with
        ``use_transform=False`` and a single output).

    Raises:
        ValueError: If ``num_sensors`` is smaller than 1.
    """
    if num_sensors < 1:
        raise ValueError(f"num_sensors must be at least 1, got {num_sensors}")

    # Template configuration shared (by value) between all branches
    base_branch_config = {
        "type": "lstm",
        "input_size": 1,
        "hidden_size": 128,
        "num_layers": 4,
        "output_size": dim
    }

    # dict(...) gives each branch an independent copy instead of N aliases of one dict
    branches_config = {
        f"sensor{idx}": dict(base_branch_config) for idx in range(1, num_sensors + 1)
    }

    # Trunk network configuration: 2 inputs -> two hidden layers of 128 -> dim
    trunk_architecture = [2, 128, 128, dim]
    num_outputs = 1

    return SequentialMIONet(branches_config, trunk_architecture, num_outputs, use_transform=False)

In [448]:
# Build the network, move it to the selected device, then print its layer summary
model = init_model()
model = model.to(device)
print(model)

SequentialMIONet(
  (branches): ModuleDict(
    (sensor1): LSTM(
      (lstm): LSTM(1, 128, num_layers=4, batch_first=True)
      (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (fc): Linear(in_features=128, out_features=128, bias=True)
    )
    (sensor2): LSTM(
      (lstm): LSTM(1, 128, num_layers=4, batch_first=True)
      (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (fc): Linear(in_features=128, out_features=128, bias=True)
    )
    (sensor3): LSTM(
      (lstm): LSTM(1, 128, num_layers=4, batch_first=True)
      (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (fc): Linear(in_features=128, out_features=128, bias=True)
    )
    (sensor4): LSTM(
      (lstm): LSTM(1, 128, num_layers=4, batch_first=True)
      (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (fc): Linear(in_features=128, out_features=128, bias=True)
    )
    (sensor5): LSTM(
      (lstm): LSTM(1, 128, num_laye

In [449]:
# Restore the weights saved for the current window size.
# NOTE(review): depending on the installed PyTorch version's `weights_only`
# default, torch.load may unpickle arbitrary objects - only load trusted checkpoints.
checkpoint_path = f'multi_branch/lstm_window_{window_size}.pth'
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

<All keys matched successfully>

In [450]:
# Directory (relative to the working directory) intended for saved evaluation arrays
save_dir = 'multi_branch/array'

In [451]:
def evaluate_model(model, test_loader, scaler, device='cuda', save_path=None):
    """Evaluate ``model`` on ``test_loader`` and report sample-averaged metrics.

    Predictions and targets are collected batch by batch, flattened to
    ``(n_samples, n_points)``, mapped back to physical units with
    ``scaler.inverse_transform`` and scored per sample. The per-sample scores
    are then averaged over all samples.

    Args:
        model: Module called as ``model(branch_batch, trunk_batch)``; it is
            switched to eval mode before inference.
        test_loader: Iterable yielding ``(branch_batch, trunk_batch,
            target_batch)`` where ``branch_batch`` is a dict of tensors.
        scaler: Object exposing ``inverse_transform`` for 2-D arrays.
        device: Device the inputs are moved to before the forward pass.
        save_path: Optional ``.npy`` path. When given, the per-sample metrics
            are saved there as an ``(n_samples, 4)`` array with columns
            ``rmse, mae, r2, l2_error``; missing parent folders are created.

    Returns:
        Tuple ``(rmse, mae, r2, l2_error)`` of sample-averaged metrics, where
        ``l2_error`` is the relative L2 error in percent.

    Raises:
        ValueError: If ``test_loader`` yields no batches.

    Note:
        A sample whose targets have zero variance (R²) or zero norm (relative
        L2) has a zero denominator; NumPy then produces inf/nan for that
        sample and the corresponding average inherits it.
    """
    model.eval()
    pred_batches, target_batches = [], []

    with torch.no_grad():
        for branch_batch, trunk_batch, target_batch in test_loader:
            branch_batch = {key: value.to(device) for key, value in branch_batch.items()}
            trunk_batch = trunk_batch.to(device)
            output = model(branch_batch, trunk_batch)
            pred_batches.append(output.cpu().numpy())
            # Targets are only needed on the host, so they are not sent to `device`
            target_batches.append(target_batch.cpu().numpy())

    if not pred_batches:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    # Convert lists to numpy arrays
    all_preds = np.concatenate(pred_batches, axis=0)
    all_targets = np.concatenate(target_batches, axis=0)

    print("All predictions shape before reshape:", all_preds.shape)
    print("All targets shape before reshape:", all_targets.shape)

    # Reshape to 2D (n_samples, n_features) for inverse scaling
    all_preds = all_preds.reshape(all_preds.shape[0], -1)
    all_targets = all_targets.reshape(all_targets.shape[0], -1)

    print("All predictions shape after reshape:", all_preds.shape)
    print("All targets shape after reshape:", all_targets.shape)

    # Inverse scaling
    all_preds = scaler.inverse_transform(all_preds)
    all_targets = scaler.inverse_transform(all_targets)

    # Per-sample metrics, reduced along the point axis. Sums are accumulated
    # in float64 so that long float32 rows do not lose precision.
    errors = all_preds - all_targets
    n_points = errors.shape[1]
    ss_res = np.sum(errors ** 2, axis=1, dtype=np.float64)
    centered = all_targets - np.mean(all_targets, axis=1, keepdims=True)
    ss_tot = np.sum(centered ** 2, axis=1, dtype=np.float64)
    target_sq_norm = np.sum(all_targets ** 2, axis=1, dtype=np.float64)

    rmse = np.sqrt(ss_res / n_points)
    mae = np.mean(np.abs(errors), axis=1, dtype=np.float64)
    r2 = 1 - ss_res / ss_tot
    l2_error = np.sqrt(ss_res) / np.sqrt(target_sq_norm) * 100

    # Optionally persist the per-sample metrics
    if save_path is not None:
        results = np.stack((rmse, mae, r2, l2_error), axis=1)
        os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
        np.save(save_path, results)
        print(f"Results saved to {save_path}")

    # Compute average metrics
    rmse = np.mean(rmse)
    mae = np.mean(mae)
    r2 = np.mean(r2)
    l2_error = np.mean(l2_error)

    print("Final Model Evaluation on Test Set:")
    print(f"RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}, L2 Error: {l2_error:.4f}")

    return rmse, mae, r2, l2_error

In [452]:
# Score the restored model on the test split and echo the returned metrics tuple
test_metrics = evaluate_model(model, test_loader, scaler_target, device=device)
print(test_metrics)

All predictions shape before reshape: (276, 65341, 1)
All targets shape before reshape: (276, 65341, 1)
All predictions shape after reshape: (276, 65341)
All targets shape after reshape: (276, 65341)
Final Model Evaluation on Test Set:
RMSE: 0.0003, MAE: 0.0003, R²: 0.9816, L2 Error: 0.8948
(0.00029908805, 0.0002630907, 0.9815862175733934, 0.8947618552011208)
