In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler

import sys
import os

src_path = os.path.abspath(os.path.join(os.getcwd(), 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)
    
from utils import create_sliding_windows, SequentialDeepONetDataset
from s_deeponet import SequentialDeepONet
from deeponet import DeepONet

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cpu


In [3]:
# Load neutron monitoring data
input_data = np.load('data/neutron_data_22yrs.npy')
trunk = np.load('data/grid_points.npy')
target = np.load('data/dose_array.npy')

# Normalize trunk input
trunk[:, 0] = (trunk[:, 0] - np.min(trunk[:, 0])) / (np.max(trunk[:, 0]) - np.min(trunk[:, 0]))
trunk[:, 1] = (trunk[:, 1] - np.min(trunk[:, 1])) / (np.max(trunk[:, 1]) - np.min(trunk[:, 1]))

In [4]:
def train_val_test_split(input_data, target):
    # Define the number of test samples (last 365 days)
    test_size = 365

    # Split data into training+validation and test
    train_val_input = input_data[:-test_size]
    train_val_target = target[:-test_size]
    test_input = input_data[-test_size:]
    test_target = target[-test_size:]

    # Calculate split index for training and validation
    train_size = int(len(train_val_input) * 0.5)  # 80% for training
    val_size = len(train_val_input) - train_size  # 20% for validation

    # Training set
    train_input = train_val_input[:train_size]
    train_target = train_val_target[:train_size]

    # Validation set
    val_input = train_val_input[train_size:]
    val_target = train_val_target[train_size:]

    # Final shapes check
    print("Train input shape:", train_input.shape)
    print("Validation input shape:", val_input.shape)
    print("Test input shape:", test_input.shape)

    return train_input, train_target, val_input, val_target, test_input, test_target

# Assuming input_data and target are defined elsewhere in the notebook
train_input, train_target, val_input, val_target, test_input, test_target = train_val_test_split(input_data, target)

Train input shape: (4017, 12)
Validation input shape: (4018, 12)
Test input shape: (365, 12)


In [5]:
# input data normalization (min-max scaling)
scaler = MinMaxScaler()
train_input = scaler.fit_transform(train_input)
val_input = scaler.transform(val_input)
test_input = scaler.transform(test_input)

# target data normalization (min-max scaling)
scaler_target = MinMaxScaler()
train_target = scaler_target.fit_transform(train_target)[..., np.newaxis]
val_target = scaler_target.transform(val_target)[..., np.newaxis]
test_target = scaler_target.transform(test_target)[..., np.newaxis]

In [6]:
window_size = 30
print("Window size:", window_size)

# Generate sequences for the training set
train_input_seq, train_target_seq = create_sliding_windows(train_input, train_target, window_size)

# Generate sequences for the testing set
test_input_seq, test_target_seq = create_sliding_windows(test_input, test_target, window_size)

# generate sequences for the validation set
val_input_seq, val_target_seq = create_sliding_windows(val_input, val_target, window_size)


# print the shapes of the generated sequences
print("Check the shapes of the generated sequences\n-----------------------------------------")
print("Train input shape:", train_input_seq.shape)
print("Train target shape:", train_target_seq.shape)
print("Validation input shape:", val_input_seq.shape)
print("Validation target shape:", val_target_seq.shape)
print("Test input shape:", test_input_seq.shape)
print("Test target shape:", test_target_seq.shape)
print("-----------------------------------------")


Window size: 30
Check the shapes of the generated sequences
-----------------------------------------
Train input shape: torch.Size([3988, 30, 12])
Train target shape: torch.Size([3988, 65341, 1])
Validation input shape: torch.Size([3989, 30, 12])
Validation target shape: torch.Size([3989, 65341, 1])
Test input shape: torch.Size([336, 30, 12])
Test target shape: torch.Size([336, 65341, 1])
-----------------------------------------


In [7]:
# Create DataLoaders for training and validation sets
print("Create DataLoaders for training and validation sets\n-----------------------------------------")
batch_size = 16
print("Batch size:", batch_size)

train_dataset = SequentialDeepONetDataset(train_input_seq, trunk, train_target_seq)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)

val_dataset = SequentialDeepONetDataset(val_input_seq, trunk, val_target_seq)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

test_dataset = SequentialDeepONetDataset(test_input_seq, trunk, test_target_seq)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Create DataLoaders for training and validation sets
-----------------------------------------
Batch size: 16


In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from fourier_embedding import FourierFeatures
from fcn import FCN
from lstm import LSTM, AttentionLSTM
from gru import GRU, AttentionGRU
from rnn import RNN
from transformer import Transformer, Informer

class SequentialDeepONet(nn.Module):
    def __init__(self, branch_type, branch_input_size, branch_hidden_size, branch_num_layers, branch_output_size, 
                 trunk_architecture, num_outputs, use_transform=True, activation_fn=nn.ReLU, num_heads=4):
        super(SequentialDeepONet, self).__init__()

        self.use_transform = use_transform
        self.num_outputs = num_outputs

        # Initialize the branch network based on the specified type
        if branch_type == 'rnn':
            self.branch_net = RNN(branch_input_size, branch_hidden_size, branch_num_layers, branch_output_size)
        elif branch_type == 'lstm':
            self.branch_net = LSTM(branch_input_size, branch_hidden_size, branch_num_layers, branch_output_size)
        elif branch_type == 'gru':
            self.branch_net = GRU(branch_input_size, branch_hidden_size, branch_num_layers, branch_output_size)
        elif branch_type == 'attention_lstm':
            self.branch_net = AttentionLSTM(branch_input_size, branch_hidden_size, branch_num_layers, branch_output_size, num_heads=num_heads)
        elif branch_type == 'attention_gru':
            self.branch_net = AttentionGRU(branch_input_size, branch_hidden_size, branch_num_layers, branch_output_size, num_heads=num_heads)
        elif branch_type == 'transformer':
            self.branch_net = Transformer(branch_input_size, branch_hidden_size, num_heads, branch_num_layers, branch_output_size)
        elif branch_type == 'informer':
            self.branch_net = Informer(branch_input_size, branch_hidden_size, num_heads, branch_num_layers, branch_output_size)
        elif branch_type == 'fcn':  # New FCN-based branch option
            self.branch_net = FCN([branch_input_size] + [branch_hidden_size] * (branch_num_layers - 1) + [branch_output_size], activation_fn)
        else:
            raise ValueError(f"Unsupported branch type: {branch_type}")

        # Trunk network (fully connected using the FCN class)
        self.trunk_net = FCN(trunk_architecture, activation_fn)

        if self.use_transform:
            self.final_layer = nn.Linear(branch_output_size, num_outputs)
        else:
            self.b = nn.Parameter(torch.zeros(1))  # Optional bias

    def forward(self, branch_input, trunk_input):
        # Process branch input (sequential data) through the selected branch network
        branch_output = self.branch_net(branch_input) 
        
        #print("Branch output shape before processing:", branch_output.shape)

        if len(branch_output.shape) == 3:  # If sequential (RNN, LSTM, GRU, etc.)
            branch_output = branch_output[:, -1, :]  # Take last time step

        #print("Branch output shape:", branch_output.shape)
        # Process trunk input (spatial data) through the trunk network
        trunk_output = self.trunk_net(trunk_input)
        #print("Trunk output shape:", trunk_output.shape)

        # Combine branch and trunk outputs using einsum
        combined_output = torch.einsum('bi,bpi->bpi', branch_output, trunk_output)  # Shape: (batch_size, num_trunk_points, hidden_size)
        print("Combined output shape:", combined_output.shape)
        
        if self.use_transform:
            combined_output = self.final_layer(combined_output)  # Final prediction
        else:
            combined_output = combined_output.sum(dim=-1, keepdim=True)  # Reduce last dimension
            combined_output += self.b  # Add bias
        print("Final output shape:", combined_output.shape)

        return combined_output


In [32]:
def init_model():
    dim = 128
    model = SequentialDeepONet(
        branch_type='lstm',
        branch_input_size=12,
        branch_hidden_size=128,
        branch_num_layers=4,
        branch_output_size=dim,
        trunk_architecture=[2, 128, 128, dim],
        num_outputs=1,
        use_transform=False,
        activation_fn=nn.ReLU,
    )
    return model

In [33]:
model = init_model().to(device)
print(model)

SequentialDeepONet(
  (branch_net): LSTM(
    (lstm): LSTM(12, 128, num_layers=4, batch_first=True)
    (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    (fc): Linear(in_features=128, out_features=128, bias=True)
  )
  (trunk_net): FCN(
    (network): Sequential(
      (0): Linear(in_features=2, out_features=128, bias=True)
      (1): ReLU()
      (2): Linear(in_features=128, out_features=128, bias=True)
      (3): ReLU()
      (4): Linear(in_features=128, out_features=128, bias=True)
    )
  )
)


In [34]:
print("Set the hyperparameters\n-----------------------------------------")
# save path
save_dir = 'single_branch/'
save_path = os.path.join(save_dir, f'test_fcn_window_{window_size}.pth')
print(save_path)

num_epochs = 100
learning_rate = 1e-3
patience = 5

print("Number of epochs:", num_epochs)
print("Learning rate:", learning_rate)
print("Patience:", patience)

# Loss function
criterion = nn.MSELoss()
print("Loss function:", criterion)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5, verbose=True)
#print("Optimizer:", optimizer)
print("-----------------------------------------")


Set the hyperparameters
-----------------------------------------
single_branch/test_fcn_window_30.pth
Number of epochs: 100
Learning rate: 0.001
Patience: 5
Loss function: MSELoss()
-----------------------------------------


In [35]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, save_path):
    best_val_loss = np.inf
    counter = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for i, (inputs, trunk, targets) in enumerate(train_loader):
            inputs, trunk, targets = inputs.to(device), trunk.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, trunk)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            break

        train_loss /= len(train_loader)

        model.eval()
        val_loss = 0.0
        for i, (inputs, trunk, targets) in enumerate(val_loader):
            inputs, trunk, targets = inputs.to(device), trunk.to(device), targets.to(device)
            outputs = model(inputs, trunk)
            loss = criterion(outputs, targets)
            val_loss += loss.item()

        val_loss /= len(val_loader)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.6f}, Validation Loss: {val_loss:.6f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), save_path)
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    print("Training completed!")
    return model

In [36]:
trained_model = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, patience, save_path)

Combined output shape: torch.Size([16, 65341, 128])
Final output shape: torch.Size([16, 65341, 1])


KeyboardInterrupt: 

In [14]:
def evaluate_model(model, test_loader, scaler, device='cuda'):
    model.eval()
    all_preds, all_targets = [], []
    
    with torch.no_grad():
        for branch_batch, trunk_batch, target_batch in test_loader:
            branch_batch, trunk_batch, target_batch = (
                branch_batch.to(device),
                trunk_batch.to(device),
                target_batch.to(device),
            )
            output = model(branch_batch, trunk_batch)
            all_preds.append(output.cpu().numpy())
            all_targets.append(target_batch.cpu().numpy())

    # Convert lists to numpy arrays
    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)
    
    
    print("All predictions shape before reshape:", all_preds.shape)
    print("All targets shape before reshape:", all_targets.shape)

    # Reshape to 2D (n_samples, n_features) for inverse scaling
    all_preds = all_preds.reshape(all_preds.shape[0], -1)
    all_targets = all_targets.reshape(all_targets.shape[0], -1)

    print("All predictions shape after reshape:", all_preds.shape)
    print("All targets shape after reshape:", all_targets.shape)

    # Inverse scaling
    all_preds = scaler.inverse_transform(all_preds)
    all_targets = scaler.inverse_transform(all_targets)
    
    # Compute metrics for each sample
    rmse, mae, r2, l2_error = [], [], [], []
    for i in range(all_preds.shape[0]):
        rmse.append(np.sqrt(np.mean((all_preds[i] - all_targets[i]) ** 2)))
        mae.append(np.mean(np.abs(all_preds[i] - all_targets[i])))
        r2.append(1 - np.sum((all_preds[i] - all_targets[i]) ** 2) / np.sum((all_targets[i] - np.mean(all_targets[i])) ** 2))
        l2_error.append(np.linalg.norm(all_preds[i] - all_targets[i], 2))

    # Convert lists to numpy arrays
    rmse = np.array(rmse)
    mae = np.array(mae)
    r2 = np.array(r2)
    l2_error = np.array(l2_error)
    
    # save the results to a file
    results = np.stack((rmse, mae, r2, l2_error), axis=1)
    save_path = os.path.join(save_dir, f'test_fcn_window_{window_size}_results.npy')
    np.save(save_path, results)
    print(f"Results saved to {save_path}")
    
    # Compute average metrics
    rmse = np.mean(rmse)
    mae = np.mean(mae)
    r2 = np.mean(r2)
    l2_error = np.mean(l2_error)

    print(f"Final Model Evaluation on Test Set:")
    print(f"RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}, L2 Error: {l2_error:.4f}")

    return rmse, mae, r2, l2_error

In [17]:
# load the best model
model = init_model().to(device)
model.load_state_dict(torch.load(save_path))

# evaluate the model
print("Evaluate the model\n-----------------------------------------")
print(evaluate_model(model, test_loader, scaler_target, device=device))

Evaluate the model
-----------------------------------------
All predictions shape before reshape: (336, 65341, 1)
All targets shape before reshape: (336, 65341, 1)
All predictions shape after reshape: (336, 65341)
All targets shape after reshape: (336, 65341)
Results saved to single_branch/test_fcn_window_30_results.npy
Final Model Evaluation on Test Set:
RMSE: 0.0000, MAE: 0.0000, R²: 0.9997, L2 Error: 0.0122
(4.7732476e-05, 3.9387345e-05, 0.9996720497903901, 0.012201323)
