# Notebook \#1 - Operator Learning and Optimal Control 

version: DeepONet

Let's start by importing everything that we need for training and solving optimal control problems.

In [None]:
import torch
from utils.scripts import solve_optimization
from models.deeponet import DeepONetCartesianProd
from utils.settings import compute_loss_random_grid, gradient_automatic
from utils.data import MultiFunctionDatasetODE, save_dataset
from utils.scripts import load_data
from torch.optim.lr_scheduler import StepLR
import torch.optim as optim
from utils.training import training

# Use the GPU when CUDA is available; otherwise run everything on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

We can define the DeepONet architecture using the class `DeepONetCartesianProd`. It takes two lists as input, `branch_net` and `trunk_net`, which define the number of neurons in each layer. `branch_activations` and `trunk_activations` define which activation functions will be used.

In [2]:
# Model hyper-parameters
m = 200         # number of sensors (branch-net input size)
n_hid = 200     # size of every hidden layer
p = 200         # output size of both sub-networks
dim_x = 1       # trunk-net input size

# Layer widths: input, four hidden layers, output.
branch_net = [m] + [n_hid] * 4 + [p]
trunk_net = [dim_x] + [n_hid] * 4 + [p]

# One activation per linear layer: tanh on the hidden layers, none on the output.
branch_activations = ['tanh'] * 4 + ['none']
trunk_activations = ['tanh'] * 4 + ['none']

# Build the network and move it to the selected device.
model = DeepONetCartesianProd(
    branch_net,
    trunk_net,
    branch_activations,
    trunk_activations,
).to(device)

# Leave the model as the last expression so the notebook shows its layout.
model

DeepONetCartesianProd(
  (branch_net): DenseNetwork(
    (network): Sequential(
      (0): Linear(in_features=200, out_features=200, bias=True)
      (1): Tanh()
      (2): Linear(in_features=200, out_features=200, bias=True)
      (3): Tanh()
      (4): Linear(in_features=200, out_features=200, bias=True)
      (5): Tanh()
      (6): Linear(in_features=200, out_features=200, bias=True)
      (7): Tanh()
      (8): Linear(in_features=200, out_features=200, bias=True)
      (9): Identity()
    )
  )
  (trunk_net): DenseNetwork(
    (network): Sequential(
      (0): Linear(in_features=1, out_features=200, bias=True)
      (1): Tanh()
      (2): Linear(in_features=200, out_features=200, bias=True)
      (3): Tanh()
      (4): Linear(in_features=200, out_features=200, bias=True)
      (5): Tanh()
      (6): Linear(in_features=200, out_features=200, bias=True)
      (7): Tanh()
      (8): Linear(in_features=200, out_features=200, bias=True)
      (9): Identity()
    )
  )
)

Now we have a model that we can either train to learn a specific differential equation or initialise with pre-trained weights. We can create a dataset using the custom `MultiFunctionDatasetODE` class. `fraction_supervised` defines what fraction of the generated input functions will have a precomputed trajectory. The options for `selected_problem` are `linear`, `oscillatory`, `polynomial_tracking`, `nonlinear` and `singular_arc`, like the ones used in the thesis. This set can be easily extended.

In [None]:
# Problem and architecture identifiers reused by the later cells.
selected_problem = 'linear'
architecture = 'deeponet'

# Generate the training functions for the selected problem.
ds = MultiFunctionDatasetODE(
    m=m,                            # same sensor count the model was built with
    n_functions=10000,
    function_types=['sine', 'polynomial', 'constant', 'linear'],
    grf_lb=0.05,
    grf_ub=0.5,
    architecture=architecture,
    degree_range=(1, 5),
    slope_range=(-2, 2),
    intercept_range=(-2, 2),
    frequency_range=(0.1, 30),      # For 'sine'
    amplitude_range=(0.5, 2),
    coeff_range=(-3, 3),
    end_time=1,
    project=True,
    # NOTE(review): was [-1.5, -1.5], a zero-width interval. Presumably a typo for
    # [lower, upper] = [-1.5, 1.5]; confirm against MultiFunctionDatasetODE.
    bound=[-1.5, 1.5],
    num_domain=200,
    include_supervision=True,
    fraction_supervised=1,
    problem=selected_problem,
)

It is also possible to save the dataset and reuse it in future experiments:

In [None]:
# Persist the generated dataset so later experiments can reuse it.
save_dataset(
    ds,
    "datasets/linear/",
    name='test',
)

Given the paths to the saved train and test datasets, the `load_data()` function creates a `train_loader` and a `test_loader` with the default `batch_size = 64`.

In [None]:
# Seed forwarded to the data-loading helper.
SEED = 42

# Build the train/test loaders from the saved dataset files.
train_loader, test_loader = load_data(architecture, 'datasets/linear/train.pt', 'datasets/linear/test.pt', SEED)

Before starting to train your model, you need to define `compute_loss`. For our case we can just use the pre-written one:

In [6]:
# Look up the loss definitions by `selected_problem` rather than a hard-coded
# key, so the losses always match the problem used for the dataset and passed
# to `training`.
compute_loss = compute_loss_random_grid[selected_problem]
compute_loss

{'physics_loss': <function utils.settings.<lambda>(args)>,
 'initial_loss': <function utils.settings.<lambda>(args)>}

which, under the hood, is essentially a dictionary defining `physics_loss` and `initial_loss` as lambda functions that take `args` as input. `args` is also a dictionary and can hold any parameters, such as `t`, `u`, `x`. Note that for DeepONet these losses should be defined on a random grid, and for FNO and LNO on a fixed grid.

For the gradient we have the function `gradient_automatic`, which takes `x` and `t` as input and calculates the gradient via automatic differentiation:

In [7]:
# Show the imported helper's repr (module and signature) as the cell output.
gradient_automatic

<function utils.settings.gradient_automatic(x, t)>

Define everything needed for training:

In [None]:
#Initialize Optimizer
lr = 0.0001
epochs = 2000
optimizer = optim.Adam(model.parameters(), lr=lr)

scheduler = StepLR(
    optimizer,
    step_size=100,   
    gamma=0.9,     
)

Then just use our function `training`. Make sure to pass the correct `architecture="deeponet"`, `problem="linear"`, `w=[1, 1]` (weights for the losses), `save_plot=20` (how often to produce the analytics, prediction and loss plots), and `save=100` (how often to save the model and the computation-time array).

In [None]:
# Train the operator network. `architecture` is now passed explicitly, as the
# notebook text instructs, so the run does not depend on the function's default.
# NOTE(review): keyword name taken from the notebook text; confirm it against
# the signature of utils.training.training.
model, losses = training(
    model,
    optimizer,
    scheduler,
    train_loader,
    test_loader,
    compute_loss,
    gradient_automatic,
    num_epochs=epochs,
    architecture=architecture,
    problem=selected_problem,
    w=[1, 1],   # weights of the loss terms
)

Or you can just load a pre-trained model:

In [32]:
# Checkpoint of the DeepONet trained on the linear problem.
ckpt_path = "trained_models/linear/deeponet/unsupervised/epoch[1800]_model_time_[20250717_215634]_loss_[0.0006].pth"

# Rebuild a fresh network on the target device.
model = DeepONetCartesianProd(
    branch_net, trunk_net, branch_activations, trunk_activations
).to(device)

# Restore the stored weights; kept as the last expression so the key-matching
# report is displayed.
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt["model_state_dict"])

<All keys matched successfully>

# Control problem with Linear ODE

Now we are ready to solve the optimal control problem. We have control over the weights assigned to the physics loss, the objective function, the initial loss, the smoothness loss and the boundary loss (if there is any such condition).

In [33]:
# Random starting control, one value per sensor point.
initial_guess = torch.rand(1, m, dtype=torch.float32, device=device, requires_grad=True)

# Weights of the individual loss terms handed to the solver.
w = [100, 1, 1, 1, 0]

# Optimise the control for the linear problem through the trained operator.
analytics, x, u, x_optimal, u_optimal = solve_optimization(
    model,
    'linear',
    initial_guess,
    lr=1e-3,
    architecture="deeponet",
    w=w,
    num_epochs=12000,
    m=m,
    device=device,
    logging=True,
)

Epoch   50 | Loss: 7.350015 | rel_err_u: 3.8967, rel_err_x: 0.5252
Epoch  100 | Loss: 5.106818 | rel_err_u: 3.8528, rel_err_x: 0.5242
Epoch  150 | Loss: 3.604753 | rel_err_u: 3.8203, rel_err_x: 0.5243
Plot saved to found_trajectories/linear/deeponet/plot.png
Epoch  200 | Loss: 2.589784 | rel_err_u: 3.7956, rel_err_x: 0.5242
Epoch  250 | Loss: 1.903567 | rel_err_u: 3.7757, rel_err_x: 0.5237
Epoch  300 | Loss: 1.441365 | rel_err_u: 3.7588, rel_err_x: 0.5229
Epoch  350 | Loss: 1.131671 | rel_err_u: 3.7436, rel_err_x: 0.5219
Plot saved to found_trajectories/linear/deeponet/plot.png
Epoch  400 | Loss: 0.925187 | rel_err_u: 3.7294, rel_err_x: 0.5207
Epoch  450 | Loss: 0.788088 | rel_err_u: 3.7157, rel_err_x: 0.5194
Epoch  500 | Loss: 0.697338 | rel_err_u: 3.7020, rel_err_x: 0.5179
Epoch  550 | Loss: 0.637323 | rel_err_u: 3.6883, rel_err_x: 0.5164
Plot saved to found_trajectories/linear/deeponet/plot.png
Epoch  600 | Loss: 0.597494 | rel_err_u: 3.6744, rel_err_x: 0.5148
Epoch  650 | Loss: 0.5

In [34]:
# Report the last objective value recorded during the optimisation.
final_objective = analytics['obj'][-1]
print(f"Final objective loss: {final_objective}")

Final objective loss: 0.19274890422821045


<img src="found_trajectories/linear/deeponet/plot.png" width="600">

Let's repeat the same process for all other problems with DeepONet

# Control problem with Oscillatory Forcing

In [37]:
# Checkpoint of the DeepONet trained on the oscillatory-forcing problem.
ckpt_path = "trained_models/oscillatory/deeponet/unsupervised/epoch[1500]_model_time_[20250717_222232]_loss_[0.0065].pth"

# Rebuild the network on the target device and restore the stored weights.
model = DeepONetCartesianProd(
    branch_net, trunk_net, branch_activations, trunk_activations
).to(device)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt["model_state_dict"])

# Start from the all-zero control.
initial_guess = torch.zeros(1, m, dtype=torch.float32, device=device, requires_grad=True)

# Weights of the individual loss terms handed to the solver.
w = [100, 1, 1, 0, 100]

analytics, x, u, x_optimal, u_optimal = solve_optimization(
    model,
    'oscillatory',
    initial_guess,
    lr=1e-3,
    architecture="deeponet",
    w=w,
    num_epochs=12000,
    m=m,
    device=device,
    logging=True,
)

Epoch   50 | Loss: 96.521370 | rel_err_u: 10.6354, rel_err_x: 0.0509
Epoch  100 | Loss: 93.286140 | rel_err_u: 19.8667, rel_err_x: 0.1034
Epoch  150 | Loss: 90.956848 | rel_err_u: 27.8460, rel_err_x: 0.1533
Plot saved to found_trajectories/oscillatory/deeponet/plot.png
Epoch  200 | Loss: 89.237671 | rel_err_u: 34.8885, rel_err_x: 0.1994
Epoch  250 | Loss: 87.914474 | rel_err_u: 41.3137, rel_err_x: 0.2408
Epoch  300 | Loss: 86.835518 | rel_err_u: 47.3954, rel_err_x: 0.2769
Epoch  350 | Loss: 85.885376 | rel_err_u: 53.3890, rel_err_x: 0.3063
Plot saved to found_trajectories/oscillatory/deeponet/plot.png
Epoch  400 | Loss: 84.964920 | rel_err_u: 59.5218, rel_err_x: 0.3280
Epoch  450 | Loss: 83.984947 | rel_err_u: 65.9549, rel_err_x: 0.3438
Epoch  500 | Loss: 82.912613 | rel_err_u: 72.6332, rel_err_x: 0.3616
Epoch  550 | Loss: 81.815392 | rel_err_u: 79.2246, rel_err_x: 0.3874
Plot saved to found_trajectories/oscillatory/deeponet/plot.png
Epoch  600 | Loss: 80.779320 | rel_err_u: 85.4767, r

In [38]:
# Report the last objective value recorded during the optimisation.
final_objective = analytics['obj'][-1]
print(f"Final objective loss: {final_objective}")

Final objective loss: 2.029810667037964


<img src="found_trajectories/oscillatory/deeponet/plot.png" width="600">

# Control problem with Polynomial Tracking

In [40]:
# Checkpoint of the DeepONet trained on the polynomial-tracking problem.
ckpt_path = "trained_models/polynomial_tracking/deeponet/unsupervised/epoch[1300]_model_time_[20250717_222408]_loss_[0.0149].pth"

# Rebuild the network on the target device and restore the stored weights.
model = DeepONetCartesianProd(
    branch_net, trunk_net, branch_activations, trunk_activations
).to(device)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt["model_state_dict"])

# Random starting control, one value per sensor point.
initial_guess = torch.rand(1, m, dtype=torch.float32, device=device, requires_grad=True)

# Weights of the individual loss terms handed to the solver.
w = [200, 1, 1, 2, 0]

analytics, x, u, x_optimal, u_optimal = solve_optimization(
    model,
    'polynomial_tracking',
    initial_guess,
    lr=1e-3,
    architecture="deeponet",
    w=w,
    num_epochs=10000,
    m=m,
    device=device,
    logging=True,
)

Epoch   50 | Loss: 12.104949 | rel_err_u: 2.0987, rel_err_x: 1.2192
Epoch  100 | Loss: 7.915780 | rel_err_u: 2.0225, rel_err_x: 1.2422
Epoch  150 | Loss: 5.255310 | rel_err_u: 1.9594, rel_err_x: 1.2643
Plot saved to found_trajectories/polynomial_tracking/deeponet/plot.png
Epoch  200 | Loss: 3.539303 | rel_err_u: 1.9099, rel_err_x: 1.2784
Epoch  250 | Loss: 2.427424 | rel_err_u: 1.8712, rel_err_x: 1.2856
Epoch  300 | Loss: 1.704902 | rel_err_u: 1.8405, rel_err_x: 1.2883
Epoch  350 | Loss: 1.234455 | rel_err_u: 1.8157, rel_err_x: 1.2880
Plot saved to found_trajectories/polynomial_tracking/deeponet/plot.png
Epoch  400 | Loss: 0.928016 | rel_err_u: 1.7952, rel_err_x: 1.2858
Epoch  450 | Loss: 0.728640 | rel_err_u: 1.7777, rel_err_x: 1.2824
Epoch  500 | Loss: 0.599147 | rel_err_u: 1.7623, rel_err_x: 1.2780
Epoch  550 | Loss: 0.515069 | rel_err_u: 1.7482, rel_err_x: 1.2728
Plot saved to found_trajectories/polynomial_tracking/deeponet/plot.png
Epoch  600 | Loss: 0.460259 | rel_err_u: 1.7351, 

In [41]:
# Report the last objective value recorded during the optimisation.
final_objective = analytics['obj'][-1]
print(f"Final objective loss: {final_objective}")

Final objective loss: 0.14795131981372833


<img src="found_trajectories/polynomial_tracking/deeponet/plot.png" width="600">

# Control problem with Nonlinear ODE

In [None]:
# Checkpoint of the DeepONet trained on the nonlinear ODE problem.
ckpt_path = "trained_models/nonlinear/deeponet/unsupervised/epoch[1500]_model_time_[20250717_215941]_loss_[0.0099].pth"

# Rebuild the network on the target device and restore the stored weights.
model = DeepONetCartesianProd(
    branch_net, trunk_net, branch_activations, trunk_activations
).to(device)
ckpt = torch.load(ckpt_path, map_location=device)
model.load_state_dict(ckpt["model_state_dict"])

# Random starting control, one value per sensor point.
initial_guess = torch.rand(1, m, dtype=torch.float32, device=device, requires_grad=True)

# Weights of the individual loss terms handed to the solver.
w = [150, 1, 1, 1, 0]

analytics, x, u, x_optimal, u_optimal = solve_optimization(
    model,
    'nonlinear',
    initial_guess,
    lr=1e-3,
    architecture="deeponet",
    w=w,
    num_epochs=20000,
    m=m,
    bounds=[-1.5, 1.5],
    device=device,
    logging=True,
)

Epoch   50 | Loss: 145.218002 | rel_err_u: 1.7862, rel_err_x: 0.3321
Epoch  100 | Loss: 85.869713 | rel_err_u: 1.7334, rel_err_x: 0.2606
Epoch  150 | Loss: 54.967941 | rel_err_u: 1.6883, rel_err_x: 0.2268
Plot saved to found_trajectories/nonlinear/deeponet/plot.png
Epoch  200 | Loss: 37.376125 | rel_err_u: 1.6517, rel_err_x: 0.2113
Epoch  250 | Loss: 26.777822 | rel_err_u: 1.6234, rel_err_x: 0.2074
Epoch  300 | Loss: 19.940796 | rel_err_u: 1.6016, rel_err_x: 0.2096
Epoch  350 | Loss: 15.378124 | rel_err_u: 1.5836, rel_err_x: 0.2125
Plot saved to found_trajectories/nonlinear/deeponet/plot.png
Epoch  400 | Loss: 12.277107 | rel_err_u: 1.5676, rel_err_x: 0.2134
Epoch  450 | Loss: 10.075662 | rel_err_u: 1.5528, rel_err_x: 0.2130
Epoch  500 | Loss: 8.443405 | rel_err_u: 1.5392, rel_err_x: 0.2121
Epoch  550 | Loss: 7.191081 | rel_err_u: 1.5267, rel_err_x: 0.2112
Plot saved to found_trajectories/nonlinear/deeponet/plot.png
Epoch  600 | Loss: 6.203009 | rel_err_u: 1.5150, rel_err_x: 0.2102
Epo

In [50]:
# Report the last objective value recorded during the optimisation.
final_objective = analytics['obj'][-1]
print(f"Final objective loss: {final_objective}")

Final objective loss: -0.10695081204175949


<img src="found_trajectories/nonlinear/deeponet/plot.png" width="600">

# Control problem with Singular Arc

In [54]:
# Rebuild the network on the target device and restore the weights trained on
# the singular-arc problem.
model = DeepONetCartesianProd(branch_net, trunk_net, branch_activations, trunk_activations)
model.to(device)

ckpt = torch.load("trained_models/singular_arc/deeponet/unsupervised/epoch[1500]_model_time_[20250717_220059]_loss_[0.0059].pth", map_location=device)
model.load_state_dict(ckpt["model_state_dict"])

# Random starting control shifted into [-3, -2). The shift is applied BEFORE
# enabling gradients so that `initial_guess` is a leaf tensor. The original
# `torch.rand(..., requires_grad=True) - 3` produced a non-leaf result, which
# cannot be handed directly to an optimizer and never gets `.grad` populated.
initial_guess = (
    torch.rand((1, m), dtype=torch.float32, device=device) - 3
).requires_grad_(True)

# Weights of the individual loss terms handed to the solver.
w = [10, 1, 1, 1, 10]

analytics, x, u, x_optimal, u_optimal = solve_optimization(
    model, 'singular_arc', initial_guess,
    lr=0.001,
    architecture="deeponet",
    w=w,
    num_epochs=10000,
    m=m,
    bounds=[-3.5, 0],
    device=device, logging=True
)

Epoch   50 | Loss: 10.701880 | rel_err_u: 1.0008, rel_err_x: 1.2033
Epoch  100 | Loss: 10.295107 | rel_err_u: 0.9726, rel_err_x: 1.1552
Epoch  150 | Loss: 9.911869 | rel_err_u: 0.9446, rel_err_x: 1.1068
Plot saved to found_trajectories/singular_arc/deeponet/plot.png
Epoch  200 | Loss: 9.550135 | rel_err_u: 0.9170, rel_err_x: 1.0587
Epoch  250 | Loss: 9.208881 | rel_err_u: 0.8898, rel_err_x: 1.0111
Epoch  300 | Loss: 8.887429 | rel_err_u: 0.8631, rel_err_x: 0.9643
Epoch  350 | Loss: 8.585202 | rel_err_u: 0.8370, rel_err_x: 0.9184
Plot saved to found_trajectories/singular_arc/deeponet/plot.png
Epoch  400 | Loss: 8.301638 | rel_err_u: 0.8115, rel_err_x: 0.8736
Epoch  450 | Loss: 8.036135 | rel_err_u: 0.7866, rel_err_x: 0.8302
Epoch  500 | Loss: 7.788013 | rel_err_u: 0.7623, rel_err_x: 0.7882
Epoch  550 | Loss: 7.556484 | rel_err_u: 0.7384, rel_err_x: 0.7478
Plot saved to found_trajectories/singular_arc/deeponet/plot.png
Epoch  600 | Loss: 7.340657 | rel_err_u: 0.7151, rel_err_x: 0.7092
Ep

In [55]:
# Report the last objective value recorded during the optimisation.
final_objective = analytics['obj'][-1]
print(f"Final objective loss: {final_objective}")

Final objective loss: 2.1729605197906494


<img src="found_trajectories/singular_arc/deeponet/plot.png" width="600">