In [1]:
from xno.models import ZNO



In [2]:
import torch
import matplotlib.pyplot as plt
import sys
from neuralop.models import FNO
from neuralop.data.datasets import load_darcy_flow_small
from neuralop.utils import count_model_params
from neuralop.training import AdamW
# from neuralop.training.incremental import IncrementalFNOTrainer
from xno.training.incremental import IncrementalFNOTrainer
from neuralop.data.transforms.data_processors import IncrementalDataProcessor
from neuralop import LpLoss, H1Loss

In [3]:
# Load the small Darcy-flow dataset: one training loader plus one test loader
# for each entry of `test_resolutions` (the per-resolution sample counts and
# batch sizes are given by the parallel lists below).
train_loader, test_loaders, output_encoder = load_darcy_flow_small(
    n_train=100, batch_size=16,
    test_resolutions=[16, 32], n_tests=[100, 50], test_batch_sizes=[32, 32],
)

Loading test db for resolution 16 with 100 samples 
Loading test db for resolution 32 with 50 samples 


  data = torch.load(
  data = torch.load(Path(root_dir).joinpath(f"{dataset_name}_test_{res}.pt").as_posix())


In [4]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [5]:
from xno.models import ZNO

In [6]:
# Toggle for incremental training: start from (2, 2) modes when enabled,
# otherwise start directly at (16, 16).
incremental = True
starting_modes = (2, 2) if incremental else (16, 16)

In [7]:
# Build the ZNO model and move it to the selected device.
# `n_modes` is taken from `starting_modes` (set by the `incremental` toggle in
# the previous cell) so that the toggle actually controls the initial number of
# modes; `max_n_modes` stays fixed at (16, 16).
model = ZNO(
    max_n_modes=(16, 16),
    n_modes=starting_modes,
    hidden_channels=32,
    in_channels=1,
    out_channels=1,
).to(device)

# Parameter count, reported later in the configuration summary.
n_params = count_model_params(model)

In [8]:
# AdamW over every model parameter, with a cosine-annealed learning rate.
optimizer = AdamW(
    model.parameters(),
    lr=8e-3,
    weight_decay=1e-4,
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=30,
)


In [9]:
# Data processor for incremental-resolution training, built without input or
# output normalizers and then moved to the selected device.
# NOTE(review): the meaning of subsampling_rates / dataset_indices / epoch_gap
# is defined by IncrementalDataProcessor — confirm against its documentation.
data_transform = IncrementalDataProcessor(
    in_normalizer=None,
    out_normalizer=None,
    device=device,
    dataset_resolution=16,
    subsampling_rates=[2, 1],
    dataset_indices=[2, 3],
    epoch_gap=10,
    verbose=True,
).to(device)

Original Incre Res: change index to 0
Original Incre Res: change sub to 2
Original Incre Res: change res to 8


In [10]:
# Loss objects: an Lp loss with p=2 and an H1 loss, both with d=2.
l2loss = LpLoss(d=2, p=2)
h1loss = H1Loss(d=2)

# Train on the H1 loss; report both losses at evaluation time.
train_loss = h1loss
eval_losses = dict(h1=h1loss, l2=l2loss)

# Print a summary of the run configuration before training starts.
print("\n### N PARAMS ###\n", n_params)
print("\n### OPTIMIZER ###\n", optimizer)
print("\n### SCHEDULER ###\n", scheduler)
print("\n### LOSSES ###")
print("\n### INCREMENTAL RESOLUTION + GRADIENT EXPLAINED ###")
print(f"\n * Train: {train_loss}")
print(f"\n * Test: {eval_losses}")

# Flush so the summary appears before any training output.
sys.stdout.flush()


### N PARAMS ###
 2118721

### OPTIMIZER ###
 AdamW (
Parameter Group 0
    betas: (0.9, 0.999)
    correct_bias: True
    eps: 1e-06
    initial_lr: 0.008
    lr: 0.008
    weight_decay: 0.0001
)

### SCHEDULER ###
 <torch.optim.lr_scheduler.CosineAnnealingLR object at 0x30a883a10>

### LOSSES ###

### INCREMENTAL RESOLUTION + GRADIENT EXPLAINED ###

 * Train: <neuralop.losses.data_losses.H1Loss object at 0x30a89dc90>

 * Test: {'h1': <neuralop.losses.data_losses.H1Loss object at 0x30a89dc90>, 'l2': <neuralop.losses.data_losses.LpLoss object at 0x30a89de10>}


In [11]:
# Assemble the trainer from the model, data processor and device created above.
# The gradient-based incremental option is switched on and the loss-gap option
# is switched off.
# NOTE(review): the exact effect of the eps / buffer / max_iter settings is
# defined by IncrementalFNOTrainer — confirm against its documentation.
trainer = IncrementalFNOTrainer(
    model=model,
    n_epochs=20,
    device=device,
    data_processor=data_transform,
    verbose=True,
    # Incremental-mode selection settings.
    incremental_grad=True,
    incremental_loss_gap=False,
    incremental_grad_eps=0.9999,
    incremental_loss_eps=0.001,
    incremental_buffer=5,
    incremental_max_iter=1,
    incremental_grad_max_iter=2,
)

In [12]:
# Run training. The call is left as the cell's last expression so that the
# value returned by `train` is displayed as the cell output.
trainer.train(
    train_loader,
    test_loaders,
    optimizer,
    scheduler,
    eval_losses=eval_losses,
    training_loss=train_loss,
    regularizer=False,
)

Training on 100 samples
Testing on [50, 50] samples         on resolutions [16, 32].
Raw outputs of shape torch.Size([16, 1, 8, 8])
[0] time=0.37, avg_loss=0.8313, train_err=11.8752
Eval: 16_h1=0.7576, 16_l2=0.6052, 32_h1=0.8197, 32_l2=0.5918
[1] time=0.30, avg_loss=0.7243, train_err=10.3476
Eval: 16_h1=0.8481, 16_l2=0.6477, 32_h1=0.9783, 32_l2=0.6300
[2] time=0.27, avg_loss=0.7535, train_err=10.7638
Eval: 16_h1=0.7579, 16_l2=0.5975, 32_h1=0.7752, 32_l2=0.5880
[3] time=0.34, avg_loss=0.7255, train_err=10.3649
Eval: 16_h1=0.7619, 16_l2=0.5699, 32_h1=0.8792, 32_l2=0.5582
[4] time=0.33, avg_loss=0.7101, train_err=10.1442
Eval: 16_h1=0.7126, 16_l2=0.5546, 32_h1=0.7489, 32_l2=0.5425
[5] time=0.33, avg_loss=0.6989, train_err=9.9841
Eval: 16_h1=0.7136, 16_l2=0.5506, 32_h1=0.7681, 32_l2=0.5395
[6] time=0.33, avg_loss=0.6924, train_err=9.8920
Eval: 16_h1=0.6865, 16_l2=0.5329, 32_h1=0.7314, 32_l2=0.5208
[7] time=0.30, avg_loss=0.6718, train_err=9.5975
Eval: 16_h1=0.6901, 16_l2=0.5332, 32_h1=0.73

{'train_err': 7.599588768822806,
 'avg_loss': 0.5319712138175965,
 'avg_lasso_loss': None,
 'epoch_train_time': 1.0819875409943052,
 '16_h1': tensor(0.6523),
 '16_l2': tensor(0.5058),
 '32_h1': tensor(0.6784),
 '32_l2': tensor(0.4915)}