# Imports/Set up

In [12]:
# Specific to NERSC: Set up kernel using: https://docs.nersc.gov/services/jupyter/how-to-guides/
from __future__ import annotations

import sys

import pandas as pd
import torch
from neuralop import H1Loss
from neuralop import LpLoss
from neuralop.data.datasets import load_darcy_flow_small
from neuralop.data.datasets.darcy import DarcyDataset
from neuralop.data.transforms.data_processors import IncrementalDataProcessor
from neuralop.models import FNO
from neuralop.training import AdamW
from neuralop.training.incremental import IncrementalFNOTrainer
from neuralop.utils import count_model_params
from neuralop.utils import get_project_root

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Data directory located under the path returned by get_project_root().
root_dir = get_project_root() / 'neuralop/data/datasets/data'
print(f'{device=}')

device=device(type='cuda')


In [13]:
# Instantiate DarcyDataset against root_dir. Per the original note, this step is
# what downloads the data; change train_resolution to download different data.
data = DarcyDataset(
    root_dir=root_dir,
    train_resolution=128,
    n_train=100,
    batch_size=16,
    test_resolutions=[16, 32, 64, 128],
    n_tests=[32] * 4,  # one entry per test resolution
    test_batch_sizes=[16] * 4,  # one entry per test resolution
)

Loading test db for resolution 16 with 32 samples 
Loading test db for resolution 32 with 32 samples 
Loading test db for resolution 64 with 32 samples 
Loading test db for resolution 128 with 32 samples 


In [14]:
# Load the small Darcy-flow dataset: 1000 training samples in batches of 16,
# and 100 test samples (batches of 32) requested at each of four resolutions.
train_loader, test_loaders, output_encoder = load_darcy_flow_small(
    n_train=1000,
    batch_size=16,
    n_tests=[100] * 4,
    test_batch_sizes=[32] * 4,
    test_resolutions=[16, 32, 64, 128],
)

Loading test db for resolution 16 with 100 samples 
Loading test db for resolution 32 with 100 samples 
Loading test db for resolution 64 with 100 samples 
Loading test db for resolution 128 with 100 samples 


In [25]:
# Switch for incremental training: begin from (2, 2) modes when it is enabled,
# otherwise from (16, 16).
incremental = True
starting_modes = (2, 2) if incremental else (16, 16)

In [16]:
# Build the FNO and move it to the selected device in one expression.
# n_modes comes from starting_modes; max_n_modes is fixed at (16, 16).
model = FNO(
    in_channels=1,
    out_channels=1,
    hidden_channels=32,
    n_modes=starting_modes,
    max_n_modes=(16, 16),
).to(device)
# Parameter count, kept for the summary printed in a later cell.
n_params = count_model_params(model)

In [17]:
# Optimizer over all model parameters, paired with a cosine-annealing
# learning-rate schedule (T_max=30).
optimizer = AdamW(model.parameters(), weight_decay=1e-4, lr=8e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=30)

# Incremental data processor, created without normalizers and moved to the
# selected device in the same expression.
data_transform = IncrementalDataProcessor(
    device=device,
    verbose=True,
    in_normalizer=None,
    out_normalizer=None,
    dataset_resolution=16,
    dataset_indices=[2, 3],
    subsampling_rates=[2, 1],
    epoch_gap=10,
).to(device)

Original Incre Res: change index to 0
Original Incre Res: change sub to 2
Original Incre Res: change res to 8


In [18]:
# Loss objects: the H1 loss is used for training; both H1 and L2 are passed
# along as evaluation losses.
h1loss = H1Loss(d=2)
l2loss = LpLoss(d=2, p=2)
train_loss = h1loss
eval_losses = dict(h1=h1loss, l2=l2loss)

# Print a summary of the run configuration.
# NOTE(review): the second banner below is emitted before the train/test loss
# lines, so those lines appear beneath it rather than directly under "LOSSES".
print('\n### N PARAMS ###\n', n_params)
print('\n### OPTIMIZER ###\n', optimizer)
print('\n### SCHEDULER ###\n', scheduler)
print('\n### LOSSES ###')
print('\n### INCREMENTAL RESOLUTION + GRADIENT EXPLAINED ###')
print(f'\n * Train: {train_loss}')
# flush=True pushes everything written so far out of the stdout buffer.
print(f'\n * Test: {eval_losses}', flush=True)


### N PARAMS ###
 2110305

### OPTIMIZER ###
 AdamW (
Parameter Group 0
    betas: (0.9, 0.999)
    correct_bias: True
    eps: 1e-06
    initial_lr: 0.008
    lr: 0.008
    weight_decay: 0.0001
)

### SCHEDULER ###
 <torch.optim.lr_scheduler.CosineAnnealingLR object at 0x7fc7243bf010>

### LOSSES ###

### INCREMENTAL RESOLUTION + GRADIENT EXPLAINED ###

 * Train: <neuralop.losses.data_losses.H1Loss object at 0x7fc72419e020>

 * Test: {'h1': <neuralop.losses.data_losses.H1Loss object at 0x7fc72419e020>, 'l2': <neuralop.losses.data_losses.LpLoss object at 0x7fc72445f340>}


In [19]:
# Hand the model, data processor and device prepared in earlier cells to the
# incremental trainer, together with the incremental-training settings.
trainer = IncrementalFNOTrainer(
    # Core training setup
    model=model,
    data_processor=data_transform,
    device=device,
    n_epochs=20,
    verbose=True,
    # Boolean switches: incremental_grad is on, incremental_loss_gap is off
    incremental_grad=True,
    incremental_loss_gap=False,
    # Settings whose names carry the incremental_grad_ prefix
    incremental_grad_eps=0.9999,
    incremental_grad_max_iter=2,
    # Remaining incremental settings
    incremental_loss_eps=0.001,
    incremental_max_iter=1,
    incremental_buffer=5,
)

In [20]:
# Run training with the loaders, optimizer and scheduler from the earlier
# cells; whatever trainer.train() returns is kept in end_stats.
end_stats = trainer.train(
    train_loader,
    test_loaders,
    optimizer,
    scheduler,
    eval_losses=eval_losses,
    training_loss=train_loss,
    regularizer=False,
)

Training on 1000 samples
Testing on [50, 100, 100, 100] samples         on resolutions [16, 32, 64, 128].
Raw outputs of shape torch.Size([16, 1, 8, 8])
[0] time=1.21, avg_loss=0.6164, train_err=9.7849
Eval: 16_h1=0.6505, 16_l2=0.3764, 32_h1=0.8450, 32_l2=0.4031, 64_h1=1.1013, 64_l2=0.4172, 128_h1=1.5083, 128_l2=0.4897
[1] time=0.74, avg_loss=0.4188, train_err=6.6470
Eval: 16_h1=0.5863, 16_l2=0.3301, 32_h1=0.8127, 32_l2=0.3561, 64_h1=1.0889, 64_l2=0.3740, 128_h1=1.5940, 128_l2=0.4818
[2] time=0.75, avg_loss=0.3691, train_err=5.8588
Eval: 16_h1=0.5545, 16_l2=0.2863, 32_h1=0.8036, 32_l2=0.3216, 64_h1=1.0984, 64_l2=0.3437, 128_h1=1.6642, 128_l2=0.4832
[3] time=0.75, avg_loss=0.3473, train_err=5.5134
Eval: 16_h1=0.5317, 16_l2=0.2860, 32_h1=0.7604, 32_l2=0.3209, 64_h1=1.0230, 64_l2=0.3432, 128_h1=1.5462, 128_l2=0.4739
[4] time=0.77, avg_loss=0.3267, train_err=5.1863
Eval: 16_h1=0.5213, 16_l2=0.2676, 32_h1=0.7558, 32_l2=0.3043, 64_h1=1.0268, 64_l2=0.3290, 128_h1=1.5899, 128_l2=0.4788
[5] tim

In [21]:
# Display the statistics returned by trainer.train().
end_stats

{'train_err': 2.808846625070723,
 'avg_loss': 0.17695733737945557,
 'avg_lasso_loss': None,
 'epoch_train_time': 0.6864375360019039,
 '16_h1': tensor(0.1961, device='cuda:0'),
 '16_l2': tensor(0.1059, device='cuda:0'),
 '32_h1': tensor(0.3642, device='cuda:0'),
 '32_l2': tensor(0.1398, device='cuda:0'),
 '64_h1': tensor(0.5666, device='cuda:0'),
 '64_l2': tensor(0.1682, device='cuda:0'),
 '128_h1': tensor(1.0160, device='cuda:0'),
 '128_l2': tensor(0.3496, device='cuda:0')}

In [22]:
# Empty results table with columns for the train/test resolution and the
# l2 / h1 values.
df = pd.DataFrame(columns=['train_resolution', 'test_resolution', 'l2', 'h1'])