In [1]:
import torch
import wandb
import sys
from configmypy import ConfigPipeline, YamlConfig, ArgparseConfig
from neuralop import get_model
from neuralop import Trainer
from neuralop.training import setup
from neuralop.datasets import load_darcy_pt
from neuralop.datasets.data_transforms import MGPatchingDataProcessor
from neuralop.utils import get_wandb_api_key, count_params
from neuralop import LpLoss, H1Loss

# Loading the configuration

Open the YAML file `config/darcy_config.yaml` (in the same folder as this notebook) to inspect the parameters and change them.

In [2]:
# Read the configuration.
# `config_name` is defined once and passed to YamlConfig, so changing it here
# actually changes which configuration is requested (previously the literal
# 'default' was repeated in the call and this variable was ignored).
config_name = 'default'
pipe = ConfigPipeline([
    YamlConfig('./darcy_config.yaml', config_name=config_name, config_folder='./config'),
])
config = pipe.read_conf()
# Re-read the name from the last pipeline step after the configuration is loaded.
# NOTE(review): presumably this is the name that was actually resolved — confirm against configmypy.
config_name = pipe.steps[-1].config_name

## Setup

Here we just set up PyTorch (including distributed communication, if used) and print the configuration.

In [3]:
# Set up distributed communication, if it is being used.
# `setup` returns two values: `device`, which the model and data processor are
# moved to further down, and `is_logger`, which is used below to gate printing.
# NOTE(review): presumably `is_logger` is True only on the process that should log — confirm.
device, is_logger = setup(config)

In [4]:
# Keep verbose output on only when this process is the logger
config.verbose = config.verbose and is_logger

# Echo the pipeline steps and the resolved configuration
if is_logger and config.verbose:
    pipe.log()
    sys.stdout.flush()

###############################
#####    CONFIGURATION    #####
###############################

Steps:
------
 (1) YamlConfig with config_file=./darcy_config.yaml, config_name=default, config_folder=./config

-------------------------------

Configuration:
--------------

n_params_baseline=None
verbose=True
arch=tfno2d
distributed.use_distributed=False
tfno2d.data_channels=3
tfno2d.n_modes_height=32
tfno2d.n_modes_width=32
tfno2d.hidden_channels=64
tfno2d.projection_channels=256
tfno2d.n_layers=4
tfno2d.domain_padding=None
tfno2d.domain_padding_mode=one-sided
tfno2d.fft_norm=forward
tfno2d.norm=group_norm
tfno2d.skip=linear
tfno2d.implementation=factorized
tfno2d.separable=0
tfno2d.preactivation=0
tfno2d.use_mlp=1
tfno2d.mlp.expansion=0.5
tfno2d.mlp.dropout=0
tfno2d.factorization=None
tfno2d.rank=1.0
tfno2d.fixed_rank_modes=None
tfno2d.dropout=0.0
tfno2d.tensor_lasso_penalty=0.0
tfno2d.joint_factorization=False
opt.n_epochs=150
opt.learning_rate=0.005
opt.training_loss=h1
opt.weight_d

# Loading the data 

We train at one resolution and test at several resolutions to show the zero-shot super-resolution capabilities of neural operators.

In [None]:
# Load the Darcy flow data: a training loader at the configured training
# resolution and test loaders for the configured test resolutions.
data_cfg = config.data
train_loader, test_loaders, data_processor = load_darcy_pt(
    data_cfg.folder,
    n_train=data_cfg.n_train,
    batch_size=data_cfg.batch_size,
    train_resolution=data_cfg.train_resolution,
    n_tests=data_cfg.n_tests,
    test_batch_sizes=data_cfg.test_batch_sizes,
    test_resolutions=data_cfg.test_resolutions,
    positional_encoding=data_cfg.positional_encoding,
    encode_input=data_cfg.encode_input,
    encode_output=data_cfg.encode_output,
)

# When multi-grid patching is requested (levels > 0), replace the default
# data processor by an MGPatchingDataProcessor that reuses its normalizers
# and positional encoding.
patching_cfg = config.patching
if patching_cfg.levels > 0:
    data_processor = MGPatchingDataProcessor(
        levels=patching_cfg.levels,
        padding_fraction=patching_cfg.padding,
        stitching=patching_cfg.stitching,
        in_normalizer=data_processor.in_normalizer,
        out_normalizer=data_processor.out_normalizer,
        positional_encoding=data_processor.positional_encoding,
    )

# Move the (possibly replaced) data processor to the target device
data_processor = data_processor.to(device)

UnitGaussianNormalizer init on 3000, reducing over [0, 1, 2, 3], samples of shape [1, 32, 32].
   Mean and std of shape torch.Size([1, 1, 1]), eps=1e-05
Loading test db at resolution 64 with 500 samples and batch-size=32


# Creating the model and moving it to the selected device

In [6]:
# Build the model described by the config and move it to the target device
model = get_model(config).to(device)

# Count the parameters on the logging process; print them only in verbose mode
if is_logger:
    n_params = count_params(model)
    if config.verbose:
        print(f'\nn_params: {n_params}')
        sys.stdout.flush()

Given argument key='dropout' that is not in TFNO2d's signature.
Given argument key='tensor_lasso_penalty' that is not in TFNO2d's signature.
Keyword argument out_channels not specified for model TFNO2d, using default=1.
Keyword argument lifting_channels not specified for model TFNO2d, using default=256.
Keyword argument non_linearity not specified for model TFNO2d, using default=<built-in function gelu>.
Keyword argument decomposition_kwargs not specified for model TFNO2d, using default={}.

n_params: 16844673


# Create the optimizer and learning rate scheduler

Here, we use an Adam optimizer and a learning rate schedule depending on the configuration

In [7]:
# Adam optimizer; learning rate and weight decay are read from the config
opt_cfg = config.opt
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=opt_cfg.learning_rate,
    weight_decay=opt_cfg.weight_decay,
)

# Learning-rate scheduler, selected by name from the config.
# Any name other than the three handled below is rejected.
lr_schedulers = torch.optim.lr_scheduler
if opt_cfg.scheduler == 'ReduceLROnPlateau':
    scheduler = lr_schedulers.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=opt_cfg.gamma,
        patience=opt_cfg.scheduler_patience,
    )
elif opt_cfg.scheduler == 'CosineAnnealingLR':
    scheduler = lr_schedulers.CosineAnnealingLR(optimizer, T_max=opt_cfg.scheduler_T_max)
elif opt_cfg.scheduler == 'StepLR':
    scheduler = lr_schedulers.StepLR(optimizer, step_size=opt_cfg.step_size, gamma=opt_cfg.gamma)
else:
    raise ValueError(f'Got {config.opt.scheduler=}')

# Creating the loss

By default (`opt.training_loss=h1`) we optimize the Sobolev (H1) norm, but we also evaluate our actual goal: the relative L2 error.

In [8]:
# Instantiate both losses: each is always used for evaluation, and one of
# them (chosen by config.opt.training_loss) is also the training objective.
l2loss = LpLoss(d=2, p=2)
h1loss = H1Loss(d=2)

# Reject unknown loss names up front, then pick the training loss
if config.opt.training_loss not in ('l2', 'h1'):
    raise ValueError(f'Got training_loss={config.opt.training_loss} but expected one of ["l2", "h1"]')
train_loss = l2loss if config.opt.training_loss == 'l2' else h1loss

eval_losses = {'h1': h1loss, 'l2': l2loss}

In [9]:
# Print a summary of the model, optimizer, scheduler and losses before training
if config.verbose and is_logger:
    for section_title, section_obj in (
        ('\n### MODEL ###\n', model),
        ('\n### OPTIMIZER ###\n', optimizer),
        ('\n### SCHEDULER ###\n', scheduler),
    ):
        print(section_title, section_obj)
    print('\n### LOSSES ###')
    print(f'\n * Train: {train_loss}')
    print(f'\n * Test: {eval_losses}')
    print('\n### Beginning Training...\n')
    sys.stdout.flush()


### MODEL ###
 TFNO2d(
  (convs): FactorizedSpectralConv2d(
    (weight): ModuleList(
      (0): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (1): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (2): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (3): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (4): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (5): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (6): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
      (7): ComplexDenseTensor(shape=torch.Size([64, 64, 16, 16]), rank=None)
    )
  )
  (fno_skips): ModuleList(
    (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (2): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (3): Conv2d(64, 64, kernel_size=(1, 1), str

# Creating the trainer

In [10]:
# Build the trainer.
# - `model` is passed by keyword so the call does not depend on whether the
#   Trainer signature accepts it positionally.
# - `log_test_interval` is set to 1 (evaluate after every epoch) instead of
#   False: it is an epoch interval, and a boolean False would act as 0.
#   NOTE(review): confirm the parameter semantics against the installed neuralop version.
# - Weights & Biases logging and output logging are disabled for this notebook.
trainer = Trainer(model=model,
                  n_epochs=config.opt.n_epochs,
                  device=device,
                  wandb_log=False,
                  log_test_interval=1,
                  log_output=False,
                  use_distributed=config.distributed.use_distributed,
                  verbose=config.verbose and is_logger)

Training on regular inputs (no multi-grid patching).
MGPatching(self.n_patches=[1, 1], self.padding_fraction=[0, 0], self.levels=0, use_distributed=False, stitching=False)


In [11]:
# Run the training loop with the loaders, data processor, optimizer,
# scheduler and losses created above; no regularizer is used.
trainer.train(
    train_loader=train_loader,
    test_loaders=test_loaders,
    data_processor=data_processor,
    training_loss=train_loss,
    eval_losses=eval_losses,
    optimizer=optimizer,
    scheduler=scheduler,
    regularizer=False,
)

Training on 3000 samples, testing on [32, 64].
[0] time=3.03, avg_loss=7.8899, train_err=0.3945, 32_h1=0.2295, 32_l2=0.1710, 64_h1=0.2847, 64_l2=0.1733
[1] time=1.38, avg_loss=3.7664, train_err=0.1883, 32_h1=0.1646, 32_l2=0.1177, 64_h1=0.2326, 64_l2=0.1221
[2] time=1.37, avg_loss=3.1005, train_err=0.1550, 32_h1=0.1411, 32_l2=0.1027, 64_h1=0.2156, 64_l2=0.1106
[3] time=1.36, avg_loss=2.5222, train_err=0.1261, 32_h1=0.1238, 32_l2=0.0800, 64_h1=0.2026, 64_l2=0.0936
[4] time=1.36, avg_loss=2.3043, train_err=0.1152, 32_h1=0.1235, 32_l2=0.0808, 64_h1=0.1874, 64_l2=0.0858
[5] time=1.36, avg_loss=2.2108, train_err=0.1105, 32_h1=0.1332, 32_l2=0.1041, 64_h1=0.2055, 64_l2=0.1122
[6] time=1.37, avg_loss=1.9753, train_err=0.0988, 32_h1=0.1077, 32_l2=0.0720, 64_h1=0.1885, 64_l2=0.0838
[7] time=1.37, avg_loss=1.9352, train_err=0.0968, 32_h1=0.1032, 32_l2=0.0642, 64_h1=0.1847, 64_l2=0.0753
[8] time=1.36, avg_loss=1.8174, train_err=0.0909, 32_h1=0.1013, 32_l2=0.0632, 64_h1=0.1798, 64_l2=0.0763
[9] time

# Follow-up questions

You can now play with the configuration and see how the performance is impacted.

Which parameters do you think will most influence performance? 
Learning rate? Learning schedule? hidden_channels? Number of training samples? 

Does your intuition match the results you are getting?