# DiffNet Demo

In this tutorial, we will show  how to use DiffNets for 2D and 3D Poisson's equattion

### Preliminaries: installing dependencies and importing packages

In [1]:
import os
import sys
import json
import torch
import numpy as np

import matplotlib
# matplotlib.use("pgf")
matplotlib.rcParams.update({
    # 'font.family': 'serif',
    'font.size':12,
})
from matplotlib import pyplot as plt

import mlflow
import mlflow.pytorch
from mlflow.tracking import MlflowClient
tracking_uri = os.environ.get("TRACKING_URL")
client = MlflowClient(tracking_uri=tracking_uri)
mlflow.set_tracking_uri(tracking_uri)


import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import TensorBoardLogger
seed_everything(42)

import DiffNet
from DiffNet.networks.wgan_old import GoodGenerator
from DiffNet.networks.autoencoders import AE
from DiffNet.DiffNetFEM import DiffNet2DFEM
from DiffNet.datasets.parametric.klsum import KLSumStochastic
from DiffNet.datasets.single_instances.klsum import Dataset

Global seed set to 42


### Define few classes and methods

In [2]:
from tqdm import tqdm
from torch.utils import data
from DiffNet.gen_input_calc import generate_diffusivity_tensor


class KLSum(data.Dataset):
    'PyTorch dataset for sampling coefficients'
    def __init__(self, filename, domain_size=64, kl_terms=6):
        """
        Initialization
        """
        self.coeffs = np.load(filename)
        self.domain_size = domain_size
        self.kl_terms = kl_terms
        self.dataset = []
        
        print('loading dataset')
        for coeff in tqdm(self.coeffs):
            domain = generate_diffusivity_tensor(coeff, output_size=self.domain_size, n_sum_nu=kl_terms).squeeze()
            # bc1 will be source, u will be set to 1 at these locations
            bc1 = np.zeros_like(domain)
            bc1[:,0] = 1

            # bc2 will be sink, u will be set to 0 at these locations
            bc2 = np.zeros_like(domain)
            bc2[:,-1] = 1

            self.dataset.append(np.array([domain,bc1,bc2]))
        self.dataset = np.array(self.dataset)
        self.n_samples = self.dataset.shape[0]

    def __len__(self):
        'Denotes the total number of samples'
        return self.n_samples

    def __getitem__(self, index):
        'Generates one sample of data'
        inputs = self.dataset[index]
        forcing = np.zeros_like(self.dataset[index][0])
        return torch.FloatTensor(inputs), torch.FloatTensor(forcing).unsqueeze(0)



In [3]:
class Poisson(DiffNet2DFEM):
    """docstring for Poisson"""
    def __init__(self, network, dataset, **kwargs):
        super(Poisson, self).__init__(network, dataset, **kwargs)

    def loss(self, u, inputs_tensor, forcing_tensor):

        f = forcing_tensor # renaming variable
        
        # extract diffusivity and boundary conditions here
        nu = inputs_tensor[:,0:1,:,:]
        bc1 = inputs_tensor[:,1:2,:,:]
        bc2 = inputs_tensor[:,2:3,:,:]

        # apply boundary conditions
        u = torch.where(bc1>0.5,1.0+u*0.0,u)
        u = torch.where(bc2>0.5,u*0.0,u)


        nu_gp = self.gauss_pt_evaluation(nu)
        f_gp = self.gauss_pt_evaluation(f)
        u_gp = self.gauss_pt_evaluation(u)
        u_x_gp = self.gauss_pt_evaluation_der_x(u)
        u_y_gp = self.gauss_pt_evaluation_der_y(u)

        transformation_jacobian = self.gpw.unsqueeze(-1).unsqueeze(-1).unsqueeze(0).type_as(nu_gp)
        res_elmwise = transformation_jacobian * (nu_gp * (u_x_gp**2 + u_y_gp**2) - (u_gp * f_gp))
        res_elmwise = torch.sum(res_elmwise, 1) 

        # transformation_jacobian = (0.5 * self.h)**2 * self.gpw.unsqueeze(-1).unsqueeze(-1).unsqueeze(0).type_as(nu_gp)
        # res_elmwise = 0.5 * transformation_jacobian * (nu_gp * (u_x_gp**2 + u_y_gp**2) - (u_gp * f_gp))
        # res_elmwise = torch.sum(res_elmwise, 1) 

        loss = torch.mean(res_elmwise)
        return loss

    def forward(self, batch):
        inputs_tensor, forcing_tensor = batch
        u = self.network(inputs_tensor[:,0:1,:,:])
        return u, inputs_tensor, forcing_tensor

    def training_step(self, batch, batch_idx):
        u, inputs_tensor, forcing_tensor = self.forward(batch)
        loss_val = self.loss(u, inputs_tensor, forcing_tensor).mean()
        return {"loss": loss_val}

    def training_step_end(self, training_step_outputs):
        loss = training_step_outputs["loss"]
        self.log('PDE_loss', loss.item())
        self.log('loss', loss.item())
        return training_step_outputs

    def configure_optimizers(self):
        lr = self.learning_rate
        # opts = [torch.optim.LBFGS(self.network.parameters(), lr=lr, max_iter=5)]
        opts = [torch.optim.Adam(self.network.parameters(), lr=lr)]
        # schd = []
        schd = [torch.optim.lr_scheduler.MultiStepLR(opts[0], milestones=[10,15,30], gamma=0.1)]
        return opts, schd

    def on_epoch_end(self):
        num_query = 6
        plt_num_row = num_query
        plt_num_col = 2
        fig, axs = plt.subplots(plt_num_row, plt_num_col, figsize=(2*plt_num_col,1.2*plt_num_row),
                            subplot_kw={'aspect': 'auto'}, sharex=True, sharey=True, squeeze=True)
        for ax_row in axs:
            for ax in ax_row:
                ax.set_xticks([])
                ax.set_yticks([])
        
        self.network.eval()
        inputs, forcing = self.dataset[0:num_query]
        forcing = forcing.repeat(num_query,1,1,1)

        ub, inputs_tensor, forcing_tensor = self.forward((inputs.type_as(next(self.network.parameters())), forcing.type_as(next(self.network.parameters()))))
        
        loss = self.loss(ub, inputs_tensor, forcing_tensor[:,0:1,:,:])

        for idx in range(num_query):
            f = forcing_tensor # renaming variable
            
            # extract diffusivity and boundary conditions here
            nu = inputs_tensor[idx,0:1,:,:]
            u = ub[idx,0:1,:,:]
            bc1 = inputs_tensor[idx,1:2,:,:]
            bc2 = inputs_tensor[idx,2:3,:,:]

            # apply boundary conditions
            u = torch.where(bc1>0.5,1.0+u*0.0,u)
            u = torch.where(bc2>0.5,u*0.0,u)

            k = nu.squeeze().detach().cpu()
            u = u.squeeze().detach().cpu()

            im0 = axs[idx][0].imshow(k,cmap='jet')
            fig.colorbar(im0, ax=axs[idx,0])
            im1 = axs[idx][1].imshow(u,cmap='jet')
            fig.colorbar(im1, ax=axs[idx,1])  
        plt.savefig(os.path.join(self.logger[0].log_dir, 'contour_' + str(self.current_epoch) + '.png'))
        self.logger[0].experiment.add_figure('Contour Plots', fig, self.current_epoch)
        plt.close('all')


### Setting up the training

In [5]:
!wget https://github.com/rocketmlhq/sciml/raw/main/05_DiffNets/sobol_6d.npy

--2021-11-12 18:48:59--  https://github.com/rocketmlhq/sciml/raw/main/05_DiffNets/sobol_6d.npy
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/rocketmlhq/sciml/main/05_DiffNets/sobol_6d.npy [following]
--2021-11-12 18:48:59--  https://raw.githubusercontent.com/rocketmlhq/sciml/main/05_DiffNets/sobol_6d.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3145856 (3.0M) [application/octet-stream]
Saving to: ‘sobol_6d.npy’


2021-11-12 18:49:00 (43.6 MB/s) - ‘sobol_6d.npy’ saved [3145856/3145856]



In [6]:
kl_terms = 6
domain_size = 32
LR = 1e-3
batch_size = 128
sample_size = 65536
sobol_file = 'sobol_'+str(kl_terms)+'d.npy'
max_epochs = 10
print("Max_epochs = ", max_epochs)

dataset = KLSum(sobol_file, domain_size=domain_size, kl_terms=kl_terms)
# dataset = Dataset('../single_instance/example-coefficients.txt', domain_size=64)
network = AE(in_channels=1, out_channels=1, dims=16, n_downsample=2)
basecase = Poisson(network, dataset, batch_size=batch_size, domain_size=domain_size, learning_rate=LR)

# ------------------------
# 1 INIT TRAINER
# ------------------------
logger = pl.loggers.TensorBoardLogger('.', name="klsum_"+str(domain_size))
csv_logger = pl.loggers.CSVLogger(logger.save_dir, name=logger.name, version=logger.version)

early_stopping = pl.callbacks.early_stopping.EarlyStopping('loss',
    min_delta=1e-8, patience=10, verbose=False, mode='max', strict=True)
checkpoint = pl.callbacks.model_checkpoint.ModelCheckpoint(monitor='loss',
    dirpath=logger.log_dir, filename='{epoch}-{step}',
    mode='min', save_last=True)

trainer = Trainer(callbacks=[early_stopping,checkpoint],
    checkpoint_callback=True, logger=[logger,csv_logger],
    max_epochs=max_epochs, deterministic=True, profiler='simple')

Max_epochs =  10
loading dataset


100%|██████████| 65536/65536 [00:59<00:00, 1102.60it/s]
Missing logger folder: ./klsum_32
  f"Setting `Trainer(checkpoint_callback={checkpoint_callback})` is deprecated in v1.5 and will "
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [7]:
## Utility function to add libraries to conda environment
def add_libraries_to_conda_env(_conda_env,libraries=[],conda_dependencies=[]):
    dependencies = _conda_env["dependencies"]
    dependencies = dependencies + conda_dependencies
    pip_index = None
    for _index,_element in enumerate(dependencies):
        if type(_element) == dict:
            if "pip" in _element.keys():
                pip_index = _index
                break
    dependencies[pip_index]["pip"] =  dependencies[pip_index]["pip"] + libraries
    _conda_env["dependencies"] = dependencies
    return _conda_env


## Pytorch Model Wrapper that takes encoded image strings as inputs
class PytorchModelWrapper(mlflow.pyfunc.PythonModel):
    def load_context(self,context):
        import torch
        scripted_model = context.artifacts["scripted_model"]
        self.model = torch.jit.load(scripted_model)
        print("Pytorch model initialized")
    
    def predict(self, context, model_input):
        import numpy as np
        import torch
        print("Predicting %d samples"%len(model_input))
        nparray_list = []
        nparray = np.array(model_input).astype(np.float32)
        torch_tensor = torch.from_numpy(nparray)
        predictions = self.model(torch_tensor)
        return json.dumps(predictions.detach().numpy().tolist())

### Training

In [None]:
# ------------------------
# 4 Training
# ------------------------
def print_auto_logged_info(r):

    tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
    artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
    print("run_id: {}".format(r.info.run_id))
    print("artifacts: {}".format(artifacts))
    print("params: {}".format(r.data.params))
    print("metrics: {}".format(r.data.metrics))
    print("tags: {}".format(tags))

# Fetch the associated conda environment
env = mlflow.pytorch.get_default_conda_env()
env = add_libraries_to_conda_env(env,libraries=['git+https://github.com/adityabalu/DiffNet.git'])
print("conda env: {}".format(env))

mlflow.pytorch.autolog()

# Train the model
with mlflow.start_run() as run:
    trainer.fit(basecase)
    mlflow.log_artifacts(logger.log_dir, artifact_path="events")
    # Log the model as an artifact of the MLflow run.
    scripted_model = torch.jit.script(basecase.network)
    torch.jit.save(scripted_model,"scripted_model.pth")
    model_artifacts = {"scripted_model" : "scripted_model.pth"}
    pyfunc_pytorch_model = PytorchModelWrapper()
    mlflow.pytorch.log_model("basecase_network",python_model=pyfunc_pytorch_model,artifacts=model_artifacts,conda_env=env)
    print("\nLogging the trained scripted model as a run artifact...")
    print("\nThe model is logged at:\n%s" % os.path.join(mlflow.get_artifact_uri(), "basecase_network"))

# ------------------------
# 5 SAVE NETWORK
# ------------------------
torch.save(basecase.network, os.path.join(logger.log_dir, 'network.pt'))
mlflow.end_run()



conda env: {'name': 'mlflow-env', 'channels': ['conda-forge'], 'dependencies': ['python=3.7.11', 'pip', {'pip': ['mlflow', 'torch==1.10.0', 'torchvision==0.11.1', 'cloudpickle==2.0.0', 'git+https://github.com/adityabalu/DiffNet.git']}]}


2021-11-12 18:50:21.668153: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-11-12 18:50:21.668198: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  f"Experiment logs directory {self.log_dir} exists and is not empty."

  | Name      | Type          | Params
--------------------------------------------
0 | network   | AE            | 264 K 
1 | N_gp      | ParameterList | 16    
2 | dN_x_gp   | ParameterList | 16    
3 | dN_y_gp   | ParameterList | 16    
4 | d2N_x_gp  | ParameterList | 16    
5 | d2N_y_gp  | ParameterList | 16    
6 | d2N_xy_gp | ParameterList | 16    
--------------------------------------------
264 K     Trainable params
96        Non-trainable params
264 K     Total params
1.058     Total estimated model params size (MB)
  rank_z

Epoch 0:   6%|▌         | 30/512 [07:03<1:53:25, 14.12s/it, loss=3.37e+03, v_num=0_0]

In [None]:
print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))

In [99]:
import requests
import json
import pandas as pd

url = "http://127.0.0.1:5011/invocations"
data_json = json.dumps(dataset.dataset[0][0,:,:].tolist())

headers = {"Content-Type":"application/json"}
response = requests.post(url,data=data_json, headers=headers)

if response.status_code == 200:
    print(response.json())
else:
    print(response.status_code)
    print("REST API deployment is in progress -- please try again in a few minutes!")

400
REST API deployment is in progress -- please try again in a few minutes!


In [94]:
!curl http://127.0.0.1:5011/invocations -H 'Content-Type:application/json' -d '{"columns": ["a", "b", "c"],"data": [[1, 2, 3], [4, 5, 6]]}'

{"error_code": "BAD_REQUEST", "message": "Encountered an unexpected error while evaluating the model. Verify that the serialized input Dataframe is compatible with the model for inference.", "stack_trace": "Traceback (most recent call last):\n  File \"/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/mlflow/pyfunc/scoring_server/__init__.py\", line 303, in transformation\n    raw_predictions = model.predict(data)\n  File \"/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/mlflow/pyfunc/__init__.py\", line 608, in predict\n    return self._model_impl.predict(data)\n  File \"/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/mlflow/pytorch/__init__.py\", line 754, in predict\n    preds = self.pytorch_model(input_tensor)\n  File \"/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/torch/nn/modules/module.py\", line 1051, in _call

In [100]:
print(response.json()['stack_trace'])

Traceback (most recent call last):
  File "/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/mlflow/pyfunc/scoring_server/__init__.py", line 303, in transformation
    raw_predictions = model.predict(data)
  File "/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/mlflow/pyfunc/__init__.py", line 608, in predict
    return self._model_impl.predict(data)
  File "/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/mlflow/pytorch/__init__.py", line 754, in predict
    preds = self.pytorch_model(input_tensor)
  File "/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "/anaconda/envs/mlflow-5586a9c73c1dbbd7f006268613ccfb208dae4a42/lib/python3.7/site-packages/DiffNet/networks/autoencoders.py", line 93, in forward
    code = self.enco

In [91]:
data_json

'{"columns":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],"index":[0,1,2],"data":[[[0.0402931964,0.0484534083,0.1198500906,0.4892311287,2.3402966844,8.9498379311,19.6491857008,20.2592881917,9.5870826225,2.4442554446,0.4558154851,0.0909005037,0.0279614754,0.0173771962,0.0244662406,0.0732954165,0.3745386673,2.3572695343,12.8053686796,44.1048373093,78.8692498253,69.0616484544,32.1143814938,9.5707378663,2.3278684897,0.5855861961,0.1829331153,0.0785167596,0.0470981408,0.0377066664,0.0374154491,0.0431122602],[0.0484534083,0.0576120408,0.1350005257,0.5066060502,2.2051342231,7.7643076023,16.1846452402,16.5252389646,8.087392396,2.2070249521,0.448392464,0.0971683018,0.0317950938,0.0203170831,0.0283214978,0.0813432335,0.3910249163,2.310871109,11.9604379686,40.2867404948,72.5915491901,65.6888257191,32.0045363141,9.9808612334,2.5009721674,0.6319110999,0.1928322272,0.0790548508,0.0448156891,0.034055255,0.0326693429,0.0374154491],[0.1198500906,0.1350005257,0.