<a href="https://colab.research.google.com/github/nicolaslepagecnam/stage-Nicolas-Lepage/blob/main/REALIST_data%2Bmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# setup - installs and imports

## pip installs

In [None]:
# Install the third-party packages that the import cell of this notebook relies on.
# NOTE(review): no versions are pinned here, so a fresh runtime may resolve
# different releases than the ones this notebook was developed against.
!pip install tqdm
!pip install petsc4py
!pip install mpi4py
!pip install perlin-noise
!pip install imageio[pyav]
!pip install torch
!pip install torchdiffeq
# wandb is upgraded to the newest release instead of keeping a preinstalled one.
!pip install wandb --upgrade
!pip install pytorch-lightning

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting petsc4py
  Downloading petsc4py-3.17.4.tar.gz (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 4.0 MB/s 
Collecting petsc<3.18,>=3.17
  Downloading petsc-3.17.4.tar.gz (16.4 MB)
[K
[?25hTraceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/cli/base_command.py", line 180, in _main
    status = self.run(options, args)
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/cli/req_command.py", line 199, in wrapper
    return func(self, options, args)
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/commands/install.py", line 319, in run
    reqs, check_supported_wheels=not options.target_dir
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/resolution/resolvelib/resolver.py", line 128, in resolve
    r

## FEniCSx install 

In [None]:
# Install FEniCSx (dolfinx) only when it cannot be imported yet: the
# fem-on-colab installer script is downloaded to /tmp, executed with bash,
# and the import is then retried.
try:
    import dolfinx
except ImportError:
    !wget "https://fem-on-colab.github.io/releases/fenicsx-install-real.sh" -O "/tmp/fenicsx-install.sh" && bash "/tmp/fenicsx-install.sh"
    import dolfinx

# Same install-on-demand pattern for gmsh.
try:
    import gmsh
except ImportError:
    !wget "https://fem-on-colab.github.io/releases/gmsh-install.sh" -O "/tmp/gmsh-install.sh" && bash "/tmp/gmsh-install.sh"
    import gmsh


## Imports

In [None]:
### general imports ###

import os
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import numpy.typing
import typing
import random as rd
import imageio
import math, shelve
from perlin_noise import PerlinNoise
import pandas as pd

### Machine Learning imports ###

import torch
from torch import nn
import torch
import torchdiffeq
from collections import OrderedDict
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split, Dataset
import pytorch_lightning as pl
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from torch import optim
import wandb
from torchdiffeq import odeint_adjoint, odeint
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from pytorch_lightning.callbacks import ModelCheckpoint
from torch import optim

### FEniCSx imports ###

import ufl
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx.cpp.mesh import to_type, cell_entity_type
from dolfinx.fem import Constant, Function, FunctionSpace, assemble_scalar, dirichletbc, form, locate_dofs_topological, set_bc
from dolfinx.fem.petsc import apply_lifting, assemble_matrix, assemble_vector, create_vector, set_bc
from dolfinx.graph import create_adjacencylist
from dolfinx.geometry import BoundingBoxTree, compute_collisions, compute_colliding_cells
from dolfinx.mesh import create_mesh, meshtags_from_entities
from ufl import (FacetNormal, FiniteElement, Identity, Measure, TestFunction, TrialFunction, VectorElement,
                 as_vector, div, dot, ds, dx, inner, lhs, grad, nabla_grad, rhs, sym)

--2022-09-26 07:30:15--  https://fem-on-colab.github.io/releases/fenicsx-install-real.sh
Resolving fem-on-colab.github.io (fem-on-colab.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...
Connecting to fem-on-colab.github.io (fem-on-colab.github.io)|185.199.108.153|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3055 (3.0K) [application/x-sh]
Saving to: ‘/tmp/fenicsx-install.sh’


2022-09-26 07:30:15 (55.4 MB/s) - ‘/tmp/fenicsx-install.sh’ saved [3055/3055]

+ SHARE_PREFIX=/usr/local/share/fem-on-colab
+ FENICSX_INSTALLED=/usr/local/share/fem-on-colab/fenicsx.installed
+ [[ ! -f /usr/local/share/fem-on-colab/fenicsx.installed ]]
+ PYBIND11_INSTALL_SCRIPT_PATH=https://github.com/fem-on-colab/fem-on-colab.github.io/raw/5710736/releases/pybind11-install.sh
+ [[ https://github.com/fem-on-colab/fem-on-colab.github.io/raw/5710736/releases/pybind11-install.sh == http* ]]
+ PYBIND11_INSTALL_SCRIPT_DOWNLOAD=https://github.com/fem-on-colab/fem-on-colab.

ModuleNotFoundError: ignored

# Prepare folders

In [None]:
# Start from an empty simulation cache: delete ./realist and everything in it.
# shutil.rmtree is used instead of a manual os.remove loop so that nested
# directories are removed as well (os.remove fails on a directory and would
# abort the clean-up half-way).
import shutil

try:
    shutil.rmtree('./realist')
except FileNotFoundError:
    # Nothing to clean: the folder has not been created yet.
    pass

# Simulation Dataset

## FEniCSx simulations

In [None]:
### class to generate boundary conditions  ###

class boundary():
    """Constant profile used as the Dirichlet boundary value.

    Calling an instance with a coordinate array ``x`` returns an array of
    shape ``(1, x.shape[1])`` and dtype ``PETSc.ScalarType`` in which every
    entry equals 0.4. The coordinate values themselves are not read.
    """

    def __init__(self):
        pass

    def __call__(self, x):
        n_points = x.shape[1]
        return np.full((1, n_points), 0.4, dtype=PETSc.ScalarType)

### class to generate initial conditions, seeding used to test and train models on the same data  ###
  
class Initial():
    """Initial condition: the constant 0.4 plus scaled one-dimensional Perlin noise.

    Parameters
    ----------
    i : seed handed to ``PerlinNoise``.

    NOTE(review): the number of octaves is drawn with ``rd.randint(1, 6)`` from
    the module-level ``random`` state, so for a given ``i`` the generated
    profile also depends on how many draws happened before - confirm that
    this is intended.
    """

    def __init__(self,i):
        n_octaves = rd.randint(1,6)
        self.noise = PerlinNoise(octaves=n_octaves, seed=i)

    def __call__(self, x):
        """Return a ``(1, x.shape[1])`` array of ``PETSc.ScalarType`` values."""
        n_points = x.shape[1]

        # The noise is sampled at k / n_points for k = 0 .. n_points - 1, i.e. by
        # position in the coordinate array, not by the coordinate values in ``x``.
        samples = [1.5*self.noise([k/n_points]) for k in range(n_points)]

        profile = np.full((1, n_points), 0.4, dtype=PETSc.ScalarType)
        profile += samples  # in-place add keeps the PETSc scalar dtype

        return profile

### class called by dataloader to generate simulation  ###

class burgers():
    """Map-style dataset of 1-D viscous Burgers simulations computed with FEniCSx.

    Each item is simulated on first access and cached in a ``shelve`` file, so
    later accesses of the same index only read the stored array.

    Parameters
    ----------
    path : file name handed to ``shelve.open`` for the on-disk cache.
    precision : number of mesh cells on the unit interval (``precision + 1`` nodes).
    num_seq : value returned by ``len()``.
    time_horizon : total simulated time.
    dt : time step.
    nu : viscosity coefficient.
    group : ``'train'``, ``'val'`` or ``'test'``; selects the seed offset below.

    Items are dicts with
      * ``'states'``: float tensor of shape ``(precision + 1, num_steps)``,
      * ``'t'``: float tensor with the ``num_steps`` evaluation times.
    """

    # Offset added to the item index to obtain the Perlin-noise seed. Validation
    # and test used to share offset 0 and therefore the same seeds; giving the
    # test split its own range (64..127, below the first training seed 128)
    # keeps the three splits on disjoint seeds as long as 'val' and 'test' hold
    # at most 64 sequences each, which is what the datamodule in this notebook uses.
    _SEED_OFFSETS = {'train': 128, 'val': 0, 'test': 64}

    def __init__(self, path, precision, num_seq, time_horizon, dt, nu, group):

        # dataset parameters #

        super().__init__()

        rd.seed(0)

        self.precision = precision
        self.len = num_seq
        self.time_horizon = float(time_horizon)  # total time
        self.dt = float(dt)  # time step
        self.data = shelve.open(path)

        # Number of stored time steps, and the matching evaluation times. Building
        # t_eval from num_steps (rather than from a float-step np.arange) guarantees
        # that it has exactly as many entries as the stored state has columns.
        self.num_steps = int(np.round(self.time_horizon/self.dt))
        self.t_eval = np.arange(self.num_steps) * self.dt

        # initialising fenics simulation #

        comm = MPI.COMM_WORLD

        self.mesh = dolfinx.mesh.create_unit_interval(comm, nx = self.precision)

        fdim = self.mesh.topology.dim - 1

        # First-order Lagrange function space on the interval mesh
        s_cg1 = FiniteElement("Lagrange", self.mesh.ufl_cell(), 1)
        self.W = FunctionSpace(self.mesh, s_cg1)

        # Degrees of freedom located at x = 0.
        # NOTE(review): these are dof indices from locate_dofs_geometrical and are
        # then passed as entities to locate_dofs_topological - confirm this is intended.
        facets = dolfinx.fem.locate_dofs_geometrical(self.W, lambda x: np.isclose(x[0], 0.0))

        self.k = Constant(self.mesh, PETSc.ScalarType(self.dt))

        self.nu_petsc = Constant(self.mesh, PETSc.ScalarType(nu))

        # Dirichlet value at x = 0, interpolated from the constant `boundary` profile
        u_inlet = Function(self.W)
        inlet_velocity = boundary()
        u_inlet.interpolate(inlet_velocity)

        sides = dirichletbc(u_inlet, locate_dofs_topological(self.W, fdim, facets))

        self.bc = [sides]

        self.adder = self._seed_offset(group)

    @staticmethod
    def _seed_offset(group):
        """Return the seed offset of a split; unknown groups fall back to 0."""
        return burgers._SEED_OFFSETS.get(group, 0)

    def _simulate(self, index):
        """Run one simulation and return it as a ``(precision + 1, num_steps)`` array."""

        # physics function definition

        u = ufl.TrialFunction(self.W)
        v = ufl.TestFunction(self.W)
        u_n = Function(self.W)

        # compute initial conditions

        init = Initial(index+self.adder)
        u_n.interpolate(init)

        # Burgers' equation weak form: implicit in the unknown u, with the
        # advecting velocity taken from the previous step u_n (linear in u)

        F1 = ((1/self.k)*inner(u - u_n, v)* ufl.dx
        + inner(u.dx(0) * u_n, v) * ufl.dx
        + self.nu_petsc * inner(u.dx(0), v.dx(0)) * ufl.dx)

        # matrix to store results (one column per time step)

        list_u = np.zeros((self.precision+1, self.num_steps))

        # fenics function to compute the results

        u_temp = Function(self.W)

        # Store solution of initial time step

        list_u[:,0] = u_n.vector[:]

        # Finite element matrix system assembly

        a = form(lhs(F1))
        L = form(rhs(F1))
        A = assemble_matrix(a, bcs=self.bc)
        A.assemble()
        b = create_vector(L)

        # solver parameters: direct LU solve through MUMPS

        solver1 = PETSc.KSP().create(self.mesh.comm)
        solver1.setOperators(A)
        solver1.setType("preonly")
        solver1.setConvergenceHistory()
        pc1 = solver1.getPC()
        pc1.setType("lu")
        pc1.setFactorSolverType("mumps")

        # loop to compute all time steps

        for i in range(self.num_steps-1):

            # reassemble matrix at each time step (the bilinear form depends on u_n)

            A = assemble_matrix(a, bcs=self.bc)
            A.assemble()
            solver1.setOperators(A)

            # resolution
            with b.localForm() as loc:
                loc.set(0)
            assemble_vector(b, L)
            apply_lifting(b, [a], [self.bc])
            b.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
            set_bc(b, self.bc)
            solver1.solve(b, u_temp.vector)
            u_temp.x.scatter_forward()

            # store current time step solution

            list_u[:,i+1] = u_temp.vector[:]

            # Update previous time step solution with current time step solution

            with u_temp.vector.localForm() as loc_u, u_n.vector.localForm() as loc_un:
                loc_u.copy(loc_un)

        return list_u

    def __getitem__(self, index):
        """Return the cached simulation for ``index``, computing and storing it if missing."""

        key = str(index)
        cached = self.data.get(key)

        if cached is None:
            cached = self._simulate(index)
            self.data[key] = cached

        states = torch.from_numpy(cached).float()

        return {'states': states, 't': torch.tensor(self.t_eval).float()}

    def __len__(self):
        return self.len

## Create folder to store dataloader simulations when initially computed 

In [None]:
# Directory that holds the on-disk cache of computed simulations.
path = "./realist"
os.makedirs(path, exist_ok=True)

# Base name of the cache files inside that directory; the datamodule defined
# below appends "_train", "_val" and "_test" to it.
buffer_filepath = os.path.join(path, "burgers")

## Define Pytorch-lightning dataloader

In [None]:
class light_burgers_dataloader(pl.LightningDataModule):
    """Lightning datamodule serving train / val / test ``burgers`` datasets.

    Parameters
    ----------
    data_dir : base path of the shelve caches; "_train", "_val" and "_test" are appended.
    batch_size : batch size of the training loader.
    num_workers : worker count used by every loader.
    precision : number of mesh cells of each simulation.
    ex_multiplier : number of training batches; the training set holds
        ``ex_multiplier * batch_size`` sequences.
    dt, time_horizon, nu : simulation parameters forwarded to ``burgers``.

    The validation and test sets always hold 64 sequences and are served in a
    single batch of 64. ``self.dims`` is ``(batch_size, n_time_steps, precision + 1)``.
    """

    def __init__(
        self,
        data_dir: str = buffer_filepath,
        batch_size: int = 64,
        num_workers: int = 0,
        precision = 150,
        ex_multiplier = 12, # number of total batches
        dt = 0.005, 
        time_horizon = 1.5,
        nu =  0.001,):

        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers

        n_steps = int(np.round(time_horizon/dt))

        self.dims = (batch_size, n_steps, precision+1)

        num_seq = ex_multiplier * self.batch_size

        self.dataset_train_params = {
                'num_seq': num_seq, 
                'precision' : precision,
                'time_horizon': time_horizon,
                'dt': dt, 
                'group': 'train',
                'path': self.data_dir +'_train',
                'nu' : nu,
            }

        # Validation and test reuse the training settings, except for their
        # size, group name and cache file.
        self.dataset_val_params = dict(
            self.dataset_train_params,
            num_seq=64, group='val', path=self.data_dir + '_val')

        self.dataset_test_params = dict(
            self.dataset_train_params,
            num_seq=64, group='test', path=self.data_dir + '_test')

        # Loaders are created lazily by setup() and then reused.
        self.data_train = None
        self.data_val = None
        self.data_test = None

    def _make_loader(self, dataset_params, batch_size):
        """Build a ``burgers`` dataset and wrap it in a non-shuffling DataLoader."""
        return DataLoader(
            dataset=burgers(**dataset_params),
            batch_size=batch_size,
            num_workers=self.num_workers,
            pin_memory=True,
            drop_last=False,
            shuffle=False,
        )

    def setup(self, stage=None):
        """Create the loaders required by ``stage`` (all of them when ``stage`` is None).

        Only the datasets the stage needs are built, and an existing loader is
        kept: building a ``burgers`` dataset creates a mesh and opens its shelve
        cache, so rebuilding all three on every call would reopen cache files
        that are already open in this process.
        """

        # Assign train/val datasets for use in dataloaders
        if stage in ("fit", None) and self.data_train is None:
            self.data_train = self._make_loader(self.dataset_train_params, self.batch_size)

        if stage in ("fit", "validate", None) and self.data_val is None:
            self.data_val = self._make_loader(self.dataset_val_params, 64)

        # Assign test dataset for use in dataloader(s)
        if stage in ("test", None) and self.data_test is None:
            self.data_test = self._make_loader(self.dataset_test_params, 64)

    def train_dataloader(self):
        return self.data_train

    def val_dataloader(self):
        return self.data_val

    def test_dataloader(self):
        return self.data_test
    

# Realist model code

## Create all pytorch models' classes

In [None]:

### first class defines the partial physics model: diffusion computed by explicit finite differences ###

class burgersPDE(nn.Module):
    """Known part of the physics: the diffusion term ``nu * d2u/dx2``.

    The second derivative is a three-point finite difference applied as a 1-D
    convolution, scaled by ``(dx - 1)**2``.

    Parameters
    ----------
    dx : number of grid points (the caller in this notebook passes
        ``precision + 1``); ``dx - 1`` is stored as ``self.precision``.

    ``nu`` (fixed to 0.001) and the stencil ``lap`` are registered as
    non-persistent buffers: they follow the module across devices with
    ``.to()`` / ``.cuda()``, are not trained, and stay out of ``state_dict``.
    The previous ``nn.Parameter(...).cuda()`` required a GPU at construction
    time and produced plain tensors the module did not track.
    """

    def __init__(self, dx):

        super().__init__()

        self.precision = dx - 1

        self.register_buffer("nu", torch.tensor(0.001), persistent=False)

        stencil = torch.tensor([1., -2., 1.]).view(1, 1, 3) * self.precision**2
        self.register_buffer("lap", stencil, persistent=False)

    def forward(self, state):
        """Return the diffusion term for ``state`` of shape ``(batch, n_points)``.

        The output has the same shape; its first column is always zero.
        """

        n_batch, n_points = state.shape

        field = state.reshape(n_batch, 1, n_points)

        # Ghost node on the right that copies the last value, so that the
        # stencil can also be evaluated at the last grid point.
        padded = torch.cat([field, field[:, :, -1:]], dim=-1)

        # conv1d without padding yields values for nodes 1 .. n_points - 1
        diffusion = self.nu * F.conv1d(padded, self.lap)

        # The first node gets a zero derivative.
        diffusion = F.pad(diffusion, (1, 0), mode="constant", value=0.0)

        return diffusion.reshape(n_batch, n_points)

### class to compute the finite-difference gradient used for the H1 norm ###
    
class get_grad(nn.Module):
    """Finite-difference spatial gradient used by the H1 loss.

    Parameters
    ----------
    dx : number of grid points; ``dx - 1`` is stored as ``self.precision`` and
        the centred stencil ``[-1, 0, 1]`` is scaled by ``precision / 2``.

    The stencil is a non-persistent buffer, so it follows the module across
    devices and is absent from ``state_dict``; no GPU is needed to build or
    run the module.
    """

    def __init__(self, dx):

        super().__init__()

        self.precision = dx - 1

        kernel = torch.tensor([-1., 0., 1.]).view(1, 1, 3) * (self.precision/2)
        self.register_buffer("grad1", kernel, persistent=False)

    def forward(self, state):
        """Differentiate ``state`` of shape ``(batch, n_points, n_steps)`` along ``n_points``.

        Returns a tensor with the same shape. Requires at least 3 grid points.
        """

        x = state.permute(0,2,1)  # -> (batch, n_steps, n_points)

        w_left = self.grad1[0, 0, 0]
        w_right = self.grad1[0, 0, 2]

        u = torch.zeros_like(x)

        # Interior nodes: centred difference, evaluated for all nodes at once.
        u[:, :, 1:-1] = x[:, :, :-2]*w_left + x[:, :, 2:]*w_right

        # NOTE(review): the two boundary nodes reuse the centred difference of
        # their interior neighbour (x[2] - x[0] and x[-1] - x[-3]) instead of a
        # one-sided difference; kept as in the original - confirm intended.
        u[:, :, 0] = x[:, :, 0]*w_left + x[:, :, 2]*w_right

        u[:, :, -1] = x[:, :, -3]*w_left + x[:, :, -1]*w_right

        return u.permute(0,2,1)
    
### Neural network that computes the residual error ###

class ConvNetEstimator(nn.Module):
    """1-D convolutional network estimating the residual time derivative.

    The network is four ``Conv1d -> BatchNorm1d -> ReLU`` stages (the first two
    followed by ``Dropout(0.1)``) and a final unpadded ``Conv1d`` mapping back
    to ``state_c`` channels.

    Parameters
    ----------
    state_c : channel count of the first convolution's input and of the last
        convolution's output.
    hidden : channel count of the hidden stages.
    """

    def __init__(self, state_c=1, hidden=32):
        super().__init__()

        self.state_c = state_c

        layers = []
        in_channels = self.state_c

        # Four hidden stages; only the first two are followed by dropout.
        for with_dropout in (True, True, False, False):
            layers.append(nn.Conv1d(in_channels, hidden, kernel_size=3, padding=1, bias=True))
            layers.append(nn.BatchNorm1d(hidden, track_running_stats=False))
            layers.append(nn.ReLU())
            if with_dropout:
                layers.append(nn.Dropout(p=0.1))
            in_channels = hidden

        # The output convolution has no padding and therefore shortens the signal by 2.
        layers.append(nn.Conv1d(hidden, self.state_c, kernel_size=3, padding=0, bias = True))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Map ``x`` of shape ``(rows, n_points)`` to a tensor of the same shape.

        The first output column is always zero.
        """

        n_rows, n_points = x.shape

        signal = x.reshape(n_rows, 1, n_points)

        # Extend on the right with one mirrored value (the second-to-last point).
        signal = torch.cat((signal, signal[:, :, -2:-1]), dim=2)

        signal = self.net(signal)

        # The network output covers nodes 1 .. n_points - 1; node 0 is set to zero.
        signal = F.pad(signal, (1, 0), mode="constant", value=0.0)

        return signal.reshape(n_rows, n_points)

    def get_derivatives(self, x):
        """Apply ``forward`` to every time slice of ``x`` with shape ``(batch, h, T)``."""
        batch_size, h, T = x.shape

        slices = x.permute(0, 2, 1).contiguous().view(batch_size * T, h)

        estimated = self.forward(slices)

        return estimated.view(batch_size, T, h).permute(0, 2, 1).contiguous()

### combining both models' predictions ###

class DerivativeEstimator(nn.Module):
    """Time derivative obtained by summing the physical and the learned model.

    Parameters
    ----------
    model_phy : physical model, or ``None`` to leave it out.
    model_aug : learned model; only called when ``is_augmented`` is true.
    is_augmented : whether the output of ``model_aug`` is included.
    """

    def __init__(self, model_phy, model_aug, is_augmented):

        super().__init__()

        self.model_phy = model_phy
        self.model_aug = model_aug
        self.is_augmented = is_augmented

    def forward(self, t, state):
        """Return the derivative at ``state``; ``t`` is accepted but not used.

        Returns ``None`` when there is no physical model and augmentation is off.
        """
        contributions = []

        if self.model_phy is not None:
            contributions.append(self.model_phy(state))

        if self.is_augmented:
            contributions.append(self.model_aug(state))

        if not contributions:
            return None

        if len(contributions) == 1:
            return contributions[0]

        res_phy, res_aug = contributions
        return res_phy + res_aug

### integrating the combined models in end-to-end Neural ODE framework ###

class Forecaster(nn.Module):
    """Integrates the combined derivative model in time with ``odeint_adjoint``.

    Parameters
    ----------
    model_phy : physical model or ``None``.
    model_aug : learned model or ``None``.
    is_augmented : forwarded to ``DerivativeEstimator``.
    method : solver name passed to the integrator (default ``'rk4'``).
    options : solver options passed to the integrator.
    """

    def __init__(self, model_phy, model_aug, is_augmented, method='rk4', options=None):
        super().__init__()

        self.model_phy = model_phy
        self.model_aug = model_aug

        self.derivative_estimator = DerivativeEstimator(self.model_phy, self.model_aug, is_augmented=is_augmented)
        self.method = method
        self.options = options
        self.int_ = odeint_adjoint

    def forward(self, y0, t):
        """Integrate from ``y0`` over the times ``t``.

        The integrator output has time as its first axis; it is permuted so
        that time comes third. For a 2-D ``y0`` of shape ``(batch, n)`` the
        result has shape ``(batch, n, T)``.
        """
        res = self.int_(self.derivative_estimator, y0=y0, t=t, method=self.method, options=self.options)

        # Move the time axis from position 0 to position 2; any further
        # trailing axes keep their order.
        dim_seq = y0.dim() + 1
        dims = [1, 2, 0] + list(range(dim_seq))[3:]
        return res.permute(*dims)

    def get_pde_params(self):
        """Return the quantities exposed by the physical model.

        ``model_phy.params`` is returned when that attribute exists. Otherwise
        (the ``burgersPDE`` model of this notebook defines no ``params``) the
        named parameters and buffers of the model are returned as a dict, and
        an empty dict is returned when there is no physical model. The previous
        unconditional ``self.model_phy.params`` raised ``AttributeError`` in
        both of these cases.
        """
        if self.model_phy is None:
            return {}

        params = getattr(self.model_phy, "params", None)
        if params is not None:
            return params

        exposed = dict(self.model_phy.named_parameters())
        exposed.update(self.model_phy.named_buffers())
        return exposed
    

## Integrating models with pytorch-lightning framework

In [None]:
# Authenticate with Weights & Biases; the training cell further down creates a WandbLogger.
wandb.login()

class burgers_aphy(pl.LightningModule):
    """Lightning module training the physics + neural-network forecaster.

    Parameters
    ----------
    inputsize : tuple whose third entry is the number of grid points (the
        ``dims`` attribute of ``light_burgers_dataloader``).
    batch_size : stored on the module; not used in the steps below.
    channels : hidden channel count of the ``ConvNetEstimator``.
    is_phy : include the ``burgersPDE`` diffusion model.
    is_aug : include the ``ConvNetEstimator`` residual model.
    lr : Adam learning rate.
    loss : ``"l2"`` to train on the MSE only; any other value switches to the
        H1 loss once ``H1_WARMUP_EPOCHS`` epochs have passed.

    Batches are dicts with ``"states"`` of shape ``(batch, n_points, n_steps)``
    and ``"t"`` holding one time vector per sample; the first one is used.
    """

    H1_WEIGHT = 5e-2        # weight of the gradient term in the H1 loss
    H1_WARMUP_EPOCHS = 60   # epochs trained on the plain L2 loss before switching to H1

    def __init__(self, inputsize, batch_size, channels = 64, is_phy = True, is_aug = True , lr=5e-4, loss = "h1"):

        ### init of parameters ###

        super().__init__()        
        self.save_hyperparameters()

        self.inputsize = inputsize
        self.batch_size = batch_size
        self.channels = channels

        self.lr = lr

        ### init of all pytorch models ###

        if is_phy :
            self.model_phy = burgersPDE(dx = self.inputsize[2])
        else :
            self.model_phy = None

        if is_aug :
            # ConvNetEstimator.forward always reshapes its input to one channel,
            # so the network must be built with a single state channel.
            # inputsize[1] is the number of time steps and must not be used here.
            self.model_aug = ConvNetEstimator(state_c = 1, hidden=self.channels)
        else :
            self.model_aug = None

        self.net = Forecaster(self.model_phy, self.model_aug, is_augmented = is_aug)

        self.get_grad = get_grad( dx = self.inputsize[2] )

        ###

        self.chosen_loss = loss # parameter to select which loss is used for training

        # grid coordinates, only needed by the optional plotting code of test_step
        self.x = np.linspace(0, 1, self.inputsize[2])

    def _predict(self, batch):
        """Return ``(x, x_hat)``: the ground truth and the forecast from its first time step."""
        x = batch["states"]
        t = batch["t"][0]
        x_hat = self.net(x[:,:,0],t)
        return x, x_hat

    def training_step(self, batch, batch_idx):

        ### compute model prediction ###

        x, x_hat = self._predict(batch)

        ### compute ground truth and prediction physical gradients ###

        x_grad = self.get_grad(x)
        x_hat_grad = self.get_grad(x_hat)

        ### L2 loss ###

        loss = nn.functional.mse_loss(x_hat, x)

        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        ### H1 loss ###

        loss_h1 = loss + self.H1_WEIGHT * nn.functional.mse_loss(x_hat_grad, x_grad)

        self.log("train_loss_h1", loss_h1, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        ### return the correct training loss based on hyperparameter choice ###

        if self.chosen_loss == "l2" or self.current_epoch < self.H1_WARMUP_EPOCHS :
            return loss
        else:
            return loss_h1

    def validation_step(self, batch, batch_idx):

        ### compute model prediction ###

        x, x_hat = self._predict(batch)

        ### L2 loss ###

        val_loss = nn.functional.mse_loss(x_hat, x)
        self.log("val_loss", val_loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)

        ### compute ground truth and prediction physical gradients ###

        x_grad = self.get_grad(x)
        x_hat_grad = self.get_grad(x_hat)

        ### H1 loss ###

        val_loss_h1 = val_loss + self.H1_WEIGHT * nn.functional.mse_loss(x_hat_grad, x_grad)
        self.log("val_loss_h1", val_loss_h1, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        ### return the loss matching the hyperparameter choice ###

        if self.chosen_loss == "l2":
            return val_loss
        else:
            return val_loss_h1           

    def test_step(self, batch, batch_idx):

        ### compute model prediction ###

        x, x_hat = self._predict(batch)

        ### compute ground truth and prediction physical gradients ###

        x_grad = self.get_grad(x)
        x_hat_grad = self.get_grad(x_hat)

        ### element-wise L2 loss ###

        loss = nn.functional.mse_loss(x_hat, x, reduction= 'none')

        ### element-wise H1 loss ###

        loss_h1 = loss + self.H1_WEIGHT * nn.functional.mse_loss(x_hat_grad, x_grad, reduction = 'none')

        ### losses are averaged for each example and stored in a dataframe ###

        loss_mod = loss.detach().cpu().numpy().mean(axis=(1,2))
        loss_h1_mod = loss_h1.detach().cpu().numpy().mean(axis=(1,2))

        # One row per example of the batch, whatever the batch size is
        # (the previous reshape((1, 64)) only worked for batches of exactly 64).
        df = pd.DataFrame({"val_loss": loss_mod, "val_loss_h1": loss_h1_mod})

        print(df)

        wandb.log({"table": df})   

        ######################## following commented code is used to create video of the first 5 testing example to see model's prediction

        # filenames = []

        # with imageio.get_writer('test_vis.gif', mode='I') as writer:

        #     for i in range(5):

        #         loss2 = nn.functional.mse_loss(x_hat[i,:,:], x[i,:,:])

        #         loss_h1_2 = loss2 + 5e-2 * nn.functional.mse_loss(x_hat_grad[i,:,:], x_grad[i,:,:])

        #         for j in range(0, x.detach().cpu().numpy().shape[2],5):

        #             plt.plot(self.x, x[i,:,j].detach().cpu().numpy(), "r")   
        #             plt.plot(self.x, x_hat[i,:,j].detach().cpu().numpy(), "b") 
        #             plt.ylim(-0.5, 1)

        #             plt.title('loss for ex ' + str(i) + ' is : ' + str(loss2.cpu().detach().numpy()) + ' and h1 loss is : ' + str( loss_h1_2.cpu().detach().numpy() ) )

        #             # plt.title('run ' + str(i) + ', time = ' + str(np.round(10*j*1.5/x.numpy().shape[2])/10) + 's' )

        #             # create file name and append it to a list
        #             filename = f'{i*j + j}.png'

        #             # save frame
        #             plt.savefig(filename, dpi = 150)
        #             plt.close()

        #             # build gif

        #             image = imageio.imread(filename)
        #             writer.append_data(image)

        #             os.remove(filename)

        # wandb.log({"video": wandb.Video('test_vis.gif', fps=30, format="gif")})

    def configure_optimizers(self):

        optimizer = optim.Adam(self.parameters(), lr = self.lr)
        return optimizer
    
            

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnicolaslepage[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Training model

## calling created pytorch-lightning modules ( GPU training takes 4-6 hours for 12*128 examples, 64 channels and 200 epochs)

In [None]:
# Data: 12 training batches of 128 simulated sequences
dl = light_burgers_dataloader( batch_size = 128, ex_multiplier= 12)

# Model: physical diffusion term plus learned residual, trained with the H1 loss
test_model = burgers_aphy( inputsize = dl.dims, batch_size = dl.batch_size, is_phy = True, is_aug = True, lr= 2e-4, loss = "h1" )

wandb_logger = WandbLogger(project="lit-burgers")

# Keep the two checkpoints with the lowest validation H1 loss
checkpoint_callback = ModelCheckpoint(dirpath="/content/", save_top_k=2, monitor="val_loss_h1")

trainer = pl.Trainer(
    accelerator="gpu", 
    devices=1,
    logger=wandb_logger,    # W&B integration
    log_every_n_steps = 1,   # set the logging frequency
    check_val_every_n_epoch = 1,
    max_epochs= 200,
    callbacks=[TQDMProgressBar(refresh_rate=1),checkpoint_callback],
)

trainer.fit(test_model , dl)

# load_from_checkpoint is a classmethod: it is called on the class, not on the
# trained instance (newer Lightning releases reject calls made on an instance).
test_model = burgers_aphy.load_from_checkpoint(checkpoint_callback.best_model_path)

trainer.test(test_model, dl)

wandb.finish()

Create sweep with ID: 35f22dhw
Sweep URL: https://wandb.ai/nicolaslepage/updated_project/sweeps/35f22dhw


[34m[1mwandb[0m: Agent Starting Run: sirc0uep with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	channels: 64
[34m[1mwandb[0m: 	ex_multiplier: 12
[34m[1mwandb[0m: 	learning_rate: 0.0002
[34m[1mwandb[0m: 	loss: h1
[34m[1mwandb[0m: 	model_phy: False
ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


{'batch_size': 128, 'channels': 64, 'ex_multiplier': 12, 'learning_rate': 0.0002, 'loss': 'h1', 'model_phy': False, 'total_ex': 1536}
 !!!!!!!!!!!!!! dl init  !!!!!!!!!!!!!! 
 !!!!!!!!!!!!!! model init  !!!!!!!!!!!!!! 
 !!!!!!!!!!!!!! logger init  !!!!!!!!!!!!!! 


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")



 !!!!!!!!!!!!!! trainer init  !!!!!!!!!!!!!! 


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name      | Type             | Params
-----------------------------------------------
0 | model_aug | ConvNetEstimator | 38.0 K
1 | net       | Forecaster       | 38.0 K
2 | get_grad  | get_grad         | 0     
-----------------------------------------------
38.0 K    Trainable params
0         Non-trainable params
38.0 K    Total params
0.152     Total estimated model params size (MB)
  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"



Sanity Checking: 0it [00:00, ?it/s]





Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=200` reached.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Testing: 0it [00:00, ?it/s]

    val_loss  val_loss_h1
0   0.000582     0.070647
1   0.000905     0.080330
2   0.000953     0.004828
3   0.000434     0.014697
4   0.000458     0.009886
..       ...          ...
59  0.000427     0.036689
60  0.000459     0.003025
61  0.000458     0.003238
62  0.000568     0.045244
63  0.000383     0.003312

[64 rows x 2 columns]
 !!!!!!!!!!!!!! trainer fitted  !!!!!!!!!!!!!! 
 !!!!!!!!!!!!!! files removed  !!!!!!!!!!!!!! 
 !!!!!!!!!!!!!! run is done !!!!!!!!!!!!!! 


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train_loss_epoch,█▃▁▁▁▁▁▁▁▁▁▁▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_h1_epoch,█▆▆▅▅▄▄▄▄▃▃▃▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_h1_step,█▆▇▇▆▅▅▄▄▄▄▃▅▄▃▂▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss_step,█▃▂▂▁▁▁▁▁▁▁▁▅▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val_loss,█▄▄▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss_h1_epoch,█▆██████▇▇▇▇▅▂▃▄▃▂▂▂▃▂▂▂▂▂▂▂▁▁▂▂▁▂▂▁▁▁▁▁
val_loss_h1_step,█▆██████▇▇▇▇▅▂▃▄▃▂▂▂▃▂▂▂▂▂▂▂▁▁▂▂▁▂▂▁▁▁▁▁

0,1
epoch,199.0
train_loss_epoch,0.00043
train_loss_h1_epoch,0.02325
train_loss_h1_step,0.01628
train_loss_step,0.00043
trainer/global_step,2399.0
val_loss,0.00074
val_loss_h1_epoch,0.03729
val_loss_h1_step,0.03729


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
