In [1]:
# In this notebook we test the gradient optimizer constraint

In [1]:
# General imports
import numpy as np
import torch

# DeepMoD stuff
from deepymod_torch import DeepMoD
from deepymod_torch.model.func_approx import NN
from deepymod_torch.model.library import Library1D
from deepymod_torch.model.constraint import LeastSquares, GradParams
from deepymod_torch.model.sparse_estimators import Threshold
from deepymod_torch.training import train_auto_split, train_auto_split_MSE, train_auto_split_test
from deepymod_torch.training.sparsity_scheduler import TrainTest, Periodic, TrainTestPeriodic

from phimal_utilities.data import Dataset
from phimal_utilities.data.burgers import BurgersDelta

# All runs in the first part of this notebook are pinned to the CPU. The previous
# CUDA-availability check was dead code because its result was overwritten right away;
# the GPU test later in the notebook sets `device = 'cuda'` explicitly in its own cell.
device = 'cpu'
    
# Settings for reproducibility
np.random.seed(42)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

%load_ext autoreload
%autoreload 2

# Code

In [25]:
import torch.nn as nn
from typing import Tuple
from deepymod_torch.utils.types import TensorList

class GradConstraint(nn.Module):
    """Constraint whose coefficient vector is a trainable parameter.

    The coefficients are stored as parameters of this module, so an optimizer
    built from `parameters()` updates them by gradient descent.
    """

    def __init__(self, n_params: int) -> None:
        """Create the trainable coefficient vector.

        Args:
            n_params (int): Number of rows of the (n_params, 1) coefficient vector. It has to
                match the number of library columns (`theta.shape[1]`), because the sparsity
                mask built from theta is multiplied element-wise with the coefficients.
        """
        super().__init__()
        # Built lazily on the first forward pass, once the library width and device are known.
        self.sparsity_masks: TensorList = None
        # A ParameterList (rather than a plain Python list) registers the coefficients on the
        # module, so they are returned by `parameters()` and reach the optimizer.
        self.coeff_vectors = torch.nn.ParameterList([torch.nn.Parameter(torch.rand(n_params, 1))])

    def forward(self, input: Tuple[TensorList, TensorList]) -> Tuple[TensorList, TensorList]:
        """Mask the trainable coefficients and return them together with the library.

        Args:
            input (Tuple[TensorList, TensorList]): Pair `(time_derivs, thetas)`. Only `thetas`
                is read here: each theta's second dimension and device define the initial mask.

        Returns:
            Tuple[TensorList, TensorList]: `(thetas, masked_coeff_vectors)`, where `thetas` is
                passed through unchanged and each coefficient vector is multiplied by its
                sparsity mask, so masked-out terms are exactly zero.

        NOTE(review): the original forward ended without a return statement. The returned pair
        is inferred from the return annotation and the previously commented-out masking line;
        confirm it against what the calling model expects from a constraint.
        """
        time_derivs, thetas = input

        if self.sparsity_masks is None:
            # Start with every library term active.
            self.sparsity_masks = [torch.ones(theta.shape[1], dtype=torch.bool).to(theta.device) for theta in thetas]

        # Multiplying (rather than slicing) keeps the coefficient vector at full length.
        # zip() stops at the shorter sequence; only one coefficient vector is created in __init__.
        self.masked_coeff_vectors = [sparsity_mask[:, None] * coeff for sparsity_mask, coeff in zip(self.sparsity_masks, self.coeff_vectors)]

        return thetas, self.masked_coeff_vectors

# Initial testing

In [10]:
# Let's use a dataset with many samples and low noise to be sure it works.
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)
    
X, y = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=2000, noise=0.1, random=True, normalize=True)
X, y = X.to(device), y.to(device)

In [11]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [12]:
train_auto_split(model, X, y, optimizer, sparsity_scheduler, log_dir='data/initial_test/', write_iterations=25, max_iterations=5000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       5000    100.00%               0s   1.96e-02   1.82e-02   1.36e-03   3.88e+01 

So the problem seems to be that the MSE plateaus at a fairly high value... Let's try without noise and unnormalized:

In [13]:
# Let's use a dataset with many samples and low noise to be sure it works.
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)
    
X, y = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=2000, noise=0.0, random=True, normalize=False)
X, y = X.to(device), y.to(device)

In [14]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [15]:
train_auto_split(model, X, y, optimizer, sparsity_scheduler, log_dir='data/test_no_noise_not_normalized/', write_iterations=25, max_iterations=5000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       3325     66.50%              64s   3.52e-02   2.48e-02   1.04e-02   6.14e+00 Algorithm converged. Stopping training.


So again the MSE plateaus at a really high value... Why? Let's try writing a new base class:

In [24]:
model.constraint.sparsity_masks[0][:, None] * model.constraint.coeff_vectors[0]

tensor([[ 0.1821],
        [-0.5422],
        [ 0.2569],
        [ 0.5052],
        [ 0.4180],
        [-0.3064],
        [-0.8507],
        [ 0.6535],
        [ 0.2013],
        [-0.1281],
        [-0.2321],
        [ 0.7307]], grad_fn=<MulBackward0>)

# Writing a new baseclass

In [3]:
# Let's use a dataset with many samples and low noise to be sure it works.
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)
    
X, y = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=2000, noise=0.0, random=True, normalize=False)
X, y = X.to(device), y.to(device)

In [34]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [35]:
train_auto_split(model, X, y, optimizer, sparsity_scheduler, log_dir='data/separate_base_class/', write_iterations=25, max_iterations=5000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       5000    100.00%               0s   4.07e-02   3.19e-02   8.78e-03   2.42e+01 

WTF? The MSE still doesn't train? What if we train only the MSE:

In [4]:
# Let's use a dataset with many samples and low noise to be sure it works.
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)
    
X, y = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=2000, noise=0.0, random=True, normalize=False)
X, y = X.to(device), y.to(device)

In [5]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [6]:
train_auto_split_MSE(model, X, y, optimizer, sparsity_scheduler, log_dir='data/separate_base_class_MSE/', write_iterations=25, max_iterations=5000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       5000    100.00%               0s   5.04e-06   5.04e-06   1.35e+01   1.06e+02 

So it trains the MSE; checking tensorboard shows that the estimator finds the right coefficients... However the coefficients update while they shouldn't, that's probably the cause of our problems. Let's adapt the MSE function until it doesn't update anymore. We start by not splitting the data:

In [9]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [10]:
train_auto_split_MSE(model, X, y, optimizer, sparsity_scheduler, log_dir='data/finding_bad_grads/', write_iterations=25, max_iterations=1000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       1000    100.00%               0s   3.55e-05   3.55e-05   6.22e+00   5.24e+01 

That didn't do the trick, it's still updating. What if we turn off the calculation of the regularisation term?

In [11]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [12]:
train_auto_split_MSE(model, X, y, optimizer, sparsity_scheduler, log_dir='data/finding_bad_grads_2/', write_iterations=25, max_iterations=1000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       1000    100.00%               0s   4.36e-05   4.36e-05   4.36e-05   6.55e+01 

Still updates.... Weirdly the unscaled version doesn't update so it's probably something in the writing of the coeffs. What if we put all the writing in a no_grad block?

In [13]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [14]:
train_auto_split_MSE(model, X, y, optimizer, sparsity_scheduler, log_dir='data/finding_bad_grads_3/', write_iterations=25, max_iterations=1000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       1000    100.00%               0s   6.84e-05   6.84e-05   6.84e-05   1.02e+02 

Still, my guess is that it's somewhere in the coeff_vector() method which scales them....

In [19]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [20]:
train_auto_split_MSE(model, X, y, optimizer, sparsity_scheduler, log_dir='data/finding_bad_grads_4/', write_iterations=25, max_iterations=1000, delta=0.000) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       1000    100.00%               0s   3.86e-05   3.86e-05   3.86e-05   1.07e+01 

So that did the trick, it was in that function... But fuck me, how much effect did this have the last few months...? Let's slowly build the function back up again and fix it....

No, it was correct. The unscaled coeffs didn't update at all, just the scaled ones. So there's no bug in that function; the problem is that the coefficients don't update...

# Figuring out why the coeffs don't update

Let's build a new training function based on auto_split to figure out why it doesn't update:

In [22]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [None]:
optimizer.param_groups

Inspecting the parameters from the optimizer shows that it's not added to the parameters, so apparently it's not registered in the model:

In [45]:
constraint = GradConstraint(12)
[param for param in constraint.parameters()]

[]

What if we turn the list into a `ParameterList`?

In [47]:
constraint = GradConstraint(12)
[param for param in constraint.parameters()]

[Parameter containing:
 tensor([[ 1.6578],
         [ 2.5803],
         [ 0.9771],
         [ 1.2829],
         [ 0.1055],
         [-0.8976],
         [-0.4052],
         [-0.1735],
         [ 1.3589],
         [ 0.1471],
         [-0.2989],
         [-0.0164]], requires_grad=True)]

Now it works! Let's test:

In [48]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [49]:
train_auto_split(model, X, y, optimizer, sparsity_scheduler, log_dir='data/check_constraint_fixed/', write_iterations=25, max_iterations=1000, delta=0.005) # Running

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       1000    100.00%               0s   1.74e-02   1.71e-02   3.18e-04   2.05e+01 

It updates! Now let's do a check by running it long:

In [50]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [51]:
# Long check run: 5000 iterations, logged to data/gradient_run_1/
train_auto_split(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/gradient_run_1/',
    write_iterations=25,
    max_iterations=5000,
    delta=0.00,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       5000    100.00%               0s   1.35e-02   1.25e-02   9.94e-04   1.47e+01 

So very slow convergence... Let's switch the initialization to rand:

In [53]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.999), amsgrad=True) # Defining optimizer

In [54]:
# Same 5000-iteration run, logged to data/gradient_run_2/
train_auto_split(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/gradient_run_2/',
    write_iterations=25,
    max_iterations=5000,
    delta=0.00,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       5000    100.00%               0s   7.20e-03   6.50e-03   6.98e-04   1.08e+01 

That helped a bit I think, let's tweak the hyperparams:

In [56]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(periodicity=25, patience=5000)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [57]:
# Run with the tweaked optimizer settings: 10000 iterations, logged to data/gradient_run_3/
train_auto_split(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/gradient_run_3/',
    write_iterations=25,
    max_iterations=10000,
    delta=0.00,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
      10000    100.00%               0s   4.00e-06   2.00e-06   2.00e-06   2.26e+00 

# Implementing nicely with deepmod

So that works; now let's include the sparsity mask so that it updates too. We write a new training procedure because a few things need to be changed there regarding the sparsity.

In [68]:
# Let's use a dataset with many samples and low noise to be sure it works.
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)
    
X, y = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=2000, noise=0.1, random=True, normalize=False)
X, y = X.to(device), y.to(device)

In [69]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradConstraint(12) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = Periodic(initial_epoch=2000, periodicity=25)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [70]:
# Train with the new procedure, logged to data/implementing_1/
train_auto_split_test(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/implementing_1/',
    write_iterations=25,
    max_iterations=10000,
    delta=0.001,
    patience=100,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       7500     75.00%             104s   4.86e-04   4.30e-04   5.57e-05   1.93e+00 Algorithm converged. Stopping training.


In [71]:
model.sparsity_masks

[tensor([False,  True,  True, False, False,  True, False,  True,  True, False,
          True, False])]

So it works... It's a bit slow, but it'll do for now.

Now let's adapt the constraint and other stuff to allow both gradient optimization and least squares...

In [89]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12, 1) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = Periodic(initial_epoch=1000, periodicity=25)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [90]:
# Short run (2000 iterations) with the GradParams constraint, logged to data/implementing_2/
train_auto_split_test(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/implementing_2/',
    write_iterations=25,
    max_iterations=2000,
    delta=0.001,
    patience=100,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       2000    100.00%               0s   6.11e-03   5.31e-03   8.00e-04   1.00e+01 

In [91]:
model.sparsity_masks

[tensor([ True,  True,  True,  True,  True,  True,  True, False,  True,  True,
         False,  True])]

Okay so that works as well. Now let's check how to move things to the GPU.

In [92]:
device ='cuda'

In [95]:
# Let's use a dataset with many samples and low noise to be sure it works.
v = 0.1
A = 1.0

x = np.linspace(-3, 4, 100)
t = np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')
dataset = Dataset(BurgersDelta, v=v, A=A)
    
X, y = dataset.create_dataset(x_grid.reshape(-1, 1), t_grid.reshape(-1, 1), n_samples=2000, noise=0.1, random=True, normalize=False)
X, y = X.to(device), y.to(device)

In [96]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12, 1) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = Periodic(initial_epoch=1000, periodicity=25)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [98]:
# Same short run after switching `device` to 'cuda', logged to data/implementing_3/
train_auto_split_test(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/implementing_3/',
    write_iterations=25,
    max_iterations=2000,
    delta=0.001,
    patience=100,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
       2000    100.00%               0s   4.44e-03   3.83e-03   6.07e-04   1.96e+00 

GPU works as well! Now let's test with the train / test set:

In [103]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12, 1) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(patience=200, periodicity=50)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [104]:
# Run with the TrainTestPeriodic scheduler, logged to data/implementing_4/
train_auto_split_test(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/implementing_4/',
    write_iterations=25,
    max_iterations=10000,
    delta=0.001,
    patience=500,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
      10000    100.00%               0s   3.68e-04   3.64e-04   3.54e-06   3.54e+00 

So the sparsity didn't get triggered... Let's add the regression cost as well

In [107]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12, 1) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(patience=200, periodicity=50)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [108]:
# Repeat of the previous run, logged to data/implementing_5/
train_auto_split_test(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/implementing_5/',
    write_iterations=25,
    max_iterations=10000,
    delta=0.001,
    patience=500,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
      10000    100.00%               0s   3.68e-04   3.65e-04   2.85e-06   2.40e+00 

So this whole thing works, it just doesn't get triggered yet. Let's try it with a high noise one:

In [109]:
# Harder test case: fewer samples (1000) and high noise (0.4), not normalized.
v, A = 0.1, 1.0

# Space-time grid handed to the dataset
x, t = np.linspace(-3, 4, 100), np.linspace(0.5, 5.0, 50)
x_grid, t_grid = np.meshgrid(x, t, indexing='ij')

dataset = Dataset(BurgersDelta, v=v, A=A)
X, y = dataset.create_dataset(
    x_grid.reshape(-1, 1),
    t_grid.reshape(-1, 1),
    n_samples=1000,
    noise=0.4,
    random=True,
    normalize=False,
)
# Move both tensors to the device selected earlier in the notebook
X, y = X.to(device), y.to(device)

In [112]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = GradParams(12, 1) # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(patience=200, periodicity=50)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [113]:
# NOTE(review): the saved source of this cell was only a stray `b`, although its recorded
# output is a training run. The call below is reconstructed: max_iterations=25000 follows
# from the output (iteration 12000 at 48.00%); the log_dir name, delta and patience are
# assumed from the neighbouring cells -- confirm before relying on them.
train_auto_split_test(model, X, y, optimizer, sparsity_scheduler, log_dir='data/implementing_6/', write_iterations=25, max_iterations=25000, delta=0.001, patience=500)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
      12000     48.00%             375s   5.67e-03   5.63e-03   4.40e-05   1.33e+00 Algorithm converged. Stopping training.


That works too :-) Might require some tweaking, but we'll get there... Now, let's test least squares to make sure it still works:

In [114]:
network = NN(2, [30, 30, 30, 30, 30], 1)
library = Library1D(poly_order=2, diff_order=3) # Library function
estimator = Threshold(0.1) #Clustering() # Sparse estimator 
constraint = LeastSquares() # How to constrain
model = DeepMoD(network, library, estimator, constraint).to(device) # Putting it all in the model
  
sparsity_scheduler = TrainTestPeriodic(patience=200, periodicity=50)
optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.999), amsgrad=True, lr=2e-3) # Defining optimizer

In [115]:
# Train with the LeastSquares constraint, logged to data/implementing_7/
train_auto_split_test(
    model,
    X,
    y,
    optimizer,
    sparsity_scheduler,
    log_dir='data/implementing_7/',
    write_iterations=25,
    max_iterations=25000,
    delta=0.001,
    patience=500,
)

| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |
      12975     51.90%             539s   5.64e-03   5.59e-03   4.92e-05   1.30e+00 Algorithm converged. Stopping training.


In [2]:
import numpy as np

In [4]:
np.arange(5, 5 + 5)

array([5, 6, 7, 8, 9])