In [10]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

### Let's begin analysing the code by replicating the results corresponding to section 4.1.1 in the original paper

In [11]:
import sys
sys.path.append('D:\\Desktop\\Lipschitz_DSNN')

In [12]:
from dataloader.Function_1D import *

In [13]:
# Generate the testing set
X, y = generate_testing_set(lambda x: slope_1_flat(x, n=100, seed=100), n_points=5000)


In [14]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)

# Print the shapes of the training and testing sets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: torch.Size([1000, 1])
X_test shape: torch.Size([4000, 1])
y_train shape: torch.Size([1000, 1])
y_test shape: torch.Size([4000, 1])


In [15]:
### Note for me: for the time being,I am intentionally keeping the test size larger than the train size

We have the training and testing dataset, now we need to load the model and train it. This would also allow you to understand the 1-Lipschitz constraint model well. 

Let's load the saved configuration

In [17]:
import json

# Load the JSON file
file_config_training="D:\Desktop\Lipschitz_DSNN\configs\config_1d.json"
with open(file_config_training, 'r') as f:
    config = json.load(f)

In [18]:
### checking the configurations for 1d example
config

{'activation_fn_params': {'activation_type': 'linearspline',
  'groupsort_groupsize': 5,
  'prelu_init': -1,
  'lipschitz_constrained': True,
  'spline_init': 'relu',
  'spline_range': 0.5,
  'spline_scaling_coeff': True,
  'spline_size': 101,
  'lmbda': 1e-07},
 'dataset': {'function_type': 'f1',
  'number_knots': 9,
  'testing_dataset_size': 10000,
  'training_dataset_size': 1000},
 'exp_name': 'test',
 'log_dir': '1d_exps/ortho',
 'net_params': {'bias': True,
  'layer_sizes': [1, 10, 10, 10, 1],
  'projection': 'orthonormalize',
  'weight_initialization': 'He_uniform'},
 'optimizer': {'lr_spline_coeffs': 5e-05,
  'lr_spline_scaling_coeffs': 0.0005,
  'lr_weights': 0.002},
 'seed': 5,
 'training_options': {'batch_size': 10,
  'epochs': 1000,
  'nbr_models': 25,
  'num_workers': 1}}

In [34]:
### let's first change and simplify some of the config params
config['dataset']['training_dataset_size']=100
config['dataset']['testing_dataset_size']=200

config['training_options']['batch_size']=1
config['training_options']['epochs']=2## let's train only for 2 epochs

config['training_options']['nbr_models']=1
config['net_params']['layer_sizes']=[1,10,1]

In [35]:
config

{'activation_fn_params': {'activation_type': 'linearspline',
  'groupsort_groupsize': 5,
  'prelu_init': -1,
  'lipschitz_constrained': True,
  'spline_init': 'relu',
  'spline_range': 0.5,
  'spline_scaling_coeff': True,
  'spline_size': 101,
  'lmbda': 1e-07},
 'dataset': {'function_type': 'f1',
  'number_knots': 9,
  'testing_dataset_size': 200,
  'training_dataset_size': 100},
 'exp_name': 'test',
 'log_dir': '1d_exps/ortho',
 'net_params': {'bias': True,
  'layer_sizes': [1, 10, 1],
  'projection': 'orthonormalize',
  'weight_initialization': 'He_uniform'},
 'optimizer': {'lr_spline_coeffs': 5e-05,
  'lr_spline_scaling_coeffs': 0.0005,
  'lr_weights': 0.002},
 'seed': 5,
 'training_options': {'batch_size': 1,
  'epochs': 2,
  'nbr_models': 1,
  'num_workers': 1}}

In [22]:
!pip install tensorboard

Collecting tensorboard
  Downloading tensorboard-2.14.0-py3-none-any.whl (5.5 MB)
Collecting grpcio>=1.48.2
  Downloading grpcio-1.62.2-cp38-cp38-win_amd64.whl (3.8 MB)
Collecting google-auth-oauthlib<1.1,>=0.5
  Downloading google_auth_oauthlib-1.0.0-py2.py3-none-any.whl (18 kB)
Collecting markdown>=2.6.8
  Downloading Markdown-3.6-py3-none-any.whl (105 kB)
Collecting tensorboard-data-server<0.8.0,>=0.7.0
  Downloading tensorboard_data_server-0.7.2-py3-none-any.whl (2.4 kB)
Collecting google-auth<3,>=1.6.3
  Downloading google_auth-2.29.0-py2.py3-none-any.whl (189 kB)
Collecting pyasn1-modules>=0.2.1
  Downloading pyasn1_modules-0.4.0-py3-none-any.whl (181 kB)
Collecting rsa<5,>=3.1.4
  Downloading rsa-4.9-py3-none-any.whl (34 kB)
Collecting requests-oauthlib>=0.7.0
  Downloading requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)
Collecting importlib-metadata>=4.4
  Downloading importlib_metadata-7.1.0-py3-none-any.whl (24 kB)
Collecting pyasn1<0.7.0,>=0.4.6
  Downloading pyasn1-0.6

In [24]:
### let's load the trainer
from utils import trainer_1d 

In [36]:
### instantiating the trainer
train_1d_ae= trainer_1d.Trainer1D(config=config, seed=1, device='cpu')

Preparing the dataloaders
Building the model(s)
Number of models :  1
Number of parameters in the model(s):  1051
SimpleFC(
  (layers): Sequential(
    (0): LipschitzLinear(in_features=1, out_features=10, bias=True)
    (1): LinearSpline(mode=fc, num_activations=10, init=relu, size=101, grid=0.010, lipschitz_constrained=True.)
    (2): LipschitzLinear(in_features=10, out_features=1, bias=True)
  )
)


In [37]:
### now I need to print the model
train_1d_ae.models

[SimpleFC(
   (layers): Sequential(
     (0): LipschitzLinear(in_features=1, out_features=10, bias=True)
     (1): LinearSpline(mode=fc, num_activations=10, init=relu, size=101, grid=0.010, lipschitz_constrained=True.)
     (2): LipschitzLinear(in_features=10, out_features=1, bias=True)
   )
 )]

In [71]:
train_1d_ae.models[0].modules_linearspline

<generator object BaseModel.modules_linearspline at 0x000002729F765120>

In [38]:
### let's now train the model and in the process investigate what exactly is happening in the linear spline function
train_1d_ae.train()

T (0) | TotalLoss 0.06070914 |: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:21<00:00,  4.72it/s]
T (1) | TotalLoss 0.00760458 |: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:12<00:00,  7.82it/s]
T (2) | TotalLoss 0.02052213 |: 100%|████████████████████████████████████████████████████████████████| 100/100 [00:12<00:00,  8.30it/s]


Saving a checkpoint:


In [43]:
train_1d_ae.optimizers

[Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.002
     maximize: False
     weight_decay: 0
 
 Parameter Group 1
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 5e-05
     maximize: False
     weight_decay: 0
 
 Parameter Group 2
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.0005
     maximize: False
     weight_decay: 0
 )]

In [44]:
config['activation_fn_params']['lipschitz_constrained']

True

In [45]:
train_1d_ae.models[0].using_splines

True

In [46]:
train_1d_ae.models[0].modules_linearspline

<generator object BaseModel.modules_linearspline at 0x000002729F6BA2E0>

In [49]:
train_1d_ae.models[0]

SimpleFC(
  (layers): Sequential(
    (0): LipschitzLinear(in_features=1, out_features=10, bias=True)
    (1): LinearSpline(mode=fc, num_activations=10, init=relu, size=101, grid=0.010, lipschitz_constrained=True.)
    (2): LipschitzLinear(in_features=10, out_features=1, bias=True)
  )
)

In [50]:
for j, module in enumerate(train_1d_ae.models[0].modules_linearspline):
                    num_act = module.num_activations
                    size = module.size
                    grid = module.grid
                    coeffs = module.coefficients_vect.view(num_act, size)

In [54]:
coeffs

tensor([[ 2.5000e-05,  7.0366e-06,  1.1079e-05,  ...,  4.8025e-01,
          4.9135e-01,  4.9865e-01],
        [-1.4175e-05,  1.6300e-05, -1.4666e-05,  ...,  4.7969e-01,
          4.9007e-01,  5.0029e-01],
        [ 3.2881e-03, -3.3205e-03, -2.7684e-03,  ...,  4.8040e-01,
          4.8986e-01,  5.0034e-01],
        ...,
        [-5.2165e-06,  4.3071e-06, -5.2331e-06,  ...,  4.8048e-01,
          4.8965e-01,  5.0030e-01],
        [-1.9703e-06,  2.0825e-06,  8.7813e-06,  ...,  4.7981e-01,
          4.8974e-01,  5.0024e-01],
        [-1.5425e-03,  1.5138e-03,  8.3034e-04,  ...,  4.8042e-01,
          4.8968e-01,  5.0042e-01]], grad_fn=<ViewBackward0>)

In [68]:
def slope_normalization(cs, T):
    lipschitz = torch.max(torch.abs(cs[:, 1:] - cs[:,:-1]), dim=1)[0]
    print("lipschitz is:", lipschitz)
    new_cs = T * torch.div(cs.T, lipschitz).T

    return new_cs

In [70]:
coeffs.shape, num_act, size, grid

(torch.Size([10, 101]), 10, 101, tensor([0.0100]))

In [69]:
slope_normalization(coeffs, grid.item())

lipschitz is: tensor([0.0115, 0.0195, 0.0108, 0.0155, 0.0109, 0.0216, 0.0116, 0.0109, 0.0115,
        0.0110], grad_fn=<MaxBackward0>)


tensor([[ 2.1660e-05,  6.0965e-06,  9.5985e-06,  ...,  4.1609e-01,
          4.2571e-01,  4.3203e-01],
        [-7.2793e-06,  8.3708e-06, -7.5314e-06,  ...,  2.4634e-01,
          2.5167e-01,  2.5692e-01],
        [ 3.0498e-03, -3.0798e-03, -2.5678e-03,  ...,  4.4558e-01,
          4.5435e-01,  4.6408e-01],
        ...,
        [-4.7643e-06,  3.9338e-06, -4.7795e-06,  ...,  4.3883e-01,
          4.4721e-01,  4.5694e-01],
        [-1.7163e-06,  1.8140e-06,  7.6493e-06,  ...,  4.1795e-01,
          4.2661e-01,  4.3575e-01],
        [-1.4078e-03,  1.3817e-03,  7.5786e-04,  ...,  4.3849e-01,
          4.4694e-01,  4.5674e-01]], grad_fn=<MulBackward0>)

In [75]:
train_1d_ae.optimizers

[Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.002
     maximize: False
     weight_decay: 0
 
 Parameter Group 1
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 5e-05
     maximize: False
     weight_decay: 0
 
 Parameter Group 2
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.0005
     maximize: False
     weight_decay: 0
 )]

In [None]:
### now I need to understand the set_optimization steo because it contains things

In [76]:
### let's look into LinearSpline class and see how it works

train_1d_ae.models[0]

SimpleFC(
  (layers): Sequential(
    (0): LipschitzLinear(in_features=1, out_features=10, bias=True)
    (1): LinearSpline(mode=fc, num_activations=10, init=relu, size=101, grid=0.010, lipschitz_constrained=True.)
    (2): LipschitzLinear(in_features=10, out_features=10, bias=True)
    (3): LinearSpline(mode=fc, num_activations=10, init=relu, size=101, grid=0.010, lipschitz_constrained=True.)
    (4): LipschitzLinear(in_features=10, out_features=10, bias=True)
    (5): LinearSpline(mode=fc, num_activations=10, init=relu, size=101, grid=0.010, lipschitz_constrained=True.)
    (6): LipschitzLinear(in_features=10, out_features=1, bias=True)
  )
)

In [77]:
### go to pdf page number 11. they  have mentioned that they 
### and the LLS was
# initialized as ReLU and had a range of [−0.5, 0.5], 
# 100 linear regions, and λ = 10−7
# for the
# TV(2) regularization.
config 

{'activation_fn_params': {'activation_type': 'linearspline',
  'groupsort_groupsize': 5,
  'prelu_init': -1,
  'lipschitz_constrained': True,
  'spline_init': 'relu',
  'spline_range': 0.5,
  'spline_scaling_coeff': True,
  'spline_size': 101,
  'lmbda': 1e-07},
 'dataset': {'function_type': 'f1',
  'number_knots': 9,
  'testing_dataset_size': 200,
  'training_dataset_size': 100},
 'exp_name': 'test',
 'log_dir': '1d_exps/ortho',
 'net_params': {'bias': True,
  'layer_sizes': [1, 10, 1],
  'projection': 'orthonormalize',
  'weight_initialization': 'He_uniform'},
 'optimizer': {'lr_spline_coeffs': 5e-05,
  'lr_spline_scaling_coeffs': 0.0005,
  'lr_weights': 0.002},
 'seed': 5,
 'training_options': {'batch_size': 1,
  'epochs': 2,
  'nbr_models': 1,
  'num_workers': 1}}

In [78]:
act_arange=torch.arange(0,10)
print("act_arange is:"); print(act_arange)
zero_knot_indexes= (act_arange * 101 + (101 // 2))
print("zero_knot indices:"); print(zero_knot_indexes)

act_arange is:
tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
zero_knot indices:
tensor([ 50, 151, 252, 353, 454, 555, 656, 757, 858, 959])


In [81]:
grid = 2* 0.5/(101-1)# range is 0.5
grid

0.01

In [84]:
torch.Tensor([1, -2, 1]).view(1, 1, 3).div(grid)

tensor([[[ 100., -200.,  100.]]])

In [85]:
torch.linspace(-0.5,0.5,101)

tensor([-0.5000, -0.4900, -0.4800, -0.4700, -0.4600, -0.4500, -0.4400, -0.4300,
        -0.4200, -0.4100, -0.4000, -0.3900, -0.3800, -0.3700, -0.3600, -0.3500,
        -0.3400, -0.3300, -0.3200, -0.3100, -0.3000, -0.2900, -0.2800, -0.2700,
        -0.2600, -0.2500, -0.2400, -0.2300, -0.2200, -0.2100, -0.2000, -0.1900,
        -0.1800, -0.1700, -0.1600, -0.1500, -0.1400, -0.1300, -0.1200, -0.1100,
        -0.1000, -0.0900, -0.0800, -0.0700, -0.0600, -0.0500, -0.0400, -0.0300,
        -0.0200, -0.0100,  0.0000,  0.0100,  0.0200,  0.0300,  0.0400,  0.0500,
         0.0600,  0.0700,  0.0800,  0.0900,  0.1000,  0.1100,  0.1200,  0.1300,
         0.1400,  0.1500,  0.1600,  0.1700,  0.1800,  0.1900,  0.2000,  0.2100,
         0.2200,  0.2300,  0.2400,  0.2500,  0.2600,  0.2700,  0.2800,  0.2900,
         0.3000,  0.3100,  0.3200,  0.3300,  0.3400,  0.3500,  0.3600,  0.3700,
         0.3800,  0.3900,  0.4000,  0.4100,  0.4200,  0.4300,  0.4400,  0.4500,
         0.4600,  0.4700,  0.4800,  0.49

In [107]:
grid_tensor=torch.linspace(-0.5,0.5,4).expand((6,4))
grid_tensor

tensor([[-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [108]:
grid_tensor[::2,:]

tensor([[-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [109]:
grid_tensor[1::2,:]

tensor([[-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [110]:
coefficients = torch.zeros(grid_tensor.shape)
coefficients[::2, :] = (grid_tensor[::2, :]).abs()
coefficients[1::2, :] = grid_tensor[1::2, :]
coefficients

tensor([[ 0.5000,  0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [ 0.5000,  0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [ 0.5000,  0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [119]:
coefficients

tensor([[ 0.5000,  0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [ 0.5000,  0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000],
        [ 0.5000,  0.1667,  0.1667,  0.5000],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [118]:
coefficients.contiguous().view(-1)

tensor([ 0.5000,  0.1667,  0.1667,  0.5000, -0.5000, -0.1667,  0.1667,  0.5000,
         0.5000,  0.1667,  0.1667,  0.5000, -0.5000, -0.1667,  0.1667,  0.5000,
         0.5000,  0.1667,  0.1667,  0.5000, -0.5000, -0.1667,  0.1667,  0.5000])

In [114]:
check=torch.Tensor([[1,2,3,4],
        [5,6,7,8],
        [9,10,11,12],
        [13,14,15,16],
        [17,18,19,20],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [115]:
check[::2,:]

tensor([[ 1.,  2.,  3.,  4.],
        [ 9., 10., 11., 12.],
        [17., 18., 19., 20.]])

In [116]:
check[1::2,:]

tensor([[ 5.0000,  6.0000,  7.0000,  8.0000],
        [13.0000, 14.0000, 15.0000, 16.0000],
        [-0.5000, -0.1667,  0.1667,  0.5000]])

In [120]:
from utils import spline_utils

In [121]:
from activations.linearspline import LinearSpline

def get_spline_coefficients(model):
    coeffs_list = []
    for module in model.modules():
        if isinstance(module, LinearSpline):
            coeffs_list.append(module.coefficients_vect)
    return coeffs_list
    
def get_spline_scaling_coeffs(model):
    coeffs_list = []
    for module in model.modules():
        if isinstance(module, LinearSpline):
            coeffs_list.append(module.scaling_coeffs_vect)
    return coeffs_list

def get_no_spline_coefficients(model):
    coeffs_list = set(model.parameters())
    coeffs_list = coeffs_list - set(get_spline_coefficients(model)) - set(get_spline_scaling_coeffs(model))
    coeffs_list = list(coeffs_list)
    return coeffs_list

In [129]:
get_no_spline_coefficients(train_1d_ae.models[0])

[Parameter containing:
 tensor([[ 0.7653],
         [ 0.1386],
         [ 0.8019],
         [ 0.3225],
         [ 0.1878],
         [ 0.5325],
         [-0.8397],
         [ 0.0776],
         [-0.0912],
         [-0.5753]], requires_grad=True),
 Parameter containing:
 tensor([0.2756], requires_grad=True),
 Parameter containing:
 tensor([ 0.9200,  0.2306, -0.5924,  0.2930, -0.7203,  0.1466,  0.1552,  0.9365,
          0.7303, -0.6240], requires_grad=True),
 Parameter containing:
 tensor([[-1.7137e-01,  5.5110e-02, -2.5477e-01,  2.6344e-05, -1.8383e-01,
           2.8225e-01, -3.1604e-01,  1.6083e-01, -1.7860e-01, -2.4105e-01]],
        requires_grad=True)]

In [130]:
get_spline_coefficients(train_1d_ae.models[0])

[Parameter containing:
 tensor([2.5000e-05, 7.0366e-06, 1.1079e-05,  ..., 4.8042e-01, 4.8968e-01,
         5.0042e-01], requires_grad=True)]

In [131]:
get_spline_scaling_coeffs(train_1d_ae.models[0])

[Parameter containing:
 tensor([[[[0.9934]],
 
          [[1.0157]],
 
          [[1.0464]],
 
          [[0.9979]],
 
          [[0.9993]],
 
          [[0.9702]],
 
          [[0.9858]],
 
          [[0.9990]],
 
          [[0.9984]],
 
          [[1.0237]]]], requires_grad=True)]

### Let's understand the forward function of linearspline activation function

In [1]:
import torch

class DeepSplineActivation:
    def __init__(self, mode):
        self.mode = mode

    def reshape_forward(self, x):
        """
        Reshape inputs for deepspline activation forward pass, depending on
        mode ('conv' or 'fc').
        """
        input_size = x.size()
        if self.mode == 'fc':
            if len(input_size) == 2:
                # one activation per conv channel
                # transform to 4D size (N, num_units=num_activations, 1, 1)
                x = x.view(*input_size, 1, 1)
            else:
                raise ValueError(f'input size is {len(input_size)}D but should be 2D')
        else:
            assert len(input_size) == 4, 'input to activation should be 4D (N, C, H, W) if mode="conv".'

        return x

# Example usage
# Create an instance of DeepSplineActivation
activation = DeepSplineActivation(mode='fc')

# Create a tensor with size (batch_size, num_features)
x = torch.randn(5, 10)

# Reshape the tensor using reshape_forward
reshaped_x = activation.reshape_forward(x)

# Print the original and reshaped tensor
print("Original tensor shape:", x.shape)
print("Reshaped tensor shape:", reshaped_x.shape)


Original tensor shape: torch.Size([5, 10])
Reshaped tensor shape: torch.Size([5, 10, 1, 1])


In [6]:
activation = DeepSplineActivation(mode='conv')

# Create a tensor with size (batch_size, channels, height, width)
x = torch.randn(5, 3, 32, 32)

# Reshape the tensor using reshape_forward
reshaped_x = activation.reshape_forward(x)

# Print the original and reshaped tensor shapes
print("Original tensor shape:", x.shape)
print("Reshaped tensor shape:", reshaped_x.shape)

Original tensor shape: torch.Size([5, 3, 32, 32])
Reshaped tensor shape: torch.Size([5, 3, 32, 32])


In [1]:
import torch

In [22]:
def slope_clipping(cs, T):
    device = cs.device
    n = cs.shape[1]
    new_slopes = torch.clamp(cs[:,1:] - cs[:,:-1], -T, T)### this is the operation clipping_{T} (D c) (eqn 12 and 13)
    print(f"new slopes are: {new_slopes}")
    new_cs = torch.zeros(cs.shape, device=device)
    new_cs[:,1:] = torch.cumsum(new_slopes, dim=1)
    print(f"new cs cumsum first term are: {new_cs}")
    print(f"new_cs-cs: {new_cs-cs}")
    print(f"mean operation: {torch.mean(cs - new_cs, dim=1).unsqueeze(1)}")
    new_cs = new_cs + torch.mean(cs - new_cs, dim=1).unsqueeze(1)
    return new_cs


cs= torch.randn(size=(1,4))
print(f"initial coefficients: {cs}")
T=1
check= slope_clipping(cs,T)
print(f"coefficients now: {check}")

torch.mean(cs), torch.mean(check)

initial coefficients: tensor([[-2.3356,  2.4861, -1.7046,  0.7492]])
new slopes are: tensor([[ 1., -1.,  1.]])
new cs cumsum first term are: tensor([[0., 1., 0., 1.]])
new_cs-cs: tensor([[ 2.3356, -1.4861,  1.7046,  0.2508]])
mean operation: tensor([[-0.7012]])
coefficients now: tensor([[-0.7012,  0.2988, -0.7012,  0.2988]])


(tensor(-0.2012), tensor(-0.2012))

In [54]:
def checking_forward_linear_spline(x, coefficients_vect, grid, size, even):

    # The value of the spline at any x is a combination 
    # of at most two coefficients
    max_range = (grid.item() * (size // 2 - 1))
    if even:
        x = x - grid / 2
        max_range = (grid.item() * (size // 2 - 2))
    x_clamped = x.clamp(min=-(grid.item() * (size // 2)), max=max_range)
    print("clamped x is:",x_clamped)

    floored_x = torch.floor(x_clamped / grid)  #left coefficient
    print("floored x is:",floored_x)
    #fracs = x_clamped / grid - floored_x
    fracs = x / grid - floored_x  # distance to left coefficient
    print("frac is (dist to the left coefs):",fracs)
    # This gives the indexes (in coefficients_vect) of the left
    # coefficients
    # indexes = (zero_knot_indexes.view(1, -1, 1, 1) + floored_x).long()
    # # Only two B-spline basis functions are required to compute the output
    # # (through linear interpolation) for each input in the B-spline range.
    # activation_output = coefficients_vect[indexes + 1] * fracs + \
    #     coefficients_vect[indexes] * (1 - fracs)
    # if even:
    #     activation_output = activation_output + grid / 2

    # ctx.save_for_backward(fracs, coefficients_vect, indexes, grid)
    #return activation_output

In [55]:
x=torch.tensor([-10.5,-1.5,2,2.5,2.4,-11.5])
print(f"original tensor is: {x}")
grid=torch.tensor([1])
size=9
coefficients=torch.randn(9)
checking_forward_linear_spline(x,coefficients,grid,size,even=False)

original tensor is: tensor([-10.5000,  -1.5000,   2.0000,   2.5000,   2.4000, -11.5000])
clamped x is: tensor([-4.0000, -1.5000,  2.0000,  2.5000,  2.4000, -4.0000])
floored x is: tensor([-4., -2.,  2.,  2.,  2., -4.])
frac is (dist to the left coefs): tensor([-6.5000,  0.5000,  0.0000,  0.5000,  0.4000, -7.5000])


In [4]:
import torch
import torch.nn.functional as F

In [5]:
def initialize_coeffs(init, grid_tensor, grid, size):
        """The coefficients are initialized with the value of the activation
        # at each knot (c[k] = f[k], since B1 splines are interpolators)."""
        
        if init == 'identity':
            coefficients = grid_tensor
        elif init == 'zero':
            coefficients = torch.zeros(grid_tensor.shape)
        elif init == 'relu':
            coefficients = F.relu(grid_tensor)
        elif init == 'absolute_value':
            coefficients = torch.abs(grid_tensor)
            
        elif init == 'maxmin':
            # initalize half of the activations with the absolute and the other half with the 
            # identity. This is similar to maxmin because max(x1, x2) = (x1 + x2)/2 + |x1 - x2|/2 
            # and min(x1, x2) = (x1 + x2)/2 - |x1 - x2|/2
            coefficients = torch.zeros(grid_tensor.shape)
            coefficients[::2, :] = (grid_tensor[::2, :]).abs()
            coefficients[1::2, :] = grid_tensor[1::2, :]
        
        else:
            raise ValueError('init should be in [identity, relu, absolute_value, maxmin, max_tv].')
        
        return coefficients

In [14]:
grid_tensor = torch.linspace(-3,3,9).expand((1,9))
print(grid_tensor)

tensor([[-3.0000, -2.2500, -1.5000, -0.7500,  0.0000,  0.7500,  1.5000,  2.2500,
          3.0000]])


In [15]:
coefficients= initialize_coeffs(init="relu",
                                grid_tensor=grid_tensor,
                                grid=1,
                                size=9)
coefficients

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.7500, 1.5000, 2.2500, 3.0000]])

In [21]:
unsqueezed_coeff=coefficients.unsqueeze(1)
print("unsqueezed_coeff: ", unsqueezed_coeff)
print(f"shape of the unsqueezed_coeff: {unsqueezed_coeff.shape}")

unsqueezed_coeff:  tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.7500, 1.5000, 2.2500,
          3.0000]]])
shape of the unsqueezed_coeff: torch.Size([1, 1, 9])


In [22]:
grid=1
d2_filter=torch.tensor([1, -2, 1]).view(1, 1, 3).div(grid)
print(f"D2 filter is: {d2_filter} with shape {d2_filter.shape}")

D2 filter is: tensor([[[ 1., -2.,  1.]]]) with shape torch.Size([1, 1, 3])


In [23]:
F.conv1d(unsqueezed_coeff, d2_filter)

tensor([[[0.0000, 0.0000, 0.0000, 0.7500, 0.0000, 0.0000, 0.0000]]])

I understand the code now, I now need to now run some test cases and then check how it works.

1. classification task with some toy adversarial example I believe?

### testing torch.searchsorted function

In [1]:
import torch

In [9]:
# Create a sorted tensor with unequal distances
sorted_tensor = torch.tensor([-1,2,4,8,16])

# Values we want to search for
values = torch.tensor([12])

# Use searchsorted to find the indices
indices = torch.searchsorted(sorted_tensor, values)

# Output the results
print("Indices returned by searchsorted:", indices)

# Demonstrate the insertion
for value, index in zip(values, indices):
    if index == 0:
        print(f"Value {value} would be inserted at the start of the tensor (index {index}).")
    elif index == len(sorted_tensor):
        print(f"Value {value} would be inserted at the end of the tensor (index {index}).")
    else:
        print(f"Value {value} would be inserted at index {index}, between {sorted_tensor[index - 1]} and {sorted_tensor[index]} if index < {len(sorted_tensor)}.")


Indices returned by searchsorted: tensor([4])
Value 12 would be inserted at index 4, between 8 and 16 if index < 5.


In [8]:
torch.floor(torch.Tensor([3])/torch.Tensor([2]))

tensor([1.])