In [1]:
%pwd

'/Users/ryandevera/data-science/umn_environments/Deeplifting/deeplifting/notebooks'

In [2]:
%cd ../..

/Users/ryandevera/data-science/umn_environments/Deeplifting


In [3]:
from deeplifting.models import DeepliftingSkipMLP
from sklearn.datasets import load_iris
import pandas as pd
import torch
import numpy as np

# import pygranso functions
from pygranso.private.getNvar import getNvarTorch
from pygranso.pygranso import pygranso
from pygranso.pygransoStruct import pygransoStruct

In [4]:
device = torch.device('cpu')

# Load the dataset
iris = load_iris()

# The raw feature matrix (kept for reference; the DataFrame below is what is used)
data = iris.data

# If you want the feature names and target names:
feature_names = iris.feature_names
target_names = iris.target_names

feature_cols = ['f1', 'f2', 'f3', 'f4']
df = pd.DataFrame(data=iris.data, columns=feature_cols)
# Constant column of ones appended to the features
# (presumably the bias/intercept term of the linear separator - confirm).
df['f5'] = 1.0

# Just two classes for now: class 0 -> -1, classes 1 and 2 -> +1.
# The mapping is written into the frame explicitly instead of mutating
# `df['target'].values` in place, which only worked when pandas returned a
# writable view of the column.
df['target'] = np.where(iris.target > 0, 1, -1)

# Shuffle the rows; the seed is fixed so Restart & Run All reproduces the
# same ordering (and therefore the same outputs) every time.
df = df.sample(frac=1.0, random_state=0).reset_index(drop=True)

# Dimensions: one weight per feature plus one for the constant column
output_size = len(feature_names) + 1

# Inputs, transposed so that each column is one sample
inputs_X = df[feature_cols + ['f5']].to_numpy(copy=True)
inputs_X = torch.from_numpy(inputs_X).to(device=device, dtype=torch.double).T

# Labels in {-1, +1}; copy so torch never wraps memory owned by the frame
labels = df['target'].to_numpy(copy=True)
labels = torch.from_numpy(labels).to(device=device, dtype=torch.double)

In [5]:
df

Unnamed: 0,f1,f2,f3,f4,f5,target
0,5.0,3.4,1.6,0.4,1.0,-1
1,5.0,2.3,3.3,1.0,1.0,1
2,4.8,3.4,1.9,0.2,1.0,-1
3,7.2,3.0,5.8,1.6,1.0,1
4,5.6,2.7,4.2,1.3,1.0,1
...,...,...,...,...,...,...
145,6.7,2.5,5.8,1.8,1.0,1
146,6.1,2.8,4.7,1.2,1.0,1
147,5.2,3.4,1.4,0.2,1.0,-1
148,4.4,2.9,1.4,0.2,1.0,-1


In [6]:
inputs_X.shape, labels.shape

(torch.Size([5, 150]), torch.Size([150]))

In [7]:
# Set up the learning function
def deeplifting_svm(model, inputs_X, labels):
    """Evaluate the max-margin objective for the weights produced by `model`.

    The model is called as ``model(None)`` and its output is averaged over
    axis 0 to obtain a single weight vector ``w``. For every sample column
    ``x_i`` of ``inputs_X`` the signed, norm-scaled score
    ``labels_i * (w . x_i) / ||w||_2`` is computed, and the objective is the
    maximum of the negated scores (i.e. minus the smallest scaled score).

    Args:
        model: Callable invoked as ``model(None)`` that returns a 2-D tensor
            whose rows are averaged into the weight vector.
        inputs_X: 2-D tensor with one sample per column (in this notebook it
            has shape ``(n_features, n_samples)``).
        labels: 1-D tensor with one entry per sample column.

    Returns:
        Tuple ``(f, ci, ce)``: the scalar objective tensor followed by the
        inequality and equality constraints, both ``None`` (unconstrained).
    """
    outputs = model(None)
    weight_vec = outputs.mean(dim=0)

    # Compute SVM objective
    denominator = torch.linalg.norm(weight_vec, ord=2)
    # `weight_vec` is 1-D, so matmul already contracts it against the rows of
    # `inputs_X`. The previous `weight_vec.T` was a no-op that triggered
    # PyTorch's deprecation warning for `.T` on non-2-D tensors.
    prod = torch.matmul(weight_vec, inputs_X)
    numerator = labels * prod
    obj = numerator / denominator

    # Orig obj: minimising the largest negated score maximises the worst score
    f = torch.amax(-1 * obj)

    ce = None
    ci = None
    return f, ci, ce


# Build the deeplifting network (MLP with skip connections).
# The constructor arguments are gathered in one mapping first so the whole
# configuration can be read (and tweaked) in a single place.
mlp_config = {
    'input_size': 512,
    'hidden_sizes': (128, 128),
    # One network output per weight of the linear separator
    'output_size': output_size,
    'bounds': None,
    'skip_every_n': 1,
    'activation': 'relu',
    'output_activation': 'sine',
    'agg_function': 'sum',
    'include_bn': True,
    'seed': 0,
}
model = DeepliftingSkipMLP(**mlp_config)

In [8]:
# Deeplifting time!
device = torch.device('cpu')
model = model.to(device=device, dtype=torch.double)
nvar = getNvarTorch(model.parameters())

opts = pygransoStruct()

# Initial x0: the model's current parameters flattened into an (nvar, 1)
# double-precision column vector
x0 = (
    torch.nn.utils.parameters_to_vector(model.parameters())
    .detach()
    .reshape(nvar, 1)
    .to(device=device, dtype=torch.double)
)

# PyGranso options
# TODO: increase the max number of iterations and let it converge to stationarity
# TODO: check whether we see local minima in the PyGranso version
# Candidates to compare: Dual Annealing, SCIP and Deeplifting, PyGranso
# (showing there are local minima)
opts.x0 = x0
opts.torch_device = device
opts.print_frequency = 100
opts.limited_mem_size = 5
opts.stat_l2_model = False
opts.double_precision = True
opts.opt_tol = 1e-10
opts.maxit = 100


# Combined function
def comb_fn(model):
    """Objective/constraint callback: bind the data to `deeplifting_svm`."""
    return deeplifting_svm(model, inputs_X, labels)


# Run the main algorithm
soln = pygranso(var_spec=model, combined_fn=comb_fn, user_opts=opts)

  prod = torch.matmul(weight_vec.T, inputs_X)




[33m╔═════ QP SOLVER NOTICE ════════════════════════════════════════════════════════════════════════╗
[0m[33m║  PyGRANSO requires a quadratic program (QP) solver that has a quadprog-compatible interface,  ║
[0m[33m║  the default is osqp. Users may provide their own wrapper for the QP solver.                  ║
[0m[33m║  To disable this notice, set opts.quadprog_info_msg = False                                   ║
[0m[33m╚═══════════════════════════════════════════════════════════════════════════════════════════════╝
[0m══════════════════════════════════════════════════════════════════════════════════════════════╗
PyGRANSO: A PyTorch-enabled port of GRANSO with auto-differentiation                          ║ 
Version 1.2.0                                                                                 ║ 
Licensed under the AGPLv3, Copyright (C) 2021-2022 Tim Mitchell and Buyun Liang               ║ 
════════════════════════════════════════════════════════════════════════════

In [9]:
# Get the best weights and let's check training sample fit:
# run the network once, average its output over axis 0 into a single weight
# vector, and hand it to NumPy.
# NOTE(review): this assumes `model` holds the desired iterate after pygranso
# returns - confirm, or copy the solution vector back into the model first.
best_weights = model(None).mean(axis=0).detach().cpu().numpy()

In [10]:
# Convert the data to NumPy arrays for the NumPy-based evaluation that follows.
# torch.as_tensor accepts tensors and ndarrays alike, so this cell can be
# re-run safely; a bare `.cpu().numpy()` fails on the second run because the
# names already hold ndarrays at that point.
inputs_X = torch.as_tensor(inputs_X).cpu().numpy()
labels = torch.as_tensor(labels).cpu().numpy()

In [15]:
# Signed score of every sample column under the learned weight vector ...
raw_predictions = best_weights @ inputs_X
# ... and its sign as the predicted class (-1 / +1; an exact zero stays 0).
predictions = np.sign(raw_predictions)

In [16]:
from sklearn.metrics import accuracy_score

In [17]:
accuracy_score(labels, predictions)

1.0

In [19]:
labels, predictions

(array([-1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.,  1.,
         1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  1.,  1., -1., -1.,  1.,
         1., -1., -1.,  1.,  1.,  1.,  1., -1.,  1., -1.,  1.,  1.,  1.,
         1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1., -1., -1., -1.,  1.,  1.,  1.,  1., -1.,  1.,  1., -1., -1.,
        -1.,  1., -1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,
         1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1., -1., -1., -1.,  1.,
         1., -1., -1.,  1., -1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
         1.,  1., -1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,  1., -1.,  1.,
         1.,  1., -1.,  1.,  1., -1., -1., -1.,  1.,  1., -1.,  1.,  1.,
        -1.,  1., -1., -1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,
         1., -1.,  1.,  1., -1., -1.,  1.]),
 array([-1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1., -1.,  1.,
         1.,  1.,  1.,  1., -1., -1.,  1.,  1.,  1.,  1., -1., -1.,  1.,
      

In [20]:
raw_predictions

array([-0.99781407,  1.0909311 , -0.79709533,  2.68764594,  1.70375354,
       -1.02743934,  2.70870344,  1.83489419, -1.12995113,  0.7970951 ,
        2.56189138, -0.95934541,  1.99877669,  1.60162326,  1.89015767,
        1.37965626,  1.46200106, -1.29203157, -1.09591478,  1.56154343,
        1.87953348,  1.63025213,  1.35524771, -0.94431014, -1.22873035,
        2.48891988,  1.88517421, -1.33128463, -1.2631907 ,  1.92818108,
        1.86034164,  3.10460596,  1.42376565, -1.56618798,  3.96278631,
       -1.10134351,  1.97790994,  3.33590389,  1.47281601,  1.57072597,
       -1.380971  ,  3.41667831, -0.97834649,  2.64035501,  2.69992367,
       -1.33147539,  3.50195015,  2.34541685,  2.91198713,  2.88431388,
        2.20481791,  1.81082478,  2.65577178, -1.20387654, -1.18430307,
       -1.0522719 ,  1.26675353,  1.65338866,  1.92298561,  1.68386195,
       -1.2527149 ,  3.17547698,  2.78256511, -1.3563292 , -1.20430055,
       -1.32186885,  1.36031604, -1.11158606,  2.26788588,  2.28

In [26]:
def damavandi_factors(x1, x2):
    """Return the two multiplicative factors of the 2-D test function.

    With ``sinc(t) = sin(pi * t) / (pi * t)``:

    * ``factor1 = 1 - |sinc(x1 - 2) * sinc(x2 - 2)| ** 5``
    * ``factor2 = 2 + (x1 - 7) ** 2 + 2 * (x2 - 7) ** 2``

    The function value is ``factor1 * factor2``.
    NOTE(review): the formula matches what is usually listed as the Damavandi
    benchmark function - confirm against the intended reference.

    ``np.sinc`` is defined as 1 at ``t == 0``, so evaluating exactly at
    ``x1 == 2`` or ``x2 == 2`` gives a finite value instead of the 0/0 = nan
    that the hand-written ratio produces.

    Args:
        x1: First coordinate (scalar or NumPy array).
        x2: Second coordinate (scalar or NumPy array).

    Returns:
        Tuple ``(factor1, factor2)``.
    """
    sinc_product = np.sinc(x1 - 2.0) * np.sinc(x2 - 2.0)
    factor1 = 1.0 - abs(sinc_product) ** 5.0
    factor2 = 2 + (x1 - 7.0) ** 2.0 + 2 * (x2 - 7.0) ** 2.0
    return factor1, factor2


# Probe the function just next to the singular point (2, 2).
# (numpy is already imported as `np` in the import cell at the top.)
x1, x2 = 1.9999999999, 1.9999999999
factor1, factor2 = damavandi_factors(x1, x2)

In [28]:
factor1 * factor2

4.2743586449733856e-14

In [38]:
# Same two-factor evaluation, this time slightly away from (2, 2).
x1, x2 = 2.01, 2.01

# np.sinc(t) = sin(pi * t) / (pi * t) and is defined as 1 at t == 0, so this
# stays finite even if x1 or x2 is set to exactly 2.0 (the explicit
# numerator / denominator ratio evaluates to 0/0 = nan there).
sinc_product = np.sinc(x1 - 2.0) * np.sinc(x2 - 2.0)

factor1 = 1.0 - abs(sinc_product) ** 5.0
factor2 = 2 + (x1 - 7.0) ** 2.0 + 2 * (x2 - 7.0) ** 2.0

In [40]:
factor1, factor2, factor1 * factor2

(0.0016436359349144558, 76.7003, 0.12606736929871923)

In [49]:
# Evaluate the pairwise sin^2 / rational term over consecutive coordinates of
# `x` and sum the contributions.
x = np.array([-10, 10.0])

# Consecutive pairs (x[i], x[i + 1])
head, tail = x[:-1], x[1:]
sq_radius = head**2 + tail**2

# One term per consecutive pair
pair_terms = 0.5 + (np.sin(np.sqrt(sq_radius)) ** 2 - 0.5) / (1 + 0.001 * sq_radius) ** 2
results = np.sum(pair_terms)

In [50]:
results

0.8472050780532028