In [1]:
%pwd

'/Users/ryandevera/data-science/umn_environments/Deeplifting/deeplifting/notebooks'

In [2]:
%cd ../..

/Users/ryandevera/data-science/umn_environments/Deeplifting


In [3]:
from deeplifting.models import DeepliftingSkipMLP
from sklearn.datasets import load_iris
import pandas as pd
import torch
import numpy as np

# import pygranso functions
from pygranso.private.getNvar import getNvarTorch
from pygranso.pygranso import pygranso
from pygranso.pygransoStruct import pygransoStruct

In [None]:
device = torch.device('cpu')

# Load the dataset
iris = load_iris()

# The data and target labels
data = iris.data
labels = iris.target

# If you want the feature names and target names:
feature_names = iris.feature_names
target_names = iris.target_names

df = pd.DataFrame(data=iris.data, columns=['f1', 'f2', 'f3', 'f4'])
df['f5'] = 1.0
df['target'] = iris.target

df = df.sample(frac=1.0).reset_index(drop=True)

# Dimensions
output_size = len(feature_names) + 1

# Inputs
inputs_X = df[['f1', 'f2', 'f3', 'f4', 'f5']].values
inputs_X = torch.from_numpy(inputs_X).to(device=device, dtype=torch.double).T

# Just two classes for now
labels = df['target'].values

y = np.zeros(len(labels))
y[labels != 1] = 1
y[labels == 1] = -1
y = torch.from_numpy(y).to(device=device, dtype=torch.double)

In [None]:
inputs_X.shape, labels.shape

In [None]:
# Set up the learning function
def deeplifting_svm(model, inputs_X, labels):
    outputs = model(None)
    weight_vec = outputs.mean(axis=0)

    # Compute SVM objective
    denominator = torch.linalg.norm(weight_vec, ord=2)
    prod = torch.matmul(weight_vec.T, inputs_X)
    numerator = labels * prod
    obj = numerator / denominator

    # Orig obj
    f = torch.amax(-1 * obj)

    ce = None
    ci = None
    return f, ci, ce


# Set up a model
# Deeplifting model with skip connections
model = DeepliftingSkipMLP(
    input_size=1,
    hidden_sizes=(64,) * 2,
    output_size=output_size,
    bounds=None,
    skip_every_n=1,
    activation='relu',
    output_activation='sine',
    agg_function='sum',
    include_bn=True,
    seed=1,
)

In [None]:
# Deeplifting time!
device = torch.device('cpu')
model = model.to(device=device, dtype=torch.double)
nvar = getNvarTorch(model.parameters())

opts = pygransoStruct()

# Inital x0
x0 = (
    torch.nn.utils.parameters_to_vector(model.parameters())
    .detach()
    .reshape(nvar, 1)
    .to(device=device, dtype=torch.double)
)

# PyGranso options
# Increase max number of iterations and let convege to stationarity
# Do we see local minima in the PyGranso version
# Dual Annealing, SCIP and Deeplifting, PyGranso (showing there are local minima)
opts.x0 = x0
opts.torch_device = device
opts.print_frequency = 1
opts.limited_mem_size = 5
opts.stat_l2_model = False
opts.double_precision = True
opts.opt_tol = 1e-10
opts.maxit = 1000

# Combined function
comb_fn = lambda model: deeplifting_svm(model, inputs_X, y)  # noqa

# Run the main algorithm
soln = pygranso(var_spec=model, combined_fn=comb_fn, user_opts=opts)

In [None]:
# Get the best weights and let's check training sample fit
best_weights = model(None)
best_weights = best_weights.mean(axis=0)
best_weights = best_weights.detach().cpu().numpy()
best_weights

In [None]:
inputs_X = inputs_X.cpu().numpy()
y = y.cpu().numpy()

In [None]:
raw_predictions = np.dot(best_weights, inputs_X)
predictions = np.sign(raw_predictions)

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
accuracy_score(y, predictions)

In [None]:
y, predictions

In [None]:
iris.target_names

In [None]:
from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt

target_names = ['setosa', 'other']
# Apply t-SNE to the data
tsne = TSNE(n_components=2, random_state=0)
X_tsne = tsne.fit_transform(inputs_X.T[:, :-1])

# Create a scatter plot
# Create a scatter plot using matplotlib
colors = ['red', 'blue', 'green']  # Define a color for each class
for i in [-1, 1]:
    plt.scatter(
        X_tsne[predictions == i, 0],
        X_tsne[predictions == i, 1],
        c=colors[i],
        label=target_names[i],
        edgecolors='w',
    )

In [None]:
# from sklearn import datasets
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score

# # Load the iris dataset
# iris = datasets.load_iris()
# X = iris.data
# y = iris.target

# labels = np.zeros(len(y))
# labels[y != 0] = -1
# labels[y == 0] = 1

# # Initialize the Support Vector Machine Classifier
# clf = SVC(kernel='linear')

# # Fit the model to the training data
# clf.fit(X, labels)

# # Predict the labels of the test set
# y_pred = clf.predict(X)

# # Calculate the accuracy of the model
# accuracy_score(labels, y_pred)

In [None]:
# labels

In [None]:
# y_pred

In [None]:
# Set up the learning function
def svm(X_struct, inputs_X, labels):
    weight_vec = X_struct.w

    # Compute SVM objective
    denominator = torch.linalg.norm(weight_vec, ord=2)
    prod = torch.matmul(weight_vec.T, inputs_X)
    numerator = labels * prod
    obj = numerator / denominator

    # Orig obj
    f = torch.amax(-1 * obj)

    ce = None
    ci = None
    return f, ci, ce

In [None]:
# Load the dataset
iris = load_iris()

# The data and target labels
data = iris.data
labels = iris.target

# If you want the feature names and target names:
feature_names = iris.feature_names
target_names = iris.target_names

df = pd.DataFrame(data=iris.data, columns=['f1', 'f2', 'f3', 'f4'])
df['f5'] = 1.0
df['target'] = iris.target

df = df.sample(frac=1.0).reset_index(drop=True)

# Dimensions
output_size = len(feature_names) + 1

# Inputs
inputs_X = df[['f1', 'f2', 'f3', 'f4', 'f5']].values
inputs_X = torch.from_numpy(inputs_X).to(device=device, dtype=torch.double).T

# Just two classes for now
labels = df['target'].values

y = np.zeros(len(labels))
y[labels != 1] = 1
y[labels == 1] = -1
y = torch.from_numpy(y).to(device=device, dtype=torch.double)

# Deeplifting time!
device = torch.device('cpu')

w0 = torch.randn(
    (5, 1),
).to(device, dtype=torch.double)
var_in = {"w": list(w0.shape)}

comb_fn = lambda X_struct: svm(
    X_struct,
    inputs_X,
    y,
)

opts = pygransoStruct()

# PyGranso options
# Increase max number of iterations and let convege to stationarity
# Do we see local minima in the PyGranso version
# Dual Annealing, SCIP and Deeplifting, PyGranso (showing there are local minima)
opts.x0 = torch.reshape(w0, (-1, 1))
opts.torch_device = device
opts.print_frequency = 1
opts.limited_mem_size = 5
opts.stat_l2_model = False
opts.double_precision = True
opts.opt_tol = 1e-10
opts.maxit = 1000

# Run the main algorithm
soln = pygranso(var_spec=var_in, combined_fn=comb_fn, user_opts=opts)

In [None]:
best_weights = soln.best.x

In [None]:
raw_predictions = np.dot(best_weights.T, inputs_X)
predictions = np.sign(raw_predictions)

In [None]:
accuracy_score(y.numpy().flatten(), predictions.flatten())

# Let's Try MNIST

MNIST is a larger dataset with more "features"

In [4]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import metrics
from scipy.optimize import differential_evolution, dual_annealing

# Load the MNIST dataset
digits = datasets.load_digits()

# Split the dataset into features and target variable
X = digits.data
y = digits.target

labels = np.zeros(len(y))
labels[y == 0] = 1
labels[y != 0] = -1

In [5]:
# Dual annealing and differential evolution
inputs_X = X.T.copy()
inputs_X.shape

(64, 1797)

In [6]:
# Set up the learning function
def svm(weight_vec, inputs_X, labels):
    # Compute SVM objective
    denominator = np.linalg.norm(weight_vec, ord=2)
    prod = np.matmul(weight_vec.T, inputs_X)

    numerator = (labels * prod).flatten()
    obj = numerator / denominator

    # Orig obj
    f = np.amax(-1 * obj)
    return f

In [7]:
# Initialize a weight vector
x0 = np.random.randn(inputs_X.shape[0])

fn = lambda w: svm(w, inputs_X, labels)
bounds = [(-10, 10)] * inputs_X.shape[0]

result = dual_annealing(
    fn,
    bounds,
    x0=x0,
    maxiter=10000,
)

In [8]:
result

 message: ['Maximum number of iteration reached']
 success: True
  status: 0
     fun: -0.653067022392706
       x: [-2.275e-01 -8.673e+00 ... -5.005e-01 -1.996e+00]
     nit: 2000
    nfev: 857512
    njev: 9254
    nhev: 0

# PyGRANSO

In [9]:
import torch


# Set up the learning function
def svm(X_struct, inputs_X, labels):
    weight_vec = X_struct.w

    # Compute SVM objective
    denominator = torch.linalg.norm(weight_vec, ord=2)
    prod = torch.matmul(weight_vec.T, inputs_X)
    numerator = labels * prod
    obj = numerator / denominator

    # Orig obj
    f = torch.amax(-1 * obj)

    ce = None
    ci = None
    return f, ci, ce

In [10]:
inputs_X.shape

(64, 1797)

In [11]:
inputs_X = np.vstack([np.ones(inputs_X.shape[1]), inputs_X])
inputs_X

array([[ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ...,
       [ 0., 10., 16., ...,  6., 12., 12.],
       [ 0.,  0.,  9., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [12]:
inputs_X.shape

(65, 1797)

In [13]:
# import pygranso functions
from pygranso.private.getNvar import getNvarTorch
from pygranso.pygranso import pygranso
from pygranso.pygransoStruct import pygransoStruct

# Deeplifting time!
device = torch.device('cpu')

# Inputs
inputs_X = torch.from_numpy(inputs_X).to(device=device, dtype=torch.double)
labels = torch.from_numpy(labels).to(device=device, dtype=torch.double)


w0 = torch.randn(
    (65, 1),
).to(device, dtype=torch.double)
var_in = {"w": list(w0.shape)}

comb_fn = lambda X_struct: svm(
    X_struct,
    inputs_X,
    labels,
)

In [14]:
opts = pygransoStruct()

# PyGranso options
# Increase max number of iterations and let convege to stationarity
# Do we see local minima in the PyGranso version
# Dual Annealing, SCIP and Deeplifting, PyGranso (showing there are local minima)
opts.x0 = torch.reshape(w0, (-1, 1))
opts.torch_device = device
opts.print_frequency = 10
opts.limited_mem_size = 5
opts.stat_l2_model = False
opts.double_precision = True
opts.opt_tol = 1e-10
opts.maxit = 2000

# Run the main algorithm
soln = pygranso(var_spec=var_in, combined_fn=comb_fn, user_opts=opts)



[33m╔═════ QP SOLVER NOTICE ════════════════════════════════════════════════════════════════════════╗
[0m[33m║  PyGRANSO requires a quadratic program (QP) solver that has a quadprog-compatible interface,  ║
[0m[33m║  the default is osqp. Users may provide their own wrapper for the QP solver.                  ║
[0m[33m║  To disable this notice, set opts.quadprog_info_msg = False                                   ║
[0m[33m╚═══════════════════════════════════════════════════════════════════════════════════════════════╝
[0m══════════════════════════════════════════════════════════════════════════════════════════════╗
PyGRANSO: A PyTorch-enabled port of GRANSO with auto-differentiation                          ║ 
Version 1.2.0                                                                                 ║ 
Licensed under the AGPLv3, Copyright (C) 2021-2022 Tim Mitchell and Buyun Liang               ║ 
════════════════════════════════════════════════════════════════════════════

  alpha[j,:]  = self.rho[0,j] * (self.S[:,j].T  @ q)


  40 ║  - │   -   ║ -0.74345503881 ║   -  │   -  ║ QN │     2 │ 0.500000 ║     1 │ 0.162308   ║ 
  50 ║  - │   -   ║ -0.93537276693 ║   -  │   -  ║ QN │     1 │ 1.000000 ║     1 │ 0.181924   ║ 
  60 ║  - │   -   ║ -1.17486584859 ║   -  │   -  ║ QN │     6 │ 0.031250 ║     1 │ 0.850686   ║ 
  70 ║  - │   -   ║ -1.26083760315 ║   -  │   -  ║ QN │     1 │ 1.000000 ║     1 │ 0.182744   ║ 
  80 ║  - │   -   ║ -1.48662299102 ║   -  │   -  ║ QN │     4 │ 0.125000 ║     1 │ 9.831150   ║ 
  90 ║  - │   -   ║ -1.85221211242 ║   -  │   -  ║ QN │     3 │ 0.250000 ║     1 │ 0.093145   ║ 
 100 ║  - │   -   ║ -1.91587374773 ║   -  │   -  ║ QN │     7 │ 0.015625 ║     1 │ 1.666023   ║ 
 110 ║  - │   -   ║ -1.96308456640 ║   -  │   -  ║ QN │     3 │ 0.250000 ║     1 │ 0.372434   ║ 
 120 ║  - │   -   ║ -2.02572440724 ║   -  │   -  ║ QN │     5 │ 0.062500 ║     1 │ 0.495762   ║ 
 130 ║  - │   -   ║ -2.08859956004 ║   -  │   -  ║ QN │     6 │ 0.093750 ║     1 │ 0.593963   ║ 
 140 ║  - │   -   ║ -2.1192925

In [15]:
from deeplifting.models import DeepliftingSkipMLP
from sklearn.datasets import load_iris
import pandas as pd
import torch
import numpy as np

# Deeplifting

In [24]:
# Set up the learning function
def deeplifting_svm(model, inputs_X, labels):
    outputs = model(None)
    weight_vec = outputs.mean(axis=0)

    # Compute SVM objective
    denominator = torch.linalg.norm(weight_vec, ord=2)
    prod = torch.matmul(weight_vec.T, inputs_X)
    numerator = labels * prod
    obj = numerator / denominator

    # Orig obj
    f = torch.amax(-1 * obj)

    ce = None
    ci = None
    return f, ci, ce


# Set up a model
# Deeplifting model with skip connections
model = DeepliftingSkipMLP(
    input_size=1,
    hidden_sizes=(128,) * 5,
    output_size=65,
    bounds=None,
    skip_every_n=1,
    activation='sine',
    output_activation='sine',
    agg_function='identity',
    include_bn=True,
    seed=1,
)

In [None]:
# Deeplifting time!
device = torch.device('cpu')
model = model.to(device=device, dtype=torch.double)
nvar = getNvarTorch(model.parameters())

opts = pygransoStruct()

# Inital x0
x0 = (
    torch.nn.utils.parameters_to_vector(model.parameters())
    .detach()
    .reshape(nvar, 1)
    .to(device=device, dtype=torch.double)
)

# PyGranso options
# Increase max number of iterations and let convege to stationarity
# Do we see local minima in the PyGranso version
# Dual Annealing, SCIP and Deeplifting, PyGranso (showing there are local minima)
opts.x0 = x0
opts.torch_device = device
opts.print_frequency = 10
opts.limited_mem_size = 5
opts.stat_l2_model = False
opts.double_precision = True
opts.opt_tol = 1e-10
opts.maxit = 5000

# Combined function
comb_fn = lambda model: deeplifting_svm(model, inputs_X, labels)  # noqa

# Run the main algorithm
soln = pygranso(var_spec=model, combined_fn=comb_fn, user_opts=opts)



[33m╔═════ QP SOLVER NOTICE ════════════════════════════════════════════════════════════════════════╗
[0m[33m║  PyGRANSO requires a quadratic program (QP) solver that has a quadprog-compatible interface,  ║
[0m[33m║  the default is osqp. Users may provide their own wrapper for the QP solver.                  ║
[0m[33m║  To disable this notice, set opts.quadprog_info_msg = False                                   ║
[0m[33m╚═══════════════════════════════════════════════════════════════════════════════════════════════╝
[0m══════════════════════════════════════════════════════════════════════════════════════════════╗
PyGRANSO: A PyTorch-enabled port of GRANSO with auto-differentiation                          ║ 
Version 1.2.0                                                                                 ║ 
Licensed under the AGPLv3, Copyright (C) 2021-2022 Tim Mitchell and Buyun Liang               ║ 
════════════════════════════════════════════════════════════════════════════