In [1]:
"""
Train convex LassoNet models on a synthetic classification dataset using convex optimization.
"""

import sys
# Add the parent directory to the module search path; presumably this lets the
# `convex_nn` imports in the next cell resolve against the local checkout
# (verify against the repository layout).
sys.path.append("..")

# IPython autoreload extension in mode 2: modules are re-imported before each
# cell executes, so edits to library source are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt

from convex_nn import optimize
from convex_nn.datasets import generate_synthetic_classification

In [3]:
# Realizable synthetic classification problem (i.e. Figure 1).
n_train, n_test = 250, 250
d = 50
hidden_units = 100
kappa = 10  # condition number

# First positional argument (123) is presumably the RNG seed -- confirm against
# the signature of generate_synthetic_classification.
train_split, test_split = generate_synthetic_classification(
    123, n_train, n_test, d, hidden_units, kappa
)
X_train, y_train = train_split
X_test, y_test = test_split

In [4]:
# Generate the specific gate vectors to use: a (d, max_patterns) matrix of
# standard-normal entries.
max_patterns = 50

# Draw from a dedicated, seeded generator rather than the unseeded global
# NumPy RNG so that the gates (and every metric computed from them below) are
# identical after Restart Kernel -> Run All.
gate_seed = 123
gate_rng = np.random.default_rng(gate_seed)
gates = gate_rng.standard_normal((d, max_patterns))

In [5]:
# Fit the "grelu_lasso_net" formulation using the fixed gate vectors, tracking
# accuracy on both splits plus sparsity / step-size diagnostics.
fit_options = {
    "train_metrics": ["accuracy"],
    "test_metrics": ["accuracy"],
    "additional_metrics": ["feature_sparsity", "active_features", "step_size"],
    "U": gates,
    "formulation": "grelu_lasso_net",
    "reg_strength": 0.05,
    "backend": "numpy",
    "verbose": True,
}

grelu_model, grelu_metrics = optimize(
    X_train, y_train, X_test, y_test, **fit_options
)

INFO:convex_nn:Processing data.
INFO:convex_nn:Constructing convex model.
INFO:convex_nn:Preparing optimizer.
INFO:convex_nn:Optimizing convex model.
INFO:convex_nn:Pre-Optimization Metrics: Train Set objective: 0.5, Train Set grad_norm: 0.009022955782711506, Train Set accuracy: 0.0, Test Set accuracy: 0.0, feature_sparsity: 1.0, active_features: 0, step_size: 10.0, 


fista:   0%|          | 0/10000 [00:00<?, ?it/s]

INFO:convex_nn:Termination criterion satisfied at iteration 13/10000. Exiting optimization loop.
INFO:convex_nn:Post-Optimization Metrics: Train Set objective: 0.30856877804440214, Train Set grad_norm: 8.990454819859312e-07, Train Set accuracy: 0.968, Test Set accuracy: 0.856, feature_sparsity: 0.38, active_features: 31, step_size: 30.517578125, 


Train Set objective: 0.5, Train Set grad_norm: 0.009022955782711506, Train Set accuracy: 0.0, Test Set accuracy: 0.0, feature_sparsity: 1.0, active_features: 0, step_size: 10.0, 


In [6]:
# Inspect the weights of the final model (in the non-convex formulation).

print("Gated ReLU Model")
W1, W2, theta = grelu_model.get_weights()

print("Layer Shapes:", W1.shape, W2.shape, theta.shape)

# Fraction of skip-layer weights that are exactly zero.
print("Skip-Layer Sparsity:", np.sum(theta == 0) / theta.shape[1])

# Fraction of W1 columns that are entirely zero. Testing every entry (rather
# than whether the column *sum* is zero) avoids counting a column as sparse
# when its non-zero positive and negative weights happen to cancel.
# NOTE(review): assumes columns of W1 index input features, matching theta's
# second axis; W1 is square here so the shapes alone cannot confirm -- verify.
print("Network Sparsity:", np.sum(np.all(W1 == 0, axis=0)) / theta.shape[1])

Gated ReLU Model
Layer Shapes: (50, 50) (1, 50) (1, 50)
Skip-Layer Sparsity: 0.38
Network Sparsity: 0.38
