In [1]:
"""
Compare convex and non-convex optimization for a realizable classification problem.
"""

import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this makes a local `scnn` checkout one level up
# importable — confirm against the repository layout.
sys.path.append("..")

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch


from scnn.private.utils.data import gen_classification_data

from scnn.optimize import optimize
from scnn.regularizers import NeuronGL1

# 1. Generate Data

We generate a realizable classification dataset by sampling labels from a two-layer ReLU network with random weights.
The following parameters control the make-up of the training set.

In [None]:
# Synthetic, realizable classification problem (i.e. Figure 1).
n_train, n_test = 1_000, 1_000  # sizes requested for the training and test splits
d = 25  # also used as the input width of the PyTorch model below
hidden_units = 100  # also used as the hidden width of the PyTorch model below
kappa = 1_000  # condition number

# NOTE(review): the leading 123 is presumably the random seed — confirm against
# gen_classification_data's signature.
(X_train, y_train), (X_test, y_test) = gen_classification_data(
    123, n_train, n_test, d, hidden_units, kappa
)

In [None]:
def accuracy(logits, y):
    """Return the fraction of entries where the sign of the logit equals the label.

    Args:
        logits: array-like of real-valued model outputs.
        y: array-like of labels compared element-wise against ``np.sign(logits)``
            (presumably values in {-1, +1}; confirm against the data generator).

    Returns:
        The number of matching entries divided by ``len(y)``.

    Note:
        ``np.sign(0)`` is 0, so a zero logit only matches a label that is also 0.
    """
    logits = np.asarray(logits)
    y = np.asarray(y)
    # A column of logits with shape (n, 1) compared against flat labels of shape (n,)
    # would broadcast into an (n, n) comparison and inflate the match count.
    # When both hold the same number of entries, align the shapes first.
    if logits.shape != y.shape and logits.size == y.size:
        logits = logits.reshape(y.shape)
    return np.sum(np.sign(logits) == y) / len(y)

# 2. Non-Convex Training

First, we try to train a neural network using the standard non-convex approach.
We use PyTorch to create and differentiate a two-layer neural network with ReLU activations.
Note the amount of boiler-plate required to start training such a simple model!

In [None]:
# Convert every split to a torch tensor of dtype torch.float.
tX_train = torch.tensor(X_train, dtype=torch.float)
ty_train = torch.tensor(y_train, dtype=torch.float)
tX_test = torch.tensor(X_test, dtype=torch.float)
ty_test = torch.tensor(y_test, dtype=torch.float)

# Serve the training pairs as shuffled mini-batches of 32 examples.
loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(tX_train, ty_train),
    batch_size=32,
    shuffle=True,
)

In [None]:
# Regularization strength: multiplies the penalty term in both training cells.
lam = 1e-3

# Stopping rule for the SGD loop: the tolerance compared against the gradient
# measure after each epoch, and the cap on the number of epochs.
tol = 0.000001
max_epochs = 1_000

# SGD step size. Try playing with it to see how sensitive training is;
# other values worth testing are 0.01, 0.001 and 0.0001.
lr = 1e-5

In [None]:
def regularized_objective(model, X, y, lam):
    """Halved mean squared error over the batch plus an L2 penalty on all parameters.

    Args:
        model: torch module mapping ``X`` to predictions.
        X: input tensor for the batch.
        y: target tensor holding one entry per prediction.
        lam: weight on the sum of squared parameter entries.

    Returns:
        Scalar tensor ``sum((model(X) - y)**2) / (2 * len(y)) + lam * sum_p sum(p**2)``.
    """
    preds = model(X)
    # Align the targets with the predictions so that flat (n,) labels cannot
    # silently broadcast against (n, 1) outputs into an (n, n) residual matrix.
    # A genuine size mismatch now raises instead of producing a wrong loss.
    residual = preds - y.reshape(preds.shape)
    l2_penalty = sum(torch.sum(param ** 2) for param in model.parameters())
    return torch.sum(residual ** 2) / (2 * len(y)) + lam * l2_penalty


# Seed torch's global RNG so weight initialization and mini-batch shuffling are
# repeatable every time this cell is re-run (e.g. when comparing step sizes).
torch.manual_seed(123)

# create model: two-layer ReLU network without biases and a single output
nc_model = torch.nn.Sequential(
    torch.nn.Linear(in_features=d, out_features=hidden_units, bias=False),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=hidden_units, out_features=1, bias=False))

# Acc Before Training
print("Pre-Training Test Accuracy:", accuracy(nc_model(tX_test).detach().numpy(), y_test), "\n")


sgd = torch.optim.SGD(nc_model.parameters(), lr=lr)

for i in range(max_epochs):
    # one pass of mini-batch SGD over the training set
    for X, y in loader:
        nc_model.zero_grad()
        obj = regularized_objective(nc_model, X, y, lam)
        obj.backward()

        sgd.step()

    # check for convergence using the full-batch objective and its gradient
    nc_model.zero_grad()
    obj = regularized_objective(nc_model, tX_train, ty_train, lam)
    obj.backward()
    # Note: this is the *squared* Euclidean norm of the full gradient; `.item()`
    # turns it into a plain float for the comparison and for readable logging.
    grad_norm = sum(torch.sum(param.grad ** 2) for param in nc_model.parameters()).item()

    if grad_norm <= tol:
        print(f"Converged at {i}/{max_epochs}")
        break

    if i % 25 == 0:
        # print scalars rather than tensor reprs carrying grad_fn noise
        print(f"{i}/{max_epochs}: Obj - {obj.item()}, Grad - {grad_norm}")

# Acc After Training
print("\nPost-Training Test Accuracy:", accuracy(nc_model(tX_test).detach().numpy(), y_test))

# 3. Convex Reformulation

Instead, we optimize a two-layer neural network with gated ReLU activations using convex optimization.
Convexification allows us to use sophisticated optimization methods with convergence and optimality guarantees.
Training is easy and requires little-to-no setup.

In [None]:
# Number of activation patterns to use.
max_neurons = 1000

# Fit the gated ReLU model via the convex reformulation, reusing the same
# regularization strength `lam` as the non-convex run.
cvx_model, metrics = optimize(
    "gated_relu",
    max_neurons,
    X_train,
    y_train,
    X_test,
    y_test,
    regularizer=NeuronGL1(lam),
    verbose=True,
    device="cpu",
)

# Acc After Training
print("\n \n")
cvx_test_accuracy = accuracy(cvx_model(X_test), y_test)
print("Post-Training Test Accuracy:", cvx_test_accuracy)

# The leading dimension of the first parameter block is reported as the hidden layer size.
cvx_hidden_size = cvx_model.parameters[0].shape[0]
print(f"Hidden Layer Size: {cvx_hidden_size}")