In [1]:
"""
Train neural networks on a synthetic classification dataset using convex optimization.
"""

import sys
sys.path.append("..")

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import torch


from convex_nn.private.utils.data import gen_classification_data

from convex_nn.optimize import optimize

In [4]:
# Generate a realizable synthetic classification problem (i.e. Figure 1).
n_train = n_test = 1000  # number of training / test examples requested
d, hidden_units = 25, 100  # input dimension and hidden width (reused by the PyTorch model)
kappa = 10  # condition number

# The leading 123 is presumably the generator's random seed -- TODO confirm
# against gen_classification_data's signature.
(X_train, y_train), (X_test, y_test) = gen_classification_data(
    123,
    n_train,
    n_test,
    d,
    hidden_units,
    kappa,
)

In [5]:
def accuracy(logits, y):
    """Return the fraction of examples whose predicted sign matches the label.

    Args:
        logits: real-valued model outputs, one per example. May be shaped
            (n,) or (n, 1); it is flattened before comparison.
        y: labels compared against ``np.sign(logits)``, one per example.
            May be shaped (n,) or (n, 1); it is flattened before comparison.

    Returns:
        The proportion of positions where ``np.sign(logits) == y``.

    Raises:
        ValueError: if ``logits`` and ``y`` hold a different number of entries.
    """
    # Flatten both sides: comparing an (n, 1) column of logits with an (n,)
    # label vector would otherwise broadcast to an (n, n) matrix and silently
    # yield a meaningless ratio.
    predictions = np.sign(np.asarray(logits)).ravel()
    labels = np.asarray(y).ravel()

    if predictions.size != labels.size:
        raise ValueError(
            f"logits and y must have the same number of entries, "
            f"got {predictions.size} and {labels.size}"
        )

    return np.sum(predictions == labels) / len(labels)

In [6]:
# Cast each data split to a float32 torch tensor for the PyTorch model.
tX_train = torch.tensor(X_train, dtype=torch.float)
ty_train = torch.tensor(y_train, dtype=torch.float)
tX_test = torch.tensor(X_test, dtype=torch.float)
ty_test = torch.tensor(y_test, dtype=torch.float)

# Shuffled mini-batches of 32 training examples.
train_dataset = torch.utils.data.TensorDataset(tX_train, ty_train)
loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

In [7]:
# Optimization settings.
max_epochs = 1_000  # upper bound on passes over the training set
tol = 1e-6  # threshold on the summed squared gradient entries used as the stopping test
lam = 1e-3  # regularization strength

## Non-Convex Model

In [10]:
lr = 0.01


def _regularized_objective(model, inputs, targets, l2_weight):
    """Return half the mean squared error of `model` plus a squared-L2 penalty.

    Args:
        model: torch module mapping `inputs` to one output per example.
        inputs: batch of examples fed to `model`.
        targets: regression targets, one per example, shaped (n,) or (n, 1).
        l2_weight: multiplier applied to the sum of squared parameters.

    Returns:
        Scalar tensor `sum((model(inputs) - targets)^2) / (2 n) + l2_weight * ||params||^2`.
    """
    l2_penalty = sum(torch.sum(param ** 2) for param in model.parameters())
    # Flatten both sides so an (n, 1) model output never broadcasts against
    # (n,) targets into an (n, n) residual matrix.
    residuals = model(inputs).reshape(-1) - targets.reshape(-1)
    return torch.sum(residuals ** 2) / (2 * len(targets)) + l2_weight * l2_penalty


# create model: two-layer ReLU network without bias terms
nc_model = torch.nn.Sequential(
    torch.nn.Linear(in_features=d, out_features=hidden_units, bias=False),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=hidden_units, out_features=1, bias=False),
)

# Acc Before Training (no autograd graph is needed for evaluation)
with torch.no_grad():
    test_logits = nc_model(tX_test).numpy()
print("Test Accuracy:", accuracy(test_logits, y_test))

sgd = torch.optim.SGD(nc_model.parameters(), lr=lr)

for i in range(max_epochs):
    # one epoch of mini-batch SGD on the regularized objective
    for X, y in loader:
        nc_model.zero_grad()
        obj = _regularized_objective(nc_model, X, y, lam)
        obj.backward()

        sgd.step()

    # check for convergence using the full training set
    nc_model.zero_grad()
    obj = _regularized_objective(nc_model, tX_train, ty_train, lam)
    obj.backward()
    # NOTE: this is the *squared* gradient norm (sum of squared entries),
    # so `tol` is a threshold on the squared norm.
    grad_norm = sum(torch.sum(param.grad ** 2) for param in nc_model.parameters())

    if grad_norm <= tol:
        print(f"Converged at {i}/{max_epochs}")
        break

    if i % 25 == 0:
        print(f"{i}/{max_epochs}: Obj - {obj}, Grad - {grad_norm}")

# Acc After Training
with torch.no_grad():
    test_logits = nc_model(tX_test).numpy()
print("Test Accuracy:", accuracy(test_logits, y_test))

Test Accuracy: 0.433
0/1000: Obj - 0.17481637001037598, Grad - 0.35416796803474426
25/1000: Obj - 0.11591842770576477, Grad - 0.00950573105365038
50/1000: Obj - 0.10122910141944885, Grad - 0.01757941208779812
75/1000: Obj - 0.09057214111089706, Grad - 0.04705703258514404
100/1000: Obj - 0.08221948146820068, Grad - 0.026237286627292633
125/1000: Obj - 0.07646583765745163, Grad - 0.041473351418972015
150/1000: Obj - 0.07053500413894653, Grad - 0.0018400861881673336
175/1000: Obj - 0.06606698781251907, Grad - 0.012897776439785957
200/1000: Obj - 0.0616372786462307, Grad - 0.001382817281410098
225/1000: Obj - 0.05873275548219681, Grad - 0.026050196960568428
250/1000: Obj - 0.05530163645744324, Grad - 0.02188556082546711
275/1000: Obj - 0.051780033856630325, Grad - 0.0015152401756495237
300/1000: Obj - 0.04925689473748207, Grad - 0.004436086397618055
325/1000: Obj - 0.04647251218557358, Grad - 0.0015814988873898983
350/1000: Obj - 0.044255148619413376, Grad - 0.002444556448608637
375/1000: 

In [11]:
# number of activation patterns to use.
max_neurons = 1000

# train model
# (a stray `''` token after `max_neurons,` made this call a syntax error; removed)
cvx_model, metrics = optimize(
    "gated_relu",
    max_neurons,
    lam,
    X_train,
    y_train,
    X_test,
    y_test,
    verbose=True,
    device="cpu",
)

# Acc After Training
print("\n \n")
print("Test Accuracy:", accuracy(cvx_model(X_test), y_test))
# Leading dimension of the first parameter block, reported as the hidden layer size.
print(f"Hidden Layer Size: {cvx_model.parameters[0].shape[0]}")

INFO:convex_nn:Pre-Optimization Metrics: Train Set objective: 0.5, Train Set grad_norm: 0.1562219113111496, 


fista:   0%|          | 0/10000 [00:00<?, ?it/s]

Train Set objective: 0.5, Train Set grad_norm: 0.1562219113111496, 
Train Set objective: 0.1280835418701172, Train Set grad_norm: 1.120530032494571e-05, 
Train Set objective: 0.12403209686279297, Train Set grad_norm: 2.0719967324112076e-06, 


INFO:convex_nn:Termination criterion satisfied at iteration 67/10000. Exiting optimization loop.
INFO:convex_nn:Post-Optimization Metrics: Train Set objective: 0.12256829071044922, Train Set grad_norm: 8.999828082778549e-07, 



 

Test Accuracy: 0.954
Hidden Layer Size: 418
