In [2]:
"""
Train neural networks on a synthetic classification dataset using convex optimization.
"""

import sys
# Append the parent directory to the module search path. Presumably this lets the
# project-local `convex_nn` package imported below resolve when the notebook is
# run from its own folder -- TODO confirm against the repository layout.
sys.path.append("..")

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import torch


from convex_nn.private.utils.data import gen_classification_data

from convex_nn.optimize import optimize

In [4]:
# Realizable synthetic classification problem (i.e. Figure 1).
n_train = n_test = 1000  # number of training / test examples requested
d = 25                   # also used as the input width of the non-convex model
hidden_units = 100       # also used as the hidden width of the non-convex model
kappa = 1000             # condition number

# NOTE(review): the leading 123 is presumably the data seed -- confirm against
# the signature of gen_classification_data.
train_split, test_split = gen_classification_data(
    123,
    n_train,
    n_test,
    d,
    hidden_units,
    kappa,
)
X_train, y_train = train_split
X_test, y_test = test_split

In [None]:
def accuracy(logits, y):
    """Return the fraction of examples whose predicted sign matches the label.

    Both inputs are flattened before they are compared. Without this, logits
    of shape ``(n, 1)`` compared against labels of shape ``(n,)`` broadcast to
    an ``(n, n)`` matrix and the returned "accuracy" is meaningless (it can
    even exceed 1).

    Args:
        logits: array-like of real-valued model outputs; only the sign is used.
        y: array-like of labels with the same number of elements as ``logits``.
            Labels are compared against ``np.sign(logits)``, so they are
            expected to take values in {-1, 0, 1}.

    Returns:
        The proportion of entries where ``np.sign(logits) == y``.

    Raises:
        ValueError: if ``logits`` and ``y`` hold a different number of elements.
    """
    predictions = np.sign(np.asarray(logits)).reshape(-1)
    targets = np.asarray(y).reshape(-1)

    if predictions.shape != targets.shape:
        raise ValueError(
            f"logits and y must have the same number of elements, "
            f"got {predictions.size} and {targets.size}."
        )

    return np.sum(predictions == targets) / len(targets)

In [None]:
# Convert the numpy splits into float32 torch tensors (same order as unpacked).
tX_train, ty_train, tX_test, ty_test = (
    torch.tensor(array, dtype=torch.float)
    for array in (X_train, y_train, X_test, y_test)
)

# Mini-batch iterator over the training pairs: batches of 32, reshuffled every epoch.
train_dataset = torch.utils.data.TensorDataset(tX_train, ty_train)
loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
max_epochs = 1_000  # upper bound on the number of SGD epochs
tol = 1e-6          # SGD stops once the summed squared gradient is <= tol
lam = 1e-3          # weight on the squared-L2 penalty; also handed to the convex solver

## Non-Convex Model

In [None]:
lr = 0.00001  # SGD step size

# Seed torch's global RNG so the weight initialisation and the loader's
# shuffling order are identical on every Restart & Run All.
torch.manual_seed(123)

# create model: two-layer ReLU network, no biases, one scalar output per example
nc_model = torch.nn.Sequential(
    torch.nn.Linear(in_features=d, out_features=hidden_units, bias=False),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=hidden_units, out_features=1, bias=False),
)


def nc_objective(model, inputs, targets):
    """Return the L2-regularised squared-error objective of `model` on a batch.

    The value is ``sum((model(inputs) - targets)**2) / (2 * len(targets))``
    plus ``lam`` times the sum of squared entries of every model parameter.

    The model emits a ``(batch, 1)`` column, so predictions are reshaped to
    the target's shape before subtracting. Otherwise targets of shape
    ``(batch,)`` would broadcast the residual to ``(batch, batch)``.

    Args:
        model: the torch module being trained.
        inputs: tensor of input rows.
        targets: tensor with one target per input row.

    Returns:
        A scalar tensor that supports ``.backward()``.
    """
    residual = model(inputs).reshape(targets.shape) - targets
    l2_penalty = sum(torch.sum(param ** 2) for param in model.parameters())
    return torch.sum(residual ** 2) / (2 * len(targets)) + lam * l2_penalty


def nc_test_accuracy(model):
    """Return the sign-accuracy of `model` on the held-out test split."""
    with torch.no_grad():
        logits = model(tX_test).numpy()
    # Align logits with y_test so the element-wise comparison cannot broadcast.
    return accuracy(logits.reshape(np.shape(y_test)), y_test)


# Acc Before Training
print("Test Accuracy:", nc_test_accuracy(nc_model))

sgd = torch.optim.SGD(nc_model.parameters(), lr=lr)

for i in range(max_epochs):
    # one pass of mini-batch SGD over the training set
    for X, y in loader:
        nc_model.zero_grad()
        nc_objective(nc_model, X, y).backward()
        sgd.step()

    # check for convergence using the full-batch gradient
    nc_model.zero_grad()
    obj = nc_objective(nc_model, tX_train, ty_train)
    obj.backward()
    # NOTE: this is the *squared* L2 norm of the full gradient (no square root).
    grad_norm = sum(torch.sum(param.grad ** 2) for param in nc_model.parameters())

    if grad_norm <= tol:
        print(f"Converged at {i}/{max_epochs}")
        break

    if i % 25 == 0:
        # .item() prints plain floats instead of tensor reprs carrying grad_fn
        print(f"{i}/{max_epochs}: Obj - {obj.item()}, Grad - {grad_norm.item()}")

# Acc After Training
print("Test Accuracy:", nc_test_accuracy(nc_model))

## Convex Reformulation

In [None]:
# Number of activation patterns handed to the convex solver.
max_neurons = 1000

# Fit the "gated_relu" formulation on the CPU with the same regularisation
# strength `lam` as the SGD baseline; the test split is passed in as well.
cvx_model, metrics = optimize(
    "gated_relu",
    max_neurons,
    lam,
    X_train,
    y_train,
    X_test,
    y_test,
    verbose=True,
    device="cpu",
)

# Acc After Training
print("\n \n")
cvx_test_accuracy = accuracy(cvx_model(X_test), y_test)
print("Test Accuracy:", cvx_test_accuracy)
# Hidden-layer size is read off the leading dimension of the first parameter array.
cvx_hidden_size = cvx_model.parameters[0].shape[0]
print(f"Hidden Layer Size: {cvx_hidden_size}")