In [9]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import itertools
from utils import SyntheticData, plot_decision_boundary, get_accuracy
from torch.utils.data import DataLoader
%matplotlib inline

class CompositeNetwork(nn.Module):
    def __init__(self, input_dimension, output_dimension, num_hidden_layers=10, neurons_per_layer=100):
        super(CompositeNetwork, self).__init__()
        
        def get_layer_with_elu(in_features, out_features, bias=True):
            return nn.Linear(in_features, out_features, bias=bias), nn.ELU()
        
        self.middle_dense = itertools.chain.from_iterable(get_layer_with_elu(in_features=neurons_per_layer, 
                                                                             out_features=neurons_per_layer) for i in range(num_hidden_layers-2))
    
        
        self.model = nn.Sequential(
            nn.Linear(in_features=input_dimension,
                      out_features=neurons_per_layer,
                      bias=True),
            nn.ELU(),
            *self.middle_dense, 
            )
        
        self.final = nn.Sequential(
            nn.Linear(in_features=neurons_per_layer,
                      out_features=output_dimension,
                      bias=True)
        )
        
    def forward(self, x):
        last_hidden = self.model(x)
        out = self.final(last_hidden)
        return last_hidden, out
    
    def predict(self, x):
        pass
            
def weight_initializer(m):
    if type(m) == nn.Linear:
        nn.init.kaiming_uniform_(m.weight)
        m.bias.data.fill_(0.01)

L = 10
M = 100

net = CompositeNetwork(input_dimension=2, output_dimension=1, num_hidden_layers=L, neurons_per_layer=M)
net.apply(weight_initializer)

criterion = nn.BCELoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

train_data = SyntheticData(400, seed=1)
test_data = SyntheticData(10000, seed=10)

In [10]:
current_epoch = 0
while True:
    optimizer.zero_grad()
    x, outputs = net(train_data.data)
    sigmoid = nn.Sigmoid()
    loss = criterion(sigmoid(outputs), train_data.target)
    loss.backward()
    optimizer.step()

    accuracy = get_accuracy(sigmoid(outputs), train_data.target)
    
    if current_epoch % 100 == 0:
        _, outputs_test = net(test_data.data)
        accuracy_test = get_accuracy(outputs_test, test_data.target)
        print("Accuracy (train): %.3f. Accuracy (validation): %.3f. Loss: %.3f." % (accuracy, accuracy_test, loss.item()))
            
    current_epoch += 1
    if accuracy == 1:
        print("Goal reached after %d epochs" % current_epoch)
        break

Accuracy (train): 0.547. Accuracy (validation): 0.609. Loss: 0.845.
Accuracy (train): 0.875. Accuracy (validation): 0.858. Loss: 0.285.
Accuracy (train): 0.920. Accuracy (validation): 0.843. Loss: 0.177.
Accuracy (train): 0.863. Accuracy (validation): 0.841. Loss: 0.283.
Accuracy (train): 0.998. Accuracy (validation): 0.806. Loss: 0.030.
Goal reached after 425 epochs


In [None]:
plot_decision_boundary(100, net, train_data)

In [11]:
from pulp import *

In [12]:
W_last = net.final[-1].weight.detach().numpy().ravel()

In [38]:
import numpy as np
k = 8

model = pulp.LpProblem("Network Decomposition", pulp.LpMinimize)
alphas = pulp.LpVariable.dicts("alphas", (range(k), range(M)), None, None,  LpContinuous)

for i in range(M):
    model += lpSum([alphas[j][i] for j in range(k)]) == W_last[i] 
    
for i in range(k):
    for j in np.random.choice(M-1, size=int(M/2), replace=False):
        model += alphas[i][j] == 0
        
last_hidden, outputs = net(train_data.data)
last_hidden = last_hidden.detach().numpy()
y_out = outputs.detach().numpy().ravel()
y_hat = train_data.target.detach().numpy().ravel()
y_hat[y_hat == 0] = -1

for i in range(y_out.shape[0]):
    for j in range(k):
        model += lpSum([alphas[j][l] * last_hidden[i, l] for l in range(M)]) * y_hat[i] - 10e-6  >= 0

In [39]:
model.solve()

1

In [41]:
print("Status:", LpStatus[model.status])

Status: Optimal


In [43]:
varsdict = {}
for v in model.variables():
    varsdict[v.name] = v.varValue

a = np.empty(shape=(k, M), dtype=np.float32)

def result_to_matrix(model, k, M):
    a = np.empty(shape=(k, M), dtype=np.float32)
    for v in model.variables():
        name, i, j = v.name.split("_")
        if name == "alphas": 
            a[int(i), int(j)] = v.varValue
    return a
        
a = result_to_matrix(model, k, M)

In [84]:
from scipy.stats import logistic
import torch

def get_accuracy_numpy(probabilities, labels):
    predictions = (probabilities > 0.5)
    correct = (predictions  == labels.int().numpy()).sum()
    return float(correct)/len(labels)
    
for i in range(k):
    preds = logistic.cdf(last_hidden.dot(a[i,:]))
    train_data.target[train_data.target == -1] = 0
    print("Subnetwork %d accuracy: %.3f" % (i, get_accuracy_numpy(preds.reshape(400, 1), train_data.target)))

Subnetwork 0 accuracy: 1.000
Subnetwork 1 accuracy: 1.000
Subnetwork 2 accuracy: 1.000
Subnetwork 3 accuracy: 1.000
Subnetwork 4 accuracy: 1.000
Subnetwork 5 accuracy: 1.000
Subnetwork 6 accuracy: 1.000
Subnetwork 7 accuracy: 1.000
