In [6]:
import os
import pickle
import sys
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from scipy.spatial.distance import pdist, squareform
from scipy.stats import entropy
from sklearn.datasets import make_moons

tf.keras.backend.set_floatx('float64')
sns.set_style('whitegrid')

sys.path.append('../../functions/')
from dataset import generate_gaussian_parity

In [7]:
# Polytope functions, tensorflow version
def get_activations(model, X, penultimate=False):
    """
    Description
    -----------
    Passes the samples through every layer of a sequential network except the
    last one and records, per sample, which units are "on" (output > 0) after
    each layer. For a ReLU network each distinct on/off pattern corresponds to
    one polytope, i.e. a region of input space on which the network is a
    linear function, so the returned rows encode polytope membership.

    Parameters
    ----------
    model : sequential network with ReLUs (e.g. tf.keras.Sequential)
        Only ``model.layers`` is used. The final layer is treated as the
        output layer and skipped.
    X : array-like
        Samples to push through the network, in whatever form the first layer
        accepts. The caller's object is not modified.
    penultimate : boolean, default False
        If True, only returns the on/off pattern of the last layer before the
        output layer. Set to True if all the prior layers are viewed as a
        representation learner.

    Returns
    -------
    polytope_memberships : np.ndarray of int (values 0/1)
        If ``penultimate`` is False, the per-layer patterns stacked
        column-wise with ``np.hstack`` (for 2-D layer outputs: one row per
        sample, one column per unit across all non-output layers).
        If ``penultimate`` is True, the pattern of the last non-output layer
        only.

    Raises
    ------
    ValueError
        If the model has no layer before the output layer, so there are no
        activations to record.
    """
    hidden_layers = model.layers[:-1]  # Assumes sequential, may have to adjust based on model
    if len(hidden_layers) == 0:
        raise ValueError(
            "get_activations needs at least one layer before the output layer"
        )

    activation_patterns = []
    hidden = X  # keep the parameter itself untouched while propagating
    for layer in hidden_layers:
        hidden = layer(hidden)
        # Eager tensors expose .numpy(); fall back to np.asarray for layers
        # that already return array-likes.
        values = hidden.numpy() if hasattr(hidden, "numpy") else np.asarray(hidden)
        # The layer output is thresholded at zero: 1 = unit on, 0 = unit off.
        activation_patterns.append((values > 0.0).astype('int'))

    if penultimate:
        return activation_patterns[-1]
    return np.hstack(activation_patterns)
    
#     polytopes, assignments, counts = np.unique(polytope_memberships, axis=0, return_inverse=True, return_counts=True)
    
#     # kernel_mat = 1 - squareform(pdist(polytope_memberships, p=1)) / polytope_memberships.shape[1]
#     # kernel_mat = polytope_memberships @ polytope_memberships.T / polytope_memberships.shape[1]
#     # mat_evals = np.linalg.svd(kernel_mat, compute_uv=False, hermitian=True)
#     mat_evals = np.linalg.svd(polytope_memberships / np.sqrt(polytope_memberships.shape[1]), compute_uv=False)**2
    
#     return len(polytopes) / n_samples, assignments, counts, entropy(mat_evals)

In [8]:
# Alternative toy dataset, currently disabled:
# X_train, y_train = make_moons(1000)

# Training split: 1000 samples generated with acorn=1234.
X_train, y_train = generate_gaussian_parity(acorn=1234, angle_params=0, n=1000)

# Held-out split: 10000 samples generated with a different acorn (66).
X_test, y_test = generate_gaussian_parity(acorn=66, angle_params=0, n=10000)

In [9]:
# Sweep configuration: one ReLU MLP is trained per (depth, width) pair.
widths = [4, 8, 16, 32]
depths = [1, 2, 3, 4]
n_epochs = 100  # identical for every model, so set once outside the loops
OUTPUT_DIR = './activation_matrices'
SEED = 0

# Create the results directory up front; otherwise the first pickle dump fails
# with FileNotFoundError only after a full training run has completed.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Best-effort reproducibility: fix TensorFlow's global seed before any weights
# are initialised or batches shuffled.
tf.random.set_seed(SEED)

for depth in depths:
    for width in widths:
        # `depth` hidden ReLU layers of `width` units, then a 2-unit logit layer.
        layers = [tf.keras.layers.Dense(width, activation='relu') for _ in range(depth)]
        model = tf.keras.models.Sequential(layers + [
            tf.keras.layers.Dense(2)
        ])

        # Fresh optimizer/loss/metric objects per model so no state is shared.
        optimizer = tf.keras.optimizers.Adam(0.001)
        # from_logits=True because the output layer has no softmax.
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metric = tf.keras.metrics.SparseCategoricalAccuracy()

        model.compile(
            optimizer=optimizer,
            loss=loss_fn,
            metrics=[metric],
        )

        history = model.fit(X_train, y_train, epochs=n_epochs, verbose=0)

        # evaluate() returns [loss, accuracy] given the single compiled metric.
        test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
        train_loss, train_acc = model.evaluate(X_train, y_train, verbose=0)

        print(f"Model depth={depth}, width={width}, train_acc={train_acc}, test_acc={test_acc}")

        # On/off pattern of every non-output layer on the training samples.
        activations = get_activations(model, X_train)

        results_dict = {
            "activations": activations,
            "train_acc": train_acc,
            "test_acc": test_acc,
            "train_loss": train_loss,
            "test_loss": test_loss,
        }

        # One pickle per configuration; the file name encodes depth and width.
        with open(f'{OUTPUT_DIR}/xor_results_depth={depth}_width={width}.pkl', 'wb') as f:
            pickle.dump(results_dict, f)

Model depth=1, width=4, train_acc=0.688, test_acc=0.6734
Model depth=1, width=8, train_acc=0.729, test_acc=0.7241
Model depth=1, width=16, train_acc=0.738, test_acc=0.7271
Model depth=1, width=32, train_acc=0.742, test_acc=0.7272
Model depth=2, width=4, train_acc=0.733, test_acc=0.7284
Model depth=2, width=8, train_acc=0.743, test_acc=0.7298
Model depth=2, width=16, train_acc=0.747, test_acc=0.7284
Model depth=2, width=32, train_acc=0.745, test_acc=0.7281
Model depth=3, width=4, train_acc=0.732, test_acc=0.7142
Model depth=3, width=8, train_acc=0.742, test_acc=0.7322
Model depth=3, width=16, train_acc=0.753, test_acc=0.7301
Model depth=3, width=32, train_acc=0.752, test_acc=0.7251
Model depth=4, width=4, train_acc=0.647, test_acc=0.6175
Model depth=4, width=8, train_acc=0.751, test_acc=0.73
Model depth=4, width=16, train_acc=0.752, test_acc=0.7288
Model depth=4, width=32, train_acc=0.756, test_acc=0.7253
