### Extension: Multilayer (n>=3) perceptron with Batch Normalization & binarized inputs

In [1]:
from utils import load_mnist
import utils
import numpy as np
from mlp import Layer, LayerArgs, Model, ModelArgs
from collections import Counter
import math
from mi_tool import MI
import plot_tool

In [2]:
# Read the train and test splits from the local Fashion-MNIST directory.
data_dir = '../fashion_mnist/'
X_train, y_train = load_mnist(data_dir, kind='train')
X_test, y_test = load_mnist(data_dir, kind='t10k')

# normalize inputs: every value is multiplied by 1/255
# (assumes raw pixels are 8-bit values in 0..255, giving a [0, 1] range -- TODO confirm against load_mnist)
pixel_scale = 1.0 / 255.0
X_train = np.multiply(X_train, pixel_scale)
X_test = np.multiply(X_test, pixel_scale)

# Shuffle the training set (presumably features and labels are permuted together --
# see utils.unison_shuffled_copies), then keep the first 10000 samples as a smaller subset.
# NOTE(review): no random seed is set in this notebook before the shuffle; if the helper
# draws from a global RNG the subset differs between runs -- confirm.
X_train, y_train = utils.unison_shuffled_copies(X_train, y_train)
X_train_subset = X_train[:10000]
y_train_subset = y_train[:10000]

In [3]:
# Network structure: a 784-wide input layer, three hidden layers of width
# 100, 50 and 25, and a 10-wide output layer (the two positional arguments of
# each LayerArgs chain from one layer to the next, i.e. they read as in/out sizes).
# All other layer parameters are left at their defaults; per the original note the
# default activation is ReLU (defined in mlp, not visible here). The output layer
# overrides the activation with np.exp.
layer_args = [
    LayerArgs(784, 784, layer_type="INPUT"),
    LayerArgs(784, 100),
    LayerArgs(100, 50),
    LayerArgs(50, 25),
    LayerArgs(25, 10, layer_type="OUTPUT", activate=np.exp),
]

# Training configuration (per the original note, the model trains with
# mini-batch gradient descent): at most 80 passes over the data and at most
# 100000 iterations. report_interval is 500 -- presumably the number of
# iterations between reports / MI computations; confirm against mlp.ModelArgs.
model_args = ModelArgs(
    num_passes=80,
    max_iter=100000,
    report_interval=500,
)

# Build and initialize the model.
# TODO: model weights initialization by denoised autoencoder
model = Model(layer_args, model_args)
# Full train/test data goes in through feed_data; the 10000-sample subset
# goes in through trial_data.
model.feed_data(X_train, y_train, X_test, y_test)
model.trial_data(X_train_subset, y_train_subset)
model.intialize_model()

In [None]:
# Build the mutual-information helper on the 10000-sample training subset.
# NOTE(review): the meaning of the third argument (10) is defined in mi_tool.MI --
# presumably a bin or class count; confirm.
MI_client = MI(X_train_subset, y_train_subset, 10)
# One-off preparation steps executed before the training loop starts;
# their internals live in mi_tool and are not visible here.
MI_client.discretize()
MI_client.pre_compute()

In [None]:
# run_model() is iterated as a stream of (epoch, hidden_layers) pairs; every pair is
# handed to the MI helper right away, so MI bookkeeping is interleaved with training.
for epoch, hidden_layers in model.run_model():
    MI_client.mi_single_epoch(hidden_layers, epoch)

Epoch: 1, Train Acc: 0.85145, Test Acc: 0.837


### Analysis: Information Bottleneck

In [None]:
# Hand the MI helper (filled in by the training loop) to the plotting utility.
# NOTE(review): what exactly is drawn is defined in plot_tool.mi_plot -- presumably the
# per-epoch mutual-information values; confirm.
plot_tool.mi_plot(MI_client)

In [None]:
# binarization
#
# The inputs were multiplied by 1/255 when they were loaded, so they live in
# [0, 1]. Comparing against the raw-pixel cut-off 127 would never be true and
# would turn both arrays into all zeros. The threshold therefore has to be on
# the scaled range: 0.5 sits strictly between 127/255 and 128/255, so it
# reproduces the raw "pixel > 127" split without floating-point edge cases.
# Because the result only contains 0 and 1, re-running this cell is harmless.
#
# NOTE(review): this cell comes after the cells that feed data to the model
# and train it, so the model above saw the non-binarized inputs. Re-create /
# re-feed the model after this cell if binarized inputs are intended.
BINARIZE_THRESHOLD = 0.5

# Kept from the original cell; np.where allocates new arrays, so writability
# of the inputs is not actually required for the lines below.
X_train.setflags(write=1)
X_test.setflags(write=1)

# Pixels above the threshold become 1, everything else 0 (integer arrays).
X_train = np.where(X_train > BINARIZE_THRESHOLD, 1, 0)
X_test = np.where(X_test > BINARIZE_THRESHOLD, 1, 0)