In [7]:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.layers import (
    Dense, Dropout, Conv2D, MaxPooling2D, Reshape, Flatten, Input)
from tensorflow.keras.models import Model
#from tensorflow_probability.layers import DenseVariational

import sys
sys.path.insert(1, '/Users/milesturpin/Dev/latent_variable_nn')
sys.path.insert(1, '/Users/milesturpin/Dev/latent_variable_nn/models')

from models.base_model import BaseModel
from models.model_utils import (
    latent_normal_vector, latent_vector_variational_posterior,
    latent_normal_matrix, latent_matrix_variational_posterior, softplus_inverse)

tfd = tfp.distributions
tfpl = tfp.layers

In [321]:
data_size = 'small'
x_train = np.load('../data/femnist/{data_size}/x_train.npy'.format(data_size=data_size))
y_train = np.load('../data/femnist/{data_size}/y_train.npy'.format(data_size=data_size))
gid_train = np.load('../data/femnist/{data_size}/gid_train.npy'.format(data_size=data_size))
x_test = np.load('../data/femnist/{data_size}/x_test.npy'.format(data_size=data_size))
y_test = np.load('../data/femnist/{data_size}/y_test.npy'.format(data_size=data_size))
gid_test = np.load('../data/femnist/{data_size}/gid_test.npy'.format(data_size=data_size))

np.random.seed(335)
from sklearn.utils import shuffle
train_data = [x_train, gid_train, y_train]
test_data = [x_test, gid_test, y_test]

train_data = shuffle(*train_data, random_state=356)
test_data = shuffle(*test_data, random_state=356)

In [157]:
class Linear(tf.keras.layers.Layer):

    def __init__(self, units=32, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                             initializer='random_normal',
                             trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                             initializer='random_normal',
                             trainable=True)

    def call(self, inputs):
        print(inputs.shape)
        print(self.w.shape, self.b.shape)
        print(tf.matmul(inputs, self.w).shape)
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        config = super(Linear, self).get_config()
        config.update({'units': self.units})
        return config

In [158]:
linear = Linear()

In [159]:
linear(x_train[:10])

(10, 784)
(784, 32) (32,)
(10, 32)


<tf.Tensor: id=432885, shape=(10, 32), dtype=float32, numpy=
array([[-0.5440177 , -1.0042272 ,  1.1032884 , -0.10582794, -3.0298684 ,
        -0.34431428, -0.98262805,  1.2245424 , -0.21390186, -0.98818064,
         1.4837292 ,  0.6192355 , -0.40182206, -0.01606881,  2.4264178 ,
         0.50637144,  1.4344455 , -0.7766889 , -1.3800389 ,  1.2437507 ,
         1.9632759 ,  0.40609843, -1.5507433 , -1.3688016 ,  1.05972   ,
         2.323223  ,  0.54351574, -1.194923  , -0.44533157, -0.23382336,
         0.23519394, -1.0720731 ],
       [-0.5440177 , -1.0042272 ,  1.1032884 , -0.10582794, -3.0298684 ,
        -0.34431428, -0.98262805,  1.2245424 , -0.21390186, -0.98818064,
         1.4837292 ,  0.6192355 , -0.40182206, -0.01606881,  2.4264178 ,
         0.50637144,  1.4344455 , -0.7766889 , -1.3800389 ,  1.2437507 ,
         1.9632759 ,  0.40609843, -1.5507433 , -1.3688016 ,  1.05972   ,
         2.323223  ,  0.54351574, -1.194923  , -0.44533157, -0.23382336,
         0.23519394, -1.0720

## Base MLP

In [17]:
def create_MLP():
    img = Input(shape=(784,))
    x = Dense(units=512, activation='relu')(img)
    x = Dense(units=256, activation='relu')(x)
    out = Dense(62, activation='softmax')(x)
    model = Model(inputs=img, outputs=out)
    return model

In [24]:
base_mlp = create_MLP()
base_mlp.summary()
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
base_mlp.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['acc'])
base_mlp.fit(x_train, y_train, batch_size=100, epochs=40,  validation_data=(x_test, y_test))

Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_12 (Dense)             (None, 512)               401920    
_________________________________________________________________
dense_13 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_14 (Dense)             (None, 62)                15934     
Total params: 549,182
Trainable params: 549,182
Non-trainable params: 0
_________________________________________________________________
Train on 35535 samples, validate on 4044 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 1

<tensorflow.python.keras.callbacks.History at 0x1a3c74d828>

In [139]:
[x.shape for x in base_mlp.get_weights()]

[(784, 512), (512,), (512, 256), (256,), (256, 62), (62,)]

## One Hot MLP

In [23]:
from tensorflow.keras.utils import to_categorical

In [25]:
one_hot_data_train = np.concatenate([x_train, to_categorical(gid_train)], axis=1)
one_hot_data_test = np.concatenate([x_test, to_categorical(gid_test)], axis=1)

In [32]:
def create_one_hot_MLP():
    img = Input(shape=(784+190,))
    x = Dense(units=512, activation='relu')(img)
    x = Dense(units=256, activation='relu')(x)
    out = Dense(62, activation='softmax')(x)
    model = Model(inputs=img, outputs=out)
    return model

In [34]:
one_hot_model = create_one_hot_MLP()
one_hot_model.summary()
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
one_hot_model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['acc'])

In [35]:
one_hot_model.fit(one_hot_data_train, y_train, batch_size=100, epochs=20, validation_data=(one_hot_data_test, y_test))

Train on 35535 samples, validate on 4044 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x10b8f9860>

## Multilevel MLP

In [290]:
class MultilevelDense(tf.keras.layers.Layer):
    
    def __init__(self, units, num_groups, multilevel_weights=True, multilevel_bias=True, activation=None, **kwargs):
        super(MultilevelDense, self).__init__(**kwargs)
        self.units = units
        self.num_groups = num_groups
        self.activation = tf.keras.activations.get(activation)
        
    def build(self, input_shape):
        self.multi_w = self.add_weight(shape=(self.num_groups, input_shape[-1], self.units), 
                                 initializer='glorot_uniform',
                                 trainable=True)
        self.multi_b = self.add_weight(shape=(self.num_groups, self.units), 
                                 initializer='zeros',
                                 trainable=True)
        super(MultilevelDense, self).build(input_shape)
        
    def call(self, x, gid):
        #print(self.multi_w.shape, gid.shape)
        w = tf.gather(self.multi_w, gid)
        #w = tf.squeeze(w)
        #print(w.shape)
        b = tf.gather(self.multi_b, gid)
        #b = tf.squeeze(b)
        
        x = tf.expand_dims(x, axis=-1)
        
        #print(tf.matmul(x, w))
        #print(w.shape,tf.transpose(w).shape,x.shape)
        x = tf.matmul(w, x, transpose_a=True)
        x = tf.squeeze(x)
        #print(x,b)
        out = self.activation(x + b)
        return out
    
    def get_config(self):
        config = super(MultilevelDense, self).get_config()
        config.update({'units': self.units, 'num_groups': self.num_groups})
        return config
    
    

In [291]:
mldense = MultilevelDense(units=4, num_groups=190, activation='relu')

In [292]:
mldense(x_train[:10], gid_train[:10]).shape

TensorShape([10, 4])

In [293]:
def create_multilevel_MLP(num_groups):
    img = Input(shape=(784,))
    gid = Input(shape=(1,), dtype=tf.int32)
    x = Dense(units=512, activation='relu')(img)
    x = Dense(units=256, activation='relu')(x)
    #print(x)
    out = MultilevelDense(62, num_groups=num_groups, activation='softmax')(x, gid)
    model = Model(inputs=[img, gid], outputs=out)
    return model

In [283]:
ml_model = create_multilevel_MLP(num_groups=190)

Tensor("dense_84/Relu:0", shape=(None, 256), dtype=float32)


ValueError: Shapes must be equal rank, but are 2 and 1 for 'multilevel_dense_75/MatMul' (op: 'BatchMatMul') with input shapes: [?,1,256,62], [?,256,1].

In [266]:
_ = ml_model([x_train[:2], gid_train[:2]])
ml_model.summary()

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
ml_model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['acc'])

Model: "multilevel_mlp_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_75 (Dense)             multiple                  401920    
_________________________________________________________________
dense_76 (Dense)             multiple                  131328    
_________________________________________________________________
multilevel_dense_70 (Multile multiple                  3027460   
Total params: 3,560,708
Trainable params: 3,560,708
Non-trainable params: 0
_________________________________________________________________


In [301]:
import numpy as np
import pickle
import time
import os
from tqdm import tqdm, trange

from sklearn.metrics import accuracy_score, f1_score

import tensorflow as tf
import tensorflow_probability as tfp

from utils import robust_loss, round_nums

tfd = tfp.distributions

In [302]:
class BaseModel(tf.keras.Model):
    """
    Base model that serves as super class for different architectures.
    Subclassed models are responsible for building out the latent space
    as well as the model architecture. The base model contains training
    loop code and utilities for saving results.

    Args:
        optimizer : keras optimizer object
        loss_fn : keras loss function object
        num_groups : array-like obj with number of unique groups in
            each level of grouping; used to initialize latent variables
        experiment_dir : used for saving training stats and model weights
        logger : logging object
    """

    def __init__(self, optimizer, loss_fn, train_size, num_groups, args):
        super(BaseModel, self).__init__()
        self.optimizer = optimizer
        self.loss_fn = loss_fn
        self.model_size = args.model_size
        self.z_dim = args.z_dim
        self.train_size = train_size
        self.num_groups = num_groups
        self.seed = args.seed
        self._build_model()
        if args.latent_config != 'none':
            self._build_latent_space()

    def _build_model(self):
        """Initialize model layers."""
        pass

    def _build_latent_space(self):
        """Initialize latent variables and their prior distributions."""
        pass


    def create_batch_generator(
        self, data, batch_size, prefetch=2):
        """Use tf.data API to create efficient input pipeline.

        Should be overriden in the subclasses if particular dataset needs
        a different batching procedure.

        Args:
            data : list of arrays of the form [x, gid, gid2, ..., y]
            batch_size : self-explanatory
            prefetch : number of batchs to precompute
        """
        generator = tf.data.Dataset.from_tensor_slices(tuple(data))
        generator = generator.batch(batch_size)
        # Experimental feature, automatically picks # of batches to process
        generator = generator.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
        return generator


    def train(self, train_data, test_data, batch_size, num_epochs, eval_every=1, print_freq=10):
        """Train network.

        Args:
            train_data, test_data : list of numpy arrays in the order
                [x, gid, gid2, ..., y]
            batch_size : self-explanatory
            num_epochs : self-explanatory
            eval_every : number of epochs in between test set evaluations
            print_freq : how many times per epoch to print training
                progress. Useful when individual epochs take a long time
        """
        #self.logger.info('Evaluating untrained model...')
        #self.log_group_test_performance(test_data, epoch=0)

        # TODO: find less hacky way to build model, get summary
        inputs, labels = train_data[:-1], train_data[-1]
        inputs = [x[:5] for x in inputs]
        outputs = self(*inputs)
        # Try to print summary of param counts, won't work for some models
        try:
            self.summary(print_fn=print)
        except:
            print('Configuration not amenable to `summary`.')

        # Stateful Keras object for keeping track of mean loss
        train_loss = tf.keras.metrics.Mean('train_loss')

        last_time = time.time()
        for epoch in range(1, num_epochs+1):
            print('--- Epoch {} ---'.format(epoch))

            train_generator = self.create_batch_generator(train_data, batch_size)

            for step, batch in enumerate(train_generator):
                loss = self.train_step(batch)
                train_loss(loss)

                # Print out train loss every 1/print_freq thru train set
                num_batches = np.ceil(len(train_data[0])/batch_size)
                if (step+1) % np.ceil(num_batches/print_freq) == 0 or (step+1) == num_batches:
                    print(
                        'Step {} - train loss: {:.5f}, time elapsed: {:d}s'.format(
                            step+1, train_loss.result().numpy(),
                            round(time.time()-last_time)))

                    last_time = time.time()
                    train_loss.reset_states()

            if epoch % eval_every == 0 or epoch == num_epochs:
                print('Evaluating test set...')
                self.log_group_test_performance(test_data, epoch=epoch)
                #self.save_weights()


    def train_step(self, batch):
        """
        Idiomatic Tensorflow for making predictions and computing
        gradients.
        """
        inputs, labels = batch[:-1], batch[-1]
        with tf.GradientTape() as tape:
            pred = self(*inputs)
            loss = self.loss_fn(labels, pred)
            # Only need to add KL loss once per epoch
            #print((sum(self.losses) / self.train_size))
            loss += sum(self.losses) / self.train_size
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return loss


    def log_group_test_performance(self, test_data, epoch):
        """Evaluate test set performance across groups.

        Args:
            test_data : list of test arrays
            epoch : current epoch, used for logging results
        """

        # Don't shuffle so that we can match up batch preds with input data
        test_generator = self.create_batch_generator(
            test_data, batch_size=1000)

        scores = []
        # tqdm prints nice progress bars
        for test_batch in tqdm(test_generator):
            inputs, labels = test_batch[:-1], test_batch[-1]
            score = self(*inputs)
            scores.append(score)
        scores = np.concatenate(scores)
        preds = scores.argmax(axis=1)

#         output_file = os.path.join(self.experiment_dir,
#             'training_stats{}.csv'.format(epoch))
#         file = open(output_file, 'w')

#         # Header for CSV file, add more columns if add metrics in get_metrics
#         file.write('epoch,gid,test_acc,test_f1,test_loss\n')

        # Get stats with respect to first level grouping
        # i.e. whatever group level is in second index
        gid_test = test_data[1]
        y_test = test_data[-1]
        results_list = []
        for gid in trange(self.num_groups[0]):
            # Get instances of each group, calculate performance
            gid_idx = np.where(gid_test == gid)[0]
            gid_metrics = self.get_metrics(y_test[gid_idx], scores[gid_idx])
            results_list.append(gid_metrics)
            # Write out line to csv e.g. `3,347,0.87,0.86,0.34`
            
            #file.write(','.join(map(str, [epoch, gid] + gid_metrics)) + '\n')
        #file.close()

        results_arr = np.stack(results_list)
        group_acc = results_arr[:,0]
        stats = round_nums(
            accuracy_score(y_test, preds),
            np.percentile(sorted(group_acc), 10),
            np.percentile(sorted(group_acc), 90))

        # TODO: add more metrics
        print(
            'Test accuracy: {:.5f}, 10th percentile: {:.5f}, 90th percentile: {:.5f}'.format(*stats))


    # TODO: add more metrics, if add more need to add to CSV header
    def get_metrics(self, y_true, y_score):
        y_pred = tf.math.argmax(y_score, axis=1)
        return [
            accuracy_score(y_true, y_pred),
            f1_score(y_true, y_pred, average='weighted'),
            robust_loss(y_true, y_score),
        ]

In [322]:
class MultilevelMLP(BaseModel):
    
    def __init__(self, optimizer, loss_fn, train_size, num_groups, args):
        super(MultilevelMLP, self).__init__(optimizer, loss_fn, train_size, num_groups, args)
        
    def _build_model(self):
        self.dense1 = Dense(units=512, activation='relu')
        self.dense2 = Dense(units=256, activation='relu')
        self.ml_dense = MultilevelDense(62, num_groups=self.num_groups, activation='softmax')
        
    def _build_latent_space(self):
        pass
        
    def call(self, x, gid):
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.ml_dense(x, gid)
        return x

In [323]:
import argparse
args = argparse.Namespace()
args_dict = vars(args)
args_dict.update({
    "batch_size": 100,
    "data_dir": "data",
    "data_size": "small",
    "dataset": "femnist",
    "description": "lr sched 50, bigger batch more epochs",
    "eval_every": 1,
    "latent_config": "factor2",
    "lr": 0.001,
    "model_size": "small",
    "num_epochs": 40,
    "print_freq": 1,
    "seed": 1227,
    "testing": True,
    "z_dim": [
        62
    ]
})
ml_model = MultilevelMLP(opt, loss_fn, 35535, 190, args)

In [324]:
ml_model.train([x_train, gid_train, y_train], [x_test, gid_test, y_test], batch_size=100, num_epochs=3)

Model: "multilevel_mlp_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_91 (Dense)             multiple                  401920    
_________________________________________________________________
dense_92 (Dense)             multiple                  131328    
_________________________________________________________________
multilevel_dense_80 (Multile multiple                  3027460   
Total params: 3,560,708
Trainable params: 3,560,708
Non-trainable params: 0
_________________________________________________________________
--- Epoch 1 ---
Step 36 - train loss: 4.13192, time elapsed: 6s
Step 72 - train loss: 4.11971, time elapsed: 5s
Step 108 - train loss: 4.10985, time elapsed: 5s
Step 144 - train loss: 4.08328, time elapsed: 5s
Step 180 - train loss: 4.11244, time elapsed: 5s
Step 216 - train loss: 4.09985, time elapsed: 4s
Step 252 - train loss: 4.11612, time elapsed: 5s
Step 288 - t

1it [00:00,  7.27it/s]

Step 356 - train loss: 4.12769, time elapsed: 4s
Evaluating test set...


5it [00:00, 18.28it/s]


TypeError: 'int' object is not subscriptable

In [306]:
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()



In [296]:
from base_model import BaseModel

In [None]:
class 

In [288]:
_ = ml_model([x_train[:2], gid_train[:2]])
ml_model.summary()

opt = tf.keras.optimizers.Adam(learning_rate=0.001)

### I THINK I JUST CANT USE COMPILE OR FIT
ml_model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['acc'])

tf.Tensor(
[[-0.06748185  0.03969476 -0.03092025  0.01272973  0.03301796 -0.03741422
   0.02798387 -0.04484198 -0.02708249 -0.01406365  0.03128406  0.0530345
  -0.02823323  0.09702478  0.04532017 -0.00336938  0.01084191 -0.03770605
   0.01827123  0.00543186 -0.06078134  0.02129716 -0.11266416  0.0059648
   0.04361    -0.06416956 -0.07238913  0.0104103  -0.09759513 -0.06082897
  -0.0091065  -0.06174833 -0.02937673 -0.05784327 -0.04405056 -0.00570857
   0.07046047  0.03697755 -0.00273403 -0.00322475 -0.0751567   0.02579825
   0.00590726  0.03809592 -0.0004933   0.08686652 -0.06789909 -0.00627298
  -0.01915285  0.00255602 -0.07464639 -0.01323446  0.01534919 -0.0244153
   0.04534295 -0.00083769 -0.08018332  0.092474    0.0629398  -0.08053047
  -0.0419539  -0.01578863]
 [-0.06748185  0.03969476 -0.03092025  0.01272973  0.03301796 -0.03741422
   0.02798387 -0.04484198 -0.02708249 -0.01406365  0.03128406  0.0530345
  -0.02823323  0.09702478  0.04532017 -0.00336938  0.01084191 -0.03770605
   0

In [289]:
ml_model.fit([x_train, gid_train], y_train, batch_size=100, epochs=20, validation_data=([x_test, gid_test], y_test))

Tensor("multilevel_dense_76/Squeeze:0", dtype=float32) Tensor("multilevel_dense_76/Identity_1:0", shape=(None, 62), dtype=float32)


TypeError: '>' not supported between instances of 'NoneType' and 'int'