This notebook contains the first model for our emotion classifier. It is a fractal neural network (FractalNet), an architecture comparable to a residual neural network but more recent. The code is adapted from: https://github.com/snf/keras-fractalnet/blob/master/. The paper is located here: http://www.fractal.org/Life-Science-Technology/Publications/Fractal-Neural-Networks.htm
                

This first segment contains the helper functions and classes (fractal.py).

In [53]:
import numpy as np
from keras.layers import (
    Input,
    BatchNormalization,
    Activation, Dense, Dropout,
    Conv2D, MaxPooling2D, ZeroPadding2D
)
from keras.models import Model
from keras.engine import Layer
#from keras.utils.visualize_util import plot
from keras import backend as K

# Import only the module belonging to the active Keras backend. The names
# bound here (RandomStreams / tf) are used by the random helpers below.
if K._BACKEND == 'theano':
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
if K._BACKEND == 'tensorflow':
    import tensorflow as tf

def theano_multinomial(n, pvals, seed):
    """Draw float32 multinomial samples from a seeded Theano random stream.

    A fresh ``RandomStreams`` generator is created from ``seed`` and its
    ``multinomial`` result for ``n`` and ``pvals`` is returned.
    """
    stream = RandomStreams(seed)
    samples = stream.multinomial(n=n, pvals=pvals, dtype='float32')
    return samples

def tensorflow_categorical(count, seed):
    """Return a shuffled one-hot vector of length ``count`` (TensorFlow).

    The vector starts as ``[1, 0, ..., 0]`` and is passed through
    ``tf.random_shuffle`` with the given ``seed``. ``count`` must be
    positive.
    """
    assert count > 0
    one_hot = [1.] + [.0] * (count - 1)
    return tf.random_shuffle(one_hot, seed)

def rand_one_in_array(count, seed=None):
    """Return a random one-hot tensor of length ``count``.

    Exactly one entry is 1 and all others are 0, e.g. ``[0, 0, 1, 0]``.
    When ``seed`` is None a random seed is drawn from NumPy. The sampling is
    delegated to the helper matching the active Keras backend; any other
    backend raises an ``Exception``.
    """
    if seed is None:
        seed = np.random.randint(1, 10e6)

    backend = K._BACKEND
    if backend == 'tensorflow':
        return tensorflow_categorical(count=count, seed=seed)
    if backend == 'theano':
        # Uniform probabilities over the `count` positions, as a 1 x count
        # matrix; the single sampled row is returned.
        uniform = np.array([[1. / count for _ in range(count)]], dtype='float32')
        return theano_multinomial(n=1, pvals=uniform, seed=seed)[0]
    raise Exception('Backend: {} not implemented'.format(K._BACKEND))

class JoinLayer(Layer):
    '''
    Join several paths of a fractal block into a single tensor.

    This layer will behave as Merge(mode='ave') during testing but
    during training it will randomly select between using local or
    global droppath and apply the average of the paths alive after
    applying the drops.
    - Global: use the random shared tensor to select the paths.
    - Local: sample a random tensor to select the paths.

    Arguments:
        drop_p: probability of dropping a path in local droppath; the
            layer stores the keep probability ``1 - drop_p``.
        is_global: tensor used as the condition that picks global over
            local droppath.
        global_path: shared mask tensor; its first ``len(inputs)``
            entries select the paths under global droppath.
        force_path: when true, droppath is applied in every phase
            instead of only during training.
    '''

    def __init__(self, drop_p, is_global, global_path, force_path, **kwargs):
        self.p = 1. - drop_p
        self.is_global = is_global
        self.global_path = global_path
        self.uses_learning_phase = True
        self.force_path = force_path
        super(JoinLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Shape of one joined path without its leading (batch) dimension;
        # used to create the zero accumulator in _drop_path.
        self.average_shape = list(input_shape[0])[1:]

    def _random_arr(self, count, p):
        # One Bernoulli(p) draw per path: 1 keeps the path, 0 drops it.
        return K.random_binomial((count,), p=p)

    def _arr_with_one(self, count):
        # Fallback mask with exactly one path kept.
        return rand_one_in_array(count=count)

    def _gen_local_drops(self, count, p):
        # Create a local droppath with at least one path: if the sampled
        # mask dropped everything, replace it with a random one-hot mask.
        arr = self._random_arr(count, p)
        drops = K.switch(
            K.any(arr),
            arr,
            self._arr_with_one(count)
        )
        return drops

    def _gen_global_path(self, count):
        # The shared mask is sized for the widest join; narrower joins use
        # its first `count` entries.
        return self.global_path[:count]

    def _drop_path(self, inputs):
        # Average of the inputs that survive the (global or local) mask.
        count = len(inputs)
        drops = K.switch(
            self.is_global,
            self._gen_global_path(count),
            self._gen_local_drops(count, self.p)
        )
        ave = K.zeros(shape=self.average_shape)
        for i in range(0, count):
            ave = ave + inputs[i] * drops[i]
        alive = K.sum(drops)
        # Check that the sum is not 0 (global droppath can make it
        # 0) to avoid divByZero
        ave = K.switch(
            K.not_equal(alive, 0.),
            ave / alive,
            ave)
        return ave

    def _ave(self, inputs):
        # Plain element-wise mean of all inputs.
        ave = inputs[0]
        for tensor in inputs[1:]:
            ave = ave + tensor
        ave = ave / len(inputs)
        return ave

    def call(self, inputs, mask=None):
        if self.force_path:
            output = self._drop_path(inputs)
        else:
            output = K.in_train_phase(self._drop_path(inputs), self._ave(inputs))
        return output

    def compute_output_shape(self, input_shape):
        # Keras 2 shape-inference hook: the join has the shape of any one
        # of its inputs.
        return input_shape[0]

    def get_output_shape_for(self, input_shape):
        # Keras 1 name of the same hook, kept for backward compatibility.
        return self.compute_output_shape(input_shape)

class JoinLayerGen:
    '''
    Factory for JoinLayers that share one global droppath decision.

    On construction it draws two seeds (one for the global/local switch,
    one for the global path) and builds the two tensors every generated
    JoinLayer receives: a switch telling whether global droppath is used
    and a mask saying which path is taken when it is. With ``deepest``
    set, the switch is fixed to 1 and the mask to [1, 0, ..., 0].
    '''

    def __init__(self, width, global_p=0.5, deepest=False):
        self.global_p = global_p
        self.width = width
        # Both seeds are always drawn, in this order, even when `deepest`
        # makes them unused.
        self.switch_seed = np.random.randint(1, 10e6)
        self.path_seed = np.random.randint(1, 10e6)
        self.deepest = deepest
        if not deepest:
            self.is_global = self._build_global_switch()
            self.path_array = self._build_global_path_arr()
            return
        self.is_global = K.variable(1.)
        self.path_array = K.variable([1.] + [.0] * (width - 1))

    def _build_global_path_arr(self):
        # Random one-hot mask: the path taken under global droppath.
        return rand_one_in_array(count=self.width, seed=self.path_seed)

    def _build_global_switch(self):
        # Scalar Bernoulli(global_p) sample compared with 1: signals
        # whether global (true) or local droppath should be used.
        coin = K.random_binomial((), p=self.global_p, seed=self.switch_seed)
        return K.equal(coin, 1.)

    def get_join_layer(self, drop_p):
        # Every layer created here shares the same switch and path tensors.
        return JoinLayer(
            drop_p=drop_p,
            is_global=self.is_global,
            global_path=self.path_array,
            force_path=self.deepest,
        )

def fractal_conv(filter, nb_row, nb_col, dropout=None):
    """Return a function that applies one Conv -> [Dropout] -> BN -> ReLU unit.

    Arguments:
        filter: number of convolution filters.
        nb_row: kernel height.
        nb_col: kernel width.
        dropout: dropout rate applied after the convolution; skipped when
            falsy (``None`` or ``0``).

    Returns:
        A function mapping an input tensor to the unit's output tensor.
    """
    def f(prev):
        # The kernel size uses BOTH dimensions; the previous code passed
        # nb_col for the row size as well, silently ignoring nb_row.
        conv = Conv2D(filter, (nb_row, nb_col),
                      kernel_initializer="he_normal", padding="same")(prev)
        if dropout:
            conv = Dropout(dropout)(conv)
        # Normalize over the channel axis, whose position depends on the
        # backend's image layout.
        conv = BatchNormalization(axis=1 if K._BACKEND == 'theano' else -1)(conv)
        conv = Activation('relu')(conv)
        return conv
    return f

# XXX_ It's not clear when to apply Dropout, the paper cited
# (arXiv:1511.07289) uses it in the last layer of each stack but in
# the code gustav published it is in each convolution block so I'm
# copying it.
def fractal_block(join_gen, c, filter, nb_col, nb_row, drop_p, dropout=None):
    """Return a function that builds one fractal block with ``c`` columns.

    Arguments:
        join_gen: object whose ``get_join_layer(drop_p=...)`` returns the
            layer used to merge columns.
        c: number of columns in the block.
        filter, nb_col, nb_row, dropout: forwarded to ``fractal_conv`` for
            every convolution unit in the block.
        drop_p: drop probability handed to each join layer.

    Returns:
        A function mapping the block input tensor to the last tensor of
        column 0.
    """
    def f(z):
        # columns[k] is the chain of tensors produced so far in column k;
        # every column starts from the block input.
        columns = [[z] for _ in range(c)]
        for row in range(2**(c-1)):
            t_row = []
            for col in range(c):
                prop = 2**(col)
                # Add blocks: column `col` gets a conv unit every 2**col rows
                if (row+1) % prop == 0:
                    t_col = columns[col]
                    t_col.append(fractal_conv(filter=filter,
                                              nb_col=nb_col,
                                              nb_row=nb_row,
                                              dropout=dropout)(t_col[-1]))
                    t_row.append(col)
            # Merge (if needed): columns that received a unit in the same
            # row are joined and all continue from the merged tensor.
            if len(t_row) > 1:
                merging = [columns[x][-1] for x in t_row]
                merged  = join_gen.get_join_layer(drop_p=drop_p)(merging)
                for i in t_row:
                    columns[i].append(merged)
        return columns[0][-1]
    return f

def fractal_net(b, c, conv, drop_path, global_p=0.5, dropout=None, deepest=False):
    '''
    Build the fractal trunk of the network as a Keras functional callable.

    The returned function stacks ``b`` fractal blocks of ``c`` columns, each
    followed by 2x2 max pooling. ``conv[i]`` supplies the convolution
    settings of block ``i`` and, when given, ``dropout[i]`` its dropout
    rate. All joins share one JoinLayerGen, hence one global droppath.
    With ``deepest`` set the whole network is still built, but the joins
    are forced to the [1., 0... 0.] mask.
    Neither the softmax layer nor the Model is created here.
    '''
    def f(z):
        # One generator for the whole trunk so every JoinLayer shares the
        # same global droppath tensors.
        join_gen = JoinLayerGen(width=c, global_p=global_p, deepest=deepest)
        tensor = z
        for block_idx in range(b):
            n_filters, kernel_cols, kernel_rows = conv[block_idx]
            block_dropout = None
            if dropout:
                block_dropout = dropout[block_idx]
            build_block = fractal_block(
                join_gen=join_gen,
                c=c,
                filter=n_filters,
                nb_col=kernel_cols,
                nb_row=kernel_rows,
                drop_p=drop_path,
                dropout=block_dropout,
            )
            tensor = build_block(tensor)
            tensor = MaxPooling2D(pool_size=(2,2), strides=(2,2))(tensor)
        return tensor
    return f

The next segment contains handwritten code to load train/dev/test data into arrays

In [55]:
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt

# Dimensions of the per-image slots allocated by load_data():
# (IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS).
IMAGE_HEIGHT = IMAGE_WIDTH = 32
NUM_CHANNELS = 3

def path_to(dataset, imagenum):
    """Return the Windows path of image number ``imagenum`` in ``dataset``.

    Example: ``path_to("dev", 3)`` gives
    ``D:\\CS230 Project\\dev_img\\img3.jpg``.
    """
    # Every backslash is escaped explicitly; the previous literal relied on
    # the invalid escape sequence "\C" being passed through unchanged.
    return 'D:\\CS230 Project\\' + dataset + "_img\\img{}.jpg".format(imagenum)

def _load_image_set(dataset, count):
    """Read images 0..count-1 of one dataset split into a single array.

    Raises:
        IOError: if an image file cannot be read.
        ValueError: if an image does not have the expected shape.
    """
    images = np.zeros((count, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS))
    expected_shape = images.shape[1:]
    for imagenum in range(count):
        path = path_to(dataset, imagenum)
        im_arr = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising;
        # assigning None into the float array would silently store NaN
        # and poison training, so fail loudly here.
        if im_arr is None:
            raise IOError("Could not read image '{}'".format(path))
        if im_arr.shape != expected_shape:
            raise ValueError(
                "Image '{}' has shape {}, expected {}".format(
                    path, im_arr.shape, expected_shape))
        images[imagenum] = im_arr
    print("Finished reading {} set".format(dataset))
    return images


def load_data():
    """Load labels from ``labels.json`` and the images of all three splits.

    Labels are read from the "trainY"/"devY"/"testY" entries and returned
    as column vectors; the number of images per split is taken from the
    length of the "trainX"/"devX"/"testX" entries.

    Returns:
        ``(X_train, y_train), (X_dev, y_dev), (X_test, y_test)``.

    Raises:
        IOError: if an image file cannot be read.
        ValueError: if an image does not have the expected shape.
    """
    # Only the JSON parsing needs the file handle; close it before the
    # long-running image loading starts.
    with open('labels.json') as f_labels:
        labels = json.load(f_labels)

    y_train = np.expand_dims(np.asarray(labels["trainY"]), axis=1)
    y_dev = np.expand_dims(np.asarray(labels["devY"]), axis=1)
    y_test = np.expand_dims(np.asarray(labels["testY"]), axis=1)

    X_dev = _load_image_set("dev", len(labels["devX"]))
    X_test = _load_image_set("test", len(labels["testX"]))
    X_train = _load_image_set("train", len(labels["trainX"]))
    return (X_train, y_train), (X_dev, y_dev), (X_test, y_test)

# Load every split once; the resulting arrays are module-level globals used
# by the training cell below.
(X_train, y_train), (X_dev, y_dev), (X_test, y_test) = load_data()

Finished reading dev set
Finished reading test set
Finished reading train set


Now, we are ready to train the neural network with our data

The next segment contains the code that actually runs (cifar10_fractal.py)

In [None]:
import os
import glob
import argparse
from keras.callbacks import (
    LearningRateScheduler,
    ModelCheckpoint
)
from keras.datasets import cifar10
from keras.layers import (
    Activation,
    Input,
    Dense,
    Flatten
)
from keras.models import Model
from keras.optimizers import SGD, RMSprop, Adam, Nadam
#from keras.utils.visualize_util import plot
from keras.utils import np_utils
from keras import backend as K


# Training configuration.
NB_CLASSES = 8  # Original value is 10
NB_EPOCHS = 1  # Original value is 400
LEARN_START = 0.02
BATCH_SIZE = 100
MOMENTUM = 0.9
IMG_SIZE = 32

# For each split: one-hot encode the labels, convert the images to float32
# and divide them by 255 in place (presumably mapping 8-bit pixel values
# into [0, 1]).
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
X_train = X_train.astype('float32')
np.divide(X_train, 255, out=X_train)

Y_dev = np_utils.to_categorical(y_dev, NB_CLASSES)
X_dev = X_dev.astype('float32')
np.divide(X_dev, 255, out=X_dev)

Y_test = np_utils.to_categorical(y_test, NB_CLASSES)
X_test = X_test.astype('float32')
np.divide(X_test, 255, out=X_test)

def learning_rate(epoch):
    """Return the learning rate for ``epoch`` on a step-decay schedule.

    The rate is divided by 10 each time the number of remaining epochs is
    halved, i.e. at epochs 200, 300, 350 and 375.
    """
    # (first epoch at which the rate no longer applies, rate)
    schedule = (
        (200, 0.02),
        (300, 0.002),
        (350, 0.0002),
        (375, 0.00002),
    )
    for boundary, rate in schedule:
        if epoch < boundary:
            return rate
    return 0.000002

def build_network(deepest=False):
    """Build and compile the fractal classifier.

    The network is a 5-block, 3-column fractal trunk followed by Flatten,
    a Dense layer with NB_CLASSES units and a softmax activation. It is
    compiled with Adam and categorical cross-entropy.

    Arguments:
        deepest: forwarded to ``fractal_net``.

    Returns:
        The compiled Keras ``Model``.
    """
    # One dropout rate and one (filters, kernel, kernel) tuple per block.
    dropout = [0., 0.1, 0.2, 0.3, 0.4]
    conv = [(64, 3, 3), (128, 3, 3), (256, 3, 3), (512, 3, 3), (512, 2, 2)]
    # The channel axis comes first on Theano and last otherwise.
    if K._BACKEND == 'theano':
        input_shape = (3, IMG_SIZE, IMG_SIZE)
    else:
        input_shape = (IMG_SIZE, IMG_SIZE, 3)
    inputs = Input(shape=input_shape)
    output = fractal_net(
        c=3, b=5, conv=conv,
        drop_path=0.15, dropout=dropout,
        deepest=deepest)(inputs)
    output = Flatten()(output)
    output = Dense(NB_CLASSES, kernel_initializer="he_normal")(output)
    output = Activation('softmax')(output)
    # Keras 2 keyword names (`inputs`/`outputs`), matching the Keras 2 API
    # used by the rest of this file.
    model = Model(inputs=inputs, outputs=output)
    # Alternatives tried previously: SGD(lr=LEARN_START, momentum=MOMENTUM),
    # the same with nesterov=True, and Nadam().
    optimizer = Adam()
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def train_network(net):
    """Train ``net`` on the training split, checkpointing after every epoch.

    Validation uses the dev split so the test split stays untouched until
    the final evaluation in ``test_network``. Weights are written to
    ``snapshots/weights.<epoch>-<val_loss>.h5``.
    """
    print("Training network")
    # The checkpoint callback writes into this directory, so make sure it
    # exists before training starts.
    os.makedirs("snapshots", exist_ok=True)
    snapshot = ModelCheckpoint(
        filepath="snapshots/weights.{epoch:04d}-{val_loss:.4f}.h5",
        monitor="val_loss",
        save_best_only=False)
    # NOTE: the step schedule is intentionally not attached; to use it, add
    # LearningRateScheduler(learning_rate) to the callbacks list.
    net.fit(
        x=X_train, y=Y_train, batch_size=BATCH_SIZE,
        epochs=NB_EPOCHS, validation_data=(X_dev, Y_dev),
        callbacks=[snapshot]
    )

def test_network(net, weights):
    """Load ``weights`` into ``net``, evaluate on the test split and print
    the result."""
    message = "Loading weights from '{}' and testing".format(weights)
    print(message)
    net.load_weights(weights)
    metrics = net.evaluate(x=X_test, y=Y_test, batch_size=BATCH_SIZE)
    print('Test:', metrics)

def classify_image(net, weights, img):
    """Load ``weights`` into ``net`` and predict on ``img``.

    Arguments:
        net: model exposing ``load_weights`` and ``predict``.
        weights: path of the weights file to load.
        img: input handed unchanged to ``net.predict``.

    Returns:
        Whatever ``net.predict(img)`` returns; it is also printed.
    """
    print("Loading weights from '{}' and classifying".format(weights))
    net.load_weights(weights)
    ret = net.predict(img)
    print('Predictions: ', ret)
    # Return the predictions so callers can use them, not only read them.
    return ret
    
def main():
    """Entry point: build the network, then test, summarize or train it.

    Since argparsers don't work in jupyter notebooks, all options are set
    manually in ``args``:
      - "deepest": forwarded to ``build_network``.
      - "load": path of a weights file; when set, the network is evaluated
        with those weights instead of being trained.
      - "summary": when true (and "load" is unset), print the model summary.
    If neither "load" nor "summary" is set, the network is trained.
    """
    args = {"deepest": False, "load": None, "summary": False}
    net = build_network(deepest=args["deepest"])
    if args["load"]:
        # Use the configured path directly; the previous code passed the
        # unformatted checkpoint filename template, which names no file.
        test_network(net, args["load"])
    elif args["summary"]:
        net.summary()
    else:
        # NOTE: single-image classification is available through
        # classify_image(net, weights, image_batch) but is not wired in.
        train_network(net)


# Run the entry point when this cell is executed.
main()



Training network
Train on 67938 samples, validate on 8493 samples
Epoch 1/1
 1700/67938 [..............................] - ETA: 2:39:49 - loss: nan - acc: 0.1259