In [488]:
import numpy as np
import pandas as pd
import scipy.special as sp
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import PIL
import PIL.Image

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Input
from create_spiral import create_spiral


import keras
from keras import layers

In [None]:
# Report how many logical GPU devices TensorFlow can see.
# The inner string uses double quotes: reusing the outer quote character inside an
# f-string replacement field is a SyntaxError on Python versions before 3.12.
print(f'Number of GPUs detected: {len(tf.config.list_logical_devices("GPU"))}')

In [None]:
df = pd.read_csv('../DL_and_NN_in_Python/fer2013.csv')


print(df.head())
print(f'number of photos in the dataset is: {len(df)}')

# Every entry of the `pixels` column is parsed as a space-separated string of integers.
# Iterating over the column values directly avoids building a Series per row through
# `df.iloc[...]` and does not rely on the index labels coinciding with row positions.
X = np.array([list(map(int, pixels.split(' '))) for pixels in df['pixels']]) / 255  # normalise pixel values to lie between 0 and 1
Y = np.array(df['emotion'])

print(f'check number of features is 48**2: {X.shape[1] == 48**2}')

X, Y = shuffle(X, Y, random_state=42)  # numpy's shuffle is not nice because you can't shuffle two arrays simultaneously

# Hold out the last 20% of the shuffled samples as the test set.
train_proportion = 0.8
train_index = int(train_proportion*len(X))

X_train, X_test = X[:train_index], X[train_index:]
Y_train, Y_test = Y[:train_index], Y[train_index:]
# or just use train_test_split from sklearn.model_selection for the same effect
n_classes = len(set(Y_train))  # number of distinct labels present in the training split

print(f'Number of samples in training set: {len(X_train)}')
print(f'Number of samples in test set: {len(X_test)}')

# N = number of training samples, D = flattened feature count, D1 = image side length.
N, D = X_train.shape
D1 = int(np.sqrt(D))
assert D1 * D1 == D, 'feature count must be a perfect square to reshape rows into square images'

In [None]:
# Turn each flat row back into a D1 x D1 image.
# `reshape` is used instead of the in-place `ndarray.resize`: it rebinds the names
# rather than mutating the arrays, and it raises if the element count would change.
X_train = X_train.reshape((N, D1, D1))
X_test = X_test.reshape((len(X_test), D1, D1))

# TensorFlow

In [None]:
class DenseBatchNormalisedLayer(keras.layers.Layer):
    """Bias-free dense layer followed by batch normalisation and an activation.

    The layer computes ``act_fun(batch_norm(x @ W))``. While training, the
    pre-activations are normalised with the statistics of the current batch and
    exponential moving averages of those statistics are updated; otherwise the
    stored moving averages are used.

    Args:
        fan_out: Number of output units.
        decay: Weight given to the previous running statistic in the moving
            average update (``new = decay * old + (1 - decay) * batch``).
        act_fun: Callable applied to the normalised pre-activations.
        **kwargs: Forwarded to ``keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, fan_out, decay, act_fun, **kwargs):
        super().__init__(**kwargs)
        # Initialize model parameters

        # `dtype` is the keyword tf.Variable accepts; the previous `type=` keyword
        # is not part of its signature.
        self.decay = tf.Variable(decay, trainable=False, dtype=tf.float32)

        self.act_fun = act_fun
        self.fan_out = fan_out

    def build(self, input_shape):
        """Create the weight matrix, the running statistics and the scale/offset."""
        # `call` merges the last two axes of inputs with more than two dimensions,
        # so the weight matrix has to be sized for the flattened feature count.
        if len(input_shape) > 2:
            fan_in = int(input_shape[-2]) * int(input_shape[-1])
        else:
            fan_in = int(input_shape[-1])

        self.W = self.add_weight(shape=(fan_in, self.fan_out), initializer='glorot_normal', trainable=True, name='weight')
        # NOTE(review): running_var starts at zero, so inference before any training
        # step divides by sqrt(1e-8); the built-in BatchNormalization layer starts its
        # moving variance at one. Confirm which initial value is intended.
        self.running_mean = self.add_weight(shape=(1, self.fan_out), initializer='zeros', trainable=False, name='running_mean')
        self.running_var = self.add_weight(shape=(1, self.fan_out), initializer='zeros', trainable=False, name='running_var')

        self.gamma = self.add_weight(shape=(1, self.fan_out), initializer='ones', trainable=True, name='gamma')
        self.beta = self.add_weight(shape=(1, self.fan_out), initializer='zeros', trainable=True, name='beta')

    def call(self, x, is_training):
        """Apply the layer.

        Args:
            x: Input tensor; if it has more than two dimensions its last two
                axes are merged into one before the matrix product.
            is_training: When truthy, normalise with batch statistics and update
                the running averages; otherwise normalise with the running averages.

        Returns:
            ``act_fun`` applied to the batch-normalised ``x @ W``.
        """
        # flatten input if necessary
        if len(x.shape) > 2:
            flat_dim = x.shape[-2] * x.shape[-1]
            # An unknown leading dimension (None) is passed to reshape as -1.
            leading_dims = [-1 if dim is None else dim for dim in x.shape[:-2]]
            x = tf.reshape(x, leading_dims + [flat_dim])

        Z = tf.matmul(x, self.W)

        if is_training:
            batch_mean, batch_var = tf.nn.moments(Z, [0])
            # Exponential moving averages, used later when is_training is falsy.
            self.running_mean.assign(self.decay*self.running_mean + (1-self.decay)*batch_mean)
            self.running_var.assign(self.decay*self.running_var + (1-self.decay)*batch_var)

            res = tf.nn.batch_normalization(x=Z, mean=batch_mean, variance=batch_var, offset=self.beta, scale=self.gamma, variance_epsilon=1e-8)

        else:
            res = tf.nn.batch_normalization(x=Z, mean=self.running_mean, variance=self.running_var, offset=self.beta, scale=self.gamma, variance_epsilon=1e-8)

        return self.act_fun(res)

In [None]:
layer_norm = DenseBatchNormalisedLayer(fan_out=20, decay=0.9, act_fun=tf.nn.relu)
# A single all-zero row with the flattened image width is enough to trigger `build`;
# there is no need to allocate a zero array as large as the whole training set.
_ = layer_norm(np.zeros((1, X_train.shape[-1]**2), dtype=np.float32), is_training=True) # call the layer to build it

In [None]:
# Peek at the entry at index 1 of the layer's tracked variables.
# NOTE(review): which variable sits at index 1 depends on the ordering Keras uses for
# `layer.variables` — confirm it is the one you mean to inspect.
layer_norm.variables[1].value

In [None]:
# Print every variable tracked by the layer, then only the trainable ones.
# Plain loops replace the list comprehensions, which built throwaway lists of None
# purely for the side effect of printing.
for var in layer_norm.variables:
    print(var)
print('---------')
for var in layer_norm.trainable_variables:
    print(var)

Build the model using the subclassing API:

In [None]:
class ANN(keras.Model):
    """Classifier: flatten -> two batch-normalised dense layers -> linear output layer.

    Args:
        n_hidden_units: Width of each of the two hidden layers.
        act_fun: Activation passed to both hidden layers.
        decay: Moving-average decay passed to both hidden layers.
        n_outputs: Number of output units. Defaults to the notebook-level
            ``n_classes`` when left as ``None``.
    """

    def __init__(self, n_hidden_units: int = 10, act_fun=tf.nn.relu, decay=0.9, n_outputs=None):
        super().__init__()
        self.flatten = Flatten()
        self.batch_norm_layer = DenseBatchNormalisedLayer(fan_out=n_hidden_units, decay=decay, act_fun=act_fun)
        self.batch_norm_layer2 = DenseBatchNormalisedLayer(fan_out=n_hidden_units, decay=decay, act_fun=act_fun)
        self.decay = decay
        self.act_fun = act_fun
        self.n_hidden_units = n_hidden_units
        self.n_outputs = n_classes if n_outputs is None else n_outputs
        # The output layer is created once here so that the same weights are reused on
        # every forward pass and are held by the model; constructing it inside `call`
        # produced a brand-new, freshly initialised layer on each invocation.
        self.output_layer = tf.keras.layers.Dense(self.n_outputs)

    def call(self, x, is_training):
        """Return the output-layer activations (no softmax applied) for ``x``.

        Args:
            x: Batch of inputs; flattened to two dimensions before the hidden layers.
            is_training: Forwarded to both batch-normalised layers to select batch
                statistics (truthy) or running statistics (falsy).
        """
        x = self.flatten(x)
        x = self.batch_norm_layer(x, is_training=is_training)
        x = self.batch_norm_layer2(x, is_training=is_training)
        x = self.output_layer(x)
        return x

In [None]:
model = ANN(20, act_fun=tf.nn.relu, decay=0.9)
# One zero-filled sample with the per-image shape is enough to make the model create
# its weights; a zero array the size of the full training set is not needed for that.
_ = model(np.zeros((1,) + X_train.shape[1:], dtype=np.float32), is_training=True)

# Built-in alternatives to the hand-written loss and update rule used below:
# loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# optimiser = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
def loss_function(logits, T):
    """Return the sparse softmax cross-entropy summed over all samples.

    Args:
        logits: Unnormalised class scores produced by the model.
        T: Target class labels, passed as ``labels`` to the TensorFlow op.
    """
    per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=T)
    return tf.reduce_sum(per_sample_loss)

In [None]:
# Mini-batch pipeline over the training data: shuffle with a buffer covering every
# training sample, then group into batches of `batch_size`.
batch_size = 32
dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    .shuffle(buffer_size=len(X_train))
    .batch(batch_size)
)

In [None]:
# Set training parameters
epochs = 10
learning_rate = 1e-4
losses = []  # summed training cross-entropy after each epoch, stored as Python floats

# Training loop: one pass over the shuffled mini-batches per epoch
for epoch in range(epochs):
    for x_batch, y_batch in dataset:
        x_batch = tf.cast(x_batch, np.float32)
        with tf.GradientTape() as tape:
            batch_loss = loss_function(model(x_batch, is_training=True), y_batch)
        # Update parameters with respect to the gradient calculations
        # (plain gradient-descent step: v <- v - learning_rate * g)
        grads = tape.gradient(batch_loss, model.trainable_variables)
        for g, v in zip(grads, model.trainable_variables):
            v.assign_sub(learning_rate*g)
    # Keep track of model loss per epoch, evaluated on the whole training set
    # with the running batch-norm statistics (is_training=False)
    loss = loss_function(model(tf.cast(X_train, np.float32), is_training=False), Y_train)
    # Store a plain float rather than a tensor so the history plots directly
    losses.append(float(loss.numpy()))
    if epoch % 10 == 0:
        print(f'Cross-entropy loss for step {epoch}: {loss.numpy():0.3f}')

# Plot model results
print("\n")
plt.plot(range(epochs), losses)
plt.xlabel("Epoch")
plt.ylabel("Cross-entropy loss")
# The curve shows cross-entropy per epoch, so the title no longer says "MSE".
plt.title('Cross-entropy loss vs training epochs');
