**Initialization**

Imports

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

Hyperparameters

In [None]:
# Training schedule and optimizer settings.
n_epoch = 250            # number of full passes over the training data
batch_size = 50          # number of samples fed per optimizer step
threshold = 128          # a pixel becomes a set element only if its value is > threshold
learning_rate = 0.0001   # step size handed to the Adam optimizer


# Per-element feature sizes used when building the network.
input_feature_dimension = 2     # each set element is an [i, j] pixel coordinate
hidden_feature_dimension = 5    # width of the phi output that is summed over the set
output_feature_dimension = 10   # one logit per class label

Import MNIST

In [None]:
# Load MNIST through Keras; load_data() yields a (train, test) pair of
# (images, labels) tuples. The cells below index every image as a 28x28 grid.
mnist = keras.datasets.mnist
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()

**Data formatting**

Method for calculating maximum set size

In [None]:
def decrease_set_size(image_sets=None):
    """Return the largest number of non-zero pixels found in any single image.

    The result is used as the padded set size: every image is later converted
    to a list of pixel coordinates, and all lists are padded to this length.

    Args:
        image_sets: Optional iterable of image collections, each indexable as
            ``images[n][row][col]`` (array or nested list). When omitted, the
            notebook globals ``training_images`` and ``test_images`` are used.

    Returns:
        int: the maximum, over every image in every collection, of the number
        of pixels whose value is ``>= 1``. Returns 0 if there are no images.
    """
    if image_sets is None:
        # The earlier version scanned `training_images` twice and never looked
        # at the test set; both splits must contribute to the maximum.
        image_sets = (training_images, test_images)

    max_count = 0
    for images in image_sets:
        images = np.asarray(images)
        if images.shape[0] == 0:
            # Nothing to count; also avoids reshaping an empty array.
            continue
        # Flatten each image to one row and count its non-zero pixels at once
        # instead of visiting every pixel in a Python loop.
        per_image = (images >= 1).reshape(images.shape[0], -1).sum(axis=1)
        max_count = max(max_count, int(per_image.max()))

    return max_count

Methods for formatting the dataset into coordinate sets with a mask.

In [None]:
def format_data(image):
    """Turn one image into a shuffled, padded set of pixel coordinates.

    Every pixel whose value is strictly greater than the global ``threshold``
    contributes its ``[row, col]`` coordinate with mask value 1. The set is
    then padded with ``[0, 0]`` entries (mask value 0) up to the global
    ``max_set_size`` and shuffled together with its mask.

    Args:
        image: a 28x28 grid indexable as ``image[row][col]``.

    Returns:
        The ``(coordinates, mask)`` pair produced by ``unison_shuffle``.
    """
    # Row-major scan, so coordinates appear in the same order as a nested loop.
    coords = [
        [row, col]
        for row in range(28)
        for col in range(28)
        if image[row][col] > threshold
    ]
    flags = [1 for _ in coords]

    # range() of a negative number is empty, so no padding is added when the
    # image already has max_set_size or more selected pixels.
    n_padding = max_set_size - len(coords)
    coords.extend([0, 0] for _ in range(n_padding))
    flags.extend(0 for _ in range(n_padding))

    return unison_shuffle(np.array(coords), np.array(flags))

def unison_shuffle(data, mask):
    """Shuffle two equally long arrays with one shared random permutation.

    Args:
        data: NumPy array indexed along its first axis.
        mask: NumPy array with the same length as ``data``.

    Returns:
        ``(data, mask)`` reordered so that entries that were paired before the
        shuffle are still paired afterwards.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    n_items = len(data)
    assert n_items == len(mask)
    # One permutation applied to both arrays keeps rows and mask bits aligned.
    order = np.random.permutation(n_items)
    shuffled_data = data[order]
    shuffled_mask = mask[order]
    return shuffled_data, shuffled_mask

def generate_images(images):
    """Convert a collection of images into stacked coordinate sets and masks.

    Each image is passed through ``format_data`` in order; the resulting
    coordinate sets and masks are collected and stacked into two arrays.

    Args:
        images: sequence of images supporting ``len()`` and integer indexing.

    Returns:
        Tuple ``(image_array, mask_array)`` of NumPy arrays, one entry per
        input image.
    """
    # format_data is called once per image, in input order.
    formatted = [format_data(images[idx]) for idx in range(len(images))]
    coordinate_sets = [pair[0] for pair in formatted]
    masks = [pair[1] for pair in formatted]
    return np.array(coordinate_sets), np.array(masks)

def format_label(labels, num_classes=10):
    """One-hot encode integer class labels.

    Args:
        labels: sequence or array of integer class indices.
        num_classes: length of each one-hot vector. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        Float array of shape ``(len(labels), num_classes)`` with a single 1 per
        row at the label's index. Empty input yields shape ``(0, num_classes)``.

    Raises:
        IndexError: if a label lies outside the valid index range.
    """
    # Convert once to a platform integer index array (MNIST labels are uint8).
    indices = np.asarray(labels, dtype=np.intp)
    one_hot = np.zeros((indices.shape[0], num_classes))
    # Set one entry per row in a single vectorized assignment.
    one_hot[np.arange(indices.shape[0]), indices] = 1
    return one_hot

Calculate training data, test data along with their masks and labels using the previously defined methods.

In [None]:
# Padded set length shared by every sample; format_data reads this global.
max_set_size = decrease_set_size()

# Convert both splits to (coordinate sets, masks) and one-hot encode the labels.
# The raw integer labels (training_labels / test_labels) are left untouched.
training_data, training_mask = generate_images(training_images)
test_data, test_mask = generate_images(test_images)
training_label = format_label(training_labels)
test_label = format_label(test_labels)

In [None]:
# Sanity check: report the padded set size and the shapes of all prepared arrays.
print("Set size             =  ", max_set_size)

print("Training data shape  = ", training_data.shape,
     "\nTest data shape      = ", test_data.shape,
     "\nTraining label shape = ", training_label.shape,
     "\nTest label shape     = ", test_label.shape)

print("Training mask shape  = ", training_mask.shape,
     "\nTest mask shape      = ", test_mask.shape)

**The network**

Structure

In [None]:
# Update this string when altering the network
# (it is only used to label the run in the results cell; the layer sizes
# below are not derived from it).
network_stats = '100-200-100-SIGMA-100-100-100-10'

# Initialization of input, output and mask shapes
# x:      one padded coordinate set per sample
# y:      target vector per sample, fed as the labels of the loss
# x_mask: marks which entries of x are real elements rather than padding
x = tf.placeholder(tf.float32, [None, max_set_size, input_feature_dimension])
y = tf.placeholder(tf.float32, [None, output_feature_dimension])
x_mask = tf.placeholder(tf.bool, [None, max_set_size])


# The phi network:
# A stack of dense layers applied to x; the final layer has no activation and
# outputs hidden_feature_dimension features.

phi_hidden_1 = tf.layers.dense(inputs=x, units=100, activation=tf.nn.relu)
phi_hidden_2 = tf.layers.dense(inputs=phi_hidden_1, units=200, activation=tf.nn.relu)
phi_hidden_3 = tf.layers.dense(inputs=phi_hidden_2, units=100, activation=tf.nn.relu)
# Optional extra phi layers, currently disabled:
#phi_hidden_4 = tf.layers.dense(inputs=phi_hidden_3, units=300, activation=tf.nn.relu)
#phi_hidden_5 = tf.layers.dense(inputs=phi_hidden_4, units=300, activation=tf.nn.relu)
phi = tf.layers.dense(inputs=phi_hidden_3, units=hidden_feature_dimension)


# Mask and summation
# The boolean mask gets a trailing axis and is cast to float so that the
# multiplication zeroes the phi output of padded entries; the sum over axis 1
# then collapses the set dimension.

x_mask_hidden = tf.cast(tf.expand_dims(x_mask, -1), tf.float32)
phi_masked = tf.multiply(x_mask_hidden, phi)
Sigma = tf.reduce_sum(phi_masked, axis=1, name="Sigma")


# The rho network:
# Dense layers on the summed representation; y_hat holds the raw logits.

rho_hidden_1 = tf.layers.dense(inputs=Sigma, units=100, activation=tf.nn.relu)
rho_hidden_2 = tf.layers.dense(rho_hidden_1, 100 , tf.nn.relu)
rho_hidden_3 = tf.layers.dense(rho_hidden_2, 100, tf.nn.relu)
# Optional extra rho layer, currently disabled:
#rho_hidden_4 = tf.layers.dense(rho_hidden_3, 300, tf.nn.relu)
y_hat = tf.layers.dense(inputs=rho_hidden_3, units=output_feature_dimension)
# Softmax of the logits, and the index of the largest logit per sample.
y_hat_softmax = tf.nn.softmax(y_hat)
y_pred = tf.argmax(y_hat, axis=1)

The loss function and optimizer are defined

In [None]:
# Mean softmax cross-entropy between the targets y and the raw logits y_hat
# (the op applies softmax itself, so y_hat_softmax must not be passed here).
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_hat))
# Single training op: one Adam step that minimizes the loss.
optimize = tf.train.AdamOptimizer(learning_rate).minimize(loss)

A method for calculating the correct prediction percentage of the network is defined here.

In [None]:
def predictions(output, labels):
    """Report the share of predictions that match the labels as a string.

    Args:
        output: predicted values; must expose ``.size`` and integer indexing.
        labels: reference values, indexable at least up to ``output.size``.

    Returns:
        A string of the form ``"Correct: <percentage>%"``.

    Raises:
        ZeroDivisionError: if ``output.size`` is 0.
    """
    total = output.size
    # Count the positions where prediction and label agree.
    hits = sum(1 for idx in range(total) if output[idx] == labels[idx])
    share = hits / total * 100
    return "Correct: " + str(share) + "%"

**Running the network**

Training the network

In [None]:
# Train for n_epoch epochs and evaluate on the full test set after each one.
# MonitoredSession initializes the graph variables when it is created.
with tf.train.MonitoredSession() as sess:
    n_batches = training_data.shape[0] // batch_size  # a trailing partial batch is skipped
    for epoch in range(n_epoch):

        for i in range(n_batches):
            batch = slice(i * batch_size, (i + 1) * batch_size)
            # Fetch the batch loss in the same run call as the optimizer step,
            # so no second forward pass over the batch is needed.
            # train_loss is kept for inspection and is not printed.
            _, train_loss = sess.run([optimize, loss],
                                     {x: training_data[batch],
                                      x_mask: training_mask[batch],
                                      y: training_label[batch]})

        # Evaluate on the test set. `test_loss` is now computed on the test
        # data; before, it held the loss of the last training batch although
        # it is reported as the test loss.
        phi_val, y_val, y_pred_val, test_loss = \
            sess.run([phi_masked, y_hat_softmax, y_pred, loss],
                     {x: test_data, x_mask: test_mask, y: test_label})
        print('Epoch: ' , epoch, ' - Loss: ', str(test_loss))
        if ((epoch % 10) == 0):
            # Accuracy is reported only every tenth epoch.
            pred = predictions(y_pred_val, test_labels)
            print(pred)

Displaying the results

In [None]:
# Build a run identifier from the network description and the hyperparameters.
ID_string = \
    str(network_stats) + \
    ".-E" + str(n_epoch) + \
    "-BS" + str(batch_size) + \
    "-LR" + str(learning_rate) + \
    "-TH" + str(threshold)
print(ID_string)
# Prints whatever `test_loss` and `y_pred_val` the training cell left behind
# after its final epoch.
print('Test loss:', test_loss)
# Last expression of the cell, so the accuracy string is shown as cell output.
predictions(y_pred_val, test_labels)