In [76]:
import numpy as np
import tensorflow as tf
import os
import pickle
import numpy.random as rnd
from sklearn.utils import shuffle

## Load in data

In [None]:
import pickle
import boto3
from io import BytesIO

In [None]:
# Load the pickled Omniglot splits from an S3 bucket into memory.
# Only the image arrays are kept; the second element of each pickle is discarded.
s3 = boto3.resource('s3')
data_subsets = ['train', 'val']
data = {}

for name in data_subsets:
    path = "omniglot_images/{}.pickle".format(name)
    with BytesIO() as files:
        s3.Bucket("research-paper-omniglot-data").download_fileobj(path, files)
        # The download leaves the cursor at the end of the buffer; rewind before reading.
        files.seek(0)
        # NOTE(review): pickle.load can execute arbitrary code from the payload;
        # only use this with a bucket whose contents are trusted.
        (X, c) = pickle.load(files)
    data[name] = X

In [4]:
# Load every Omniglot split from local pickles. Each pickle holds a pair:
# the first element is stored in `data`, the second in `categories`.
path = '../omniglot_images/'
data_subsets = ["train", "val", "test"]

data, categories, info = {}, {}, {}

for name in data_subsets:
    file_path = os.path.join(path, "{}.pickle".format(name))
    print("loading data from {}".format(file_path))
    with open(file_path, "rb") as f:
        (X, c) = pickle.load(f)
    data[name] = X
    categories[name] = c

loading data from ../omniglot_images/train.pickle
loading data from ../omniglot_images/val.pickle
loading data from ../omniglot_images/test.pickle


In [5]:
def create_train_data(size, s='train', dataset=None):
    """Build `size` labelled image pairs for siamese training.

    The first ``size // 2`` pairs contain images of two different classes
    (target 0); the remaining pairs contain two images of the same class
    (target 1). Same-class pairs use two distinct examples whenever the class
    has more than one example.

    Args:
        size: number of pairs to generate.
        s: key of the split to sample from (e.g. 'train' or 'val').
        dataset: optional mapping from split name to an array of shape
            (n_classes, n_examples, w, h). When omitted, the notebook-level
            `data` dict is used.

    Returns:
        pairs: list of two float arrays of shape (size, w, h, 1); pairs[0][i]
            and pairs[1][i] form the i-th pair.
        targets: float array of shape (size, 1) holding the 0/1 labels.
    """
    X = (data if dataset is None else dataset)[s]
    n_classes, n_examples, w, h = X.shape

    # Allocate with the same (w, h, 1) layout the images are reshaped to below,
    # so non-square images do not fail on assignment.
    pairs = [np.zeros((size, w, h, 1)) for _ in range(2)]
    targets = np.zeros((size, 1))
    n_different = size // 2

    for x in range(size):
        # Randomly sample one class (character) and one of its examples.
        category = rnd.randint(0, n_classes)
        idx_1 = rnd.randint(0, n_examples)
        pairs[0][x, :, :, :] = X[category, idx_1].reshape(w, h, 1)

        if x >= n_different:
            # Same class: a non-zero offset modulo n_examples guarantees a
            # different example than idx_1 (impossible only with one example).
            category_2 = category
            offset = rnd.randint(1, n_examples) if n_examples > 1 else 0
            idx_2 = (idx_1 + offset) % n_examples
            targets[x] = 1
        else:
            # Different class: a non-zero offset modulo n_classes can never
            # land back on `category`.
            category_2 = (category + rnd.randint(1, n_classes)) % n_classes
            idx_2 = rnd.randint(0, n_examples)
            targets[x] = 0
        pairs[1][x, :, :, :] = X[category_2, idx_2].reshape(w, h, 1)

    return pairs, targets

In [8]:
# Pre-generate fixed pools of labelled image pairs; batches are sampled from them later.
train_set, train_labels = create_train_data(10000)
# Validation pairs must come from the held-out 'val' split. Leaving `s` at its
# default would draw them from the training characters and inflate validation accuracy.
val_set, val_labels = create_train_data(3000, s='val')

## Settings

In [61]:
# Capsule dimensions
caps1_size = 8            # length of each primary-capsule vector
caps2_size = 10           # number of second-layer capsules (a count, despite the name)
pred_matrix_size = 16     # length of each second-layer capsule vector (passed as pose_size)

# Convolutional front end: output channels and square kernel size
conv1_channels, conv1_filter = 256, 9
primaryCaps_channels, primaryCaps_filter = 32, 9

# Routing and numerical stability
routing_rounds = 3        # iterations of routing by agreement
epsilon = 1e-7            # added under the square root in squash()

# Head and optimisation
fully_layer_size = 20     # neurons in the dense layer applied to the flattened capsules
learning_rate = 0.001

## Squash function

In [62]:
def squash(input_vector, axis):
    """Apply the capsule squash non-linearity along `axis`.

    Computes (|v|^2 / (1 + |v|^2)) * v / sqrt(|v|^2 + epsilon), where |v|^2 is
    the sum of squares over `axis` (kept as a size-1 dimension). The
    module-level `epsilon` keeps the square root away from zero.

    Args:
        input_vector: tensor holding the vectors to squash.
        axis: axis along which each vector's squared length is summed.

    Returns:
        Tensor with the same shape as `input_vector`.
    """
    squared_norm = tf.reduce_sum(tf.square(input_vector), axis=axis, keepdims=True)
    shrink = tf.divide(squared_norm, tf.add(squared_norm, 1.))
    unit_direction = tf.divide(input_vector, tf.sqrt(tf.add(squared_norm, epsilon)))
    return tf.multiply(shrink, unit_direction)

## Convolutional layer

In [63]:
def convolutional(input_data, conv_shape, stride_shape, name, relu=True):
    """VALID-padded 2-D convolution plus bias, optionally followed by ReLU.

    Args:
        input_data: input tensor for tf.nn.conv2d.
        conv_shape: kernel shape; its last entry is also the bias length.
        stride_shape: strides passed to tf.nn.conv2d.
        name: suffix for the variable names ('W' + name, 'B' + name).
        relu: when equal to True, apply ReLU to the biased output.

    Returns:
        The biased convolution output, after ReLU if requested.
    """
    kernel = tf.get_variable('W' + name, initializer=tf.truncated_normal(conv_shape, stddev=0.3))
    offset = tf.get_variable('B' + name, initializer=tf.truncated_normal([conv_shape[-1]], stddev=0.3))
    pre_activation = tf.add(
        tf.nn.conv2d(input_data, kernel, stride_shape, padding='VALID'),
        offset,
    )

    # Explicit comparison kept on purpose: only a value equal to True enables the ReLU.
    return tf.nn.relu(pre_activation) if relu == True else pre_activation

## Primarycaps

In [64]:
def primarycaps(input_data, conv_shape, stride_shape, primaryCaps_channels, caps1_size, caps2_size, pose_size, batch):
    """Build primary capsules and their predictions for every second-layer capsule.

    Args:
        input_data: feature map fed to the capsule convolution.
        conv_shape: kernel shape for that convolution.
        stride_shape: strides for that convolution.
        primaryCaps_channels: number of capsule channels per spatial position.
        caps1_size: length of each primary-capsule vector.
        caps2_size: number of second-layer capsules.
        pose_size: length of each predicted second-layer vector.
        batch: batch size (may be a scalar tensor), used to tile the weights.

    Returns:
        Tensor of shape [batch, n_caps1, caps2_size, pose_size, 1] with one
        prediction per (primary capsule, second-layer capsule) pair.
    """
    conv_out = convolutional(input_data, conv_shape, stride_shape, relu=False, name='primaryCaps')
    # The spatial size is read from axis 1 only, so the feature map is treated as square.
    grid = conv_out.get_shape().as_list()[1]
    n_caps1 = grid * grid * primaryCaps_channels

    caps1 = squash(tf.reshape(conv_out, [-1, n_caps1, caps1_size], name='caps1_raw'), axis=-1)
    # [batch, n_caps1, caps1_size] -> [batch, n_caps1, 1, caps1_size, 1]
    caps1_column = tf.expand_dims(tf.expand_dims(caps1, axis=-1), axis=2)
    caps1_tiled = tf.tile(caps1_column, [1, 1, caps2_size, 1, 1], name='caps1_out_tiled')

    transform = tf.get_variable(
        'Weight_matrix',
        initializer=tf.truncated_normal([1, n_caps1, caps2_size, pose_size, caps1_size], stddev=0.1))
    # One shared set of transformation matrices, repeated for every sample in the batch.
    transform_tiled = tf.tile(transform, [batch, 1, 1, 1, 1], name='W_matrix_tiled')
    return tf.matmul(transform_tiled, caps1_tiled, name='caps2_predicted')

## Routing by agreement

In [65]:
def routing_by_agreement(input_data, caps2_size, rounds, batch):
    """Run `rounds` iterations of routing by agreement over the capsule predictions.

    Args:
        input_data: predictions whose axis 1 indexes primary capsules and
            axis 2 indexes second-layer capsules.
        caps2_size: number of second-layer capsules.
        rounds: number of routing iterations.
        batch: batch size (may be a scalar tensor) for the initial logits.

    Returns:
        The squashed second-layer capsule outputs from the final iteration.
    """
    n_caps1 = input_data.get_shape().as_list()[1]
    logits = tf.zeros([batch, n_caps1, caps2_size, 1, 1], name='raw_weights')

    for step in range(rounds):
        tag = str(step)
        # Each primary capsule distributes its vote across the second-layer capsules.
        coupling = tf.nn.softmax(logits, axis=2, name='routing_weights' + tag)
        votes = tf.multiply(coupling, input_data, name='weighted_predictions' + tag)
        pooled = tf.reduce_sum(votes, axis=1, name='weighted_sum' + tag, keepdims=True)
        caps2_output = squash(pooled, axis=-2)

        # Score each prediction against the current output and accumulate it into the logits.
        broadcast = tf.tile(caps2_output, [1, n_caps1, 1, 1, 1], name='caps2_output_tiled' + tag)
        agreement = tf.matmul(input_data, broadcast, transpose_a=True, name='agreement' + tag)
        logits = tf.add(logits, agreement, name='raw_weights' + tag)

    return caps2_output

In [66]:
def create_dense_layer(input_data, input_shape, neurons, name):
    """Fully connected layer with a sigmoid activation.

    Args:
        input_data: 2-D input tensor whose last dimension is `input_shape`.
        input_shape: number of input features.
        neurons: number of output units.
        name: prefix for the variable names (name + '_W', name + 'b').

    Returns:
        sigmoid(input_data @ W + b).
    """
    kernel = tf.get_variable(name + '_W', initializer=tf.truncated_normal([input_shape, neurons], stddev=0.2))
    offset = tf.get_variable(name + 'b', initializer=tf.truncated_normal([neurons], mean=0.5, stddev=0.02))
    logits = tf.add(tf.matmul(input_data, kernel), offset)
    return tf.nn.sigmoid(logits)

## Graph

In [67]:
def create_network(caps1_size, caps2_size, pred_matrix_size, conv1_channels, conv1_filter,
                   primaryCaps_channels, primaryCaps_filter, routing_rounds, X, fully_layer_size):
    """Build one tower: conv -> primary capsules -> routing -> dense embedding.

    Args:
        caps1_size: length of each primary-capsule vector.
        caps2_size: number of second-layer capsules.
        pred_matrix_size: length of each second-layer capsule vector.
        conv1_channels: output channels of the first convolution.
        conv1_filter: square kernel size of the first convolution.
        primaryCaps_channels: capsule channels of the primary-capsule convolution.
        primaryCaps_filter: square kernel size of the primary-capsule convolution.
        routing_rounds: iterations of routing by agreement.
        X: input image tensor; its last dimension is the input channel count.
        fully_layer_size: number of units in the final dense layer.

    Returns:
        Tensor of shape [batch, fully_layer_size] with sigmoid activations.
    """
    in_channels = X.get_shape().as_list()[-1]
    conv1 = convolutional(X, [conv1_filter, conv1_filter, in_channels, conv1_channels],
                          [1, 1, 1, 1], name='conv')

    primary_kernel = [primaryCaps_filter, primaryCaps_filter,
                      conv1.get_shape().as_list()[-1], primaryCaps_channels * caps1_size]
    predictions = primarycaps(conv1, primary_kernel, [1, 2, 2, 1], primaryCaps_channels,
                              caps1_size, caps2_size, pred_matrix_size, batch=tf.shape(X)[0])
    routed = routing_by_agreement(predictions, caps2_size, routing_rounds, batch=tf.shape(X)[0])

    # Flatten all second-layer capsule vectors into one feature vector per sample.
    embedding_width = caps2_size * pred_matrix_size
    flat = tf.reshape(routed, [-1, embedding_width])
    return create_dense_layer(flat, embedding_width, fully_layer_size, 'fully')

In [69]:
# Start from an empty default graph so the graph-building cells below can be
# re-run without colliding with variables created by an earlier run.
tf.reset_default_graph()

In [70]:
# Inputs for the two towers: batches of 105x105 single-channel images.
X1 = tf.placeholder(tf.float32, [None, 105, 105, 1])
X2 = tf.placeholder(tf.float32, [None, 105, 105, 1])
# One label per pair; the pair generator emits 1 for same class and 0 for different class.
y = tf.placeholder(tf.float32, [None, 1])

In [71]:
# Both towers are built inside one variable scope. After reuse_variables(), the
# second create_network call looks up the first tower's variables instead of
# creating new ones, so the two towers share all weights.
with tf.variable_scope('siamese') as scope:
    output1 = create_network(
        caps1_size, caps2_size, pred_matrix_size, conv1_channels, conv1_filter,
        primaryCaps_channels, primaryCaps_filter, routing_rounds, X1, fully_layer_size)
    scope.reuse_variables()
    output2 = create_network(
        caps1_size, caps2_size, pred_matrix_size, conv1_channels, conv1_filter,
        primaryCaps_channels, primaryCaps_filter, routing_rounds, X2, fully_layer_size)

# Element-wise absolute difference between the two embeddings.
l1_dis = tf.abs(tf.subtract(output1, output2))

# Single-unit output layer on top of the distance vector.
# fully_final is the logit; y_estimate is its sigmoid.
weights = tf.Variable(tf.truncated_normal([fully_layer_size, 1], stddev=0.03), name='w_final')
bias = tf.Variable(tf.truncated_normal([1], stddev=0.01), name='b_final')
fully_final = tf.add(tf.matmul(l1_dis, weights), bias)
y_estimate = tf.nn.sigmoid(fully_final)

In [72]:
# Mean sigmoid cross-entropy over the batch, computed from the logits
# (fully_final) rather than from the already-squashed y_estimate.
cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = fully_final))

In [73]:
# Training op: one Adam step on the cross-entropy loss using the configured learning_rate.
optimiser = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cross_entropy)

## Prediction

In [74]:
# Round the predicted probability to 0 or 1 and compare it with the label;
# accuracy is the fraction of pairs in the fed batch that match.
correct_prediction = tf.equal(tf.round(y_estimate), y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype = tf.float32))

## Setup training

In [None]:
init_op = tf.global_variables_initializer()
epochs = 2
batch_size = 50

#If run on AWS
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allocator_type = 'BFC'
config.gpu_options.allow_growth=True


with tf.Session(config=config) as sess:
    init_op.run()

    # Derive the number of batches from the actual pool sizes rather than
    # repeating the literals used when the pools were generated.
    total_batch = len(train_labels) // batch_size
    total_batch_val = len(val_labels) // batch_size

    for epoch in range(epochs):
        avg_cost = 0
        acc = 0
        for i in range(total_batch):
            # shuffle(..., n_samples=batch_size) draws a random batch from the pool,
            # so an "epoch" is total_batch random batches, not one pass over the data.
            batch_x1, batch_x2, batch_y = shuffle(train_set[0], train_set[1], train_labels, n_samples = batch_size)
            # Fetch into fresh names: rebinding `accuracy` to the fetched float
            # would replace the tensor and break every later sess.run call.
            _, batch_cost, batch_acc = sess.run([optimiser, cross_entropy, accuracy],
                                                feed_dict={X1: batch_x1, X2: batch_x2, y: batch_y})
            avg_cost += batch_cost/total_batch
            acc += batch_acc
            print('batch:', i)
        print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost))
        print('avg_cost:', avg_cost)
        print('train_acc:', acc/total_batch)

        # Validation: evaluate accuracy only (no optimiser step), feeding both towers.
        acc_val = 0
        for iterations in range(total_batch_val):
            batch_x1, batch_x2, batch_y = shuffle(val_set[0], val_set[1], val_labels, n_samples = batch_size)
            acc_val += sess.run(accuracy, feed_dict={X1: batch_x1, X2: batch_x2, y: batch_y})
        print('val_acc:', acc_val/total_batch_val)