In [1]:
import h5py
import pickle
import numpy as np

## Load Dataset

In [2]:
# training data
# Read inside a context manager so the HDF5 handle is closed even if one of
# the dataset reads raises.
with h5py.File('dataset/training_data.h5', 'r') as h5f:
    # [:] reads the whole dataset into an in-memory array, so the data stays
    # usable after the file is closed.
    x_train = h5f['x_train'][:]
    y_train = h5f['y_train'][:]

print('x_train:', x_train.shape)
print('y_train:', y_train.shape)

x_train: (10000, 250, 250, 1)
y_train: (10000, 5, 16)


In [3]:
# validation data
# Read inside a context manager so the HDF5 handle is closed even if one of
# the dataset reads raises.
with h5py.File('dataset/validation_data.h5', 'r') as h5f:
    # [:] reads the whole dataset into an in-memory array, so the data stays
    # usable after the file is closed.
    x_val = h5f['x_val'][:]
    y_val = h5f['y_val'][:]

print('x_val:', x_val.shape)
print('y_val:', y_val.shape)

x_val: (2500, 250, 250, 1)
y_val: (2500, 5, 16)


## Load vocabulary and embeddings

In [4]:
def read_glove_vecs(glove_file):
    """Parse a GloVe-style text file into a word -> vector dictionary.

    Every non-blank line is split on whitespace; the first field is the
    word and the remaining fields are its vector components.

    Args:
        glove_file: Path of the embeddings text file to read.

    Returns:
        dict mapping each word (str) to a 1-D ``np.float32`` array. If a word
        occurs more than once, the last occurrence wins.
    """
    print('Creating word to vec map...')
    word_to_vec_map = {}
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default text encoding.
    with open(glove_file, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing on a missing word field.
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float32)
    print('Done!')
    return word_to_vec_map

In [5]:
# load embeddings
# (the file name indicates the 50-dimensional GloVe 6B vectors; the result is
# a dict mapping each word to its vector)
word_to_vec_map = read_glove_vecs('{}/glove.6B.50d.txt'.format('dataset'))

Creating word to vec map...
Done!


In [6]:
# load vocabulary
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load vocabulary pickles that come from a trusted source.
with open('dataset/vocabulary.pickle', 'rb') as vocab_file:
    vocabulary_dict = pickle.load(vocab_file)

# Pull the three entries out of the unpickled mapping.
vocabulary = vocabulary_dict['vocabulary']
word_to_index = vocabulary_dict['word_to_index']
index_to_word = vocabulary_dict['index_to_word']

In [19]:
# number of entries in the loaded vocabulary
len(vocabulary)

9567

## Define Image Model

In [7]:
import keras.backend as K
from keras import layers, models, optimizers
from keras import callbacks
from PIL import Image

from utils import combine_images
from capsule_layers import CapsuleLayer, PrimaryCap, Length, Mask

# Image tensors are laid out with the channel axis last, matching x_train.
K.set_image_data_format('channels_last')

Using TensorFlow backend.


In [32]:
from keras.layers import Dense, Embedding, GRU
from keras.models import Model

In [9]:
# per-sample input shape: x_train's shape with the leading sample axis dropped
input_shape = x_train.shape[1:]
# number of capsules in the `caption_caps` layer
n_class = 10
# `routings` value handed to the `caption_caps` capsule layer
routings = 3

In [10]:
# image input of the network (one sample has shape `input_shape`)
x = layers.Input(shape=input_shape)

In [11]:
# Layers 1-3: conventional Conv2D layers (ReLU, 'valid' padding) applied to
# the image before the capsule layers.
conv1 = layers.Conv2D(
    filters=96, kernel_size=13, strides=4,
    padding='valid', activation='relu', name='conv1',
)(x)
conv2 = layers.Conv2D(
    filters=96, kernel_size=5, strides=2,
    padding='valid', activation='relu', name='conv2',
)(conv1)
conv3 = layers.Conv2D(
    filters=256, kernel_size=9, strides=1,
    padding='valid', activation='relu', name='conv3',
)(conv2)
conv3

<tf.Tensor 'conv3/Relu:0' shape=(?, 20, 20, 256) dtype=float32>

In [12]:
# Layer 4: primary capsules - a Conv2D layer with `squash` activation whose
# output is reshaped to [None, num_capsule, dim_capsule].
primary_caps = PrimaryCap(
    conv3,
    dim_capsule=8,
    n_channels=32,
    kernel_size=9,
    strides=2,
    padding='valid',
)

# Layer 5: caption capsules. This is where the routing algorithm runs.
caption_caps = CapsuleLayer(
    num_capsule=n_class,
    dim_capsule=16,
    routings=routings,
    name='caption_caps',
)(primary_caps)
caption_caps

<tf.Tensor 'caption_caps/mul_2:0' shape=(?, 10, 16) dtype=float32>

In [13]:
# Reduce every capsule vector to a single value per capsule.
# NOTE(review): Length comes from capsule_layers; presumably it returns the
# norm of each capsule vector - confirm against that module.
out_caps = Length(name='capsnet')(caption_caps)
out_caps

<tf.Tensor 'capsnet/Sqrt:0' shape=(?, 10) dtype=float32>

## Define Caption Model

In [14]:
# Internal state size of each of the 3 GRU layers in the RNN.
# Also the output width of `decoder_transfer_map`, which produces their
# initial state.
state_size = 512

In [15]:
# Length of the vector that the embedding layer produces for each integer token
embedding_size = 100

In [16]:
# Dense layer that maps the image model's output to a vector of `state_size`
# values, used as the initial state of the decoder GRUs. The tanh activation
# keeps every value within [-1, 1].
decoder_transfer_map = Dense(state_size, activation='tanh', name='decoder_transfer_map')

In [18]:
# Decoder input: one token sequence per sample, whose length equals the size
# of y_train's last axis.
decoder_input = layers.Input(shape=(y_train.shape[-1], ), name='decoder_input')

In [22]:
# Embedding layer turning integer tokens into vectors of length `embedding_size`.
# input_dim=10000 bounds the accepted token ids to 0..9999.
# NOTE(review): 10000 is hard-coded here and in the `decoder_output` Dense
# layer; presumably both stand for the vocabulary size - keep them in sync.
decoder_embedding = Embedding(input_dim=10000, output_dim=embedding_size, name='decoder_embedding')

In [23]:
# Three GRU layers of `state_size` units; each returns its full output
# sequence rather than only the last step.
decoder_gru1 = GRU(state_size, return_sequences=True, name='decoder_gru1')
decoder_gru2 = GRU(state_size, return_sequences=True, name='decoder_gru2')
decoder_gru3 = GRU(state_size, return_sequences=True, name='decoder_gru3')

In [25]:
# Output layer: softmax over 10000 classes.
decoder_dense = Dense(10000, activation='softmax', name='decoder_output')

## Connect and Create the Training Model

In [26]:
# Map the image model's output to the initial state used by the GRU layers.
rnn_initial_state = decoder_transfer_map(out_caps)
rnn_initial_state

<tf.Tensor 'decoder_transfer_map/Tanh:0' shape=(?, 512) dtype=float32>

In [27]:
# Start the decoder-network with its input-layer.
# `net` is rebound step by step as layers are attached.
net = decoder_input

# Connect the embedding-layer (integer tokens -> embedding vectors).
net = decoder_embedding(net)
net

<tf.Tensor 'decoder_embedding/embedding_lookup/Identity:0' shape=(?, 16, 100) dtype=float32>

In [29]:
# Connect all the GRU layers.
# Every layer is started from the same image-derived initial state.
# NOTE(review): this cell rebinds `net` to its own output, so re-running it on
# its own feeds the GRU output back into decoder_gru1; re-run the embedding
# cell first.
net = decoder_gru1(net, initial_state=rnn_initial_state)
net = decoder_gru2(net, initial_state=rnn_initial_state)
net = decoder_gru3(net, initial_state=rnn_initial_state)
net

<tf.Tensor 'decoder_gru3/transpose_1:0' shape=(?, ?, 512) dtype=float32>

In [30]:
# Connect the final dense layer, which produces a softmax
# distribution over its 10000 output classes for every sequence position.
decoder_output = decoder_dense(net)
decoder_output

<tf.Tensor 'decoder_output/truediv:0' shape=(?, 16, 10000) dtype=float32>

In [33]:
# Full model: takes an image and a token sequence, returns the decoder output.
decoder_model = Model(
    inputs=[x, decoder_input],
    outputs=[decoder_output],
)

In [35]:
# print a layer-by-layer summary of the combined model
decoder_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 250, 250, 1)  0                                            
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 60, 60, 96)   16320       input_1[0][0]                    
__________________________________________________________________________________________________
conv2 (Conv2D)                  (None, 28, 28, 96)   230496      conv1[0][0]                      
__________________________________________________________________________________________________
conv3 (Conv2D)                  (None, 20, 20, 256)  1990912     conv2[0][0]                      
__________________________________________________________________________________________________
primarycap

In [None]:
# TODO: training is not implemented yet; the two calls below are placeholders.
# NOTE(review): compile() is called without an optimizer or loss and fit()
# without any data, so this cell is expected to fail as written - supply
# those arguments before running it.

decoder_model.compile()
decoder_model.fit()