In [1]:
import tensorflow as tf
tf.__version__

'1.13.1'

In [2]:
import os
import time
#!pip install -q -U tensorflow-gpu
import tensorflow as tf
import numpy as np

from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import LeakyReLU, Dense, Input, Embedding, Dropout, Bidirectional, GRU, Flatten, SpatialDropout1D
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Model

In [3]:
gru_len = 256
Routings = 3
Num_capsule = 10
Dim_capsule = 16
dropout_p = 0.25
rate_drop_dense = 0.28

max_features = 20000
maxlen = 1000
embed_size = 256

### Import Dataset

In [4]:
(train_text, train_labels), (test_text, test_labels) = tf.keras.datasets.imdb.load_data(num_words=maxlen)
train_text = sequence.pad_sequences(train_text, maxlen=maxlen)
test_text = sequence.pad_sequences(test_text, maxlen=maxlen)

In [5]:
batch_size = 32
epochs = 40

# Cast the labels to floats, needed later
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=maxlen)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
y_train = np.expand_dims(y_train, axis=1)
y_test = np.expand_dims(y_test, axis=1)
print(y_train.shape, y_test.shape)

(25000, 1) (25000, 1)


### Capsule Layer

In [6]:
# ADAPTED FROM CAPSULE IMPLEMENTATION : https://github.com/bojone/Capsule
# Converted to Tensorflow Keras instead of Keras 

from tensorflow.keras import activations
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm)/ (0.5 + s_squared_norm)
    return scale * x


#define our own softmax function instead of K.softmax
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex/K.sum(ex, axis=axis, keepdims=True)


#A Capsule Implement with Pure Keras
class Capsule(Layer):
    def __init__(self, num_capsule, dim_capsule, routings=3, share_weights=True, activation='squash', **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        self.activation = squash
        

    def build(self, input_shape):
        super(Capsule, self).build(input_shape)
        input_dim_capsule = int(input_shape[-1])
        if self.share_weights:
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            input_num_capsule = int(input_shape[-2])
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = (K.shape(u_vecs)[0])
        input_num_capsule = (K.shape(u_vecs)[1])
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        #final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:,:,:,0]) #shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            c = softmax(b, 1)
            o = K.batch_dot(c, u_hat_vecs, [2, 2])
            if i < self.routings - 1:
                o = K.l2_normalize(o, -1)
                b = K.batch_dot(o, u_hat_vecs, [2, 3])
        return self.activation(o)

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)


### Build Capsule Network Model

In [7]:
inputs = Input(shape=(maxlen,))
embed_layer = Embedding(max_features,
                            embed_size,
                            input_length=maxlen)(inputs)
embed_layer = SpatialDropout1D(rate_drop_dense)(embed_layer)

x = Bidirectional(GRU(gru_len,
                      activation='relu',
                      dropout=dropout_p,
                      recurrent_dropout=dropout_p,
                      return_sequences=True))(embed_layer)
capsule = Capsule(num_capsule=Num_capsule,dim_capsule=Dim_capsule,routings=Routings,share_weights=True)(x)

capsule = Flatten()(capsule)
capsule = Dropout(dropout_p)(capsule)
capsule = LeakyReLU()(capsule)

x = Flatten()(x)
predictions = Dense(1, activation='sigmoid')(x)


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


### Compile Model

In [8]:
model = tf.keras.Model(inputs=inputs, outputs=predictions)
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

### Mirrored Strategy for Multi-GPU 

In [9]:
NUM_GPUS = 2
strategy = tf.contrib.distribute.MirroredStrategy(num_gpus=NUM_GPUS)
config = tf.estimator.RunConfig(train_distribute=strategy)
estimator = tf.keras.estimator.model_to_estimator(model,
                                                  config=config)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Device is available but not used by distribute strategy: /device:CPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_CPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:0
INFO:tensorflow:Device is available but not used by distribute strategy: /device:XLA_GPU:1
INFO:tensorflow:Configured nccl all-reduce.
INFO:tensorflow:Initializing RunConfig with distribution strategies.
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Using the Keras model provided.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp8yeeqhw4', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 

### Define Input and Eval Function

In [10]:
def input_fn(texts, labels, epochs, batch_size):
    # Convert the inputs to a Dataset. (E)
    ds = tf.data.Dataset.from_tensor_slices((texts, labels))
    # Shuffle, repeat, and batch the examples. (T)
    SHUFFLE_SIZE = 5000
    #ds = ds.shuffle(SHUFFLE_SIZE).repeat(epochs).batch(batch_size)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(batch_size)
    # Return the dataset. (L)
    return ds


def eval_fn(texts, labels, epochs, batch_size):
    # Convert the inputs to a Dataset. (E)
    ds = tf.data.Dataset.from_tensor_slices((texts, labels))
    ds = ds.batch(batch_size)
    # Return the dataset. (L)
    return ds

### Adding Timing Hook 

In [11]:
class TimeHistory(tf.train.SessionRunHook):
    def begin(self):
        self.times = []
    def before_run(self, run_context):
        self.iter_time_start = time.time()
    def after_run(self, run_context, run_values):
        self.times.append(time.time() - self.iter_time_start)

### Training Process

In [12]:
time_hist = TimeHistory()
BATCH_SIZE = 32
EPOCHS = 1
estimator.train(lambda:input_fn(x_train,
                                y_train,
                                epochs=EPOCHS,
                                batch_size=BATCH_SIZE),
                hooks=[time_hist])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:batch_all_reduce invoked for batches size = 1 with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:batch_all_reduce invoked for batches size = 1 with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:batch_all_reduce invoked for batches size = 1 with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:batch_all_reduce invoked for batches size = 1 with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:batch_all_reduce invoked for batches size = 1 with algorithm = nccl, num_packs = 1, agg_small_grads_max_bytes = 0 and agg_small_grads_max_group = 10
INFO:tensorflow:batch_all_reduce invoked for batches size = 1 with algorithm = nccl, num_packs = 1, agg_sma

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x7fc07552a5c0>

In [13]:
total_time = sum(time_hist.times)
print(f"total time with {NUM_GPUS} GPU(s): {total_time} seconds")
avg_time_per_batch = np.mean(time_hist.times)
print(f"{BATCH_SIZE*NUM_GPUS/avg_time_per_batch} text samples/second with {NUM_GPUS} GPU(s)")

total time with 2 GPU(s): 1540.020670890808 seconds
16.249132542828235 text samples/second with 2 GPU(s)


### Final Evaluation

In [14]:
estimator.evaluate(lambda:eval_fn(x_test, 
                                   y_test,
                                   epochs=1,
                                   batch_size=BATCH_SIZE))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Starting evaluation at 2019-04-13T17:13:16Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /tmp/tmp8yeeqhw4/model.ckpt-391
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-04-13-17:29:56
INFO:tensorflow:Saving dict for global step 391: binary_accuracy = 0.86056, global_step = 391, loss = 0.32411215
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 391: /tmp/tmp8yeeqhw4/model.ckpt-391


{'binary_accuracy': 0.86056, 'loss': 0.32411215, 'global_step': 391}