In [None]:
import numpy as np

import tensorflow as tf

!pip install -q tensorflow-hub
!pip install -q tensorflow-datasets
import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Report the runtime environment (library versions, execution mode, GPU
# visibility) so a reader can tell what the notebook was run against.
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)

# An empty device list is falsy, which selects the "NOT AVAILABLE" label.
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"
print("GPU is", gpu_status)

In [None]:
# The official training split is carved 60/40: the first 60% (15,000 reviews)
# is used for training and the remaining 40% (10,000 reviews) for validation.
# The 25,000-review test split is loaded untouched.
imdb_splits = ("train[:60%]", "train[60%:]", "test")

# as_supervised=True makes each dataset yield (text, label) pairs, which is
# the form Keras `fit` / `evaluate` consume directly.
train_data, validation_data, test_data = tfds.load(
    name="imdb_reviews",
    split=imdb_splits,
    as_supervised=True,
)

In [None]:
# TF Hub text-embedding module used throughout the notebook (the URL names it
# as a 128-dimensional NNLM with normalization). A smaller alternative, kept
# here for reference, is "https://tfhub.dev/google/nnlm-en-dim50/2".
nnlm_url = "https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2"
embedding = hub.load(nnlm_url)

# Frozen Keras wrapper around the module: takes scalar strings
# (input_shape=[]) and is excluded from training (trainable=False).
hub_layer = hub.KerasLayer(
    embedding,
    input_shape=[],
    dtype=tf.string,
    trainable=False,
)

In [None]:
# Width of one token embedding; also the width of every convolution kernel.
embed_size = 128
# Length of the token axis every review is padded or truncated to.
input_length = 1_000

class Encode_Layer(tf.keras.layers.Layer):
    """Convert a batch of raw strings into a dense grid of word embeddings.

    Each string is split on whitespace, every token is embedded with a frozen
    TF Hub NNLM module, and the resulting ragged batch is converted to a dense
    tensor of shape ``(batch, max_tokens, embedding_dim)``. Shorter sequences
    are filled with zeros; the target shape is enforced by
    ``RaggedTensor.to_tensor``.

    Args:
        dtype: Layer dtype. Defaults to ``tf.string`` because the layer
            consumes text.
        max_tokens: Size of the token axis of the output. When ``None``
            (the default) the module-level ``input_length`` is used, so
            existing ``Encode_Layer()`` calls behave exactly as before.
        embedding_dim: Size of the embedding axis of the output. When
            ``None`` (the default) the module-level ``embed_size`` is used.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, dtype=tf.string, max_tokens=None, embedding_dim=None, **kwargs):
        super().__init__(dtype=dtype, **kwargs)
        # Resolve the output shape once and keep it on the instance so that it
        # is serialized by get_config() instead of living in hidden globals.
        self.max_tokens = input_length if max_tokens is None else max_tokens
        self.embedding_dim = embed_size if embedding_dim is None else embedding_dim
        self.embedding = hub.load("https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2")
        self.hub_layer = hub.KerasLayer(self.embedding, input_shape=[], dtype=tf.string, trainable=False)

    def call(self, inputs):
        """Embed ``inputs`` (a batch of strings) token by token.

        Returns:
            A dense tensor shaped ``(batch, max_tokens, embedding_dim)``.
        """
        # Whitespace tokenization yields a ragged batch: one row of tokens per string.
        tokens = tf.strings.split(inputs)
        # Embed the flat token list while keeping the per-example row structure.
        token_vectors = tf.ragged.map_flat_values(self.hub_layer, tokens)
        # NOTE(review): multiplying by 1 is numerically a no-op and is kept
        # from the original; it looks like a workaround for a ragged-tensor
        # issue -- confirm before removing.
        token_vectors = token_vectors * 1
        return token_vectors.to_tensor(
            shape=[None, self.max_tokens, self.embedding_dim],
            default_value=0,
        )

    def get_config(self):
        """Return the layer config, including the output shape parameters.

        Storing them lets ``load_model(..., custom_objects=...)`` rebuild the
        layer without the module-level globals being defined.
        """
        config = super().get_config()
        config.update({
            "max_tokens": self.max_tokens,
            "embedding_dim": self.embedding_dim,
        })
        return config
        
# Build the text-CNN: string input -> token embeddings -> parallel
# convolution/max-pool branches -> dense classifier head with one logit.
encode_layer = Encode_Layer()

# Comma-separated kernel heights (in tokens); one convolution branch per entry.
filter_sizes = '1,2,3'
# Number of feature maps produced by each convolution branch.
num_filters = 1000

# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the notebook. The name is kept because it is module-level state
# other cells may reference -- consider renaming it.
input = tf.keras.layers.Input(shape=(), dtype=tf.string)
embed = encode_layer(input)
# Append a channel axis so the embeddings can be fed to Conv2D:
# (batch, input_length, embed_size) -> (batch, input_length, embed_size, 1).
embed = tf.expand_dims(embed, -1)
pool_outputs = []
for filter_size in map(int, filter_sizes.split(',')):
    # Each kernel covers `filter_size` consecutive tokens and the full
    # embedding width, so with 'valid' padding it only slides along the
    # token axis.
    filter_shape = (filter_size, embed_size)
    conv = tf.keras.layers.Conv2D(num_filters, filter_shape, strides=(1, 1), padding='valid',
                               data_format='channels_last', activation='relu',
                               kernel_initializer='glorot_normal',
                               bias_initializer=tf.keras.initializers.constant(0.1),
                               name='convolution_{:d}'.format(filter_size))(embed)
    # The pooling window equals the whole convolution output
    # (input_length - filter_size + 1 positions), i.e. max-over-time pooling
    # that leaves one value per filter.
    max_pool_shape = (input_length - filter_size + 1, 1)
    pool = tf.keras.layers.MaxPool2D(pool_size=max_pool_shape,
                                  strides=(1, 1), padding='valid',
                                  data_format='channels_last',
                                  name='max_pooling_{:d}'.format(filter_size))(conv)
    pool_outputs.append(pool)
# Join the branches along the filter axis, then flatten to (batch, features).
pool_outputs = tf.keras.layers.concatenate(pool_outputs, axis=-1, name='concatenate')
pool_outputs = tf.keras.layers.Flatten(data_format='channels_last', name='flatten')(pool_outputs)
pool_outputs = tf.keras.layers.Dropout(0.4, name='dropout1')(pool_outputs)
# Fix: the hidden layer previously had no activation, which made
# Dense(256) -> Dropout -> Dense(1) a purely linear map at inference time.
# A ReLU gives the hidden layer actual representational capacity.
dense = tf.keras.layers.Dense(256, activation='relu')(pool_outputs)
dense = tf.keras.layers.Dropout(0.4, name='dropout2')(dense)
# Single unit without activation: the model outputs a raw logit.
outputs = tf.keras.layers.Dense(1)(dense)
model = tf.keras.models.Model(inputs=[input],outputs=[outputs])
model.summary()

In [None]:
# from_logits=True tells the loss to apply the sigmoid itself; the loss is
# built as a named object and passed to compile().
bce_from_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(
    loss=bce_from_logits,
    optimizer='adam',
    metrics=['accuracy'],
)

# Checkpoint to an HDF5 file during training; save_best_only=True keeps only
# the best model seen so far according to the callback's monitored quantity
# (the Keras default is used here).
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "text_cnn.h5",
    save_best_only=True,
)

In [None]:
# Training batches are drawn from a 10,000-element shuffle buffer;
# validation batches keep the dataset order.
train_batches = train_data.shuffle(10000).batch(64)
validation_batches = validation_data.batch(64)

# Train for 10 epochs, checkpointing through `checkpoint_cb`.
history = model.fit(
    train_batches,
    validation_data=validation_batches,
    epochs=10,
    callbacks=[checkpoint_cb],
    verbose=1,
)

In [None]:
# Replace the in-memory model with the checkpoint written during training.
# The custom layer class must be supplied so Keras can rebuild it by name.
custom_layers = {"Encode_Layer": Encode_Layer}
model = tf.keras.models.load_model("text_cnn.h5", custom_objects=custom_layers)

In [None]:
# Score the reloaded model on the held-out test split.
results = model.evaluate(test_data.batch(32), verbose=2)

# Pair each reported value with its metric name and print to three decimals.
for metric_name, metric_value in zip(model.metrics_names, results):
    print("{}: {:.3f}".format(metric_name, metric_value))

In [None]:
# Export the model in TensorFlow SavedModel format to the "text_cnn" directory.
tf.saved_model.save(obj=model, export_dir="text_cnn")

In [None]:
# Load the exported SavedModel back and run it on a few hand-written reviews.
saved_model = tf.saved_model.load("text_cnn")

sample_reviews = tf.constant([
    "this is a terrible movie.",
    "this is a good movie.",
    "very interesting movie",
    "i wouldn't watch this movie.",
    "i recommend this movie.",
])

# NOTE(review): assuming the export is the network built earlier in this
# notebook, these are raw logits (the final Dense layer has no activation),
# not probabilities.
y_pred = saved_model(sample_reviews)
y_pred