In [1]:
# Install tensorflow_hub pinned to version 0.6.0 through a shell escape.
# NOTE(review): `!pip` runs whichever pip is first on the shell PATH; where the
# IPython version supports it, `%pip` targets the running kernel's environment
# — confirm before switching.
!pip install -U tensorflow_hub==0.6.0

Requirement already up-to-date: tensorflow_hub==0.6.0 in /usr/local/lib/python3.6/dist-packages (0.6.0)


In [2]:
# Environment setup: Python 2/3 compatibility imports, TensorFlow 2.x selection
# (Colab only), library imports, and a printout of the versions in use.
from __future__ import absolute_import, division, print_function, unicode_literals

# NOTE(review): `np` is not referenced in the cells visible here — confirm it is
# needed before relying on it.
import numpy as np

# The magic is placed ahead of `import tensorflow`, presumably so the requested
# version is selected before the package is loaded. Any failure (e.g. the magic
# is missing outside Colab) is deliberately ignored.
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass

import tensorflow as tf

import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Report the runtime configuration so the recorded outputs can be tied to
# specific library versions and hardware.
print('Version: ', tf.__version__)
print('Eager mode: ', tf.executing_eagerly())
print('Hub version: ', hub.__version__)
# NOTE(review): tf.test.is_gpu_available() is reported as deprecated in later
# TensorFlow releases — confirm the replacement API before upgrading.
print('GPU is ', 'available' if tf.test.is_gpu_available() else 'not available')

TensorFlow 2.x selected.
Version:  2.0.0-rc0
Eager mode:  True
Hub version:  0.6.0
GPU is  available


In [0]:
# Download the IMDB dataset

# Split the training set into 60% and 40%,
# so we'll end up with 15,000 examples for training,
# 10,000 examples for validation and 25,000 examples for testing.

# Carve the TRAIN split into two weighted sub-splits (6 : 4): the first part
# is used for training, the second for validation.
# NOTE(review): `Split.subsplit` appears to have been removed from later
# TensorFlow Datasets releases in favour of slicing strings such as
# 'train[:60%]' — confirm before upgrading tensorflow_datasets.
train_validation_split = tfds.Split.TRAIN.subsplit(weighted=[6, 4])

# The nested unpacking mirrors the nesting of the `split` argument.
# With as_supervised=True each element unpacks into an (example, label) pair,
# which is how the exploration cell consumes a batch.
(train_data, validation_data), test_data = tfds.load(
    'imdb_reviews',
    split=(train_validation_split, tfds.Split.TEST),
    as_supervised=True,
)

In [14]:
# Explore the data
# Batch the training set into groups of 10 and pull the first batch; the batch
# unpacks into a tensor of examples and a tensor of labels.
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
# Last expression of the cell: display the examples tensor.
train_examples_batch

<tf.Tensor: id=437, shape=(10,), dtype=string, numpy=
array([b"As a lifelong fan of Dickens, I have invariably been disappointed by adaptations of his novels.<br /><br />Although his works presented an extremely accurate re-telling of human life at every level in Victorian Britain, throughout them all was a pervasive thread of humour that could be both playful or sarcastic as the narrative dictated. In a way, he was a literary caricaturist and cartoonist. He could be serious and hilarious in the same sentence. He pricked pride, lampooned arrogance, celebrated modesty, and empathised with loneliness and poverty. It may be a clich\xc3\xa9, but he was a people's writer.<br /><br />And it is the comedy that is so often missing from his interpretations. At the time of writing, Oliver Twist is being dramatised in serial form on BBC television. All of the misery and cruelty is their, but non of the humour, irony, and savage lampoonery. The result is just a dark, dismal experience: the story p

In [15]:
# Inspect the type of the object returned by batching the training dataset.
type(train_data.batch(10))

tensorflow.python.data.ops.dataset_ops.BatchDataset

In [16]:
# Inspect the type of the iterator created over the batched dataset.
type(iter(train_data.batch(10)))

tensorflow.python.data.ops.iterator_ops.IteratorV2

In [17]:
# Inspect the type of a single element drawn from that iterator.
type(next(iter(train_data.batch(10))))

tuple

In [18]:
# Inspect the type of the first item inside one element of the batched dataset.
type(next(iter(train_data.batch(10)))[0])

tensorflow.python.framework.ops.EagerTensor

In [19]:
# Display the labels tensor unpacked from the first batch of 10.
train_labels_batch

<tf.Tensor: id=438, shape=(10,), dtype=int64, numpy=array([1, 1, 1, 1, 1, 1, 0, 1, 1, 0])>

In [20]:
# Shape of the examples tensor: one entry per review in the batch.
train_examples_batch.shape

TensorShape([10])

In [22]:
# Display the examples tensor again (the same tensor bound when the first batch
# was pulled; nothing is recomputed here).
train_examples_batch

<tf.Tensor: id=437, shape=(10,), dtype=string, numpy=
array([b"As a lifelong fan of Dickens, I have invariably been disappointed by adaptations of his novels.<br /><br />Although his works presented an extremely accurate re-telling of human life at every level in Victorian Britain, throughout them all was a pervasive thread of humour that could be both playful or sarcastic as the narrative dictated. In a way, he was a literary caricaturist and cartoonist. He could be serious and hilarious in the same sentence. He pricked pride, lampooned arrogance, celebrated modesty, and empathised with loneliness and poverty. It may be a clich\xc3\xa9, but he was a people's writer.<br /><br />And it is the comedy that is so often missing from his interpretations. At the time of writing, Oliver Twist is being dramatised in serial form on BBC television. All of the misery and cruelty is their, but non of the humour, irony, and savage lampoonery. The result is just a dark, dismal experience: the story p

In [0]:
# Build the model
# Decisions
# - How to represent the text?
# - How many layers to use in the model?
# - How many hidden units to use for each layer?

# In this case, the input data consists of sentences,
# so we convert the sentences into embedding vectors.
# We use a pre-trained text embedding as the first layer.
# Advantages
# - No need to worry about text preprocessing
# - We benefit from transfer learning
# - The embedding has a fixed size, so it is simpler to process

# We use a pre-trained text embedding model from TensorFlow Hub.

In [27]:
# Create a Keras layer that uses a TensorFlow Hub model
# to embed the sentences, and try it out on a few input examples.
# Note: no matter the length of the input text,
# the output shape of the embedding is (num_examples, embedding_dimension).


# TensorFlow Hub handle of the pre-trained text-embedding module
# (the "dim128" in the handle matches the 128-wide vectors it produces).
module_url = "https://tfhub.dev/google/tf2-preview/nnlm-en-dim128/1"

# Wrap the module as a Keras layer:
#   input_shape=[] + dtype=tf.string -> each example is a single scalar string
#   trainable=True                   -> the module's weights are updated in training
embed = hub.KerasLayer(
    module_url,
    trainable=True,
    dtype=tf.string,
    input_shape=[],
)

# Sanity check: embed the first three reviews of the example batch.
embed(train_examples_batch[:3])

<tf.Tensor: id=847, shape=(3, 128), dtype=float32, numpy=
array([[ 1.78428471e+00, -2.29856372e-01,  2.45664164e-01,
         6.84022367e-01, -1.29306912e-01, -1.57380581e-01,
        -1.81604568e-02, -3.73616189e-01, -1.86627433e-01,
         2.31135096e-02,  2.92425722e-01, -4.98053074e-01,
        -4.21140671e-01, -4.68299031e-01, -3.36686820e-01,
        -1.38489604e-01, -3.70901823e-01, -1.46785565e-02,
        -4.20534164e-01,  1.43151474e+00,  1.00796312e-01,
         4.05581176e-01, -8.53958651e-02, -1.26951560e-01,
        -1.08419672e-01, -3.50219727e-01,  4.06883471e-02,
        -1.95807740e-01, -1.16975710e-01, -5.78830671e-03,
         9.03800651e-02, -9.08162072e-02, -8.68640095e-03,
        -1.64070018e-02,  3.24384898e-01, -9.46734697e-02,
        -1.63240522e-01, -4.01422143e-01, -4.99349594e-01,
         5.78139901e-01, -1.41224876e-01,  3.35231982e-02,
        -1.43409833e-01, -1.45958960e-01,  4.64950085e-01,
         1.25476301e-01, -1.26240194e-01, -5.81108630e-01

In [0]:
# Classifier stack, in order:
#   1. the hub text-embedding layer (string -> fixed-size vector)
#   2. a 16-unit fully connected hidden layer with ReLU activation
#   3. a single sigmoid unit producing the binary-class score
model = tf.keras.Sequential([
    embed,
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [29]:
# Print the layer-by-layer summary of the model (output shapes and parameter
# counts, split into trainable and non-trainable totals).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_2 (KerasLayer)   (None, 128)               124642688 
_________________________________________________________________
dense_2 (Dense)              (None, 16)                2064      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
Total params: 124,644,769
Trainable params: 124,644,769
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Configure training: binary cross-entropy as the loss (the model ends in a
# single sigmoid unit), the Adam optimizer, and accuracy as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [32]:
# Train for 20 epochs on shuffled mini-batches of 512 reviews, passing the
# validation split (also batched by 512) as validation data.
# The object returned by fit() is kept in `history`.
# NOTE(review): the shuffle buffer (10000) is smaller than the 15,000-example
# training split described where the data is loaded, so the shuffle is only
# approximate — confirm this is intended.
history = model.fit(
    train_data.shuffle(10000).batch(512),
    validation_data=validation_data.batch(512),
    epochs=20,
    verbose=1,
)

Epoch 1/20
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [34]:
# Evaluate model
# Score the trained model on the test split in batches of 512, with progress
# output silenced (verbose=0).
test_batches = test_data.batch(512)
results = model.evaluate(test_batches, verbose=0)

# Print each metric name next to its value, formatted to three decimals.
for name, value in zip(model.metrics_names, results):
    print('%s: %.3f' % (name, value))

loss: 0.510
accuracy: 0.851
