In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Turn off TFDS progress bars so the dataset download/preparation step does
# not flood the notebook output.
tfds.disable_progress_bar()

In [3]:
# Minimal embedding-layer example: a lookup table with 1000 rows (one per
# integer id), where each row is a 5-dimensional vector.
embedding_layer = layers.Embedding(input_dim=1000, output_dim=5)

# Look up ids 1, 2 and 3; the result holds one 5-d row per id.
result = embedding_layer(tf.constant([1, 2, 3]))
result.numpy()

array([[ 0.02684892,  0.00013601,  0.02895354, -0.03703148, -0.01701851],
       [-0.04473897, -0.0349722 , -0.01102676,  0.0156083 ,  0.0234695 ],
       [-0.01700026,  0.03066676,  0.00943993, -0.01132981,  0.00863471]],
      dtype=float32)

In [4]:
# A 2-D batch of ids (2 sequences x 3 ids) gains a trailing embedding axis,
# so the output shape is (2, 3, 5).
id_batch = tf.constant([[0, 1, 2], [3, 4, 5]])
result = embedding_layer(id_batch)
result.shape

TensorShape([2, 3, 5])

In [5]:
# Load the IMDB reviews dataset in its 'subwords8k' configuration.
# as_supervised=True makes each element a (text, label) pair, and
# with_info=True additionally returns the dataset metadata (`info`), which is
# used below to obtain the text encoder.
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    as_supervised=True,
    with_info=True,
)

In [6]:
# The 'text' feature's metadata carries the subword encoder; show the first
# 20 entries of its vocabulary.
text_feature = info.features['text']
encoder = text_feature.encoder
encoder.subwords[0:20]

['the_',
 ', ',
 '. ',
 'a_',
 'and_',
 'of_',
 'to_',
 's_',
 'is_',
 'br',
 'in_',
 'I_',
 'that_',
 'this_',
 'it_',
 ' /><',
 ' />',
 'was_',
 'The_',
 'as_']

In [7]:
# Display the dataset's repr to check the element structure: a
# variable-length int64 token sequence paired with a scalar int64 label.
train_data

<DatasetV1Adapter shapes: ((None,), ()), types: (tf.int64, tf.int64)>

In [8]:
def _shuffle_and_pad(dataset):
    """Shuffle with a 1000-element buffer, then emit padded batches of 10.

    Reviews have different lengths, so ``padded_shapes=([None], [])`` pads the
    token sequences of each batch to a common length and leaves the scalar
    labels as they are.
    """
    return dataset.shuffle(1000).padded_batch(10, padded_shapes=([None], []))


train_batches = _shuffle_and_pad(train_data)
test_batches = _shuffle_and_pad(test_data)

In [9]:
# Pull a single (tokens, labels) batch for inspection; the trailing zeros in
# the printed ids come from the batch padding.
first_batch = next(iter(train_batches))
train_batch, train_labels = first_batch
train_batch.numpy()

array([[  12,   31,  165, ...,    0,    0,    0],
       [  19, 2031,   18, ...,    0,    0,    0],
       [3475,  456,  143, ..., 7009,  104, 7420],
       ...,
       [ 133,  296,   27, ...,    0,    0,    0],
       [6388,   11, 2636, ...,    0,    0,    0],
       [ 407,   77,   18, ...,    0,    0,    0]])

In [10]:
# Model: a continuous bag-of-words style classifier.

embedding_dim = 16

model = keras.Sequential()
# Map every subword id to a learned `embedding_dim`-sized vector.
model.add(layers.Embedding(input_dim=encoder.vocab_size, output_dim=embedding_dim))
# Average over the sequence axis so reviews of any length become one vector.
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(units=16, activation='relu'))
# Single output unit with no activation: the model emits one raw score.
model.add(layers.Dense(units=1))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 16)          130960    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 131,249
Trainable params: 131,249
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Compile and Train Model

# The final Dense(1) layer has no sigmoid, so the model outputs raw logits and
# the loss is configured with from_logits=True. The accuracy metric has to use
# the same convention: the 'accuracy' shortcut resolves to binary accuracy
# with a 0.5 cut-off, which is only correct for probabilities. On logits the
# decision boundary is 0.0 (sigmoid(0) == 0.5), so the threshold is set
# explicitly. Naming the metric 'accuracy' keeps the History keys
# ('accuracy', 'val_accuracy') unchanged.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])

# validation_steps=20 limits each epoch's validation pass to 20 batches of
# test_batches instead of the full test set.
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
    validation_steps=20)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
