In [1]:
import tensorflow_datasets as tfds
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow.keras.callbacks as callbacks
import tensorflow_addons as tfa

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
print('tensorflow version', tf.version.VERSION)

1 Physical GPUs, 1 Logical GPUs
tensorflow version 2.3.2


In [2]:
examples = tfds.load('imdb_reviews')
train_examples, test_examples = examples['train'], examples['test']
print('train', train_examples)
print('test', test_examples)

train <DatasetV1Adapter shapes: {label: (), text: ()}, types: {label: tf.int64, text: tf.string}>
test <DatasetV1Adapter shapes: {label: (), text: ()}, types: {label: tf.int64, text: tf.string}>


In [3]:
for entry in train_examples.take(3):
  print(entry['text'].numpy())

b"This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it."
b'I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion I fell asleep because the film was rubbish. The plot development was

In [4]:
MAX_LENGTH = 360
VOCAB_SIZE = 8000
vec_layer = layers.experimental.preprocessing.TextVectorization(
  max_tokens=VOCAB_SIZE,
  output_sequence_length=MAX_LENGTH)
vec_layer.adapt(train_examples.map(lambda x: x['text']).batch(64))
vocabs = vec_layer.get_vocabulary()
print('vocabs: ', len(vocabs))
print(vocabs)

vocabs:  8000


In [5]:
for example in train_examples.batch(10).take(1):
  example_sequence = vec_layer(example['text'])
  print(example_sequence)

tf.Tensor(
[[  11   14   34 ...    0    0    0]
 [  10   26   75 ...    0    0    0]
 [4816 6359    2 ...    0    0    0]
 ...
 [   2   20    7 ...    0    0    0]
 [  10   63  116 ...    0    0    0]
 [ 243   11   29 ...    0    0    0]], shape=(10, 360), dtype=int64)


In [6]:
BUFFER_SIZE = 20000
BATCH_SIZE = 64

train_ds = train_examples.shuffle(BUFFER_SIZE).map(lambda x: ([x['text']], x['label'])).batch(BATCH_SIZE)
test_ds = test_examples.map(lambda x: ([x['text']], x['label'])).batch(BATCH_SIZE)

print(train_ds)
print(test_ds)

<DatasetV1Adapter shapes: ((None, 1), (None,)), types: (tf.string, tf.int64)>
<DatasetV1Adapter shapes: ((None, 1), (None,)), types: (tf.string, tf.int64)>


In [7]:
EMBEDDING_SIZE = 64

def embedding(x):
  position = tf.range(start=0, limit=MAX_LENGTH, delta=1)
  em = layers.Embedding(VOCAB_SIZE, EMBEDDING_SIZE, mask_zero=True)(x)
  pos_em = layers.Embedding(MAX_LENGTH, EMBEDDING_SIZE)(position)
  return em + pos_em


def transformer(x, dropout=0.1):
  m = tfa.layers.MultiHeadAttention(EMBEDDING_SIZE, 8)([x, x, x])
  d = layers.Dropout(dropout)(m)
  a = layers.Add()([x, d])
  x = layers.LayerNormalization(epsilon=1e-6)(a)
  x = layers.Dense(EMBEDDING_SIZE, activation='relu')(x)
  x = layers.LayerNormalization(epsilon=1e-6)(x)
  x = layers.Dropout(dropout)(x)
  return x

def create_model():
  inputs = layers.Input(shape=(MAX_LENGTH), dtype=tf.int32);
  x = embedding(inputs)
  x = transformer(x)
  x = layers.GlobalAveragePooling1D()(x)
  x = layers.Dense(20, activation="relu")(x)
  outputs = layers.Dense(1, activation="sigmoid")(x)
  print(outputs)
  model = keras.Model(inputs=[inputs], outputs=[outputs])
  
  return model


tf.keras.backend.clear_session()
model = create_model()
model.summary()
train_model = keras.Sequential([
  keras.Input(shape=(1), dtype=tf.string),
  vec_layer,
  model
])
train_model.compile(optimizer=keras.optimizers.Adam(1e-4),
                    loss='binary_crossentropy',
                    metrics=['accuracy'])
train_model.summary()

Tensor("dense_2/Sigmoid:0", shape=(None, 1), dtype=float32)
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 360)]        0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 360, 64)      512000      input_1[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_AddV2 (TensorFlowOp [(None, 360, 64)]    0           embedding[0][0]                  
__________________________________________________________________________________________________
multi_head_attention (MultiHead (None, 360, 64)      131136      tf_op_layer_AddV2[0][0]          
                           

In [8]:
for text_example, label_example in train_ds:
  print(text_example.shape)
  print(label_example)
  break

(64, 1)
tf.Tensor(
[1 1 1 1 1 0 1 0 1 0 1 1 0 0 0 0 0 1 1 1 1 0 1 1 0 0 0 0 0 1 1 0 0 1 0 1 0
 1 1 1 1 1 1 1 0 0 1 0 1 0 1 1 0 1 1 1 1 1 0 0 0 1 0 0], shape=(64,), dtype=int64)


In [9]:
from datetime import datetime

def scheduler(epoch, lr):
  if epoch == 0:
    return 1e-4
  if epoch == 10:
    return 1e-5
  return lr

logdir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)
schedule_callback = callbacks.LearningRateScheduler(scheduler, verbose=True)
checkpoint_callback = callbacks.ModelCheckpoint('transformer_imdb',
                                                monitor='val_accuracy',
                                                verbose=1,
                                                save_best_only=True)

history = train_model.fit(train_ds,
                          epochs=20,
                          validation_data=test_ds,
                          callbacks=[tensorboard_callback, checkpoint_callback, schedule_callback])


Epoch 00001: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 1/20
Instructions for updating:
use `tf.profiler.experimental.stop` instead.


Instructions for updating:
use `tf.profiler.experimental.stop` instead.


    391/Unknown - 24s 63ms/step - loss: 0.5932 - accuracy: 0.6861
Epoch 00001: val_accuracy improved from -inf to 0.83108, saving model to transformer_imdb
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


INFO:tensorflow:Assets written to: transformer_imdb/assets


INFO:tensorflow:Assets written to: transformer_imdb/assets



Epoch 00002: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 2/20
Epoch 00002: val_accuracy improved from 0.83108 to 0.86868, saving model to transformer_imdb
INFO:tensorflow:Assets written to: transformer_imdb/assets


INFO:tensorflow:Assets written to: transformer_imdb/assets



Epoch 00003: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 3/20
Epoch 00003: val_accuracy improved from 0.86868 to 0.87908, saving model to transformer_imdb
INFO:tensorflow:Assets written to: transformer_imdb/assets


INFO:tensorflow:Assets written to: transformer_imdb/assets



Epoch 00004: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 4/20
Epoch 00004: val_accuracy did not improve from 0.87908

Epoch 00005: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 5/20
Epoch 00005: val_accuracy improved from 0.87908 to 0.87960, saving model to transformer_imdb
INFO:tensorflow:Assets written to: transformer_imdb/assets


INFO:tensorflow:Assets written to: transformer_imdb/assets



Epoch 00006: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 6/20
Epoch 00006: val_accuracy did not improve from 0.87960

Epoch 00007: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 7/20
Epoch 00007: val_accuracy did not improve from 0.87960

Epoch 00008: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 8/20
Epoch 00008: val_accuracy did not improve from 0.87960

Epoch 00009: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 9/20
Epoch 00009: val_accuracy did not improve from 0.87960

Epoch 00010: LearningRateScheduler reducing learning rate to 9.999999747378752e-05.
Epoch 10/20
Epoch 00010: val_accuracy did not improve from 0.87960

Epoch 00011: LearningRateScheduler reducing learning rate to 1e-05.
Epoch 11/20
Epoch 00011: val_accuracy did not improve from 0.87960

Epoch 00012: LearningRateScheduler reducing learning rate to 9.999999747378752e-06.
Epoch 12/20
Epoch 00012

In [10]:
import tensorflowjs as tfjs

model.save('transformer_imdb.h5')
tfjs.converters.save_keras_model(model, 'transformer_imdb.tfjs')

  return h5py.File(h5file)


In [11]:
import json

vocab_index = {}
for i, word in enumerate(vec_layer.get_vocabulary()):
  vocab_index[str(word)] = i
  
with open('imdb_vocabs.json', 'w') as f:
  json.dump(vocab_index, f)