In [1]:
import tensorflow as tf
from keras.models import *
from keras.layers import *
from keras.datasets import imdb
from keras.utils import pad_sequences

In [2]:
class TransformerBlock(Layer):
  def __init__(self, embed_dim, num_head, ff_dim, rate  = 0.1):

    super().__init__()
    self.att = MultiHeadAttention(num_heads = num_heads , key_dim = embed_dim)

    self.ffn = Sequential(
        [Dense(ff_dim, activation = 'relu'), Dense(embed_dim)],
    )

    self.layernorm1 = LayerNormalization(epsilon = 1e-6)
    self.layernorm2 = LayerNormalization(epsilon = 1e-6)

    self.dropout1 = Dropout(rate)
    self.dropout2 = Dropout(rate)

  def call(self, inputs, training):
    attn_output = self.att(inputs, inputs)

    attn_output = self.dropout1(attn_output, training = training)

    out1 = self.layernorm1(inputs + attn_output)

    ffn_output = self.ffn(out1)

    ffn_output = self.dropout2(ffn_output, training = training)

    return self.layernorm2(out1 + ffn_output)

# Implementing Embedding Layer

In [3]:
class TokenAndPositionEmbedding(Layer):

  def __init__(self, maxlen, vocab_size, embed_dim):

    super().__init__()
    self.token_emb = Embedding(input_dim= vocab_size, output_dim=embed_dim)

    self.pos_emb = Embedding(input_dim= maxlen, output_dim=embed_dim)

  def call(self, x):
    maxlen = tf.shape(x)[-1]

    positions = tf.range(start = 0, limit = maxlen, delta = 1)

    postions = self.pos_emb(positions)

    x = self.token_emb(x)

    return x + postions

In [4]:
vocab_size = 20000 # Only consider the top 20k words
maxlen = 200
(x_train, y_train), (x_val, y_val) = imdb.load_data(num_words = vocab_size)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
x_train = pad_sequences(x_train, maxlen = maxlen)
x_val = pad_sequences(x_val, maxlen = maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 Training sequences
25000 Validation sequences


In [16]:
x_train[0]

array([    5,    25,   100,    43,   838,   112,    50,   670,     2,
           9,    35,   480,   284,     5,   150,     4,   172,   112,
         167,     2,   336,   385,    39,     4,   172,  4536,  1111,
          17,   546,    38,    13,   447,     4,   192,    50,    16,
           6,   147,  2025,    19,    14,    22,     4,  1920,  4613,
         469,     4,    22,    71,    87,    12,    16,    43,   530,
          38,    76,    15,    13,  1247,     4,    22,    17,   515,
          17,    12,    16,   626,    18, 19193,     5,    62,   386,
          12,     8,   316,     8,   106,     5,     4,  2223,  5244,
          16,   480,    66,  3785,    33,     4,   130,    12,    16,
          38,   619,     5,    25,   124,    51,    36,   135,    48,
          25,  1415,    33,     6,    22,    12,   215,    28,    77,
          52,     5,    14,   407,    16,    82, 10311,     8,     4,
         107,   117,  5952,    15,   256,     4,     2,     7,  3766,
           5,   723,

In [5]:
x_train.shape

(25000, 200)

In [6]:
embed_dim = 32
num_heads = 2
ff_dim = 32

inputs = Input(shape = (maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.1)(x)
x = Dense(20, activation = 'relu')(x)
x = Dropout(0.1)(x)
outputs = Dense(2, activation = 'softmax')(x)

model = Model(inputs = inputs, outputs = outputs)

In [9]:
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

history = model.fit(x_train, y_train, batch_size = 32, epochs = 10, validation_data = (x_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
model.evaluate(x_val, y_val)



[1.5963256359100342, 0.8192800283432007]

In [12]:
def predict_text(text):
  text = pad_sequences(text, maxlen = maxlen)
  prediction = model.predict(text)
  return prediction.argmax(axis = 1)


In [13]:
text = """It is no wonder that the film has such a high rating, it is quite literally breathtaking. What can I say that hasn't said before? Not much, it's the story, the acting, the premise, but most of all, this movie is about how it makes you feel. Sometimes you watch a film, and can't remember it days later, this film loves with you, once you've seen it, you don't forget. The ultimate story of friendship, of hope, and of life, and overcoming adversity. I understand why so many class this as the best film of all time, it isn't mine, but I get it. If you haven't seen it, or haven't seen it for some time, you need to watch it, it's amazing. 10/10."""

In [14]:
predict_text(text)

ValueError: ignored

In [18]:
(x_train, y_train), (x_val, y_val) = imdb.load_data(num_words = vocab_size)
x_train[1]

[1,
 194,
 1153,
 194,
 8255,
 78,
 228,
 5,
 6,
 1463,
 4369,
 5012,
 134,
 26,
 4,
 715,
 8,
 118,
 1634,
 14,
 394,
 20,
 13,
 119,
 954,
 189,
 102,
 5,
 207,
 110,
 3103,
 21,
 14,
 69,
 188,
 8,
 30,
 23,
 7,
 4,
 249,
 126,
 93,
 4,
 114,
 9,
 2300,
 1523,
 5,
 647,
 4,
 116,
 9,
 35,
 8163,
 4,
 229,
 9,
 340,
 1322,
 4,
 118,
 9,
 4,
 130,
 4901,
 19,
 4,
 1002,
 5,
 89,
 29,
 952,
 46,
 37,
 4,
 455,
 9,
 45,
 43,
 38,
 1543,
 1905,
 398,
 4,
 1649,
 26,
 6853,
 5,
 163,
 11,
 3215,
 10156,
 4,
 1153,
 9,
 194,
 775,
 7,
 8255,
 11596,
 349,
 2637,
 148,
 605,
 15358,
 8003,
 15,
 123,
 125,
 68,
 2,
 6853,
 15,
 349,
 165,
 4362,
 98,
 5,
 4,
 228,
 9,
 43,
 2,
 1157,
 15,
 299,
 120,
 5,
 120,
 174,
 11,
 220,
 175,
 136,
 50,
 9,
 4373,
 228,
 8255,
 5,
 2,
 656,
 245,
 2350,
 5,
 4,
 9837,
 131,
 152,
 491,
 18,
 2,
 32,
 7464,
 1212,
 14,
 9,
 6,
 371,
 78,
 22,
 625,
 64,
 1382,
 9,
 8,
 168,
 145,
 23,
 4,
 1690,
 15,
 16,
 4,
 1355,
 5,
 28,
 6,
 52,
 154,
 462,
 33,
