# Assignment 5

In [33]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import pad_sequences

# Dataset hyper-parameters used by this cell and by the model definition.
max_features = 10_000  # vocabulary size, forwarded to load_data as num_words
max_len = 250  # number of tokens kept per sample after padding

# Each split comes back as a (samples, labels) pair.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Bring both splits to the same length with identical settings; padding is
# appended at the end of each sample ('post').
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len, padding='post')
    for reviews in (x_train, x_test)
)

In [34]:
from tensorflow.keras.layers import Layer, Embedding, LayerNormalization
import tensorflow as tf

class TokenAndPositionEmbedding(Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        seq_len: Number of rows in the position embedding table, i.e. the
            largest number of positions the layer can embed.
        vocab_size: Number of rows in the token embedding table.
        emb_dim: Output dimension shared by both embedding tables.
        **kwargs: Standard Keras ``Layer`` keyword arguments (for example
            ``name`` or ``dtype``), forwarded to the base class.
    """

    def __init__(self, seq_len, vocab_size, emb_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can describe the layer.
        self.seq_len = seq_len
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=emb_dim)
        self.pos_emb = Embedding(input_dim=seq_len, output_dim=emb_dim)

    def call(self, x_input):
        """Embed ``x_input`` and add the embedding of each position.

        Args:
            x_input: Tensor of token ids; its last axis is treated as the
                sequence axis (presumably shaped (batch, seq_len) — confirm
                against the caller).

        Returns:
            The token embeddings with the position embeddings added; the
            position term is broadcast over the leading axes.
        """
        # The length is read from the input itself, so it must not exceed the
        # `seq_len` the position table was built with.
        seq_len = tf.shape(x_input)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x_input = self.token_emb(x_input)
        return x_input + positions

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update({
            'seq_len': self.seq_len,
            'vocab_size': self.vocab_size,
            'emb_dim': self.emb_dim,
        })
        return config

In [35]:
from tensorflow.keras.layers import MultiHeadAttention
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D, Dropout, Add
from tensorflow.keras.models import Model

embed_dim = 32 # word embeddings dimension
num_heads = 2 # number of attention heads
key_dim = embed_dim // num_heads # query/key dimension for one head

# Seed the random number generators before any layer is created so that weight
# initialisation, dropout and batch shuffling are repeatable when the notebook
# is re-run from a fresh kernel.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Functional model: embedding -> self-attention -> pooled binary classifier.
inputs = Input(shape=(max_len,))
x = TokenAndPositionEmbedding(max_len, max_features, embed_dim)(inputs)

# Self-attention: the embedded sequence is passed as both query and value.
# NOTE(review): no padding mask is passed, so padded positions take part in
# the attention and in the average pooling below — confirm this is acceptable.
attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
# NOTE(review): the normalization is applied to the attention branch only,
# before the residual sum, i.e. x + LayerNorm(Attn(x)); the more common
# post-norm form is LayerNorm(x + Attn(x)) — confirm the placement is intended.
attention_output = LayerNormalization()(attention_output) # Normalization after the attention layer

x = Add()([x, attention_output]) # Residual connection around the attention layer

# Collapse the sequence axis by averaging, regularise, then emit one sigmoid
# unit for the binary label.
x = GlobalAveragePooling1D()(x)
x = Dropout(0.5)(x)
outputs = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

In [36]:
# Train on the full training split; no validation data is held out here.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.6689 - loss: 0.5775
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9065 - loss: 0.2415  
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9395 - loss: 0.1736
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9607 - loss: 0.1261
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9695 - loss: 0.1005  


<keras.src.callbacks.history.History at 0x7fbadd8b6210>

In [37]:
# evaluate() returns a sequence of results; index 1 is the value reported as
# test accuracy (the model is compiled with a single 'accuracy' metric).
eval_results = model.evaluate(x_test, y_test)
test_accuracy = eval_results[1]
print(f'Test accuracy = {test_accuracy:.4f}')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8604 - loss: 0.4101
Test accuracy = 0.8580
