<a href="https://colab.research.google.com/github/palVikram/Advance_tensorflow/blob/main/Transformer_encoder_vs_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Hyperparameters shared by the data-loading and model-building cells.
vocab_size = 20_000  # vocabulary cap handed to the IMDB loader
maxlen = 200  # sequence length every review is padded/truncated to
embed_dim = 32  # width of each token embedding vector
num_heads = 2  # attention heads in the transformer block
ff_dim = 32  # hidden units of the transformer's feed-forward sub-layer


## Implement a Transformer block as a layer

In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual addition
    and layer normalization.

    Args:
        embed_dim: Size of the last axis of the input. It is also used as the
            per-head key size and as the feed-forward output width so that
            the residual additions line up.
        num_heads: Number of attention heads.
        ff_dim: Hidden units of the feed-forward sub-layer.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to ``layers.Layer``.
    """

    # Keys an earlier version of get_config() stored (the sub-layer objects
    # themselves). They are not constructor arguments, so from_config()
    # discards them to keep previously saved models loadable.
    _LEGACY_CONFIG_KEYS = (
        "att",
        "ffn",
        "layernorm1",
        "layernorm2",
        "dropout1",
        "dropout2",
    )

    def __init__(self, embed_dim=32, num_heads=2, ff_dim=32, rate=0.1, **kwargs):
        # Forward kwargs so name/dtype/trainable survive a save/load round trip.
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a config dict, ignoring legacy sub-layer keys."""
        config = {
            key: value
            for key, value in config.items()
            if key not in cls._LEGACY_CONFIG_KEYS
        }
        return super().from_config(config)

    def call(self, inputs, training=None):
        """Apply attention and feed-forward sub-layers to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the dropout layers; ``None`` lets Keras
                decide from the calling context.

        Returns:
            The output of the second layer normalization.
        """
        # Self-attention: the sequence attends to itself.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

## Implement embedding layer

Two separate embedding layers: one for the tokens and one for the token indices (positions).

In [None]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position embedding table.
        vocab_size: Number of rows in the token embedding table.
        embed_dim: Width of both embedding tables.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to ``layers.Layer``.
    """

    # Keys an earlier version of get_config() stored (the sub-layer objects
    # themselves). They are not constructor arguments, so from_config()
    # discards them to keep previously saved models loadable.
    _LEGACY_CONFIG_KEYS = ("token_emb", "pos_emb")

    def __init__(self, maxlen=200, vocab_size=20000, embed_dim=32, **kwargs):
        # Forward kwargs so name/dtype/trainable survive a save/load round trip.
        super().__init__(**kwargs)
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer from a config dict, ignoring legacy sub-layer keys."""
        config = {
            key: value
            for key, value in config.items()
            if key not in cls._LEGACY_CONFIG_KEYS
        }
        return super().from_config(config)

    def call(self, x):
        """Embed token ids and add the embedding of each position.

        Args:
            x: Tensor of token ids; its last axis is treated as the sequence.

        Returns:
            Token embeddings plus position embeddings (broadcast over the
            leading axes).
        """
        # Positions are 0 .. seq_len-1 along the last axis of the input.
        # NOTE: the position table only has ``maxlen`` rows, so longer
        # sequences have no embedding to look up.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [None]:
# Preview the position ids 0 .. maxlen-1 that the position embedding looks up.
position_ids = tf.range(maxlen)
position_ids

<tf.Tensor: shape=(200,), dtype=int32, numpy=
array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 1

## Download and prepare dataset

In [None]:
# Load the IMDB reviews, restricted to the vocab_size most frequent words.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(
    num_words=vocab_size
)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")

# Bring every review to exactly maxlen ids (pad_sequences defaults apply).
pad_to_maxlen = keras.preprocessing.sequence.pad_sequences
x_train = pad_to_maxlen(x_train, maxlen=maxlen)
x_val = pad_to_maxlen(x_val, maxlen=maxlen)

25000 Training sequences
25000 Validation sequences


In [None]:
# Display the training array produced by pad_sequences.
x_train

array([[   5,   25,  100, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

In [None]:
# Distinct label values present in the training targets.
set(y_train)

{0, 1}

## Create classifier model using transformer layer

The transformer layer outputs one vector for each time step of our input sequence.
Here, we take both the mean and the max across all time steps, concatenate them,
and use a feed-forward network on top of the result to classify the text.

In [None]:
# Transformer hyperparameters (same values as the constants defined at the top).
embed_dim = 32  # embedding width per token
num_heads = 2  # attention heads
ff_dim = 32  # feed-forward hidden units inside the transformer block

# Token ids -> token + position embeddings -> one transformer block.
inputs = layers.Input(shape=(maxlen,))
x = TokenAndPositionEmbedding()(inputs)
transformer_block = TransformerBlock()
x = transformer_block(x)

# Collapse the time axis with both mean and max pooling, then join the two.
avg_pool = layers.GlobalAveragePooling1D()(x)
max_pool = layers.GlobalMaxPooling1D()(x)
x = layers.concatenate([avg_pool, max_pool])

# Classification head: dropout -> dense -> dropout -> 2-way softmax.
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

## Train and Evaluate

In [None]:
# Targets are integer class ids, hence the sparse categorical loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# One pass over the training data, validating on the held-out split.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=1,
)



In [None]:
# The custom layer classes are passed by name so Keras can rebuild them
# from the saved model.
custom_layers = {
    'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
    'TransformerBlock': TransformerBlock,
}
# NOTE(review): no cell visible in this notebook writes 'model.h5' — confirm
# the file exists (e.g. saved or uploaded beforehand) before running this.
new_model = tf.keras.models.load_model('model.h5', custom_objects=custom_layers)


In [None]:
import time

# Measure the CPU time (time.process_time) of one single-review prediction
# with the reloaded model; the slice keeps the leading batch axis.
start = time.process_time()
score = new_model.predict(x_val[:1])
elapsed = time.process_time() - start
print(elapsed)
score

0.051662238000062644


array([[0.927837  , 0.07216291]], dtype=float32)

In [None]:
# LSTM baseline for comparison with the transformer classifier.
# The embedding is sized from the shared hyperparameters (vocab_size,
# embed_dim, maxlen) so it stays in sync with the padded data if they change.
# NOTE: this rebinds `model`, replacing the transformer classifier built earlier.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embed_dim, input_length=maxlen),
    tf.keras.layers.LSTM(units=256),
    tf.keras.layers.Dense(2, activation='softmax'),
])

# Same optimizer, loss and metric as the transformer so the runs are comparable.
model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
history = model.fit(
    x_train, y_train, batch_size=32, epochs=1, validation_data=(x_val, y_val)
)





In [None]:
import time

# Measure the CPU time (time.process_time) of one single-review prediction
# with the LSTM model; the slice keeps the leading batch axis.
start = time.process_time()
score = model.predict(x_val[:1])
elapsed = time.process_time() - start
print(elapsed)
score

0.46727311299991925


array([[0.8355441 , 0.16445588]], dtype=float32)