#### Simple Encoder Decoder Attention Mechanism

In [3]:
import numpy as np 
import pandas as pd 

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras import regularizers

from keras.layers import Layer
import tensorflow as tf

#### Preparing Input Data 

In [4]:
# Read the tab-separated "<message>\t<label>" file into two parallel lists.
data_path = 'dataset/amazon_cells_labelled.txt'

with open(data_path, 'r', encoding='utf-8') as handle:
    raw_lines = handle.read().split("\n")

# Skip blank lines, split each remaining line on tabs, and keep only the
# rows that have exactly two fields (message text and integer label).
split_rows = [line.split('\t') for line in raw_lines if line.strip()]
valid_rows = [fields for fields in split_rows if len(fields) == 2]

messages = [fields[0] for fields in valid_rows]
labels = [int(fields[1]) for fields in valid_rows]

print("messages : ", messages)
print("length of messages : ", len(messages))
print("labels : ", labels)
print("length of labels : ", len(labels))

length of messages :  1000
labels :  [0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,

In [5]:
# Combine the two parallel lists into a two-column frame and preview the first ten rows.
dataset = pd.DataFrame(dict(message=messages, label=labels))
dataset.head(n=10)

Unnamed: 0,message,label
0,So there is no way for me to plug it in here i...,0
1,"Good case, Excellent value.",1
2,Great for the jawbone.,1
3,Tied to charger for conversations lasting more...,0
4,The mic is great.,1
5,I have to jiggle the plug to get it to line up...,0
6,If you have several dozen or several hundred c...,0
7,If you are Razr owner...you must have this!,1
8,"Needless to say, I wasted my money.",0
9,What a waste of money and time!.,0


#### Tokenizing the Words and Converting Them into Numerical Sequences

In [6]:
# Fit the tokenizer on every message, then convert each message to a sequence of word ids.
corpus = dataset["message"]
t = Tokenizer()
t.fit_on_texts(corpus)
text_matrix = t.texts_to_sequences(corpus)

#### We Need to Know the Maximum Sequence Length in Order to Pad the Sequences

In [8]:
# Length of the longest tokenized message; stays 0 when there are no sequences at all.
max_len = max((len(sequence) for sequence in text_matrix), default=0)

print("Maximum length of sequences can be: ", max_len)

Maximum length of sequences can be:  30


In [9]:
# Bring every sequence to a fixed length of 32, adding the padding after the tokens ('post').
text_pad = pad_sequences(
    text_matrix,
    maxlen=32,
    padding='post',
)

In [10]:
# Hold out 20% of the samples for testing; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    text_pad,
    dataset['label'],
    test_size=0.2,
    random_state=42,
)

In [12]:
# Word -> id mapping learned by the tokenizer, and how many distinct words it holds.
vocab = t.word_index
vocab_size = len(vocab)

# Number of timesteps per input sequence fed to the models below.
features = 32

print("dicitionary element in knowledge based ", vocab)
print("vocabulary size of the knowlefge based : ", vocab_size)

vocabulary size of the knowlefge based :  1878


In [13]:
#### Defining LSTM Model

In [14]:
# Baseline model: Embedding -> single LSTM -> one sigmoid unit for the binary label.
#
# `input_length` is intentionally not passed to Embedding: the argument is
# deprecated in Keras 3 (it only triggers a warning), and the sequence length
# is already fixed by the Input shape.

inputs1 = Input(shape=(features,))
x1 = Embedding(
    input_dim=vocab_size + 1,  # one extra row so that index 0 (the padding value) is valid
    output_dim=32,
    embeddings_regularizer=regularizers.l2(.001),
)(inputs1)
# return_sequences is left at its default, so only the last LSTM output is passed on.
x1 = LSTM(100, dropout=0.3, recurrent_dropout=0.2)(x1)
outputs1 = Dense(1, activation='sigmoid')(x1)
model1 = Model(inputs1, outputs1)



In [15]:
# Print a summary of the baseline LSTM model's layers.
model1.summary()

In [16]:
# Train the baseline with binary cross-entropy; 20% of the training data is
# set aside for validation via validation_split.
model1.compile(
    loss='binary_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)
history = model1.fit(
    x=X_train,
    y=y_train,
    batch_size=100,
    epochs=10,
    verbose=1,
    shuffle=True,
    validation_split=0.2,
)

Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 75ms/step - accuracy: 0.4859 - loss: 0.7372 - val_accuracy: 0.4812 - val_loss: 0.7259
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.4844 - loss: 0.7206 - val_accuracy: 0.5188 - val_loss: 0.7127
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.4844 - loss: 0.7095 - val_accuracy: 0.5188 - val_loss: 0.7040
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.5125 - loss: 0.7021 - val_accuracy: 0.5188 - val_loss: 0.6986
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.5078 - loss: 0.6982 - val_accuracy: 0.5188 - val_loss: 0.6956
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.5078 - loss: 0.6955 - val_accuracy: 0.5188 - val_loss: 0.6939
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━

#### Self Attention Mechanism

In [17]:
class attention(Layer):
    """Attention pooling over the time axis of a sequence.

    Each timestep is scored with ``tanh(x @ W + b)``, the scores are turned
    into weights with a softmax across timesteps, and the layer returns the
    weighted sum of the inputs (one vector per batch element).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring weight and bias from ``input_shape``."""
        feature_dim = input_shape[-1]
        timesteps = input_shape[1]
        # W projects each feature vector to a single score; b holds one bias per timestep.
        self.W = self.add_weight(
            name="att_weight",
            shape=(feature_dim, 1),
            initializer="normal",
        )
        self.b = self.add_weight(
            name="att_bias",
            shape=(timesteps, 1),
            initializer="zeros",
        )
        super().build(input_shape)

    def call(self, x):
        """Reduce ``x`` of shape (batch, timesteps, features) to (batch, features)."""
        raw_scores = tf.tanh(tf.linalg.matmul(x, self.W) + self.b)  # (batch, timesteps, 1)
        scores = tf.squeeze(raw_scores, axis=-1)                     # (batch, timesteps)

        # Softmax over the last axis, i.e. across timesteps.
        weights = tf.nn.softmax(scores)                              # (batch, timesteps)
        weights = tf.expand_dims(weights, axis=-1)                   # (batch, timesteps, 1)

        # Weight every timestep, then sum them into a single context vector.
        weighted = x * weights                                       # (batch, timesteps, features)
        return tf.reduce_sum(weighted, axis=1)

    def compute_output_shape(self, input_shape):
        """Return (batch, features): the time axis is summed away."""
        batch_dim, feature_dim = input_shape[0], input_shape[-1]
        return (batch_dim, feature_dim)

    def get_config(self):
        """Return the base layer config; this layer adds no constructor arguments."""
        return super().get_config()

In [18]:
# Attention model: Embedding -> LSTM that returns every timestep -> attention
# pooling -> one sigmoid unit for the binary label.
#
# `input_length` is intentionally not passed to Embedding: the argument is
# deprecated in Keras 3 (it only triggers a warning), and the sequence length
# is already fixed by the Input shape.
inputs = Input((features,))
x = Embedding(
    input_dim=vocab_size + 1,  # one extra row so that index 0 (the padding value) is valid
    output_dim=32,
    embeddings_regularizer=regularizers.l2(.001),
)(inputs)
# return_sequences=True keeps the per-timestep outputs that the attention layer pools over.
att_in = LSTM(100, return_sequences=True, dropout=0.3, recurrent_dropout=0.2)(x)
att_out = attention()(att_in)
outputs = Dense(1, activation='sigmoid', trainable=True)(att_out)
model = Model(inputs, outputs)
model.summary()

In [19]:
# Train the attention model with the same settings as the baseline run;
# 20% of the training data is set aside for validation.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=10,
    verbose=1,
    shuffle=True,
    validation_split=0.2,
)

Epoch 1/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 76ms/step - acc: 0.5016 - loss: 0.7370 - val_acc: 0.5188 - val_loss: 0.7252
Epoch 2/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - acc: 0.4922 - loss: 0.7206 - val_acc: 0.5813 - val_loss: 0.7127
Epoch 3/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - acc: 0.5437 - loss: 0.7092 - val_acc: 0.5875 - val_loss: 0.7040
Epoch 4/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - acc: 0.5594 - loss: 0.7012 - val_acc: 0.5188 - val_loss: 0.6979
Epoch 5/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - acc: 0.5703 - loss: 0.6951 - val_acc: 0.5938 - val_loss: 0.6920
Epoch 6/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - acc: 0.6125 - loss: 0.6795 - val_acc: 0.6375 - val_loss: 0.6518
Epoch 7/10
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - acc: 0.7281 - loss: 0

<keras.src.callbacks.history.History at 0x20b16649050>

We get a validation accuracy of 76%, which is nice, but we can still improve it.