In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
#label encoding
from sklearn.preprocessing import LabelEncoder


In [14]:
df=pd.read_csv('archive\IMDB Dataset.csv')


In [15]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
import string
import re
def clean_text(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)
    #remove html tags
    text = re.sub(r'<.*?>', '', text)
    text = text.translate(str.maketrans('', '', string.punctuation))
    #stopword removal
    # text= ' '.join([word for word in text.split() if word not in stopwords.words("english")])
    text = text.strip()
    return text


In [16]:
df['review'] = df['review'].apply(clean_text)


In [17]:
from collections import Counter
stop_words = stopwords.words('english')
stopwords_dict = Counter(stop_words)
def re_stopwords(text):
    return ' '.join([word for word in text.split() if word not in stopwords_dict])

In [18]:
df['review'] = df['review'].apply(re_stopwords)


In [19]:
lb=LabelEncoder()
df['sentiment']=lb.fit_transform(df['sentiment'])

In [20]:
max_length = max(df['review'].apply(lambda x: len(x.split())))


In [21]:
tk=keras.preprocessing.text.Tokenizer(num_words=10000)
tk.fit_on_texts(df['review'])
X=tk.texts_to_sequences(df['review'])
X=keras.preprocessing.sequence.pad_sequences(X,maxlen=600)

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, df['sentiment'], test_size=0.2, random_state=42)


In [23]:
df.head()

Unnamed: 0,review,sentiment
0,one reviewers mentioned watching oz episode yo...,1
1,wonderful little production filming technique ...,1
2,thought wonderful way spend time hot summer we...,1
3,basically theres family little boy jake thinks...,0
4,petter matteis love time money visually stunni...,1


In [7]:
p=tf.random.uniform([1, 100])
c,v=tf.split(p, num_or_size_splits=2,axis=1)

In [9]:
def scaled_dot_product_attention(Q, K, V, mask=None):
    """
    Calculate the attention weights.

    Arguments:
        Q -- query shape == (..., Tq, dk)
        K -- key shape == (..., Tv, dk)
        V -- value shape == (..., Tv, dv)
        mask: Float tensor with shape broadcastable to (..., Tq, Tv). Defaults to None.

    Returns:
        output -- (attention,attention_weights)
    """
    
    #Compute the scaled dot-product Q•K
    matmul_QK = tf.matmul(Q,K,transpose_b=True)  # dot-product of shape (..., Tq, Tv)

    dk = K.shape[-1]
    scaled_attention_logits = matmul_QK/np.sqrt(dk) # scaled dot-product of shape (..., Tq, Tv)

    # Add the mask to the scaled dot-product
    if mask is not None: 
        scaled_attention_logits += (1. - mask) *(-1e9)

    # Compute the Softmax
    attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)  # weights of shape (..., Tq, Tv)

    #Multiply with V
    output = tf.matmul(attention_weights,V)  # Attention representation of shape (..., Tq, dv)
    
    return output, attention_weights

In [10]:
import tensorflow as tf
from tensorflow.keras import layers, Model, Input

# class SelfAttention(layers.Layer):
#     def __init__(self, units):
#         super(SelfAttention, self).__init__()
#         self.W1 = layers.Dense(units)
#         self.W2 = layers.Dense(units)
#         self.V = layers.Dense(1)

#     def call(self, features):
#         # features: shape (batch_size, seq_len, embedding_dim)
#         hidden_with_time_axis = tf.expand_dims(features, 1)
#         score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
#         attention_weights = tf.nn.softmax(self.V(score), axis=1)
#         context_vector = attention_weights * features
#         context_vector = tf.reduce_sum(context_vector, axis=1)
#         return context_vector

def senti_model():
    inputs = Input(shape=(600,))
    embedding = layers.Embedding(10000, 128, input_length=600)(inputs)
    
    # attention = SelfAttention(128)(embedding)
    # attention = layers.Dropout(0.3)(attention)
    den= layers.Dense(128*3, activation='relu')(embedding)
    #reshape and get q,k,v vectors
    q,k,v = tf.split(den, num_or_size_splits=3, axis=1)
    # embedding=tf.keras.layers.Attention()([q,k,v])
    embedding,attention_weights = scaled_dot_product_attention(q,k,v)
    lstm = layers.LSTM(64, return_sequences=True)(embedding)
    # lstm_attention = SelfAttention(64)(lstm)
    drp = layers.Dropout(0.5)(lstm)
    
    ln = layers.LayerNormalization()(drp)
    dense = layers.Dense(1, activation='sigmoid')(ln)
    
    model = Model(inputs=inputs, outputs=dense)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

model = senti_model()
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 600)]        0           []                               
                                                                                                  
 embedding (Embedding)          (None, 600, 128)     1280000     ['input_1[0][0]']                
                                                                                                  
 dense (Dense)                  (None, 600, 384)     49536       ['embedding[0][0]']              
                                                                                                  
 tf.split (TFOpLambda)          [(None, 200, 384),   0           ['dense[0][0]']                  
                                 (None, 200, 384),                                            

In [12]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
es = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')
mc = ModelCheckpoint('best_model+yy.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

In [24]:
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test), callbacks=[es, mc])

Epoch 1/5
Epoch 1: val_accuracy improved from -inf to 0.88694, saving model to best_model+yy.h5
Epoch 2/5
Epoch 2: val_accuracy improved from 0.88694 to 0.88711, saving model to best_model+yy.h5
Epoch 3/5
Epoch 3: val_accuracy did not improve from 0.88711
Epoch 4/5
 70/625 [==>...........................] - ETA: 37s - loss: 0.1400 - accuracy: 0.9488

KeyboardInterrupt: 

In [52]:
#save model
model.save('sentiment_analysis_with_attention.h5')