In [94]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Concatenate,Dense,Input,Embedding,Bidirectional,LSTM,GRU
import keras.backend as K
from tensorflow.keras.utils import to_categorical

In [2]:
imdb_df = pd.read_csv("/home/rohit/datasets/IMDB_Dataset.csv")

In [3]:
imdb_df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [4]:
d = {'negative':0,'positive':1}

In [5]:
imdb_df['sentiment'] = imdb_df['sentiment'].apply(lambda x : d.get(x))

In [6]:
imdb_df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [7]:
X_train,X_test,y_train,y_test = train_test_split(imdb_df['review'],imdb_df['sentiment'],test_size=0.2)

In [8]:
X_train = X_train.to_numpy()

In [9]:
X_test = X_test.to_numpy()

In [10]:
X_test

array(["I love sharks. And mutants. And explosions. Theoretically, with those parameters in mind, HAMMERHEAD: Shark Frenzy should have been the best movie ever.<br /><br />It is not.<br /><br />The monster looks like a villain from Power Rangers, and has approximately the same range of rubbery movement. This might be okay if the makers weren't quite as proud of its design as they seem to be. That is to say, for a guy in a big rubber suit in an action/scifi/horror flick that could benefit from some mystery, the shark gets a lot of screen time. Granted, it is usually shaky and erratic. I guess you're supposed to assume that it's so scary that even the camera guy freaks out.<br /><br />The camera goes to a person about to get eaten, the camera goes to the shark. The camera goes back to the person about to get eaten, only now they are screaming and armless. And so on.<br /><br />The costuming is bad, the acting is poor, and the special effects are sub-par, but the writing is by far the wor

In [11]:
y_train

21920    1
10369    0
42409    1
12442    0
37142    0
        ..
22157    1
2853     1
31457    0
49863    1
9169     1
Name: sentiment, Length: 40000, dtype: int64

In [12]:
y_train = y_train.to_numpy()

In [13]:
#clean_text
import re
def clean_text(text,punctuations=r'''!()-[]{};:'"\,<>./?@#$%^&*_~''',
    stop_words=['the', 'a', 'and', 'is', 'be', 'will']):
    
    
    text = re.sub(r'https?://\S+|www\.\S+', '',text)
    text = re.sub(r'<.*?>', '',text)
    # Removing the punctuations
    for x in text.lower(): 
        if x in punctuations: 
            text = text.replace(x, "")
            
    text = text.lower()
    # Removing stop words
    text = ' '.join([word for word in text.split() if word not in stop_words])
    
    
    
    text = re.sub(r'\s+', ' ',text).strip()
    return text

In [14]:
X_train = [clean_text(x) for x in X_train]

In [15]:
X_test = [clean_text(x) for x in X_test]

In [16]:
X_train

['merchant of four seasons isnt what i would call happy movie at all or even one that impressed me to point of praising it to sky there are other fassbinder flicks for that like veronika voss underrated satans brew but its certainly no less than fascinating experiment in taking look at those in society that you me others we know might possibly know or not really want to know i imagine in early 70s in germany generation coming out of ww2 had stigma to live with but tried their best just to get by this stigma that floats all over this film in many instances in fassbinders work in general but especially because with four seasons he takes his eye on middle class particular married couple distanced depressed angry hans fruit seller his longsuffered wife that nothing short than trying for realism in guise of melodrama if cassavetes were crazy german he might make this film maybe even as just larkthe story sounds simple enough where hans drinking gets out of control he beats his wife this sce

In [17]:
t= Tokenizer()

In [18]:
t.fit_on_texts(X_train)

In [19]:
vocab_size = len(t.word_index) + 1

In [20]:
vocab_size

193027

In [21]:
encoded_text = t.texts_to_sequences(X_train)

In [22]:
len(encoded_text)

40000

In [23]:
padded_train = pad_sequences(encoded_text,maxlen = 500,padding='post')

In [24]:
max_len = max([len(x) for x in encoded_text])
min_len = min([len(x) for x in encoded_text])

In [25]:
max_len,min_len

(1848, 4)

In [26]:
encoded_text_test = t.texts_to_sequences(X_test)

In [27]:
padded_test = pad_sequences(encoded_text_test, maxlen = 500,padding='post')

In [81]:
class attention(tf.keras.layers.Layer):
    def __init__(self,**kwargs):
        super(attention,self).__init__(**kwargs)

    def build(self,input_shape):
        self.W=self.add_weight(name="att_weight",shape=(input_shape[-1],1),initializer="normal")
        self.b=self.add_weight(name="att_bias",shape=(input_shape[1],1),initializer="zeros")        
        super(attention, self).build(input_shape)

    def call(self,x):
        et=K.squeeze(K.tanh(K.dot(x,self.W)+self.b),axis=-1)
        at=K.softmax(et)
        at=K.expand_dims(at,axis=-1)
        output=x*at
        return K.sum(output,axis=1)

    def compute_output_shape(self,input_shape):
        return (input_shape[0],input_shape[-1])

    def get_config(self):
        return super(attention,self).get_config()

In [87]:
# Add attention layer to the deep learning network
class attention(tf.keras.layers.Layer):
    def __init__(self,**kwargs):
        super(attention,self).__init__(**kwargs)

    def build(self,input_shape):
        self.W=self.add_weight(name='attention_weight', shape=(input_shape[-1],1), 
                               initializer='random_normal', trainable=True)
        self.b=self.add_weight(name='attention_bias', shape=(input_shape[1],1), 
                               initializer='zeros', trainable=True)        
        super(attention, self).build(input_shape)

    def call(self,x):
        # Alignment scores. Pass them through tanh function
        e = K.tanh(K.dot(x,self.W)+self.b)
        # Remove dimension of size 1
        e = K.squeeze(e, axis=-1)   
        # Compute the weights
        alpha = K.softmax(e)
        # Reshape to tensorFlow format
        alpha = K.expand_dims(alpha, axis=-1)
        # Compute the context vector
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

In [96]:
sequence_input = Input(shape=(500,))

embedded_sequences = Embedding(vocab_size, 300)(sequence_input)

In [97]:
import os
lstm = Bidirectional(LSTM(128,dropout=0.3,return_sequences=True,recurrent_activation='relu'), name="bi_lstm_0")(embedded_sequences)

att_in = Bidirectional(LSTM(10,return_sequences=True))(lstm)



In [98]:
# state_h = Concatenate()([forward_h, backward_h])
# state_c = Concatenate()([forward_c, backward_c])

# context_vector, attention_weights = Attention(lstm, state_h)
att_out=attention()(att_in)
dense_1 = Dense(32,activation = 'relu')(att_out)
output = keras.layers.Dense(2, activation='softmax')(dense_1)

model = keras.Model(inputs=sequence_input, outputs=output)

# summarize layers
print(model.summary())

Model: "model_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 500)]             0         
_________________________________________________________________
embedding_6 (Embedding)      (None, 500, 300)          57908100  
_________________________________________________________________
bi_lstm_0 (Bidirectional)    (None, 500, 256)          439296    
_________________________________________________________________
bidirectional_8 (Bidirection (None, 500, 20)           21360     
_________________________________________________________________
attention_16 (attention)     (None, 20)                520       
_________________________________________________________________
dense_17 (Dense)             (None, 32)                672       
_________________________________________________________________
dense_18 (Dense)             (None, 2)                 66  

In [32]:
import keras

In [None]:
keras.__version__

In [99]:
model.compile(optimizer='adam',loss='categorical_crossentropy', metrics=['accuracy'])
              
             

#early_stopping_callback = keras.callbacks.EarlyStopping(monitor='val_loss',min_delta=0, patience=1, verbose=0, mode='auto')
                                                       
                                                       


In [95]:
y_train = to_categorical(y_train)

In [100]:
history = model.fit(padded_train, y_train,epochs=5,batch_size=256,validation_split=.1, verbose=1)
                    
                    
                    
                    

Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [None]:
result = model.evaluate(x_test, y_test)
print(result)

In [None]:
type(X_train)