<a href="https://colab.research.google.com/github/sneharreddy25/project_25-/blob/main/Project_Emotion_Detection_using_Bidirectional_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Project: Emotion Detection using Bidirectional LSTM

In [None]:
# Importing the required libraries
import keras
import numpy as np
from keras.models import Sequential,Model
from keras.layers import Dense,Bidirectional
from nltk.tokenize import word_tokenize,sent_tokenize
from keras.layers import *
from sklearn.model_selection import cross_val_score
import nltk
import pandas as pd
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# Load the ISEAR data. The file has no header row, so the columns are
# numbered: column 0 is the emotion label, column 1 the description text.
df = pd.read_csv('isear.csv', header=None)

# Rows whose text is the placeholder '[ No response.]' carry no usable
# sentence, so only the remaining rows are kept.
has_response = df[1] != '[ No response.]'
df = df[has_response]
print(df.head())

         0                                                  1
0      joy  [ On days when I feel close to my partner and ...
1     fear  Every time I imagine that someone I love or I ...
2    anger  When I had been obviously unjustly treated and...
3  sadness  When I think about the short time that we live...
4  disgust  At a gathering I found myself involuntarily si...


In [None]:
# df[1] holds one text description per row. Each of them is run through the
# word tokenizer, which splits a sentence into its tokens:
# 'I am happy' becomes ['I', 'am', 'happy'].
# feel_arr is therefore a list with one token list per sentence.
feel_arr = list(map(word_tokenize, df[1]))
print(feel_arr[0])

['[', 'On', 'days', 'when', 'I', 'feel', 'close', 'to', 'my', 'partner', 'and', 'other', 'friends', '.', 'When', 'I', 'feel', 'at', 'peace', 'with', 'myself', 'and', 'also', 'experience', 'a', 'close', 'contact', 'with', 'people', 'whom', 'I', 'regard', 'greatly', '.', ']']


In [None]:
# Define a function padd that fixes the length of each sentence
# to 100 tokens.
# If a sentence has fewer than 100 tokens, the token '<pad>' is
# appended until it does; longer sentences are cut off after 100 tokens.
def padd(arr, length=100, pad_token='<pad>'):
    """Return a new list holding exactly ``length`` tokens taken from ``arr``.

    Sentences shorter than ``length`` are right-padded with ``pad_token``;
    longer ones are truncated. The list passed in is never modified, so
    calling this function again on the same input gives the same result.

    Args:
        arr: sequence of tokens (e.g. the output of a word tokenizer).
        length: number of tokens in the returned list (default 100).
        pad_token: filler appended to short sentences (default '<pad>').

    Returns:
        A list of ``length`` tokens.
    """
    # Slicing copies the tokens, which leaves the caller's list untouched.
    tokens = list(arr[:length])
    # A non-positive multiplier yields an empty list, so nothing is added
    # once the sentence already has ``length`` tokens.
    tokens.extend([pad_token] * (length - len(tokens)))
    return tokens

# Run every tokenized sentence in feel_arr through padd so that all of
# them end up with the same fixed length
feel_arr = [padd(tokens) for tokens in feel_arr]

print(feel_arr[0])

['[', 'On', 'days', 'when', 'I', 'feel', 'close', 'to', 'my', 'partner', 'and', 'other', 'friends', '.', 'When', 'I', 'feel', 'at', 'peace', 'with', 'myself', 'and', 'also', 'experience', 'a', 'close', 'contact', 'with', 'people', 'whom', 'I', 'regard', 'greatly', '.', ']', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']


In [None]:
# The GloVe file stores one word per line, followed by the components of
# its 50-dimensional vector.
vocab_f = 'glove.6B.50d.txt'

# embeddings_index maps every word of the file to its vector
# (a float32 NumPy array).
embeddings_index = {}
with open(vocab_f, encoding='utf8') as glove_file:
    for raw_line in glove_file:
        # The first space-separated field is the word to be embedded;
        # all remaining fields are the components of its vector.
        word, *components = raw_line.rstrip().split(' ')
        embeddings_index[word] = np.asarray(components, dtype='float32')

# the embedding vector of the word 'happy'
embeddings_index['happy']

array([ 0.092086,  0.2571  , -0.58693 , -0.37029 ,  1.0828  , -0.55466 ,
       -0.78142 ,  0.58696 , -0.58714 ,  0.46318 , -0.11267 ,  0.2606  ,
       -0.26928 , -0.072466,  1.247   ,  0.30571 ,  0.56731 ,  0.30509 ,
       -0.050312, -0.64443 , -0.54513 ,  0.86429 ,  0.20914 ,  0.56334 ,
        1.1228  , -1.0516  , -0.78105 ,  0.29656 ,  0.7261  , -0.61392 ,
        2.4225  ,  1.0142  , -0.17753 ,  0.4147  , -0.12966 , -0.47064 ,
        0.3807  ,  0.16309 , -0.323   , -0.77899 , -0.42473 , -0.30826 ,
       -0.42242 ,  0.055069,  0.38267 ,  0.037415, -0.4302  , -0.39442 ,
        0.10511 ,  0.87286 ], dtype=float32)

In [None]:
# Replace every token of feel_arr with its GloVe vector. Tokens are
# lowercased before the lookup in embeddings_index.
#
# Any token without an entry in embeddings_index is represented by an
# all-zero vector. This is expected to cover the '<pad>' filler, and it
# also covers every other word that is missing from the GloVe file.
# The zero vector is float32, like the vectors loaded from the file, so
# that stacking the rows into one array later does not mix Python ints
# with float32 arrays.
zero_vector = np.zeros(50, dtype='float32')

embedded_feel_arr = [
    [embeddings_index.get(word.lower(), zero_vector) for word in each_sentence]
    for each_sentence in feel_arr
]

print(embedded_feel_arr[0][0])

[-0.61201   0.98226   0.11539   0.014623  0.23873  -0.067035  0.30632
 -0.64742  -0.38517  -0.03691   0.094788  0.57631  -0.091557 -0.54825
  0.25255  -0.14759   0.13023   0.21658  -0.30623   0.30028  -0.23471
 -0.17927   0.9518    0.54258   0.31172  -0.51038  -0.65223  -0.48858
  0.13486  -0.40132   2.493    -0.38777  -0.26456  -0.49414  -0.3871
 -0.20983   0.82941  -0.46253   0.39549   0.014881  0.79485  -0.79958
 -0.16243   0.013862 -0.53536   0.52536   0.019818 -0.16353   0.30649
  0.81745 ]


In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Stack the embedded sentences into one NumPy array of model inputs
X = np.array(embedded_feel_arr)

# One-hot encode the emotion labels stored in df[0]. The labels are
# reshaped into a single column before encoding, and .toarray() turns
# the encoder output into a dense array.
labels = df[0].to_numpy().reshape(-1, 1)
enc = OneHotEncoder(handle_unknown='ignore')
Y = enc.fit_transform(labels).toarray()

# Hold out 20% of the samples for testing; the fixed random_state makes
# the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
class BiLSTMModel:
    """Bidirectional-LSTM classifier wrapped around a Keras ``Sequential`` model.

    Architecture: input of shape ``(seq_len, embed_dim)`` -> Bidirectional
    LSTM -> Dropout -> Dense softmax layer. The model is compiled with the
    Adam optimizer, categorical cross-entropy loss and the accuracy metric,
    so targets are expected to be one-hot encoded.
    """

    def __init__(self, seq_len=100, embed_dim=50, n_classes=7,
                 lstm_units=100, dropout_rate=0.2):
        """Build and compile the network.

        All arguments are optional; the defaults reproduce the original
        fixed configuration.

        Args:
            seq_len: number of tokens per input sentence.
            embed_dim: size of each token's embedding vector.
            n_classes: number of output classes (width of the softmax layer).
            lstm_units: number of units in each direction of the LSTM.
            dropout_rate: fraction of units dropped after the LSTM.
        """
        self.model = Sequential()
        # The input shape is declared with an explicit Input object instead
        # of an `input_shape` argument on the wrapped LSTM: Keras warns
        # about passing `input_shape` to a layer, and with an Input object
        # the model is built as soon as it is constructed.
        self.model.add(keras.Input(shape=(seq_len, embed_dim)))
        self.model.add(Bidirectional(LSTM(lstm_units)))
        self.model.add(Dropout(dropout_rate))
        self.model.add(Dense(n_classes, activation='softmax'))
        self.model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    def fit(self, X, Y, epochs, batch_size):
        """Train the model on inputs ``X`` and one-hot targets ``Y``.

        Args:
            X: training inputs.
            Y: training targets.
            epochs: number of passes over the training data.
            batch_size: number of samples per gradient update.

        Returns:
            Whatever the underlying Keras ``fit`` call returns (its training
            history), so the caller can inspect how training went.
        """
        return self.model.fit(X, Y, epochs=epochs, batch_size=batch_size)

    def evaluate(self, X, Y, batch_size):
        """Return the result of Keras ``evaluate`` (loss and metrics) on ``X``/``Y``."""
        return self.model.evaluate(X, Y, batch_size=batch_size)

    def predict(self, X):
        """Return the model's predictions for the inputs ``X``."""
        return self.model.predict(X)

In [None]:
# Build a fresh, untrained BiLSTMModel
model = BiLSTMModel()

# Train it on the training split for 20 epochs with mini-batches of 64.
# The trailing semicolon keeps the notebook from echoing a return value.
model.fit(X=X_train, Y=Y_train, epochs=20, batch_size=64);

  super().__init__(**kwargs)


Epoch 1/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 215ms/step - accuracy: 0.2076 - loss: 1.9114
Epoch 2/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 235ms/step - accuracy: 0.3543 - loss: 1.6698
Epoch 3/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 241ms/step - accuracy: 0.4002 - loss: 1.5816
Epoch 4/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 220ms/step - accuracy: 0.4395 - loss: 1.4948
Epoch 5/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 226ms/step - accuracy: 0.4721 - loss: 1.4357
Epoch 6/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 237ms/step - accuracy: 0.4817 - loss: 1.4072
Epoch 7/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 216ms/step - accuracy: 0.5025 - loss: 1.3423
Epoch 8/20
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 238ms/step - accuracy: 0.5215 - loss: 1.3066
Epoch 9/20
[1m95/95[0m [32m━━

In [None]:
# Print the layer-by-layer summary of the Keras network; model.model is
# the Sequential model held by the BiLSTMModel wrapper.
model.model.summary()

In [None]:
# Evaluate the trained model on the held-out test split. The result is the
# list [loss, accuracy], matching the loss and metric the model was
# compiled with.
# A batch size of 64 (the same as in training) replaces the original
# batch size of 1, which ran one step per test sample. The reported
# metrics should not depend on the batch size; only the run time does.
model.evaluate(X_test, Y_test, batch_size=64)

[1m1515/1515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 25ms/step - accuracy: 0.5157 - loss: 1.4725


[1.4608063697814941, 0.5161716341972351]