<a href="https://colab.research.google.com/github/saishdesai23/Emotion-Classification-and-Detection/blob/main/Emotion_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Connecting the Library to google drive**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Importing required packages**

In [2]:
import pandas as pd
import numpy as np

**Creating dataframe**

In [3]:
def dataframe_creation(filepath : str):
  """
  Function to create a dataframe form the text file
  """
  # reading data from the text file
  with open(filepath, "r") as f:
    lines = f.readlines()
  sent = []
  emotion = []
  for line in lines:
    entry = line.split(";")
    sent.append(entry[0])
    emotion.append(entry[1][:-1])

  # storing the data in a dataframe
  dataframe = pd.DataFrame(zip(sent,emotion) , columns=['Message','Emotion'])
  dataframe['Emotion'] = dataframe['Emotion'].astype('category')
  return dataframe
  


In [4]:
train_data = dataframe_creation("/content/drive/MyDrive/Kaggle Competitions/Emotion_Detection/data/train.txt")
val_data = dataframe_creation("/content/drive/MyDrive/Kaggle Competitions/Emotion_Detection/data/val.txt")
test_data = dataframe_creation("/content/drive/MyDrive/Kaggle Competitions/Emotion_Detection/data/test.txt")

In [5]:
train_data.head()

Unnamed: 0,Message,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [6]:
val_data.head()

Unnamed: 0,Message,Emotion
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy


In [7]:
test_data.head()

Unnamed: 0,Message,Emotion
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness


**Importing Deep Learning Libraries**

In [8]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical

**One Hot Encoding**

In [9]:
# setting hyper parameters
voc_size = 10000

train_sent = train_data['Message']
val_sent = val_data['Message']
test_sent =  test_data['Message']

sent_length = max(max([len(s.split()) for s in train_sent]), 
                  max([len(s.split()) for s in val_sent]), 
                  max([len(s.split()) for s in test_sent]))


train_onehot_repr = [one_hot(words, voc_size) for words in train_sent]
val_onehot_repr = [one_hot(words, voc_size) for words in val_sent]
test_onehot_repr = [one_hot(words, voc_size) for words in test_sent]


**Padding Sequence**

In [10]:
train_padded_doc = pad_sequences(train_onehot_repr, padding='post', maxlen=sent_length)
val_padded_doc = pad_sequences(val_onehot_repr, padding='post', maxlen=sent_length)
test_padded_doc = pad_sequences(test_onehot_repr, padding='post', maxlen=sent_length)

**Model Building**

In [11]:
embedding_vector_feautures = 100

model=Sequential()
model.add(Embedding(voc_size,
                    embedding_vector_feautures,
                    input_length=sent_length))
model.add(Dropout(0.3))
model.add(Bidirectional(LSTM(80,return_sequences=True)))
model.add(Bidirectional(LSTM(160)))
model.add(Dense(6, activation='softmax'))

model.compile(loss='categorical_crossentropy', 
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 66, 100)           1000000   
                                                                 
 dropout (Dropout)           (None, 66, 100)           0         
                                                                 
 bidirectional (Bidirectiona  (None, 66, 160)          115840    
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 320)              410880    
 nal)                                                            
                                                                 
 dense (Dense)               (None, 6)                 1926      
                                                                 
Total params: 1,528,646
Trainable params: 1,528,646
Non-

In [12]:
train_data['Emotion']=train_data.Emotion.replace({'joy':0,'anger':1,'love':2,'sadness':3,'fear':4,'surprise':5})
y_train=to_categorical(train_data['Emotion'])

val_data['Emotion']=val_data.Emotion.replace({'joy':0,'anger':1,'love':2,'sadness':3,'fear':4,'surprise':5})
y_val=to_categorical(val_data['Emotion'])

test_data['Emotion']=test_data.Emotion.replace({'joy':0,'anger':1,'love':2,'sadness':3,'fear':4,'surprise':5})
y_test=to_categorical(test_data['Emotion'])

**Model Training**

In [13]:
model.fit(train_padded_doc,
          y_train, 
          validation_data=(val_padded_doc, y_val), 
          epochs=50,
          batch_size=60)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7febb03eed90>

In [17]:
y_predict = model.predict(test_padded_doc)

In [26]:
y_predict_class = []
for ele in y_predict:
  y_predict_class.append(list(ele).index(max(ele)))

y_actual_class = []
for ele in y_test:
  y_actual_class.append(list(ele).index(max(ele)))

In [27]:
from sklearn.metrics import accuracy_score
acc = accuracy_score(y_predict_class, y_actual_class)

In [28]:
acc

0.8865

In [33]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_predict_class, y_actual_class)

In [34]:
cm

array([[649,  12,  31,  21,   6,   6],
       [  3, 233,   2,   7,   7,   0],
       [ 28,   4, 120,   3,   3,   0],
       [  5,  14,   3, 539,   8,   1],
       [  5,   9,   1,  10, 186,  13],
       [  5,   3,   2,   1,  14,  46]])

In [32]:
from sklearn.metrics import classification_report
cr = classification_report(y_predict_class, y_actual_class)

In [36]:
print(cr)

              precision    recall  f1-score   support

           0       0.93      0.90      0.91       725
           1       0.85      0.92      0.88       252
           2       0.75      0.76      0.76       158
           3       0.93      0.95      0.94       570
           4       0.83      0.83      0.83       224
           5       0.70      0.65      0.67        71

    accuracy                           0.89      2000
   macro avg       0.83      0.83      0.83      2000
weighted avg       0.89      0.89      0.89      2000

