In [None]:
import pandas as pd
import numpy as np
from nltk.corpus import stopwords
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

In [108]:
#!pip install nltk

In [109]:
df = pd.read_csv('../Dataset/emotion_dataset.csv')
df

Unnamed: 0,text,label
0,i feel awful about it too because it s my job ...,0
1,im alone i feel awful,0
2,ive probably mentioned this before but i reall...,1
3,i was feeling a little low few days back,0
4,i beleive that i am much more sensitive to oth...,2
...,...,...
416804,that was what i felt when i was finally accept...,1
416805,i take every day as it comes i m just focussin...,4
416806,i just suddenly feel that everything was fake,0
416807,im feeling more eager than ever to claw back w...,1


In [110]:
df = df[0:20000]
column= 'text'


In [111]:
# preprocessing

df[column] = df[column].str.lower()

STOPWORDS = set(stopwords.words('english'))

def remove_stopwords(text):
    return " ".join([word for word in text.split() if word not in STOPWORDS])

df[column] = df[column].apply(lambda x: remove_stopwords(x))

def remove_splcharacters(text):
    text = re.sub('[^a-zA-Z0-9]', ' ', text)
    text = re.sub('\s+', ' ', text)
    return text

df[column] = df[column].apply(lambda x: remove_splcharacters(x))

df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].str.lower()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].apply(lambda x: remove_stopwords(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].apply(lambda x: remove_splcharacters(x))


Unnamed: 0,text,label
0,feel awful job get position succeed happen,0
1,im alone feel awful,0
2,ive probably mentioned really feel proud actua...,1
3,feeling little low days back,0
4,beleive much sensitive peoples feelings tend c...,2


In [112]:
# Lemmatization

from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()

def lemmatize_text(text):
    return " ".join([lemmatizer.lemmatize(word, pos ='v') for word in text.split()])

df[column] = df[column].apply(lambda x: lemmatize_text(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[column] = df[column].apply(lambda x: lemmatize_text(x))


In [113]:
# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df[column])


import pickle
# Save tokenizer
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)


word_index = tokenizer.word_index
vocab_size = len(word_index)
vocab_size

13737

In [114]:
# padding the data
sequence = tokenizer.texts_to_sequences(df[column])

# maximum length of the data
max_len = 40
padded_seq = pad_sequences(sequence,maxlen=max_len, padding='post', truncating= 'post')
padded_seq

array([[   1,  367,  201, ...,    0,    0,    0],
       [   3,  117,    1, ...,    0,    0,    0],
       [  22,  214,  514, ...,    0,    0,    0],
       ...,
       [   1, 1487,  448, ...,    0,    0,    0],
       [ 138,    1,   17, ...,    0,    0,    0],
       [   1,   26,  110, ...,    0,    0,    0]], dtype=int32)

In [115]:
# Word Embedding

#create embedding index
embedding_index = {}
with open('glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        embed_list = np.array(values[1:]).astype('float32')
        embedding_index[word]= embed_list



In [116]:
#create embedding matrix
embedding_dim = 100
embedding_matrix = np.zeros((vocab_size+1, embedding_dim))

for word, i in word_index.items():
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [117]:
# Split dataset
from sklearn.model_selection import train_test_split

xtrain,  xtest, ytrain,ytest = train_test_split(padded_seq,df['label'], train_size=0.9, random_state=42)

from keras.utils import to_categorical

ytrain = to_categorical(ytrain)

In [118]:
# Build the model 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

model = Sequential()
model.add(
    Embedding(
        input_dim=vocab_size + 1,
        output_dim=embedding_dim,
        weights=[embedding_matrix],
        input_length=max_len,
        trainable=False,  # freeze pretrained embeddings
    )
)
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(6, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()



In [119]:

model.fit(xtrain,ytrain,validation_split=0.1,epochs=30,batch_size=32)

Epoch 1/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - accuracy: 0.3343 - loss: 1.6103 - val_accuracy: 0.3278 - val_loss: 1.5923
Epoch 2/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.3336 - loss: 1.5687 - val_accuracy: 0.3267 - val_loss: 1.5989
Epoch 3/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.3318 - loss: 1.5728 - val_accuracy: 0.3267 - val_loss: 1.5918
Epoch 4/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.3407 - loss: 1.5698 - val_accuracy: 0.3267 - val_loss: 1.5941
Epoch 5/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.3337 - loss: 1.5743 - val_accuracy: 0.3267 - val_loss: 1.5917
Epoch 6/30
[1m507/507[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.3393 - loss: 1.5737 - val_accuracy: 0.3267 - val_loss: 1.5933
Epoch 7/30
[1m507/507

<keras.src.callbacks.history.History at 0x141fabe7160>

In [None]:
predict = model.predict(xtest)
predict = np.argmax(predict,axis=1)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step


In [121]:
from sklearn.metrics import classification_report
print(classification_report(ytest, predict))

              precision    recall  f1-score   support

           0       0.91      0.96      0.94       610
           1       0.96      0.88      0.92       639
           2       0.76      0.81      0.78       168
           3       0.90      0.90      0.90       271
           4       0.86      0.93      0.89       240
           5       0.89      0.76      0.82        72

    accuracy                           0.90      2000
   macro avg       0.88      0.87      0.88      2000
weighted avg       0.91      0.90      0.90      2000



In [122]:
model.save('emotion_analyzer.h5')



In [123]:
inp = [ 441, 3328,    1, 1179,  763,   17,    0,    0,    0,    0,    0,
          0,    0,    1111110,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0]
inp = np.expand_dims(inp, axis=0).astype('int32')
model.predict(inp)



InvalidArgumentError: Graph execution error:

Detected at node sequential_9_1/embedding_9_1/GatherV2 defined at (most recent call last):
  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\runpy.py", line 196, in _run_module_as_main

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\runpy.py", line 86, in _run_code

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\asyncio\base_events.py", line 603, in run_forever

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\asyncio\base_events.py", line 1909, in _run_once

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\asyncio\events.py", line 80, in _run

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3077, in run_cell

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3132, in _run_cell

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3336, in run_cell_async

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3519, in run_ast_nodes

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3579, in run_code

  File "C:\Users\ANISH\AppData\Local\Temp\ipykernel_25752\3886853127.py", line 6, in <module>

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 560, in predict

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 259, in one_step_on_data_distributed

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 249, in one_step_on_data

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 104, in predict_step

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\layers\layer.py", line 910, in __call__

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\ops\operation.py", line 58, in __call__

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\models\sequential.py", line 221, in call

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\models\functional.py", line 183, in call

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\ops\function.py", line 171, in _run_through_graph

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\models\functional.py", line 643, in call

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\layers\layer.py", line 910, in __call__

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\ops\operation.py", line 58, in __call__

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\layers\core\embedding.py", line 140, in call

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\ops\numpy.py", line 5442, in take

  File "c:\Nishanthi\Hope_AI\Projects\Chat_Emotion_Detection\venv\lib\site-packages\keras\src\backend\tensorflow\numpy.py", line 2222, in take

indices[0,13] = 1111110 is not in [0, 13738)
	 [[{{node sequential_9_1/embedding_9_1/GatherV2}}]] [Op:__inference_one_step_on_data_distributed_379626]

In [92]:
inp

array([[ 441, 3328,    1, 1179,  763,   17,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0]], dtype=int32)