In [1]:
pip install neattext

Collecting neattext
  Downloading neattext-0.1.3-py3-none-any.whl.metadata (12 kB)
Downloading neattext-0.1.3-py3-none-any.whl (114 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/114.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.7/114.7 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: neattext
Successfully installed neattext-0.1.3


In [2]:
import numpy as np
import pandas as pd
import neattext as nt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import joblib

In [3]:
df = pd.read_csv('emotions_dataset.csv')

print(df.head())

                             text    emotion
0                 I feel awesome!      happy
1               That's repulsive.  disgusted
2             This is unexpected.  surprised
3  Nothing exciting is happening.    neutral
4       I am on top of the world!      happy


In [4]:
# Preprocessing the text column using NeatText
df['clean_text'] = df['text'].apply(lambda x: nt.remove_stopwords(nt.remove_special_characters(x)).lower())

print(df[['text', 'clean_text']].head())

                             text          clean_text
0                 I feel awesome!        feel awesome
1               That's repulsive.     thats repulsive
2             This is unexpected.          unexpected
3  Nothing exciting is happening.  exciting happening
4       I am on top of the world!               world


In [5]:
# Tokenization
tokenizer = Tokenizer(num_words=10000)  # Limit to the top 10,000 words
tokenizer.fit_on_texts(df['clean_text'])

# Converting the text into sequences
X = tokenizer.texts_to_sequences(df['clean_text'])

# Padding sequences to ensure uniform input size
X = pad_sequences(X, padding='post')

In [6]:
# Encoding the labels i.e emotions
encoder = LabelEncoder()
y = encoder.fit_transform(df['emotion'])

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Defining the BiLSTM model
model = Sequential()

# Adding an Embedding layer
model.add(Embedding(input_dim=10000, output_dim=128, input_length=X_train.shape[1]))

# Adding a Bidirectional LSTM layer
model.add(Bidirectional(LSTM(64, return_sequences=True)))

# Dropout layer for regularization
model.add(Dropout(0.5))

# Another LSTM layer
model.add(Bidirectional(LSTM(64)))

# Dense output layer with softmax activation for multi-class classification
model.add(Dense(len(encoder.classes_), activation='softmax'))

# Compiling the model with adam optimizer and sparse categorical crossentropy for predictions
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [10]:
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 9ms/step - accuracy: 0.9284 - loss: 0.2036 - val_accuracy: 0.9769 - val_loss: 0.0323
Epoch 2/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.9756 - loss: 0.0343 - val_accuracy: 0.9766 - val_loss: 0.0326
Epoch 3/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 9ms/step - accuracy: 0.9753 - loss: 0.0340 - val_accuracy: 0.9766 - val_loss: 0.0324
Epoch 4/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 9ms/step - accuracy: 0.9747 - loss: 0.0343 - val_accuracy: 0.9766 - val_loss: 0.0324
Epoch 5/5
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 10ms/step - accuracy: 0.9756 - loss: 0.0340 - val_accuracy: 0.9769 - val_loss: 0.0322


In [11]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9764 - loss: 0.0321
Test Accuracy: 97.69%


In [12]:
model.save('emotion_bilstm_model.h5')
joblib.dump(tokenizer, 'text_tokenizer.pkl')
joblib.dump(encoder, 'label_encoder.pkl')

print("Model, tokenizer and encoder saved")



Model, tokenizer and encoder saved


In [14]:
def predict_emotion_from_input():
    new_text = input("Enter a sentence to predict emotion: ")

    cleaned_text = nt.remove_stopwords(nt.remove_special_characters(new_text)).lower()

    X_new = tokenizer.texts_to_sequences([cleaned_text])

    X_new_pad = pad_sequences(X_new, padding='post', maxlen=X_train.shape[1])

    emotion_pred = model.predict(X_new_pad)
    predicted_emotion = encoder.inverse_transform([np.argmax(emotion_pred)])

    print(f"The emotion of the text is: {predicted_emotion[0]}")

In [15]:
predict_emotion_from_input()

Enter a sentence to predict emotion: the movie was amazing
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 511ms/step
The emotion of the text is: happy


In [16]:
predict_emotion_from_input()

Enter a sentence to predict emotion: i want to make this but i am unsure about it
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
The emotion of the text is: fearful


In [17]:
predict_emotion_from_input()

Enter a sentence to predict emotion: I can't believe this is happening, it's so unbelievable and shocking! What a surprise!
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
The emotion of the text is: surprised


In [18]:
predict_emotion_from_input()

Enter a sentence to predict emotion: This is terrible, I can't stop crying. Everything seems so hopeless
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
The emotion of the text is: sad


In [19]:
predict_emotion_from_input()

Enter a sentence to predict emotion: Nothing exciting or bad is happening.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
The emotion of the text is: neutral


In [22]:
predict_emotion_from_input()

Enter a sentence to predict emotion: I can't stand this smell.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
The emotion of the text is: disgusted


In [23]:
predict_emotion_from_input()

Enter a sentence to predict emotion: I am really scared about the upcoming exam. I feel anxious and overwhelmed
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
The emotion of the text is: fearful
