In [None]:
import numpy as np
import pandas as pd
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import set_random_seed
from keras.utils import to_categorical
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune, gathered in one place.
SEED = 42             # shared by the train/test split and the training RNGs
max_features = 10000  # Number of words to consider as features
max_len = 100         # Maximum length of texts (in tokens) after padding/truncation
num_classes = 6       # width of the one-hot labels and of the softmax output
batch_size = 32
epochs = 2

# Seed the Python, NumPy and backend RNGs in one call. Without this only the
# train/test split below is reproducible; weight initialisation, dropout masks
# and batch shuffling would differ on every Restart & Run All.
set_random_seed(SEED)

# --- Load the dataset --------------------------------------------------------
# The CSV is expected to provide a 'text' column and a 'label' column.
# NOTE(review): labels are assumed to be integer class ids in [0, num_classes)
# -- confirm against the dataset.
data = pd.read_csv("emotions.csv")

# --- Split the data into training and testing sets ---------------------------
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data['text'],
    data['label'],
    test_size=0.2,
    random_state=SEED,
)

# --- Tokenize the texts ------------------------------------------------------
# The vocabulary is fitted on the training texts only, so nothing from the
# test split influences the word index.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(train_texts)
train_sequences = tokenizer.texts_to_sequences(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)

# --- Pad sequences to have consistent length ---------------------------------
train_data = pad_sequences(train_sequences, maxlen=max_len)
test_data = pad_sequences(test_sequences, maxlen=max_len)

# --- Convert labels to categorical (one-hot) ---------------------------------
# From here on train_labels / test_labels hold one-hot matrices, which is what
# the categorical cross-entropy loss below expects.
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)

# --- Define the LSTM model ---------------------------------------------------
# A very simple architecture: embedding -> single LSTM -> softmax classifier.
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(num_classes, activation='softmax'))

# --- Compile the model -------------------------------------------------------
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# --- Train the model ---------------------------------------------------------
# validation_split holds out 20% of the training data to monitor each epoch.
model.fit(train_data, train_labels, batch_size=batch_size, epochs=epochs, validation_split=0.2)




Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x7afd9770e2f0>

In [None]:
# Persist the trained network.
# NOTE(review): the recorded output of this cell suggests Keras emits a warning
# for this call -- presumably about the legacy HDF5 format. The path is kept
# as-is so anything that already loads 'final_model.h5' keeps working.
model.save('final_model.h5')

# The network alone cannot score raw text: inference must map words to the same
# integer ids that were used during training. Persist the fitted tokenizer next
# to the model so both can be restored together in a fresh process.
with open('final_tokenizer.json', 'w', encoding='utf-8') as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())

  saving_api.save_model(


In [None]:
# Score the held-out split. The model was compiled with a single 'accuracy'
# metric, so the result is [loss, accuracy] and index 1 is the accuracy.
scores = model.evaluate(x=test_data, y=test_labels)
test_accuracy_pct = scores[1] * 100
print(f"Test Accuracy: {test_accuracy_pct:.2f}%")



Test Accuracy: 93.80%


In [None]:
# Softmax output for the test split: one row per sample, one column per class.
predictions_prob = model.predict(test_data)
# Pick the most probable class id for each row.
predictions = predictions_prob.argmax(axis=1)





In [None]:
# Collapse the one-hot test labels back to integer class ids so they can be
# compared against the predicted class ids.
true_labels = test_labels.argmax(axis=1)

# Print classification report (per-class precision / recall / f1 / support)
report = classification_report(y_true=true_labels, y_pred=predictions)
print(report)

              precision    recall  f1-score   support

           0       0.97      0.98      0.98     24201
           1       0.94      0.97      0.95     28164
           2       0.90      0.78      0.84      6929
           3       0.96      0.92      0.94     11441
           4       0.92      0.88      0.90      9594
           5       0.74      0.94      0.83      3033

    accuracy                           0.94     83362
   macro avg       0.91      0.91      0.91     83362
weighted avg       0.94      0.94      0.94     83362



In [None]:
# Define the testing case(s). This is a list, so more sentences can be appended
# and each one will get its own prediction below.
test_case = ["Just got into a car accident. Thankfully, everyone is okay, but I'm feeling shaken and anxious."]

# Tokenize and pad with the same tokenizer and max_len used for training
test_case_sequence = tokenizer.texts_to_sequences(test_case)
test_case_data = pad_sequences(test_case_sequence, maxlen=max_len)

# Make predictions: one row of class probabilities per test case
predicted_prob = model.predict(test_case_data)

# Take the argmax per row. A bare np.argmax(predicted_prob) works on the
# flattened array, which is only correct when there is exactly one test case;
# with more rows it returns an index outside the class range.
predicted_classes = np.argmax(predicted_prob, axis=1)

# Map predicted class to emotion label
# NOTE(review): this id -> name mapping is assumed to match the dataset's label
# encoding -- confirm against the data source.
emotion_labels = {0: "sadness", 1: "joy", 2: "love", 3: "anger", 4: "fear", 5: "surprise"}

# Print the predicted emotion for every test case. predicted_class and
# predicted_emotion stay bound (to the last case) for any cell that reads them.
for predicted_class in predicted_classes:
    predicted_emotion = emotion_labels[predicted_class]
    print("Predicted emotion:", predicted_emotion)


Predicted emotion: fear
