In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np


In [2]:
# Dataset loader
def load_dataset(file_path):
    """Read a headerless, semicolon-separated file into a DataFrame.

    Args:
        file_path: Path of the text file to read.

    Returns:
        pandas.DataFrame with the two columns ``sentence`` and ``emotion``,
        in that order.
    """
    column_names = ['sentence', 'emotion']
    return pd.read_csv(file_path, sep=';', header=None, names=column_names)

# Read the three splits (train, test, validation) in one pass
train_data, test_data, val_data = (
    load_dataset(split_path)
    for split_path in (
        '/content/drive/MyDrive/Emotions dataset NLP/train.txt',
        '/content/drive/MyDrive/Emotions dataset NLP/test.txt',
        '/content/drive/MyDrive/Emotions dataset NLP/val.txt',
    )
)

In [3]:
# Display the test split to sanity-check the parsed `sentence` / `emotion` columns
test_data

Unnamed: 0,sentence,emotion
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness
...,...,...
1995,i just keep feeling like someone is being unki...,anger
1996,im feeling a little cranky negative after this...,anger
1997,i feel that i am useful to my people and that ...,joy
1998,im feeling more comfortable with derby i feel ...,joy


In [4]:
# Preprocess the text data
def preprocess_text(text_data):
    """Fit a word-level tokenizer on ``text_data`` and return padded sequences.

    Args:
        text_data: Re-iterable collection of sentence strings (it is iterated
            once to fit the tokenizer and once to encode).

    Returns:
        tuple: ``(padded_sequences, tokenizer, max_length)`` where
        ``padded_sequences`` is the post-padded integer array,
        ``tokenizer`` is the fitted Keras ``Tokenizer`` (with ``'<OOV>'`` as
        the out-of-vocabulary token), and ``max_length`` is the length of the
        longest encoded sentence, used as the padding width.

    Raises:
        ValueError: If ``text_data`` contains no sentences.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token='<OOV>')
    tokenizer.fit_on_texts(text_data)

    sequences = tokenizer.texts_to_sequences(text_data)
    if len(sequences) == 0:
        raise ValueError('text_data must contain at least one sentence')

    # Measure the encoded sequences rather than a whitespace split: the
    # tokenizer treats its filter characters (punctuation) as separators, so
    # "a-b" is one whitespace word but two tokens. Using the split count could
    # make maxlen too small and silently truncate the longest sentences.
    max_length = max(len(sequence) for sequence in sequences)
    padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
        sequences, maxlen=max_length, padding='post'
    )

    return padded_sequences, tokenizer, max_length


In [5]:
# Preprocess the emotion labels
def preprocess_labels(emotion_data, label_tokenizer=None):
    """Encode emotion label strings as a flat array of integer ids.

    Args:
        emotion_data: Re-iterable collection of label strings, one per sample.
        label_tokenizer: Optional already-fitted tokenizer. Pass the tokenizer
            returned for the training split when encoding other splits so all
            splits share one label -> id mapping. When ``None`` (the default,
            matching the previous behaviour) a new tokenizer is fitted on
            ``emotion_data``.

    Returns:
        tuple: ``(emotion_array, label_tokenizer)`` where ``emotion_array`` is
        a 1-D numpy array with one id per sample and ``label_tokenizer`` is the
        tokenizer that produced it.

    Raises:
        ValueError: If any label does not encode to exactly one id (for
            example a label unknown to a supplied tokenizer, or a label
            containing whitespace).
    """
    if label_tokenizer is None:
        # filters='' keeps every character of the label intact.
        label_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
        label_tokenizer.fit_on_texts(emotion_data)

    emotion_sequences = label_tokenizer.texts_to_sequences(emotion_data)

    # Each label must map to exactly one id; otherwise the reshape below would
    # misalign labels with samples (or fail on a ragged list).
    bad_positions = [
        position
        for position, sequence in enumerate(emotion_sequences)
        if len(sequence) != 1
    ]
    if bad_positions:
        raise ValueError(
            'Labels at positions %s did not encode to exactly one id'
            % bad_positions[:10]
        )

    emotion_array = np.array(emotion_sequences).reshape(-1)

    return emotion_array, label_tokenizer

In [6]:
# Preprocess train, test, and validation data
train_padded, tokenizer, max_length = preprocess_text(train_data['sentence'])
train_labels, label_tokenizer = preprocess_labels(train_data['emotion'])

# Test and validation sentences are encoded with the tokenizer fitted on the
# training split and padded to the same width as the training sequences.
test_padded = tokenizer.texts_to_sequences(test_data['sentence'])
test_padded = tf.keras.preprocessing.sequence.pad_sequences(test_padded, maxlen=max_length, padding='post')
# Labels must also use the *training* label tokenizer. Fitting a new tokenizer
# per split assigns ids by that split's own class frequencies, so the same
# emotion could receive a different id than the one the model was trained on.
test_labels = np.array(label_tokenizer.texts_to_sequences(test_data['emotion'])).reshape(-1)

val_padded = tokenizer.texts_to_sequences(val_data['sentence'])
val_padded = tf.keras.preprocessing.sequence.pad_sequences(val_padded, maxlen=max_length, padding='post')
val_labels = np.array(label_tokenizer.texts_to_sequences(val_data['emotion'])).reshape(-1)

In [7]:
# Build the classifier layer by layer: embedding -> two stacked bidirectional
# LSTMs -> dense head with dropout -> softmax over the label ids.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(
        len(tokenizer.word_index) + 1,  # +1 because tokenizer ids start at 1
        64,
        input_length=max_length,
    )
)
# The first recurrent layer returns the full sequence so the second can consume it.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(
    tf.keras.layers.Dense(
        len(label_tokenizer.word_index) + 1,  # one unit per label id, plus id 0
        activation='softmax',
    )
)

# Labels are integer ids, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training split, monitoring the validation split after each epoch
history = model.fit(
    train_padded,
    train_labels,
    epochs=10,
    validation_data=(val_padded, val_labels),
    verbose=1,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
# Score the trained model on the held-out test split and report both metrics
loss, accuracy = model.evaluate(
    x=test_padded,
    y=test_labels,
)
print(f'Test loss: {loss}, Test accuracy: {accuracy}')

# Make predictions
def predict_emotion(text, tokenizer, label_tokenizer, model, max_length):
    """Predict the emotion label for a single piece of text.

    Args:
        text: The sentence to classify.
        tokenizer: Fitted text tokenizer used to encode ``text``.
        label_tokenizer: Fitted label tokenizer whose ``index_word`` maps
            label ids back to label strings.
        model: Trained model exposing ``predict``; expected to return one row
            of class scores per input sequence.
        max_length: Padding width the model was trained with.

    Returns:
        The predicted emotion label (a key's value from
        ``label_tokenizer.index_word``).
    """
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=max_length, padding='post')
    prediction = model.predict(padded_sequence)

    # Exactly one text was encoded, so take the single row of scores
    # explicitly instead of relying on argmax over the flattened 2-D output.
    class_scores = np.asarray(prediction)[0]

    # Keras tokenizer ids start at 1, so output unit 0 has no entry in
    # index_word. Exclude it from the argmax so the lookup cannot raise
    # KeyError, then shift the position back to the real label id.
    emotion_index = int(np.argmax(class_scores[1:])) + 1
    emotion = label_tokenizer.index_word[emotion_index]
    return emotion

# Try the classifier on a hand-written example sentence
text = "I am not feeling happy today"
predicted_emotion = predict_emotion(
    text,
    tokenizer=tokenizer,
    label_tokenizer=label_tokenizer,
    model=model,
    max_length=max_length,
)
print(f"Predicted emotion for '{text}': {predicted_emotion}")


Test loss: 0.43926525115966797, Test accuracy: 0.9020000100135803
Predicted emotion for 'I am not feeling happy today': joy


In [20]:
import random

def predict_random_test_data(test_data, test_padded, tokenizer, label_tokenizer, model, max_length):
    """Print the text, true label and predicted label of one random test row.

    Args:
        test_data: DataFrame with ``sentence`` and ``emotion`` columns.
        test_padded: Accepted for call compatibility; not used here.
        tokenizer: Fitted text tokenizer, forwarded to ``predict_emotion``.
        label_tokenizer: Fitted label tokenizer, forwarded to ``predict_emotion``.
        model: Trained model, forwarded to ``predict_emotion``.
        max_length: Padding width, forwarded to ``predict_emotion``.
    """
    # Draw a row position uniformly from 0 .. len(test_data) - 1
    chosen_row = test_data.iloc[random.randrange(len(test_data))]
    text = chosen_row['sentence']
    true_emotion = chosen_row['emotion']

    predicted_emotion = predict_emotion(text, tokenizer, label_tokenizer, model, max_length)

    for report_line in (
        f"Text: {text}",
        f"True Emotion: {true_emotion}",
        f"Predicted Emotion: {predicted_emotion}",
    ):
        print(report_line)

# Show true vs. predicted emotion for one randomly drawn test sentence
predict_random_test_data(
    test_data=test_data,
    test_padded=test_padded,
    tokenizer=tokenizer,
    label_tokenizer=label_tokenizer,
    model=model,
    max_length=max_length,
)


Text: i then realized that if i want to shoot weddings of clients who i connect with and feel comfortable with i must allow them to get to know me
True Emotion: joy
Predicted Emotion: joy
