In [42]:
import re
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.preprocessing import LabelEncoder
from sklearn import svm
from nltk import PorterStemmer, word_tokenize
from nltk.corpus import stopwords
import unidecode
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score 
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
import joblib
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the three WASSA CSV splits; the unpacking order mirrors the path order below.
train_data, test_data, val_data = map(
    pd.read_csv,
    ('data/wassa_train.csv', 'data/wassa_test.csv', 'data/wassa_val.csv'),
)

In [3]:
# Display the training frame as loaded (last expression of the cell, so it is rendered).
train_data

Unnamed: 0,id,tweet,emotion
0,1929,Kinda wished I watched mischievous kiss before...,joy
1,4049,When you forget to mention you were bought dre...,anger
2,5405,@FreyaLynn @donniewahlberg yep! jimmy buffett ...,neutral
3,5900,"@philcampbell blue skies? where, it's still gr...",neutral
4,3712,Some moving clips on youtube tonight of the vi...,anger
...,...,...,...
5996,5407,@koifusionpdx i was so close to the tacos...on...,neutral
5997,496,@Fatumoriginal there's no sitcom better! If u ...,sadness
5998,499,@Mysteri759 @Ren102e906 @slb42jcb @swoozyqyah ...,sadness
5999,1784,Watch this amazing live.ly broadcast by @ittzd...,joy


In [4]:
def preprocess_text(text):
    """Normalise a raw tweet/sentence into a lowercase, letters-only string.

    Steps, in order:
      1. lowercase and strip surrounding whitespace;
      2. drop URLs (anything starting with ``http``) and every whitespace-delimited
         chunk that contains ``@`` (mentions, e-mail addresses);
      3. transliterate non-ASCII characters with ``unidecode``;
      4. tokenise with ``word_tokenize`` and drop English stop words;
      5. remove every character that is not an ASCII letter or a space
         (this also removes digits and punctuation).

    No stemming is applied.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string. Every kept token is followed by one space, so a
        non-empty result ends with a trailing space, and tokens made up only of
        removed characters leave consecutive spaces behind.
    """
    result = text.lower().strip()

    # Raw strings so that \S and \s reach the regex engine as regex escapes
    # instead of being parsed as (invalid) Python string escapes.
    result = re.sub(r"http\S+", "", result)
    result = re.sub(r"\S*@\S*\s?", "", result)
    result = unidecode.unidecode(result)

    # A set gives O(1) membership checks per token.
    stop_words = set(stopwords.words("english"))
    kept_words = [word for word in word_tokenize(result) if word not in stop_words]

    # One space after every kept token (trailing space included) keeps the
    # output format downstream cells were built on.
    result = "".join(word + " " for word in kept_words)

    # Letters and spaces only; digits fall outside this class too, so no
    # separate digit pass is needed.
    return re.sub(r"[^a-zA-Z ]", "", result)

In [5]:
def preprocess_text_list(text_list):
    """Clean every entry of ``text_list`` in place with ``preprocess_text``.

    The list itself is modified; nothing is returned.
    """
    for position, raw_text in enumerate(text_list):
        text_list[position] = preprocess_text(raw_text)

In [6]:
# Pull the raw tweets and their emotion labels out of each split as plain Python lists.
train_texts, val_texts, test_texts = (
    split["tweet"].tolist() for split in (train_data, val_data, test_data)
)
train_labels, val_labels, test_labels = (
    split["emotion"].tolist() for split in (train_data, val_data, test_data)
)

# Peek at the first ten training labels.
train_labels[:10]
     

['joy',
 'anger',
 'neutral',
 'neutral',
 'anger',
 'joy',
 'neutral',
 'sadness',
 'sadness',
 'neutral']

In [7]:
# Clean each split in place; preprocess_text_list overwrites the list entries and returns nothing.
# NOTE(review): because the lists are overwritten, re-running this cell feeds already-cleaned
# text through preprocess_text again — confirm that is harmless or keep raw copies.
preprocess_text_list(train_texts)
preprocess_text_list(val_texts)
preprocess_text_list(test_texts)

# Show the first ten cleaned training texts.
train_texts[:10]

['kinda wished watched mischievous kiss playful kiss ',
 'forget mention bought dreamboys tickets ',
 'yep  jimmy buffett ftw     jimmy needs tweet words wisdom every morning ',
 'blue skies   s still grey hazy window ',
 'moving clips youtube tonight vigil held tulsa metropolitan baptist church  terencecruther  justice  sadness ',
 'makes successful  happy forever ',
 'morning hon  breakfast  cant decide ',
 'standard candice starting show pout  startasyoumeantogoon  gbbo ',
 '   would frown bit  folding arms  why every time m need assistance someone expects lil   ',
 'someone make cofffeeeeeee  ']

In [8]:
# Fit the vocabulary on the training texts only.
# `Tokenizer` is imported twice at the top of the notebook (tensorflow.keras, then keras);
# the later import is the one bound here.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(train_texts)

In [9]:
# Convert every split to integer sequences using the tokenizer fitted on the training texts.
tokenized_train_texts, tokenized_val_texts, tokenized_test_texts = map(
    tokenizer.texts_to_sequences,
    (train_texts, val_texts, test_texts),
)

In [10]:
# Every split is padded to the length of the longest tokenised *training* text.
max_sequence_length = max(map(len, tokenized_train_texts))
padded_train_texts, padded_val_texts, padded_test_texts = (
    pad_sequences(sequences, maxlen=max_sequence_length)
    for sequences in (tokenized_train_texts, tokenized_val_texts, tokenized_test_texts)
)


In [11]:
# Map emotion names to integer class ids; the mapping is learned from the training labels only.
label_encoder = LabelEncoder()
encoded_train_labels = label_encoder.fit_transform(train_labels)
encoded_val_labels, encoded_test_labels = map(
    label_encoder.transform,
    (val_labels, test_labels),
)


In [12]:
# Hyperparameters consumed by the model-building cell.
embedding_dim = 50  # output_dim of the Embedding layer
lstm_units = 100  # units of the LSTM layer

In [13]:
# Embedding -> LSTM -> softmax classifier, assembled from a layer list.
model = Sequential([
    Embedding(
        # NOTE(review): the +1 presumably leaves room for index 0 (padding) — confirm.
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=embedding_dim,
        input_length=max_sequence_length,
    ),
    LSTM(units=lstm_units),
    # One softmax output per class known to the label encoder.
    Dense(units=len(label_encoder.classes_), activation='softmax'),
])

In [14]:
# The targets are the integer class ids produced by label_encoder, hence the sparse categorical loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [15]:
# Train for 5 epochs, passing the validation split so it is evaluated alongside training.
# NOTE(review): no random seed is set in the visible cells, so results may differ between runs.
model.fit(padded_train_texts, encoded_train_labels, epochs=5, validation_data=(padded_val_texts, encoded_val_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1ea78c53650>

In [16]:
# Score the trained model on the test split; `accuracy` is the metric requested in compile().
loss, accuracy = model.evaluate(padded_test_texts, encoded_test_labels)
print(f'Test accuracy: {accuracy * 100:.2f}%')

Test accuracy: 86.67%


In [34]:
# Hand-written sentences used to sanity-check the trained LSTM.
# Each sentence must be its own element: adjacent string literals without a
# comma between them are silently merged into a single string.
emotions = [
    "I hate getting up, it's so annoying.",
    "I love cats and their sweet noses.",
    "I put up with my schoolmates.",
    "This movie made me feel so happy!",
    "The traffic jam today was frustrating.",
    "Spending time with friends always brings joy.",
    "I feel lonely when I'm alone at home.",
    "Excited about the upcoming vacation!",
    "I love spending time with friends and family. It brings me so much joy!",
    "The rude behavior of that person made me really angry.",
    "I'm feeling neutral about the upcoming changes at work.",
    "Today is just another neutral day for me.",
    "The constant delays and issues with the project are making me angry.",
    "Winning the lottery would bring me immense joy and happiness!",
    "I don't have a strong opinion on the matter; I'm feeling quite neutral.",
    "The news about the recent event left me in deep sadness.",
    "Losing a loved one is an experience filled with sadness and grief.",
    "I have a neutral stance on the current political situation.",
    "I'm so happy right now!",
    "This is really frustrating.",
    "Feeling sad and lonely tonight.",
    "I can't contain my excitement!",
    "Angry about the recent events.",
    "Today is a wonderful day!",
    "Dealing with a lot of stress.",
    "The movie made me cry, but it was beautiful.",
    "I'm thrilled about the upcoming project.",
    "Feeling a bit anxious about the presentation.",
]

# The result table shows the original, unprocessed sentences.
texts = list(emotions)

# Clean, index and pad all sentences together so the network is called once
# on the whole batch instead of once per sentence.
preprocessed_texts = [preprocess_text(sentence) for sentence in texts]
padded_texts = pad_sequences(
    tokenizer.texts_to_sequences(preprocessed_texts),
    maxlen=max_sequence_length,
)
prediction_probas_lstm = model.predict(padded_texts)

# Per sentence: the most likely class decoded back to its label, and the
# probability assigned to that class.
predicted_emotions_lstm = list(
    label_encoder.inverse_transform(prediction_probas_lstm.argmax(axis=1))
)
probabilities_lstm = list(prediction_probas_lstm.max(axis=1))

emotion_df_lstm = pd.DataFrame({
    'Text': texts,
    'Predicted Emotion (LSTM)': predicted_emotions_lstm,
    'Probability (LSTM)': probabilities_lstm,
})

emotion_df_lstm




Unnamed: 0,Text,Predicted Emotion (LSTM),Probability (LSTM)
0,"I hate getting up, it's so annoying.",neutral,0.733853
1,I love cats and their sweet noses.,joy,0.977139
2,I put up with my schoolmates.,neutral,0.90588
3,This movie made me feel so happy!,joy,0.646331
4,The traffic jam today was frustrating.,anger,0.524004
5,Spending time with friends always brings joy.,joy,0.926883
6,I feel lonely when I'm alone at home.,neutral,0.996206
7,Excited about the upcoming vacation!,neutral,0.92096
8,I love spending time with friends and family. ...,joy,0.858424
9,The rude behavior of that person made me reall...,anger,0.997394


In [35]:
text = "Today is a wonderful day!	"

# Same cleaning -> indexing -> padding steps that were applied to the training texts.
preprocessed_text = preprocess_text(text)
tokenized_text = tokenizer.texts_to_sequences([preprocessed_text])
padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

# Single-row batch in, one row of per-class scores out.
prediction_proba_lstm = model.predict(padded_text)

# Winning class decoded back to its label, plus the score it received.
predicted_emotion_lstm = label_encoder.inverse_transform(np.argmax(prediction_proba_lstm, axis=1))[0]
probability_lstm = np.max(prediction_proba_lstm)

# (label, score) pairs ordered from most to least likely.
sorted_probabilities_lstm = sorted(
    zip(label_encoder.classes_, prediction_proba_lstm.ravel()),
    key=lambda label_and_score: label_and_score[1],
    reverse=True,
)

print(
    f"Text: {text}\n"
    f"Predicted Emotion (LSTM): {predicted_emotion_lstm}\n"
    f"Probability (LSTM): {probability_lstm:.2f}"
)
print(f"Probabilities per class (LSTM): {sorted_probabilities_lstm}")


Text: Today is a wonderful day!	
Predicted Emotion (LSTM): joy
Probability (LSTM): 0.97
Probabilities per class (LSTM): [('joy', 0.97122926), ('neutral', 0.023158265), ('sadness', 0.00332496), ('fear', 0.0022219492), ('anger', 6.549842e-05)]


In [46]:
text = "I'm afraid of spiders, they're so big!"

# Same cleaning -> indexing -> padding steps that were applied to the training texts.
preprocessed_text = preprocess_text(text)
tokenized_text = tokenizer.texts_to_sequences([preprocessed_text])
padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

# Single-row batch in, one row of per-class scores out.
prediction_proba_lstm = model.predict(padded_text)

# Winning class decoded back to its label, plus the score it received.
predicted_emotion_lstm = label_encoder.inverse_transform(np.argmax(prediction_proba_lstm, axis=1))[0]
probability_lstm = np.max(prediction_proba_lstm)

# (label, score) pairs ordered from most to least likely.
sorted_probabilities_lstm = sorted(
    zip(label_encoder.classes_, prediction_proba_lstm.ravel()),
    key=lambda label_and_score: label_and_score[1],
    reverse=True,
)

print(
    f"Text: {text}\n"
    f"Predicted Emotion (LSTM): {predicted_emotion_lstm}\n"
    f"Probability (LSTM): {probability_lstm:.2f}"
)
print(f"Probabilities per class (LSTM): {sorted_probabilities_lstm}")


Text: I'm afraid of spiders, they're so big!
Predicted Emotion (LSTM): fear
Probability (LSTM): 0.99
Probabilities per class (LSTM): [('fear', 0.9944095), ('joy', 0.0049989815), ('neutral', 0.00037351483), ('sadness', 0.00017394901), ('anger', 4.4041135e-05)]


In [36]:
text = """The air crackles with tension, and the world feels like a pressure cooker about to explode. Every step is heavy, and the rhythm of life echoes with the discordant beats of frustration. It's as if the very air is charged with an unsettling energy, ready to unleash the storm that brews within.

The smallest inconveniences become fuel for the fiery furnace of anger, each frustration adding another log to the burning resentment. Words hang in the air, sharp and cutting, like daggers seeking release from the depths of irritation. The world seems painted in shades of red, a visual representation of the simmering rage beneath the surface.

In this moment, control feels elusive, slipping through clenched fists like sand. The desire to scream or slam doors becomes a tempting release, an outlet for the building resentment. The very fabric of patience feels threadbare, stretched to its limit, threatening to unravel at the slightest provocation."""

# Same cleaning -> indexing -> padding steps that were applied to the training texts.
preprocessed_text = preprocess_text(text)
tokenized_text = tokenizer.texts_to_sequences([preprocessed_text])
padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

# Single-row batch in, one row of per-class scores out.
prediction_proba_lstm = model.predict(padded_text)

# Winning class decoded back to its label, plus the score it received.
predicted_emotion_lstm = label_encoder.inverse_transform(np.argmax(prediction_proba_lstm, axis=1))[0]
probability_lstm = np.max(prediction_proba_lstm)

# (label, score) pairs ordered from most to least likely.
sorted_probabilities_lstm = sorted(
    zip(label_encoder.classes_, prediction_proba_lstm.ravel()),
    key=lambda label_and_score: label_and_score[1],
    reverse=True,
)

print(
    f"Text: {text}\n"
    f"Predicted Emotion (LSTM): {predicted_emotion_lstm}\n"
    f"Probability (LSTM): {probability_lstm:.2f}"
)
print(f"Probabilities per class (LSTM): {sorted_probabilities_lstm}")



Text: The air crackles with tension, and the world feels like a pressure cooker about to explode. Every step is heavy, and the rhythm of life echoes with the discordant beats of frustration. It's as if the very air is charged with an unsettling energy, ready to unleash the storm that brews within.

The smallest inconveniences become fuel for the fiery furnace of anger, each frustration adding another log to the burning resentment. Words hang in the air, sharp and cutting, like daggers seeking release from the depths of irritation. The world seems painted in shades of red, a visual representation of the simmering rage beneath the surface.

In this moment, control feels elusive, slipping through clenched fists like sand. The desire to scream or slam doors becomes a tempting release, an outlet for the building resentment. The very fabric of patience feels threadbare, stretched to its limit, threatening to unravel at the slightest provocation.
Predicted Emotion (LSTM): anger
Probability (L

In [37]:
text = """In the quietude of solitude, a heavy fog of sadness descends, enveloping the world in a muted palette of gray. Each breath feels like a burden, and the air is thick with the weight of unspoken sorrows. The echo of a distant sigh seems to reverberate through the emptiness, a haunting melody of melancholy.

Every step is laden with the echoes of past heartaches, and the landscape is painted with the brushstrokes of unshed tears. The world, once vibrant and alive, now appears as a desolate tableau, a reflection of the internal storm that rages within. The rhythm of life slows to a somber cadence, mirroring the heavy heart that beats in sync with the sorrowful symphony.

Gazing out through the window, raindrops trace their lonely paths, a metaphor for the tears that refuse to fall. The silence is broken only by the occasional gentle patter, a lullaby for the dispirited soul. The ache is palpable, an emotional bruise that colors every interaction with a tinge of muted blue."""

# Same cleaning -> indexing -> padding steps that were applied to the training texts.
preprocessed_text = preprocess_text(text)
tokenized_text = tokenizer.texts_to_sequences([preprocessed_text])
padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

# Single-row batch in, one row of per-class scores out.
prediction_proba_lstm = model.predict(padded_text)

# Winning class decoded back to its label, plus the score it received.
predicted_emotion_lstm = label_encoder.inverse_transform(np.argmax(prediction_proba_lstm, axis=1))[0]
probability_lstm = np.max(prediction_proba_lstm)

# (label, score) pairs ordered from most to least likely.
sorted_probabilities_lstm = sorted(
    zip(label_encoder.classes_, prediction_proba_lstm.ravel()),
    key=lambda label_and_score: label_and_score[1],
    reverse=True,
)

print(
    f"Text: {text}\n"
    f"Predicted Emotion (LSTM): {predicted_emotion_lstm}\n"
    f"Probability (LSTM): {probability_lstm:.2f}"
)
print(f"Probabilities per class (LSTM): {sorted_probabilities_lstm}")



Text: In the quietude of solitude, a heavy fog of sadness descends, enveloping the world in a muted palette of gray. Each breath feels like a burden, and the air is thick with the weight of unspoken sorrows. The echo of a distant sigh seems to reverberate through the emptiness, a haunting melody of melancholy.

Every step is laden with the echoes of past heartaches, and the landscape is painted with the brushstrokes of unshed tears. The world, once vibrant and alive, now appears as a desolate tableau, a reflection of the internal storm that rages within. The rhythm of life slows to a somber cadence, mirroring the heavy heart that beats in sync with the sorrowful symphony.

Gazing out through the window, raindrops trace their lonely paths, a metaphor for the tears that refuse to fall. The silence is broken only by the occasional gentle patter, a lullaby for the dispirited soul. The ache is palpable, an emotional bruise that colors every interaction with a tinge of muted blue.
Predicted 

In [38]:
text = """Waking up to the warm embrace of sunshine, the world seems to sparkle with endless possibilities. A symphony of birdsong dances through the air, creating a melody that resonates with the rhythm of joy. Today is not just a day; it's a canvas waiting to be painted with vibrant hues of happiness.

As I step outside, a gentle breeze carries whispers of optimism, and the flowers in bloom release a fragrance that feels like nature's way of smiling. There's an infectious energy in the atmosphere, a reminder that life is a grand celebration, and every moment is a chance to dance to the melody of our own existence.

In the midst of this symphony of joy, even the mundane becomes extraordinary. The rhythmic tapping of my footsteps on the pavement transforms into a cheerful beat, syncopating with the pulse of life. The world seems to applaud my every move, as if nature itself is cheering me on.

Meeting the gaze of a stranger, there's a shared recognition of the beauty that surrounds us. It's as if joy is a secret language, and we are all fluent in its expression. Laughter becomes the currency of the day, exchanged freely between friends and strangers alike, creating a tapestry of shared happiness."""

# Same cleaning -> indexing -> padding steps that were applied to the training texts.
preprocessed_text = preprocess_text(text)
tokenized_text = tokenizer.texts_to_sequences([preprocessed_text])
padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

# Single-row batch in, one row of per-class scores out.
prediction_proba_lstm = model.predict(padded_text)

# Winning class decoded back to its label, plus the score it received.
predicted_emotion_lstm = label_encoder.inverse_transform(np.argmax(prediction_proba_lstm, axis=1))[0]
probability_lstm = np.max(prediction_proba_lstm)

# (label, score) pairs ordered from most to least likely.
sorted_probabilities_lstm = sorted(
    zip(label_encoder.classes_, prediction_proba_lstm.ravel()),
    key=lambda label_and_score: label_and_score[1],
    reverse=True,
)

print(
    f"Text: {text}\n"
    f"Predicted Emotion (LSTM): {predicted_emotion_lstm}\n"
    f"Probability (LSTM): {probability_lstm:.2f}"
)
print(f"Probabilities per class (LSTM): {sorted_probabilities_lstm}")



Text: Waking up to the warm embrace of sunshine, the world seems to sparkle with endless possibilities. A symphony of birdsong dances through the air, creating a melody that resonates with the rhythm of joy. Today is not just a day; it's a canvas waiting to be painted with vibrant hues of happiness.

As I step outside, a gentle breeze carries whispers of optimism, and the flowers in bloom release a fragrance that feels like nature's way of smiling. There's an infectious energy in the atmosphere, a reminder that life is a grand celebration, and every moment is a chance to dance to the melody of our own existence.

In the midst of this symphony of joy, even the mundane becomes extraordinary. The rhythmic tapping of my footsteps on the pavement transforms into a cheerful beat, syncopating with the pulse of life. The world seems to applaud my every move, as if nature itself is cheering me on.

Meeting the gaze of a stranger, there's a shared recognition of the beauty that surrounds us. It'

In [40]:
# Persist the trained network to disk.
# NOTE(review): only `model` is written by this cell; the fitted tokenizer, label_encoder and
# max_sequence_length that inference also relies on are not saved here — confirm they are stored elsewhere.
model.save('lstm_model/lstm_model.keras')

In [44]:
# Reload the network from the path it was saved to, under a separate name from the in-memory `model`.
loaded_lstm_model = load_model('lstm_model/lstm_model.keras')

In [45]:
text = "Today is a wonderful excitement day!"

# Same cleaning -> indexing -> padding steps that were applied to the training texts.
preprocessed_text = preprocess_text(text)
tokenized_text = tokenizer.texts_to_sequences([preprocessed_text])
padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

# Score with the model reloaded from disk rather than the in-memory one.
prediction_proba_lstm = loaded_lstm_model.predict(padded_text)

# Winning class decoded back to its label, plus the score it received.
predicted_emotion_lstm = label_encoder.inverse_transform(np.argmax(prediction_proba_lstm, axis=1))[0]
probability_lstm = np.max(prediction_proba_lstm)

# (label, score) pairs ordered from most to least likely.
sorted_probabilities_lstm = sorted(
    zip(label_encoder.classes_, prediction_proba_lstm.ravel()),
    key=lambda label_and_score: label_and_score[1],
    reverse=True,
)

print(
    f"Text: {text}\n"
    f"Predicted Emotion (LSTM): {predicted_emotion_lstm}\n"
    f"Probability (LSTM): {probability_lstm:.2f}"
)
print(f"Probabilities per class (LSTM): {sorted_probabilities_lstm}")


Text: Today is a wonderful excitement day!
Predicted Emotion (LSTM): joy
Probability (LSTM): 0.99
Probabilities per class (LSTM): [('joy', 0.99302834), ('neutral', 0.0028970654), ('sadness', 0.002856291), ('fear', 0.0011492128), ('anger', 6.897687e-05)]
