# Emotion classification model (LSTM)

In [38]:
import pandas as pd
import numpy as np

#### Import data

In [39]:
# text.txt has no header row: every line is "<sentence>;<emotion>",
# so the column names are supplied here.
df = pd.read_csv("text.txt", sep=";", header=None, names=["text", "emotion"])

In [40]:
# Bare trailing expression: the notebook renders the loaded frame as the cell output.
df

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
17995,i just keep feeling like someone is being unki...,anger
17996,im feeling a little cranky negative after this...,anger
17997,i feel that i am useful to my people and that ...,joy
17998,im feeling more comfortable with derby i feel ...,joy


In [41]:
# (rows, columns) of the loaded frame.
df.shape

(18000, 2)

In [61]:
# Show one full sentence, since the frame preview truncates long text.
df.loc[17997, "text"]

'i feel that i am useful to my people and that gives me a great feeling of achievement'

#### Import libraries

In [42]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

#### Apply `LabelEncoder` to the target column

In [43]:
# Fit the encoder on the emotion column, then map each label string to its integer id.
# `le` is kept so predictions can be mapped back to label names later.
le = LabelEncoder().fit(df["emotion"])
y = le.transform(df["emotion"])

#### Split data into train and test sets

In [44]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df["text"], y, test_size=0.2, random_state=42
)

#### Tokenize the text data

In [45]:
# The vocabulary is learned from the training texts only; words outside it
# are mapped to the "<OOV>" token when converting either split.
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Convert both splits to integer sequences with the same fitted tokenizer.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

#### Pad sequences to a fixed maximum length

In [46]:
# Every sequence is brought to exactly MAX_LEN token ids.
MAX_LEN = 50

# padding="post" appends the padding after the real tokens; both splits get the same treatment.
X_train_pad, X_test_pad = (
    pad_sequences(seqs, maxlen=MAX_LEN, padding="post")
    for seqs in (X_train_seq, X_test_seq)
)

# Build and train the LSTM model

In [47]:
# Architecture: token embedding -> single LSTM layer -> softmax over the emotion classes.
model = Sequential([
    # input_dim is read from the tokenizer's vocabulary cap so the two values
    # cannot drift apart. The deprecated `input_length` argument is dropped:
    # Keras 3 warns about it and the layer does not need it.
    Embedding(input_dim=tokenizer.num_words, output_dim=128),
    LSTM(64),
    # One output unit per label known to the fitted LabelEncoder.
    Dense(len(le.classes_), activation="softmax")
])

# The sparse loss is used because y_train / y_test hold integer class ids
# (LabelEncoder output), not one-hot vectors.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# NOTE(review): the test split is also passed as validation data, so the
# val_* numbers printed per epoch come from the same rows that are evaluated later.
model.fit(
    X_train_pad,
    y_train,
    epochs=20,
    batch_size=50,
    validation_data=(X_test_pad, y_test)
)



Epoch 1/20
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 31ms/step - accuracy: 0.3268 - loss: 1.5978 - val_accuracy: 0.3331 - val_loss: 1.5746
Epoch 2/20
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.3405 - loss: 1.5737 - val_accuracy: 0.2997 - val_loss: 1.5774
Epoch 3/20
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 25ms/step - accuracy: 0.3386 - loss: 1.5668 - val_accuracy: 0.3328 - val_loss: 1.5738
Epoch 4/20
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 25ms/step - accuracy: 0.3554 - loss: 1.5547 - val_accuracy: 0.3331 - val_loss: 1.5625
Epoch 5/20
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 25ms/step - accuracy: 0.3674 - loss: 1.4655 - val_accuracy: 0.3325 - val_loss: 1.4715
Epoch 6/20
[1m288/288[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 24ms/step - accuracy: 0.4103 - loss: 1.3065 - val_accuracy: 0.4003 - val_loss: 1.2965
Epoch 7/20
[1m288/28

<keras.src.callbacks.history.History at 0x1f256763710>

#  Accuracy

In [48]:
# evaluate() returns the compiled loss followed by the compiled metrics (accuracy only).
loss, acc = model.evaluate(X_test_pad, y_test)
print(f"Accuracy: {acc}")

[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.8870 - loss: 0.4418
Accuracy: 0.8891666531562805



# Confusion matrix

In [49]:
from sklearn.metrics import confusion_matrix

# Take the highest-probability class for every test row.
y_pred = model.predict(X_test_pad).argmax(axis=1)

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step
[[ 419    6   12    5   22    1]
 [  16  354    7    0   28   15]
 [  15    8 1109   43   23    1]
 [   7    3   69  214    9    0]
 [  26   13   18    5 1017    0]
 [   3   31   12    0    1   88]]


# Classification report

In [50]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1, labelled with the original emotion names.
print(classification_report(y_test, y_pred, target_names=le.classes_))

              precision    recall  f1-score   support

       anger       0.86      0.90      0.88       465
        fear       0.85      0.84      0.85       420
         joy       0.90      0.92      0.91      1199
        love       0.80      0.71      0.75       302
     sadness       0.92      0.94      0.93      1079
    surprise       0.84      0.65      0.73       135

    accuracy                           0.89      3600
   macro avg       0.86      0.83      0.84      3600
weighted avg       0.89      0.89      0.89      3600



#### Short observations / summary

- Overall model performance is strong with 89% accuracy on 3600 samples.
- Joy and sadness are predicted very well (F1 ≈ 0.91–0.93), likely because they have more training data and clearer language patterns.
- Anger and fear also show good balance between precision and recall (F1 ≈ 0.85–0.88).
- Love and especially surprise perform weaker, mainly due to lower recall (the model misses these emotions more often).
- The gap between macro avg (0.84) and weighted avg (0.89) indicates class imbalance—classes with fewer samples (love, surprise) hurt macro performance.

#### Quick manual test

In [27]:
def predict_emotion(text):
    """Return the emotion label the trained model assigns to one sentence.

    The sentence goes through the same steps as the training data: the fitted
    `tokenizer` turns it into token ids, the ids are post-padded to `MAX_LEN`,
    `model` scores every class, and the best-scoring class id is mapped back
    to its label name with `le`.

    Parameters
    ----------
    text : str
        A single sentence to classify.

    Returns
    -------
    The label produced by `le.inverse_transform` for the top-scoring class.
    """
    # Wrap the sentence in a list: the tokenizer and the model both work on batches.
    padded = pad_sequences(
        tokenizer.texts_to_sequences([text]),
        maxlen=MAX_LEN,
        padding="post",
    )
    scores = model.predict(padded)
    # Only one row was submitted, so take the winning class id of row 0.
    best_class = scores.argmax(axis=1)[0]
    return le.inverse_transform([best_class])[0]

In [53]:
# Smoke-test the helper on one hand-written sentence.
sentence = "i feel like a happy"
print(predict_emotion(text=sentence))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step
joy


In [57]:
# `load_model` is not imported anywhere else in the notebook; without this
# import the reload below raises NameError on a fresh kernel.
from tensorflow.keras.models import load_model

# Persist the trained network, then reload it from disk to confirm the saved
# file can be read back.
# NOTE(review): the ".h5" suffix selects the legacy HDF5 format; recent Keras
# versions recommend the native ".keras" format. The filename is left unchanged
# in case other code expects "model.h5".
model.save("model.h5")
model = load_model("model.h5")



In [59]:
import pickle

# The fitted tokenizer and label encoder are saved so the same preprocessing
# and label mapping can be restored later.
for filename, artifact in (("tokenizer.pkl", tokenizer), ("label_encoder.pkl", le)):
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)