In [1]:
import ast

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split




In [2]:
# 🚀 Read the merged GoEmotions dataset into a DataFrame.
# The CSV is resolved relative to the notebook's working directory and must
# already exist there.
df = pd.read_csv(filepath_or_buffer="cleaned_goemotions.csv")

In [3]:
# 🔄 Reduce each row's emotion list to a single primary label
def _primary_emotion(raw):
    """Return the first emotion of a row, or "neutral" when the row has none.

    Args:
        raw: The "emotions" cell as loaded from the CSV. It is expected to be
            the string form of a Python list (e.g. "['joy', 'anger']"); a
            value that is already a sequence is used as-is.

    Returns:
        The first element of the parsed list, or "neutral" for an empty list.

    Raises:
        ValueError, SyntaxError: if a string cell is not a valid Python literal.
    """
    # ast.literal_eval only accepts Python literals. The previous eval() call
    # would execute any expression embedded in the data file.
    labels = ast.literal_eval(raw) if isinstance(raw, str) else raw
    return labels[0] if len(labels) > 0 else "neutral"


df["emotions"] = df["emotions"].apply(_primary_emotion)

In [4]:
# 🎯 Map each emotion name to an integer class id.
# The fitted encoder is kept so predictions can be mapped back to names later.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df["emotions"])
df["label"] = encoded_labels

# One output unit per distinct emotion seen in the data.
num_classes = label_encoder.classes_.shape[0]

In [5]:
# 📝 Turn raw text into fixed-length integer sequences.
# Vocabulary is capped at 10000 entries; words outside it map to "<OOV>".
tokenizer = Tokenizer(oov_token="<OOV>", num_words=10000)
tokenizer.fit_on_texts(df["text"])
sequences = tokenizer.texts_to_sequences(df["text"])

# Every sequence is cut or zero-padded at its end to exactly 50 tokens.
padded_sequences = pad_sequences(
    sequences,
    maxlen=50,
    padding="post",
    truncating="post",
)

In [6]:
# 🏋️ Hold out 40% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    df["label"],
    test_size=0.4,
    random_state=36,
)

In [7]:
# 🏗️ Build LSTM Model
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so
# weight initialisation and training-time shuffling are repeatable after a
# kernel restart. (Some GPU ops may still be non-deterministic.)
tf.keras.utils.set_random_seed(36)

# Embedding sizes mirror the tokenisation step: a 10000-word vocabulary and
# sequences padded to 50 tokens.
model = Sequential([
    Embedding(input_dim=10000, output_dim=128, input_length=50),
    LSTM(64, return_sequences=True),  # emits the full sequence for the next LSTM
    LSTM(64),                         # emits only the final state
    Dense(64, activation="relu"),
    Dense(num_classes, activation="softmax")  # Softmax for multi-class classification
])




In [8]:
# Configure training: Adam optimiser, sparse categorical cross-entropy (the
# targets are integer class ids, not one-hot vectors), accuracy as the metric.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)




In [9]:
# 🚀 Train the Model
# Keep the returned History object: its `.history` dict holds the per-epoch
# loss/accuracy curves, and binding it stops the bare object repr from being
# the cell's output.
# NOTE: validation_data is the test split, so the reported val_* metrics are
# not an independent estimate of generalisation.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)


Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1d7160f3510>

In [10]:
# 💾 Persist everything needed for inference: the trained network plus the
# fitted tokenizer and label encoder that turn text into inputs and class ids
# back into emotion names.
import pickle

model.save("emotion_model.h5")

# 🔄 Pickle the two preprocessing objects, tokenizer first, then label encoder.
for artifact_path, artifact in (
    ("tokenizer.pkl", tokenizer),
    ("label_encoder.pkl", label_encoder),
):
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)

print("✅ Model Training Complete. Saved as 'emotion_model.h5'")

✅ Model Training Complete. Saved as 'emotion_model.h5'


  saving_api.save_model(
