In [None]:
import pandas as pd
import numpy as np
import re

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Colab-only helper: opens the browser file picker so the dataset CSV can be
# uploaded into the runtime before it is read in the next cell.
from google.colab import files

# Bind the result to a name: left as the cell's last expression, the returned
# upload data would be echoed into the cell output.
uploaded = files.upload()


In [None]:
# Location of the uploaded GoEmotions CSV inside the runtime.
DATA_PATH = "/content/goemotions_1.csv"
df = pd.read_csv(DATA_PATH)


In [None]:
# The four target emotions; only these indicator columns (plus the text) are kept.
emotion_cols = ['joy', 'sadness', 'anger', 'fear']
df = df[['text'] + emotion_cols]

# Drop rows that carry none of the target emotions. idxmax() returns the FIRST
# column on ties, so an all-zero row would otherwise be labelled 'joy'.
has_target_emotion = df[emotion_cols].sum(axis=1) > 0
df = df.loc[has_target_emotion].copy()  # copy() so the assignment below hits a real frame

# Single label per row: the flagged emotion. If several are flagged, the one
# listed first in emotion_cols wins (idxmax tie-breaking).
df['label'] = df[emotion_cols].idxmax(axis=1)
df = df[['text', 'label']].reset_index(drop=True)



In [None]:
def clean_text(text):
    """Lower-case ``text`` and remove everything except ASCII letters and whitespace.

    Args:
        text: Raw input text. Any non-``str`` value (e.g. ``None`` or the float
            ``NaN`` pandas uses for a missing cell) is treated as empty text.

    Returns:
        The cleaned string. Digits, punctuation and non-ASCII characters are
        removed; whitespace is left exactly as it was.
    """
    if not isinstance(text, str):
        # Missing values have no .lower(); return empty text instead of raising.
        return ''
    return re.sub(r'[^a-zA-Z\s]', '', text.lower())

# Normalise every text entry element-wise with clean_text.
df['text'] = df['text'].map(clean_text)


In [None]:
# Learn the string-label -> integer-id mapping, then store the ids next to the labels.
le = LabelEncoder().fit(df['label'])
df['label_encoded'] = le.transform(df['label'])


In [None]:
# Word-level tokenizer capped at 20,000 token ids. The explicit OOV token gives
# out-of-vocabulary words (including words first seen at prediction time) a
# shared id instead of silently dropping them from the sequence.
# NOTE: the vocabulary is fit on the full corpus, before the train/test split.
tokenizer = Tokenizer(num_words=20000, oov_token='<OOV>')
tokenizer.fit_on_texts(df['text'])

# Turn each text into a list of token ids, then pad/truncate to length 100.
sequences = tokenizer.texts_to_sequences(df['text'])
X = pad_sequences(sequences, maxlen=100)

# Plain NumPy label vector (row-aligned with X), without the DataFrame index.
y = df['label_encoded'].to_numpy()


In [None]:
# Hold out 20% for testing. stratify=y keeps the class proportions the same in
# both splits, which matters because the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Text classifier assembled layer by layer:
# embedding -> bidirectional LSTM -> dense head with dropout -> softmax over
# the encoded emotion classes.
model = Sequential()
model.add(Embedding(20000, 100, input_length=100))
model.add(Bidirectional(LSTM(128)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(len(le.classes_), activation='softmax'))


In [None]:
# Sparse cross-entropy takes the integer class ids directly (no one-hot step).
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
from sklearn.utils.class_weight import compute_class_weight

# 'balanced' weights are inversely proportional to class frequency in y_train.
train_classes = np.unique(y_train)
balanced_weights = compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_train
)

# Key each weight by its actual class id. enumerate() would only be correct if
# the ids present in y_train were exactly 0..k-1 with none missing.
class_weights = {
    int(class_id): float(weight)
    for class_id, weight in zip(train_classes, balanced_weights)
}


In [None]:
# Train with per-class loss weights. Validation metrics come from a 10% slice
# of the training data, so the test split stays untouched until the final
# evaluation. The History object is kept for inspecting the training curves.
history = model.fit(
    X_train, np.asarray(y_train),
    epochs=15,
    batch_size=32,
    validation_split=0.1,
    class_weight=class_weights
)


In [None]:
# Predicted class id = index of the highest softmax probability in each row.
y_pred = np.argmax(model.predict(X_test), axis=1)

# `labels` pins the report rows to the encoder's class order, so target_names
# always lines up even if some class never occurs in y_test / y_pred.
# zero_division=0 reports 0 (without a warning) for classes never predicted.
print(classification_report(
    y_test, y_pred,
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
    zero_division=0,
))


In [None]:
def predict_emotion(text):
    """Return the predicted emotion label for a single piece of raw text.

    The text goes through the same steps as the training data: ``clean_text``,
    the fitted ``tokenizer``, and padding to length 100. The class with the
    highest model score is mapped back to its string label via ``le``.
    """
    cleaned = clean_text(text)
    token_ids = tokenizer.texts_to_sequences([cleaned])
    padded = pad_sequences(token_ids, maxlen=100)
    scores = model.predict(padded)
    best_class = np.argmax(scores, axis=1)[0]
    labels = le.inverse_transform([best_class])
    return labels[0]

# Quick smoke test of the predictor on two hand-written sentences.
for sample in (
    "I feel very tired and broken today",
    "i  feel amaze after getting good marks ",
):
    print(predict_emotion(sample))

