In [None]:
import re
import string
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import layers
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
import spacy

# Load spaCy word vectors, downloading the medium English model only when it is missing.
#
# spaCy is already imported at the top of this cell, so upgrading it from here
# (`pip install -U spacy`) cannot affect the running kernel and may leave the loaded
# library out of sync with a freshly downloaded model. Install (and ideally pin) spaCy
# in the environment before starting the kernel instead.
SPACY_MODEL = 'en_core_web_md'

try:
    nlp = spacy.load(SPACY_MODEL)
except OSError:
    # spacy.load raises OSError when the model package is not installed.
    spacy.cli.download(SPACY_MODEL)
    nlp = spacy.load(SPACY_MODEL)

# Integer label id -> emotion name; position in the tuple is the label id.
emotion_mapping = dict(enumerate((
    'sadness',
    'joy',
    'love',
    'anger',
    'fear',
    'surprise',
    'neutral',
)))

# Text cleaning: translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def clean_text(text):
    """Normalise a piece of text for tokenization.

    The text is lower-cased, ASCII punctuation (``string.punctuation``) is
    removed, runs of whitespace are collapsed to a single space and leading /
    trailing whitespace is stripped.

    Args:
        text: The value to clean. Missing values (``None`` or a float NaN, as
            pandas produces for empty CSV cells) are treated as empty text;
            any other non-string value is converted with ``str()`` first.

    Returns:
        The cleaned string (``""`` for missing input).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = str(text).lower().translate(_PUNCTUATION_TABLE)
    return re.sub(r"\s+", " ", text).strip()

# Load the dataset; the code below reads its 'Text' and 'Label' columns.
raw_data = pd.read_csv('Model_ds.csv')

# Handle missing or invalid labels.
# -9223372036854775808 is the int64 minimum; presumably it is what a missing label
# became when the column was cast to int upstream -- TODO confirm against the data source.
labels_numeric = raw_data['Label'].replace(-9223372036854775808, np.nan)

# Map labels to string values, then drop every row that cannot be used for training:
# missing text, missing label, or a label id that is not in emotion_mapping (which maps
# to NaN and would otherwise end up in the label encoder).
data = raw_data.assign(Label=labels_numeric, Emotion=labels_numeric.map(emotion_mapping))
data = data.dropna(subset=['Text', 'Label', 'Emotion']).copy()
if data.empty:
    raise ValueError("No usable rows left in 'Model_ds.csv' after removing missing text/labels")

# Clean the text data (after the missing rows are gone, so clean_text only sees real values)
data['Text'] = data['Text'].astype(str).apply(clean_text)

# Convert labels to integers using LabelEncoder.
# The encoder is fit on ALL remaining labels so that a class that happens to land only in
# the test split cannot make transform() fail, and so the class list is split-independent.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(data['Emotion'])

# Split the dataset AFTER handling missing labels.
# Stratify so each split keeps the (imbalanced) class proportions; train_test_split needs
# at least two samples per class for that, so fall back to a plain split otherwise.
samples_per_class = np.bincount(encoded_labels)
stratify_labels = encoded_labels if samples_per_class.min() >= 2 else None

X_train, X_test, y_train, y_test = train_test_split(
    data['Text'],
    encoded_labels,
    test_size=0.2,
    random_state=1337,
    stratify=stratify_labels,
)
assert len(X_train) + len(X_test) == len(data)

# Tokenization and padding
# max_features caps the tokenizer vocabulary (num_words); sequence_length is the padded length.
max_features, sequence_length = 20000, 500

# The vocabulary is learned from the training texts only.
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(X_train)

# Texts -> lists of word indices.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

# Index lists -> fixed-length arrays of length sequence_length.
X_train_padded, X_test_padded = (
    pad_sequences(sequences, maxlen=sequence_length)
    for sequences in (X_train_seq, X_test_seq)
)

# Build the pretrained embedding matrix: row i holds the spaCy vector of the word whose
# tokenizer index is i; rows for words without a vector (and row 0, which the tokenizer
# never assigns) stay all-zero.
embedding_dim = 300
embedding_matrix = np.zeros((max_features, embedding_dim))

words_with_vectors = 0
for word, i in tokenizer.word_index.items():
    if i >= max_features:
        continue
    # `word in nlp.vocab` only tells whether a lexeme has already been cached, not whether
    # a vector exists, so it left most rows at zero. Ask the vector table directly, and
    # read the vector from it instead of running the whole pipeline with nlp(word).
    if nlp.vocab.has_vector(word):
        embedding_matrix[i] = nlp.vocab.get_vector(word)
        words_with_vectors += 1

# Make the coverage visible: an (almost) all-zero frozen embedding cannot be learned from.
words_considered = min(len(tokenizer.word_index), max_features - 1)
print(f"Pretrained vectors found for {words_with_vectors} of {words_considered} vocabulary words")

# Build the model layer by layer:
# frozen pretrained embedding -> two Conv1D/MaxPooling1D stages -> two stacked
# bidirectional LSTMs -> dense head with dropout -> softmax over the encoded classes.
model = tf.keras.Sequential()

# Embedding initialised from embedding_matrix and kept fixed during training.
model.add(layers.Embedding(max_features, embedding_dim, input_length=sequence_length, weights=[embedding_matrix], trainable=False))

# Convolutional feature extraction, each stage followed by pooling with window 5.
model.add(layers.Conv1D(128, 5, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(128, 5, activation='relu'))
model.add(layers.MaxPooling1D(5))

# The first recurrent layer returns the full sequence so the second one can consume it.
model.add(layers.Bidirectional(layers.LSTM(128, return_sequences=True)))
model.add(layers.Bidirectional(layers.LSTM(128)))

# Classification head; one output unit per class known to the label encoder.
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(len(label_encoder.classes_), activation='softmax'))

# Integer class ids as targets, hence the sparse categorical loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

# Train the model
epochs = 3
batch_size = 64

# NOTE(review): with patience=2 and epochs=3 this callback cannot end training before the
# final epoch; raise `epochs` if early stopping is meant to decide the training length.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# The classes are heavily imbalanced, and an unweighted loss lets the model settle on the
# majority class. Weight each class inversely to its frequency in the training labels
# ("balanced" heuristic: n_samples / (n_classes * class_count)).
# Keras expects the keys to be exactly 0..n_classes-1, so classes that are absent from
# y_train still get an entry (count clamped to 1; the weight is never used for them).
n_classes = len(label_encoder.classes_)
train_class_counts = np.bincount(y_train, minlength=n_classes)
class_weight = {
    class_id: len(y_train) / (n_classes * max(int(count), 1))
    for class_id, count in enumerate(train_class_counts)
}

history = model.fit(
    X_train_padded,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
    callbacks=[early_stopping],
    class_weight=class_weight,
)

# Evaluate the model on test set: predict class probabilities, keep the most likely class.
y_pred = model.predict(X_test_padded)
y_pred_classes = np.argmax(y_pred, axis=1)

# Print classification report.
# `labels` pins the report to every class the encoder knows; without it the label set is
# inferred from y_test/y_pred_classes and the call raises when that set does not match
# the length of target_names (e.g. a class missing from the test split).
# `zero_division=0` keeps the 0.00 scores for never-predicted classes but silences the
# repeated undefined-metric warnings.
print("Classification Report:")
print(classification_report(
    y_test,
    y_pred_classes,
    labels=np.arange(len(label_encoder.classes_)),
    target_names=label_encoder.classes_,
    zero_division=0,
))

# Function to predict emotion from text
def predict_emotion(text):
    """Predict the emotion label for a single piece of text.

    The text goes through the same steps as the training data: `clean_text`,
    conversion to word indices with the fitted `tokenizer`, and padding to
    `sequence_length`. The class with the highest predicted probability is
    looked up in `label_encoder.classes_`.

    Args:
        text: Raw input text.

    Returns:
        The entry of `label_encoder.classes_` for the top-scoring class.
    """
    padded_input = pad_sequences(
        tokenizer.texts_to_sequences([clean_text(text)]),
        maxlen=sequence_length,
    )
    class_scores = model.predict(padded_input)
    # A single text was passed in, so only the first row of the prediction is relevant.
    best_class = np.argmax(class_scores, axis=1)[0]
    return label_encoder.classes_[best_class]

# Take user input for prediction in a loop until the user types 'exit'.
while True:
    try:
        user_text = input("Enter text to predict emotion (or type 'exit' to quit): ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user interrupted the prompt: leave the loop cleanly
        # instead of ending the cell with a traceback.
        break

    # Compare on the trimmed text so that e.g. "exit " still quits.
    command = user_text.strip().lower()
    if command == 'exit':
        break
    if not command:
        # Nothing to classify; ask again rather than predicting on empty input.
        continue

    predicted_emotion = predict_emotion(user_text)
    print(f"Text: {user_text}\nPredicted Emotion: {predicted_emotion}\n")


Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Epoch 1/3
Epoch 2/3
Epoch 3/3


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report:
              precision    recall  f1-score   support

       anger       0.00      0.00      0.00       166
        fear       0.00      0.00      0.00        74
         joy       0.00      0.00      0.00       147
     neutral       0.59      0.98      0.74      1142
     sadness       0.00      0.00      0.00       110
    surprise       0.63      0.20      0.30       379

    accuracy                           0.59      2018
   macro avg       0.20      0.20      0.17      2018
weighted avg       0.45      0.59      0.48      2018

