In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
import re

# Download stopwords and wordnet datasets
# These corpora back the `stopwords` and `WordNetLemmatizer` imports used by
# the preprocessing step; they must be present before that code runs.
import nltk
nltk.download('stopwords')
nltk.download('wordnet')  # last expression: its return value is what the cell displays

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [19]:
# Load the dataset
# The CSV location is kept in one named constant so it is easy to spot and
# change when the notebook runs somewhere other than this environment.
DATA_PATH = "/content/drive/MyDrive/dL/Emotion_classify_Data.csv"
data = pd.read_csv(DATA_PATH)

In [20]:
# Data cleaning and preprocessing
# Build the NLTK resources once. Previously the stop-word list was re-read and
# scanned linearly for every single token, and a new stemmer/lemmatizer was
# created on every call, which made preprocessing a whole column very slow.
_STOP_WORDS = frozenset(stopwords.words('english'))
_STEMMER = PorterStemmer()
_LEMMATIZER = WordNetLemmatizer()


def preprocess_text(text):
    """Normalize a raw comment into a space-joined string of stemmed tokens.

    Steps, in order: lowercase the text, replace every non-letter character
    with a space, split on whitespace, drop English stop words, Porter-stem
    each remaining token, then pass each stem through the WordNet lemmatizer.

    Args:
        text: Raw comment string. Non-string input (e.g. None or NaN coming
            from a missing cell) is treated as an empty comment.

    Returns:
        The cleaned tokens joined by single spaces; "" if nothing remains.
    """
    # A missing value has no `.lower()`; map it to an empty document instead
    # of raising in the middle of a column-wide apply.
    if not isinstance(text, str):
        return ""

    tokens = re.sub(r'[^a-zA-Z]', ' ', text.lower()).split()
    # Set membership is O(1) per token, versus a list scan in the old code.
    kept = (token for token in tokens if token not in _STOP_WORDS)
    # Stem first, then lemmatize the stem: same order as the original pipeline,
    # so the produced features are unchanged.
    return " ".join(_LEMMATIZER.lemmatize(_STEMMER.stem(token)) for token in kept)

In [21]:
# Preview the first rows of the loaded data before preprocessing is applied
data.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [22]:
# Replace every raw comment with its cleaned form, element by element.
# NOTE: the column is overwritten in place, so re-running this cell feeds
# already-processed text through preprocess_text a second time.
data['Comment'] = data['Comment'].map(preprocess_text)

In [31]:
# Preview the first rows again to inspect what preprocessing did to 'Comment'.
# NOTE(review): the recorded execution count of this cell is out of sequence
# with its neighbours -- re-run the notebook top to bottom to confirm the output.
data.head()

Unnamed: 0,Comment,Emotion
0,serious hate one subject death feel reluct drop,fear
1,im full life feel appal,anger
2,sit write start dig feel think afraid accept p...,fear
3,ive realli angri r feel like idiot trust first...,joy
4,feel suspici one outsid like raptur happen someth,fear


In [23]:
# Split the data into training and testing sets
# 20% of the rows are held out; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data['Comment'],
    data['Emotion'],
    test_size=0.2,
    random_state=42,
)

In [24]:
# Feature extraction using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
# Fit on the training comments only, so the test comments do not influence
# what the vectorizer learns.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
# Reuse the already-fitted vectorizer for the test comments (transform, not fit).
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [25]:
# Train a Logistic Regression model
lr_model = LogisticRegression(max_iter=5000)  # explicit, raised iteration cap for the solver
lr_model.fit(X_train_tfidf, y_train)

# Evaluate on the held-out split. Without this the test set created earlier is
# never used and the model's quality is judged only from a few hand-picked
# sentences.
lr_accuracy = accuracy_score(y_test, lr_model.predict(X_test_tfidf))
print(f"Logistic Regression test accuracy: {lr_accuracy:.4f}")


In [26]:
# Train a Naive Bayes (Multinomial) model
nb_model = MultinomialNB()
nb_model.fit(X_train_tfidf, y_train)

# Evaluate on the held-out split. Without this the test set created earlier is
# never used and the model's quality is judged only from a few hand-picked
# sentences.
nb_accuracy = accuracy_score(y_test, nb_model.predict(X_test_tfidf))
print(f"Naive Bayes test accuracy: {nb_accuracy:.4f}")

In [27]:
# Create a function for emotion prediction
def predict_emotion(text):
    """Classify one raw sentence with both trained models.

    The sentence is cleaned with preprocess_text, vectorized with the fitted
    tfidf_vectorizer, and then passed to lr_model and nb_model.

    Args:
        text: The raw sentence to classify.

    Returns:
        A (logistic_regression_label, naive_bayes_label) tuple.
    """
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf_vectorizer.transform([preprocess_text(text)])
    lr_label = lr_model.predict(features)[0]
    nb_label = nb_model.predict(features)[0]
    return lr_label, nb_label

In [28]:
# Test the models
# Three hand-written sentences used as a quick sanity check of both classifiers.
text1, text2, text3 = (
    "I am feeling very anxious and scared right now.",
    "I'm thrilled about the upcoming event.",
    "I can't contain my anger anymore.",
)

In [29]:
# Run every sample sentence through both models; each call yields
# (logistic regression label, naive Bayes label), unpacked in sample order.
(label_lr1, label_nb1), (label_lr2, label_nb2), (label_lr3, label_nb3) = (
    predict_emotion(sample) for sample in (text1, text2, text3)
)

In [30]:
# Show the two models' predicted labels side by side for each sample sentence
print("Text 1 - Logistic Regression:", label_lr1, "Naive Bayes:", label_nb1)
print("Text 2 - Logistic Regression:", label_lr2, "Naive Bayes:", label_nb2)
print("Text 3 - Logistic Regression:", label_lr3, "Naive Bayes:", label_nb3)

Text 1 - Logistic Regression: fear Naive Bayes: fear
Text 2 - Logistic Regression: joy Naive Bayes: joy
Text 3 - Logistic Regression: anger Naive Bayes: anger
