In [20]:
import pandas as pd
import spacy
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
# Read the emotion dataset from its CSV export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; anyone else running
# the notebook has to point it at their own copy of the file.
csv_path = "C:\\Users\\SHARIB\\Downloads\\Emotion_classify_Data.csv"
df = pd.read_csv(csv_path)
print(df.shape)  # (rows, columns) sanity check
df.head()

(5937, 2)


Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [5]:
# Rows per emotion label, to see how balanced the classes are.
emotion_counts = df['Emotion'].value_counts()
emotion_counts

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64

In [9]:
# Encode the string emotion labels as integer class ids for the classifiers.
emotion_to_num = {
    'joy': 0,
    'fear': 1,
    'anger': 2
}
df['Emotion_num'] = df['Emotion'].map(emotion_to_num)

# Series.map yields NaN for any value that is not a key of the mapping, which
# would otherwise go unnoticed until a classifier rejects the labels.
# Fail fast here and name the offending labels instead.
unmapped_labels = set(df.loc[df['Emotion_num'].isna(), 'Emotion'])
assert not unmapped_labels, f"Unmapped emotion labels: {unmapped_labels}"

In [10]:
# Preview the first rows to confirm the new numeric label column.
df.head(n=5)

Unnamed: 0,Comment,Emotion,Emotion_num
0,i seriously hate one subject to death but now ...,fear,1
1,im so full of life i feel appalled,anger,2
2,i sit here to write i start to dig out my feel...,fear,1
3,ive been really angry with r and i feel like a...,joy,0
4,i feel suspicious if there is no one outside l...,fear,1


In [11]:
# Hold out 20% of the raw comments for evaluation. Stratifying on the numeric
# label keeps the class proportions alike in both partitions, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.Comment,
    df.Emotion_num,
    test_size=0.2,
    random_state=2022,
    stratify=df.Emotion_num,
)
for partition in (X_train, X_test):
    print(partition.shape)

(4749,)
(1188,)


In [14]:
# Baseline: trigram-only bag-of-words counts fed into a random forest.
clf = Pipeline([
    ('Vectorizer', CountVectorizer(ngram_range=(3, 3))),
    # Seed the forest (same seed as the train/test split) so the metrics
    # printed below are reproducible on Restart & Run All; an unseeded forest
    # is trained differently on every run.
    ('Classifier', RandomForestClassifier(random_state=2022))
])
clf.fit(X_train, y_train)

In [15]:
# Score the trigram random-forest pipeline on the held-out comments.
y_pred = clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.59      0.26      0.36       400
           1       0.37      0.78      0.50       388
           2       0.51      0.23      0.32       400

    accuracy                           0.42      1188
   macro avg       0.49      0.43      0.39      1188
weighted avg       0.49      0.42      0.39      1188



In [16]:
# Unigram + bigram counts fed into a multinomial Naive Bayes classifier.
nb_steps = [
    ('Vectorizer', CountVectorizer(ngram_range=(1, 2))),
    ('Classifier', MultinomialNB()),
]
clf1 = Pipeline(nb_steps)
clf1.fit(X_train, y_train)

In [17]:
# Score the Naive Bayes pipeline on the held-out comments.
y_pred1 = clf1.predict(X_test)
nb_report = classification_report(y_test, y_pred1)
print(nb_report)

              precision    recall  f1-score   support

           0       0.87      0.86      0.87       400
           1       0.87      0.83      0.85       388
           2       0.83      0.88      0.85       400

    accuracy                           0.86      1188
   macro avg       0.86      0.86      0.86      1188
weighted avg       0.86      0.86      0.86      1188



In [18]:
# Unigram + bigram counts fed into a random forest.
clf2 = Pipeline([
    ('Vectorizer', CountVectorizer(ngram_range=(1, 2))),
    # Seed the forest (same seed as the train/test split) so the metrics
    # printed below are reproducible on Restart & Run All; an unseeded forest
    # is trained differently on every run.
    ('Classifier', RandomForestClassifier(random_state=2022))
])
clf2.fit(X_train, y_train)

In [19]:
# Score the unigram + bigram random-forest pipeline on the held-out comments.
y_pred2 = clf2.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred2))

              precision    recall  f1-score   support

           0       0.84      0.96      0.90       400
           1       0.94      0.88      0.91       388
           2       0.92      0.85      0.89       400

    accuracy                           0.90      1188
   macro avg       0.90      0.90      0.90      1188
weighted avg       0.90      0.90      0.90      1188



In [22]:
# TF-IDF features (vectorizer defaults) fed into a random forest.
clf3 = Pipeline([
    ('Vectorizer', TfidfVectorizer()),
    # Seed the forest (same seed as the train/test split) so the metrics
    # printed below are reproducible on Restart & Run All; an unseeded forest
    # is trained differently on every run.
    ('Classifier', RandomForestClassifier(random_state=2022))
])
clf3.fit(X_train, y_train)

In [23]:
# Score the TF-IDF random-forest pipeline on the held-out comments.
y_pred3 = clf3.predict(X_test)
tfidf_report = classification_report(y_test, y_pred3)
print(tfidf_report)

              precision    recall  f1-score   support

           0       0.87      0.94      0.91       400
           1       0.91      0.91      0.91       388
           2       0.94      0.86      0.90       400

    accuracy                           0.90      1188
   macro avg       0.91      0.90      0.90      1188
weighted avg       0.91      0.90      0.90      1188



In [24]:
# Load the small English spaCy pipeline used for text preprocessing.
# Only token.lemma_, token.is_stop and token.is_punct are read from it, so the
# named-entity recognizer is skipped to avoid paying for it on every comment.
# The parser is deliberately kept: spaCy documents that the English
# tag-to-POS mapping can use the dependency parse, so dropping it could
# change the lemmas.
nlp = spacy.load('en_core_web_sm', disable=['ner'])

In [25]:
def preprocess(text):
    """Return `text` reduced to the lemmas of its informative tokens.

    The text is run through the module-level spaCy pipeline `nlp`; tokens
    flagged as stop words or punctuation are dropped, and the lemmas of the
    remaining tokens are joined with single spaces.
    """
    kept_lemmas = [
        tok.lemma_
        for tok in nlp(text)
        if not (tok.is_stop or tok.is_punct)
    ]
    return " ".join(kept_lemmas)

In [26]:
# Clean every comment with `preprocess` and keep the result next to the
# original text so the two can be compared.
cleaned_comments = df['Comment'].apply(preprocess)
df['preprocessed_text'] = cleaned_comments
df.head()

Unnamed: 0,Comment,Emotion,Emotion_num,preprocessed_text
0,i seriously hate one subject to death but now ...,fear,1,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,2,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,1,sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,0,ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,1,feel suspicious outside like rapture happen


In [27]:
# Redo the split on the preprocessed text, with the same test size, seed and
# stratification as the raw-text split.
# Note: this rebinds X_train / X_test / y_train / y_test, so the cells below
# this point train and evaluate on preprocessed text.
X_train, X_test, y_train, y_test = train_test_split(
    df.preprocessed_text,
    df.Emotion_num,
    test_size=0.2,
    random_state=2022,
    stratify=df.Emotion_num,
)
print(X_train.shape, X_test.shape, sep="\n")

(4749,)
(1188,)


In [28]:
# Unigram + bigram counts fed into a random forest, fitted on the current
# X_train / y_train.
clf5 = Pipeline([
    ('Vectorizer', CountVectorizer(ngram_range=(1, 2))),
    # Seed the forest (same seed as the train/test split) so the metrics
    # printed below are reproducible on Restart & Run All; an unseeded forest
    # is trained differently on every run.
    ('Classifier', RandomForestClassifier(random_state=2022))
])
clf5.fit(X_train, y_train)

In [29]:
# Score the clf5 pipeline on the current held-out split.
y_pred5 = clf5.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred5))

              precision    recall  f1-score   support

           0       0.94      0.95      0.94       400
           1       0.94      0.91      0.92       388
           2       0.92      0.94      0.93       400

    accuracy                           0.93      1188
   macro avg       0.93      0.93      0.93      1188
weighted avg       0.93      0.93      0.93      1188



In [30]:
# TF-IDF features (vectorizer defaults) fed into a random forest, fitted on
# the current X_train / y_train.
clf6 = Pipeline([
    ('Vectorizer', TfidfVectorizer()),
    # Seed the forest (same seed as the train/test split) so the metrics
    # printed below are reproducible on Restart & Run All; an unseeded forest
    # is trained differently on every run.
    ('Classifier', RandomForestClassifier(random_state=2022))
])
clf6.fit(X_train, y_train)

In [31]:
# Score the clf6 pipeline on the current held-out split.
y_pred6 = clf6.predict(X_test)
clf6_report = classification_report(y_test, y_pred6)
print(clf6_report)

              precision    recall  f1-score   support

           0       0.91      0.96      0.94       400
           1       0.93      0.91      0.92       388
           2       0.93      0.91      0.92       400

    accuracy                           0.93      1188
   macro avg       0.93      0.93      0.93      1188
weighted avg       0.93      0.93      0.93      1188



In [36]:
# clf5 was fitted on `preprocessed_text` (stop words and punctuation removed,
# tokens lemmatized), so unseen text has to go through the same `preprocess`
# step before prediction; feeding it raw text gives the vectorizer n-grams
# it never saw during training.
sample_text = " Funny how all the Elon's sycophantic cult followers on here just followed him into supporting Trump as soon as he did, without question. How would they get his reposts and engagement, and ad reveune, without doing that? "
clf5.predict([preprocess(sample_text)])

array([2])