In [7]:
import pandas as pd
# Load the semicolon-separated corpus. Column names are supplied explicitly,
# so the first line of the file is read as data rather than as a header.
df = pd.read_csv(
    'emotions.txt',
    sep=';',
    names=['text', 'emotion'],
)
df

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
17995,i just keep feeling like someone is being unki...,anger
17996,im feeling a little cranky negative after this...,anger
17997,i feel that i am useful to my people and that ...,joy
17998,im feeling more comfortable with derby i feel ...,joy


In [8]:
# Class balance: number of rows per emotion label, most frequent first.
df['emotion'].value_counts()

joy         6057
sadness     5247
anger       2434
fear        2161
love        1463
surprise     638
Name: emotion, dtype: int64

In [16]:
# Restrict the task to four emotions (rows with any other label are dropped)
# and encode each kept label as an integer class id. The same mapping drives
# both the filter and the encoding, so the two cannot drift apart.
emotion_to_num = {'joy': 0, 'sadness': 1, 'anger': 2, 'fear': 3}

# .copy() makes the filtered frame independent of the frame it was sliced
# from, so adding a column below does not raise SettingWithCopyWarning.
df = df[df['emotion'].isin(list(emotion_to_num))].copy()
df['emotion_num'] = df['emotion'].map(emotion_to_num)

In [17]:
# Display the frame to inspect the emotion_num column next to its label.
df

Unnamed: 0,text,emotion,emotion_num
0,i didnt feel humiliated,sadness,1
1,i can go from feeling so hopeless to so damned...,sadness,1
2,im grabbing a minute to post i feel greedy wrong,anger,2
4,i am feeling grouchy,anger,2
5,ive been feeling a little burdened lately wasn...,sadness,1
...,...,...,...
17995,i just keep feeling like someone is being unki...,anger,2
17996,im feeling a little cranky negative after this...,anger,2
17997,i feel that i am useful to my people and that ...,joy,0
17998,im feeling more comfortable with derby i feel ...,joy,0


In [19]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. Stratifying on the numeric label
# keeps the class proportions the same in the train and test parts, and the
# fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['emotion_num'],
    test_size=0.2,
    stratify=df['emotion_num'],
    random_state=2022,
)

In [25]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.naive_bayes import MultinomialNB

# Baseline model: unigram + bigram counts fed into multinomial Naive Bayes.
nb_steps = [
    ('vectorizer', CountVectorizer(ngram_range=(1, 2))),
    ('NB', MultinomialNB()),
]
nb1 = Pipeline(steps=nb_steps)

# Fit on the training texts, then report per-class metrics on the held-out set.
nb1.fit(X_train, y_train)
y_pred = nb1.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

           0       0.73      0.97      0.84      1212
           1       0.74      0.90      0.81      1049
           2       0.94      0.32      0.48       487
           3       0.92      0.28      0.43       432

    accuracy                           0.76      3180
   macro avg       0.83      0.62      0.64      3180
weighted avg       0.79      0.76      0.72      3180



In [26]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Random forest on trigram counts only (ngram_range=(3, 3)).
# random_state is fixed (same seed as the train/test split) because a random
# forest is stochastic: without a seed the report below changes on every run.
rf1 = Pipeline([
    ('vectorizer', CountVectorizer(ngram_range=(3, 3))),
    ('rf', RandomForestClassifier(random_state=2022))
])
rf1.fit(X_train, y_train)
y_pred = rf1.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

           0       0.54      0.65      0.59      1212
           1       0.61      0.34      0.43      1049
           2       0.53      0.23      0.32       487
           3       0.24      0.51      0.33       432

    accuracy                           0.46      3180
   macro avg       0.48      0.43      0.42      3180
weighted avg       0.52      0.46      0.46      3180



In [27]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Random forest on unigram + bigram counts (ngram_range=(1, 2)).
# NOTE: this rebinds `rf1`; any pipeline previously stored under that name is
# no longer reachable after this cell runs.
# random_state is fixed (same seed as the train/test split) because a random
# forest is stochastic: without a seed the report below changes on every run.
rf1 = Pipeline([
    ('vectorizer', CountVectorizer(ngram_range=(1, 2))),
    ('rf', RandomForestClassifier(random_state=2022))
])
rf1.fit(X_train, y_train)
y_pred = rf1.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

           0       0.82      0.97      0.89      1212
           1       0.90      0.88      0.89      1049
           2       0.94      0.72      0.81       487
           3       0.93      0.75      0.83       432

    accuracy                           0.87      3180
   macro avg       0.90      0.83      0.86      3180
weighted avg       0.88      0.87      0.87      3180



In [28]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Random forest on TF-IDF features (TfidfVectorizer with default settings).
# random_state is fixed (same seed as the train/test split) because a random
# forest is stochastic: without a seed the report below changes on every run.
tf1 = Pipeline([
    ('vectorizer', TfidfVectorizer()),
    ('rf', RandomForestClassifier(random_state=2022))
])
tf1.fit(X_train, y_train)
y_pred = tf1.predict(X_test)
print(classification_report(y_test, y_pred))



              precision    recall  f1-score   support

           0       0.87      0.95      0.91      1212
           1       0.91      0.88      0.89      1049
           2       0.89      0.78      0.83       487
           3       0.87      0.83      0.85       432

    accuracy                           0.89      3180
   macro avg       0.89      0.86      0.87      3180
weighted avg       0.89      0.89      0.88      3180

