In [None]:
import pandas as pd

# Load the emotion corpus: one sample per line, "<text>;<emotion>", with no header row.
df=pd.read_table(
  "emotionClassifier.txt",
  sep=";",
  header=None,
  names=["Text","Emotion"],
)

In [None]:
# Preview the first rows to confirm the Text / Emotion columns were parsed as intended.
df.head()

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [None]:
# Column dtypes, row count and non-null counts of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16000 entries, 0 to 15999
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Text     16000 non-null  object
 1   Emotion  16000 non-null  object
dtypes: object(2)
memory usage: 250.1+ KB


In [None]:
# Number of samples per emotion label; the recorded output shows the classes are
# imbalanced (joy is the largest class, surprise the smallest).
df["Emotion"].value_counts()

joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: Emotion, dtype: int64

In [None]:
# Count missing values per column.
df.isnull().sum()

Text       0
Emotion    0
dtype: int64

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer,WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Download every NLTK resource the text-cleaning step needs in one place, so a
# fresh kernel has all of them before clean_text is first called:
#   punkt, punkt_tab -> tokenizer models used by word_tokenize (newer NLTK
#                       releases look for punkt_tab; older ones use punkt)
#   stopwords        -> stopwords.words("english")
#   wordnet, omw-1.4 -> data used by WordNetLemmatizer (this notebook fetches
#                       omw-1.4 before lemmatizing)
# nltk.download skips packages that are already up to date.
for resource in ("punkt","punkt_tab","stopwords","wordnet","omw-1.4"):
  nltk.download(resource)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [None]:
# Stop-word set and lemmatizer shared by every clean_text call (filled lazily).
_CLEAN_TEXT_RESOURCES={}


def _clean_text_resources():
  """Return (stop-word set, lemmatizer), building both on the first call only."""
  if not _CLEAN_TEXT_RESOURCES:
    # A frozenset gives constant-time membership tests; the original scanned a
    # freshly loaded list for every token of every row.
    _CLEAN_TEXT_RESOURCES["stop_words"]=frozenset(stopwords.words("english"))
    _CLEAN_TEXT_RESOURCES["lemmatizer"]=WordNetLemmatizer()
  return _CLEAN_TEXT_RESOURCES["stop_words"],_CLEAN_TEXT_RESOURCES["lemmatizer"]


def clean_text(text):
  """Normalize one piece of text for vectorization.

  Steps, in order:
    1. lower-case the text and split it into tokens with word_tokenize;
    2. keep only tokens made up entirely of alphabetic characters;
    3. drop English stop words;
    4. lemmatize each remaining token with WordNetLemmatizer;
    5. join the tokens back together with single spaces.

  Parameters
  ----------
  text : str
      Raw input text. It must support ``.lower()``, so non-string values have
      to be converted before calling this function.

  Returns
  -------
  str
      The cleaned, space-separated tokens (an empty string if nothing survives
      the filters).
  """
  stop_words,lemmatizer=_clean_text_resources()

  tokens=word_tokenize(text.lower())

  # non-alpha and stop-word removal in a single pass
  kept=[tok for tok in tokens if tok.isalpha() and tok not in stop_words]

  # lemmatize and join
  return " ".join(lemmatizer.lemmatize(tok) for tok in kept)

In [None]:
# Make sure every entry is a string, since clean_text calls .lower() on its input.
df["Text"]=df["Text"].astype(str)

In [None]:
# Extra NLTK corpus fetched before the lemmatizing step runs; presumably required by
# WordNetLemmatizer on the NLTK version in use — TODO confirm.
# NOTE(review): nltk is already imported in an earlier cell, so this import is redundant but harmless.
import nltk
nltk.download("omw-1.4")

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [None]:
# Clean every row with clean_text and store the result in a new column, leaving the raw Text column untouched.
df["clean_Text"]=df["Text"].apply(clean_text)

In [None]:
# Compare the raw and cleaned text side by side on the first rows.
df.head()

Unnamed: 0,Text,Emotion,clean_Text
0,i didnt feel humiliated,sadness,didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,go feeling hopeless damned hopeful around some...
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing minute post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,ever feeling nostalgic fireplace know still pr...
4,i am feeling grouchy,anger,feeling grouchy


In [None]:
# Class counts again (same call as earlier in the notebook); lists the label names
# that get encoded as integers right after this cell.
df["Emotion"].value_counts()

joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: Emotion, dtype: int64

In [None]:
# Integer code assigned to each emotion label. Kept as a named mapping so the
# codes can be translated back to label names when reading model output.
EMOTION_TO_ID={"joy":0,"sadness":1,"anger":2,"fear":3,"love":4,"surprise":5}
df["Emotions_num"]=df["Emotion"].map(EMOTION_TO_ID)

# Series.map yields NaN for any label missing from the mapping; stop here with a
# clear message instead of letting NaN targets reach the split and the models.
unmapped_labels=df.loc[df["Emotions_num"].isna(),"Emotion"].unique()
assert len(unmapped_labels)==0, f"Unmapped emotion labels: {list(unmapped_labels)}"

In [None]:
# Check that the new Emotions_num column lines up with the Emotion labels.
df.head()

Unnamed: 0,Text,Emotion,clean_Text,Emotions_num
0,i didnt feel humiliated,sadness,didnt feel humiliated,1
1,i can go from feeling so hopeless to so damned...,sadness,go feeling hopeless damned hopeful around some...,1
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing minute post feel greedy wrong,2
3,i am ever feeling nostalgic about the fireplac...,love,ever feeling nostalgic fireplace know still pr...,4
4,i am feeling grouchy,anger,feeling grouchy,2


In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing. The split is seeded for repeatability and
# stratified on the encoded label so both parts keep the same class proportions.
xtrain,xtest,ytrain,ytest=train_test_split(
  df["clean_Text"],
  df["Emotions_num"],
  test_size=0.25,
  random_state=1,
  stratify=df["Emotions_num"],
)

In [None]:
# Number of training texts.
xtrain.shape

(12000,)

In [None]:
# Number of held-out test texts.
xtest.shape

(4000,)

In [None]:
# Number of training labels; should equal the number of training texts.
ytrain.shape

(12000,)

In [None]:
# Number of test labels; should equal the number of test texts.
ytest.shape

(4000,)

In [None]:
# Per-class counts in the test labels, to check the stratified split kept the class proportions.
ytest.value_counts()

0    1341
1    1166
2     540
3     484
4     326
5     143
Name: Emotions_num, dtype: int64

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Baseline model: TF-IDF features fed into a k-nearest-neighbours classifier,
# both left at their default settings.
pipline=Pipeline(
  steps=[
    ("vectorization",TfidfVectorizer()),
    ("knn",KNeighborsClassifier()),
  ]
)

In [None]:
# Train on the training texts, then predict the held-out texts in one chained call
# (Pipeline.fit returns the fitted pipeline itself).
ypred=pipline.fit(xtrain,ytrain).predict(xtest)

In [None]:
# Label the report rows with emotion names instead of bare integer codes.
# The list position of each name is the integer code it was given when the
# Emotions_num column was built (joy=0 ... surprise=5).
emotion_names=["joy","sadness","anger","fear","love","surprise"]
print(classification_report(
  ytest,
  ypred,
  labels=list(range(len(emotion_names))),
  target_names=emotion_names,
))

              precision    recall  f1-score   support

           0       0.76      0.88      0.82      1341
           1       0.76      0.87      0.81      1166
           2       0.80      0.66      0.72       540
           3       0.84      0.66      0.74       484
           4       0.81      0.55      0.65       326
           5       0.71      0.35      0.47       143

    accuracy                           0.77      4000
   macro avg       0.78      0.66      0.70      4000
weighted avg       0.78      0.77      0.77      4000



In [None]:
# First three test texts (already cleaned), for a manual spot check.
xtest[:3]

650     feel quite strongly student punished due well ...
7652    woke yesterday morning wondering hurt mommy fe...
4339    ive home almost week hospital though feel need...
Name: clean_Text, dtype: object

In [None]:
# True emotion codes of the first three test rows.
ytest[:3]

650     1
7652    1
4339    4
Name: Emotions_num, dtype: int64

In [None]:
# Predicted emotion codes for the first three test rows, to compare with the true codes.
ypred[:3]

array([1, 1, 1])

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Second model: TF-IDF features fed into a random forest. This rebinds `pipline`,
# so the previously fitted pipeline is no longer reachable under that name.
# The forest is seeded so repeated runs give the same model and report;
# 1 matches the random_state used for the train/test split.
pipline=Pipeline([("vectorization",TfidfVectorizer()),("rf",RandomForestClassifier(random_state=1))])


In [None]:
# Fit the current pipeline on the training texts and predict the held-out texts in one
# chained call (Pipeline.fit returns the fitted pipeline itself). This overwrites ypred.
ypred=pipline.fit(xtrain,ytrain).predict(xtest)

In [None]:
# Label the report rows with emotion names instead of bare integer codes.
# The list position of each name is the integer code it was given when the
# Emotions_num column was built (joy=0 ... surprise=5).
emotion_names=["joy","sadness","anger","fear","love","surprise"]
print(classification_report(
  ytest,
  ypred,
  labels=list(range(len(emotion_names))),
  target_names=emotion_names,
))

              precision    recall  f1-score   support

           0       0.89      0.92      0.90      1341
           1       0.91      0.91      0.91      1166
           2       0.85      0.87      0.86       540
           3       0.84      0.85      0.84       484
           4       0.83      0.76      0.79       326
           5       0.85      0.64      0.73       143

    accuracy                           0.88      4000
   macro avg       0.86      0.82      0.84      4000
weighted avg       0.88      0.88      0.88      4000



In [None]:
# First three test texts (already cleaned), for a manual spot check.
xtest[:3]

650     feel quite strongly student punished due well ...
7652    woke yesterday morning wondering hurt mommy fe...
4339    ive home almost week hospital though feel need...
Name: clean_Text, dtype: object

In [None]:
# True emotion codes of the first three test rows.
ytest[:3]

650     1
7652    1
4339    4
Name: Emotions_num, dtype: int64

In [None]:
# Predicted emotion codes for the first three test rows, to compare with the true codes.
ypred[:3]

array([1, 1, 4])