In [36]:
import pandas as pd
import numpy as np
import spacy

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [4]:
emotion_data = pd.read_csv("/content/Emotion_classify_Data.csv")
emotion_data

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear
...,...,...
5932,i begun to feel distressed for you,fear
5933,i left feeling annoyed and angry thinking that...,anger
5934,i were to ever get married i d have everything...,joy
5935,i feel reluctant in applying there because i w...,fear


In [33]:
emotion_data.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [5]:
emotion_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5937 entries, 0 to 5936
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Comment  5937 non-null   object
 1   Emotion  5937 non-null   object
dtypes: object(2)
memory usage: 92.9+ KB


In [6]:
# Finding the distribution of Emotion
emotion_data['Emotion'].value_counts()

Unnamed: 0_level_0,count
Emotion,Unnamed: 1_level_1
anger,2000
joy,2000
fear,1937


In [15]:
# Show sample of data
print(f"{emotion_data['Comment'][0]} -> {emotion_data['Emotion'][0]}")

i seriously hate one subject to death but now i feel reluctant to drop it -> fear


# ***Preprocessing***

In [30]:
text = emotion_data['Comment'][3]
text

'ive been really angry with r and i feel like an idiot for trusting him in the first place'

In [31]:
nlp = spacy.load('en_core_web_sm')

In [32]:
doc = nlp(text)
doc

ive been really angry with r and i feel like an idiot for trusting him in the first place

In [29]:
def preprocess(text):
  #Tokkenization
  doc = nlp(text)
  filtered_data = []

  for token in doc:
    #stop words or punctuation
    if token.is_stop or token.is_punct:
      continue
    filtered_data.append(token.lemma_)
  return " ".join(filtered_data)


In [37]:
emotion_data["Preprocessed comment"] = emotion_data["Comment"].apply(preprocess)
emotion_data

Unnamed: 0,Comment,Emotion,Preprocessed comment
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen
...,...,...,...
5932,i begun to feel distressed for you,fear,begin feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,leave feel annoyed angry thinking center stupi...
5934,i were to ever get married i d have everything...,joy,marry d ready offer ve get club perfect good l...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant apply want able find company kn...


## **Lemmitization**

In [23]:
for token in doc:
  print(f"{token} ==> {token.lemma_}")

i ==> I
ve ==> ve
been ==> be
really ==> really
angry ==> angry
with ==> with
r ==> r
and ==> and
i ==> I
feel ==> feel
like ==> like
an ==> an
idiot ==> idiot
for ==> for
trusting ==> trust
him ==> he
in ==> in
the ==> the
first ==> first
place ==> place


# ***Stop Words***

In [24]:
for token in doc:
  if token.is_stop:
    print(f"{token} ==> Stop Word")

  else:
    print(f"{token} ==> Not Stop Word")

i ==> Stop Word
ve ==> Not Stop Word
been ==> Stop Word
really ==> Stop Word
angry ==> Not Stop Word
with ==> Stop Word
r ==> Not Stop Word
and ==> Stop Word
i ==> Stop Word
feel ==> Not Stop Word
like ==> Not Stop Word
an ==> Stop Word
idiot ==> Not Stop Word
for ==> Stop Word
trusting ==> Not Stop Word
him ==> Stop Word
in ==> Stop Word
the ==> Stop Word
first ==> Stop Word
place ==> Not Stop Word


In [26]:
text = "ive been really angry with r and i feel like an idiot for trusting him in the first place"

'ive been really angry with r and i feel like an idiot for trusting him in the first place'

In [27]:
doc = nlp(text)

In [28]:
for token in doc:
  if token.is_stop:
    print(f"{token} ==> Stop Word")

  elif token.is_punct:
    print(f"{token} ==> Punctuation")

  else:
    print(f"{token} ==> Not Stop Word")

i ==> Stop Word
ve ==> Not Stop Word
been ==> Stop Word
really ==> Stop Word
angry ==> Not Stop Word
with ==> Stop Word
r ==> Not Stop Word
and ==> Stop Word
i ==> Stop Word
feel ==> Not Stop Word
like ==> Not Stop Word
an ==> Stop Word
idiot ==> Not Stop Word
for ==> Stop Word
trusting ==> Not Stop Word
him ==> Stop Word
in ==> Stop Word
the ==> Stop Word
first ==> Stop Word
place ==> Not Stop Word


# ***Key Words***

In [34]:
preprocess_text = preprocess(text)
preprocess_text

've angry r feel like idiot trust place'

# ***Text Representative***

In [38]:
x = emotion_data['Preprocessed comment']  # Preprocessing
y = emotion_data['Emotion']  # Encoder

In [42]:
v = TfidfVectorizer()
x_vec = v.fit_transform(x)

In [41]:
x_vec.toarray()

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [43]:
encoder =  LabelEncoder()
y_enc = encoder.fit_transform(y)

In [44]:
#Splitting
x_train, x_test, y_train, y_test = train_test_split(x_vec, y_enc, test_size=0.2, random_state=42)

In [45]:
#Modeling

model = RandomForestClassifier()
model.fit(x_train, y_train)

In [46]:
print(f"Train Score: {model.score(x_train, y_train)}")
print(f"Test Score: {model.score(x_test, y_test)}")

Train Score: 0.9991577174141925
Test Score: 0.9276094276094277


In [48]:
y_pred = model.predict(x_test)
print(f"Test Score: {model.score(x_test, y_test)}")

Test Score: 0.9276094276094277


In [53]:
#Testing The Model
test_text = "I love traveling to abroad and trying to find new tradions"

preprocess_text = preprocess(test_text)
preprocess_text

'love travel abroad try find new tradion'

In [54]:
text_vec = v.transform([preprocess_text])
text_vec

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 6 stored elements and shape (1, 6926)>

In [55]:
pred = model.predict(text_vec)
pred

array([2])

In [56]:
emotion_data

Unnamed: 0,Comment,Emotion,Preprocessed comment
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen
...,...,...,...
5932,i begun to feel distressed for you,fear,begin feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,leave feel annoyed angry thinking center stupi...
5934,i were to ever get married i d have everything...,joy,marry d ready offer ve get club perfect good l...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant apply want able find company kn...


In [58]:
test2 = emotion_data.iloc[2123, 0]


In [59]:
preprocess_text = preprocess(test2)
preprocess_text

'feel bother rape stick ass think statement like say lot speaker target'

In [60]:
text_vec = v.transform([preprocess_text])
text_vec

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 12 stored elements and shape (1, 6926)>

In [61]:
pred = model.predict(text_vec)
pred

array([0])

In [64]:
emotion_data.iloc[2123, 0]

'i feel less bothered my get the rape stick out of your ass because i think a statement like that says a lot more about the speaker than the target'