# Import Needed Modules

###### Uses the Emotion Dataset from Kaggle: https://www.kaggle.com/datasets/abdallahwagih/emotion-dataset
###### Based on the code from https://www.kaggle.com/code/abdallahwagih/nlp-pipeline-tutorial

## With this model you can classify a phrase into one of three emotions: joy, fear, or anger.
## The notebook also includes a simple terminal (text input) interface for trying the model.

In [3]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

import spacy

# EDA

In [2]:
# Load the emotion CSV into the dataframe `df`
df = pd.read_csv(filepath_or_buffer="Emotion_classify_Data.csv")

# Report the dataframe dimensions as (rows, columns)
print(df.shape)

# Preview the first five rows (head() defaults to n=5)
df.head()

(5937, 2)


Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [3]:
# Count how many rows carry each Emotion label
df["Emotion"].value_counts()

Emotion
anger    2000
joy      2000
fear     1937
Name: count, dtype: int64

# Preprocessing

In [4]:
# Load the "en_core_web_sm" English spaCy model; the resulting `nlp` object is what the
# cells below call to tokenize text and to read lemma / stop-word / punctuation attributes
nlp = spacy.load("en_core_web_sm") 

In [6]:
# Pick the comment stored under index label 6 as the sample text and display it
txt = df.loc[6, 'Comment']
txt

'when a friend of mine keeps telling me morbid things that happened to his dog'

In [7]:
# Tokenization: run the sample text through the spaCy `nlp` object; the resulting
# `doc` is iterated token by token in the following cells
doc = nlp(txt)

### Sentence Tokenization
Not needed here, because the data in the dataframe is already split into sentences.

In [8]:
# for sentence in doc.sents:
#     print(sentence)

### Word Tokenization

In [8]:
# Show the text of each token of the sample document on its own line
for token in doc:
    print(token.text)

when
a
friend
of
mine
keeps
telling
me
morbid
things
that
happened
to
his
dog


### Stemming and Lemmatization

In [9]:
# Show every token of the sample document next to its lemma (token.lemma_)
for token in doc:
    print(f"Word: {token} | -> {token.lemma_}")

Word: when | -> when
Word: a | -> a
Word: friend | -> friend
Word: of | -> of
Word: mine | -> mine
Word: keeps | -> keep
Word: telling | -> tell
Word: me | -> I
Word: morbid | -> morbid
Word: things | -> thing
Word: that | -> that
Word: happened | -> happen
Word: to | -> to
Word: his | -> his
Word: dog | -> dog


### Stop Words

In [10]:
# List only the tokens that are flagged as stop words or punctuation
for token in doc:
    if not (token.is_stop or token.is_punct):
        continue
    print(token)

when
a
of
mine
me
that
to
his


### **Preprocess Function**

In [11]:
# use this utility function to get the preprocessed text data
def preprocess(text):
    """Return `text` reduced to the lemmas of its informative tokens.

    The text is run through the module-level spaCy object `nlp`. Tokens flagged
    as stop words or punctuation are dropped, and the lemmas of the remaining
    tokens are joined with single spaces.
    """
    kept_lemmas = [
        tok.lemma_
        for tok in nlp(text)
        if not (tok.is_stop or tok.is_punct)
    ]
    return " ".join(kept_lemmas)

In [12]:
# Show the raw sample text, then its preprocessed form, for comparison
print(txt)
procces_txt = preprocess(txt)
print(procces_txt)

when a friend of mine keeps telling me morbid things that happened to his dog
friend keep tell morbid thing happen dog


## Apply preprocess function on dataframe

In [13]:
# Run preprocess() on every comment and store the cleaned text in a new column
df['preprocessed_comment'] = df['Comment'].map(preprocess)

In [14]:
# Display the dataframe, which now includes the preprocessed_comment column
df

Unnamed: 0,Comment,Emotion,preprocessed_comment
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen
...,...,...,...
5932,i begun to feel distressed for you,fear,begin feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,leave feel annoyed angry thinking center stupi...
5934,i were to ever get married i d have everything...,joy,marry d ready offer ve get club perfect good l...
5935,i feel reluctant in applying there because i w...,fear,feel reluctant apply want able find company kn...


## Encoding target column

In [15]:
# Encode the emotion names as integers: joy -> 0, fear -> 1, anger -> 2
df['Emotion_num'] = df['Emotion'].map(
    {
        'joy': 0,
        'fear': 1,
        'anger': 2,
    }
)

df.head()

Unnamed: 0,Comment,Emotion,preprocessed_comment,Emotion_num
0,i seriously hate one subject to death but now ...,fear,seriously hate subject death feel reluctant drop,1
1,im so full of life i feel appalled,anger,m life feel appalled,2
2,i sit here to write i start to dig out my feel...,fear,sit write start dig feeling think afraid accep...,1
3,ive been really angry with r and i feel like a...,joy,ve angry r feel like idiot trust place,0
4,i feel suspicious if there is no one outside l...,fear,feel suspicious outside like rapture happen,1


## Split data into train and test

In [16]:
# Hold out 40% of the rows for testing, stratified on the numeric label,
# with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['preprocessed_comment'],
    df['Emotion_num'],
    test_size=0.4,
    random_state=42,
    stratify=df['Emotion_num'],
)

In [17]:
# Report how many samples ended up in the training and test splits
print("Shape of X_train: ", X_train.shape)
print("Shape of X_test: ", X_test.shape)

Shape of X_train:  (3562,)
Shape of X_test:  (2375,)


## Convert text column to numeric vector

In [18]:
# TF-IDF vectorizer with default settings
v = TfidfVectorizer()

# Fit the vectorizer on the training text only, then reuse the fitted vectorizer
# to transform the test text into the same feature space
X_train_cv = v.fit_transform(X_train)
X_test_cv = v.transform(X_test)

# Print the vocabulary_ mapping learned by the vectorizer
# NOTE(review): this prints every entry; printing len(v.vocabulary_) would keep the output short
print(v.vocabulary_)



# Machine Learning Model

## 1. Naive Bayes

In [19]:
# Multinomial Naive Bayes classifier with default hyperparameters
NB_model = MultinomialNB()

# Model training: fit on the TF-IDF training vectors and their numeric labels
NB_model.fit(X_train_cv, y_train)

In [20]:
# Predict labels for the test vectors with the trained Naive Bayes model
y_pred = NB_model.predict(X_test_cv)

In [21]:
# Print the accuracy of the Naive Bayes predictions against the true test labels
print(accuracy_score(y_test, y_pred))

0.895578947368421


In [23]:
# Print the per-class precision / recall / f1 report for the Naive Bayes predictions
# NOTE(review): the saved report lists a total support of 1188, while the saved split
# output shows 2375 test rows; the saved output looks stale, re-run to confirm
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support



           0       0.90      0.89      0.89       400

           1       0.91      0.90      0.91       388

           2       0.90      0.92      0.91       400



    accuracy                           0.90      1188

   macro avg       0.90      0.90      0.90      1188

weighted avg       0.90      0.90      0.90      1188




## 2. Random Forest

In [22]:
# Random forest classifier. A fixed random_state makes the forest's internal
# randomness repeatable, so the reported metrics can be reproduced after a kernel
# restart (42 matches the seed already used for the train/test split).
RFC_model = RandomForestClassifier(random_state=42)

# Fit on the TF-IDF training vectors and their numeric labels
RFC_model.fit(X_train_cv, y_train)

In [23]:
# Predict labels for the test vectors with the trained random forest;
# this overwrites the y_pred produced by the Naive Bayes model
y_pred = RFC_model.predict(X_test_cv)

In [24]:
# Print the accuracy of the random forest predictions against the true test labels
print(accuracy_score(y_test, y_pred))

0.9258947368421052


In [27]:
# Print the per-class precision / recall / f1 classification report for the random forest
# NOTE(review): the saved report lists a total support of 1188, while the saved split
# output shows 2375 test rows; the saved output looks stale, re-run to confirm
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support



           0       0.92      0.95      0.93       400

           1       0.92      0.93      0.92       388

           2       0.94      0.90      0.92       400



    accuracy                           0.93      1188

   macro avg       0.93      0.93      0.93      1188

weighted avg       0.93      0.93      0.93      1188




# Test Model

## Get text

In [25]:
# Take the comment stored under index label 2075 as the text to classify and display it
test_text = df.loc[2075, 'Comment']
test_text

'i feel hateful to have given up my friendship with that woman and a couple of others for the same reasons to admit defeat and let my husband make me feel so insecure that i feel the need to avoid her cut her out of my life so that my securities is not challenged'

## Apply preprocess

In [26]:
# Preprocess the sample and wrap the result in a one-element list, which is the
# form passed to the vectorizer in the next step
test_text_processed = [preprocess(test_text)]
test_text_processed

['feel hateful give friendship woman couple reason admit defeat let husband feel insecure feel need avoid cut life security challenge']

## Convert to vector

In [27]:
# Convert the preprocessed sample to a TF-IDF vector with the already-fitted vectorizer
test_text_vc = v.transform(test_text_processed)

## Get Prediction

In [28]:
# Predict the label of the vectorized sample with the trained random forest
# NOTE(review): this rebinds `test_text` from the raw comment string to the prediction
# result, so re-running the preprocessing cell afterwards would pass that result to
# preprocess(); a separate variable name would be safer
test_text = RFC_model.predict(test_text_vc)

## Output

In [29]:
# Show the true label of row 2075 (emotion name -> numeric code), followed by the
# model's prediction held in test_text
print(f"{df['Emotion'][2075]} -> {df['Emotion_num'][2075]}")
print(test_text)

anger -> 2
[2]


## User Input (Terminal Interface)

In [38]:
# Define a function for emotion classification
def classify_emotion(model, vectorizer, phrase):
    """Predict the emotion label for a single phrase.

    Args:
        model: fitted classifier exposing `predict`.
        vectorizer: fitted vectorizer exposing `transform`.
        phrase: raw text to classify.

    Returns:
        The first element of the model's prediction for the phrase.
    """
    # Clean the phrase, then vectorize it as a one-element list
    features = vectorizer.transform([preprocess(phrase)])
    return model.predict(features)[0]

# Define a function for emotion classification from user input
def classify_emotion_from_user_input(model, vectorizer):
    """Read one phrase from standard input and return its predicted emotion label.

    Args:
        model: fitted classifier, forwarded to classify_emotion.
        vectorizer: fitted vectorizer, forwarded to classify_emotion.

    Returns:
        Whatever classify_emotion returns for the entered phrase.
    """
    return classify_emotion(model, vectorizer, input("Enter a phrase: "))

# Example usage. Label encoding used for training: {'joy': 0, 'fear': 1, 'anger': 2}
predicted_emotion = classify_emotion_from_user_input(RFC_model, v)

# Each label needs its own branch of a flat if/elif/else chain. If the fear check is
# nested inside the joy branch it can never match, and a fear prediction (1) falls
# through to the anger message.
if predicted_emotion == 0:
    print("Predicted Emotion: You are filling Joy")
elif predicted_emotion == 1:
    print("Predicted Emotion: You are filling Fear")
else:
    print("Predicted_Emotion: You are filling Anger")

Enter a phrase: i feel hateful to have given up my friendship with that woman and a couple of others for the same reasons to admit defeat and let my husband make me feel so insecure that i feel the need to avoid her cut her out of my life so that my securities is not challenged
Predicted_Emotion: You are filling Anger
