In [1]:
import pandas as pd

Read text file

In [2]:
df = pd.read_csv("/content/train.txt", sep=";", names=["description", "emotion"])

In [3]:
df.head()

Unnamed: 0,description,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


Check for null values

In [4]:
df.isnull().sum()

description    0
emotion        0
dtype: int64

Check for unique entries

In [6]:
df['emotion'].value_counts()

joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: emotion, dtype: int64

In [7]:
df['description'].value_counts()

i feel on the verge of tears from weariness i look at your sweet face and cant help but tenderly kiss your cheeks                                                                          2
i feel kind of strange                                                                                                                                                                     2
i feel more adventurous willing to take risks img src http cdn                                                                                                                             2
i write these words i feel sweet baby kicks from within and my memory is refreshed i would do anything for this boy                                                                        2
i still feel a craving for sweet food                                                                                                                                                      2
                                                       

Map the emotions into a numerical representation

In [9]:
df['emotion-num'] = df['emotion'].map({
    "joy": 1, "sadness": 2, "anger": 3, "fear": 4, "love": 5, "surprise": 6
})

In [10]:
df.head()

Unnamed: 0,description,emotion,emotion-num
0,i didnt feel humiliated,sadness,2
1,i can go from feeling so hopeless to so damned...,sadness,2
2,im grabbing a minute to post i feel greedy wrong,anger,3
3,i am ever feeling nostalgic about the fireplac...,love,5
4,i am feeling grouchy,anger,3


In [11]:
import spacy as sc

loading the spacy english trained model

In [12]:
nlp = sc.load("en_core_web_sm")

Preprocess the 'description' column using tokenizing method

In [13]:
def preprocess(text):
  doc = nlp(text)
  filtered_tokens = []
  for token in doc:
    if token.is_stop or token.is_punct:
      continue
    else:
      filtered_tokens.append(token.lemma_)
  return " ".join(filtered_tokens)

In [14]:
df['preprocess-text'] = df['description'].apply(preprocess)

In [15]:
df.head()

Unnamed: 0,description,emotion,emotion-num,preprocess-text
0,i didnt feel humiliated,sadness,2,not feel humiliate
1,i can go from feeling so hopeless to so damned...,sadness,2,feel hopeless damned hopeful care awake
2,im grabbing a minute to post i feel greedy wrong,anger,3,m grab minute post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,5,feel nostalgic fireplace know property
4,i am feeling grouchy,anger,3,feel grouchy


In [17]:
!pip install scikit-learn



## Splitting data

In [19]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(df['description'], df['emotion-num'], test_size=0.2, random_state=42)

Logistice Regression

In [22]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

In [23]:
lr = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('classifier', LogisticRegression())
])

lr.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Run prediction

In [24]:
lr_y_pred = lr.predict(x_test)

In [26]:
from sklearn.metrics import classification_report

In [27]:
print(classification_report(y_test, lr_y_pred))

              precision    recall  f1-score   support

           1       0.79      0.96      0.87      1021
           2       0.87      0.94      0.90       946
           3       0.90      0.77      0.83       427
           4       0.85      0.70      0.77       397
           5       0.89      0.55      0.68       296
           6       0.88      0.43      0.58       113

    accuracy                           0.84      3200
   macro avg       0.86      0.73      0.77      3200
weighted avg       0.85      0.84      0.83      3200



In [28]:
lr.predict(['I am very Happy'])

array([1])

In [30]:
emotion_names = {
    1: "joy",
    2: "sadness",
    3: "anger",
    4: "fear",
    5: "love",
    6: "surprise"
}

emotion = lr.predict(['I am very Happy'])[0]
print(emotion_names[emotion])

joy
