# Data Loading

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
df=pd.read_csv('clean_data.csv')
df.head()

Unnamed: 0,Text,Emotion,Clean_text
0,i didnt feel humiliated,sadness,didnt feel humiliated
1,i can go from feeling so hopeless to so damned...,sadness,feeling hopeless damned hopeful cares awake
2,im grabbing a minute to post i feel greedy wrong,anger,im grabbing minute post feel greedy wrong
3,i am ever feeling nostalgic about the fireplac...,love,feeling nostalgic fireplace know property
4,i am feeling grouchy,anger,feeling grouchy


In [3]:
df.drop('Text', axis=1, inplace=True)
df.head(3)

Unnamed: 0,Emotion,Clean_text
0,sadness,didnt feel humiliated
1,sadness,feeling hopeless damned hopeful cares awake
2,anger,im grabbing minute post feel greedy wrong


# Analysis

In [4]:
df.Emotion.unique()

array(['sadness', 'anger', 'love', 'surprise', 'fear', 'happy'],
      dtype=object)

In [5]:
df.Emotion.value_counts()

happy       7029
sadness     6265
anger       2993
fear        2652
love        1641
surprise     879
Name: Emotion, dtype: int64

In [6]:
sadness=df[df['Emotion'] == 'sadness']
type(sadness)

pandas.core.frame.DataFrame

In [7]:
sadness.head()

Unnamed: 0,Emotion,Clean_text
0,sadness,didnt feel humiliated
1,sadness,feeling hopeless damned hopeful cares awake
5,sadness,ive feeling little burdened lately wasnt sure
10,sadness,feel like suffering seeing mean
13,sadness,feel low energy thirsty


In [8]:
anger=df[df['Emotion'] == 'anger']
anger.head()

Unnamed: 0,Emotion,Clean_text
2,anger,im grabbing minute post feel greedy wrong
4,anger,feeling grouchy
12,anger,think easiest time year feel dissatisfied
20,anger,feel irritated rejected saying
24,anger,feel like fucked dont usually eat morning


In [9]:
love=df[df['Emotion'] == 'love']
love.head()

Unnamed: 0,Emotion,Clean_text
3,love,feeling nostalgic fireplace know property
9,love,feel romantic
47,love,let sad feeling want accepted home
61,love,ate feel gentle tingle feeling healing taking ...
68,love,suppose truth needs shared havent feeling fait...


In [10]:
surprise=df[df['Emotion'] == 'surprise']
surprise.head()

Unnamed: 0,Emotion,Clean_text
6,surprise,ive taking milligrams times recommended ive fa...
32,surprise,seen heard read past couple days left feeling ...
57,surprise,feeling pleasantly surprised supportiveness ea...
64,surprise,nearly finished week detox feel amazing
129,surprise,feel stranger strange land raising son place f...


In [11]:
fear=df[df['Emotion'] == 'fear']
fear.head()

Unnamed: 0,Emotion,Clean_text
7,fear,feel confused life teenager jaded year old man
19,fear,feel compromised skeptical value unit work
21,fear,feeling completely overwhelmed strategies help...
31,fear,remember feeling acutely distressed days
53,fear,stymied little bit wrote feeling unsure story ...


In [12]:
happy=df[df['Emotion'] == 'happy']
happy.head()

Unnamed: 0,Emotion,Clean_text
8,happy,petronas years feel petronas performed huge pr...
11,happy,feel running divine experience expect type spi...
14,happy,immense sympathy general point possible proto ...
15,happy,feel reassured anxiety
22,happy,feeling amused delighted


# Feature Extraction and Identification

### Tesing

In [13]:
from sklearn.feature_extraction.text import CountVectorizer

In [43]:
from sklearn.feature_selection import mutual_info_classif, SelectKBest, chi2

In [14]:
cv=CountVectorizer(max_features=5000, ngram_range=(1,4))

In [15]:
X=cv.fit_transform(df['Clean_text']).toarray()

In [16]:
X.shape

(21459, 5000)

In [17]:
type(X)

numpy.ndarray

In [18]:
X

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [19]:
cv.get_feature_names()[:20]

['abandoned',
 'abilities',
 'ability',
 'abit',
 'able',
 'able feel',
 'able find',
 'able help',
 'absolute',
 'absolutely',
 'abuse',
 'abused',
 'accept',
 'acceptable',
 'acceptance',
 'accepted',
 'accepted allowed',
 'accepting',
 'access',
 'accident']

In [20]:
X_sadness=cv.fit_transform(sadness['Clean_text']).toarray()

In [21]:
X_sadness.shape

(6265, 5000)

In [22]:
cv.get_feature_names()[:20]

['abandoned',
 'abilities',
 'ability',
 'abit',
 'able',
 'able feel',
 'absolutely',
 'absolutely devastated',
 'absurd',
 'abuse',
 'abused',
 'accept',
 'accepted',
 'accepted allowed',
 'accepting',
 'accident',
 'accidentally',
 'accompanied',
 'accomplished',
 'accomplishment']

In [23]:
sadness_df=pd.DataFrame(X_sadness, columns=cv.get_feature_names())
sadness_df.head()

Unnamed: 0,abandoned,abilities,ability,abit,able,able feel,absolutely,absolutely devastated,absurd,abuse,...,yelled,yes,yesterday,youd,young,younger,youre,youre feeling,youve,youve trust
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
max(sadness_df['abuse'])

2

In [28]:
max(sadness_df['accomplishment'])

1

In [29]:
y_sadness=sadness['Emotion']
y_sadness.shape

(6265,)

In [31]:
mi_sad=mutual_info_classif(X_sadness, y_sadness)

In [32]:
mi_sad

array([0., 0., 0., ..., 0., 0., 0.])

In [34]:
mi_sad=pd.Series(mi_sad)
mi_sad.index=sadness_df.columns
type(mi_sad)

pandas.core.series.Series

In [35]:
mi_sad.head()

abandoned    0.0
abilities    0.0
ability      0.0
abit         0.0
able         0.0
dtype: float64

In [37]:
mi_sad.sort_values(ascending=False).head(10)

feel              0.000399
fake              0.000239
feeling           0.000239
work              0.000080
admit             0.000080
abandoned         0.000000
overcome grief    0.000000
owned             0.000000
overwhelmingly    0.000000
overwhelming      0.000000
dtype: float64

In [41]:
mi_sad_top5=SelectKBest(mutual_info_classif, k=20)
mi_sad_top5.fit(X_sadness, y_sadness)

SelectKBest(k=20,
            score_func=<function mutual_info_classif at 0x0000025F75D909D0>)

In [42]:
sadness_df.columns[mi_sad_top5.get_support()]

Index(['feel', 'feeling', 'know', 'time feel', 'year feeling', 'year old',
       'year school', 'years', 'years ago', 'yell', 'yelled', 'yes',
       'yesterday', 'youd', 'young', 'younger', 'youre', 'youre feeling',
       'youve', 'youve trust'],
      dtype='object')

In [45]:
mi_sad_chi=chi2(X_sadness, y_sadness)
mi_sad_chi=pd.Series(mi_sad_chi[1])
mi_sad_chi.index=sadness_df.columns
mi_sad_chi.sort_index(ascending=False).head(20)

youve trust     NaN
youve           NaN
youre feeling   NaN
youre           NaN
younger         NaN
young           NaN
youd            NaN
yesterday       NaN
yes             NaN
yelled          NaN
yell            NaN
years ago       NaN
years           NaN
year school     NaN
year old        NaN
year feeling    NaN
year            NaN
yeah            NaN
ya              NaN
www             NaN
dtype: float64

In [46]:
X_anger=cv.fit_transform(anger['Clean_text']).toarray()
anger_df=pd.DataFrame(X_anger, columns=cv.get_feature_names())
y_anger=anger['Emotion']
mi_anger_top5=SelectKBest(mutual_info_classif, k=20)
mi_anger_top5.fit(X_anger, y_anger)
anger_df.columns[mi_anger_top5.get_support()]

Index(['feel', 'years life', 'years old', 'yeast', 'yell', 'yelled', 'yelling',
       'yes', 'yesterday', 'yomis', 'yomis voice', 'york', 'youd', 'young',
       'younger', 'younger disabled', 'youngsters', 'youre', 'youre feeling',
       'youth'],
      dtype='object')

### Select k Best

In [47]:
def feature_identification(data):
    X=cv.fit_transform(data['Clean_text']).toarray()
    df=pd.DataFrame(X, columns=cv.get_feature_names())
    y=data['Emotion']
    feature_top20=SelectKBest(mutual_info_classif, k=20)
    feature_top20.fit(X, y)
    return df.columns[feature_top20.get_support()]

In [48]:
sadness_top20_features=feature_identification(sadness)
print(sadness_top20_features)

Index(['feel', 'feel like', 'feeling', 'things', 'year feeling', 'year old',
       'year school', 'years', 'years ago', 'yell', 'yelled', 'yes',
       'yesterday', 'youd', 'young', 'younger', 'youre', 'youre feeling',
       'youve', 'youve trust'],
      dtype='object')


In [49]:
anger_top20_features=feature_identification(anger)
print(anger_top20_features)

Index(['feel', 'years life', 'years old', 'yeast', 'yell', 'yelled', 'yelling',
       'yes', 'yesterday', 'yomis', 'yomis voice', 'york', 'youd', 'young',
       'younger', 'younger disabled', 'youngsters', 'youre', 'youre feeling',
       'youth'],
      dtype='object')


In [50]:
love_top20_features=feature_identification(love)
print(love_top20_features)

Index(['feel', 'wrote', 'www', 'ya', 'yash', 'year', 'year feel', 'year old',
       'year olds', 'years', 'years ago', 'yes', 'yesterday', 'yoga',
       'yoga class', 'young', 'younger', 'youre', 'youtube', 'youve'],
      dtype='object')


In [51]:
surprise_top20_features=feature_identification(surprise)
print(surprise_top20_features)

Index(['work', 'worked', 'workers', 'working', 'works', 'world', 'worth',
       'wouldnt', 'write', 'writing', 'written', 'wrong', 'year', 'year old',
       'years', 'yesterday', 'yesterday feel', 'young', 'younger', 'youre'],
      dtype='object')


In [52]:
fear_top20_features=feature_identification(fear)
print(fear_top20_features)

Index(['feel', 'xmas', 'xmas hate', 'yeah', 'year', 'year ago', 'year old',
       'years', 'years ago', 'years feeling', 'years old', 'yes', 'yesterday',
       'yesterday feeling', 'yoga', 'youll', 'young', 'younger', 'youre',
       'youre feeling'],
      dtype='object')


In [53]:
happy_top20_features=feature_identification(happy)
print(happy_top20_features)

Index(['feel', 'feel like', 'feeling', 'years ago', 'years feel', 'yes',
       'yesterday', 'yo', 'yoga', 'youll', 'young', 'young man', 'young woman',
       'younger', 'youre', 'youth', 'youtube', 'youve', 'zero', 'zone'],
      dtype='object')


In [54]:
df_top20_features=feature_identification(df)
print(df_top20_features)

Index(['conversation', 'distressed', 'divine', 'feel devastated',
       'feel strange', 'feeling annoyed', 'feeling awfully', 'feeling ok',
       'feeling tortured', 'impressed', 'like dont', 'lovely',
       'makes feel like', 'negative emotions', 'numb', 'paris', 'result',
       'small', 'strange', 'work'],
      dtype='object')


### Mutual Info Classifier

In [55]:
def feature_identification(data):
    X=cv.fit_transform(data['Clean_text']).toarray()
    df=pd.DataFrame(X, columns=cv.get_feature_names())
    y=data['Emotion']
    feature_top20=mutual_info_classif(X, y)
    feature_top20=pd.Series(feature_top20)
    feature_top20.index=df.columns
    return feature_top20.sort_values(ascending=False).head(20)

In [56]:
sadness_top20_features=feature_identification(sadness)
print(sadness_top20_features)

feel               0.000559
feeling            0.000160
need               0.000080
overall feel       0.000000
overwhelmingly     0.000000
overwhelming       0.000000
overwhelmed        0.000000
overly             0.000000
overcome grief     0.000000
overcome           0.000000
abandoned          0.000000
oz                 0.000000
overall            0.000000
outsider           0.000000
outside world      0.000000
outside window     0.000000
outside control    0.000000
outside            0.000000
outings            0.000000
owned              0.000000
dtype: float64


In [57]:
anger_top20_features=feature_identification(anger)
print(anger_top20_features)

feel                                3.341129e-04
abandoned                           1.221245e-15
parents feel enraged child          1.221245e-15
parents hours                       1.221245e-15
parents government need work        1.221245e-15
parents government need             1.221245e-15
parents government                  1.221245e-15
parents furious engagement busin    1.221245e-15
parents furious engagement          1.221245e-15
parents furious                     1.221245e-15
parents feel enraged                1.221245e-15
parents issues let                  1.221245e-15
parents feel                        1.221245e-15
parents exasperation oh ll          1.221245e-15
parents exasperation oh             1.221245e-15
parents exasperation                1.221245e-15
parents cos know id                 1.221245e-15
parents cos know                    1.221245e-15
parents issues                      1.221245e-15
parents issues let know             1.221245e-15
dtype: float64


In [58]:
love_top20_features=feature_identification(love)
print(love_top20_features)

ability                            0
plan excellent                     0
plan run miles                     0
plan run                           0
plan jumping ship mid              0
plan jumping ship                  0
plan jumping                       0
plan excellent catholic schools    0
plan excellent catholic            0
plan doesnt taste sweet            0
plan stand                         0
plan doesnt taste                  0
plan doesnt                        0
plan arrangements real pain        0
plan arrangements real             0
plan arrangements                  0
plan action                        0
plan                               0
plan run miles morning             0
plan stand money                   0
dtype: int32


In [59]:
surprise_top20_features=feature_identification(surprise)
print(surprise_top20_features)

1980                                 1.221245e-15
personal sandblog admitting          1.221245e-15
personally feel                      1.221245e-15
personally                           1.221245e-15
personality novel strange cross      1.221245e-15
personality novel strange            1.221245e-15
personality novel                    1.221245e-15
personality                          1.221245e-15
personal sandblog admitting helps    1.221245e-15
personal sandblog                    1.221245e-15
personally feel amazed managed       1.221245e-15
personal life feel curious           1.221245e-15
personal life feel                   1.221245e-15
personal life                        1.221245e-15
personal                             1.221245e-15
person swoons life stories           1.221245e-15
person swoons life                   1.221245e-15
person swoons                        1.221245e-15
personally feel amazed               1.221245e-15
persons                              1.221245e-15


In [60]:
fear_top20_features=feature_identification(fear)
print(fear_top20_features)

10                              0
pretty scared silly             0
pretty shy right                0
pretty shy                      0
pretty shaky sad                0
pretty shaky business           0
pretty shaky                    0
pretty shaken moment            0
pretty shaken                   0
pretty scared                   0
pretty soon                     0
pretty restless right typing    0
pretty restless right           0
pretty restless                 0
pretty paranoid trying cover    0
pretty paranoid trying          0
pretty paranoid lot dont        0
pretty paranoid lot             0
pretty shy right dont           0
pretty strange                  0
dtype: int32


In [61]:
happy_top20_features=feature_identification(happy)
print(happy_top20_features)

feel                 0.000498
feeling              0.000142
optimistic today     0.000000
organic              0.000000
ordered              0.000000
order                0.000000
orange               0.000000
options              0.000000
option               0.000000
optimistic future    0.000000
original             0.000000
optimistic           0.000000
opportunity          0.000000
opinions             0.000000
opinion              0.000000
openly               0.000000
opening              0.000000
open suggestions     0.000000
organised            0.000000
ought                0.000000
dtype: float64


In [62]:
df_top20_features=feature_identification(df)
print(df_top20_features)

horny             0.016358
orange            0.016170
punished          0.015684
feel reluctant    0.014807
winter            0.014789
supportive        0.014642
strange           0.014612
gentle            0.014517
amazed            0.013992
saving            0.013916
frightened        0.013617
offended          0.013606
deprived          0.013087
camp              0.012975
soup              0.012950
feeling guilty    0.012854
nd                0.012535
isolated          0.012428
feel shy          0.012302
british           0.012196
dtype: float64
