## Opening the file

In [2]:
import os

import pandas as pd

# Location of the raw emotion dataset. The machine-specific default can be
# overridden through the EMOTION_DATASET_CSV environment variable, so the
# notebook also runs on other machines without editing this cell.
data = os.environ.get(
    "EMOTION_DATASET_CSV",
    "C:\\Users\\sowky\\Downloads\\emotion_dataset_raw.csv",
)

# Read the CSV into a DataFrame and preview ten random rows. The fixed
# random_state makes the preview identical on every Restart & Run All.
df = pd.read_csv(data)
df.sample(10, random_state=42)

Unnamed: 0,Emotion,Text
32108,fear,Remember when Mark Wahlberg fingered Reese on ...
21716,disgust,A feeling of revulsion or profound disapproval...
5677,fear,so i saw @TasteUrDREAMS on Pitkin earlier toda...
10523,fear,@AlwaysME04 yeah thats prob it ... To save fac...
4433,anger,You can't do that ! You can't charge me for a...
3311,joy,this pregnant guy about to give us a test tomo...
8776,surprise,@Shutrbecca Poinsettia plants being put out ri...
27643,joy,When I received the acceptance form for the se...
14550,fear,"“@RevRunWisdom not afraid of tomorrow, for I h..."
18084,anger,"Following an argument with my brother, he brok..."


## Getting the Bing Liu Lexicon

In [3]:
# NOTE(review): `preprocessing` is not referenced in any cell visible here -
# confirm whether a later cell needs it.
from sklearn import preprocessing
import nltk
# Fetch the 'opinion_lexicon' package so the `opinion_lexicon` corpus reader
# imported below can be queried in the following cells.
nltk.download('opinion_lexicon')
from nltk.corpus import opinion_lexicon
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package opinion_lexicon to
[nltk_data]     C:\Users\sowky\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


In [4]:
# Gather the figures first, then report them: overall lexicon size plus the
# first ten entries of the positive and the negative word lists.
lexicon_size = len(opinion_lexicon.words())
positive_examples = opinion_lexicon.positive()[:10]
negative_examples = opinion_lexicon.negative()[:10]

print('Total number of words in opinion lexicon', lexicon_size)
print('Examples of positive words in opinion lexicon', positive_examples)
print('Examples of negative words in opinion lexicon', negative_examples)

Total number of words in opinion lexicon 6789
Examples of positive words in opinion lexicon ['a+', 'abound', 'abounds', 'abundance', 'abundant', 'accessable', 'accessible', 'acclaim', 'acclaimed', 'acclamation']
Examples of negative words in opinion lexicon ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably', 'abominate', 'abomination', 'abort', 'aborted']


In [5]:
# Tokenizer models required by word_tokenize. 'punkt' serves older NLTK
# releases; NLTK 3.9+ looks for 'punkt_tab' instead, so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')

# Score contributed by each lexicon hit, and the word -> score lookup table
# used to score the emotion texts (filled from the opinion lexicon).
pos_score = 1
neg_score = -1
word_dict = {}

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sowky\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [10]:
# Register every positive lexicon word with the positive score first ...
word_dict.update(dict.fromkeys(opinion_lexicon.positive(), pos_score))
# ... then every negative word; a word present in both lists therefore ends
# up with the negative score, because this update runs last.
word_dict.update(dict.fromkeys(opinion_lexicon.negative(), neg_score))

## Function to compute the sentiment score of a text

In [11]:
def bing_liu_score(Text):
    """Return the net lexicon-based sentiment score of ``Text``.

    The text is lower-cased and split with ``word_tokenize``. Each token that
    is a key of ``word_dict`` contributes its stored score; tokens that are
    not in the dictionary contribute nothing.

    Args:
        Text: The string to score.

    Returns:
        The sum of the scores of all matched tokens (0 when nothing matches).
    """
    tokens = word_tokenize(Text.lower())
    return sum(word_dict[token] for token in tokens if token in word_dict)

## Replace missing texts so every entry can be tokenized, then apply the scoring function


In [12]:
# Replace missing texts with a placeholder and coerce every entry to str so
# bing_liu_score can always call .lower() on it. The result is assigned back
# to the column: calling fillna(..., inplace=True) on a selected column acts
# on an intermediate object and leaves df untouched under pandas copy-on-write.
df['Text'] = df['Text'].fillna('no review').astype(str)
df['Bing_Liu_Score'] = df['Text'].apply(bing_liu_score)

In [14]:
# Show the first ten rows, restricted to the label, the text and its score.
df.head(10)[['Emotion', 'Text', 'Bing_Liu_Score']]

Unnamed: 0,Emotion,Text,Bing_Liu_Score
0,neutral,Why ?,0
1,joy,Sage Act upgrade on my to do list for tommorow.,0
2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...,-1
3,joy,Such an eye ! The true hazel eye-and so brill...,5
4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...,-1
5,fear,I'm expecting an extremely important phonecall...,0
6,sadness,.Couldnt wait to see them live. If missing th...,-1
7,surprise,maken Tip 2: Stop op een moment dat je het hel...,0
8,surprise,En dan krijg je ff een cadeautje van een tweep...,0
9,surprise,@1116am Drummer Boy bij op verzoek van @BiemO...,0


In [16]:
# Average Bing Liu score for each emotion label.
df.groupby('Emotion')[['Bing_Liu_Score']].mean()

Unnamed: 0_level_0,Bing_Liu_Score
Emotion,Unnamed: 1_level_1
anger,-0.71515
disgust,-0.558411
fear,-0.562292
joy,0.510819
neutral,0.11402
sadness,-0.488843
shame,-0.767123
surprise,0.174298
