In [None]:
import pandas as pd
import os

In [None]:
# Directory that holds the datasets read by the cells below. Set the
# SENTIMENT_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset, the original local folder is used unchanged.
DATA_DIR = os.environ.get(
    "SENTIMENT_DATA_DIR",
    "D:\\OneDrive - Manipal Global Education Services Pvt Ltd\\Official\\MGAIT\\IPBA\\Datasets",
)
os.chdir(DATA_DIR)

### Sentiment Prediction Strategy 1: Word-based scoring
AFINN is a list of English words rated for valence with an integer
between minus five (negative) and plus five (positive). The words have
been manually labeled by Finn Årup Nielsen in 2009-2011. The file
is tab-separated. There are two versions:

AFINN-111: Newest version with 2477 words and phrases.

AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
are 1480 lines, as some words are listed twice. The word list is not
entirely in alphabetical order.

An evaluation of the word list is available in:

Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903

Valence, as used in psychology, especially in discussing emotions, means the intrinsic attractiveness/"good"-ness (positive valence) or averseness/"bad"-ness (negative valence) of an event, object, or situation. ... For example, emotions popularly referred to as "negative", such as anger and fear, have negative valence.
    - source Wikipedia

In [None]:
# Load the tab-separated AFINN-111 lexicon. header=None tells pandas the file
# has no header row, so the first line is read as data.
afinn = pd.read_csv("AFINN-111.txt",sep="\t",header=None)

In [None]:
afinn.head()

In [None]:
afinn.columns = ["Term","Score"]

In [None]:
afinn.head(n=5)

In [None]:
# Converting the DF to a dictionary for faster lookups

In [None]:
# Each row of the frame is a (term, score) pair; turn the rows into a
# term -> score dictionary.
term_scores = dict(afinn.itertuples(index=False, name=None))

In [None]:
# Preview only the first few entries; printing the whole lexicon floods the
# cell output and bloats the saved notebook.
print(dict(list(term_scores.items())[:10]))

In [None]:
term_scores['abandon']

In [None]:
term_scores['marvellous']

In [None]:
# If a word is present in the dict, take its sentiment score;
# otherwise use a default sentiment score of 0.

In [None]:
term_scores.get('marvellous',0)

In [None]:
term_scores.get('amazing',0)

In [None]:
term_scores.get('good',0)

In [None]:
txt = "nlp is amazing"

In [None]:
# Score "nlp is amazing" by hand: look up each word and add the results.
(
    term_scores.get('nlp', 0)
    + term_scores.get('is', 0)
    + term_scores.get('amazing', 0)
)

In [None]:
'''
For every word in the sentence
get its sentiment score from the dict and add them up
'''

In [None]:
from nltk.tokenize import word_tokenize

In [None]:
# Add up the lexicon score of every token in txt; tokens that are not in the
# lexicon contribute 0.
score = 0
for word in word_tokenize(txt):
    score += term_scores.get(word, 0)
print(score)

In [None]:
txt = "amazing terrible pathetic"

In [None]:
def get_sentiment(sent):
    """Return the summed lexicon score of a sentence.

    The sentence is lower-cased and split with ``word_tokenize``. Each token
    is looked up in ``term_scores``; tokens that are not present count as 0.
    """
    return sum(term_scores.get(token, 0) for token in word_tokenize(sent.lower()))

In [None]:
get_sentiment(txt)

In [None]:
get_sentiment('This car is amazing with a terrible experience')

### Sentiment Prediction Strategy 2: VADER

**VADER (*V*alence *A*ware *D*ictionary and s*E*ntiment *R*easoner)**

VADER is a lexicon- and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media.

In [None]:
import nltk
nltk.download('vader_lexicon')

In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [None]:
# One analyser instance, created once and reused by every VADER call below.
analyser = SentimentIntensityAnalyzer()

In [None]:
analyser.polarity_scores("the food is great")

In [None]:
analyser.polarity_scores("the food is terrible")

In [None]:
analyser.polarity_scores("the food is marvelous")

In [None]:
analyser.polarity_scores("the food is good")

In [None]:
analyser.polarity_scores("the food is good!")

In [None]:
analyser.polarity_scores("the food is GOOD!")

In [None]:
analyser.polarity_scores("heard the news")

In [None]:
analyser.polarity_scores("heard the news smh")

In [None]:
analyser.polarity_scores("the food is good")

In [None]:
analyser.polarity_scores("the food is good :)")

In [None]:
analyser.polarity_scores("the food is good lol")['compound']

In [None]:
def get_vader_sentiment(sent):
    """Return the ``'compound'`` value from ``analyser.polarity_scores`` for ``sent``."""
    scores = analyser.polarity_scores(sent)
    return scores['compound']

In [None]:
get_vader_sentiment("this is HORRIBLE")