In [1]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import re
from textblob import TextBlob
# https://github.com/cjhutto/vaderSentiment

In [2]:
analyzer = SentimentIntensityAnalyzer()

In [3]:
def clean(text):
    text = re.sub('[^A-Za-z]+', ' ', text)
    return text

In [4]:
with open("part_1.txt") as transcript:
    lines = transcript.readlines()
lines_clean = clean(lines[0])
ngram_object = TextBlob(lines_clean)
ngrams = ngram_object.ngrams(n=4) 

In [5]:
for ngram in ngrams:
    ngram_str = ' '.join(ngram)
    sentiment = analyzer.polarity_scores(ngram_str)
    if sentiment['compound'] < 0:
        print('Negative: ' + ngram_str + ' ' + str(sentiment['compound']))
    if sentiment['compound'] > 0:
        print('Positive: ' + ngram_str + ' ' + str(sentiment['compound']))

Positive: Okay I m starting 0.2263
Negative: a little bit intimidating -0.3862
Negative: little bit intimidating for -0.3862
Negative: bit intimidating for me -0.4404
Negative: intimidating for me I -0.4404
Positive: me I m sure 0.3182
Positive: I m sure it 0.3182
Positive: m sure it s 0.3182
Positive: sure it s fine 0.4767
Positive: it s fine and 0.2023
Positive: s fine and then 0.2023
Positive: fine and then I 0.2023
Positive: cookies I m fine 0.2023
Positive: I m fine with 0.2023
Positive: m fine with those 0.2023
Positive: fine with those Okay 0.4019
Positive: with those Okay Coronavirus 0.2263
Positive: those Okay Coronavirus in 0.2263
Positive: Okay Coronavirus in the 0.2263
Positive: Slovak Republic very cool 0.3804
Positive: Republic very cool I 0.3804
Positive: very cool I like 0.6549
Positive: cool I like how 0.5859
Positive: I like how it 0.3612
Positive: like how it has 0.3612


In [6]:
sentiment = analyzer.polarity_scores("It was very problematic, I wasn't able to find it at all")
print(sentiment)

sentiment = analyzer.polarity_scores("best")
print(sentiment)

sentiment = analyzer.polarity_scores("not the best")
print(sentiment)

sentiment = analyzer.polarity_scores("not best")
print(sentiment)

sentiment = analyzer.polarity_scores("confident")
print(sentiment)

sentiment = analyzer.polarity_scores("it was not a problem")
print(sentiment)

sentiment = analyzer.polarity_scores("The acting was good , but the movie could have been better")
print(sentiment)

sentiment = analyzer.polarity_scores("information looks outdated")
print(sentiment)

{'neg': 0.225, 'neu': 0.775, 'pos': 0.0, 'compound': -0.4927}
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.6369}
{'neg': 0.627, 'neu': 0.373, 'pos': 0.0, 'compound': -0.5216}
{'neg': 0.771, 'neu': 0.229, 'pos': 0.0, 'compound': -0.5216}
{'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4939}
{'neg': 0.0, 'neu': 0.639, 'pos': 0.361, 'compound': 0.3089}
{'neg': 0.0, 'neu': 0.633, 'pos': 0.367, 'compound': 0.7003}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


In [7]:
# compound (computed by normalizing the scores above)

In [8]:
# VADER Sentiment Analysis. VADER (Valence Aware Dictionary and sEntiment Reasoner) is a lexicon and rule-based sentiment analysis tool that is specifically attuned to sentiments expressed in social media, and works well on texts from other domains.

In [14]:
sentiment = analyzer.polarity_scores("I don't know")
print(sentiment)
# could artificially modify polarity of see
sentiment = analyzer.polarity_scores("not know")
print(sentiment)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


In [10]:
new_words = {
    'see': 2.0,
    'find': 2.0,
}

analyzer.lexicon.update(new_words)

In [11]:
sentiment = analyzer.polarity_scores("not find")
print(sentiment)

{'neg': 0.713, 'neu': 0.287, 'pos': 0.0, 'compound': -0.357}
