# 02. Polarity Analysis

In [7]:
from pymongo import MongoClient
import pandas as pd

#regex
import re

#NLTK
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords

nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('stopwords')

#TextBlob
from textblob import TextBlob

#SpaCy
import spacy
from spacy import displacy

#Others
import speech_recognition as sr #sudo pip3 install --upgrade speechrecognition



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/pablofdezc/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /home/pablofdezc/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/pablofdezc/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


## 1. We connect to the DB in MongoDB


In [30]:
client = MongoClient("localhost:27017")
db = client.get_database("DT")
collection = db.get_collection("tweets")

In [31]:
collection.find_one({})

{'_id': ObjectId('608c56f1673c051e14d40980'),
 '': '1',
 'tweet': 'Be sure to tune in and watch Donald Trump on Late Night with David Letterman as he presents the Top Ten List tonight!',
 'date': datetime.datetime(2009, 5, 4, 11, 54, 25),
 'rt': 510,
 'favourites': 917,
 'year': 2009}

## 2. Polarity of the tweets in 2020

In [43]:
def sentimentAnalysis(content):
    sia = SentimentIntensityAnalyzer()
    polarity = sia.polarity_scores(content)
    return polarity

In [44]:
# medir la poliralidad de los tweets del 2020 
tweets2020 = list(collection.find({"year": 2020}, {"tweet": 1, "_id": 0}))
tweets2020[:5]

[{'tweet': 'Best equipment & finest military in the World. On site quickly!https://twitter.com/heatherjones333/status/1212475089133944832 …'},
 {'tweet': 'Great job!https://twitter.com/TrumpWarRoom/status/1212391791774838784 …'},
 {'tweet': 'They don’t know how to do that!https://twitter.com/GeraldoRivera/status/1212402251806990336 …'},
 {'tweet': 'pic.twitter.com/VXeKiVzpTf'},
 {'tweet': 'pic.twitter.com/qOi7mpKcHY'}]

In [45]:
for i in tweets2020:
    print(sentimentAnalysis(i['tweet']))

{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'compound': 0.6696}
{'neg': 0.0, 'neu': 0.185, 'pos': 0.815, 'compound': 0.6588}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.411, 'pos': 0.589, 'compound': 0.8221}
{'neg': 0.0, 'neu': 0.696, 'pos': 0.304, 'compound': 0.2914}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.328, 'pos': 0.672, 'compound': 0.6239}
{'neg': 0.0, 'neu': 0.308, 'pos': 0.692, 'compound': 0.6696}
{'neg': 0.16, 'neu': 0.691, 'pos': 0.149, 'compound': 0.1779}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.417, 'pos': 0.583, 'compound': 0.4199}
{'neg': 0.791, 'neu': 0.209, 'pos': 0.0, 'compound': -0.5848}
{'neg': 0.413, 'n

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.892, 'pos': 0.108, 'compound': 0.4199}
{'neg': 0.2, 'neu': 0.622, 'pos': 0.178, 'compound': -0.2003}
{'neg': 0.174, 'neu': 0.826, 'pos': 0.0, 'compound': -0.6886}
{'neg': 0.0, 'neu': 0.541, 'pos': 0.459, 'compound': 0.7777}
{'neg': 0.0, 'neu': 0.538, 'pos': 0.462, 'compound': 0.8832}
{'neg': 0.095, 'neu': 0.633, 'pos': 0.272, 'compound': 0.4199}
{'neg': 0.0, 'neu': 0.677, 'pos': 0.323, 'compound': 0.9562}
{'neg': 0.0, 'neu': 0.736, 'pos': 0.264, 'compound': 0.2003}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.877, 'pos': 0.123, 'compound': 0.2732}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.781, 'pos': 0.219, 'compound': 0.6116}
{'neg': 0.0, 'neu': 0.46, 'pos': 0.54, 'compound': 0.5461}
{'n

{'neg': 0.0, 'neu': 0.417, 'pos': 0.583, 'compound': 0.4199}
{'neg': 0.0, 'neu': 0.313, 'pos': 0.687, 'compound': 0.6588}
{'neg': 0.0, 'neu': 0.264, 'pos': 0.736, 'compound': 0.4199}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.323, 'pos': 0.677, 'compound': 0.636}
{'neg': 0.0, 'neu': 0.597, 'pos': 0.403, 'compound': 0.9158}
{'neg': 0.0, 'neu': 0.654, 'pos': 0.346, 'compound': 0.784}
{'neg': 0.0, 'neu': 0.417, 'pos': 0.583, 'compound': 0.4199}
{'neg': 0.0, 'neu': 0.417, 'pos': 0.583, 'compound': 0.4199}
{'neg': 0.458, 'neu': 0.542, 'pos': 0.0, 'compound': -0.1759}
{'neg': 0.144, 'neu': 0.687, 'pos': 0.168, 'compound': 0.3595}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.635, 'pos': 0.365, 'compound': 0.784}
{'neg': 0.093, 'neu': 0.638, 'pos': 0.269, 'compound': 0.8236}
{'neg': 0.595, 'neu': 0.405, 'pos': 0.0, 'compound': -0.6601}
{'neg': 0.787, 'neu': 0.213, 'pos': 0.0, '

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.629, 'neu': 0.371, 'pos': 0.0, 'compound': -0.5255}
{'neg': 0.0, 'neu': 0.11, 'pos': 0.89, 'compound': 0.8439}
{'neg': 0.545, 'neu': 0.455, 'pos': 0.0, 'compound': -0.3384}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.417, 'pos': 0.583, 'compound': 0.4199}
{'neg': 0.0, 'neu': 0.763, 'pos': 0.237, 'compound': 0.4199}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.196, 'neu': 0.804, 'pos': 0.0, 'compound': -0.5261}
{'neg': 0.0, 'neu': 0.218, 'pos': 0.782, 'compound': 0.5562}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.616, 'pos': 0.384, 'compound': 0.8169}
{'neg': 0.0, 'neu': 0.763, 'pos': 0.237, 'compound': 0.4199}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu'

{'neg': 0.39, 'neu': 0.61, 'pos': 0.0, 'compound': -0.4926}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.805, 'neu': 0.195, 'pos': 0.0, 'compound': -0.628}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.373, 'neu': 0.627, 'pos': 0.0, 'compound': -0.8786}
{'neg': 0.761, 'neu': 0.239, 'pos': 0.0, 'compound': -0.4926}
{'neg': 0.13, 'neu': 0.69, 'pos': 0.18, 'compound': 0.3664}
{'neg': 0.109, 'neu': 0.67, 'pos': 0.221, 'compound': 0.3987}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.475, 'neu': 0.269, 'pos': 0.256, 'compound': -0.3869}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.264, 'pos': 0.736, 'compound': 0.4199}
{'neg': 0.41, 'neu': 0.59, 'pos': 0.0, 'compound': -0.8669}
{'neg': 0.0, 'neu': 0.589, 'pos': 0.411, 'compound': 0.4199}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.672, 'pos': 0.328, 'compound': 0.6588}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.133, 'neu': 0.867, 'pos': 0.0, 'compound': -0.5106}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.362, 'pos': 0.638, 'compound': 0.5461}
{'neg': 0.0, 'neu': 0.325, 'pos': 0.675, 'compound': 0.8528}
{'neg': 0.0, 'neu': 0.411, 'pos': 0.589, 'compound': 0.8221}
{'neg': 0.0, 'neu': 0.734, 'pos': 0.266, 'compound': 0.4389}
{'neg': 0.555, 'neu': 0.445, 'pos': 0.0, 'compound': -0.3595}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.264, 'pos': 0.736, 'compound': 0.4199}
{'neg': 0.506, 'neu': 0.494, 'pos': 0.0, 'compound': -0.8019}
{'neg': 0.0, 'neu': 0.747, 'pos': 0.253, 'compound': 0.8585}
{'neg': 0.0, 'neu': 0.532, 'pos': 0.468, 'compound': 0.5461}
{'

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.339, 'pos': 0.661, 'compound': 0.5983}
{'neg': 0.328, 'neu': 0.587, 'pos': 0.085, 'compound': -0.7959}
{'neg': 0.0, 'neu': 0.46, 'pos': 0.54, 'compound': 0.5461}
{'neg': 0.0, 'neu': 0.758, 'pos': 0.242, 'compound': 0.3031}
{'neg': 0.0, 'neu': 0.577, 'pos': 0.423, 'compound': 0.6588}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.121, 'neu': 0.879, 'pos': 0.0, 'compound': -0.6435}
{'neg': 0.0, 'neu': 0.687, 'pos': 0.313, 'compound': 0.923}
{'neg': 0.212, 'neu': 0.708, 'pos': 0.08, 'compound': -0.6239}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.558, 'pos': 0.442, 'compound': 0.9133}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
{'neg': 0.0, 'neu': 0.634, 'pos': 0.366, 'compound': 0.8122}
{'neg': 0

## 3. Polarity per year

In [46]:
year = list(collection.find({'year':{"$in": [2019]}}, {"tweet": 1, "_id" : 0}))
year

[{'tweet': 'Happy New Year!'},
 {'tweet': 'One thing has now been proven. The Democrats do not care about Open Borders and all of the crime and drugs that Open Borders bring!'},
 {'tweet': 'Congratulations to President @ JairBolsonaro who just made a great inauguration speech - the U.S.A. is with you!'},
 {'tweet': 'Border Security and the Wall “thing” and Shutdown is not where Nancy Pelosi wanted to start her tenure as Speaker! Let’s make a deal?'},
 {'tweet': 'Gas prices are low and expected to go down this year. This would be good!'},
 {'tweet': 'Washington Examiner - “MAGA list: 205 ‘historic results’ help Trump make case for 2020 re-election.” True!'},
 {'tweet': 'Mexico is paying for the Wall through the new USMCA Trade Deal. Much of the Wall has already been fully renovated or built. We have done a lot of work. $5.6 Billion Dollars that House has approved is very little in comparison to the benefits of National Security. Quick payback!'},
 {'tweet': 'Important meeting today on B

In [54]:
def sentimentAnalysis(year):
    sia = SentimentIntensityAnalyzer()
    polarity = sia.polarity_scores('content')
    pol = polarity['pos']
    return pol

In [55]:
pos = []
for i in year:
    pos.append(sentimentAnalysis(i['tweet']))
print(pos)

[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

 ### No sé poruqe me salen todo ceros