# A MongoDB JSON Document Database
- The purpose of this project is to store and search JSON for 10,000 streamed tweets about 100 U.S. senators
- Summarize top 10 by tweet count
- Display interactive map containing tweet count summaries
- 10,000 tweets can take substantial time
- Possible enhancement — Use sentiment analysis to count positive, negative and neutral tweets mentioning each senator’s handle

In [1]:
# enable high-res images in notebook
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

### Use Tweepy to Authenticate with Twitter and Get the API Object

In [2]:
import tweepy, keys

auth = tweepy.OAuthHandler(
    keys.consumer_key, keys.consumer_secret)
auth.set_access_token(keys.access_token, 
    keys.access_token_secret)

In [3]:
api = tweepy.API(auth, wait_on_rate_limit=True, 
                 wait_on_rate_limit_notify=True)   

### Load the Senators' Data

In [4]:
import pandas as pd

senators_df = pd.read_csv('senators.csv')

senators_df['TwitterID'] = senators_df['TwitterID'].astype(str)

pd.options.display.max_columns = 6

senators_df.head()

Unnamed: 0,State,Name,Party,TwitterHandle,TwitterID
0,AL,Richard Shelby,R,SenShelby,21111098
1,AL,Doug Jomes,D,SenDougJones,941080085121175552
2,AK,Lisa Murkowski,R,lisamurkowski,18061669
3,AK,Dan Sullivan,R,SenDanSullivan,2891210047
4,AZ,Martha McSally,R,SenMcSallyAZ,2964949642


### Configuring the MongoClient

In [5]:
from pymongo import MongoClient

atlas_client = MongoClient(keys.mongo_connection_string)

db = atlas_client.senators

### Setting up Tweet Stream

In [6]:
from tweetlistener import TweetListener

In [7]:
tweet_limit = 500

In [8]:
import json

class TweetListener(tweepy.StreamListener):
    """Handles incoming Tweet stream."""

    def __init__(self, api, database, limit=10000):
        """Create instance variables for tracking number of tweets."""
        self.db = database
        self.tweet_count = 0
        self.TWEET_LIMIT = limit  # 10,000 by default
        super().__init__(api)  # call superclass's init

    def on_connect(self):
        """Called when your connection attempt is successful, enabling 
        you to perform appropriate application tasks at that point."""
        print('Successfully connected to Twitter\n')

    def on_data(self, data):
        """Called when Twitter pushes a new tweet to you."""
        self.tweet_count += 1  # track number of tweets processed
        json_data = json.loads(data)  # convert string to JSON
        self.db.tweets.insert_one(json_data)  # store in tweets collection
        print(f'    Screen name: {json_data["user"]["name"]}') 
        print(f'     Created at: {json_data["created_at"]}')         
        print(f'Tweets received: {self.tweet_count}')         

        # if TWEET_LIMIT is reached, return False to terminate streaming
        return self.tweet_count != self.TWEET_LIMIT
    
    def on_error(self, status):
        print(f'Error: {status}')
        return True

In [9]:
twitter_stream = tweepy.Stream(api.auth, 
    TweetListener(api, db, tweet_limit))

In [10]:
twitter_stream.filter(track=senators_df.TwitterHandle.tolist(),
    follow=senators_df.TwitterID.tolist())

Successfully connected to Twitter

    Screen name: 沙广白左
     Created at: Fri Apr 09 16:03:15 +0000 2021
Tweets received: 1
    Screen name: scubaemt
     Created at: Fri Apr 09 16:03:16 +0000 2021
Tweets received: 2
    Screen name: Roberta Eidman Has Started Prepping for 2022
     Created at: Fri Apr 09 16:03:16 +0000 2021
Tweets received: 3
    Screen name: Peggy Stuart, Pragmatic Progressive
     Created at: Fri Apr 09 16:03:16 +0000 2021
Tweets received: 4
    Screen name: JoeManchin’sDreamJournal
     Created at: Fri Apr 09 16:03:16 +0000 2021
Tweets received: 5
    Screen name: Donna Adamski
     Created at: Fri Apr 09 16:03:18 +0000 2021
Tweets received: 6
    Screen name: East Valley Indivisibles
     Created at: Fri Apr 09 16:03:18 +0000 2021
Tweets received: 7
    Screen name: Maureen Copatch
     Created at: Fri Apr 09 16:03:18 +0000 2021
Tweets received: 8
    Screen name: Laura rutmanis
     Created at: Fri Apr 09 16:03:18 +0000 2021
Tweets received: 9
    Screen name: De

    Screen name: Peggy Stuart, Pragmatic Progressive
     Created at: Fri Apr 09 16:03:48 +0000 2021
Tweets received: 83
    Screen name: Brenda
     Created at: Fri Apr 09 16:03:48 +0000 2021
Tweets received: 84
    Screen name: Stephanie 😷
     Created at: Fri Apr 09 16:03:48 +0000 2021
Tweets received: 85
    Screen name: Debbylu2u
     Created at: Fri Apr 09 16:03:49 +0000 2021
Tweets received: 86
    Screen name: Maite
     Created at: Fri Apr 09 16:03:50 +0000 2021
Tweets received: 87
    Screen name: archangel2
     Created at: Fri Apr 09 16:03:50 +0000 2021
Tweets received: 88
    Screen name: Keith Martin
     Created at: Fri Apr 09 16:03:50 +0000 2021
Tweets received: 89
    Screen name: Tallsquall #DemandBetterNews🌪
     Created at: Fri Apr 09 16:03:50 +0000 2021
Tweets received: 90
    Screen name: lawrence delibro
     Created at: Fri Apr 09 16:03:51 +0000 2021
Tweets received: 91
    Screen name: Edward Carl Lind lll
     Created at: Fri Apr 09 16:03:51 +0000 2021
Tweets 

    Screen name: Erica Phuckyah 🇨🇦💙🇺🇸
     Created at: Fri Apr 09 16:04:22 +0000 2021
Tweets received: 166
    Screen name: Murph
     Created at: Fri Apr 09 16:04:22 +0000 2021
Tweets received: 167
    Screen name: Sandra WilliamsLewis
     Created at: Fri Apr 09 16:04:23 +0000 2021
Tweets received: 168
    Screen name: Renaissance Man - Not
     Created at: Fri Apr 09 16:04:23 +0000 2021
Tweets received: 169
    Screen name: ❤💛💚Ephew Money💸🤑💹
     Created at: Fri Apr 09 16:04:24 +0000 2021
Tweets received: 170
    Screen name: PayingAttentionAndConcerned
     Created at: Fri Apr 09 16:04:24 +0000 2021
Tweets received: 171
    Screen name: Ebony Rose
     Created at: Fri Apr 09 16:04:25 +0000 2021
Tweets received: 172
    Screen name: Debra Brady
     Created at: Fri Apr 09 16:04:25 +0000 2021
Tweets received: 173
    Screen name: Gordon
     Created at: Fri Apr 09 16:04:25 +0000 2021
Tweets received: 174
    Screen name: Annette N. Tchelka
     Created at: Fri Apr 09 16:04:26 +0000 2

    Screen name: 💙🌊🌊🌊Joshua Linus💙Blue2022
     Created at: Fri Apr 09 16:04:55 +0000 2021
Tweets received: 247
    Screen name: Yaimara
     Created at: Fri Apr 09 16:04:55 +0000 2021
Tweets received: 248
    Screen name: TruthIsTruth
     Created at: Fri Apr 09 16:04:55 +0000 2021
Tweets received: 249
    Screen name: HagereyKibretey
     Created at: Fri Apr 09 16:04:56 +0000 2021
Tweets received: 250
    Screen name: Elsie
     Created at: Fri Apr 09 16:04:58 +0000 2021
Tweets received: 251
    Screen name: KeptByGod
     Created at: Fri Apr 09 16:04:58 +0000 2021
Tweets received: 252
    Screen name: Hannah Delahanty
     Created at: Fri Apr 09 16:04:59 +0000 2021
Tweets received: 253
    Screen name: Cozy16
     Created at: Fri Apr 09 16:05:00 +0000 2021
Tweets received: 254
    Screen name: Chris M
     Created at: Fri Apr 09 16:05:00 +0000 2021
Tweets received: 255
    Screen name: FP Smith
     Created at: Fri Apr 09 16:05:01 +0000 2021
Tweets received: 256
    Screen name: Ric

    Screen name: Tummler (means... Connector & Troublemaker)
     Created at: Fri Apr 09 16:05:33 +0000 2021
Tweets received: 329
    Screen name: Realbookmaker2
     Created at: Fri Apr 09 16:05:35 +0000 2021
Tweets received: 330
    Screen name: Shelley
     Created at: Fri Apr 09 16:05:35 +0000 2021
Tweets received: 331
    Screen name: Deborah Goldsmith
     Created at: Fri Apr 09 16:05:35 +0000 2021
Tweets received: 332
    Screen name: nihal
     Created at: Fri Apr 09 16:05:37 +0000 2021
Tweets received: 333
    Screen name: menswaxingstore
     Created at: Fri Apr 09 16:05:37 +0000 2021
Tweets received: 334
    Screen name: Uncle Giggles
     Created at: Fri Apr 09 16:05:38 +0000 2021
Tweets received: 335
    Screen name: Tiffany Davidson
     Created at: Fri Apr 09 16:05:38 +0000 2021
Tweets received: 336
    Screen name: Mike Ryan
     Created at: Fri Apr 09 16:05:39 +0000 2021
Tweets received: 337
    Screen name: Sommerset1616
     Created at: Fri Apr 09 16:05:39 +0000 2021

    Screen name: Rolled Up Sleeves
     Created at: Fri Apr 09 16:06:07 +0000 2021
Tweets received: 411
    Screen name: Silas Hunter 🙂
     Created at: Fri Apr 09 16:06:07 +0000 2021
Tweets received: 412
    Screen name: Toonces The Driving Cat
     Created at: Fri Apr 09 16:06:08 +0000 2021
Tweets received: 413
    Screen name: GovTrack.🇺🇸
     Created at: Fri Apr 09 16:06:08 +0000 2021
Tweets received: 414
    Screen name: TIMJPH
     Created at: Fri Apr 09 16:06:08 +0000 2021
Tweets received: 415
    Screen name: COOL VFX
     Created at: Fri Apr 09 16:06:08 +0000 2021
Tweets received: 416
    Screen name: freedomforever
     Created at: Fri Apr 09 16:06:09 +0000 2021
Tweets received: 417
    Screen name: Rowan Wolfe🏳️‍🌈USAF
     Created at: Fri Apr 09 16:06:09 +0000 2021
Tweets received: 418
    Screen name: Never Forget Jan. 6th, 2021
     Created at: Fri Apr 09 16:06:09 +0000 2021
Tweets received: 419
    Screen name: Sho'Nuff Skywalker
     Created at: Fri Apr 09 16:06:10 +0000

    Screen name: MarthaHC in Va
     Created at: Fri Apr 09 16:06:45 +0000 2021
Tweets received: 495
    Screen name: Eileen
     Created at: Fri Apr 09 16:06:46 +0000 2021
Tweets received: 496
    Screen name: ✝️LENAYE HEAVEN
     Created at: Fri Apr 09 16:06:46 +0000 2021
Tweets received: 497
    Screen name: Marilyn Muller
     Created at: Fri Apr 09 16:06:46 +0000 2021
Tweets received: 498
    Screen name: Pollyanne Hawkes
     Created at: Fri Apr 09 16:06:47 +0000 2021
Tweets received: 499
    Screen name: Dave Edwards
     Created at: Fri Apr 09 16:06:48 +0000 2021
Tweets received: 500


In [11]:
db.tweets.create_index([('$**', 'text')])

'$**_text'

### Counting tweets for Each Senator

In [12]:
tweet_counts = []

In [13]:
for senator in senators_df.TwitterHandle:
    tweet_counts.append(db.tweets.count_documents(
    {'$text': {'$search': senator}}))

### Show tweet counts for each senator

In [14]:
tweet_counts_df = senators_df.assign(Tweets=tweet_counts)

tweet_counts_df.sort_values(by='Tweets', ascending=False).head(10)

Unnamed: 0,State,Name,Party,TwitterHandle,TwitterID,Tweets
95,WV,Joe Manchin,D,Sen_JoeManchin,234374703,304
62,NY,Chuck Schumer,D,SenSchumer,17494010,164
63,NY,Kirsten Gillibrand,D,SenGillibrand,72198806,155
83,TN,Marsha Blackburn,R,MarshaBlackburn,278145569,155
84,TX,John Cornyn,R,JohnCornyn,13218102,107
85,TX,Ted Cruz,R,SenTedCruz,1074480192,75
24,IL,Tammy Duckworth,D,SenDuckworth,1058520120,69
33,KY,Rand Paul,R,RandPaul,216881337,49
15,DE,Chris Coons,D,ChrisCoons,15324851,49
17,FL,Marco Rubio,R,marcorubio,15745368,47


### Get the state locations for plotting markers

In [15]:
from geopy import OpenMapQuest
import time
from state_codes import state_codes

geo = OpenMapQuest(api_key=keys.mapquest_key)

In [16]:
states = tweet_counts_df.State.unique()

states.sort()

In [17]:
locations = []

In [18]:
from IPython.display import clear_output

for state in states:
    processed = False
    delay = .1 
    while not processed:
        try: 
            locations.append(geo.geocode(state_codes[state] + ', USA'))
            clear_output()  # clear cell's current output before showing next one
            print(locations[-1])  
            processed = True
        except:  # timed out, so wait before trying again
            print('OpenMapQuest service timed out. Waiting.')
            time.sleep(delay)
            delay += .1

Wyoming, United States of America


### Grouping the tweet counts by state

In [19]:
tweets_counts_by_state = tweet_counts_df.groupby(
    'State', as_index=False).sum()

In [20]:
tweets_counts_by_state.head()

Unnamed: 0,State,Tweets
0,AK,7
1,AL,4
2,AR,13
3,AZ,25
4,CA,10


### Creating the Map

In [21]:
import folium

In [22]:
usmap = folium.Map(location=[39.8283, -98.5795], 
                   zoom_start=4, detect_retina=True,
                   tiles='Stamen Toner')

In [23]:
choropleth = folium.Choropleth(
    geo_data='us-states.json',
    name='choropleth',
    data=tweets_counts_by_state,
    columns=['State', 'Tweets'],
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Tweets by State'
).add_to(usmap)

layer = folium.LayerControl().add_to(usmap)

In [24]:
sorted_df = tweet_counts_df.sort_values(by='Tweets', ascending=False)

for index, (name, group) in enumerate(sorted_df.groupby('State')):
    strings = [state_codes[name]]  # used to assemble popup text

    for s in group.itertuples():
        strings.append(f'{s.Name} ({s.Party}); Tweets: {s.Tweets}')
        
    text = '<br>'.join(strings)  
    popup = folium.Popup(text, max_width=200)
    marker = folium.Marker(
        (locations[index].latitude, locations[index].longitude), 
        popup=popup)
    marker.add_to(usmap) 

### Saving and Displaying the Map

In [25]:
usmap.save('SenatorsTweets.html')

In [26]:
from IPython.display import IFrame
IFrame(src='./SenatorsTweets.html', width=800, height=450)