In [1]:
!pip install folium --quiet

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
import pandas as pd
import random

from mongo_aggregation_verbs import *

from lib import create_mongo_client_to_database_collection

# Shared handle used by every query in this notebook.
# NOTE(review): presumably the 'tweets' collection of the 'twitter' database,
# as a pymongo Collection -- confirm against
# lib.create_mongo_client_to_database_collection.
collection_reference = create_mongo_client_to_database_collection('twitter', 'tweets')

In [4]:
# Count with an empty filter, i.e. no restriction on which documents match.
collection_reference.count_documents({})

16417

## Tweets By Day

In [5]:
# Expression that parses the `created_at` string of a document into a date.
datestring_created_at = {"dateString": "$created_at"}
date_from_string = {"$dateFromString": datestring_created_at}

# Output field -> date operator used to extract it from the parsed date.
date_part_operators = {
    "year": "$year",
    "month": "$month",
    "day": "$dayOfMonth",
}

# First stage: keep only the year / month / day of each document.
# NOTE(review): PROJECT and GROUP come from mongo_aggregation_verbs;
# presumably the "$project" / "$group" stage names -- confirm.
date_to_id = {
    PROJECT: dict(
        {"_id": 0},
        **{
            part: {operator: {"date": date_from_string}}
            for part, operator in date_part_operators.items()
        }
    )
}

# Second stage: count documents per distinct (year, month, day).
group_by_date = {
    GROUP: {
        "tweets": {"$sum": 1},
        "_id": dict(year="$year", month="$month", day="$day"),
    }
}



In [6]:
# Run the two stages in order: extract the date parts, then count per date.
pipeline = [date_to_id, group_by_date]
cursor = collection_reference.aggregate(pipeline)

# Materialise the cursor into a list so pandas gets one record per group.
records = list(cursor)
daily_tweets = pd.DataFrame(records)

In [7]:
def dictionary_to_datestring(x):
    """Format a date-parts mapping as a ``month-day-year`` string.

    Parameters
    ----------
    x : mapping
        Must provide the keys ``'month'``, ``'day'`` and ``'year'``.

    Returns
    -------
    str
        The three values joined with dashes, e.g. ``"7-25-2018"``. Values are
        rendered with ``str.format`` defaults, so numbers are not zero-padded.

    Raises
    ------
    KeyError
        If any of the three keys is missing from ``x``.
    """
    parts = [x[key] for key in ('month', 'day', 'year')]
    return "{}-{}-{}".format(*parts)

In [8]:
# Raw aggregation result: one row per group, with the date parts nested in `_id`.
daily_tweets

Unnamed: 0,_id,tweets
0,"{'year': 2018, 'month': 7, 'day': 25}",7897
1,"{'year': 2018, 'month': 7, 'day': 21}",72
2,"{'year': 2018, 'month': 7, 'day': 24}",1936
3,"{'year': 2018, 'month': 7, 'day': 26}",6459
4,"{'year': None, 'month': None, 'day': None}",3
5,"{'year': 2018, 'month': 7, 'day': 23}",50


In [9]:
# Group key whose year, month and day are all None.
null_date = dict.fromkeys(('year', 'month', 'day'))

In [10]:
# Replace the all-None group key with NaN so that row can be dropped.
# Bracket indexing is used instead of attribute access: assigning through an
# attribute only updates a column when that column already exists.
daily_tweets['_id'] = daily_tweets['_id'].mask(daily_tweets['_id'] == null_date)

In [11]:
# Drop every row that contains a NaN (here: the row whose `_id` was masked).
# Mutates daily_tweets in place.
daily_tweets.dropna(inplace=True)

In [None]:
# Turn each {year, month, day} group key into a "month-day-year" string.
datestrings = daily_tweets['_id'].apply(dictionary_to_datestring)

# Build the date-indexed frame as one chain instead of a series of in-place
# mutations. The explicit format matches dictionary_to_datestring's output, so
# parsing does not depend on pandas guessing month-first vs. day-first.
daily_tweets = (
    daily_tweets
    .assign(date=pd.to_datetime(datestrings, format="%m-%d-%Y"))
    .drop('_id', axis=1)
    .sort_values('date')
    .set_index('date')
)
daily_tweets.plot()

## Tweet Locations

In [None]:
# Projection: ask for the `geo` field only.
keep_geo = {'geo': 1}

# Filter: documents whose `geo` field is not null.
nonnull_geo = {'geo': {'$ne': None}}
not_null = nonnull_geo['geo']

# Open the cursor first, then report how many documents match the filter.
cursor = collection_reference.find(nonnull_geo, keep_geo)
collection_reference.count_documents(nonnull_geo)

In [None]:
# Query here instead of consuming the `cursor` opened in an earlier cell: a
# cursor can only be iterated once, so re-running this cell on the old cursor
# would silently produce an empty frame.
geo_tweets = pd.DataFrame(list(collection_reference.find(nonnull_geo, keep_geo)))

In [None]:
# Peek at the raw `geo` value of the first five rows.
geo_tweets['geo'].head(5).tolist()

In [None]:
def parse_geo_from_tweets(tweets):
    """Expand the ``geo`` column of ``tweets`` into its own DataFrame.

    Parameters
    ----------
    tweets : object supporting ``tweets['geo'].values``
        A pandas DataFrame at the call site in this notebook.

    Returns
    -------
    pandas.DataFrame
        Built from the list of ``geo`` values, on a fresh default index.
        When the values are dicts, their keys become the columns.
    """
    geo_records = [record for record in tweets['geo'].values]
    return pd.DataFrame(geo_records)

In [None]:
geo = parse_geo_from_tweets(geo_tweets)

# Preview up to five rows. Capping at len(geo) avoids the ValueError that
# sample() raises when asked for more rows than exist, and the fixed seed
# keeps the preview stable across re-runs.
geo.sample(min(5, len(geo)), random_state=0)

In [None]:
import folium
# Initial centre of the map, passed to folium as `location`.
# NOTE(review): presumably a [latitude, longitude] pair in Los Angeles, going
# by the name `la_map` -- confirm.
starting_loc = [34.0689, -118.4452]
la_map = folium.Map(location=starting_loc, zoom_start=13)

In [None]:
# Add one marker to the map per entry of the `coordinates` column.
# NOTE(review): assumes each entry is a [latitude, longitude] pair -- confirm.
for tweet_location in geo['coordinates']:
    marker = folium.Marker(tweet_location)
    marker.add_to(la_map)

In [None]:
# Display the map as the cell output.
la_map