# Setup

In [1]:
# -*- coding: utf-8 -*-
%matplotlib inline

import agate
import warnings
import requests
import zipfile
import io
import json

from pprint import pprint
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Silence every Python warning for the rest of the session
# (presumably to keep library deprecation noise out of the cell output).
warnings.filterwarnings('ignore')

# Trump Tweet analysis

A look at Trump's tweets since inauguration day.

## Questions to answer

### Data processing

* Date grouping columns
* Sentiment analysis

### Analysis

* Frequencies sliced several ways: By day, month, hour of day
* Rates: Tweets/day, Tweets/day/month
* Retweets: What was shared heavily?
* Searches: 
  * Media organizations

# Get data

_Only run this if you need to update the Trump tweet data, for example the first time you run this script._

First, download the zip file and unzip it into the `data` directory.

**@TODO**: Cache zip files locally, read from zip into ignored local file.

In [2]:
# Fetch the zipped 2017 tweet archive and unpack it into the local `data` directory.
response = requests.get(
    'https://github.com/bpb27/trump_tweet_data_archive/raw/master/master_2017.json.zip',
    timeout=60,  # don't let a stalled connection hang the notebook forever
)
# Fail loudly on an HTTP error instead of handing an error page to ZipFile,
# which would otherwise surface as a confusing BadZipFile exception.
response.raise_for_status()

# The archive lives entirely in memory; the context manager closes it once
# every member has been extracted.
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    z.extractall('data')

Open the downloaded, unzipped JSON file, run sentiment analysis on each tweet, remove the multi-valued `entities` field, and write the result to `data/master_2017_processed.json`.

In [3]:
# Read the raw tweets. The encoding is given explicitly so that tweet text
# (emoji, accented characters) loads the same way regardless of the platform's
# default locale.
with open('data/master_2017.json', encoding='utf-8') as f:
    jsondata = json.load(f)

analyzer = SentimentIntensityAnalyzer()
for row in jsondata:
    # Drop the multi-valued `entities` field; `pop` with a default tolerates
    # records that do not carry the key at all.
    row.pop('entities', None)
    # Attach the analyzer's polarity scores for the tweet text under `sentiment`.
    row['sentiment'] = analyzer.polarity_scores(row['text'])

# Persist the enriched records for the analysis cells that follow.
with open('data/master_2017_processed.json', 'w', encoding='utf-8') as f:
    json.dump(jsondata, f)

# Utilities

_Add helper functions here._

In [4]:
# Stub for helpers

# Data processing

Load the processed JSON file into an agate `Table`. Nested fields are flattened into slash-delimited column names.

In [5]:
# Load the processed tweets into an agate Table; nested JSON fields show up as
# slash-delimited column names (e.g. `extended_entities/media/0/...`).
tweets = agate.Table.from_json('data/master_2017_processed.json')

Let's look at all the crazy column names:

In [6]:
# Printing an agate Table lists its column names and data types, not its rows.
print(tweets)

| column                                                                                         | data_type |
| ---------------------------------------------------------------------------------------------- | --------- |
| retweeted                                                                                      | Boolean   |
| in_reply_to_user_id                                                                            | Number    |
| in_reply_to_status_id                                                                          | Number    |
| id                                                                                             | Number    |
| created_at                                                                                     | DateTime  |
| retweet_count                                                                                  | Number    |
| coordinates                                                                                    | Boolean   |
|

In [7]:
# Bucket tweets by the calendar day (YYYY-MM-DD) of `created_at`.
# NOTE(review): the grouping column is labelled 'month' although it holds a
# day; the label is kept because later cells may refer to it by that name.
tweetsByDay = tweets.group_by(
    key=lambda tweet: tweet['created_at'].strftime('%Y-%m-%d'),
    key_name='month',
)

# Collapse each daily group into a single row carrying its tweet count.
countsByDay = tweetsByDay.aggregate([('count', agate.Count())])

In [8]:
# Render the per-day counts as a text table; long tables are cut off with a `...` row.
countsByDay.print_table()

| month      | count |
| ---------- | ----- |
| 2017-04-23 |     7 |
| 2017-04-22 |    10 |
| 2017-04-21 |     5 |
| 2017-04-20 |     4 |
| 2017-04-19 |     4 |
| 2017-04-18 |     7 |
| 2017-04-17 |     8 |
| 2017-04-16 |     5 |
| 2017-04-14 |     3 |
| 2017-04-13 |     4 |
| 2017-04-12 |     6 |
| 2017-04-11 |     5 |
| 2017-04-10 |     3 |
| 2017-04-09 |     2 |
| 2017-04-08 |     5 |
| 2017-04-07 |     1 |
| 2017-04-06 |     2 |
| 2017-04-05 |     1 |
| 2017-04-04 |     4 |
| 2017-04-03 |     8 |
| ...        |   ... |


In [9]:
# Show the first tweet as a plain dict to inspect every flattened column and its value.
dict(tweets.rows[0])

{'contributors': None,
 'coordinates': None,
 'created_at': datetime.datetime(2017, 4, 23, 20, 17),
 'extended_entities/media/0/additional_media_info/description': None,
 'extended_entities/media/0/additional_media_info/embeddable': None,
 'extended_entities/media/0/additional_media_info/monetizable': None,
 'extended_entities/media/0/additional_media_info/source_user/contributors_enabled': None,
 'extended_entities/media/0/additional_media_info/source_user/created_at': None,
 'extended_entities/media/0/additional_media_info/source_user/default_profile': None,
 'extended_entities/media/0/additional_media_info/source_user/default_profile_image': None,
 'extended_entities/media/0/additional_media_info/source_user/description': None,
 'extended_entities/media/0/additional_media_info/source_user/entities/url/urls/0/display_url': None,
 'extended_entities/media/0/additional_media_info/source_user/entities/url/urls/0/expanded_url': None,
 'extended_entities/media/0/additional_media_info/sour