## How often does @realDonaldTrump tweet each day?

In [1]:
import altair as alt
import pandas as pd
import matplotlib as mpl
import numpy as np
import json
import pytz
from datetime import datetime
import altair_latimes as lat
# Register the theme object exposed by altair_latimes under the name 'latimes'
# and make it the active Altair theme.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

### Read dump downloaded from trumptweetarchive.com

In [2]:
# Run the companion processing notebook before loading the tweet dump.
# NOTE(review): presumably this is what (re)generates realdonaldtrump.json -- confirm.
%run '00-trump-tweets-processing.ipynb'

In [3]:
# Load the tweet dump from the project's relative output/ directory (the same
# directory the CSV exports in this notebook write to) instead of a
# user-specific absolute path, so the notebook runs on other machines.
# `id_str` is pinned to str at load time: tweet IDs are 64-bit values and
# letting read_json infer a numeric dtype first risks altering them.
# NOTE(review): assumes the notebook is run from the project directory -- confirm.
src = pd.read_json('output/realdonaldtrump.json', dtype={'id_str': str})

### How many tweets since May 2009?

In [4]:
# Total number of rows (tweets) in the loaded dump.
len(src)

50370

### Clean up dates

In [5]:
# Treat the naive `created_at` timestamps as GMT and convert them to
# America/Los_Angeles local time.
# NOTE(review): the column is named `eastern_created_at` but holds Pacific
# (Los Angeles) times, not US Eastern; the name is kept so existing consumers
# of the column are unaffected.
src['eastern_created_at'] = (
    src['created_at'].dt.tz_localize("GMT").dt.tz_convert('America/Los_Angeles')
)

# Local calendar date of each tweet as a tz-naive midnight timestamp
# (drop the timezone, then truncate to the day) -- no string round trip needed.
src['date'] = src['eastern_created_at'].dt.tz_localize(None).dt.normalize()

# Local date parts. year/month/day/hour are stored as strings; minute keeps
# its numeric dtype and `time` holds time-of-day objects.
src['year'] = src['eastern_created_at'].dt.year.astype(str)
src['month'] = src['eastern_created_at'].dt.month.astype(str)
src['day'] = src['eastern_created_at'].dt.day.astype(str)
src['hour'] = src['eastern_created_at'].dt.hour.astype(str)
src['minute'] = src['eastern_created_at'].dt.minute
src['time'] = src['eastern_created_at'].dt.time

# Tweet IDs are identifiers, not quantities: keep them as strings.
src['id_str'] = src['id_str'].astype(str)

### Tweets per month, year

In [6]:
# Number of tweets per (year, month); the count lands in the `date` column.
trump_years = (
    src.groupby(['year', 'month'])[['date']]
    .count()
    .reset_index()
)

In [7]:
# Show the last five rows of the per-(year, month) tweet counts.
trump_years.tail()

Unnamed: 0,year,month,date
130,2020,3,968
131,2020,4,848
132,2020,5,1106
133,2020,6,1139
134,2020,7,317


### What did he average each month in retweets and likes?

In [8]:
# Per (month, year): tweet count plus sum and mean of retweets and favorites,
# rounded to whole numbers. The result has two-level column labels.
trump_engagements_day = (
    src.groupby(['month', 'year'])
    .agg({
        'id_str': 'size',
        'retweet_count': ['sum', 'mean'],
        'favorite_count': ['sum', 'mean'],
    })
    .round()
    .reset_index()
)

In [9]:
# Flatten the two-level column labels into single names joined by '_'
# (e.g. retweet_count + mean -> retweet_count_mean); the trailing '_' left
# by an empty second level is removed.
trump_engagements_day.columns = [
    '_'.join(levels).strip().rstrip('_')
    for levels in trump_engagements_day.columns
]

In [10]:
# Show the last five rows of the flattened engagement table.
trump_engagements_day.tail()

Unnamed: 0,month,year,id_str_size,retweet_count_sum,retweet_count_mean,favorite_count_sum,favorite_count_mean
130,9,2015,414,673655,1627.0,1262434,3049.0
131,9,2016,288,2659432,9234.0,6972293,24209.0
132,9,2017,307,5047042,16440.0,19423102,63267.0
133,9,2018,378,6368803,16849.0,21990052,58175.0
134,9,2019,800,11384264,14230.0,34639760,43300.0


In [11]:
# Small multiples (one panel per year) of the number of tweets per month,
# restricted to rows whose `year` string compares greater than "2016".
# NOTE(review): `month` holds month numbers as strings and is encoded as
# temporal; this relies on how the renderer parses those strings -- confirm.
chart = (
    alt.Chart(trump_engagements_day.query('year > "2016"'))
    .mark_bar(size=10, opacity=1)
    .encode(
        x=alt.X(
            'month:T',
            title=' ',
            axis=alt.Axis(values=['1', '12'], grid=False, tickCount=1, format='%b'),
        ),
        y=alt.Y(
            'id_str_size:Q',
            stack=None,
            title=' ',
            axis=alt.Axis(
                gridWidth=.6,
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=2,
                format='1,s',
            ),
            scale=alt.Scale(domain=(0, 1000)),
        ),
        facet=alt.Facet('year:N', columns=4, title=' '),
    )
    .properties(width=150, height=100, title='Monthly tweets by @realDonaldTrump')
)

# Legend options are set in one call so the intended combination is explicit
# (previously two chained calls gave conflicting symbolType values).
chart.configure_view(strokeOpacity=0).configure_legend(orient='top', symbolType='square')

In [12]:
# Small multiples (one panel per year) of the mean favorite count per month,
# restricted to rows whose `year` string compares greater than "2016".
# NOTE(review): `month` holds month numbers as strings and is encoded as
# temporal; this relies on how the renderer parses those strings -- confirm.
chart = (
    alt.Chart(trump_engagements_day.query('year > "2016"'))
    .mark_bar(size=10, opacity=1)
    .encode(
        x=alt.X(
            'month:T',
            title=' ',
            axis=alt.Axis(values=['1', '12'], grid=False, tickCount=1, format='%b'),
        ),
        y=alt.Y(
            'favorite_count_mean:Q',
            stack=None,
            title=' ',
            axis=alt.Axis(
                gridWidth=.6,
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=2,
                format='1,s',
            ),
            scale=alt.Scale(domain=(0, 120000)),
        ),
        facet=alt.Facet('year:N', columns=4, title=' '),
    )
    .properties(width=150, height=100, title='Mean monthly likes of @realDonaldTrump')
)

# Legend options are set in one call so the intended combination is explicit
# (previously two chained calls gave conflicting symbolType values).
chart.configure_view(strokeOpacity=0).configure_legend(orient='top', symbolType='square')

In [13]:
# Small multiples (one panel per year) of the mean retweet count per month,
# restricted to rows whose `year` string compares greater than "2016".
# NOTE(review): `month` holds month numbers as strings and is encoded as
# temporal; this relies on how the renderer parses those strings -- confirm.
chart = (
    alt.Chart(trump_engagements_day.query('year > "2016"'))
    .mark_bar(size=10, opacity=1)
    .encode(
        x=alt.X(
            'month:T',
            title=' ',
            axis=alt.Axis(values=['1', '7'], grid=False, tickCount=1, format='%B'),
        ),
        y=alt.Y(
            'retweet_count_mean:Q',
            stack=None,
            title=' ',
            axis=alt.Axis(
                gridWidth=.6,
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=2,
                format='1,s',
            ),
            scale=alt.Scale(domain=(0, 30000)),
        ),
        facet=alt.Facet('year:N', columns=4, title=' '),
    )
    .properties(width=150, height=100, title='Mean monthly retweets of @realDonaldTrump')
)

# Legend options are set in one call so the intended combination is explicit
# (previously two chained calls gave conflicting symbolType values).
chart.configure_view(strokeOpacity=0).configure_legend(orient='top', symbolType='square')

### Limit data to when Trump took office. How many tweets since then?

In [14]:
# Keep tweets whose local date is on or after 01/20/2017, then count them.
trumpall_prez = src.loc[src['date'] >= '01/20/2017']
len(trumpall_prez)

20035

### Limit data to month Trump announced candidacy. How many tweets since then?

In [15]:
# Keep tweets whose local date is on or after 06/01/2015, then count them.
trumpall_running = src.loc[src['date'] >= '06/01/2015']
len(trumpall_running)

28439

### Count daily tweets since candidacy, create dataframe with results

In [16]:
# Tweets per day since the candidacy cutoff: first as a Series indexed by date,
# then as a two-column (date, count) frame.
trump_tweets_day_candidacy = trumpall_running.groupby('date').size()
trump_tweets_day_candidacy_df = trump_tweets_day_candidacy.reset_index(name='count')

In [17]:
# Write the daily counts since the candidacy cutoff to the output/ folder.
trump_tweets_day_candidacy_df.to_csv('output/trump_tweets_day_candidacy.csv')

### Count daily tweets since inauguration, create dataframe with results

In [18]:
# Tweets per day since the inauguration cutoff: first as a Series indexed by
# date, then as a two-column (date, count) frame.
trump_tweets_day_prez = trumpall_prez.groupby('date').size()
trump_tweets_day_prez_df = trump_tweets_day_prez.reset_index(name='count')

In [19]:
# Write the daily counts since the inauguration cutoff to the output/ folder.
trump_tweets_day_prez_df.to_csv('output/trump_tweets_day_prez.csv')

### Count daily tweets from the beginning, create dataframe with results

In [20]:
# Tweets per day over the whole dump, as a (date, count) frame; show the last rows.
trump_tweets_day = src.groupby('date').size().reset_index(name='count')
trump_tweets_day.tail()

Unnamed: 0,date,count
3312,2020-07-08,14
3313,2020-07-09,24
3314,2020-07-10,30
3315,2020-07-11,40
3316,2020-07-12,11


In [21]:
# Write the full-history daily counts to the output/ folder.
trump_tweets_day.to_csv('output/trump_tweets_day.csv')

### Sort table for top tweet days during presidency

In [22]:
# Daily counts since the inauguration cutoff, busiest days first.
trump_freq_inauguration = trump_tweets_day_prez_df.sort_values(by='count', ascending=False)

In [23]:
# Five days with the highest tweet counts since the inauguration cutoff.
trump_freq_inauguration.head(5)

Unnamed: 0,date,count
1218,2020-06-05,199
1192,2020-05-10,125
1042,2019-12-12,124
1038,2019-12-08,106
1083,2020-01-22,94


### Read all from today

In [24]:
# Today's date as an MM/DD/YYYY string, taken in America/Los_Angeles -- the same
# timezone the `date` column is derived from -- so the comparison against
# `date` does not depend on the timezone of the machine running the notebook.
today = pd.Timestamp.now(tz='America/Los_Angeles').strftime("%m/%d/%Y")

In [25]:
# Rows of the since-inauguration subset whose date equals `today`.
trump_tweets_today = trumpall_prez.loc[trumpall_prez['date'] == today]

In [26]:
# Show the first five of today's tweets.
trump_tweets_today.head()

Unnamed: 0,index,source,id_str,text,created_at,retweet_count,in_reply_to_user_id_str,favorite_count,is_retweet,eastern_created_at,date,year,month,day,hour,minute,time
0,0,Twitter for iPhone,1282291265661472768,"No, Radical Left anarchists, agitators, looter...",2020-07-12 12:30:36,31524,,137635,False,2020-07-12 05:30:36-07:00,2020-07-12,2020,7,12,5,30,05:30:36
1,1,Twitter for iPhone,1282288813075771392,"Thank you to J.R. Majewski, a great Air Force ...",2020-07-12 12:20:51,16935,,69489,False,2020-07-12 05:20:51-07:00,2020-07-12,2020,7,12,5,20,05:20:51
2,2,Twitter for iPhone,1282286510390288384,"...rounds, no problem. When I play, Fake News ...",2020-07-12 12:11:42,14137,25073877.0,84008,False,2020-07-12 05:11:42-07:00,2020-07-12,2020,7,12,5,11,05:11:42
3,3,Twitter for iPhone,1282286508716756992,I know many in business and politics that work...,2020-07-12 12:11:42,19489,,102018,False,2020-07-12 05:11:42-07:00,2020-07-12,2020,7,12,5,11,05:11:42
4,4,Twitter for iPhone,1282281745585504256,We have now built 240 Miles of new Border Wall...,2020-07-12 11:52:46,23738,,105393,False,2020-07-12 04:52:46-07:00,2020-07-12,2020,7,12,4,52,04:52:46


---

### RT vs Tweet

In [27]:
# Daily counts split by the is_retweet flag, over the whole dump ...
rt_tweet = src.groupby(['date', 'is_retweet']).size().reset_index(name='count')

# ... and the same split restricted to dates after 2019-01-01.
rt_tweet_recent = (
    src.loc[src['date'] > '2019-01-01']
    .groupby(['date', 'is_retweet'])
    .size()
    .reset_index(name='count')
)

In [28]:
# One row per date with separate `tweet` (is_retweet == False) and `retweet`
# (is_retweet == True) count columns.
# - aggfunc is given by name ('sum') rather than as the numpy callable.
# - pivot_table already returns a DataFrame, so no extra wrapper is needed.
# - Only the False/True column labels are renamed; 'is_retweet' is not a
#   column label at that point, so renaming it had no effect.
# Dates missing one of the two kinds are filled with an empty string, as before.
rt_pivot = (
    rt_tweet
    .pivot_table(values='count', index=['date'], columns=['is_retweet'], aggfunc='sum')
    .rename(columns={False: 'tweet', True: 'retweet'})
    .reset_index()
    .fillna('')
)

In [29]:
# Write the tweet-vs-retweet daily pivot to the output/ folder.
rt_pivot.to_csv('output/rt_pivot.csv')

---

### Chart it!

In [30]:
# Thin bars: one per day, height = number of tweets that day.
lines = (
    alt.Chart(
        trump_tweets_day_candidacy_df,
        title='@realDonaldTrump tweet frequency since candidacy announcement',
    )
    .mark_bar(size=2)
    .encode(
        x=alt.X(
            'date:T',
            axis=alt.Axis(grid=False, title='', tickCount=6, format='%b. %Y'),
        ),
        y=alt.Y(
            'count:Q',
            scale=alt.Scale(domain=(0, 200)),
            axis=alt.Axis(
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=6,
                title='Daily tweet counts and mean',
            ),
        ),
    )
)

# Red rule drawn at the mean of the daily counts.
rule = (
    alt.Chart(trump_tweets_day_candidacy_df)
    .mark_rule(color='red')
    .encode(y='mean(count):Q')
)

# Bold text mark carrying the mean value, offset from the rule by dx/dy.
# TODO: prefix the label with "Average: ".
text = rule.mark_text(
    align='center', baseline='middle', dx=200, dy=10, fontWeight='bold'
).encode(text=alt.Text('mean(count):Q', format='.2'))

# Layer bars, rule and label, then render.
(lines + rule + text).properties(height=400, width=800).configure_view(strokeOpacity=0)

In [31]:
# Thin bars: one per day, height = number of tweets that day.
bars = (
    alt.Chart(
        trump_freq_inauguration,
        title='@realDonaldTrump tweet frequency since inauguration',
    )
    .mark_bar(size=2)
    .encode(
        x=alt.X(
            'date:T',
            axis=alt.Axis(grid=False, title='', tickCount=6, format='%b. %Y'),
        ),
        y=alt.Y(
            'count:Q',
            scale=alt.Scale(domain=(0, 200)),
            axis=alt.Axis(
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=6,
                title='Daily tweet counts and mean',
            ),
        ),
    )
)

# Red rule drawn at the mean of the daily counts.
rule = (
    alt.Chart(trump_freq_inauguration)
    .mark_rule(color='red')
    .encode(y='mean(count):Q')
)

# Bold text mark carrying the mean value, offset from the rule by dx/dy.
# TODO: prefix the label with "Average: ".
text = rule.mark_text(
    align='center', baseline='middle', dx=200, dy=10, fontWeight='bold'
).encode(text=alt.Text('mean(count):Q', format='.2'))

# Layer bars, rule and label, then render.
(bars + rule + text).properties(height=400, width=800).configure_view(strokeOpacity=0)

In [32]:
# Calendar-style heatmap: day of month across, month down, one panel per year,
# cell color = number of tweets that day.
heatmap = (
    alt.Chart(
        trump_freq_inauguration,
        title="@realDonaldTrump daily tweet frequency since inauguration",
    )
    .mark_rect()
    .encode(
        x=alt.X('date(date):O', title=' '),
        y=alt.Y('month(date):O', title=''),
        color=alt.Color('count:Q', title='Tweet count'),
        tooltip=[
            alt.Tooltip('monthdate(date):T', title='Date'),
            alt.Tooltip('count:Q', title='Tweets'),
        ],
        facet=alt.Facet('year(date):O', title='', columns=1),
    )
    .properties(width=600, height=200)
)

# Legend on top with padding; no border around each panel.
heatmap.configure_legend(orient='top', padding=10).configure_view(strokeOpacity=0)

In [33]:
# NOTE(review): despite the "RT" in its name, this is built from the all-tweets
# daily counts (trump_tweets_day_prez_df) with the same sort used for
# trump_freq_inauguration, and no later cell shown here reads it -- likely a
# leftover; confirm before relying on it or removing it.
trump_RT_freq_inauguration = trump_tweets_day_prez_df.sort_values(['count'], ascending=False)

In [34]:
# Split the since-inauguration tweets by the is_retweet flag.
trumpall_prez_rt = trumpall_prez.loc[trumpall_prez['is_retweet'].eq(True)]
trumpall_prez_no_rt = trumpall_prez.loc[trumpall_prez['is_retweet'].eq(False)]

### Retweets

In [35]:
# Retweets per day since the inauguration cutoff: Series indexed by date,
# then a two-column (date, count) frame.
trump_tweets_day_prez_rt = trumpall_prez_rt.groupby('date').size()
trump_tweets_day_prez_rt_df = trump_tweets_day_prez_rt.reset_index(name='count')

In [36]:
# Daily retweet counts for the five latest dates in the frame.
trump_tweets_day_prez_rt_df.sort_values(by='date', ascending=False).head()

Unnamed: 0,date,count
372,2020-07-12,5
371,2020-07-11,23
370,2020-07-10,16
369,2020-07-09,5
368,2020-07-08,7


### No retweets

In [37]:
# Non-retweet tweets per day since the inauguration cutoff: Series indexed by
# date, then a two-column (date, count) frame.
trump_tweets_day_prez_no_rt = trumpall_prez_no_rt.groupby('date').size()
trump_tweets_day_prez_no_rt_df = trump_tweets_day_prez_no_rt.reset_index(name='count')

In [38]:
# Rank days by count, busiest first -- retweets only ...
trump_rt_freq_inauguration = trump_tweets_day_prez_rt_df.sort_values(
    by='count', ascending=False
)
# ... and tweets that are not retweets.
trump_no_rt_freq_inauguration = trump_tweets_day_prez_no_rt_df.sort_values(
    by='count', ascending=False
)

In [39]:
# Five days with the most retweets since the inauguration cutoff.
trump_rt_freq_inauguration.head()

Unnamed: 0,date,count
337,2020-06-05,163
293,2019-12-12,94
289,2019-12-08,82
292,2019-12-11,69
335,2020-06-03,61


In [40]:
# Same calendar heatmap as for all tweets, but for non-retweets only and
# with the "blues" color scheme.
heatmap = (
    alt.Chart(
        trump_no_rt_freq_inauguration,
        title="@realDonaldTrump daily tweet (without RT) frequency since inauguration",
    )
    .mark_rect()
    .encode(
        x=alt.X('date(date):O', title=' '),
        y=alt.Y('month(date):O', title=''),
        color=alt.Color('count:Q', title='Tweet count', scale=alt.Scale(scheme="blues")),
        tooltip=[
            alt.Tooltip('monthdate(date):T', title='Date'),
            alt.Tooltip('count:Q', title='Tweets'),
        ],
        facet=alt.Facet('year(date):O', title='', columns=1),
    )
    .properties(width=600, height=180)
)

# Legend on top with padding; no border around each panel.
heatmap.configure_legend(orient='top', padding=10).configure_view(strokeOpacity=0)

In [41]:
# Thin bars: one per day, height = number of non-retweet tweets that day.
bars = (
    alt.Chart(
        trump_no_rt_freq_inauguration,
        title='@realDonaldTrump tweet frequency (without RT) since inauguration',
    )
    .mark_bar(size=2)
    .encode(
        x=alt.X(
            'date:T',
            axis=alt.Axis(grid=False, title='', tickCount=6, format='%b. %Y'),
        ),
        y=alt.Y(
            'count:Q',
            scale=alt.Scale(domain=(0, 150)),
            axis=alt.Axis(
                gridColor='#dddddd',
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=5,
                title='Daily tweet counts and mean',
            ),
        ),
    )
)

# Red rule drawn at the mean of the daily counts.
rule = (
    alt.Chart(trump_no_rt_freq_inauguration)
    .mark_rule(color='red')
    .encode(y='mean(count):Q')
)

# Bold text mark carrying the mean value, offset from the rule by dx/dy.
# TODO: prefix the label with "Average: ".
text = rule.mark_text(
    align='center', baseline='middle', dx=50, dy=10, fontWeight='bold'
).encode(text=alt.Text('mean(count):Q', format='.2'))

# Layer bars, rule and label, then render.
(bars + rule + text).properties(height=400, width=800).configure_view(strokeOpacity=0)