## How often does @realDonaldTrump tweet each day?

In [1]:
import altair as alt
import pandas as pd
import matplotlib as mpl
import numpy as np
import json
import pytz
from datetime import datetime
import altair_latimes as lat
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

### Read dump downloaded from trumptweetarchive.com

In [4]:
%run '00-trump-tweets-processing.ipynb'

In [5]:
src = pd.read_json('/Users/mhustiles/data/github/notebooks/trump-tweet-frequency/output/realdonaldtrump.json') 

### How many tweets since May 2009?

In [6]:
len(src)

53477

In [8]:
src.head()

Unnamed: 0,index,source,id_str,text,created_at,retweet_count,in_reply_to_user_id_str,favorite_count,is_retweet,eastern_created_at,date,year,month,day,hour,minute,time
0,0,Twitter for iPhone,1313529166265020416,Did anyone get to see that absolute “Joke” of ...,2020-10-06 17:18:52,12,,4,False,2020-10-06 17:18:52,2020-10-06,2020,10,6,10,18,10:18:52
1,1,Twitter for iPhone,1313511340124917760,REPEAL SECTION 230!!!,2020-10-06 16:08:02,15402,,61375,False,2020-10-06 16:08:02,2020-10-06,2020,10,6,9,8,09:08:02
2,2,Twitter for iPhone,1313510118437392384,The Fake News Media refuses to discuss how goo...,2020-10-06 16:03:10,13180,,41808,False,2020-10-06 16:03:10,2020-10-06,2020,10,6,9,3,09:03:10
3,3,Twitter for iPhone,1313507608196771840,https://t.co/519BvRfcU2,2020-10-06 15:53:12,8284,,29977,False,2020-10-06 15:53:12,2020-10-06,2020,10,6,8,53,08:53:12
4,4,Twitter for iPhone,1313505418866552832,THANK YOU! https://t.co/NvawGFpSIl,2020-10-06 15:44:30,8438,,32892,False,2020-10-06 15:44:30,2020-10-06,2020,10,6,8,44,08:44:30


### Clean up dates

In [9]:
src['eastern_created_at'] = src['created_at'].dt.time
src['eastern_created_at'] = src['created_at'].dt.tz_localize("GMT").dt.tz_convert('America/Los_Angeles')
src['date'] = pd.to_datetime(src['eastern_created_at']).dt.strftime('%m/%d/%Y')
src['date'] = pd.to_datetime(src['date'])
src['year'] = src['eastern_created_at'].dt.year
src['month'] = src['eastern_created_at'].dt.month
src['day'] = src['eastern_created_at'].dt.day
src['hour'] = src['eastern_created_at'].dt.hour
src['minute'] = src['eastern_created_at'].dt.minute
src['time'] = src['eastern_created_at'].dt.time
src['id_str'] = src['id_str'].astype(str)
src['year'] = src['year'].astype(str)
src['month'] = src['month'].astype(str)
src['day'] = src['day'].astype(str)
src['hour'] = src['hour'].astype(str)

### Most popular tweets

In [10]:
src[['id_str','date','text','favorite_count' ]].sort_values('favorite_count', ascending=False).head(3)

Unnamed: 0,id_str,date,text,favorite_count
58,1311892190680014848,2020-10-01,"Tonight, @FLOTUS and I tested positive for COV...",1886131
56,1312233807991496704,2020-10-02,"Going welI, I think! Thank you to all. LOVE!!!",1219995
13819,1157345692517634048,2019-08-02,A$AP Rocky released from prison and on his way...,879647


In [11]:
src[['id_str','date','text','retweet_count']].sort_values('retweet_count', ascending=False).head(3)

Unnamed: 0,id_str,date,text,retweet_count
58,1311892190680014848,2020-10-01,"Tonight, @FLOTUS and I tested positive for COV...",416790
22238,881503147168071680,2017-07-02,#FraudNewsCNN #FNN https://t.co/WYUnHjjUjg,369530
23525,795954831718498304,2016-11-08,TODAY WE MAKE AMERICA GREAT AGAIN!,344806


### Time difference between tweets

In [12]:
trumppres = pd.DataFrame(src[src.date >= '01/20/2017']).sort_values('created_at', ascending=True)

In [13]:
trumppres['diff'] = (trumppres['created_at'] - trumppres['created_at'].shift(1)).astype('timedelta64[h]')

In [14]:
trumppres.sort_values('diff', ascending=False).head()

Unnamed: 0,index,source,id_str,text,created_at,retweet_count,in_reply_to_user_id_str,favorite_count,is_retweet,eastern_created_at,date,year,month,day,hour,minute,time,diff
20679,3509,Twitter for iPhone,950866561153331200,"Today, it was my great honor to sign a new Exe...",2018-01-09 23:07:09,19481,,82151,False,2018-01-09 15:07:09-08:00,2018-01-09,2018,1,9,15,7,15:07:09,201.0
22417,1737,Twitter for iPhone,873120139222306816,"Despite so many false statements and lies, tot...",2017-06-09 10:10:37,37128,,126953,False,2017-06-09 03:10:37-07:00,2017-06-09,2017,6,9,3,10,03:10:37,45.0
20375,3205,Twitter for iPhone,968455547094753280,“He’s got a very good point. Somebody in the J...,2018-02-27 11:59:30,19756,,69464,False,2018-02-27 03:59:30-08:00,2018-02-27,2018,2,27,3,59,03:59:30,40.0
16219,6867,Twitter for iPhone,1109787378242240512,"Good Morning, Have A Great Day!",2019-03-24 12:01:44,77697,,462655,False,2019-03-24 05:01:44-07:00,2019-03-24,2019,3,24,5,1,05:01:44,39.0
22714,2034,Twitter for iPhone,853583417916755968,Why would I call China a currency manipulator ...,2017-04-16 12:18:40,15001,,72649,False,2017-04-16 05:18:40-07:00,2017-04-16,2017,4,16,5,18,05:18:40,38.0


In [15]:
trumppres['diff'].mean()

1.178211831813664

### Tweets per month, year

In [16]:
trump_years = src.groupby(['year','month']).agg({'date': 'count'}).reset_index()

In [17]:
trump_years.tail()

Unnamed: 0,year,month,date
133,2020,5,1106
134,2020,6,1139
135,2020,7,853
136,2020,8,1132
137,2020,9,1331


### What did he average each day in retweets, likes and replies

In [18]:
trump_engagements_day = src.groupby(['month', 'year']).agg({'id_str':'size', 'retweet_count':['sum', 'mean'], 'favorite_count':['sum', 'mean']}).round().reset_index()

In [19]:
trump_engagements_day.columns = trump_engagements_day.columns.map('_'.join).str.strip().str.rstrip('_')

In [20]:
trump_engagements_day.tail()

Unnamed: 0,month,year,id_str_size,retweet_count_sum,retweet_count_mean,favorite_count_sum,favorite_count_mean
133,9,2016,288,2659432,9234.0,6972293,24209.0
134,9,2017,307,5047042,16440.0,19423102,63267.0
135,9,2018,378,6368803,16849.0,21990052,58175.0
136,9,2019,800,11384264,14230.0,34639760,43300.0
137,9,2020,1331,20806605,15632.0,56868183,42726.0


In [21]:
chart = alt.Chart((trump_engagements_day.query('year > "2016"')))\
    .mark_bar(size=10,opacity=1)\
    .encode(
    x=alt.X('month:T', title=' ', \
            axis=alt.Axis(values=['1', '12'], grid=False, tickCount=1, format='%b')),
    y=alt.Y('id_str_size:Q', stack=None, title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=2, format='1,s'),\
           scale=alt.Scale(domain=(0, 1000))),\
    facet=alt.Facet('year:N', columns=4, title=' ')
).properties(width=150, height=100,
     title='Monthly tweets by @realDonaldTrump'
)

chart.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
).configure_legend(symbolType='square')

In [22]:
chart = alt.Chart((trump_engagements_day.query('year > "2016"')))\
    .mark_bar(size=10,opacity=1)\
    .encode(
    x=alt.X('month:T', title=' ', \
            axis=alt.Axis(values=['1', '12'], grid=False, tickCount=1, format='%b')),
    y=alt.Y('favorite_count_mean:Q', stack=None, title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=2, format='1,s'),\
           scale=alt.Scale(domain=(0, 120000))),\
    facet=alt.Facet('year:N', columns=4, title=' ')
).properties(width=150, height=100,
     title='Mean monthly likes of @realDonaldTrump'
)

chart.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
).configure_legend(symbolType='square')

In [23]:
chart = alt.Chart((trump_engagements_day.query('year > "2016"')))\
    .mark_bar(size=10,opacity=1)\
    .encode(
    x=alt.X('month:T', title=' ', \
            axis=alt.Axis(values=['1', '7'], grid=False, tickCount=1, format='%B')),
    y=alt.Y('retweet_count_mean:Q', stack=None, title=' ', axis=alt.Axis(gridWidth=.6,\
     gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=2, format='1,s'),\
           scale=alt.Scale(domain=(0, 30000))),\
    facet=alt.Facet('year:N', columns=4, title=' ')
).properties(width=150, height=100,
     title='Mean monthly retweets of @realDonaldTrump'
)

chart.configure_view(strokeOpacity=0).configure_legend(
    orient='top',
    symbolType='stroke'
).configure_legend(symbolType='square')

### Limit data to when Trump took office. How many tweets since then?

In [24]:
trumpall_prez = src[src.date >= '01/20/2017']
len(trumpall_prez)

23142

### Limit data to month Trump announced candidacy. How many tweets since then?

In [25]:
trumpall_running = src[src.date >= '06/01/2015']
len(trumpall_running)

31546

### Count daily tweets since candidacy, create dataframe with results

In [26]:
trump_tweets_day_candidacy = trumpall_running.groupby(['date']).size()
trump_tweets_day_candidacy_df = pd.DataFrame({'date': trump_tweets_day_candidacy.index, 'count': trump_tweets_day_candidacy.values})

In [27]:
trump_tweets_day_candidacy_df.to_csv('output/trump_tweets_day_candidacy.csv')

### Count daily tweets since inaguration, create dataframe with results

In [28]:
trump_tweets_day_prez = trumpall_prez.groupby(['date']).size()
trump_tweets_day_prez_df = pd.DataFrame({'date': trump_tweets_day_prez.index, 'count': trump_tweets_day_prez.values})

In [29]:
trump_tweets_day_prez_df.to_csv('output/trump_tweets_day_prez.csv')

### Count daily tweets from the beginning, create dataframe with results

In [30]:
trump_tweets_day = src.groupby(['date']).agg('size').reset_index(name='count')
trump_tweets_day.tail(10)

Unnamed: 0,date,count
3393,2020-09-27,20
3394,2020-09-28,38
3395,2020-09-29,37
3396,2020-09-30,50
3397,2020-10-01,50
3398,2020-10-02,2
3399,2020-10-03,3
3400,2020-10-04,9
3401,2020-10-05,31
3402,2020-10-06,13


In [31]:
trump_tweets_day.to_csv('output/trump_tweets_day.csv')

### Sort table for top tweet days during presidency

In [32]:
trump_freq_inauguration = trump_tweets_day_prez_df.sort_values(['count'], ascending=False)

In [33]:
trump_freq_inauguration.head(5)

Unnamed: 0,date,count
1218,2020-06-05,199
1192,2020-05-10,125
1042,2019-12-12,124
1304,2020-08-30,110
1313,2020-09-08,108


### Read all from today

In [34]:
today = pd.Timestamp.today().strftime("%m/%d/%Y")

In [35]:
trump_tweets_today = trumpall_prez[trumpall_prez.date == today]

In [36]:
trump_tweets_today.head()

Unnamed: 0,index,source,id_str,text,created_at,retweet_count,in_reply_to_user_id_str,favorite_count,is_retweet,eastern_created_at,date,year,month,day,hour,minute,time
0,0,Twitter for iPhone,1313529166265020416,Did anyone get to see that absolute “Joke” of ...,2020-10-06 17:18:52,12,,4,False,2020-10-06 10:18:52-07:00,2020-10-06,2020,10,6,10,18,10:18:52
1,1,Twitter for iPhone,1313511340124917760,REPEAL SECTION 230!!!,2020-10-06 16:08:02,15402,,61375,False,2020-10-06 09:08:02-07:00,2020-10-06,2020,10,6,9,8,09:08:02
2,2,Twitter for iPhone,1313510118437392384,The Fake News Media refuses to discuss how goo...,2020-10-06 16:03:10,13180,,41808,False,2020-10-06 09:03:10-07:00,2020-10-06,2020,10,6,9,3,09:03:10
3,3,Twitter for iPhone,1313507608196771840,https://t.co/519BvRfcU2,2020-10-06 15:53:12,8284,,29977,False,2020-10-06 08:53:12-07:00,2020-10-06,2020,10,6,8,53,08:53:12
4,4,Twitter for iPhone,1313505418866552832,THANK YOU! https://t.co/NvawGFpSIl,2020-10-06 15:44:30,8438,,32892,False,2020-10-06 08:44:30-07:00,2020-10-06,2020,10,6,8,44,08:44:30


---

### RT vs Tweet

In [37]:
rt_tweet = src.groupby(['date', 'is_retweet']).agg('size').reset_index(name='count')
rt_tweet_recent = src[src['date'] > '2019-01-01']\
    .groupby(['date', 'is_retweet']).agg('size').reset_index(name='count')

In [38]:
rt_pivot = pd.DataFrame(pd.pivot_table(rt_tweet, values='count', index=['date'],\
                          columns=['is_retweet'], aggfunc=np.sum).\
                        rename(columns={'is_retweet':'id', False:'tweet', True:'retweet'}).reset_index().fillna(''))

In [39]:
rt_pivot.to_csv('output/rt_pivot.csv')

---

### Chart it!

In [40]:
#bars

lines = alt.Chart(trump_tweets_day_candidacy_df, title = '@realDonaldTrump tweet frequency since candidacy announcement').mark_bar(size=2).encode(
    x = alt.X('date:T', axis = alt.Axis(grid=False, title = '', tickCount=6, format = ("%b. %Y"))),
    y = alt.Y('count:Q',
        scale=alt.Scale(domain=(0, 200)), axis = alt.Axis(gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0, tickCount=6, title = 'Daily tweet counts and mean')),
)

#rule showing mean

rule = alt.Chart(trump_tweets_day_candidacy_df).mark_rule(color='red').encode(
    y='mean(count):Q'
)

#rule label -- would like to add "Average: " annotation
text = rule.mark_text(
    align='center',
    baseline='middle',
    dx=200,
    dy=10,
    fontWeight='bold',
).encode(
    text=alt.Text('mean(count):Q', format=".2"))

#go
( lines + rule + text ).properties(height=400,width=800).configure_view(strokeOpacity=0)

In [41]:
#bars

bars = alt.Chart(trump_freq_inauguration, title = '@realDonaldTrump tweet frequency since inauguration').\
    mark_bar(size=2).encode(
    x = alt.X('date:T', axis = alt.Axis(grid=False, title = '', tickCount=6, format = ("%b. %Y"))),
    y = alt.Y('count:Q',
        scale=alt.Scale(domain=(0, 200)), axis = alt.Axis(gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0, tickCount=6, title = 'Daily tweet counts and mean')),
)

#rule showing mean

rule = alt.Chart(trump_freq_inauguration).mark_rule(color='red').encode(
    y='mean(count):Q'
)

#rule label -- would like to add "Average: " annotation
text = rule.mark_text(
    align='center',
    baseline='middle',
    dx=200,
    dy=10,
    fontWeight='bold',
).encode(
    text=alt.Text('mean(count):Q', format=".2"))

#go
( bars + rule + text ).properties(height=400,width=800).configure_view(strokeOpacity=0)

In [42]:
heatmap = alt.Chart(
    trump_freq_inauguration,
    title="@realDonaldTrump daily tweet frequency since inauguration"
).mark_rect().encode(
    x=alt.X('date(date):O', title=' '),
    y=alt.Y('month(date):O', title=''),
    color=alt.Color('count:Q', title='Tweet count'),
    tooltip=[
        alt.Tooltip('monthdate(date):T', title='Date'),
        alt.Tooltip('count:Q', title='Tweets')
    ],
    facet=alt.Facet('year(date):O', title='',columns=1)
).properties(width=600, height=200)

heatmap.configure_legend(
    orient='top',
    padding=10
).configure_view(strokeOpacity=0)

In [43]:
trump_RT_freq_inauguration = trump_tweets_day_prez_df.sort_values(['count'], ascending=False)

In [44]:
trumpall_prez_rt = trumpall_prez[trumpall_prez['is_retweet'] == True]
trumpall_prez_no_rt = trumpall_prez[trumpall_prez['is_retweet'] == False]

### Retweets

In [45]:
trump_tweets_day_prez_rt = trumpall_prez_rt.groupby(['date']).size()
trump_tweets_day_prez_rt_df = pd.DataFrame({'date': trump_tweets_day_prez_rt.index, \
                                         'count': trump_tweets_day_prez_rt.values})

In [46]:
trump_tweets_day_prez_rt_df.sort_values(by='date', ascending=False).head()

Unnamed: 0,date,count
327,2020-10-04,4
326,2020-10-01,37
325,2020-09-30,15
324,2020-09-28,18
323,2020-09-27,8


### No retweets

In [47]:
trump_tweets_day_prez_no_rt = trumpall_prez_no_rt.groupby(['date']).size()
trump_tweets_day_prez_no_rt_df = pd.DataFrame({'date': trump_tweets_day_prez_no_rt.index, \
                                         'count': trump_tweets_day_prez_no_rt.values})

In [48]:
# retweets
trump_rt_freq_inauguration = trump_tweets_day_prez_rt_df.sort_values(['count'], ascending=False)
# no retweets
trump_no_rt_freq_inauguration = trump_tweets_day_prez_no_rt_df.sort_values(['count'], ascending=False)

In [49]:
trump_rt_freq_inauguration.head()

Unnamed: 0,date,count
293,2019-12-12,94
289,2019-12-08,82
292,2019-12-11,69
300,2019-12-19,53
299,2019-12-18,47


In [50]:
### Try a different color scheme 

heatmap = alt.Chart(
    trump_no_rt_freq_inauguration,
    title="@realDonaldTrump daily tweet (without RT) frequency since inauguration"
).mark_rect().encode(
    x=alt.X('date(date):O', title=' '),
    y=alt.Y('month(date):O', title=''),
    color=alt.Color('count:Q', title='Tweet count', scale=alt.Scale(scheme="blues")),
    tooltip=[
        alt.Tooltip('monthdate(date):T', title='Date'),
        alt.Tooltip('count:Q', title='Tweets')
    ],
    facet=alt.Facet('year(date):O', title='', columns=1)
).properties(width=600, height=180)

heatmap.configure_legend(
    orient='top',
    padding=10
).configure_view(strokeOpacity=0)

In [51]:
#bars
bars = alt.Chart(trump_no_rt_freq_inauguration, \
                 title = '@realDonaldTrump tweet frequency (without RT) since inauguration').mark_bar(size=2).encode(
    x = alt.X('date:T', axis = alt.Axis(grid=False, title = '', tickCount=6, format = ("%b. %Y"))),
    y = alt.Y('count:Q',
        scale=alt.Scale(domain=(0, 150)), axis = alt.Axis(gridColor='#dddddd',offset=6,tickSize=0,domainOpacity=0,tickCount=5, title = 'Daily tweet counts and mean')),
)

#rule showing mean
rule = alt.Chart(trump_no_rt_freq_inauguration).mark_rule(color='red').encode(
    y='mean(count):Q'
)

#rule label -- would like to add "Average: " annotation
text = rule.mark_text(
    align='center',
    baseline='middle',
    dx=50,
    dy=10,
    fontWeight='bold',
).encode(
    text=alt.Text('mean(count):Q', format=".2"))

#go
( bars + rule + text ).properties(height=400,width=800).configure_view(strokeOpacity=0)