# How often does @realDonaldTrump tweet each day?

In [1]:
# Load the lab_black IPython extension for this session.
# Project page: https://github.com/dnanhkhoa/nb_black
# NOTE(review): presumably this auto-formats each cell with Black — confirm.
%load_ext lab_black

In [2]:
import altair as alt
import pandas as pd
import matplotlib as mpl
import numpy as np
import json
import pytz
from datetime import datetime
import altair_latimes as lat

# Register the theme shipped with altair_latimes under the name "latimes" and
# activate it. A theme has to be enabled under the same name it was registered
# with; the previous call enabled "stiles", a name this notebook never
# registers, while the recorded output of this cell shows 'latimes' enabled.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [3]:
# Lift pandas' display limits: None disables truncation of long cell text
# and of the number of rows rendered.
for display_option in ("display.max_colwidth", "display.max_rows"):
    pd.set_option(display_option, None)

### Read dump downloaded from trumptweetarchive.com

In [4]:
# Run the companion processing notebook before reading its CSV output.
# NOTE(review): the recorded output of this cell shows a FileNotFoundError for
# .../trump-tweets/output/realdonaldtrump.csv — confirm the output directory
# exists where that notebook expects it.
%run '00-trump-tweets-processing.ipynb'

FileNotFoundError: [Errno 2] No such file or directory: '/Users/stiles/github/notebooks/trump-tweets/output/realdonaldtrump.csv'

FileNotFoundError: [Errno 2] No such file or directory: '/Users/stiles/github/notebooks/trump-tweets/output/realdonaldtrump.csv'

In [None]:
# Load the processed tweet archive.
# The path is relative to the notebook's working directory, like every
# "output/..." file this notebook writes, instead of pointing into one
# user's home directory (which breaks on any other machine).
# "id" is read as text so the long tweet IDs are never parsed as numbers.
src = pd.read_csv(
    "output/realdonaldtrump.csv",
    dtype={"id": str},
    low_memory=False,
)

In [None]:
# Tag every row with the account handle.
src = src.assign(user="realDonaldTrump")

### How many tweets since May 2009?

In [None]:
# Number of rows (tweets) in the archive.
src.shape[0]

### Clean up dates

In [None]:
# Keep the identifier and the calendar-part columns as text, then parse the
# "date" column into real datetimes so it can be compared and grouped.
for text_column in ("id", "year", "month", "day", "hour"):
    src[text_column] = src[text_column].astype(str)

src["date"] = pd.to_datetime(src["date"])

### Most popular tweets

In [None]:
src[["id", "date", "text", "favorites"]].sort_values("favorites", ascending=False).head(
    5
)

In [None]:
src[["id", "date", "text", "retweets"]].sort_values("retweets", ascending=False).head(5)

In [None]:
src.to_csv("output/trump-tweets-all.csv", index=False)

### Since he took office

In [None]:
# Rows dated on or after the 01/20/2017 cutoff, ordered oldest to newest.
on_or_after_cutoff = src["date"] >= "01/20/2017"
trumppres = src.loc[on_or_after_cutoff].sort_values(by="date")

---

### Devices used for tweets as president

In [None]:
# Count tweets per posting device: one row per device, with the tally in a
# column named "count".
tweets_per_device = trumppres.groupby(["device"])["id"].count()
trumppres_devices = tweets_per_device.rename("count").reset_index()

In [None]:
for i in trumppres_devices:
    trumppres_devices["pct"] = round(
        (trumppres_devices["count"] / trumppres_devices["count"].sum()) * 100, 2
    )

In [None]:
trumppres_devices.sort_values("count", ascending=False).head()

### How many not from an iPhone since president? 

In [None]:
trumppres_notiphone = trumppres[trumppres["device"] != "Twitter for iPhone"]

In [None]:
len(trumppres_notiphone)

### Time difference between tweets

In [None]:
# Rebuild the frame of rows dated on or after 01/20/2017, oldest first, so the
# gap between consecutive tweets can be computed row by row.
dated_since_cutoff = src["date"] >= "01/20/2017"
trumppres = src.loc[dated_since_cutoff].sort_values(by="date")

In [None]:
trumppres["diff"] = (trumppres["date"] - trumppres["date"].shift(1)).astype(
    "timedelta64[h]"
)

In [None]:
trumppres.sort_values("date", ascending=False).head()

In [None]:
trumppres["diff"].mean()

### Tweets per month, year

In [None]:
trump_years = src.groupby(["year", "month"]).agg({"date": "count"}).reset_index()

In [None]:
trump_years.tail()

### What did he average each day in retweets, likes and replies

In [None]:
trump_engagements_day = (
    src.groupby(["month", "year"])
    .agg({"id": "size", "retweets": ["sum", "mean"], "favorites": ["sum", "mean"]})
    .round()
    .reset_index()
)

In [None]:
trump_engagements_day.columns = (
    trump_engagements_day.columns.map("_".join).str.strip().str.rstrip("_")
)

In [None]:
trump_engagements_day.tail()

In [None]:
chart = (
    alt.Chart((trump_engagements_day.query('year > "2016"')))
    .mark_bar(size=10, opacity=1)
    .encode(
        x=alt.X(
            "month:T",
            title=" ",
            axis=alt.Axis(values=["1", "12"], grid=False, tickCount=1, format="%b"),
        ),
        y=alt.Y(
            "id_size:Q",
            stack=None,
            title=" ",
            axis=alt.Axis(
                gridWidth=0.6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=2,
                format="2,f",
            ),
            scale=alt.Scale(domain=(0, 1500)),
        ),
        facet=alt.Facet("year:N", columns=4, title=" "),
    )
    .properties(width=150, height=100, title="Monthly tweets by @realDonaldTrump")
)

chart.configure_view(strokeOpacity=0).configure_legend(
    orient="top", symbolType="stroke"
).configure_legend(symbolType="square")

In [None]:
chart = (
    alt.Chart((trump_engagements_day.query('year > "2016"')))
    .mark_bar(size=10, opacity=1)
    .encode(
        x=alt.X(
            "month:T",
            title=" ",
            axis=alt.Axis(values=["1", "12"], grid=False, tickCount=1, format="%b"),
        ),
        y=alt.Y(
            "favorites_mean:Q",
            stack=None,
            title=" ",
            axis=alt.Axis(
                gridWidth=0.6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=2,
                format="1,s",
            ),
            scale=alt.Scale(domain=(0, 120000)),
        ),
        facet=alt.Facet("year:N", columns=4, title=" "),
    )
    .properties(width=150, height=100, title="Mean monthly likes of @realDonaldTrump")
)

chart.configure_view(strokeOpacity=0).configure_legend(
    orient="top", symbolType="stroke"
).configure_legend(symbolType="square")

In [None]:
chart = (
    alt.Chart((trump_engagements_day.query('year > "2016"')))
    .mark_bar(size=10, opacity=1)
    .encode(
        x=alt.X(
            "month:T",
            title=" ",
            axis=alt.Axis(values=["1", "7"], grid=False, tickCount=1, format="%B"),
        ),
        y=alt.Y(
            "retweets_mean:Q",
            stack=None,
            title=" ",
            axis=alt.Axis(
                gridWidth=0.6,
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=2,
                format="1,s",
            ),
            scale=alt.Scale(domain=(0, 50000)),
        ),
        facet=alt.Facet("year:N", columns=5, title=" "),
    )
    .properties(
        width=150, height=100, title="Mean monthly retweets of @realDonaldTrump"
    )
)

chart.configure_view(strokeOpacity=0).configure_legend(
    orient="top", symbolType="stroke"
).configure_legend(symbolType="square")

---

### Dump monthly engagements

In [None]:
df_dt = trumppres.copy()

In [None]:
df_dt = df_dt.set_index("date")

In [None]:
g = df_dt.groupby(pd.Grouper(freq="MS"))

In [None]:
trump_engagements_month = g.mean().reset_index()

In [None]:
trump_engagements_month.tail()

In [None]:
# trump_engagements_month = (
#     src.groupby(["month", "year"])
#     .agg({"id": "size", "retweets": sum, "favorites": sum})
#     .round()
#     .reset_index()
# )

In [None]:
# Export only the month and the two engagement averages, without the index.
monthly_export_columns = ["date", "favorites", "retweets"]
trump_engagements_month[monthly_export_columns].to_csv(
    path_or_buf="output/trump_engagements_month.csv", index=False
)

---

### Limit data to when Trump took office. How many tweets since then?

In [None]:
trumpall_prez = src[src.date >= "01/20/2017"]
len(trumpall_prez)

In [None]:
trumpall_prez.head()

### Limit data to month Trump announced candidacy. How many tweets since then?

In [None]:
# Rows dated on or after 06/01/2015, the start of the announcement month.
since_announcement_month = src["date"] >= "06/01/2015"
trumpall_running = src[since_announcement_month]
trumpall_running.shape[0]

### Count daily tweets since candidacy, create dataframe with results

In [None]:
trump_tweets_day_candidacy = trumpall_running.groupby(["date"]).size()

In [None]:
trump_tweets_day_candidacy.tail()

In [None]:
trump_tweets_day_candidacy_df = pd.DataFrame(
    {
        "date": trump_tweets_day_candidacy.index,
        "count": trump_tweets_day_candidacy.values,
    }
)

In [None]:
trump_tweets_day_candidacy_df.to_csv("output/trump_tweets_day_candidacy.csv")

### Count daily tweets since inauguration, create dataframe with results

In [None]:
trump_tweets_day_prez = trumpall_prez.groupby(["date"]).size()
trump_tweets_day_prez_df = pd.DataFrame(
    {"date": trump_tweets_day_prez.index, "count": trump_tweets_day_prez.values}
)

In [None]:
trump_tweets_day_prez_df.to_csv("output/trump_tweets_day_prez.csv")

### Count daily tweets from the beginning, create dataframe with results

In [None]:
trump_tweets_day = src.groupby(["date"]).agg("size").reset_index(name="count")
trump_tweets_day.tail(10)

In [None]:
trump_tweets_day.to_csv("output/trump_tweets_day.csv")

---

### Sort table for top tweet days during presidency

In [None]:
trump_freq_inauguration = trump_tweets_day_prez_df.sort_values(
    ["count"], ascending=False
)

In [None]:
trump_freq_inauguration.head(5)

### Read all from today

In [None]:
today = pd.Timestamp.today().strftime("%Y-%m-%d")

In [None]:
len(today)

In [None]:
trump_tweets_today = trumpall_prez[trumpall_prez.date == today]

In [None]:
trump_tweets_today.head()

---

### RT vs Tweet

In [None]:
# Rows per (date, isRetweet) pair: once over the whole archive, and once for
# rows dated after 2019-01-01 only.
retweet_keys = ["date", "isRetweet"]
rt_tweet = src.groupby(retweet_keys).size().reset_index(name="count")

after_2019_start = src[src["date"] > "2019-01-01"]
rt_tweet_recent = after_2019_start.groupby(retweet_keys).size().reset_index(name="count")

In [None]:
rt_pivot = pd.DataFrame(
    pd.pivot_table(
        rt_tweet, values="count", index=["date"], columns=["isRetweet"], aggfunc=np.sum
    )
    .rename(columns={"isRetweet": "id", False: "tweet", True: "retweet"})
    .reset_index()
    .fillna("")
)

In [None]:
rt_pivot.to_csv("output/rt_pivot.csv")

---

### Chart it!

In [None]:
# bars

lines = (
    alt.Chart(
        trump_tweets_day_candidacy_df,
        title="@realDonaldTrump tweet frequency since candidacy announcement",
    )
    .mark_bar(size=2)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=6, format=("%b. %Y")),
        ),
        y=alt.Y(
            "count:Q",
            scale=alt.Scale(domain=(0, 200)),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=6,
                title="Daily tweet counts and mean",
            ),
        ),
    )
)

# rule showing mean

rule = (
    alt.Chart(trump_tweets_day_candidacy_df)
    .mark_rule(color="red")
    .encode(y="mean(count):Q")
)

# rule label -- would like to add "Average: " annotation
text = rule.mark_text(
    align="center",
    baseline="middle",
    dx=200,
    dy=10,
    fontWeight="bold",
).encode(text=alt.Text("mean(count):Q", format=".2"))

# go
(lines + rule + text).properties(height=400, width=800).configure_view(strokeOpacity=0)

In [None]:
# bars

bars = (
    alt.Chart(
        trump_freq_inauguration,
        title="@realDonaldTrump tweet frequency since inauguration",
    )
    .mark_bar(size=2)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=6, format=("%b. %Y")),
        ),
        y=alt.Y(
            "count:Q",
            scale=alt.Scale(domain=(0, 200)),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=6,
                title="Daily tweet counts and mean",
            ),
        ),
    )
)

# rule showing mean

rule = (
    alt.Chart(trump_freq_inauguration).mark_rule(color="red").encode(y="mean(count):Q")
)

# rule label -- would like to add "Average: " annotation
text = rule.mark_text(
    align="center",
    baseline="middle",
    dx=200,
    dy=10,
    fontWeight="bold",
).encode(text=alt.Text("mean(count):Q", format=".2"))

# go
(bars + rule + text).properties(height=400, width=800).configure_view(strokeOpacity=0)

In [None]:
# Calendar heatmap: day of month across, month down, one panel per year,
# colored by the daily count.
heatmap_tooltip = [
    alt.Tooltip("monthdate(date):T", title="Date"),
    alt.Tooltip("count:Q", title="Tweets"),
]
heatmap_encodings = dict(
    x=alt.X("date(date):O", title=" "),
    y=alt.Y("month(date):O", title=""),
    color=alt.Color("count:Q", title="Tweet count"),
    tooltip=heatmap_tooltip,
    facet=alt.Facet("year(date):O", title="", columns=1),
)
heatmap_base = alt.Chart(
    trump_freq_inauguration,
    title="@realDonaldTrump daily tweet frequency since inauguration",
)
heatmap = (
    heatmap_base.mark_rect().encode(**heatmap_encodings).properties(width=600, height=200)
)

heatmap.configure_legend(orient="top", padding=10).configure_view(strokeOpacity=0)

In [None]:
trump_RT_freq_inauguration = trump_tweets_day_prez_df.sort_values(
    ["count"], ascending=False
)

In [None]:
trumpall_prez_rt = trumpall_prez[
    (trumpall_prez["isRetweet"] == True) | (trumpall_prez["isRetweet"] == "t")
]
trumpall_prez_no_rt = trumpall_prez[
    (trumpall_prez["isRetweet"] == False) | (trumpall_prez["isRetweet"] == "f")
]

In [None]:
len(trumpall_prez_rt)

### Retweets

In [None]:
trump_tweets_day_prez_rt = trumpall_prez_rt.groupby(["date"]).size()
trump_tweets_day_prez_rt_df = pd.DataFrame(
    {"date": trump_tweets_day_prez_rt.index, "count": trump_tweets_day_prez_rt.values}
)

In [None]:
trump_tweets_day_prez_rt_df.sort_values(by="date", ascending=False).head()

### No retweets

In [None]:
trump_tweets_day_prez_no_rt = trumpall_prez_no_rt.groupby(["date"]).size()
trump_tweets_day_prez_no_rt_df = pd.DataFrame(
    {
        "date": trump_tweets_day_prez_no_rt.index,
        "count": trump_tweets_day_prez_no_rt.values,
    }
)

In [None]:
# retweets
trump_rt_freq_inauguration = trump_tweets_day_prez_rt_df.sort_values(
    ["count"], ascending=False
)
# no retweets
trump_no_rt_freq_inauguration = trump_tweets_day_prez_no_rt_df.sort_values(
    ["count"], ascending=False
)

In [None]:
trump_tweets_day_prez_rt_df.tail()

In [None]:
### Try a different color scheme

# Calendar heatmap of the non-retweet daily counts, using the "blues" scheme.
blue_count_color = alt.Color(
    "count:Q", title="Tweet count", scale=alt.Scale(scheme="blues")
)
no_rt_tooltip = [
    alt.Tooltip("monthdate(date):T", title="Date"),
    alt.Tooltip("count:Q", title="Tweets"),
]
no_rt_heatmap_base = alt.Chart(
    trump_no_rt_freq_inauguration,
    title="@realDonaldTrump daily tweet (without RT) frequency since inauguration",
)
heatmap = (
    no_rt_heatmap_base.mark_rect()
    .encode(
        x=alt.X("date(date):O", title=" "),
        y=alt.Y("month(date):O", title=""),
        color=blue_count_color,
        tooltip=no_rt_tooltip,
        facet=alt.Facet("year(date):O", title="", columns=1),
    )
    .properties(width=600, height=180)
)

heatmap.configure_legend(orient="top", padding=10).configure_view(strokeOpacity=0)

In [None]:
# bars: one thin bar per day of non-retweet counts
no_rt_date_axis = alt.Axis(grid=False, title="", tickCount=6, format=("%B %Y"))
no_rt_count_axis = alt.Axis(
    gridColor="#dddddd",
    offset=6,
    tickSize=0,
    domainOpacity=0,
    tickCount=5,
    title="Daily tweet counts and mean (without retweets) since inauguration",
)
bars = (
    alt.Chart(trump_no_rt_freq_inauguration, title="@realDonaldTrump tweet frequency")
    .mark_bar(size=2)
    .encode(
        x=alt.X("date:T", axis=no_rt_date_axis),
        y=alt.Y("count:Q", scale=alt.Scale(domain=(0, 70)), axis=no_rt_count_axis),
    )
)

# rule showing mean
no_rt_rule_base = alt.Chart(trump_no_rt_freq_inauguration)
rule = no_rt_rule_base.mark_rule(color="red").encode(y="mean(count):Q")

# rule label -- would like to add "Average: " annotation
mean_label = alt.Text("mean(count):Q", format=".2")
text = rule.mark_text(
    align="center",
    baseline="middle",
    dx=50,
    dy=10,
    fontWeight="bold",
).encode(text=mean_label)

# go
layered = bars + rule + text
layered.properties(height=400, width=620).configure_view(strokeOpacity=0)

In [None]:
trumppres[
    (trumppres["text"].str.contains("ban"))
    & (trumppres["text"].str.contains("Twitter"))
]

In [None]:
trumppres[trumppres["id"] == "1137702218835136500"][["user", "text"]]

---