# Scraping former President Trump's 'desk'

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import datetime as dt
import tweepy

In [3]:
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.options.display.max_colwidth = None

---

### Read the page

In [6]:
r = requests.get("https://www.donaldjtrump.com/desk")
soup = BeautifulSoup(r.text, "html.parser")

### Grab everything from each post div

In [7]:
rows = soup.find_all("div", class_="ftdli-main ftd-d")

In [8]:
data = []
for r in rows:
    if r.find("img") is not None:
        image = r.find("img")["src"]
    else:
        image = ""
    post_url = r.find("div", class_="title ftd-d").get("onclick")
    post = r.find("p", class_="ftd-post-text").text
    author = r.find("h2").text
    date = r.find("div", class_="date ftd-d").text
    raw = r.find("p", class_="ftd-post-text").text
    data.append(
        dict(
            date=date,
            url=post_url,
            author=author,
            post=post,
            image=image,
        )
    )

### First item from the dictionary

In [9]:
data[0]

{'date': '\n3:46pm May 12, 2021\n',
 'url': "location.href='/desk/desk-2de2mqg5yz/';",
 'author': 'Donald J. Trump',
 'post': 'If there were long and horrible gas lines like this under President Trump, the Fake News would make it a national outrage! Did Joe Biden put Hunter in charge of our energy, with all of his Burisma experience? Even Energy Secretary Jennifer Granholm is lost! ',
 'image': ''}

### Clean up before importing as a dataframe

In [10]:
for d in data:
    d["date"] = d["date"].replace("\n", "")
    d["url"] = (
        d["url"]
        .replace("location.href='", "https://www.donaldjtrump.com")
        .replace("/';", "")
    )

In [11]:
src = pd.DataFrame(data)

---

### Pull in early posts

In [12]:
archive_df = pd.read_csv("input/archive.csv")

In [13]:
archive_df.drop(["video"], axis=1, inplace=True)

In [14]:
df = pd.concat([src, archive_df]).drop_duplicates(subset="url", keep="first")

### How many posts total? 

In [15]:
len(df)

54

### Last five posts

In [16]:
df.head(5)

Unnamed: 0,date,url,author,post,image
0,"3:46pm May 12, 2021",https://www.donaldjtrump.com/desk/desk-2de2mqg5yz,Donald J. Trump,"If there were long and horrible gas lines like this under President Trump, the Fake News would make it a national outrage! Did Joe Biden put Hunter in charge of our energy, with all of his Burisma experience? Even Energy Secretary Jennifer Granholm is lost!",
1,"3:09pm May 12, 2021",https://www.donaldjtrump.com/desk/desk-r2kbxauecq,Donald J. Trump,"I see that everybody is comparing Joe Biden to Jimmy Carter. It would seem to me that is very unfair to Jimmy Carter. Jimmy mishandled crisis after crisis, but Biden has CREATED crisis after crisis. First there was the Biden Border Crisis (that he refuses to call a Crisis), then the Biden Economic Crisis, then the Biden Israel Crisis, and now the Biden Gas Crisis. Joe Biden has had the worst start of any president in United States history, and someday, they will compare future disasters to the Biden Administration—but no, Jimmy was better!",
2,"2:27pm May 12, 2021",https://www.donaldjtrump.com/desk/desk-ej2wkjxxmg,Donald J. Trump,,https://cdn.donaldjtrump.com/djtweb/general/Screenshot_on_2021-05-12_at_13_57_46.png
3,"9:36am May 12, 2021",https://www.donaldjtrump.com/desk/desk-au8zr963cu,Donald J. Trump,"Liz Cheney is a bitter, horrible human being. I watched her yesterday and realized how bad she is for the Republican Party. She has no personality or anything good having to do with politics or our Country. She is a talking point for Democrats, whether that means the Border, the gas lines, inflation, or destroying our economy. She is a warmonger whose family stupidly pushed us into the never-ending Middle East Disaster, draining our wealth and depleting our Great Military, the worst decision in our Country’s history. I look forward to soon watching her as a Paid Contributor on CNN or MSDNC!",
4,"8:33am May 12, 2021",https://www.donaldjtrump.com/desk/desk-qarwnutq8e,Donald J. Trump,"The Republicans in the House of Representatives have a great opportunity today to rid themselves of a poor leader, a major Democrat talking point, a warmonger, and a person with absolutely no personality or heart. As a representative of the Great State of Wyoming, Liz Cheney is bad for our Country and bad for herself. Almost everyone in the Republican Party, including 90% of Wyoming, looks forward to her ouster—and that includes me!",


### How many mention 'election'?

In [17]:
df["election"] = df["post"].str.contains("election") | df["post"].str.contains(
    "Election"
)

In [18]:
len(df[df["election"] == True])

26

### Clean up the dates

In [19]:
df["fulldate"] = pd.to_datetime(df["date"])
df["date"] = df["fulldate"].dt.date
df["time"] = df["fulldate"].dt.time

In [20]:
post_urls = list(df["url"])

---

### Posts per day 

In [21]:
election = df.groupby(["date", "election"]).agg({"author": "size"}).reset_index()

In [22]:
election.head()

Unnamed: 0,date,election,author
0,2021-03-24,True,1
1,2021-03-26,True,1
2,2021-03-30,True,1
3,2021-04-02,True,2
4,2021-04-03,True,1


In [23]:
daily = df.groupby(["date"])["author"].count().reset_index(name="count")

In [24]:
daily.rename(columns={"author": "count"}, inplace=True)

In [25]:
daily["seven-day-avg"] = daily["count"].rolling(7).mean()

In [26]:
daily["date"] = pd.to_datetime(daily["date"])

In [27]:
daily.sort_values("count", ascending=False).head()

Unnamed: 0,date,count,seven-day-avg
27,2021-05-12,5,3.0
26,2021-05-11,5,2.428571
23,2021-05-03,4,1.857143
25,2021-05-05,4,2.142857
19,2021-04-27,3,1.571429


### Chart it!

In [28]:
bars = (
    alt.Chart(
        daily,
        title="Trump posts to the 'desk' since it launched",
    )
    .mark_bar(size=10)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
        y=alt.Y(
            "count:Q",
            scale=alt.Scale(domain=(0, len("count:Q"))),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                title="Daily post count and seven-day average",
            ),
        ),
    )
)

rolling = (
    alt.Chart(daily)
    .mark_line(color="red")
    .encode(
        y="seven-day-avg",
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
    )
)

(bars + rolling).properties(height=350, width=600).configure_view(strokeOpacity=0)

In [29]:
election["date"] = pd.to_datetime(election["date"])

In [35]:
bars_elex = (
    alt.Chart(
        election,
        title="Trump posts to the 'desk' re: election",
    )
    .mark_bar(size=10)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
        y=alt.Y(
            "author:Q",
            scale=alt.Scale(domain=(0, len("author:Q"))),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                title="Daily post count",
            ),
        ),
        color=alt.Color(
            "election",
            title="About election?",
            scale=alt.Scale(domain=["true", "false"], range=["#f1a340", "#998ec3"]),
        ),
    )
)

(bars_elex).properties(height=350, width=600).configure_view(strokeOpacity=0)

In [31]:
(bars + rolling).properties(height=350, width=600).configure_view(strokeOpacity=0).save(
    "visuals/daily_posts.png"
)

In [32]:
(bars_elex).properties(height=350, width=600).configure_view(strokeOpacity=0).save(
    "visuals/daily_posts_re_election.png"
)

---

### Exports

In [33]:
today = dt.date.today().strftime("%m-%d-%Y")

In [34]:
df.to_csv("archive/posts_" + str(today) + ".csv", index=False)
df.to_csv("output/allposts.csv", index=False)