# Scraping former President Trump's 'desk'

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import datetime as dt
import tweepy

In [3]:
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.options.display.max_colwidth = None

---

### Read the page

In [6]:
r = requests.get("https://www.donaldjtrump.com/desk")
soup = BeautifulSoup(r.text, "html.parser")

### Grab everything from each post div

In [7]:
rows = soup.find_all("div", class_="ftdli-main ftd-d")

In [8]:
data = []
for r in rows:
    if r.find("img") is not None:
        image = r.find("img")["src"]
    else:
        image = ""
    post_url = r.find("div", class_="title ftd-d").get("onclick")
    post = r.find("p", class_="ftd-post-text").text
    author = r.find("h2").text
    date = r.find("div", class_="date ftd-d").text
    raw = r.find("p", class_="ftd-post-text").text
    data.append(
        dict(
            date=date,
            url=post_url,
            author=author,
            post=post,
            image=image,
        )
    )

### First item from the dictionary

In [9]:
data[0]

{'date': '\n9:56am May 11, 2021\n',
 'url': "location.href='/desk/desk-xudptbbxru/';",
 'author': 'Donald J. Trump',
 'post': 'Congratulations to Glenn Youngkin for winning the Republican nomination for Governor of Virginia. Glenn is pro-Business, pro-Second Amendment, pro-Veterans, pro-America, he knows how to make Virginia’s economy rip-roaring, and he has my Complete and Total Endorsement! Glenn is running against Bill Clinton’s longtime enabler, Terry McAuliffe. Terry McAuliffe was the Clintons’ bagman in more ways than one, from the cover-ups to the get-rich-quick schemes, and his deals with Communist China look suspicious. He was responsible for many of the problems Virginia currently has. Virginia doesn’t need the Clintons or the Communist Chinese running the state, so say no to Terry McAuliffe, and yes to Patriot Glenn Youngkin!',
 'image': 'https://cdn.donaldjtrump.com/djtweb/general/SA_GlennYoungkin_EndorsementGraphic_Twitter.jpg'}

### Clean up before importing as a dataframe

In [10]:
for d in data:
    d["date"] = d["date"].replace("\n", "")
    d["url"] = (
        d["url"]
        .replace("location.href='", "https://www.donaldjtrump.com")
        .replace("/';", "")
    )

In [11]:
src = pd.DataFrame(data)

---

### Pull in early posts

In [12]:
archive_df = pd.read_csv("input/archive.csv")

In [13]:
archive_df.drop(["video"], axis=1, inplace=True)

In [14]:
df = pd.concat([src, archive_df]).drop_duplicates(subset="url", keep="first")

### How many posts total? 

In [15]:
len(df)

54

### Last five posts

In [16]:
df.head(5)

Unnamed: 0,date,url,author,post,image
0,"9:56am May 11, 2021",https://www.donaldjtrump.com/desk/desk-xudptbbxru,Donald J. Trump,"Congratulations to Glenn Youngkin for winning the Republican nomination for Governor of Virginia. Glenn is pro-Business, pro-Second Amendment, pro-Veterans, pro-America, he knows how to make Virginia’s economy rip-roaring, and he has my Complete and Total Endorsement! Glenn is running against Bill Clinton’s longtime enabler, Terry McAuliffe. Terry McAuliffe was the Clintons’ bagman in more ways than one, from the cover-ups to the get-rich-quick schemes, and his deals with Communist China look suspicious. He was responsible for many of the problems Virginia currently has. Virginia doesn’t need the Clintons or the Communist Chinese running the state, so say no to Terry McAuliffe, and yes to Patriot Glenn Youngkin!",https://cdn.donaldjtrump.com/djtweb/general/SA_GlennYoungkin_EndorsementGraphic_Twitter.jpg
1,"8:52pm May 10, 2021",https://www.donaldjtrump.com/desk/desk-x3nk392q9z,Donald J. Trump,"The major Michigan Election Fraud case has just filed a bombshell pleading claiming votes were intentionally switched from President Trump to Joe Biden. The number of votes is MASSIVE and determinative. This will prove true in numerous other States. All Republicans must UNIFY and not let this happen. If a thief robs a jewelry store of all of its diamonds (the 2020 Presidential Election), the diamonds must be returned. The Fake News media refuses to cover the greatest Election Fraud in the history of our Country. They have lost all credibility, but ultimately, they will have no choice!",
2,"2:26pm May 10, 2021",https://www.donaldjtrump.com/desk/desk-henmxass3a,Donald J. Trump,"After being loudly booed at the Utah Republican Party Convention, Utah’s Weber County censured RINO Mitt Romney in the strongest of terms. Washington County Republicans also censured Romney in April. He is BAD NEWS for our Country!",
3,"2:04pm May 10, 2021",https://www.donaldjtrump.com/desk/desk-xkpygaxcpz,Donald J. Trump,"Lieutenant Governor Dan Patrick is a great fighter for the people of Texas. He has stood up for up for Life, Liberty, the Second Amendment, Border Security, our Military and our Vets, and our God-given Freedoms. He has governed by conservative principles of LOW TAXES and careful spending, always doing what is best for his great State and for America. Texans should re-elect him! He is outstanding and has my Complete and Total Endorsement!",https://cdn.donaldjtrump.com/djtweb/general/SA_DanPatrick_EndorsementGraphic_Twitter.jpg
4,"10:46am May 10, 2021",https://www.donaldjtrump.com/desk/desk-wbwbqk2sy7,Donald J. Trump,"The House GOP has a massive opportunity to upgrade this week from warmonger Liz Cheney to gifted communicator Elise Stefanik. Elise has intelligence, an endorsement from American Patriot Brandon Judd and the National Border Patrol Council, she has an A+ from the NRA, and she loves our Veterans. We need someone in Leadership who has experience flipping districts from Blue to Red as we approach the important 2022 midterms, and that’s Elise! She knows how to win, which is what we need!",


### How many mention 'election'?

In [17]:
election = df[
    (df["post"].str.contains("election")) | (df["post"].str.contains("Election"))
]

In [18]:
len(election)

28

### Clean up the dates

In [19]:
df["fulldate"] = pd.to_datetime(df["date"])
df["date"] = df["fulldate"].dt.date
df["time"] = df["fulldate"].dt.time

In [20]:
post_urls = list(df["url"])

---

### Posts per day 

In [21]:
daily = df.groupby(["date"])["author"].count().reset_index(name="count")

In [22]:
daily

Unnamed: 0,date,count
0,2021-03-24,1
1,2021-03-26,1
2,2021-03-30,1
3,2021-04-02,2
4,2021-04-03,1
5,2021-04-04,1
6,2021-04-05,2
7,2021-04-06,1
8,2021-04-07,3
9,2021-04-08,3


In [23]:
daily["seven-day-avg"] = daily["count"].rolling(7).mean()

In [24]:
daily["date"] = pd.to_datetime(daily["date"])

In [25]:
daily.sort_values("count", ascending=False).head()

Unnamed: 0,date,count,seven-day-avg
28,2021-05-10,4,2.714286
25,2021-05-05,4,2.142857
23,2021-05-03,4,1.857143
9,2021-04-08,3,1.857143
26,2021-05-07,3,2.142857


### Chart it!

In [30]:
bars = (
    alt.Chart(
        daily,
        title="Trump posts to the 'desk' since it launched",
    )
    .mark_bar(size=10)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
        y=alt.Y(
            "count:Q",
            scale=alt.Scale(domain=(0, 5)),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                title="Daily post count and seven-day average",
            ),
        ),
    )
)

rolling = (
    alt.Chart(daily)
    .mark_line(color="red")
    .encode(
        y="seven-day-avg",
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
    )
)

(bars + rolling).properties(height=350, width=600).configure_view(strokeOpacity=0)

In [31]:
(bars + rolling).properties(height=350, width=600).configure_view(strokeOpacity=0).save(
    "visuals/daily_posts.png"
)

---

### Exports

In [28]:
today = dt.date.today().strftime("%m-%d-%Y")

In [29]:
df.to_csv("archive/posts_" + str(today) + ".csv", index=False)
df.to_csv("output/allposts.csv", index=False)