# Scraping former President Trump's 'desk'

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import datetime as dt

In [3]:
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt

In [4]:
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

ThemeRegistry.enable('latimes')

In [5]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.options.display.max_colwidth = None

---

### Read the page

In [6]:
r = requests.get("https://www.donaldjtrump.com/desk")
soup = BeautifulSoup(r.text, "html.parser")

### Grab everything from each post div

In [7]:
rows = soup.find_all("div", class_="ftdli-main ftd-d")

In [8]:
data = []
for r in rows:
    if r.find("img") is not None:
        image = r.find("img")["src"]
    else:
        image = ""
    post_url = r.find("div", class_="title ftd-d").get("onclick")
    post = r.find("p", class_="ftd-post-text").text
    author = r.find("h2").text
    date = r.find("div", class_="date ftd-d").text
    raw = r.find("p", class_="ftd-post-text").text
    data.append(
        dict(
            date=date,
            url=post_url,
            author=author,
            post=post,
            image=image,
        )
    )

### First item from the dictionary

In [9]:
data[0]

{'date': '\n8:29am May 25, 2021\n',
 'url': "location.href='/desk/desk-cr9wpsyfxe/';",
 'author': 'Donald J. Trump',
 'post': 'Crazy Joe Scarborough and his blood-curdling, psycho wife (?), Mika, are going crazy because their ratings have absolutely TANKED. They are wrong too often and always predictable. They were right about me in 2016, but I did better in the 2020 Election with 12 million more votes. Stay tuned!',
 'image': ''}

### Clean up before importing as a dataframe

In [10]:
for d in data:
    d["date"] = d["date"].replace("\n", "")
    d["url"] = (
        d["url"]
        .replace("location.href='", "https://www.donaldjtrump.com")
        .replace("/';", "")
    )

In [11]:
src = pd.DataFrame(data)

---

### Pull in early posts

In [12]:
archive_df = pd.read_csv("input/archive.csv")

In [13]:
archive_df.drop(["video"], axis=1, inplace=True)

In [14]:
df = pd.concat([src, archive_df]).drop_duplicates(subset="url", keep="first")

### How many posts total? 

In [15]:
len(df)

54

### Last five posts

In [36]:
df.head()

Unnamed: 0,date,url,author,post,image,election,fulldate,time
0,2021-05-25,https://www.donaldjtrump.com/desk/desk-cr9wpsyfxe,Donald J. Trump,"Crazy Joe Scarborough and his blood-curdling, psycho wife (?), Mika, are going crazy because their ratings have absolutely TANKED. They are wrong too often and always predictable. They were right about me in 2016, but I did better in the 2020 Election with 12 million more votes. Stay tuned!",,True,2021-05-25 08:29:00,08:29:00
1,2021-05-24,https://www.donaldjtrump.com/desk/desk-pwspbhwbx6,Donald J. Trump,"New Hampshire’s Election Audit has revealed that large-scale voting machines appear to count NON-EXISTING VOTES. State and local communities are seeking confirmation. It’s probably true, but we’ll soon know. Why aren’t Minority Leader Mitch McConnell and Republicans doing anything about what went on in the 2020 Election? How can the Democrats be allowed to get away with this? It will go down as the Crime of the Century! Other States like Arizona, Georgia (where a Judge just granted a motion to unseal and inspect ballots from the 2020 Election), Michigan, Pennsylvania, and more to follow.",,True,2021-05-24 20:30:00,20:30:00
2,2021-05-24,https://www.donaldjtrump.com/desk/desk-svhbnaemtt,Donald J. Trump,"Highly respected pollster John McLaughlin says 73% of all Republicans want Trump to run again in 2024 and Republican primary voters would support him 82%-13%. Even the Washington Post has just reported “All Republican Roads Lead to Mar-a-Lago.”\n\nWhat WaPo and other members of the Fake News Media don’t report is that Joe Biden is of no interest to anybody—21 million less people watched his Joint Address to Congress than watched mine.\n\nAlso, Biden’s ratings have killed the Radical Left Fake News Cable Channels. MSNBC and CNN have plummeted in the ratings. MSNBC doesn’t have a show in the top 10 of all cable news programs, and CNN doesn’t even have a show in the top 100. They should have embraced and endorsed “Trump”—their ratings would have been at new highs!\nI have been doing very limited media so the American public could see just how big of a disaster the Biden Administration has been, and I was right. Inflation, the Border Crisis, our forgotten Military, war in the Middle East, all as a result of Biden’s mistakes. Our Country is being destroyed. Watch, it’s only going to get worse!\n\nThe Washington Post also incorrectly reported about my DonaldJTrump.com website viewership. We have not yet launched our own social media “platform,” but even the very basic site we have to post our statements has received 36.7 million views over the past month alone, and we’re getting more traffic to our website now than in 2020, an Election year! This number would be even greater if we were still on Twitter and Facebook, but since Big Tech has illegally banned me, tens of millions of our supporters have stopped using these platforms because they’ve become “boring” and nasty.\n\nMy website is a place where everyone can see my statements, issued in real time, and engage with the MAGA Movement. This is meant to be a temporary way of getting my thoughts and ideas out to the public without the Fake News spin, but the website is not a “platform.” It is merely a way of communicating until I decide on what the future will be for the choice or establishment of a platform. It will happen soon. Stay tuned!",,True,2021-05-24 17:00:00,17:00:00
3,2021-05-22,https://www.donaldjtrump.com/desk/desk-ucnmbjj2mk,Donald J. Trump,Where’s Durham—what ever happened to the Durham Report?\n,,False,2021-05-22 15:39:00,15:39:00
4,2021-05-22,https://www.donaldjtrump.com/desk/desk-nxxuw8ftmx,Donald J. Trump,"The lackluster Attorney General of Arizona, Mark Brnovich, has to get on the ball and catch up with the great Republican Patriots in the Arizona State Senate. As massive crime in the 2020 Election is becoming more and more evident and obvious, Brnovich is nowhere to be found. He is always on television promoting himself, but never mentions the Crime of the Century, that took place during the 2020 Presidential Election, which was Rigged and Stolen. Arizona was a big part and Brnovich must put himself in gear, or no Arizona Republican will vote for him in the upcoming elections. They will never forget, and neither will the great Patriots of our Nation!",,True,2021-05-22 14:36:00,14:36:00


### How many mention 'election'?

In [17]:
df["election"] = df["post"].str.contains("election") | df["post"].str.contains(
    "Election"
)

In [18]:
len(df[df["election"] == True])

29

### Clean up the dates

In [19]:
df["fulldate"] = pd.to_datetime(df["date"])
df["date"] = df["fulldate"].dt.date
df["time"] = df["fulldate"].dt.time

In [20]:
post_urls = list(df["url"])

---

### Posts per day 

In [21]:
election = df.groupby(["date", "election"]).agg({"author": "size"}).reset_index()

In [22]:
election.head()

Unnamed: 0,date,election,author
0,2021-03-24,True,1
1,2021-03-26,True,1
2,2021-03-30,True,1
3,2021-04-02,True,2
4,2021-04-03,True,1


In [23]:
daily = df.groupby(["date"])["author"].count().reset_index(name="count")

In [24]:
daily.rename(columns={"author": "count"}, inplace=True)

In [25]:
daily["seven-day-avg"] = daily["count"].rolling(7).mean()

In [26]:
daily["date"] = pd.to_datetime(daily["date"])

In [27]:
daily.sort_values("count", ascending=False).head()

Unnamed: 0,date,count,seven-day-avg
23,2021-05-03,4,1.857143
25,2021-05-05,4,2.142857
26,2021-05-19,3,2.142857
8,2021-04-07,3,1.571429
9,2021-04-08,3,1.857143


### Chart it!

In [28]:
bars = (
    alt.Chart(
        daily,
        title="Trump posts to the 'desk' since it launched",
    )
    .mark_bar(size=10)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
        y=alt.Y(
            "count:Q",
            scale=alt.Scale(domain=(0, len("count"))),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                title="Daily post count and seven-day average",
            ),
        ),
    )
)

rolling = (
    alt.Chart(daily)
    .mark_line(color="red")
    .encode(
        y="seven-day-avg",
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
    )
)

(bars + rolling).properties(height=350, width=600).configure_view(strokeOpacity=0)

In [29]:
election["date"] = pd.to_datetime(election["date"])

In [30]:
bars_elex = (
    alt.Chart(
        election,
        title="Trump posts to the 'desk' re: election",
    )
    .mark_bar(size=10)
    .encode(
        x=alt.X(
            "date:T",
            axis=alt.Axis(grid=False, title="", tickCount=5, format=("%B %-d")),
        ),
        y=alt.Y(
            "author:Q",
            scale=alt.Scale(domain=(0, len("author:Q"))),
            axis=alt.Axis(
                gridColor="#dddddd",
                offset=6,
                tickSize=0,
                domainOpacity=0,
                tickCount=3,
                title="Daily post count",
            ),
        ),
        color=alt.Color(
            "election",
            title="About election?",
            scale=alt.Scale(domain=["true", "false"], range=["#f1a340", "#998ec3"]),
        ),
    )
)

(bars_elex).properties(height=350, width=600).configure_view(strokeOpacity=0)

In [31]:
(bars + rolling).properties(height=350, width=600).configure_view(strokeOpacity=0).save(
    "visuals/daily_posts.png"
)

In [32]:
(bars_elex).properties(height=350, width=600).configure_view(strokeOpacity=0).save(
    "visuals/daily_posts_re_election.png"
)

---

### Exports

In [33]:
today = dt.date.today().strftime("%m-%d-%Y")

In [34]:
df.to_csv("archive/posts_" + str(today) + ".csv", index=False)
df.to_csv("output/allposts.csv", index=False)