# President Biden polls

### Get Python tools

In [1]:
# Load the lab_black IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats code cells with Black — confirm.
%load_ext lab_black

In [2]:
import datetime as dt
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
# Widen the notebook display limits: up to 50 columns and 1000 rows.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

---

### Get latest Biden trends from Real Clear Politics

In [4]:
# Download the Real Clear Politics job-approval page for President Biden.
url = "https://www.realclearpolitics.com/epolls/other/president-biden-job-approval-7320.html#polls"
# NOTE(review): the User-Agent value has an unbalanced "(" and looks truncated;
# it is left unchanged here because the server currently receives this value.
headers = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X "}
# A timeout keeps the notebook from hanging forever on a stalled connection.
r = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx responses instead of parsing an error page downstream.
r.raise_for_status()

In [5]:
# Parse the response body with Python's built-in HTML parser.
soup = BeautifulSoup(markup=r.text, features="html.parser")

In [6]:
# Collect every <table class="data"> element on the page.
# `find_all` is the supported spelling; `findAll` is a deprecated alias.
tables = soup.find_all("table", attrs={"class": "data"})

In [7]:
# Convert the matched tables to DataFrames and keep the first one.
# The markup is wrapped in StringIO because passing a literal HTML string to
# pd.read_html is deprecated in recent pandas releases.
src = pd.read_html(StringIO(str(tables)))[0]

In [8]:
# Keep only the first row of the scraped table, as an independent copy.
latest_df = src.iloc[:1].copy()

In [9]:
# Lowercase the header labels; later cells refer to "date", "poll", "sample".
lowercase_labels = latest_df.columns.str.lower()
latest_df.columns = lowercase_labels

In [10]:
# Split the "date" text on the first " - " into a begin part and an end part.
date_parts = latest_df["date"].astype(str).str.split(" - ", n=1, expand=True)
latest_df[["begin", "end"]] = date_parts

In [11]:
# Build a "<end>/<year>" string for the poll's end date.
# The scraped "end" value carries no year, and the year used to be hard-coded
# as 2021, which yields wrong dates as soon as the calendar year changes.
# Use the current year instead; if that would put the date in the future
# (e.g. a late-December end date scraped in early January), use the previous year.
today = pd.Timestamp(dt.date.today())
end_day = latest_df["end"].astype(str)
end_this_year = pd.to_datetime(end_day + f"/{today.year}")
poll_year = end_this_year.dt.year.where(end_this_year <= today, today.year - 1)
date = end_day + "/" + poll_year.astype(str)

In [12]:
# Replace the raw "date" text with the parsed end date (date only, no time).
end_timestamps = pd.to_datetime(date)
latest_df["date"] = end_timestamps.dt.date

In [13]:
# Remove the columns that are not carried into the combined table.
latest_df = latest_df.drop(columns=["poll", "sample", "begin", "end"])

In [14]:
# Preview the cleaned latest row(s).
latest_df.head(n=5)

Unnamed: 0,date,approve,disapprove,spread
0,2021-12-28,43.0,53.4,-10.4


---

### Import historical polling average for Biden from RCP via Wayback Machine

In [15]:
# Load the previously processed polling history from disk.
history_csv_path = "data/processed/biden_history.csv"
historical = pd.read_csv(history_csv_path)

In [16]:
# Drop the Wayback Machine capture columns before combining.
historical = historical.drop(columns=["wayback_date", "wayback_time"])

---

### Append latest to historical

In [17]:
# Stack the latest row(s) under the historical rows with a fresh 0..n-1 index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat(..., ignore_index=True) gives the same result as
# append(...).reset_index(drop=True).
full_df = pd.concat([historical, latest_df], ignore_index=True)

In [18]:
# Convert the combined "date" column to a single datetime dtype.
full_df = full_df.assign(date=pd.to_datetime(full_df["date"]))

In [19]:
# Order rows from newest to oldest date.
full_df = full_df.sort_values(by=["date"], ascending=[False])

In [20]:
# Tag every row with the same candidate label.
full_df = full_df.assign(candidate="President Biden")

---

In [21]:
# Reshape to long format: one row per (date, measure) pair.
# NOTE(review): the output names look swapped. The "value" column holds the
# measure name (approve/disapprove/spread) and the "variable" column holds the
# number. Left as-is because consumers of the written CSV may depend on these
# headers. Columns not listed below (e.g. "candidate") are not carried over.
measure_columns = ["approve", "disapprove", "spread"]
df_long = full_df.melt(
    id_vars="date",
    value_vars=measure_columns,
    var_name="value",
    value_name="variable",
)

In [22]:
# Write the long-format table to disk without the row index.
long_csv_path = "data/processed/biden_polling_averages_long.csv"
df_long.to_csv(long_csv_path, index=False)