# Midterm elections and Congress

### Get Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [3]:
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000

In [4]:
url = "https://en.wikipedia.org/wiki/United_States_midterm_election"

In [5]:
src = pd.read_html(url, header=0)[1]

In [6]:
src.drop(range(0, 29), inplace=True)

In [7]:
src.columns = src.columns.str.lower().str.replace(" ", "_", regex=False)

In [8]:
src.rename(
    columns={
        "net_gain/loss_of_president's_party_1_2": "house",
        "net_gain/loss_of_president's_party_1_2.1": "senate",
        "president's_party": "president_party",
        "sitting_president": "president",
    },
    inplace=True,
)

In [9]:
src.head()

Unnamed: 0,year,president,president_party,house,senate
29,1902,Theodore Roosevelt,Republican,+9: (201 ► 210),0: (55 ► 55)
30,1906,Theodore Roosevelt,Republican,-27: (251 ► 224),+2: (58 ► 60)
31,1910,William Howard Taft,Republican,-56: (219 ► 163),-9: (59 ► 50)
32,1914,Woodrow Wilson,Democratic,-61: (291 ► 230),+3: (50 ► 53)
33,1918,Woodrow Wilson,Democratic,-22: (214 ► 192),-4: (52 ► 48)


In [10]:
src[["house_net", "house_change"]] = src["house"].str.split(": ", expand=True)
src[["senate_net", "senate_change"]] = src["senate"].str.split(": ", expand=True)

In [11]:
src[["house_from", "house_to"]] = (
    src["house_change"]
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.split(" ► ", expand=True)
)
src[["senate_from", "senate_to"]] = (
    src["house_change"]
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.split(" ► ", expand=True)
)

In [12]:
src.drop(["house", "senate", "senate_change", "house_change"], axis=1, inplace=True)

In [13]:
src["house_to"] = src["house_to"].str.replace("[f]", "", regex=False)
src["senate_to"] = src["senate_to"].str.replace("[f]", "", regex=False)

In [14]:
df = src.sort_values("year", ascending=False).copy()

---

In [15]:
df = df.query("president != 'Joe Biden'")

In [16]:
df[
    ["house_net", "senate_net", "house_from", "house_to", "senate_from", "senate_to"]
] = df[
    ["house_net", "senate_net", "house_from", "house_to", "senate_from", "senate_to"]
].astype(
    int
)

In [17]:
df[["first_name", "last_name"]] = df["president"].str.rsplit(" ", n=1, expand=True)

In [18]:
df["party_abbr"] = (
    df["president_party"]
    .str.replace("Republican", "R", regex=False)
    .str.replace("Democratic", "D", regex=False)
)

In [19]:
# df["display_name"] = (
#     "**" + df["year"] + ":** " + df["last_name"] + " (" + df["party_abbr"] + ")"
# )

In [20]:
df.head()

Unnamed: 0,year,president,president_party,house_net,senate_net,house_from,house_to,senate_from,senate_to,first_name,last_name,party_abbr
58,2018,Donald Trump,Republican,-41,2,241,200,241,200,Donald,Trump,R
57,2014,Barack Obama,Democratic,-13,-9,201,188,201,188,Barack,Obama,D
56,2010,Barack Obama,Democratic,-63,-6,256,193,256,193,Barack,Obama,D
55,2006,George W. Bush,Republican,-32,-6,231,199,231,199,George W.,Bush,R
54,2002,George W. Bush,Republican,8,2,221,229,221,229,George W.,Bush,R


---

### How many seats did each president since 1902 lose on average in their midterms?

In [21]:
df.groupby(["president_party"]).agg(
    {"house_net": "mean", "senate_net": "mean"}
).reset_index().round(2)

Unnamed: 0,president_party,house_net,senate_net
0,Democratic,-33.21,-3.43
1,Republican,-27.62,-3.31


---

### Export

In [22]:
df.rename(
    columns={"house_net": "House", "senate_net": "Senate", "display_name": "President"},
    inplace=True,
)

In [23]:
df.to_csv("data/processed/congress_midterm_change_by_president.csv", index=False)