# FEC: 2020 election results

#### Import Python tools

In [1]:
# Load the `lab_black` IPython extension for this kernel session.
# NOTE(review): presumably this auto-formats code cells with Black — confirm it is installed in the target environment.
%load_ext lab_black

In [2]:
import pandas as pd
import us
import tabula
from tabula.io import read_pdf

In [3]:
# Raise pandas' display limits so wide/long frames are not truncated when shown.
for option_name in ("display.max_columns", "display.max_rows"):
    pd.set_option(option_name, 1000)

---

## Read data

#### Extract data from [this PDF](https://www.fec.gov/introduction-campaign-finance/election-and-voting-information/) released by the FEC

In [4]:
pdf = "data/raw/2020presgeresults.pdf"

# Parse one page at a time; the first element of each result is the table used.
biden_table = read_pdf(pdf, pages=2)[0]
trump_table = read_pdf(pdf, pages=8)[0]

# Keep only the columns needed downstream, then trim rows by position.
# NOTE(review): the trimmed leading/trailing rows are presumably header,
# total or footnote rows in the PDF layout — confirm against the source file.
biden_page = biden_table.loc[:, ["STATE", "BIDEN"]].iloc[:-2]
trump_page = trump_table.loc[:, ["STATE", "TRUMP", "TOTAL VOTES"]].iloc[2:-2]

#### Merge Biden and Trump into one dataframe

In [5]:
# Join the two candidates' tables on the shared STATE column (default inner
# join, so only states present on both pages survive) and take an
# independent copy to work on.
df = biden_page.merge(trump_page, on="STATE").copy()

#### Clean up the dataframe

In [6]:
# Normalise headers: lower-case and strip spaces ("TOTAL VOTES" -> "totalvotes").
df.columns = [header.lower().replace(" ", "") for header in df.columns]

In [7]:
# Columns holding vote counts; remove every comma from their values
# (presumably thousands separators carried over from the PDF text).
str_cols = ["biden", "trump", "totalvotes"]
comma_free = df[str_cols].replace({",": ""}, regex=True)
df[str_cols] = comma_free

In [8]:
# Convert the vote-count columns to integers so they can be used in arithmetic.
int_dtypes = dict.fromkeys(str_cols, int)
df[str_cols] = df[str_cols].astype(int_dtypes)

#### Calculate totals and vote share

In [9]:
# Two-party total first, then everything else as the remainder of all votes cast.
df["biden_trump_votes"] = df["biden"].add(df["trump"])
df["other_votes"] = df["totalvotes"].sub(df["biden_trump_votes"])

In [10]:
# Express each vote count as a percentage of total votes, rounded to two decimals.
# Dict order fixes the order in which the new columns are appended.
share_sources = {
    "gop_vote_pct": "trump",
    "dem_vote_pct": "biden",
    "other_vote_pct": "other_votes",
}
for pct_col, votes_col in share_sources.items():
    df[pct_col] = df[votes_col].div(df["totalvotes"]).mul(100).round(2)

#### Clean up postal abbreviations for states

In [11]:
# Postal abbreviation -> full name lookup used to relabel the `state` column.
# DC is added explicitly: recent releases of `us` leave the District of
# Columbia out of the default state/territory lists unless the DC_STATEHOOD
# environment variable is set, and an abbreviation missing from this lookup
# turns into NaN when the column is mapped through it.
postal_to_name = {
    **us.states.mapping("abbr", "name"),
    "DC": "District of Columbia",
}

In [12]:
# Swap each postal abbreviation for its full name; abbreviations that are
# absent from the lookup come out as NaN.
df = df.assign(state=df["state"].map(postal_to_name))

In [13]:
# Tag every row with the election year (kept as a string, not an integer).
df = df.assign(year="2020")

#### Slim down what we export

In [14]:
# Only the state label, the three vote shares and the year are exported;
# raw counts and intermediate totals are dropped here.
export_cols = ["state", "gop_vote_pct", "dem_vote_pct", "other_vote_pct", "year"]
df = df.loc[:, export_cols]

In [15]:
# Write the slimmed-down frame to disk without the positional index column.
output_csv = "data/processed/2020_election_results_states_fec.csv"
df.to_csv(output_csv, index=False)