# FiveThirtyEight: 2020 State polls

#### Import Python tools

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_stiles as altstiles
import numpy as np
import us
import urllib.request, json
import glob
import os

In [3]:
# Register the altair_stiles theme with Altair under the name "stiles".
alt.themes.register("stiles", altstiles.theme)
# NOTE(review): the theme enabled here is "grid", not the "stiles" name
# registered above. Presumably "grid" is made available elsewhere (e.g. by
# altair_stiles on import) — confirm this is the intended theme.
alt.themes.enable("grid")

ThemeRegistry.enable('grid')

In [4]:
# Notebook display settings: show up to 1,000 columns and 1,000 rows, and
# never truncate the text inside a cell when rendering DataFrames.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

# Turn off Altair's max-rows check for chart data.
alt.data_transformers.disable_max_rows()

In [5]:
# Date of this run, formatted as a "YYYY-MM-DD" string.
run_timestamp = pd.to_datetime("today")
today = run_timestamp.strftime("%Y-%m-%d")

---

## Harvest data 

#### First get a list of state names

In [6]:
# Plain attributes copied from each `us` state object, in output order.
state_attrs = (
    "fips",
    "name",
    "abbr",
    "is_continental",
    "statehood_year",
    "capital",
    "capital_tz",
    "ap_abbr",
)

# One metadata record per state. `shapefile_urls` is a method, so it is
# called rather than read like the plain attributes above.
all_features = [
    {
        **{attr: getattr(st, attr) for attr in state_attrs},
        "shapefile_urls": st.shapefile_urls(),
    }
    for st in us.states.STATES
]

In [7]:
# Tabulate the state records, order them by the "fips" column (ascending)
# and renumber the index from zero.
states_by_fips = pd.DataFrame(all_features).sort_values(by="fips")
states = states_by_fips.reset_index(drop=True)

In [8]:
# Turn every state name into a lowercase, hyphen-separated slug
# (e.g. "New York" -> "new-york"). Note that `states` is rebound here from
# a DataFrame to a plain list of strings.
states = [name.lower().replace(" ", "-") for name in states["name"]]

#### Loop through the list of states and download each state's JSON data

In [9]:
# Make sure the download folder exists so a run on a fresh checkout works.
os.makedirs("data/raw", exist_ok=True)

for state in states:
    # Fetch and parse the state's feed *before* opening the output file, so a
    # malformed response raises here instead of leaving an empty
    # "<state>_data.json" behind on disk.
    with urllib.request.urlopen(
        f"https://projects.fivethirtyeight.com/2020-election-forecast/{state}_steps.json"
    ) as url:
        payload = json.load(url)

    # Re-serialize the parsed payload to the raw data folder.
    with open(f"data/raw/{state}_data.json", "w", encoding="utf-8") as f:
        json.dump(payload, f)

In [10]:
# Empty placeholder frame; not read by any cell visible in this notebook.
temp = pd.DataFrame()

# Folder holding the raw per-state downloads.
path_to_json = "data/raw/"

# Match only the "<state>_data.json" files rather than every JSON file in the
# folder, so a stray file cannot be loaded and mislabeled as a state.
json_pattern = os.path.join(path_to_json, "*_data.json")

# glob returns matches in arbitrary order; sort them so runs are reproducible.
file_list = sorted(glob.glob(json_pattern))

In [11]:
# Load every raw JSON file into a one-file DataFrame tagged with its state,
# then stack them into a single frame with a fresh 0..n-1 index.
dfs = []
for file in file_list:
    # Recover the state name from the file name itself instead of stripping a
    # hard-coded "data/raw/" prefix: glob may return a different separator
    # (e.g. backslashes on Windows), which would leave the folder in the name.
    # "new-york_data.json" -> "New York".
    state_name = (
        os.path.basename(file).replace("_data.json", "").replace("-", " ").title()
    )
    with open(file, encoding="utf-8") as f:
        json_data = pd.json_normalize(json.load(f)).assign(state=state_name)
    dfs.append(json_data)
df = pd.concat(dfs, sort=False).reset_index(drop=True)

In [12]:
# Keep only the first element of each row's "steps" value
# (presumably a list of step records from the feed — verify).
first_step = df["steps"].str.get(0)
df["steps"] = first_step

In [13]:
# Expand each row's "steps" value into its own columns. The four names on the
# left are matched to the normalized columns by position, so this assumes
# json_normalize yields exactly these four fields in this order —
# TODO confirm against the feed.
df[["step", "weight", "description", "candidates"]] = pd.json_normalize(df["steps"])

In [14]:
# Split each row's "candidates" value into two columns, matched by position.
# NOTE(review): the column names assume the first entry is the Republican
# candidate and the second the Democrat — verify against the feed.
df[["gop_polling", "dem_polling"]] = pd.json_normalize(df["candidates"])

In [15]:
# Flatten each candidate column into two columns, matched by position: the
# first lands in a scratch "drop" column (overwritten by the second line and
# removed by the later df.drop call), the second replaces the candidate column
# itself — presumably with the numeric polling value; TODO confirm field order.
df[["drop", "gop_polling"]] = pd.json_normalize(df["gop_polling"])
df[["drop", "dem_polling"]] = pd.json_normalize(df["dem_polling"])

In [16]:
# Remove the intermediate feed columns and the scratch "drop" column now that
# the polling values have been extracted.
unwanted_columns = [
    "type",
    "step",
    "weight",
    "candidates",
    "drop",
    "steps",
    "description",
]
df = df.drop(columns=unwanted_columns)

In [17]:
# Order rows alphabetically by state and renumber the index from zero.
sorted_by_state = df.sort_values(by="state")
df = sorted_by_state.reset_index(drop=True)

In [18]:
# Margin from each party's side (own value minus the other party's),
# rounded to two decimals.
gop = df["gop_polling"]
dem = df["dem_polling"]
df["gop_polling_margin"] = (gop - dem).round(2)
df["dem_polling_margin"] = (dem - gop).round(2)

In [19]:
# Round the frame's numeric columns to two decimal places.
df = df.round(decimals=2)

In [20]:
# Tag every row with the election year (stored as a string).
df = df.assign(year="2020")

In [21]:
# Label every row with a fixed description of the data source.
df = df.assign(description="538 polling average")

In [22]:
# Write the finished table to the processed-data folder, without the index.
output_path = "data/processed/2020_polling_average_states_538.csv"
df.to_csv(output_path, index=False)