## Static Draft

In [None]:
#imports
import altair as alt
import pandas as pd
import geopandas as gp
import math

### Load data

In [None]:
# Read the EPA Smart Location Database (source of the walkability columns
# such as NatWalkInd used below) from a file geodatabase.
# The path is relative to the notebook's working directory.
nwi = gp.read_file("SmartLocationDatabase.gdb")

In [None]:
# Print every column name of the walkability table, one per line, to see
# which fields are available.
for col in nwi.columns:
    print(col)

In [None]:
# Build one numeric county key per row: state FIPS * 1000 + county FIPS.
# Both FIPS columns are cast to int first so the arithmetic below is numeric.
nwi["STATEFP"] = nwi["STATEFP"].astype(int)
nwi["COUNTYFP"] = nwi["COUNTYFP"].astype(int)
nwi["COUNTY5"] = nwi["STATEFP"].mul(1000).add(nwi["COUNTYFP"])

In [None]:
# Read the 2020 precinct-level election results (GeoJSON, path relative to the
# working directory). Later cells use its GEOID, votes_dem, votes_rep,
# votes_total and pct_dem_lead columns.
votes_2020 = gp.read_file("2020_precincts-with-results.geojson")

In [None]:
# Quick visual check: draw the precinct geometries coloured by pct_dem_lead.
votes_2020.plot(column="pct_dem_lead")

In [None]:
# Derive county, state and precinct identifiers from GEOID.
# GEOID is treated as "<county key>-<precinct>"; only the first two
# hyphen-separated pieces are used.
# NOTE(review): a precinct label that itself contains "-" is cut at its first
# hyphen by `.str.get(1)` -- confirm that is acceptable.
votes_2020["GEOID"] = votes_2020["GEOID"].astype("string")
geoid_parts = votes_2020["GEOID"].str.split("-")  # split once, reuse for both pieces
votes_2020["COUNTY5"] = geoid_parts.str.get(0).astype(int)
# The state code is the county key with its last three digits dropped.
# Vectorised floor division replaces the per-row math.floor(x / 1000) call.
votes_2020["STATEFP"] = votes_2020["COUNTY5"] // 1000
votes_2020["PRECINCT"] = geoid_parts.str.get(1)

In [None]:
# Sum the precinct vote counts up to county level (one row per COUNTY5).
vc = votes_2020.groupby("COUNTY5", as_index=False).agg(
    votes_dem=("votes_dem", "sum"),
    votes_rep=("votes_rep", "sum"),
    votes_total=("votes_total", "sum"),
)
# County "lead" here is the Democratic share of all votes minus 0.5.
# NOTE(review): this is not (votes_dem - votes_rep) / votes_total; confirm it
# is meant to match the precinct-level pct_dem_lead column.
vc["pct_dem_lead"] = vc["votes_dem"].div(vc["votes_total"]).sub(0.5)
votes_2020_counties = vc

In [None]:
# Merge county-level voting data onto the block-group-level walkability data.

# Columns of interest from the walkability table.
# NOTE(review): this list is not applied to the merge below (every nwi column
# is kept, and later cells read CBSA_Name, which is not listed here). The name
# also shadows the builtin vars(); it is kept unchanged in case other cells
# reference it.
vars = ["STATEFP", "COUNTYFP", "TRACTCE", "BLKGRPCE", "COUNTY5", "CBSA",
        "TotPop", "NatWalkInd",
        "AutoOwn0", "Pct_AO0", "AutoOwn1", "Pct_AO1", "AutoOwn2p", "Pct_AO2p",
        "Workers", "R_LowWageWk", "R_MedWageWk", "R_HiWageWk", "R_PCTLOWWAGE", "TotEmp",
        "geometry"]

# astype() returns a new Series, so the result must be stored: the previous
# bare call left nwi["CBSA"] unchanged. The CBSA filters in this notebook
# compare against string codes.
nwi["CBSA"] = nwi["CBSA"].astype(str)

# Inner join: every walkability row gets its county's vote totals and lead.
nwi_2020 = pd.merge(nwi, votes_2020_counties, on="COUNTY5")

In [None]:
# Alternate approach: collapse the walkability table to one row per
# county / CBSA first, then attach the county vote totals.
# TotPop is summed; NatWalkInd and R_PCTLOWWAGE are plain (unweighted) means
# over the grouped rows.
nwi_counties = nwi.groupby(
    ["COUNTY5", "STATEFP", "CBSA", "CBSA_Name"], as_index=False
).agg(
    TotPop=("TotPop", "sum"),
    NatWalkInd=("NatWalkInd", "mean"),
    R_PCTLOWWAGE=("R_PCTLOWWAGE", "mean"),
)
nwi_2020_counties = nwi_counties.merge(votes_2020_counties, on="COUNTY5")

In [None]:
# Metro areas (CBSAs) known to this notebook: CBSA code -> display name.
CBSA_lookup = {
    "35620": "New York-Newark-Jersey City, NY-NJ-PA",
    "31080": "Los Angeles-Long Beach-Anaheim, CA",
    "16980": "Chicago-Naperville-Elgin, IL-IN-WI",
    "26420": "Houston-The Woodlands-Sugar Land, TX",
    "42660": "Seattle-Tacoma-Bellevue, WA",
    "19100": "Dallas-Fort Worth-Arlington, TX",
}

# Short aliases for the display names.
NY = CBSA_lookup["35620"]
LA = CBSA_lookup["31080"]
CHI = CBSA_lookup["16980"]
HOU = CBSA_lookup["26420"]
SEA = CBSA_lookup["42660"]
DAL = CBSA_lookup["19100"]

# Codes actually charted. Seattle ("42660") is defined above but left out.
CBSA_codes = ["35620", "31080", "16980", "26420", "19100"]

# Shared colour scale for the charts: domain[i] is drawn with range_[i].
domain = [CBSA_lookup[cbsa_code] for cbsa_code in CBSA_codes]
range_ = ["#4269d0", "#efb118", "#ff725c", "#6cc5b0", "#3ca951"]

In [None]:
# Keep only the rows whose CBSA code is one of the metro areas in CBSA_codes,
# for both the county-level and the block-group-level tables.
data_counties = nwi_2020_counties.loc[nwi_2020_counties["CBSA"].isin(CBSA_codes)]
all_data = nwi_2020.loc[nwi_2020["CBSA"].isin(CBSA_codes)]

In [None]:
# Display the filtered block-group-level table (notebook cell output).
all_data

## Visualizations

#### Walkability by population

In [None]:
# Columns used by the walkability-by-population charts.
data = all_data.loc[:, ["CBSA_Name", "NatWalkInd", "TotPop", "geometry"]]

In [None]:
def nwi_by_population_by_cbsa():
    """Return faceted bar charts of population by walkability index.

    One panel is drawn per metro area (CBSA_Name). Within a panel,
    NatWalkInd is binned on the x axis and TotPop is summed per bin on the
    y axis. Bars are coloured per metro area with the shared
    ``domain`` / ``range_`` colour scale, and the legend is hidden because
    the facet headers already name each area.

    Reads the module-level ``all_data``, ``domain`` and ``range_``.

    Returns:
        The Altair chart object.
    """
    # Select the columns here (as nwi_proportion_by_cbsa does) rather than
    # reading the notebook-global `data`, which other cells rebind to
    # different tables; the chart must not depend on cell execution order.
    data = all_data[["CBSA_Name", "NatWalkInd", "TotPop", "geometry"]]
    chart = alt.Chart(data).mark_bar().encode(
        alt.X("NatWalkInd:Q").bin(),
        alt.Y("TotPop:Q", aggregate="sum"),
        # facet and color to visually distinguish cities
        alt.Facet("CBSA_Name:N"),
        alt.Color("CBSA_Name:N", legend=None).scale(domain=domain, range=range_),
    )
    return chart


nwi_by_population_by_cbsa()

In [None]:
def nwi_proportion_by_cbsa():
    """Return one normalised stacked bar per metro area.

    Each bar (y = CBSA_Name) is split by binned NatWalkInd (at most 10 bins),
    and segment length is that bin's share of the area's summed TotPop.

    Reads the module-level ``all_data``.

    Returns:
        The Altair chart object.
    """
    data = all_data[["CBSA_Name", "NatWalkInd", "TotPop", "geometry"]]
    chart = alt.Chart(data).mark_bar().encode(
        alt.X("TotPop", aggregate="sum").stack("normalize"),
        alt.Y("CBSA_Name"),
        # The colour scheme belongs on the colour channel. It was previously
        # set on the x (position) scale, where it cannot colour anything,
        # and used the name "bluegreen-6" instead of the scheme "bluegreen".
        alt.Color("NatWalkInd")
        .bin(maxbins=10)
        .scale(scheme="bluegreen")
        .legend(direction="horizontal", orient="top"),
    )
    return chart


nwi_proportion_by_cbsa()

#### Walkability vs Partisanship

In [None]:
# Rebind the notebook-wide `data` name to the county-level table.
data = data_counties

In [None]:
# County-level scatter: walkability index (x) against Democratic lead (y).
# Point size follows county population (size legend hidden); colour and
# shape both identify the metro area, with colour using the shared scale.
alt.Chart(data_counties).mark_point().encode(
    x=alt.X("NatWalkInd:Q", title="Walkability Index"),
    y=alt.Y("pct_dem_lead:Q", title="Percent Dem lead"),
    size=alt.Size("TotPop:Q", legend=None),
    color=alt.Color("CBSA_Name:N", scale=alt.Scale(domain=domain, range=range_)),
    shape=alt.Shape("CBSA_Name:N"),
)

#### Car ownership by CBSA

In [None]:
# Placeholder for the car-ownership chart: no encodings are defined yet.
# The former bare `data` line was dropped -- only the last expression of a
# notebook cell is displayed, so it had no effect.
data = all_data
alt.Chart(data).mark_bar().encode(
    # TODO: stack car ownership by CBSA
)

#### Geo

In [None]:
# Walkability and vote-lead columns for a single metro area:
# CBSA code "19100" (Dallas-Fort Worth-Arlington in CBSA_lookup).
data = all_data.loc[
    all_data["CBSA"] == "19100",
    ["NatWalkInd", "CBSA", "CBSA_Name", "pct_dem_lead", "geometry"],
]
data

In [None]:
# Quick look at the geometries currently held in `data`.
data.plot()

In [None]:
# Draw the geometries in `data` as shapes filled by metro-area name, with
# white outlines. The former leading bare `data` line was dropped -- only the
# last expression of a notebook cell is displayed, so it had no effect.
alt.Chart(data).mark_geoshape(
    stroke="white",
    strokeWidth=1.5,
).encode(
    fill="CBSA_Name:N",
)

In [None]:
# Base map of US states: light-gray fill, white borders, Albers USA
# projection, 500 x 300 px. The TopoJSON is referenced by URL from
# vega_datasets (imported as `dta` so it does not clash with the notebook's
# own `data` variable).
from vega_datasets import data as dta

states_data = alt.topo_feature(dta.us_10m.url, feature="states")

states = (
    alt.Chart(states_data)
    .mark_geoshape(fill="lightgray", stroke="white")
    .project("albersUsa")
    .properties(width=500, height=300)
)

