In [92]:
import pandas as pd
import altair as alt
import json

# Both inputs for this analysis (the run configuration and the summary table)
# are read from the same cached run folder, so the folder is named once here.
# Point CACHE_DIR at another run to reuse the notebook.
CACHE_DIR = "../cache/ny-test-3boroughs"

# Read in configuration details.
# JSON is UTF-8 by specification; state it explicitly so the platform's default
# encoding cannot garble non-ASCII display names.
with open(CACHE_DIR + "/config.json", encoding="utf-8") as infile:
    config = json.load(infile)

# Read in summary data: one row per metric, one column per demographic group.
summary = pd.read_csv(CACHE_DIR + "/summary.csv")
summary.head()

Unnamed: 0,metric,pop_total,pop_white,pop_black,pop_asiapacific,hhld_single_mother
0,pop_total_c30_0,113519.796036,115914.599895,110089.123171,116691.581556,113587.505166
1,pop_total_c45_0,244301.865235,256704.173607,236371.12694,242941.848549,246932.423933
2,pharmacies_t3_0,10.626466,10.813285,10.478862,9.738627,10.8968
3,hospitals_t1_0,33.989989,33.929933,32.456734,35.355503,35.06705
4,urgent_care_facilities_t1_0,44.509209,38.546165,47.007455,46.917175,44.81561


## Differences in Population Demographics Across A Single Variable

In [112]:
# Build a long-form table of the absolute (per-scenario) metrics.
#
# Metric names follow "<opportunity>_<parameter>_<scenario index>", for example
# "pop_total_c30_0". Rows that compare two scenarios end in an index pair such
# as "1-0" instead of a single index; they are excluded here.
is_comparison = summary["metric"].str.contains(r"_\d+-\d+$", na=False)
absolutes = summary[~is_comparison].copy()

# Split from the right, at most twice: underscores inside the opportunity key
# are preserved and scenario indices with more than one digit are read in full
# (slicing fixed character positions only works for single-digit indices).
metric_parts = absolutes["metric"].str.rsplit(pat="_", n=2)
absolutes["scenario"] = metric_parts.str[2]
absolutes["parameter"] = metric_parts.str[1]
absolutes["opportunity"] = metric_parts.str[0]
absolutes = absolutes.drop(columns=["metric"])

# One row per (opportunity, parameter, scenario, demographic column).
absolutes = absolutes.melt(id_vars=["opportunity", "parameter", "scenario"])

# Lookup tables from the run configuration: opportunity key -> display name,
# and scenario position (as a string, to match the parsed suffix) -> name.
opp_dict = {key: spec["name"] for key, spec in config["opportunities"].items()}
scen_dict = {
    str(position): scenario["name"]
    for position, scenario in enumerate(config["scenarios"])
}

# Swap internal keys for human-readable labels. Keys missing from the
# configuration become NaN rather than raising.
absolutes["scenario"] = absolutes["scenario"].map(scen_dict)
absolutes["variable"] = absolutes["variable"].map(config["demographics"])
absolutes["opportunity"] = absolutes["opportunity"].map(opp_dict)
absolutes.head()

Unnamed: 0,opportunity,parameter,scenario,variable,value
0,Total Population,c30,With Subway,Total Population,113519.796036
1,Total Population,c45,With Subway,Total Population,244301.865235
2,Pharmacies,t3,With Subway,Total Population,10.626466
3,Hospitals,t1,With Subway,Total Population,33.989989
4,Urgent Care Facilities,t1,With Subway,Total Population,44.509209


In [120]:
# Build a long-form table of the scenario-comparison metrics.
#
# Comparison metric names follow "<opportunity>_<parameter>_<comparison>", where
# the comparison suffix is a pair of scenario indices such as "1-0".
# NOTE(review): presumably "1-0" means scenario 1 minus scenario 0 - confirm
# against the code that writes summary.csv.
comparison = "1-0"

# Split from the right, at most twice, so underscores inside the opportunity key
# are preserved and the suffix is compared as a whole (a plain "ends with 1-0"
# test would also match a suffix like "11-0").
metric_parts = summary["metric"].str.rsplit(pat="_", n=2)
is_selected = metric_parts.str[2] == comparison

deltas = summary[is_selected].copy()
deltas["parameter"] = metric_parts[is_selected].str[1]
deltas["opportunity"] = metric_parts[is_selected].str[0]
deltas = deltas.drop(columns=["metric"])

# One row per (opportunity, parameter, demographic column).
deltas = deltas.melt(id_vars=["opportunity", "parameter"])

# Swap internal keys for human-readable labels. `opp_dict` (opportunity key ->
# display name) is built in the earlier cell that prepares `absolutes`, so that
# cell must have been run first.
deltas["variable"] = deltas["variable"].map(config["demographics"])
deltas["opportunity"] = deltas["opportunity"].map(opp_dict)
deltas.head()

Unnamed: 0,opportunity,parameter,variable,value
0,Total Population,c30,Total Population,607.994003
1,Total Population,c45,Total Population,726.510093
2,Pharmacies,t3,Total Population,-0.23168
3,Hospitals,t1,Total Population,-0.479019
4,Urgent Care Facilities,t1,Total Population,-0.008794


In [130]:
# Lollipop chart of the between-scenario difference in the "Total Population"
# opportunity at the c45 parameter, one row per demographic group.
to_plot = deltas.query("opportunity == 'Total Population' and parameter == 'c45'").copy()

# The plotted values are scenario differences for the Total Population
# opportunity, so the axis is labelled as a change in population (the earlier
# label referred to jobs, which is not what this selection contains).
x_title = "Change in Population Accessible in 45 Minutes"

# Both layers share the same data and encodings; define them once.
base = alt.Chart(to_plot).encode(
    alt.X("value:Q", title=x_title),
    alt.Y("variable:N")
)

# Thin grey stem from zero to the value...
bars = base.mark_bar(color="lightgrey", height=3)

# ...capped with a coloured dot at the value itself. No bar-style `height` here:
# it was carried over from the bar mark and does not apply to a circle.
circles = base.mark_circle(color="#823BA0")

(bars + circles).properties(
    title={
        "text": config["project"],
        "subtitle": "Measured for 7-9am on Friday, February 3, 2023"
    },
    height=100,
    width=600
).configure(
    font="Atkinson Hyperlegible"
).configure_title(
    anchor="start"
)