In [1]:
import pandas as pd
import altair as alt
import json

# Load the run configuration (a JSON document) into `config`
with open("../cache/ny-test-3b-modes/config.json") as config_file:
    config = json.load(config_file)

# Load the summary table into `summary` and preview its first rows
summary = pd.read_csv("../cache/ny-test-3b-modes/summary.csv")
summary.head()

Unnamed: 0,metric,pop_total,pop_white,pop_black,pop_asiapacific,hhld_single_mother
0,pop_total_c30_0,114748.193899,116891.275665,110520.041303,118261.533996,115044.812824
1,pop_total_c45_0,119953.362549,122207.501247,115617.443495,124087.022917,120198.922201
2,pharmacies_t3_0,9.404389,9.860806,9.886727,7.882502,9.383505
3,hospitals_t1_0,97.589276,97.18463,94.968114,107.238951,100.034376
4,urgent_care_facilities_t1_0,128.079815,118.036881,130.434337,128.567837,128.658979


## Differences in Population Demographics Across A Single Variable

In [2]:
# Build a long-format table of the per-scenario ("absolute") metrics.
#
# Rows whose metric name ends in "1-0" are scenario comparisons; they are
# excluded here.
absolutes = summary[summary['metric'].str[-3:] != "1-0"].copy()

# Metric names are read as "<opportunity>_<parameter>_<scenario>", e.g.
# "pop_total_c30_0". Splitting from the right (at most twice) keeps any
# underscores inside the opportunity key intact and, unlike taking only the
# last character, still works when the scenario index has several digits.
metric_parts = absolutes["metric"].str.rsplit("_", n=2)
absolutes['opportunity'] = metric_parts.str[0]
absolutes['parameter'] = metric_parts.str[1]
absolutes['scenario'] = metric_parts.str[2]

# One row per (opportunity, parameter, scenario, demographic column)
absolutes = absolutes.drop(columns=['metric'])
absolutes = absolutes.melt(id_vars=["opportunity", "parameter", "scenario"])

# Lookup tables from raw keys to display names, taken from the config:
#   opp_dict:  opportunity key -> its "name" entry
#   scen_dict: scenario position (as a string) -> its "name" entry
opp_dict = {key: details["name"] for key, details in config["opportunities"].items()}
scen_dict = {str(position): scen["name"] for position, scen in enumerate(config["scenarios"])}

# Replace raw keys with display names; keys missing from a lookup become NaN
absolutes["scenario"] = absolutes["scenario"].map(scen_dict)
absolutes['variable'] = absolutes['variable'].map(config["demographics"])
absolutes['opportunity'] = absolutes['opportunity'].map(opp_dict)
absolutes.head()

Unnamed: 0,opportunity,parameter,scenario,variable,value
0,Total Population,c30,With Subway,Total Population,114748.193899
1,Total Population,c45,With Subway,Total Population,119953.362549
2,Pharmacies,t3,With Subway,Total Population,9.404389
3,Hospitals,t1,With Subway,Total Population,97.589276
4,Urgent Care Facilities,t1,With Subway,Total Population,128.079815


In [3]:
# Dot plot comparing scenarios, one facet row per demographic group,
# restricted to the 'Total Population' opportunity with parameter 'c45'.
to_plot = absolutes.query("opportunity == 'Total Population' and parameter == 'c45'").copy()

# NOTE(review): the x-axis title mentions "Jobs" while the filter above selects
# the 'Total Population' opportunity — confirm which label is intended.
bars = alt.Chart().mark_bar(color="lightgrey", height=3).encode(
    x=alt.X("value:Q", title="Total Jobs Accessible in 45 Minutes"),
    y=alt.Y("scenario:N", axis=alt.Axis(labels=False, title=None)),
)

# Circles sit on the bar ends and carry the scenario colour legend
circles = alt.Chart().mark_circle(size=150, opacity=1).encode(
    x=alt.X("value:Q"),
    y=alt.Y("scenario:N"),
    color=alt.Color("scenario:N", title="Scenario"),
)

chart_title = {
    "text": config["project"],
    "subtitle": "Measured for 7-9am on Friday, February 3, 2023"
}
demographic_rows = alt.Row(
    "variable:N",
    title=None,
    header=alt.Header(labelAngle=0, labelAlign="left"),
)

# The data is attached at the layer level so both marks share it when faceting
alt.layer(bars, circles, data=to_plot).facet(
    row=demographic_rows
).properties(
    title=chart_title
)

In [4]:
# Build a long-format table of the scenario-comparison metrics: this time we
# keep only the rows whose metric name ends in "1-0".
deltas = summary[summary['metric'].str[-3:] == "1-0"].copy()

# Drop the last two characters of the metric name and split on underscores.
# The second-to-last token is the parameter; everything before it, re-joined
# with underscores, is the opportunity key.
# (presumably names look like "<opportunity>_<parameter>_1-0" — TODO confirm)
name_tokens = deltas["metric"].str[:-2].str.split("_")
deltas['parameter'] = name_tokens.str[-2]
deltas['opportunity'] = name_tokens.str[:-2].str.join("_")

# One row per (opportunity, parameter, demographic column)
deltas = deltas.drop(columns=['metric']).melt(id_vars=["opportunity", "parameter"])

# Swap raw keys for display names; `opp_dict` must already be defined
# (it is built where the absolute metrics are prepared).
deltas['variable'] = deltas['variable'].map(config["demographics"])
deltas['opportunity'] = deltas['opportunity'].map(opp_dict)
deltas.head()

Unnamed: 0,opportunity,parameter,variable,value
0,Total Population,c30,Total Population,0.0
1,Total Population,c45,Total Population,0.0
2,Pharmacies,t3,Total Population,0.0
3,Hospitals,t1,Total Population,0.0
4,Urgent Care Facilities,t1,Total Population,0.0


In [5]:
# Dot plot of the scenario difference per demographic group, restricted to
# the 'Total Population' opportunity with parameter 'c45'.
to_plot = deltas.query("opportunity == 'Total Population' and parameter == 'c45'").copy()

# Both marks are drawn from the same chart base so they share the data
base = alt.Chart(to_plot)

# NOTE(review): the x-axis title mentions "Jobs" while the filter above selects
# the 'Total Population' opportunity — confirm which label is intended.
bars = base.mark_bar(color="lightgrey", height=3).encode(
    x=alt.X("value:Q", title="Change in Jobs Accessible in 45 Minutes"),
    y=alt.Y("variable:N", title=None),
)

circles = base.mark_circle(color="#823BA0", size=150, opacity=1).encode(
    x=alt.X("value:Q"),
    y=alt.Y("variable:N"),
)

chart_title = {
    "text": config["project"],
    "subtitle": "Measured for 7-9am on Friday, February 3, 2023"
}

alt.layer(bars, circles).properties(
    title=chart_title,
    height=100,
    width=600
).configure(
    font="Atkinson Hyperlegible"
).configure_title(
    anchor="start"
)

### Small Multiples

In [17]:
# Small multiples of the scenario differences: one panel per
# "<opportunity> (<parameter>)" combination, two panels per row.
panel_label = deltas['opportunity'] + " (" + deltas["parameter"] + ")"
to_plot = deltas.assign(combined_name=panel_label)

bars = alt.Chart().mark_bar(color="lightgrey", height=3).encode(
    x=alt.X("value:Q", title="Change in Measure"),
    y=alt.Y("variable:N", title=None),
)

circles = alt.Chart().mark_circle(color="#559613", size=150, opacity=1).encode(
    x=alt.X("value:Q"),
    y=alt.Y("variable:N"),
)

chart_title = {
    "text": config["project"],
    "subtitle": "Measured for 7-9am on Friday, February 3, 2023"
}
panels = alt.Facet("combined_name:N", title=None, header=alt.Header(labelFontWeight='bold'))

# Each panel gets its own x scale (resolve_scale below)
alt.layer(bars, circles, data=to_plot).facet(
    facet=panels,
    columns=2
).properties(
    title=chart_title
).configure(
    font="Atkinson Hyperlegible"
).configure_title(
    fontSize=18,
    subtitleFontSize=14
).resolve_scale(x='independent')

In [18]:
# Percentage difference between the two scenarios for every
# (opportunity, parameter, demographic) combination.
join_keys = ["opportunity", "parameter", "variable"]
wanted_columns = join_keys + ["value"]

# "With Subway" values get the suffix "0", "Without Subway" values the suffix "1"
with_subway = absolutes.loc[absolutes['scenario'] == "With Subway", wanted_columns]
without_subway = absolutes.loc[absolutes['scenario'] == "Without Subway", wanted_columns]
pct_delta = with_subway.merge(without_subway, on=join_keys, suffixes=["0", "1"])

# Difference (value1 - value0) expressed as a percentage of value0
difference = pct_delta["value1"] - pct_delta["value0"]
pct_delta['pct_delta'] = 100 * difference / pct_delta["value0"]
pct_delta = pct_delta.drop(columns=["value0", "value1"])
pct_delta.head()
    

Unnamed: 0,opportunity,parameter,variable,pct_delta
0,Total Population,c30,Total Population,0.0
1,Total Population,c45,Total Population,0.0
2,Pharmacies,t3,Total Population,0.0
3,Hospitals,t1,Total Population,0.0
4,Urgent Care Facilities,t1,Total Population,0.0


In [19]:
# Small multiples of the percentage change between scenarios: one panel per
# "<opportunity> (<parameter>)" combination, two panels per row.
to_plot = pct_delta.copy()

# Build the panel label from this frame's own columns. Taking the columns from
# a different frame would align on the row index, which can attach labels to
# the wrong rows (or yield NaN) when the two frames differ in order or length.
to_plot['combined_name'] = to_plot['opportunity'] + " (" + to_plot["parameter"] + ")"

bars = alt.Chart().mark_bar(color="lightgrey", height=3).encode(
    alt.X("pct_delta:Q", title="% Change in Measure"),
    alt.Y("variable:N", title=None)
)

circles = alt.Chart().mark_circle(color="#2464B0", size=150, opacity=1).encode(
    alt.X("pct_delta:Q"),
    alt.Y("variable:N")
)

# The data is attached at the layer level so both marks share it when faceting
alt.layer(bars, circles, data=to_plot).facet(
    facet=alt.Facet("combined_name:N", title=None, header=alt.Header(labelFontWeight='bold')),
    columns=2
).properties(
    title={
        "text": config["project"],
        "subtitle": "Measured for 7-9am on Friday, February 3, 2023"
    }
).configure(
    font="Atkinson Hyperlegible"
).configure_title(
    fontSize=18,
    subtitleFontSize=14
)

In [7]:
# For each scenario's travel-time matrix, print the total number of rows
# followed by the number of rows whose travel_time is missing.
m0 = pd.read_csv("../cache/ny-test-3b-modes/matrix0.csv")
print(m0.shape[0], m0["travel_time"].isna().sum())

# The second matrix previously re-read matrix0.csv, so scenario 1 was never
# inspected. Assumes the scenario-1 matrix is cached as matrix1.csv alongside
# matrix0.csv — TODO confirm the file name.
m1 = pd.read_csv("../cache/ny-test-3b-modes/matrix1.csv")
print(m1.shape[0], m1["travel_time"].isna().sum())

23483716 23075594
