In [1]:
import plotly.express as px
import pandas as pd
from geopy.distance import geodesic
import requests
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import MinMaxScaler
import os
import warnings

# The Mapbox token is read from the environment instead of being committed with
# the notebook. Export MAPBOX_ACCESS_TOKEN before starting the kernel.
_mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if _mapbox_token:
    px.set_mapbox_access_token(_mapbox_token)
else:
    # Only Mapbox-backed figures need the token, so warn rather than abort.
    warnings.warn(
        "MAPBOX_ACCESS_TOKEN is not set; Mapbox-based figures may not render."
    )

from post_roe.query import BucketQuery as bq, BUCKET_BASE
from post_roe.distance import Distance

# Scenario used for the drive-time and state-status queries below.
case = "base"

# Drive time keyed by the masked zip3 prefix.
drive_time = bq.drive_time_query(case).loc[:, ["zip3", "_drive_time"]]

# Census zip5 rows, tagged with the masked zip3 prefix ("123**") so they can
# be joined to the drive-time table.
zip5 = (
    bq.census_zip5_query()
    .drop(columns=["lat_lon"])
    .assign(zip3=lambda frame: frame["zip5"].map(lambda code: f"{code[0:3]}**"))
)

# Attach each row's state status; the join uses the columns the two frames share.
states = bq.states_query(case).loc[:, ["state", "state_status"]]
zip5 = zip5.merge(states, how="left")

In [2]:
# Deadweight loss per zip5: the zip3 drive time weighted by the min-max
# scaled population of the zip5.

df = (
    zip5.merge(drive_time, on="zip3", how="left")
    .drop_duplicates(subset=["zip5"])
    .assign(
        # An ADI of exactly 100 is folded into the top decile (9).
        _adi_decile=lambda frame: frame["adi_median"].apply(
            lambda adi: int(adi / 10) if adi != 100 else 9
        ),
        # fit_transform returns an (n, 1) array; take its only column.
        _population=lambda frame: MinMaxScaler().fit_transform(
            frame[["population"]]
        )[:, 0],
        _loss=lambda frame: frame["_drive_time"] * frame["_population"],
    )
)
df

Unnamed: 0,state,zip5,lat,lon,population,adi_median,zip3,state_status,_drive_time,_adi_decile,_population,_loss
0,AL,30165,34.315791,-85.295125,39975,79.0,301**,at_risk,6.0,7,0.350916,2.105499
2,AL,31905,32.372789,-84.843361,16044,84.0,319**,at_risk,8.0,8,0.140841,1.126725
4,AL,35004,33.603429,-86.493786,10427,58.0,350**,at_risk,6.0,5,0.091532,0.549194
5,AL,35005,33.605877,-86.993731,7942,80.0,350**,at_risk,6.0,8,0.069718,0.418308
6,AL,35006,33.422751,-87.209751,3121,71.0,350**,at_risk,6.0,7,0.027397,0.164384
...,...,...,...,...,...,...,...,...,...,...,...,...
31224,WY,83124,41.758808,-110.315880,137,38.0,831**,at_risk,6.0,3,0.001203,0.007216
31225,WY,83126,42.581108,-110.904209,334,33.0,831**,at_risk,6.0,3,0.002932,0.017592
31226,WY,83127,42.918972,-110.997753,3041,39.0,831**,at_risk,6.0,3,0.026695,0.160171
31227,WY,83128,43.040079,-110.722208,1601,44.0,831**,at_risk,6.0,4,0.014054,0.084325


In [3]:
# Total loss and total population per ADI decile.
loss_by_adi = (
    df.groupby("_adi_decile")["_loss"].sum().rename("total_loss").reset_index()
)
population_by_adi = df.groupby("_adi_decile")["population"].sum().reset_index()

# Put both measures on a common 0-1 scale so the facets are comparable.
adi_decile = population_by_adi.merge(loss_by_adi)
adi_decile[["population", "total_loss"]] = MinMaxScaler().fit_transform(
    adi_decile[["population", "total_loss"]]
)

# One facet per measure, one bar per decile.
px.bar(
    adi_decile.melt(id_vars=["_adi_decile"]),
    x="_adi_decile",
    y="value",
    facet_col="variable",
)

In [36]:
# Per-row loss against median ADI, coloured by state status.
px.scatter(
    df,
    x="adi_median",
    y="_loss",
    color="state_status",
)

In [19]:
# # df = df_alt[df_alt['state_status']=="at_risk"]
# px.scatter_mapbox(
#         drive_time,
#         lat="lat",
#         lon="lon",
#         title=f"{len(drive_time)} locations",
#         color='_drive_time',
#         size="population",
#         # hover_data=['LocationFacility'],
#         height=600,
#         zoom=3,
#     )


In [17]:
# Total population per ADI decile.
# NOTE(review): the `df` built in the deadweight-loss cell exposes `_adi_decile`,
# not `adi_decile`; this cell presumably expects a frame produced by build() or
# loaded from the feather cache. Confirm which `df` is intended before a
# top-to-bottom run.
adi_median_population = df.groupby("adi_decile")["population"].sum().reset_index()
px.bar(
    adi_median_population,
    x="adi_decile",
    y="population",
)

In [21]:
# Total population per (state status, ADI decile) pair.
# NOTE(review): relies on `df` having an `adi_decile` column, which the `df`
# built in the deadweight-loss cell does not have (it has `_adi_decile`).
# Confirm which frame is intended.
adi_median_population = (
    df.groupby(["state_status", "adi_decile"])["population"].sum().reset_index()
)
px.bar(
    adi_median_population,
    x="adi_decile",
    y="population",
    color="state_status",
)

In [23]:
# Add two derived columns: population min-max scaled to [0, 1], and population
# expressed in millions.
scaler = MinMaxScaler()
adi_median_population["_population"] = scaler.fit_transform(
    adi_median_population[["population"]]
)[:, 0]
adi_median_population["_pop_millions"] = adi_median_population["population"] / 10**6
adi_median_population

Unnamed: 0,state_status,adi_decile,population,_population,_pop_millions
0,at_risk,0,227831,0.001843,0.227831
1,at_risk,1,2076655,0.051187,2.076655
2,at_risk,2,5304625,0.137339,5.304625
3,at_risk,3,12264324,0.323088,12.264324
4,at_risk,4,19395418,0.513411,19.395418
5,at_risk,5,34108446,0.90609,34.108446
6,at_risk,6,36458697,0.968817,36.458697
7,at_risk,7,37627080,1.0,37.62708
8,at_risk,8,22171007,0.587489,22.171007
9,at_risk,9,3428737,0.087273,3.428737


In [12]:
def build(case="base", k=5, max_workers=20):
    """Query the drive-time service for every zip3 origin and cache the result.

    Takes about 5 min at k=5, clinics = 1k.

    Parameters
    ----------
    case : str
        Scenario name. It is sent to the drive-time endpoint, passed to
        ``bq.states_query`` and used in the output file name.
    k : int
        Value of the ``k`` field sent with every request (defaults to the
        previously hard-coded 5).
    max_workers : int
        Number of threads issuing requests concurrently (defaults to the
        previously hard-coded 20).

    Returns
    -------
    pandas.DataFrame
        The zip3 origins joined with the service responses and state statuses,
        plus ``adi_decile`` and ``_drive_time`` columns. The same frame is
        written to ``drive_time_{case}.feather``.

    Raises
    ------
    RuntimeError
        If no request returned HTTP 200, so there is nothing to merge.
    """
    def _invoke_get_drive_time(origin: dict) -> "dict | None":
        # Returns the decoded JSON body, or None when the service answers
        # with a non-200 status (the status and body are printed).
        API_BASE = "https://us-central1-ohdo-post-roe-359822.cloudfunctions.net"
        endpoint = f"{API_BASE}/post-roe-sls-dev-get-drive-time"
        params = {
            "case": case,
            "k": k,
            "origin": {
                "lat_lon": list(origin['lat_lon']),
                "zip3": origin['zip3'],
            }
        }
        resp = requests.post(endpoint, json=params)
        if resp.status_code != 200:
            print(resp.status_code, resp.text)
            return None
        return resp.json()

    origins = bq.census_zip3_query()
    zip3_origins = origins[['zip3','lat_lon']].to_dict(orient="records")
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Materialise inside the context so results are collected before shutdown.
        responses = list(pool.map(_invoke_get_drive_time, zip3_origins))

    # Failed requests come back as None; a None among the records breaks the
    # DataFrame constructor, so keep only the successful responses. Origins
    # without a response end up with missing values after the left merge.
    succeeded = [record for record in responses if record is not None]
    if not succeeded:
        raise RuntimeError(
            f"drive-time service returned no successful responses for case={case!r}"
        )
    drive_time = pd.DataFrame(succeeded)

    # enrich
    # NOTE(review): the merge keys are whatever columns the response shares with
    # `origins` (presumably `zip3`); confirm against the service schema.
    df = origins.merge(drive_time, how="left").drop_duplicates(subset=['zip3']).drop(columns=["lat_lon"])
    df = df.merge(bq.states_query(case=case)[['state','state_status']])
    # An ADI of exactly 100 belongs to the top decile (9), not an eleventh
    # bucket; this is the same rule the `_adi_decile` column uses.
    df['adi_decile'] = df['adi_median'].apply(lambda x: 9 if x == 100 else int(x/10))
    # Whole units of 60 (presumably seconds to minutes; confirm). Rows with no
    # drive time are skipped by dropna() and stay missing after index alignment.
    df['_drive_time'] = df['drive_time'].dropna().apply(lambda x: int(x/60))
    df.to_feather(f"drive_time_{case}.feather")
    # bq.to_feather(df) #cache
    return df

# Load the cached per-scenario tables written by build(); run
# `df = build("alt")` (or "base") to regenerate a cache file.
df_alt, df_base = (
    pd.read_feather(f"drive_time_{case}.feather") for case in ("alt", "base")
)
# The generator's loop variable does not leak, so set the notebook-level
# `case` to the last scenario loaded.
case = "base"


2041