In [2]:
import plotly.express as px
import pandas as pd
from geopy.distance import geodesic
import requests
from concurrent.futures import ThreadPoolExecutor

from post_roe.query import BucketQuery as bq, BUCKET_BASE
from post_roe.distance import Distance


In [25]:
def process_async(request):
    """Fetch drive times for every zip3 origin and cache them to a feather file.

    Takes about 5 min at k=5, clinics = 1k

    Args:
        request: Currently unused; the case is hard-coded below (the commented
            lines show how it would be read from the request body).

    Returns:
        dict with the keys ``"case"``, ``"count"`` (number of successful
        responses) and ``"drive_times"`` (list of the successful JSON payloads).
    """
    MAX_WORKERS = 50

    # request_json = request.get_json()
    # case = request_json.get("case")
    case = "main_515"

    def _invoke_get_drive_time(origin: dict) -> dict:
        """POST one origin to the get-drive-time endpoint.

        Returns the decoded JSON body on HTTP 200; otherwise prints the status
        and body and returns None.
        """
        API_BASE = "https://us-central1-ohdo-post-roe-359822.cloudfunctions.net"
        endpoint = f"{API_BASE}/post-roe-sls-dev-get-drive-time"
        params = {
            "case": case,
            "k": 5,
            "origin": {
                "lat_lon": list(origin['lat_lon']),
                "zip3": origin['zip3'],
            }
        }
        resp = requests.post(endpoint, json=params)
        if resp.status_code != 200:
            print(resp.status_code, resp.text)
            return None
        return resp.json()

    # loads 8e2 zip3 origins
    origins = bq.census_zip3_query()
    zip3_origins = origins[['zip3', 'lat_lon']].to_dict(orient="records")

    # `executor` rather than `exec`, so the builtin is not shadowed.
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        responses = list(executor.map(_invoke_get_drive_time, zip3_origins))

    # Failed requests come back as None; a None mixed into a list of dicts
    # breaks the DataFrame constructor, so keep only the successful payloads.
    drive_times = [payload for payload in responses if payload is not None]
    drive_time = pd.DataFrame(drive_times)

    # Cache the raw responses; the analysis cell reads this file back and
    # merges it with the origins table.
    drive_time.to_feather(f"drive_time_{case}.feather")

    return {
        "case": case,
        "count": len(drive_times),
        "drive_times": drive_times,
    }

# Uncomment to regenerate the cached feather file before loading it:
# resp = process_async("")
# resp
case = "main_515"
drive_time = pd.read_feather("drive_time_main_515.feather")
origins = bq.census_zip3_query()

# Left-join drive times onto the origins, keep one row per zip3, drop the
# tuple-valued lat_lon column, then attach each state's status for this case.
df = (
    origins
    .merge(drive_time, how="left")
    .drop_duplicates(subset=['zip3'])
    .drop(columns=["lat_lon"])
    .merge(bq.states_query(case=case)[['state', 'state_status']])
)

# Bucket ADI into tens, and drive time (divided by 60) into whole units;
# rows without a drive time stay NaN because they are dropped before the apply.
df['adi_decile'] = df['adi_median'].apply(lambda adi: int(adi / 10))
df['_drive_hrs'] = df['drive_time'].dropna().apply(lambda minutes: int(minutes / 60))
df

Unnamed: 0,state,zip3,lat,lon,population,adi_median,case,drive_time,state_status,adi_decile,_drive_hrs
0,AL,301**,34.315791,-85.295125,39975,79.0,main_515,510.0,at_risk,7,8.0
1,AL,319**,32.372789,-84.843361,16044,84.0,main_515,637.4,at_risk,8,10.0
2,AL,350**,33.568065,-86.712692,442190,73.0,main_515,490.4,at_risk,7,8.0
3,AL,351**,33.506417,-86.624839,324591,61.0,main_515,501.2,at_risk,6,8.0
4,AL,352**,33.506077,-86.798758,491463,72.0,main_515,487.0,at_risk,7,8.0
...,...,...,...,...,...,...,...,...,...,...,...
877,WY,824**,44.438020,-108.408179,52930,48.0,main_515,431.0,at_risk,4,7.0
878,WY,825**,43.103802,-108.847958,38910,56.0,main_515,370.0,at_risk,5,6.0
879,WY,826**,43.106535,-106.629017,91238,52.5,main_515,257.6,at_risk,5,4.0
880,WY,828**,44.714826,-106.872537,36958,31.0,main_515,349.6,at_risk,3,5.0


In [26]:
from sklearn.preprocessing import MinMaxScaler

# Rescale population and drive time to [0, 1] so their product is comparable
# across rows; column 0 of the result is population, column 1 is drive time.
_scaled = MinMaxScaler().fit_transform(df[['population', 'drive_time']])
df['_population'] = _scaled[:, 0]
df['_drive_time'] = _scaled[:, 1]

# Loss proxy: scaled drive time weighted by scaled population.
df['_loss'] = df['_drive_time'] * df['_population']
df


Unnamed: 0,state,zip3,lat,lon,population,adi_median,case,drive_time,state_status,adi_decile,_drive_hrs,_population,_drive_time,_loss
0,AL,301**,34.315791,-85.295125,39975,79.0,main_515,510.0,at_risk,7,8.0,0.013753,0.514549,0.007076
1,AL,319**,32.372789,-84.843361,16044,84.0,main_515,637.4,at_risk,8,10.0,0.005520,0.645082,0.003561
2,AL,350**,33.568065,-86.712692,442190,73.0,main_515,490.4,at_risk,7,8.0,0.152128,0.494467,0.075222
3,AL,351**,33.506417,-86.624839,324591,61.0,main_515,501.2,at_risk,6,8.0,0.111670,0.505533,0.056453
4,AL,352**,33.506077,-86.798758,491463,72.0,main_515,487.0,at_risk,7,8.0,0.169079,0.490984,0.083015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,WY,824**,44.438020,-108.408179,52930,48.0,main_515,431.0,at_risk,4,7.0,0.018210,0.433607,0.007896
878,WY,825**,43.103802,-108.847958,38910,56.0,main_515,370.0,at_risk,5,6.0,0.013386,0.371107,0.004968
879,WY,826**,43.106535,-106.629017,91238,52.5,main_515,257.6,at_risk,5,4.0,0.031389,0.255943,0.008034
880,WY,828**,44.714826,-106.872537,36958,31.0,main_515,349.6,at_risk,3,5.0,0.012715,0.350205,0.004453


In [31]:
# Total the per-row loss within each ADI decile and chart it.
loss_by_adi = (
    df.groupby('adi_decile')['_loss']
    .sum()
    .rename('total_loss')
    .reset_index()
)
px.bar(loss_by_adi, x='adi_decile', y="total_loss")

In [3]:
MAX_WORKERS = 30

def build(case="base", k=5, limit=5):
    """Query drive times for zip3 origins, enrich them, and cache to feather.

    Original author's timing note: takes about 5 min at k=5, clinics = 1k.

    Args:
        case: Case name sent to the drive-time endpoint and used for the state
            lookup and the output file name.
        k: Value sent as ``"k"`` in each request (previously hard-coded to 5).
        limit: Maximum number of origins to query. The default of 5 matches
            the slice that used to be hard-coded here; pass ``None`` to query
            every origin.

    Returns:
        The enriched DataFrame (also written to ``drive_time_{case}.feather``).

    Raises:
        RuntimeError: if no request succeeded, so there is nothing to merge.
    """
    def _invoke_get_drive_time(origin: dict) -> dict:
        """POST one origin; return the JSON body on HTTP 200, else print and return None."""
        API_BASE = "https://us-central1-ohdo-post-roe-359822.cloudfunctions.net"
        endpoint = f"{API_BASE}/post-roe-sls-dev-get-drive-time"
        params = {
            "case": case,
            "k": k,
            "origin": {
                "lat_lon": list(origin['lat_lon']),
                "zip3": origin['zip3'],
            }
        }
        resp = requests.post(endpoint, json=params)
        if resp.status_code != 200:
            print(resp.status_code, resp.text)
            return None
        return resp.json()

    origins = bq.census_zip3_query()
    zip3_origins = origins[['zip3', 'lat_lon']].to_dict(orient="records")

    # `executor` rather than `exec`, so the builtin is not shadowed.
    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        responses = list(executor.map(_invoke_get_drive_time, zip3_origins[:limit]))

    # Failed requests come back as None and would break the DataFrame constructor.
    records = [payload for payload in responses if payload is not None]
    if not records:
        raise RuntimeError(f"no drive-time responses succeeded for case {case!r}")
    drive_time = pd.DataFrame(records)

    # enrich
    df = origins.merge(drive_time, how="left").drop_duplicates(subset=['zip3']).drop(columns=["lat_lon"])
    df = df.merge(bq.states_query(case=case)[['state', 'state_status']])
    df['adi_decile'] = df['adi_median'].apply(lambda x: int(x/10))
    # Origins without a response keep NaN here because they are dropped before the apply.
    df['_drive_hrs'] = df['drive_time'].dropna().apply(lambda x: int(x/60))
    df.to_feather(f"drive_time_{case}.feather")
    # bq.to_feather(df) #cache
    return df

# Build the enriched frame for the "main_515" case and display it.
main = build(case="main_515")
main


Unnamed: 0,state,zip3,lat,lon,population,adi_median,case,drive_time,state_status,adi_decile,_drive_time
0,AL,301**,34.315791,-85.295125,39975,79.0,main_515,510.0,at_risk,7,8.0
1,AL,319**,32.372789,-84.843361,16044,84.0,main_515,637.4,at_risk,8,10.0
2,AL,350**,33.568065,-86.712692,442190,73.0,main_515,490.4,at_risk,7,8.0
3,AL,351**,33.506417,-86.624839,324591,61.0,main_515,501.2,at_risk,6,8.0
4,AL,352**,33.506077,-86.798758,491463,72.0,main_515,487.0,at_risk,7,8.0
...,...,...,...,...,...,...,...,...,...,...,...
877,WY,824**,44.438020,-108.408179,52930,48.0,,,at_risk,4,
878,WY,825**,43.103802,-108.847958,38910,56.0,,,at_risk,5,
879,WY,826**,43.106535,-106.629017,91238,52.5,,,at_risk,5,
880,WY,828**,44.714826,-106.872537,36958,31.0,,,at_risk,3,


In [16]:
import google.auth.transport.requests
import google.oauth2.id_token

import os
# Supply a service-account key file path only when the environment does not
# already provide one, and expand "~" instead of hard-coding one user's home
# directory so the notebook is not tied to a single machine.
os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    os.path.expanduser('~/.gcloud/ohdo-post-roe-359822-keyfile.json'),
)

# Base URL of the deployed cloud functions called below.
API_BASE = "https://us-central1-ohdo-post-roe-359822.cloudfunctions.net"

def _invoke_authenticated(endpoint, json_params):
    """POST ``json_params`` to ``{API_BASE}/{endpoint}`` with an ID-token bearer header.

    The ID token is fetched for the target URL before the call. Returns the
    ``requests`` response object without inspecting its status.
    """
    target = f"{API_BASE}/{endpoint}"
    token = google.oauth2.id_token.fetch_id_token(
        google.auth.transport.requests.Request(),
        target,
    )
    return requests.post(
        target,
        json=json_params,
        headers={"Authorization": f"Bearer {token}"},
    )

def trigger_pipeline(case):
    """Invoke the process-async function for ``case``.

    Returns the decoded JSON body on HTTP 200; for any other status returns a
    dict holding the ``status_code`` and the response ``text``.
    """
    resp = _invoke_authenticated("post-roe-sls-dev-process-async", {"case": case})
    if resp.status_code == 200:
        return resp.json()
    return {"status_code": resp.status_code, "text": resp.text}
    

# Trigger the remote pipeline for the "main_515" case and show what came back.
resp = trigger_pipeline(case="main_515")
resp

{'status_code': 500, 'text': 'Error: could not handle the request\n'}

In [15]:
# trigger_pipeline returns a plain dict (the error dict or the JSON body), not a
# requests response, so read the error text by key; this is None when absent.
resp.get("text")

'Error: could not handle the request\n'

In [11]:
# Display the zip3 origins table returned by the query helper (the same call
# that supplies the origins for the drive-time requests above).
bq.census_zip3_query()

Unnamed: 0,state,zip3,lat,lon,population,adi_median,lat_lon
0,AL,301**,34.315791,-85.295125,39975,79.0,"(34.3157908, -85.2951255)"
1,AL,319**,32.372789,-84.843361,16044,84.0,"(32.3727889, -84.8433612)"
2,AL,350**,33.568065,-86.712692,442190,73.0,"(33.56806525614035, -86.71269193157895)"
3,AL,351**,33.506417,-86.624839,324591,61.0,"(33.5064165925, -86.62483858)"
4,AL,352**,33.506077,-86.798758,491463,72.0,"(33.50607672857143, -86.79875809285714)"
...,...,...,...,...,...,...,...
960,WY,828**,44.714826,-106.872537,36958,31.0,"(44.71482560769231, -106.87253687692308)"
961,WY,829**,41.794508,-109.806840,70258,45.5,"(41.79450811666666, -109.80683991111111)"
962,WY,830**,43.639891,-110.731186,20421,3.0,"(43.63989136, -110.73118584)"
963,WY,831**,42.581182,-110.734945,22360,44.0,"(42.58118175294118, -110.73494530588235)"


In [None]:
# Load the cached drive-time frames for the "alt" and "base" cases.
_frames = {}
for case in ("alt", "base"):
    _frames[case] = pd.read_feather(f"drive_time_{case}.feather")
df_alt, df_base = _frames["alt"], _frames["base"]
