In [16]:
import plotly.express as px
import pandas as pd
from geopy.distance import geodesic
import requests
from concurrent.futures import ThreadPoolExecutor
from sklearn.preprocessing import MinMaxScaler
# Read the Mapbox token from the environment rather than committing it to the
# notebook. Export MAPBOX_ACCESS_TOKEN before starting the kernel; a KeyError
# on this line means the variable is not set.
# NOTE(review): the token that was previously hard-coded here has been exposed
# in the notebook history and should be rotated.
from os import environ
px.set_mapbox_access_token(environ["MAPBOX_ACCESS_TOKEN"])

from post_roe.query import BucketQuery as bq, BUCKET_BASE
from post_roe.distance import Distance

from post_roe.query import BucketQuery as bq

def build(case="base", k=5, max_workers=20):
    """Build and cache the per-ZIP3 drive-time table for one scenario.

    Posts one request per ZIP3 origin to the `get-drive-time` cloud function,
    joins the responses back onto the census ZIP3 origins, attaches each
    state's status, derives `adi_decile` and `_drive_time`, writes the result
    to `drive_time_{case}.feather` and returns it.

    Takes about 5 min at k=5, clinics = 1k

    Args:
        case: Scenario name; sent to the drive-time endpoint and to
            `bq.states_query`, and used in the output file name.
        k: Value sent as `k` in each request payload
            (presumably the number of nearest clinics considered - confirm
            against the cloud function).
        max_workers: Number of threads issuing requests concurrently.

    Returns:
        The enriched DataFrame, one row per ZIP3.

    Raises:
        RuntimeError: If every drive-time request failed.
    """
    API_BASE = "https://us-central1-ohdo-post-roe-359822.cloudfunctions.net"
    endpoint = f"{API_BASE}/post-roe-sls-dev-get-drive-time"

    def _invoke_get_drive_time(origin: dict) -> "dict | None":
        """POST one origin to the endpoint; return the JSON body, or None on a non-200 reply."""
        params = {
            "case": case,
            "k": k,
            "origin": {
                "lat_lon": list(origin['lat_lon']),
                "zip3": origin['zip3'],
            }
        }
        resp = requests.post(endpoint, json=params)
        if resp.status_code != 200:
            print(resp.status_code, resp.text)
            return None
        return resp.json()

    origins = bq.census_zip3_query()
    zip3_origins = origins[['zip3','lat_lon']].to_dict(orient="records")
    # `executor` rather than `exec`, which would shadow the builtin.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        responses = list(executor.map(_invoke_get_drive_time, zip3_origins))

    # Failed requests come back as None; a None inside the record list breaks
    # DataFrame construction, so drop them here. The left merge below leaves
    # those ZIP3s with missing drive-time values instead.
    succeeded = [response for response in responses if response is not None]
    if not succeeded:
        raise RuntimeError(
            f"all {len(responses)} drive-time requests failed for case={case!r}"
        )
    drive_time = pd.DataFrame(succeeded)

    # enrich
    df = origins.merge(drive_time, how="left").drop_duplicates(subset=['zip3']).drop(columns=["lat_lon"])
    df = df.merge(bq.states_query(case=case)[['state','state_status']])
    df['adi_decile'] = df['adi_median'].apply(lambda x: int(x/10))
    # dropna() first so rows without a drive time stay NaN instead of raising in int().
    df['_drive_time'] = df['drive_time'].dropna().apply(lambda x: int(x/60))
    df.to_feather(f"drive_time_{case}.feather")
    # bq.to_feather(df) #cache
    return df

# df = build("alt")
# Load the cached drive-time tables (the files build() writes), "alt" first
# and then "base".
df_alt, df_base = (
    pd.read_feather(f"drive_time_{scenario}.feather")
    for scenario in ("alt", "base")
)
case = "base"


In [17]:
# Map the rows of the "alt" scenario whose state_status is "at_risk",
# colored by `_drive_time` and sized by population.
df = df_alt[df_alt['state_status']=="at_risk"]
px.scatter_mapbox(
        df,
        lat="lat",
        lon="lon",
        # Count the rows actually plotted (the filtered frame), not all of df_alt.
        title=f"{len(df)} locations",
        color='_drive_time',
        size="population",
        # hover_data=['LocationFacility'],
        height=600,
        zoom=3,
    )


In [17]:
# Total population per ADI decile, shown as a bar chart.
population_by_decile = df.groupby(['adi_decile']).agg(population=("population", "sum"))
adi_median_population = population_by_decile.reset_index()
adi_median_population
px.bar(
    adi_median_population,
    x="adi_decile",
    y="population",
)

In [21]:
# Total population per (state_status, ADI decile), with bars colored by status.
population_by_status_decile = df.groupby(['state_status', 'adi_decile']).agg(
    population=("population", "sum")
)
adi_median_population = population_by_status_decile.reset_index()
adi_median_population
px.bar(
    adi_median_population,
    x="adi_decile",
    y="population",
    color="state_status",
)

In [23]:
# Add two rescaled views of the population totals:
#   _population   - min-max scaled via MinMaxScaler
#   _pop_millions - raw population divided by 10**6
scaler = MinMaxScaler()
population_column = adi_median_population[['population']]
adi_median_population[['_population']] = scaler.fit_transform(population_column)
adi_median_population['_pop_millions'] = adi_median_population['population'].div(10**6)
adi_median_population
# df['_loss'] = df['drive_time'] * df['_population']

Unnamed: 0,state_status,adi_decile,population,_population,_pop_millions
0,at_risk,0,227831,0.001843,0.227831
1,at_risk,1,2076655,0.051187,2.076655
2,at_risk,2,5304625,0.137339,5.304625
3,at_risk,3,12264324,0.323088,12.264324
4,at_risk,4,19395418,0.513411,19.395418
5,at_risk,5,34108446,0.90609,34.108446
6,at_risk,6,36458697,0.968817,36.458697
7,at_risk,7,37627080,1.0,37.62708
8,at_risk,8,22171007,0.587489,22.171007
9,at_risk,9,3428737,0.087273,3.428737


In [27]:
# No visible cell creates a `_loss` column, so build it here when it is absent.
# NOTE(review): reconstructed from the commented-out formula
# `df['drive_time'] * df['_population']`, taking `_population` to be the
# min-max scaled population of each row - confirm this is the intended weighting.
loss_df = df.copy()
if '_loss' not in loss_df.columns:
    loss_df['_population'] = MinMaxScaler().fit_transform(loss_df[['population']]).ravel()
    loss_df['_loss'] = loss_df['drive_time'] * loss_df['_population']
px.scatter(loss_df, x='adi_median', y='_loss', color="state_status")

In [31]:
# df.to_feather("zip3_drive_time_mean.feather")

In [3]:
# 3 minutes to compute 1e6 o_d_pair geodesics
# df.to_feather("1m_o_d_geodesic.feather")
# df['geodesic'] = df['o_d_pair'].apply(_get_distance_geodesic)
# df

In [None]:
# NOTE(review): absolute, machine-specific default; pass `csv_path` to override.
STATES_CSV_PATH = "/Users/parker/Development/post-roe/data/tf/_220805_wp_roe_data.csv"


def states_query(case="base", csv_path=STATES_CSV_PATH):
    """Load the per-state status table for one scenario.

    Args:
        case: "base" returns the table as stored; "alt" additionally sets
            `state_status` to "protected" for the rows whose `state` is "GA".
        csv_path: CSV file to read; must contain `state` and `state_status`
            columns.

    Returns:
        The state table with a `case` column holding the scenario name.

    Raises:
        ValueError: If `case` is not "base" or "alt", or if the "alt" case
            is requested and the table has no "GA" row.
    """
    if case not in ("base", "alt"):
        raise ValueError(f"unknown case {case!r}; expected 'base' or 'alt'")

    states = pd.read_csv(csv_path)
    if case == "alt":
        is_ga = states['state'] == "GA"
        if not is_ga.any():
            # Without a GA row the "alt" table would silently equal "base".
            raise ValueError("no row with state == 'GA' to mark as protected")
        states.loc[is_ga, 'state_status'] = "protected"
    states['case'] = case
    # The original built the frame but never returned it.
    return states
# Cache the raw state-status CSV as a feather file in the working directory.
# (Alternative kept for reference: bq.to_feather(states, "state_status_base"))
_states_csv = "/Users/parker/Development/post-roe/data/tf/_220805_wp_roe_data.csv"
_states_feather = "state_status_base.feather"
states = pd.read_csv(_states_csv)
states.to_feather(_states_feather)

In [4]:
# Split each (origin, destination) pair into its own column, then preview the
# k closest destinations (smallest geodesic) for the first origin.
df['o'] = df['o_d_pair'].map(lambda pair: pair[0])
df['d'] = df['o_d_pair'].map(lambda pair: pair[1])
# Note: `os` is the array of unique origins; it shadows the stdlib module name.
os = df['o'].unique()

k = 5
o = os[0]
rows_for_origin = df[df['o'] == o]
o_df = rows_for_origin.sort_values("geodesic").head(k).reset_index(drop=True)
o_df


Unnamed: 0,o_d_pair,geodesic,o,d
0,"((34.3157908, -85.2951255), (34.3937861, -85.6...",23,"(34.3157908, -85.2951255)","(34.3937861, -85.6979718)"
1,"((34.3157908, -85.2951255), (34.7336449, -85.0...",33,"(34.3157908, -85.2951255)","(34.7336449, -85.0008241)"
2,"((34.3157908, -85.2951255), (33.8723315, -85.8...",45,"(34.3157908, -85.2951255)","(33.8723315, -85.8896643)"
3,"((34.3157908, -85.2951255), (34.0323882, -84.5...",49,"(34.3157908, -85.2951255)","(34.0323882, -84.5051537)"
4,"((34.3157908, -85.2951255), (34.7838801, -86.1...",56,"(34.3157908, -85.2951255)","(34.7838801, -86.1022011)"


In [6]:
# Keep only the k nearest destinations (smallest geodesic) for every origin.
def _k_nearest(rows, k=5):
    """Return the k rows of `rows` with the smallest geodesic, re-indexed from 0."""
    return rows.sort_values("geodesic")[0:k].reset_index(drop=True)


def _filter_geodesic(o, k=5):
    """Return the k nearest rows of the global `df` whose origin column `o` equals `o`."""
    return _k_nearest(df[df['o'] == o], k)


# The per-origin version re-scanned the whole frame for each origin (the
# author's note: about 1 minute). One groupby pass hands every origin its own
# rows; sort=False keeps origins in order of first appearance, so rows and
# ordering match the per-origin concat.
dff = pd.concat([_k_nearest(rows) for _, rows in df.groupby("o", sort=False)])
dff

Unnamed: 0,o_d_pair,geodesic,o,d
0,"((34.3157908, -85.2951255), (34.3937861, -85.6...",23,"(34.3157908, -85.2951255)","(34.3937861, -85.6979718)"
1,"((34.3157908, -85.2951255), (34.7336449, -85.0...",33,"(34.3157908, -85.2951255)","(34.7336449, -85.0008241)"
2,"((34.3157908, -85.2951255), (33.8723315, -85.8...",45,"(34.3157908, -85.2951255)","(33.8723315, -85.8896643)"
3,"((34.3157908, -85.2951255), (34.0323882, -84.5...",49,"(34.3157908, -85.2951255)","(34.0323882, -84.5051537)"
4,"((34.3157908, -85.2951255), (34.7838801, -86.1...",56,"(34.3157908, -85.2951255)","(34.7838801, -86.1022011)"
...,...,...,...,...
0,"((43.8614388, -110.9357249), (43.866555, -111....",34,"(43.8614388, -110.9357249)","(43.866555, -111.6333206)"
1,"((43.8614388, -110.9357249), (44.2839539, -112...",74,"(43.8614388, -110.9357249)","(44.2839539, -112.3021668)"
2,"((43.8614388, -110.9357249), (42.5148505, -111...",101,"(43.8614388, -110.9357249)","(42.5148505, -111.765362)"
3,"((43.8614388, -110.9357249), (43.020349, -112....",113,"(43.8614388, -110.9357249)","(43.020349, -112.8691669)"


In [7]:
# Display `dff` with a fresh running index; the old repeating per-origin index
# is kept as a column named `index`. The result is not assigned, so `dff`
# itself is unchanged.
dff.reset_index()

Unnamed: 0,index,o_d_pair,geodesic,o,d
0,0,"((34.3157908, -85.2951255), (34.3937861, -85.6...",23,"(34.3157908, -85.2951255)","(34.3937861, -85.6979718)"
1,1,"((34.3157908, -85.2951255), (34.7336449, -85.0...",33,"(34.3157908, -85.2951255)","(34.7336449, -85.0008241)"
2,2,"((34.3157908, -85.2951255), (33.8723315, -85.8...",45,"(34.3157908, -85.2951255)","(33.8723315, -85.8896643)"
3,3,"((34.3157908, -85.2951255), (34.0323882, -84.5...",49,"(34.3157908, -85.2951255)","(34.0323882, -84.5051537)"
4,4,"((34.3157908, -85.2951255), (34.7838801, -86.1...",56,"(34.3157908, -85.2951255)","(34.7838801, -86.1022011)"
...,...,...,...,...,...
4820,0,"((43.8614388, -110.9357249), (43.866555, -111....",34,"(43.8614388, -110.9357249)","(43.866555, -111.6333206)"
4821,1,"((43.8614388, -110.9357249), (44.2839539, -112...",74,"(43.8614388, -110.9357249)","(44.2839539, -112.3021668)"
4822,2,"((43.8614388, -110.9357249), (42.5148505, -111...",101,"(43.8614388, -110.9357249)","(42.5148505, -111.765362)"
4823,3,"((43.8614388, -110.9357249), (43.020349, -112....",113,"(43.8614388, -110.9357249)","(43.020349, -112.8691669)"


In [None]:
# k_closest = []

In [5]:
from typing import List, Tuple
from dataclasses import dataclass
import requests



{'destination_addresses': ['1601 Evatt Dr SE, Fort Payne, AL 35967, USA'],
 'origin_addresses': ['Snow Loop, Rome, GA 30165, USA'],
 'rows': [{'elements': [{'distance': {'text': '36.8 mi', 'value': 59178},
     'duration': {'text': '48 mins', 'value': 2861},
     'status': 'OK'}]}],
 'status': 'OK'}

In [None]:
@dataclass
class GeoPair:
    """An origin/destination pair of coordinate tuples.

    Both tuples are passed unchanged to `geodesic`
    (presumably (latitude, longitude) - confirm against callers).
    """

    origin: Tuple[float, float]
    destination: Tuple[float, float]

    @property
    def distance_geodesic(self):
        """Geodesic distance from origin to destination in miles, truncated to an int."""
        separation = geodesic(self.origin, self.destination)
        return int(separation.miles)

# def _call_google_distance_api(geo_pair: GeoPair) -> dict:
#     """
#         Requires Google Distance Matrix API Key
#         cost $5 per 1k: https://developers.google.com/maps/documentation/distance-matrix/usage-and-billing
#         with $200 per month included (40k)
#     """
#     def _tuple_to_string(x: tuple) -> str: return f"{x[0]},{x[1]}"
#     params = {
#         'origins': _tuple_to_string(geo_pair.origin),
#         'destinations': _tuple_to_string(geo_pair.destination),
#         'units': 'imperial',
#         'key': GOOGLE_DISTANCE_API_KEY
#     }
#     url = "https://maps.googleapis.com/maps/api/distancematrix/json"
#     response = requests.get(url, params=params)
#     if response.status_code != 200:
#         print(response.status_code, response.text)
#     return response.json()

In [12]:
# ok so I have od pairs, now I need the geodesic distance between them





2041