In [29]:
from concurrent.futures import ThreadPoolExecutor
from typing import Optional

import pandas as pd
import requests
from geopy.distance import geodesic

from post_roe.query import BucketQuery as bq
from post_roe.distance import Distance

def _invoke_get_drive_time(origin: dict) -> dict:
    """POST a single origin to the ``prm-get-drive-time`` cloud function.

    Args:
        origin: mapping with a ``zip3`` entry and a ``lat_lon`` entry; the
            latter is converted to a list before being sent as JSON.

    Returns:
        The decoded JSON body when the service answers with status 200.
        For any other status the code and response text are printed and
        ``None`` is returned.
    """
    url = "https://us-central1-concise-memory-274202.cloudfunctions.net/prm-get-drive-time"
    payload = {"zip3": origin['zip3'], "lat_lon": list(origin['lat_lon'])}
    reply = requests.post(url, json=payload)
    if reply.status_code == 200:
        return reply.json()
    # Non-200: report the failure and hand back nothing.
    print(reply.status_code, reply.text)
    return None

def process():
    """Fetch drive times for every zip3 origin and collect them in a DataFrame.

    The origins come from ``bq.census_zip3_query()``; their ``zip3`` and
    ``lat_lon`` columns are turned into one record per origin and each record
    is sent through ``_invoke_get_drive_time`` on a pool of 20 threads.

    Returns:
        A DataFrame built from the successful responses, in origin order.
        Origins whose request failed are left out.
    """
    origins = bq.census_zip3_query()
    zip3_origins = origins[['zip3', 'lat_lon']].to_dict(orient="records")
    # Named `executor` so the builtin `exec` is not shadowed.
    with ThreadPoolExecutor(max_workers=20) as executor:
        responses = list(executor.map(_invoke_get_drive_time, zip3_origins))
    # _invoke_get_drive_time returns None for non-200 replies; drop those so the
    # frame is built from dict records only.
    records = [response for response in responses if response is not None]
    return pd.DataFrame(records)

# Run the threaded fan-out defined above and display the resulting frame.
df = process()
df

Unnamed: 0,drive_time,origin
0,389.2,301**
1,515.2,319**
2,370.2,350**


In [None]:
# Result of bq.synthetic_clinics_query(); other cells in this notebook read it through
# the global name `destinations` (the feather export and get_drive_time_mean).
destinations = bq.synthetic_clinics_query()

In [5]:
# destinations.to_csv("220818-synthetic-desintations.csv", index=False)
# Write the destinations to feather, then read the file back so the stored
# result is what gets displayed.
feather_path = "220818-synthetic-destinations.feather"
destinations.to_feather(feather_path)
pd.read_feather(feather_path)

Unnamed: 0,state,zip5,lat_lon,population,adi_median
0,CO,80232,"[39.6881978, -105.0894837]",21411,29.0
1,NY,11422,"[40.6607865, -73.7375702]",30425,15.0
2,NY,10471,"[40.8999842, -73.9067506]",22922,33.0
3,CA,95630,"[38.662781, -121.140107]",72180,13.0
4,NJ,08332,"[39.3662171, -75.026853]",36768,68.0
...,...,...,...,...,...
995,ME,04556,"[43.9759894, -69.6109523]",1249,41.0
996,MA,01843,"[42.6899881, -71.1603896]",24425,35.0
997,IL,60423,"[41.4744578, -87.8411506]",30423,26.0
998,CA,95209,"[38.046863, -121.3528022]",39488,30.0


In [7]:
# NOTE(review): DATA_BASE is an absolute path on one machine and already ends in "/",
# so the f-string below produces ".../data//tf/..." — consider a configurable base.
DATA_BASE = "/Users/parker/Development/post-roe/data/"
BUCKET_BASE = "https://storage.googleapis.com/www.postroemap.org/data"
wp_roe_csv = f"{DATA_BASE}/tf/_220805_wp_roe_data.csv"
df = pd.read_csv(wp_roe_csv)
# df.to_csv(f"{BUCKET_BASE}/tf/_220805_wp_roe_data.csv")

In [5]:
def get_drive_time_mean(origin, k=5) -> float:
    """Mean drive time from ``origin`` to its ``k`` geodesically nearest destinations.

    Reads the ``lat_lon`` column of the notebook-global ``destinations`` frame
    and uses ``Distance`` for both the geodesic and the drive-time measurement.

    Args:
        origin: row-like object exposing ``origin["lat_lon"]``.
        k: how many of the nearest destinations to keep.

    Returns:
        ``mean()`` of the ``drive_time`` values over the kept rows.
    """
    candidates = destinations[["lat_lon"]].reset_index()
    # Step 1: "as the crow flies" distance to every destination; keep the k closest.
    candidates["geodesic"] = candidates["lat_lon"].apply(
        lambda dest: Distance.geodesic(origin["lat_lon"], dest)
    )
    nearest = candidates.sort_values("geodesic").head(k)
    # Step 2: drive time is only looked up for those k rows.
    nearest["drive_time"] = nearest["lat_lon"].apply(
        lambda dest: Distance.drive_time(origin["lat_lon"], dest)
    )
    # display(nearest)
    return nearest["drive_time"].mean()

# Spot-check on ten randomly sampled origins; no seed is set, so the sample
# differs between runs.
df = origins.sample(10).assign(
    drive_time_mean=lambda sampled: sampled.apply(get_drive_time_mean, axis=1)
)
df

Unnamed: 0,state,zip3,lat,lon,population,adi_median,lat_lon,drive_time_mean
680,OK,741**,36.135302,-95.935314,413574,76.5,"(36.13530226428571, -95.93531433928571)",163.6
887,VT,054**,44.609389,-73.047272,232200,38.0,"(44.60938884468085, -73.0472716106383)",118.0
249,IL,617**,40.504102,-88.922502,234885,63.0,"(40.50410172075471, -88.92250239811321)",36.6
447,MO,648**,36.969164,-94.345532,197024,76.0,"(36.969163884848484, -94.34553191818182)",143.6
115,CO,806**,40.334004,-104.577674,253265,37.0,"(40.33400376, -104.57767352799999)",45.2
406,MI,497**,45.567445,-84.517196,226819,74.0,"(45.56744473076923, -84.51719629076923)",405.0
809,TX,762**,33.412755,-97.305124,440694,54.0,"(33.412754605405404, -97.30512426216215)",280.6
243,IL,611**,42.285942,-89.047994,235834,80.0,"(42.285941936363635, -89.0479936)",30.2
905,WA,993**,46.356621,-118.962107,348984,54.0,"(46.356620694285716, -118.96210671142856)",50.4
539,NH,032**,43.523869,-71.657288,187843,39.5,"(43.52386925344828, -71.6572875448276)",57.6


In [None]:
# The Google Distance Matrix key is read from the environment instead of being
# committed with the notebook. NOTE(review): the literal key that used to be stored
# on this line should be treated as exposed and rotated.
# Only `environ` is imported because a later cell rebinds the name `os`.
from os import environ

# An unset variable yields an empty key rather than an import-time failure.
GOOGLE_DISTANCE_API_KEY = environ.get("GOOGLE_DISTANCE_API_KEY", "")

class Distance:
    """Distances for origin-destination ("o-d") coordinate pairs.

    ``geodesic`` measures the straight-line distance with geopy; ``drive_time``
    asks the Google Distance Matrix API, using the module-level
    ``GOOGLE_DISTANCE_API_KEY`` at call time.
    """

    @staticmethod
    def geodesic(o: tuple, d: tuple) -> int:
        """Return the geodesic distance from ``o`` to ``d`` in miles, truncated to int.

        Inside this method ``geodesic`` resolves to the module-level geopy
        function, not to this static method.
        """
        return int(geodesic(o, d).miles)

    @staticmethod
    def _tuple_to_string(x: tuple) -> str:
        """Format a coordinate pair as ``"<first>,<second>"`` for the query string."""
        return f"{x[0]},{x[1]}"

    @staticmethod
    def _call_google(o: tuple, d: tuple) -> dict:
        """Query the Distance Matrix endpoint for one o-d pair and return the JSON body.

        A non-200 status is printed; the body is decoded and returned either way.
        """
        params = {
            "origins": Distance._tuple_to_string(o),
            "destinations": Distance._tuple_to_string(d),
            "units": "imperial",
            "key": GOOGLE_DISTANCE_API_KEY,
        }
        url = "https://maps.googleapis.com/maps/api/distancematrix/json"
        response = requests.get(url, params=params)
        if response.status_code != 200:
            print(response.status_code, response.text)
        return response.json()

    @staticmethod
    def _get_minutes(google_distance_response: dict) -> Optional[int]:
        """Extract whole minutes from the first element of a Distance Matrix response.

        Reads ``rows[0].elements[0].duration.value`` (seconds) and truncates
        ``seconds / 60`` to int.

        Returns:
            The minutes, or ``None`` when the response carries no usable
            duration (missing keys, empty ``rows``/``elements``, or a payload
            that is not subscriptable).
        """
        try:
            element = google_distance_response["rows"][0]["elements"][0]
            seconds = element["duration"]["value"]
            return int(seconds / 60)
        except (KeyError, IndexError, TypeError):
            # Best-effort lookup: an unroutable or rejected request simply has
            # no duration, which callers see as None.
            return None

    @staticmethod
    def drive_time(o: tuple, d: tuple) -> Optional[int]:
        """Return the drive time from ``o`` to ``d`` in whole minutes.

        Returns ``None`` when the API response contains no duration.
        """
        data = Distance._call_google(o, d)
        return Distance._get_minutes(data)

In [52]:

# NOTE(review): the UnboundLocalError saved as this cell's output names a local
# `destination`, which a bare `df` display does not reference — the output looks
# stale; re-run on a fresh kernel to confirm.
df

UnboundLocalError: local variable 'destination' referenced before assignment

In [None]:
import pandas as pd
from post_roe.query import BucketQuery as bq
from itertools import product
from geopy.distance import geodesic

def _row_to_lat_lon(row):
    """Pack a row's ``lat`` and ``lon`` values into a ``(lat, lon)`` tuple."""
    return (row['lat'], row['lon'])

origins = bq.census_zip3_query()
origins['lat_lon'] = origins.apply(_row_to_lat_lon, axis=1)

# NOTE(review): `desintation` looks like a misspelling of "destination"; the name is
# kept because other cells may refer to it. The 1000-row sample is unseeded.
desintation = bq.census_zip5_query().sample(1000).reset_index(drop=True)
desintation['lat_lon'] = desintation.apply(_row_to_lat_lon, axis=1)
# df = pd.DataFrame({"o_d_pair": list(product(origin['lat_lon'][0:1], desintation['lat_lon']))}) 
# len(df)

Unnamed: 0,index,lat_lon,geodesic
0,0,"(34.4145929, -85.050554)",15
1,1,"(34.0888743, -85.0703821)",20
2,2,"(34.5984644, -85.6350858)",27
3,3,"(34.6126865, -85.9045038)",40
4,4,"(33.8476552, -85.7849164)",42


In [3]:
# 3 minutes to compute 1e6 o_d_pair geodesics
# df.to_feather("1m_o_d_geodesic.feather")
# df['geodesic'] = df['o_d_pair'].apply(_get_distance_geodesic)
# df

In [4]:
# Split each o-d pair into its origin and destination columns, then preview the
# k nearest destinations (by geodesic) for the first origin.
df['o'] = df['o_d_pair'].apply(lambda pair: pair[0])
df['d'] = df['o_d_pair'].apply(lambda pair: pair[1])
# NOTE(review): this rebinds the name `os` to the array of unique origins.
os = df['o'].unique()

k = 5
o = os[0]
from_first_origin = df['o'] == o
o_df = df[from_first_origin].sort_values("geodesic").head(k).reset_index(drop=True)
o_df


Unnamed: 0,o_d_pair,geodesic,o,d
0,"((34.3157908, -85.2951255), (34.3937861, -85.6...",23,"(34.3157908, -85.2951255)","(34.3937861, -85.6979718)"
1,"((34.3157908, -85.2951255), (34.7336449, -85.0...",33,"(34.3157908, -85.2951255)","(34.7336449, -85.0008241)"
2,"((34.3157908, -85.2951255), (33.8723315, -85.8...",45,"(34.3157908, -85.2951255)","(33.8723315, -85.8896643)"
3,"((34.3157908, -85.2951255), (34.0323882, -84.5...",49,"(34.3157908, -85.2951255)","(34.0323882, -84.5051537)"
4,"((34.3157908, -85.2951255), (34.7838801, -86.1...",56,"(34.3157908, -85.2951255)","(34.7838801, -86.1022011)"


In [6]:
# took 1 minute to run this filter 
def _filter_geodesic(o, k=5):
    """Return the ``k`` rows of the global ``df`` for origin ``o`` with the smallest geodesic.

    Rows are selected where ``df['o'] == o``, ordered by the ``geodesic``
    column, cut to the first ``k``, and given a fresh 0-based index.
    """
    same_origin = df[df['o'] == o]
    nearest = same_origin.sort_values("geodesic").head(k)
    return nearest.reset_index(drop=True)

# One small frame per unique origin, stacked into a single frame; each piece keeps
# its own 0-based index, so index values repeat in `dff`.
per_origin_frames = [_filter_geodesic(origin) for origin in os]
dff = pd.concat(per_origin_frames)
dff

Unnamed: 0,o_d_pair,geodesic,o,d
0,"((34.3157908, -85.2951255), (34.3937861, -85.6...",23,"(34.3157908, -85.2951255)","(34.3937861, -85.6979718)"
1,"((34.3157908, -85.2951255), (34.7336449, -85.0...",33,"(34.3157908, -85.2951255)","(34.7336449, -85.0008241)"
2,"((34.3157908, -85.2951255), (33.8723315, -85.8...",45,"(34.3157908, -85.2951255)","(33.8723315, -85.8896643)"
3,"((34.3157908, -85.2951255), (34.0323882, -84.5...",49,"(34.3157908, -85.2951255)","(34.0323882, -84.5051537)"
4,"((34.3157908, -85.2951255), (34.7838801, -86.1...",56,"(34.3157908, -85.2951255)","(34.7838801, -86.1022011)"
...,...,...,...,...
0,"((43.8614388, -110.9357249), (43.866555, -111....",34,"(43.8614388, -110.9357249)","(43.866555, -111.6333206)"
1,"((43.8614388, -110.9357249), (44.2839539, -112...",74,"(43.8614388, -110.9357249)","(44.2839539, -112.3021668)"
2,"((43.8614388, -110.9357249), (42.5148505, -111...",101,"(43.8614388, -110.9357249)","(42.5148505, -111.765362)"
3,"((43.8614388, -110.9357249), (43.020349, -112....",113,"(43.8614388, -110.9357249)","(43.020349, -112.8691669)"


In [7]:
# Display `dff` with a running index; the repeating per-origin index moves into an
# `index` column. The result is only shown, not assigned back to `dff`.
dff.reset_index()

Unnamed: 0,index,o_d_pair,geodesic,o,d
0,0,"((34.3157908, -85.2951255), (34.3937861, -85.6...",23,"(34.3157908, -85.2951255)","(34.3937861, -85.6979718)"
1,1,"((34.3157908, -85.2951255), (34.7336449, -85.0...",33,"(34.3157908, -85.2951255)","(34.7336449, -85.0008241)"
2,2,"((34.3157908, -85.2951255), (33.8723315, -85.8...",45,"(34.3157908, -85.2951255)","(33.8723315, -85.8896643)"
3,3,"((34.3157908, -85.2951255), (34.0323882, -84.5...",49,"(34.3157908, -85.2951255)","(34.0323882, -84.5051537)"
4,4,"((34.3157908, -85.2951255), (34.7838801, -86.1...",56,"(34.3157908, -85.2951255)","(34.7838801, -86.1022011)"
...,...,...,...,...,...
4820,0,"((43.8614388, -110.9357249), (43.866555, -111....",34,"(43.8614388, -110.9357249)","(43.866555, -111.6333206)"
4821,1,"((43.8614388, -110.9357249), (44.2839539, -112...",74,"(43.8614388, -110.9357249)","(44.2839539, -112.3021668)"
4822,2,"((43.8614388, -110.9357249), (42.5148505, -111...",101,"(43.8614388, -110.9357249)","(42.5148505, -111.765362)"
4823,3,"((43.8614388, -110.9357249), (43.020349, -112....",113,"(43.8614388, -110.9357249)","(43.020349, -112.8691669)"


In [None]:
# k_closest = []

In [5]:
from typing import List, Tuple
from dataclasses import dataclass
import requests



{'destination_addresses': ['1601 Evatt Dr SE, Fort Payne, AL 35967, USA'],
 'origin_addresses': ['Snow Loop, Rome, GA 30165, USA'],
 'rows': [{'elements': [{'distance': {'text': '36.8 mi', 'value': 59178},
     'duration': {'text': '48 mins', 'value': 2861},
     'status': 'OK'}]}],
 'status': 'OK'}

In [None]:
@dataclass
class GeoPair:
    """An origin/destination pair of coordinate tuples."""

    # Both points are passed unchanged to geopy's geodesic().
    origin: Tuple[float, float]
    destination: Tuple[float, float]

    @property
    def distance_geodesic(self):
        """Geodesic distance between origin and destination in miles, truncated to int."""
        separation = geodesic(self.origin, self.destination)
        return int(separation.miles)

# def _call_google_distance_api(geo_pair: GeoPair) -> dict:
#     """
#         Requires Google Distance Matrix API Key
#         cost $5 per 1k: https://developers.google.com/maps/documentation/distance-matrix/usage-and-billing
#         with $200 per month included (40k)
#     """
#     def _tuple_to_string(x: tuple) -> str: return f"{x[0]},{x[1]}"
#     params = {
#         'origins': _tuple_to_string(geo_pair.origin),
#         'destinations': _tuple_to_string(geo_pair.destination),
#         'units': 'imperial',
#         'key': GOOGLE_DISTANCE_API_KEY
#     }
#     url = "https://maps.googleapis.com/maps/api/distancematrix/json"
#     response = requests.get(url, params=params)
#     if response.status_code != 200:
#         print(response.status_code, response.text)
#     return response.json()

In [12]:
# ok so I have o-d pairs; now I need the geodesic distance between them





2041