In [None]:
import ast
import copy
import json
import math
import os
import re
import time
import unicodedata
import warnings
from functools import partial
from urllib.parse import quote, urlencode

import contextily
import folium
import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polyline
import pyproj
import rasterio
import rasterio.features
import rasterio.plot
import rasterio.transform
import rasterio.warp
import rasterio.windows
import requests
import requests_cache
import scipy.interpolate
import seaborn as sns
import shapely.ops
import shapely.wkt
from tqdm.autonotebook import tqdm

%matplotlib inline

In [None]:
def point_to_latlon(point):
    '''
    Return the (lat, lon) pair of a point, i.e. its (y, x) attributes.
    '''
    lat, lon = point.y, point.x
    return lat, lon

def point_to_lonlat(point):
    '''
    Return the (lon, lat) pair of a point, i.e. its (x, y) attributes.
    '''
    lon, lat = point.x, point.y
    return lon, lat

def point_to_xy(point):
    '''
    Return the (x, y) pair of a point; same ordering as point_to_lonlat.
    '''
    x, y = point_to_lonlat(point)
    return x, y

def buffer_meters(geom, buffer):
    '''
    Buffer a lon/lat (WGS84) geometry by a distance given in metres.

    The geometry is projected to an azimuthal equidistant projection centred
    on its own centroid (units of metres), buffered there, and projected back
    to WGS84 lon/lat.

    NOTE: buffer_meters is defined a second time in the raster-loading cell
    further down; whichever cell ran last wins. Keep the two in sync (or
    delete one).

    geom: shapely geometry in lon/lat order.
    buffer: buffer distance in metres.
    Returns the buffered shapely geometry in lon/lat.
    '''
    centroid = geom.centroid
    wgs84 = pyproj.Proj('+proj=longlat +datum=WGS84 +no_defs')
    local_aeqd = pyproj.Proj(
        f"+proj=aeqd +R=6371000 +units=m +lat_0={centroid.y} +lon_0={centroid.x}"
    )

    # Build each transformer once. The deprecated pyproj.transform() creates a
    # new transformer on every call. always_xy pins the (lon, lat) axis order.
    to_aeqd = pyproj.Transformer.from_proj(wgs84, local_aeqd, always_xy=True).transform
    to_wgs84 = pyproj.Transformer.from_proj(local_aeqd, wgs84, always_xy=True).transform

    geom_aeqd = shapely.ops.transform(to_aeqd, geom)
    buffered_aeqd = geom_aeqd.buffer(buffer)

    return shapely.ops.transform(to_wgs84, buffered_aeqd)


def lat_lon_distance(from_lat, from_lon, to_lat, to_lon):
    '''
    Find the distance between two latlon points (in degrees).
    Uses the Haversine formula with the Earth radius taken at the mean
    latitude of each pair, so the result is in the units of radius_of_earth
    (metres).
    Any of the arguments can be scalars or vectors of points; they are
    broadcast against each other.
    '''
    mean_lat = (np.asarray(from_lat) + np.asarray(to_lat)) / 2
    radius = radius_of_earth(mean_lat)

    from_lat = np.radians(from_lat)
    from_lon = np.radians(from_lon)
    to_lat = np.radians(to_lat)
    to_lon = np.radians(to_lon)
    d_lat = to_lat - from_lat
    d_lon = to_lon - from_lon

    a = np.sin(d_lat / 2)**2 + np.cos(from_lat) * np.cos(to_lat) * np.sin(d_lon / 2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    d = radius * 2 * np.arcsin(np.sqrt(a))
    return d


def radius_of_earth(lat):
    '''
    Geocentric radius of the Earth at latitude `lat` (degrees). Vectorised.
    Formula and data from https://en.wikipedia.org/wiki/Earth_radius
    '''
    equatorial = 6378137.0  # Earth equatorial radius
    polar = 6356752.3  # Earth polar radius

    phi = np.radians(lat)
    cos_sq = np.cos(phi)**2
    sin_sq = np.sin(phi)**2

    numerator = equatorial**4 * cos_sq + polar**4 * sin_sq
    denominator = equatorial**2 * cos_sq + polar**2 * sin_sq
    return np.sqrt(numerator / denominator)

In [None]:
# City boundary geometries; later cells walk this table by row position.
CITY_BOUNDARIES_PATH = '../data/city_boundaries_100m.geojson'
df_city = gpd.read_file(CITY_BOUNDARIES_PATH)

In [None]:
def load_hospitals(iloc, cluster_threshold=500):
    '''
    Load hospital locations for the city at row position `iloc`.

    Reads '../data/hosp/{iloc}.csv' (columns used: type, id, lat, lon, tags,
    nodes). Tagged nodes contribute their own coordinates; ways are
    represented by the coordinates of their first member node. Hospitals
    closer than `cluster_threshold` (in the units of lat_lon_distance) to an
    earlier kept hospital are dropped.

    Returns a list of (lat, lon) tuples, or None if the CSV does not exist.
    '''
    path = f'../data/hosp/{iloc}.csv'
    if not os.path.exists(path):
        return None

    df_hosp = pd.read_csv(path)

    # Element ids are only unique per element type, so member-node lookups
    # must be restricted to node rows.
    is_node = df_hosp['type'] == 'node'

    hospitals = []
    for t in df_hosp.itertuples():
        if t.type == 'node' and not pd.isnull(t.lat) and not pd.isnull(t.lon) and not pd.isnull(t.tags):
            hospitals.append((t.lat, t.lon))
            continue

        if t.type == 'way':
            if pd.isnull(t.nodes):
                continue
            # literal_eval parses the stored list without executing arbitrary
            # code read from the file.
            node_ids = ast.literal_eval(t.nodes)
            if not node_ids:
                continue
            matches = df_hosp[is_node & (df_hosp['id'] == node_ids[0])]
            if matches.empty:
                # First member node is not in the table; nothing to locate.
                continue
            node = matches.iloc[0]
            if pd.isnull(node.lat) or pd.isnull(node.lon):
                continue
            hospitals.append((node.lat, node.lon))

    # Remove too close together.
    lats = np.array([h[0] for h in hospitals])
    lons = np.array([h[1] for h in hospitals])

    remove_point = np.full(len(lats), False, dtype=bool)
    for i, (lat, lon) in enumerate(zip(lats, lons)):
        if remove_point[i]:
            # Already absorbed by an earlier hospital; it must not absorb others.
            continue
        distances = lat_lon_distance(lat, lon, lats[i+1:], lons[i+1:])
        remove_point[i+1:] |= distances < cluster_threshold

    lats = lats[~remove_point]
    lons = lons[~remove_point]
    return list(zip(lats, lons))
    

In [None]:
POP_PATH_1k = '../input/ppp_2020_1km_Aggregated.tif'
POP_PATH_100m = '../input/MOSAIC_ppp_prj_2020/MOSAIC_ppp_prj_2020.vrt'


def load_pop_raster(geom, buffer=1_000, pop_path=POP_PATH_100m):
    '''
    Read band 1 of the population raster over the bounding box of `geom`
    grown by `buffer` (passed to buffer_meters).

    Masked cells are replaced with 0.

    Returns (dataset, array, window). NOTE: the dataset has already been
    closed by the time it is returned.
    '''
    search_area = buffer_meters(geom, buffer)
    with rasterio.open(pop_path) as src:
        window = rasterio.windows.from_bounds(*search_area.bounds, src.transform)
        pop = np.ma.filled(src.read(1, window=window, masked=True), 0)

    return src, pop, window

def load_mortality_raster(geom, buffer=1_000):
    '''
    Read band 1 of the infant-mortality raster over the bounding box of
    `geom` grown by `buffer` (passed to buffer_meters).

    Masked cells and negative values are replaced with 0.

    Returns (dataset, array, window). NOTE: the dataset has already been
    closed by the time it is returned.
    '''
    path = '../input/povmap-global-subnational-infant-mortality-rates-v2-geotiff/povmap_global_subnational_infant_mortality_rates_v2.tif'
    search_area = buffer_meters(geom, buffer)
    with rasterio.open(path) as src:
        window = rasterio.windows.from_bounds(*search_area.bounds, src.transform)
        rates = np.ma.filled(src.read(1, window=window, masked=True), 0)
        negative = rates < 0
        rates[negative] = 0

    return src, rates, window


def buffer_meters(geom, buffer):
    '''
    Buffer a lon/lat (WGS84) geometry by a distance given in metres.

    The geometry is projected to an azimuthal equidistant projection centred
    on its own centroid (units of metres), buffered there, and projected back
    to WGS84 lon/lat.

    NOTE: this re-defines the buffer_meters from the helper cell near the top
    of the notebook; once this cell has run, this is the definition in use.
    Keep the two in sync (or delete one).

    geom: shapely geometry in lon/lat order.
    buffer: buffer distance in metres.
    Returns the buffered shapely geometry in lon/lat.
    '''
    centroid = geom.centroid
    wgs84 = pyproj.Proj('+proj=longlat +datum=WGS84 +no_defs')
    local_aeqd = pyproj.Proj(
        f"+proj=aeqd +R=6371000 +units=m +lat_0={centroid.y} +lon_0={centroid.x}"
    )

    # Build each transformer once. The deprecated pyproj.transform() creates a
    # new transformer on every call. always_xy pins the (lon, lat) axis order.
    to_aeqd = pyproj.Transformer.from_proj(wgs84, local_aeqd, always_xy=True).transform
    to_wgs84 = pyproj.Transformer.from_proj(local_aeqd, wgs84, always_xy=True).transform

    geom_aeqd = shapely.ops.transform(to_aeqd, geom)
    buffered_aeqd = geom_aeqd.buffer(buffer)

    return shapely.ops.transform(to_wgs84, buffered_aeqd)
        
       
def weighted_raster_sample(f, a_geom, window, n_samples):
    '''
    Draw `n_samples` random points, choosing raster cells with probability
    proportional to their value in `a_geom` and placing each point uniformly
    at random inside the chosen cell.

    f: dataset providing window_transform(window).
    a_geom: 2-D array of non-negative weights aligned with `window`.
    window: the raster window `a_geom` was read from.
    n_samples: number of points to draw.

    Returns a list of n_samples coordinate pairs as produced by
    rasterio.transform.xy (x first, then y).
    Raises ValueError if the weights do not sum to a positive number.
    '''
    total = a_geom.sum()
    if not total > 0:
        # Otherwise the division below yields NaN probabilities and
        # np.random.choice fails with a much less helpful message.
        raise ValueError('weighted_raster_sample: raster weights do not sum to a positive value')

    weights_flat = (a_geom / total).flatten()

    # Draw all cells and all jitter offsets in one go.
    flat_idx = np.random.choice(len(weights_flat), size=n_samples, p=weights_flat)
    rows, cols = np.unravel_index(flat_idx, a_geom.shape)
    jitter = np.random.uniform(0, 1, size=(n_samples, 2))

    transform = f.window_transform(window)
    lonlats = []
    for row, col, (j_row, j_col) in zip(rows, cols, jitter):
        # offset='ul' maps (row, col) to the cell's upper-left corner, so the
        # [0, 1) jitter keeps the point inside the cell. The default
        # offset='center' would add another half cell and push samples out of
        # the selected cell.
        lonlat = rasterio.transform.xy(transform, row + j_row, col + j_col, offset='ul')
        lonlats.append(lonlat)

    return lonlats
    

In [None]:
# Pop-weighted sample.
#
# For every city that has no cached result yet: sample population-weighted
# source points inside the part of the city whose infant mortality is below
# the city median, ask the OSRM table service for driving durations from each
# source to each hospital, and cache the result as one CSV per city.

for iloc in tqdm(range(len(df_city))):
    osrm_path = f'../data/osrm-low/{iloc}.csv'
    if os.path.exists(osrm_path):
        # Cached by an earlier run.
        continue

    # Load hospitals.
    hospitals = load_hospitals(iloc)
    # load_hospitals returns None when the city has no hospital file, so test
    # truthiness instead of len() to cover both None and an empty list.
    if not hospitals:
        continue

    # Load population density raster. load_pop_raster already defaults to the
    # 100 m raster, so no coarse-to-fine fallback is needed.
    geom = df_city.geometry.iloc[iloc]
    f, a, window = load_pop_raster(geom)
    mask = rasterio.features.geometry_mask([geom], a.shape, f.window_transform(window), invert=True)
    a_geom = a * mask
    if not a_geom.sum() > 0:
        warnings.warn(f'City {iloc}: no population inside the city boundary; skipping.')
        continue

    # Load infant mortality raster and resample it onto the population grid.
    # Both grids cover the same buffered bounding box, so they are aligned on
    # normalised [0, 1] axes.
    f_im, a_im, window_im = load_mortality_raster(geom)
    x_im = np.linspace(0, 1, a_im.shape[1])
    y_im = np.linspace(0, 1, a_im.shape[0])
    x_a = np.linspace(0, 1, a.shape[1])
    y_a = np.linspace(0, 1, a.shape[0])
    a_im_intep = scipy.interpolate.RectBivariateSpline(y_im, x_im, a_im, kx=1, ky=1)(y_a, x_a)
    # Small random noise breaks ties between equal mortality values.
    a_im_intep = a_im_intep + np.random.uniform(0, a_im_intep.max()/100, a_im_intep.shape)
    a_low_im = (a_im_intep < np.median(a_im_intep[mask])).astype(int)
    a_geom = a_geom * a_low_im
    if not a_geom.sum() > 0:
        warnings.warn(f'City {iloc}: no population in the low infant-mortality area; skipping.')
        continue

    # Pop-weighted random sample.
    source_lonlats = weighted_raster_sample(f, a_geom, window, n_samples=250)

    # Build OSM query: sources first, then hospitals, in one encoded polyline.
    source_lats = [p[1] for p in source_lonlats]
    source_lons = [p[0] for p in source_lonlats]
    dest_lats = [p[0] for p in hospitals]
    dest_lons = [p[1] for p in hospitals]
    latlons = [(lat, lon) for lon, lat in source_lonlats] + hospitals
    pline = polyline.encode(latlons)
    # Encoded polylines may contain '?' and other URL-significant characters;
    # percent-encode so the path is not truncated at a '?'.
    pline_escaped = quote(pline, safe='')
    sources = ';'.join(str(i) for i in range(len(source_lats)))
    destinations = ';'.join(str(i + len(source_lats)) for i in range(len(dest_lats)))
    url = f'http://router.project-osrm.org/table/v1/driving/polyline({pline_escaped})?sources={sources}&destinations={destinations}'

    response = requests.get(url)
    # Pause between requests to the shared routing server.
    time.sleep(10)

    try:
        payload = response.json()
    except ValueError:
        payload = {}
    if not response.ok or 'durations' not in payload:
        # Nothing is written, so the city is retried on the next run.
        warnings.warn(
            f'City {iloc}: OSRM table request failed '
            f'(HTTP {response.status_code}, code={payload.get("code")!r}); skipping.'
        )
        continue

    # Save dataframe: one row per (source, hospital) pair.
    durations = np.array(payload['durations'])
    records = [
        {
            'source_lat': source_lats[i_source],
            'source_lon': source_lons[i_source],
            'dest_lat': dest_lats[i_dest],
            'dest_lon': dest_lons[i_dest],
            'duration': durations[i_source, i_dest]
        }
        for i_source in range(len(source_lats))
        for i_dest in range(len(dest_lats))
    ]
    df_osrm = pd.DataFrame.from_records(records)
    df_osrm.to_csv(osrm_path, index=False)
        
    

In [None]:
# Merge into one table.
# Each per-city CSV that exists is tagged with its row position in df_city
# and the pieces are stacked into a single file.
dfs = []
for iloc in tqdm(range(len(df_city))):
    osrm_path = f'../data/osrm-low/{iloc}.csv'
    if os.path.exists(osrm_path):
        df = pd.read_csv(osrm_path).assign(city_iloc=iloc)
        dfs.append(df)

df_merged = pd.concat(dfs)
df_merged.to_csv('../data/osrm_hospital_duration_low_im.csv', index=False)