# Access to Public Infrastructure

In [1]:
%reset -f

In [2]:
%reload_ext autoreload
%autoreload 2

In [3]:
import os
import random
import re
import sys
from pathlib import Path
import warnings
import datetime

import geopandas as gpd

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import dask.dataframe as dd
import seaborn as sns

from tqdm import tqdm
from ast import literal_eval


In [4]:
# Google earth engine
import ee

try:
    ee.Initialize()
except Exception as e:
    ee.Authenticate()
    ee.Initialize()


In [5]:
# Set filepaths
PROJ = Path(os.path.realpath("."))
if str(PROJ) == "/n/home10/shreyasgm":
    PROJ = Path(
        "/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/"
    )
ROOT = PROJ.parents[1]
DATA = ROOT / "data/"


In [6]:
sys.path.append(str(PROJ))
sys.path.append(str(ROOT / "src"))
from gee_utils import *
from aggregate_ee import *
from general_utils import *


# Read data

## GHS

In [7]:
def prepare_ghs():
    # Prepare GHS
    ghs_cols = {
        "ID_HDC_G0": "ghs_id",
        "CTR_MN_ISO": "iso",
        "CTR_MN_NM": "country",
        "UC_NM_MN": "uc_name",
        "GRGN_L1": "region",
        "GRGN_L2": "subregion",
        "P15": "pop_2015",
        "P00": "pop_2000",
        "NTL_AV": "ntl_2015",
        "TT2CC": "timetocap",
    }

    ghs_shp = gpd.read_file(
        DATA / "raw/shapefiles/ghs/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
    )

    ghs_shp = ghs_shp[list(ghs_cols.keys()) + ["geometry"]]
    ghs_shp = ghs_shp.rename(columns=ghs_cols)
    ghs_shp.to_file(DATA / "intermediate/ghs/ghs.shp", index=False)


# prepare_ghs()

In [8]:
# Read GHS to check
gdf_mask = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
ghs = gpd.read_file(
    DATA / "intermediate/ghs/ghs.shp",
)
ghs.ghs_id = ghs.ghs_id.astype(int)
ghs.head(2)

Unnamed: 0,ghs_id,iso,country,uc_name,region,subregion,pop_2015,pop_2000,ntl_2015,timetocap,geometry
0,1,USA,United States,Honolulu,Northern America,Northern America,512853.666675,458967.881664,24.768574,15412.057919,"POLYGON ((-158.01244 21.42219, -157.99158 21.4..."
1,2,PYF,French Polynesia,Papeete,Oceania,Polynesia,91521.124603,83726.092071,9.028501,,"POLYGON ((-149.56967 -17.51763, -149.50802 -17..."


In [9]:
# Calculate travel time to major cities
ghs_points = ghs.copy()
ghs_points.geometry = ghs_points.centroid
# Get medium and large sized cities
medium_cities = ghs_points.loc[
    ghs_points.pop_2015 > 200000, ["ghs_id", "geometry"]
].copy()
large_cities = ghs_points.loc[
    ghs_points.pop_2015 > 500000, ["ghs_id", "geometry"]
].copy()
ghs_points.head()


  ghs_points.geometry = ghs_points.centroid


Unnamed: 0,ghs_id,iso,country,uc_name,region,subregion,pop_2015,pop_2000,ntl_2015,timetocap,geometry
0,1,USA,United States,Honolulu,Northern America,Northern America,512853.666675,458967.881664,24.768574,15412.057919,POINT (-157.89356 21.34069)
1,2,PYF,French Polynesia,Papeete,Oceania,Polynesia,91521.124603,83726.092071,9.028501,,POINT (-149.58338 -17.55980)
2,3,USA,United States,Santa Maria,Northern America,Northern America,123181.284843,114315.451935,19.102939,2653.754871,POINT (-120.43440 34.92314)
3,4,USA,United States,Monterey,Northern America,Northern America,67772.288858,65621.290962,14.669142,2716.114413,POINT (-121.88238 36.60772)
4,5,USA,United States,Santa Barbara,Northern America,Northern America,114753.150167,106699.837791,19.633925,2604.691241,POINT (-119.74371 34.42767)


In [10]:
len(medium_cities)

2908

In [11]:
len(large_cities)

996

## GADM

In [12]:
admin_boundaries_dict = {
    k: ee.FeatureCollection(f"users/shreyasgm/growth_lab/gadm36_{k}") for k in range(2)
}

In [13]:
gadm_0 = gpd.read_file(ROOT / "data/raw/shapefiles/gadm/gadm36_0.shp")
gadm_0.head()

Unnamed: 0,GID_0,NAME_0,geometry
0,ABW,Aruba,"POLYGON ((-69.97820 12.46986, -69.97847 12.469..."
1,AFG,Afghanistan,"POLYGON ((68.52644 31.75435, 68.53852 31.75457..."
2,AGO,Angola,"MULTIPOLYGON (((11.73347 -16.67255, 11.73347 -..."
3,AIA,Anguilla,"MULTIPOLYGON (((-63.42375 18.58903, -63.42375 ..."
4,ALA,Åland,"MULTIPOLYGON (((21.32195 59.74986, 21.32195 59..."


## Ports

In [14]:
# Location of ports
ports = gpd.read_file(
    DATA / "raw/ports/wld/wld_trs_ports_wfp.shp",
)
# There are other columns too if needed
selected_cols = [
    "portname",
    "code",
    "prttype",
    "status",
    "prtsize",
    "iso3_op",
    "geometry",
]
ports = ports[selected_cols]
# Ports > Very small only
ports = ports[ports.prtsize.isin(["Huge", "Large", "Medium", "Small"])]
# Sea ports only, if unknown, only include if >= Medium-sized
ports = ports[
    (ports.prttype == "Sea")
    | ((ports.prttype == "Unknown") & (ports.prtsize.isin(["Huge", "Large", "Medium"])))
]
# If a port serves multiple countries, include in each country
ports["iso3_op"] = ports.iso3_op.str.split(", |,| ")
ports = ports.explode("iso3_op")
ports.head()

Unnamed: 0,portname,code,prttype,status,prtsize,iso3_op,geometry
8,Punta De Palmas,VEPLV,Sea,Unknown,Small,VEN,POINT (-71.63472 10.44090)
9,Moin Bay,CRMOB,Sea,Unknown,Small,CRI,POINT (-83.07669 10.00433)
24,Damman,SADMN,Sea,Unknown,Medium,SAU,POINT (50.19733 26.48684)
25,Tijuana,MXTIJ,Sea,Unknown,Small,MEX,POINT (-117.12305 32.50821)
27,Khark Island,IRKHK,Sea,Open,Small,IRN,POINT (50.33049 29.21619)


In [15]:
ports[ports.iso3_op == "COL"]

Unnamed: 0,portname,code,prttype,status,prtsize,iso3_op,geometry
419,Cartagena,COCTG,Sea,Open,Large,COL,POINT (-75.53294 10.40575)
1411,Santa Marta,COSMR,Sea,Open,Small,COL,POINT (-74.21571 11.24916)
1902,Barranquilla,COBAQ,Sea,Unknown,Medium,COL,POINT (-74.75962 10.96288)
2402,Tolu,COTLU,Sea,Unknown,Small,COL,POINT (-75.58649 9.52177)
3122,Puerto Bolivar,COPBO,Sea,Unknown,Small,COL,POINT (-71.95953 12.24900)
3389,Buenaventura,COBUN,Sea,Unknown,Medium,COL,POINT (-77.07841 3.89244)
3391,Mamonal,COMAM,Sea,Open,Medium,COL,POINT (-75.50977 10.32662)


## Airports

In [16]:
# List of major airports in latin america
airports_locations = pd.read_csv(DATA / "raw/airports/datahub/airport-codes.csv")
airports_locations = airports_locations[
    ["type", "name", "iso_country", "iata_code", "coordinates"]
]
# Only keep medium or large sized airports
airports_locations = airports_locations[
    airports_locations.type.isin(["medium_airport", "large_airport"])
]
airports_locations[
    ["longitude", "latitude"]
] = airports_locations.coordinates.str.replace(" ", "").str.split(",", expand=True)

airports_locations = airports_locations.rename(columns={"iso_country": "iso2"})
# Convert to iso3
country_codes = pd.read_csv(
    DATA / "raw/country_codes/country-codes.csv",
    usecols=["ISO3166-1-Alpha-3", "ISO3166-1-Alpha-2"],
)
country_codes = country_codes.rename(
    columns={"ISO3166-1-Alpha-3": "iso3", "ISO3166-1-Alpha-2": "iso2"}
)
airports_locations = airports_locations.merge(country_codes, on="iso2", how="left")
airports_locations = airports_locations.drop(columns=["coordinates", "iso2"])
# Convert to geodataframe
airports_locations = gpd.GeoDataFrame(
    airports_locations,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(
        airports_locations.longitude, airports_locations.latitude
    ),
)
airports_locations.head()

Unnamed: 0,type,name,iata_code,longitude,latitude,iso3,geometry
0,medium_airport,Aleknagik / New Airport,WKK,-158.617996216,59.2826004028,USA,POINT (-158.61800 59.28260)
1,medium_airport,Khost International Airport (U.C.),,69.80734,33.284605,AFG,POINT (69.80734 33.28460)
2,medium_airport,Honiara International Airport,HIR,160.05499267578,-9.4280004501343,SLB,POINT (160.05499 -9.42800)
3,medium_airport,Munda Airport,MUA,157.26300048828125,-8.327969551086426,SLB,POINT (157.26300 -8.32797)
4,medium_airport,Hongyuan Airport,AHJ,102.35224,32.53154,CHN,POINT (102.35224 32.53154)


In [17]:
# Read data from Nunn and Puga paper to get airport capacity from ICAO
icao = pd.read_stata(DATA / "raw/airports/nunn_and_puga/Data_for_Tables2&3 (1).dta")
# icao = icao[["cityid", "countrycode", "lat", "lon", "totnr_passengers2014"]]
# Convert to geodataframe
icao = gpd.GeoDataFrame(
    icao,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(
        icao.lon, icao.lat
    ),
)
icao.head()

Unnamed: 0,cityid,countrycode,lat,lon,nrcities_55to65,share_below6000_w500,totnr_weekly1989,totnr_twice1989,totnr_daily1989,totnr_states_twice1989,...,z_EIGEN_1989,z_EIGEN_2014,shareEI_below6000_w500,zEIGEN_90to00,zEIGEN_90to10,ldist_eq,timezone_fullhours,Lights92_025grid,pop90_025grid,geometry
0,ABW1,ABW,12.501389,-70.015221,103.0,0.669903,,,,,...,,,0.594734,,,2.52584,-4.0,,,POINT (-70.01522 12.50139)
1,AFG1,AFG,34.565853,69.212326,16.0,0.625,,,,,...,,,0.124247,,,3.542866,4.0,0.087191,0.460277,POINT (69.21233 34.56585)
2,AFG2,AFG,31.505756,65.847824,10.0,0.6,,,,,...,,,0.723915,,,3.45017,4.0,0.003343,0.091252,POINT (65.84782 31.50576)
3,AGO1,AGO,-8.858375,13.231178,85.0,0.364706,,,,,...,,,0.258898,,,2.181363,1.0,0.269194,0.282425,POINT (13.23118 -8.85837)
4,ALB1,ALB,41.414742,19.72056,76.0,0.539474,,,,,...,,,0.792863,,,3.723637,1.0,0.262931,0.628007,POINT (19.72056 41.41474)


In [18]:
icao.apply(lambda x: x.isnull().sum())

cityid                        0
countrycode                   0
lat                           0
lon                           0
nrcities_55to65               0
share_below6000_w500          0
totnr_weekly1989            819
totnr_twice1989             819
totnr_daily1989             819
totnr_states_twice1989      819
totnr_weekly_55to651989     819
totnr_weekly_65to1001989    819
totnr_weekly_20to551989     819
totnr_passengers2014        819
totnr_weekly2014            819
totnr_weekly_55to652014     819
totnr_weekly_65to1002014    819
totnr_weekly_20to552014     819
EIGEN89_55to65                0
regdum_eap                    0
regdum_lac                    0
regdum_mena                   0
regdum_na                     0
regdum_sa                     0
regdum_ssa                    0
regdum_eca                    0
ltotnr_cityconnects1989     819
ltotnr_seats1989            819
ltotnr_flights1989          819
ltotnr_passengers1989       819
z_EIGEN_1989                819
z_EIGEN_

In [19]:
# Only include airports that lie within 50km of airports in the Nunn and Puga dataset
# Calculate distance between airports in airports_locations and icao
# Convert to EPSG:3857 to measure in meters
airports_locations = airports_locations.to_crs("EPSG:3857")
icao = icao.to_crs("EPSG:3857")
airports_locations["dist_to_icao"] = airports_locations.geometry.apply(
    lambda x: icao.geometry.distance(x).min()
)
airports_locations = airports_locations[airports_locations["dist_to_icao"] < 50000]
airports_locations.head()

Unnamed: 0,type,name,iata_code,longitude,latitude,iso3,geometry,dist_to_icao
21,large_airport,Port Moresby Jacksons International Airport,POM,147.22000122070312,-9.44338035583496,PNG,POINT (16388455.570 -1056024.327),5.10602
26,medium_airport,Zabrat Airport,ZXT,49.9768066406,40.4955422161,AZE,POINT (5563392.667 4938216.601),9673.489841
42,large_airport,Keflavik International Airport,KEF,-22.6056,63.985001,ISL,POINT (-2516443.881 9345956.366),1939.337205
52,medium_airport,Caye Caulker Airport,CUK,-88.03250122070312,17.734699249267578,BLZ,POINT (-9799733.209 2006518.784),38258.856216
53,medium_airport,Caye Chapel Airport,CYC,-88.04109954833984,17.7007999420166,BLZ,POINT (-9800690.371 2002557.223),35222.312233


In [20]:
len(airports_locations)

1202

# Functions

## Main travel time function

In [21]:
def get_travel_time(points_gdf, scaleFactor=500, maxdistance=1000000):
    # Load the global friction surface
    friction = ee.Image("Oxford/MAP/friction_surface_2019").select("friction")
    # Get coordinates
    coord_list = [
        [x, y] for x, y in zip(points_gdf["geometry"].x, points_gdf["geometry"].y)
    ]
    # Convert to EE FeatureCollection
    points_fc = ee.FeatureCollection(
        ee.List(coord_list).map(
            lambda x: ee.Feature(ee.Geometry.Point(x), {}).set("val", 1)
        )
    )
    # Convert to Image
    points_img = (
        points_fc.reduceToImage(["val"], ee.Reducer.firstNonNull())
        #         .unmask(0)
        #         .reproject("epsg:4326", scale=scaleFactor)
    )
    # Compute the cumulative travel time from everywhere to the nearest point
    points_travel_time = friction.cumulativeCost(points_img, maxdistance)
    return points_travel_time

## Supporting

In [22]:
def get_city_centroids(cntry_selected_abbr):
    import warnings

    # Prepare city locations
    gdf_mask = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
    ghs = gpd.read_file(
        DATA / "intermediate/ghs/ghs.shp",
        mask=gdf_mask[gdf_mask.iso_a3 == cntry_selected_abbr],
    )
    ghs = ghs[ghs.iso == cntry_selected_abbr]
    ghs.ghs_id = ghs.ghs_id.astype(int)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        ghs.geometry = ghs.centroid
    return ghs

In [23]:
def get_population_mask(image):
    # Get population mask for 2015
    pop = (
        ee.ImageCollection("JRC/GHSL/P2016/POP_GPW_GLOBE_V1")
        .filterDate("2015-01-01", "2015-01-02")
        .first()
    )
    return image.updateMask(pop.gt(0))

In [24]:
def compile_downloaded_files(infiles_pattern, outfile):
    # Process downloaded csv's from GCS
    csv_dict = {}
    csvlist = list((DATA / "intermediate/gee_accessibility_agg").glob(infiles_pattern))
    # Make sure file exists and has data in it
    csvlist_valid = [x for x in csvlist if x.stat().st_size > 2]
    print(outfile)
    print(f"Total files: {len(csvlist)} ..... Valid files: {len(csvlist_valid)}")
    # Read files
    df = dd.read_csv(csvlist_valid, include_path_column="fname")
    df["date"] = dd.to_datetime(df["date"])
    df = df.compute()
    # Export
    df.to_parquet(outfile, index=False)

In [25]:
def process_cntry_accessibility(
    cntry_selected_abbr,
    dest_points_shp,
    ee_boundaries=None,
    ee_id_cols=None,
    gcs_name=None,
    gcs_bucket="earth_engine_aggregations",
    apply_population_mask=False,
    export_image=False,
):
    # Calculate travel time
    if len(dest_points_shp) == 0:
        warnings.warn("Destination shapefile has no points, skipping")
    else:
        dest_travel_time = get_travel_time(dest_points_shp)
        if apply_population_mask:
            dest_travel_time = get_population_mask(dest_travel_time)
        if export_image:
            export_ee_img_to_gcs(
                dest_travel_time, cntry_selected_abbr, outfile=export_image
            )
        if ee_boundaries is not None:
            res = aggregate_ee(
                dest_travel_time,
                ee_boundaries,
                id_cols=ee_id_cols,
                set_date="2019-01-01",
                export_to_gcs=gcs_name,
                gcs_bucket=gcs_bucket,
            )

# Time to Cities

### GHS

In [None]:
def process_cities_travel_time_ghs(cntry_selected_abbr):
    # Get city centroids
    ghs = get_city_centroids(cntry_selected_abbr)
    medium_cities = ghs.loc[ghs.pop_2015 > 200000, ["ghs_id", "geometry"]]
    large_cities = ghs.loc[ghs.pop_2015 > 500000, ["ghs_id", "geometry"]]
    # Load city boundaries on earth engine
    city_boundaries_ee = load_city_boundaries(cntry_selected_abbr)
    # Calculate travel time to medium and large cities
    process_cntry_accessibility(
        cntry_selected_abbr,
        medium_cities,
        city_boundaries_ee,
        ee_id_cols=["ghs_id"],
        gcs_name=f"medium_cities_travel_time_ghs_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
    )
    process_cntry_accessibility(
        cntry_selected_abbr,
        large_cities,
        city_boundaries_ee,
        ee_id_cols=["ghs_id"],
        gcs_name=f"large_cities_travel_time_ghs_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
    )

In [None]:
# check = process_cntry_accessibility_ghs("KAZ")

### GADM

In [None]:
def process_cities_travel_time_gadm(cntry_selected_abbr, selected_admin_level):
    # Get city centroids
    ghs = get_city_centroids(cntry_selected_abbr)
    medium_cities = ghs.loc[ghs.pop_2015 > 200000, ["ghs_id", "geometry"]]
    large_cities = ghs.loc[ghs.pop_2015 > 500000, ["ghs_id", "geometry"]]
    # Load administrative boundaries
    country_boundaries_ee, admin_boundaries_ee = load_admin_boundaries(
        cntry_selected_abbr, selected_admin_level
    )
    # Calculate travel time
    process_cntry_accessibility(
        cntry_selected_abbr,
        ghs,
        admin_boundaries_ee,
        ee_id_cols=[f"GID_{selected_admin_level}"],
        gcs_name=f"cities_travel_time_level_{selected_admin_level}_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
        apply_population_mask=True,
    )
    process_cntry_accessibility(
        cntry_selected_abbr,
        large_cities,
        admin_boundaries_ee,
        ee_id_cols=[f"GID_{selected_admin_level}"],
        gcs_name=f"large_cities_travel_time_level_{selected_admin_level}_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
        apply_population_mask=True,
    )
    process_cntry_accessibility(
        cntry_selected_abbr,
        medium_cities,
        admin_boundaries_ee,
        ee_id_cols=[f"GID_{selected_admin_level}"],
        gcs_name=f"medium_cities_travel_time_level_{selected_admin_level}_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
        apply_population_mask=True,
    )

In [None]:
# check = process_cities_travel_time_gadm("KAZ", 2)

## Run for all countries

In [None]:
cntry_list = pd.read_parquet(
    DATA / f"intermediate/gadm_without_geometry/gadm36_0.parquet"
)
cntry_list.head(2)

Unnamed: 0,GID_0,NAME_0
0,ABW,Aruba
1,AFG,Afghanistan


In [None]:
bucket = initialize_gcs()

In [None]:
# Run for all countries - GADM
results_dict_gadm = {}
for i, cntry_row in tqdm(cntry_list.iterrows()):
    for admin_level in [0, 1, 2]:
        # Set outfile name
        outfile = f"cities_travel_time_level_{admin_level}_{cntry_row.GID_0}"
        # Check if file exists
        if not check_if_file_exists_gcs(f"{outfile}.csv", bucket):
            # Get VIIRS results - export to drive
            results = process_cities_travel_time_gadm(
                cntry_selected_abbr=cntry_row.GID_0,
                selected_admin_level=admin_level,
            )
            # Put result in dict
            results_dict_gadm[cntry_row.GID_0] = results

256it [17:36,  4.13s/it]


In [None]:
# Run for all countries - GHS
results_dict_ghs = {}
for i, cntry_row in tqdm(cntry_list.iterrows()):
    # Set outfile name
    outfile = f"cities_travel_time_ghs_{cntry_row.GID_0}"
    # Check if file exists
    if not (
        check_if_file_exists_gcs(f"medium_{outfile}.csv", bucket)
        and check_if_file_exists_gcs(f"large_{outfile}.csv", bucket)
    ):
        # Get VIIRS results - export to drive
        results = process_cities_travel_time_ghs(
            cntry_selected_abbr=cntry_row.GID_0,
        )
        # Put result in dict
        results_dict_ghs[cntry_row.GID_0] = results

256it [04:02,  1.05it/s]


## Download and process files

In [None]:
# Download all missing files
downloaded_files = download_missing_gcp(
    local_folderpath=DATA / "intermediate/gee_accessibility_agg/",
    gcs_bucketname="earth_engine_aggregations",
)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 174/174 [00:14<00:00, 11.95it/s]


In [None]:
compile_downloaded_files(
    infiles_pattern="medium_cities_travel_time_ghs_*.csv",
    outfile=PROJ / f"tables/time_to_cities/medium_cities_travel_time_ghs.parquet",
)
compile_downloaded_files(
    infiles_pattern="large_cities_travel_time_ghs_*.csv",
    outfile=PROJ / f"tables/time_to_cities/large_cities_travel_time_ghs.parquet",
)

/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/medium_cities_travel_time_ghs.parquet
Total files: 151 ..... Valid files: 151
/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/large_cities_travel_time_ghs.parquet
Total files: 134 ..... Valid files: 134


In [None]:
for x in range(3):
    compile_downloaded_files(
        infiles_pattern=fr"medium_cities_travel_time_level_{x}_*.csv",
        outfile=PROJ
        / f"tables/time_to_cities/medium_cities_travel_time_level_{x}.parquet",
    )
    compile_downloaded_files(
        infiles_pattern=fr"large_cities_travel_time_level_{x}_*.csv",
        outfile=PROJ
        / f"tables/time_to_cities/large_cities_travel_time_level_{x}.parquet",
    )
    compile_downloaded_files(
        infiles_pattern=fr"cities_travel_time_level_{x}_*.csv",
        outfile=PROJ / f"tables/time_to_cities/cities_travel_time_level_{x}.parquet",
    )

/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/medium_cities_travel_time_level_0.parquet
Total files: 139 ..... Valid files: 139
/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/large_cities_travel_time_level_0.parquet
Total files: 124 ..... Valid files: 124
/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/cities_travel_time_level_0.parquet
Total files: 231 ..... Valid files: 231
/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/medium_cities_travel_time_level_1.parquet
Total files: 145 ..... Valid files: 145
/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/large_cities_travel_time_level_1.parquet
Total files: 129 ..... Valid files: 129
/nfs/projects_ci3/c/ci3_colombia/shreyas/proj/2021-08-11 - Accessibility/tables/time_to_cities/cities_travel_time_level_1.pa

# Time to Ports and Airports

## Define functions

### Ports

In [27]:
def process_ports_travel_time_ghs(cntry_selected_abbr):
    # Location of ports
    ports_cntry = ports[ports.iso3_op == cntry_selected_abbr]
    # Load city boundaries on earth engine
    city_boundaries_ee = load_city_boundaries(cntry_selected_abbr)
    # Calculate travel time to ports
    process_cntry_accessibility(
        cntry_selected_abbr,
        ports_cntry,
        city_boundaries_ee,
        ee_id_cols=["ghs_id"],
        gcs_name=f"ports_travel_time_ghs_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
    )

In [28]:
def process_ports_travel_time_gadm(cntry_selected_abbr, selected_admin_level):
    # Location of ports
    ports_cntry = ports[ports.iso3_op == cntry_selected_abbr]
    # Load administrative boundaries
    country_boundaries_ee, admin_boundaries_ee = load_admin_boundaries(
        cntry_selected_abbr, selected_admin_level
    )
    # Calculate travel time
    process_cntry_accessibility(
        cntry_selected_abbr,
        ports_cntry,
        admin_boundaries_ee,
        ee_id_cols=[f"GID_{selected_admin_level}"],
        gcs_name = f"ports_travel_time_level_{selected_admin_level}_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
        apply_population_mask=True,
    )

### Airports

In [29]:
def process_airports_travel_time_ghs(cntry_selected_abbr):
    # Location of airports
    airports = airports_locations[airports_locations.iso3 == cntry_selected_abbr]
    # Load city boundaries on earth engine
    city_boundaries_ee = load_city_boundaries(cntry_selected_abbr)
    # Calculate travel time to airports
    process_cntry_accessibility(
        cntry_selected_abbr,
        airports,
        city_boundaries_ee,
        ee_id_cols=["ghs_id"],
        gcs_name=f"airports_travel_time_ghs_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
    )

In [30]:
def process_airports_travel_time_gadm(cntry_selected_abbr, selected_admin_level):
    # Location of airports
    airports = airports_locations[airports_locations.iso3 == cntry_selected_abbr]
    # Load administrative boundaries
    country_boundaries_ee, admin_boundaries_ee = load_admin_boundaries(
        cntry_selected_abbr, selected_admin_level
    )
    # Calculate travel time
    process_cntry_accessibility(
        cntry_selected_abbr,
        airports,
        admin_boundaries_ee,
        ee_id_cols=[f"GID_{selected_admin_level}"],
        gcs_name = f"airports_travel_time_level_{selected_admin_level}_{cntry_selected_abbr}",
        gcs_bucket="earth_engine_aggregations",
        apply_population_mask=True,
    )

## Run for custom set of countries

In [31]:
bucket = initialize_gcs()

### Ports

In [None]:
# Run for all countries - GADM
results_dict_gadm = {}
for cntry_iso in tqdm(gadm_0.GID_0.unique()):
    for admin_level in [0, 1, 2]:
        # Set outfile name
        outfile = f"ports_travel_time_level_{admin_level}_{cntry_iso}"
        # Check if file exists
        if not check_if_file_exists_gcs(f"{outfile}.csv", bucket):
            # Get VIIRS results - export to GCS
            results = process_ports_travel_time_gadm(
                cntry_selected_abbr=cntry_iso,
                selected_admin_level=admin_level,
            )
            # Put result in dict
            results_dict_gadm[cntry_iso] = results

100%|██████████| 256/256 [02:58<00:00,  1.44it/s]


In [107]:
# Run for all countries - GHS
results_dict_ghs = {}
for cntry_iso in tqdm(gadm_0.GID_0.unique()):
    # Set outfile name
    outfile = f"ports_travel_time_ghs_{cntry_iso}"
    # Check if file exists
    if not check_if_file_exists_gcs(f"{outfile}.csv", bucket):
        # Get accessibility results - export to GCS
        results = process_ports_travel_time_ghs(
            cntry_selected_abbr=cntry_iso,
        )
        # Put result in dict
        results_dict_ghs[cntry_iso] = results

100%|██████████| 256/256 [00:54<00:00,  4.71it/s]


### Airports

In [102]:
# Run for all countries - GADM
results_dict_gadm = {}
for cntry_iso in tqdm(gadm_0.GID_0.unique()):
    for admin_level in [0, 1, 2]:
        # Set outfile name
        outfile = f"airports_travel_time_level_{admin_level}_{cntry_iso}"
        # Check if file exists
        if not check_if_file_exists_gcs(f"{outfile}.csv", bucket):
            # Get VIIRS results - export to GCS
            results = process_airports_travel_time_gadm(
                cntry_selected_abbr=cntry_iso,
                selected_admin_level=admin_level,
            )
            # Put result in dict
            results_dict_gadm[cntry_iso] = results

100%|██████████| 256/256 [04:59<00:00,  1.17s/it]


In [106]:
# Run for all countries - GHS
results_dict_ghs = {}
for cntry_iso in tqdm(gadm_0.GID_0.unique()):
    # Set outfile name
    outfile = f"airports_travel_time_ghs_{cntry_iso}"
    # Check if file exists
    if not check_if_file_exists_gcs(f"{outfile}.csv", bucket):
        # Get accessibility results - export to GCS
        results = process_airports_travel_time_ghs(
            cntry_selected_abbr=cntry_iso,
        )
        # Put result in dict
        results_dict_ghs[cntry_iso] = results

100%|██████████| 256/256 [02:09<00:00,  1.97it/s]


## Download and process files

In [26]:
# Download all missing files
downloaded_files = download_missing_gcp(
    local_folderpath=DATA / "intermediate/gee_accessibility_agg/",
    gcs_bucketname="earth_engine_aggregations",
)

100%|██████████| 1839/1839 [04:04<00:00,  7.51it/s]


### Ports

In [27]:
compile_downloaded_files(
    infiles_pattern="ports_travel_time_ghs_*.csv",
    outfile=PROJ / "tables/time_to_cities/ports_travel_time_ghs.parquet",
)

/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/ports_travel_time_ghs.parquet
Total files: 123 ..... Valid files: 98


In [28]:
for x in range(3):
    compile_downloaded_files(
        infiles_pattern=fr"ports_travel_time_level_{x}_*.csv",
        outfile=PROJ / f"tables/time_to_cities/ports_travel_time_level_{x}.parquet",
    )

/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/ports_travel_time_level_0.parquet
Total files: 115 ..... Valid files: 115
/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/ports_travel_time_level_1.parquet
Total files: 118 ..... Valid files: 114
/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/ports_travel_time_level_2.parquet
Total files: 119 ..... Valid files: 86


### Airports

In [29]:
compile_downloaded_files(
    infiles_pattern="airports_travel_time_ghs_*.csv",
    outfile=PROJ / "tables/time_to_cities/airports_travel_time_ghs.parquet",
)

/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/airports_travel_time_ghs.parquet
Total files: 209 ..... Valid files: 174


In [30]:
for x in range(3):
    compile_downloaded_files(
        infiles_pattern=fr"airports_travel_time_level_{x}_*.csv",
        outfile=PROJ / f"tables/time_to_cities/airports_travel_time_level_{x}.parquet",
    )

/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/airports_travel_time_level_0.parquet
Total files: 193 ..... Valid files: 193
/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/airports_travel_time_level_1.parquet
Total files: 201 ..... Valid files: 192
/n/holystore01/LABS/hausmann_lab/lab/glocal_aggregations/shreyas/proj/2021-07-28 - GEE/tables/time_to_cities/airports_travel_time_level_2.parquet
Total files: 202 ..... Valid files: 148
