In [None]:
import geopandas
import numpy
import pandas
import rasterio
from pyproj import Geod
from tqdm import tqdm as tqdm_core
from tqdm.notebook import tqdm

In [None]:
import math
import os
from glob import glob

In [None]:
# Register tqdm's `progress_apply` on pandas objects; it is used further down
# to show a progress bar while segment lengths are computed.
tqdm.pandas()

In [None]:
# List what is currently in outputs/ (sorted by path). This only displays the
# file names; nothing is read here.
sorted(glob("outputs/*"))

In [None]:
# Module-level region name. downsample() reads this global when it builds its
# output path, and the main `for continent in continents` loop rebinds it, so
# this value only matters when downsample() is called outside that loop.
continent = 'antarctica'

In [None]:
def min_max(df, first_exposure_column=3):
    """Return the row-wise minimum and maximum over the exposure columns.

    Exposure columns are selected purely by position: every column from
    ``first_exposure_column`` to the end of the frame. Call this before
    appending derived columns to ``df``, otherwise they are included too.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose trailing columns hold the exposure values.
    first_exposure_column : int, default 3
        Positional index of the first exposure column. The default keeps the
        original behaviour of skipping the first three columns.

    Returns
    -------
    (pandas.Series, pandas.Series)
        Per-row minimum and per-row maximum, indexed like ``df``.
    """
    # iloc keeps the selection positional even if column labels repeat
    exposure = df.iloc[:, first_exposure_column:]
    return exposure.min(axis=1), exposure.max(axis=1)

In [None]:
def downsample(df, key, factor, raster_height, raster_width, raster_transform):
    """Sum road length per rescaled grid cell and write it as a GeoTIFF.

    Each row's ``cell_index`` (an ``(x, y)`` pair) is multiplied by ``factor``,
    floored, and wrapped into the rescaled grid. ``length_km`` is then summed
    per rescaled cell and written as a single-band float raster to
    ``outputs/{continent}-core_{key}_{factor}.tif``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have ``cell_index`` and ``length_km`` columns. ``cell_index``
        values must unpack into two numbers. The frame is not modified.
    key : str
        Label interpolated into the output file name.
    factor : float
        Multiplier applied to the raster dimensions and to the cell indices
        (for example ``1/16``).
    raster_height, raster_width : int
        Dimensions of the source grid, in cells.
    raster_transform : affine transform
        Transform of the source grid; it is composed with a scale so the
        rescaled grid spans the same extent.

    Raises
    ------
    ValueError
        If ``factor`` shrinks either dimension to less than one cell.

    Notes
    -----
    The output path uses the module-level ``continent`` variable, which must
    be set before calling.
    """
    # Set up rescaled transform
    height_ds = math.floor(raster_height * factor)
    width_ds = math.floor(raster_width * factor)
    if height_ds < 1 or width_ds < 1:
        # Without this guard the scale computation below fails with an opaque
        # ZeroDivisionError.
        raise ValueError(
            f"factor={factor!r} gives an empty {width_ds}x{height_ds} grid "
            f"from a {raster_width}x{raster_height} raster"
        )
    raster_transform_ds = raster_transform * raster_transform.scale(
        (raster_width / width_ds),
        (raster_height / height_ds)
    )

    # Downsample
    def downsample_index(xy):
        x, y = xy
        # NOTE(review): the modulo wraps indices that land past the rescaled
        # grid edge back to the opposite side. That can only happen when the
        # raster size is not an exact multiple of 1/factor - confirm that
        # wrapping (rather than clamping) is intended.
        x = math.floor(x * factor) % width_ds
        y = math.floor(y * factor) % height_ds
        return (x, y)

    # Build the grouping key on a derived frame so the caller's DataFrame is
    # left untouched.
    grouped = (
        df[['length_km']]
        .assign(cell_index_downsample=df.cell_index.apply(downsample_index))
        .groupby('cell_index_downsample')
        .sum()
    )

    # Set up data array; cells with no roads stay at zero
    length_raster_ds = numpy.zeros((height_ds, width_ds))
    for (col, row), length_km in grouped['length_km'].items():
        length_raster_ds[row, col] = length_km

    with rasterio.open(
        f'outputs/{continent}-core_{key}_{factor}.tif',
        'w',
        driver='GTiff',
        height=length_raster_ds.shape[0],
        width=length_raster_ds.shape[1],
        count=1,
        dtype=length_raster_ds.dtype,
        crs='+proj=latlong',
        transform=raster_transform_ds,
        compress='lzw'
    ) as dataset:
        dataset.write(length_raster_ds, 1)

In [None]:
# Region names to process. Each one is interpolated into the input
# (`outputs/{continent}-latest-highway-core_splits.geoparquet`) and output
# file names by the main loop.
continents = [
    'africa',
    'antarctica',
    'asia',
    'australia-oceania',
    'central-america',
    'europe',
    'north-america',
    'south-america',
]

In [None]:
# Take the grid dimensions and affine transform from one of the Aqueduct
# rasters; only the metadata is used, no pixel data is read.
# NOTE(review): presumably every hazard raster shares this grid - confirm.
with rasterio.open('../aqueduct/inuncoast_historical_nosub_hist_rp0001_5.tif') as dataset:
    raster_width, raster_height = dataset.width, dataset.height
    raster_transform = dataset.transform

In [None]:
# A segment counts as exposed when its exposure value is strictly greater than
# this threshold.
# NOTE(review): presumably a flood depth in metres - confirm against the hazard data.
DEPTH_THRESHOLD = 1

In [None]:
# Geodesic calculator on the WGS84 ellipsoid; used below to measure the length
# of each road geometry.
geod = Geod(ellps="WGS84")

In [None]:
# For every region: load the split road segments, measure them, and write
# rasters of exposed road length.
# NOTE: the loop variable has to stay named `continent` - downsample() reads
# that module-level name when it builds its output path.
for continent in continents:
    fname = f'outputs/{continent}-latest-highway-core_splits.geoparquet'
    df = geopandas.read_parquet(fname)

    # Row-wise exposure extremes. Computed before any derived column is added,
    # because min_max() picks the exposure columns by position.
    min_exp, max_exp = min_max(df)
    df['min_exp'] = min_exp
    df['max_exp'] = max_exp

    # Length of each segment, divided by 1e3 to give kilometres
    df['length_km'] = df.geometry.progress_apply(geod.geometry_length) / 1e3
    total = df['length_km'].sum()
    print(f"Total roads in {continent} {total:0,.0f}km")

    # Keep only segments exposed above the threshold in at least one column
    any_exposed = df.loc[df['max_exp'] > DEPTH_THRESHOLD].copy()
    # cell_index must be hashable (a tuple) so it can be used as a groupby key
    any_exposed['cell_index'] = any_exposed['cell_index'].apply(tuple)

    # Exposed length at full resolution and at two coarser resolutions
    for factor in (1, 1/16, 1/32):
        downsample(any_exposed, 'any', factor, raster_height, raster_width, raster_transform)

    # One coarse raster per river-flood column
    for column in any_exposed.columns:
        if "inunriver" not in column:
            continue
        print(column)
        scenario = any_exposed.loc[any_exposed[column] > DEPTH_THRESHOLD].copy()
        downsample(scenario, column, 1/32, raster_height, raster_width, raster_transform)