In [37]:
import os
from glob import glob


import geopandas
import pandas
import pyrosm
import rasterio
import snail

from snail.intersections import split
from snail.intersections import get_cell_indices
from tqdm.notebook import tqdm

In [2]:
# Lower-case region name; used below as the prefix of the OSM input and output file names.
adm1_name = 'bangladesh'

In [8]:
# Root data directory; later cells join the 'osm', 'aqueduct' and 'outputs' sub-folders onto it.
data_folder = '/tmp/mert2014'

In [3]:
# Open the regional highway extract (.osm.pbf) with pyrosm.
highway_pbf = os.path.join(data_folder, 'osm', f'{adm1_name}-latest-highway.osm.pbf')
osm = pyrosm.OSM(highway_pbf)

In [5]:
# Extract the driving network; with nodes=True the call is unpacked into a nodes table and an edges table.
nodes, edges = osm.get_network(nodes=True, network_type="driving")

In [12]:
# Highway classes kept as the "core" network. Each class's "*_link" variant is
# listed immediately before the class itself, from motorway down to tertiary.
core = tuple(
    highway_class
    for road_class in ('motorway', 'trunk', 'primary', 'secondary', 'tertiary')
    for highway_class in (f'{road_class}_link', road_class)
)
# Keep only the edges whose highway value is one of the core classes.
is_core_highway = edges.highway.isin(core)
core_edges = edges[is_core_highway]

In [13]:
# Row counts: core subset vs. the full driving network.
len(core_edges), len(edges)

(572595, 3915109)

In [14]:
# Columns retained on the core edges: OSM tag columns first, then the
# identifier / geometry / graph columns (same order as before).
osm_tag_columns = [
    'bridge', 'highway', 'lanes', 'maxspeed', 'oneway',
    'smoothness', 'surface', 'tracktype', 'tunnel', 'width',
]
feature_columns = ['id', 'name', 'osm_type', 'geometry', 'u', 'v', 'length']
select_columns = osm_tag_columns + feature_columns
core_edges = core_edges[select_columns]

In [18]:
%%timeit
# Benchmark: write the core edges as a GeoPackage.
gpkg_path = os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.gpkg')
core_edges.to_file(gpkg_path, driver='GPKG')

2min 56s ± 12.3 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit
# Benchmark: write the core edges as FlatGeobuf.
fgb_path = os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.fgb')
core_edges.to_file(fgb_path, driver='FlatGeobuf')

2min 31s ± 14.5 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit
# Benchmark: write the core edges as (Geo)Parquet.
geoparquet_path = os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.geoparquet')
core_edges.to_parquet(geoparquet_path)


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



2.07 s ± 47.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit
# Benchmark: write the core edges as (Geo)Feather.
geofeather_path = os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.geofeather')
core_edges.to_feather(geofeather_path)


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



1.78 s ± 90.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [19]:
# Write direct from pyrosm driving
#edges.to_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads.gpkg'), driver='GPKG', layer='edges')
#nodes.to_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads.gpkg'), driver='GPKG', layer='nodes')

In [20]:
# Read back the core-roads GeoPackage written above (same data_folder path)
#core_edges = geopandas.read_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.gpkg'))

In [21]:
# Open one coastal flood raster. Its grid (width, height, transform) is used
# below to split the edges and to compute cell indices, and band 1 is read from
# it later, so the dataset is intentionally left open here.
# The file is resolved under data_folder, like the other raster reads in this
# notebook, rather than relative to the current working directory.
raster_data = rasterio.open(
    os.path.join(data_folder, 'aqueduct', 'inuncoast_historical_nosub_hist_rp0050_0.tif')
)

In [97]:
%%timeit
core_splits = []
for edge in tqdm(core_edges.itertuples()):
    splits = split(
        edge.geometry,
        raster_data.width,
        raster_data.height,
        list(raster_data.transform),
    )
    for s in splits:
        core_splits.append({
            'id': edge.id,
            'geometry': s
        })
core_splits = geopandas.GeoDataFrame(core_splits)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

0it [00:00, ?it/s]

1min 13s ± 2.6 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [98]:
# Row counts: edges before splitting vs. pieces after splitting.
len(core_edges), len(core_splits)

(572595, 602027)

In [23]:
# Show the last five of the first 50 rows (id and geometry columns only).
core_edges[['id','geometry']].head(50).tail()

Unnamed: 0,id,geometry
103,10101959,"LINESTRING (90.27121 23.93744, 90.27134 23.93740)"
104,10101959,"LINESTRING (90.27134 23.93740, 90.27147 23.93735)"
105,10101959,"LINESTRING (90.27147 23.93735, 90.27160 23.93730)"
106,10101959,"LINESTRING (90.27160 23.93730, 90.27179 23.93723)"
107,10101959,"LINESTRING (90.27179 23.93723, 90.27227 23.93705)"


In [24]:
# Register tqdm's pandas integration (the progress_* variants of apply/map).
# NOTE(review): no progress_* call is visible in this notebook - confirm this cell is still needed.
tqdm.pandas()

In [96]:
%%timeit
# The raster grid description is the same for every row, so evaluate it once
# instead of once per geometry inside the lambda.
grid_width = raster_data.width
grid_height = raster_data.height
grid_transform = list(raster_data.transform)
# Store, for each split geometry, the list form of what get_cell_indices returns
# (later cells read element 0 and element 1 of it to index the raster band).
core_splits['cell_index'] = core_splits.geometry.apply(
    lambda geom: list(get_cell_indices(geom, grid_width, grid_height, grid_transform)))

59.8 s ± 166 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [27]:
# Preview the first rows, now including the cell_index column.
core_splits.head()

Unnamed: 0,id,geometry,cell_index
0,10101955,"LINESTRING (90.26915 23.93680, 90.26892 23.93628)","(32432, 7927)"
1,10101955,"LINESTRING (90.26892 23.93628, 90.26869 23.93580)","(32432, 7927)"
2,10101955,"LINESTRING (90.26869 23.93580, 90.26855 23.93550)","(32432, 7927)"
3,10101955,"LINESTRING (90.26855 23.93550, 90.26818 23.93467)","(32432, 7927)"
4,10101955,"LINESTRING (90.26818 23.93467, 90.26784 23.93397)","(32432, 7927)"


In [28]:
# Read band 1 of the already-open coastal raster into memory; indexed below with two indices per lookup.
band = raster_data.read(1)

In [29]:
%%timeit
def _coastal_band_value(cell):
    # cell[1] indexes the first axis of the band, cell[0] the second
    return band[cell[1], cell[0]]

# Look up the coastal raster value under every split geometry.
core_splits['inuncoast_historical_nosub_hist_rp0050_0'] = core_splits.cell_index.apply(_coastal_band_value)

388 ms ± 87 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
# Display the splits table with the newly added raster value column.
core_splits

Unnamed: 0,id,geometry,cell_index,inuncoast_historical_nosub_hist_rp0050_0
0,10101955,"LINESTRING (90.26915 23.93680, 90.26892 23.93628)","(32432, 7927)",0.0
1,10101955,"LINESTRING (90.26892 23.93628, 90.26869 23.93580)","(32432, 7927)",0.0
2,10101955,"LINESTRING (90.26869 23.93580, 90.26855 23.93550)","(32432, 7927)",0.0
3,10101955,"LINESTRING (90.26855 23.93550, 90.26818 23.93467)","(32432, 7927)",0.0
4,10101955,"LINESTRING (90.26818 23.93467, 90.26784 23.93397)","(32432, 7927)",0.0
...,...,...,...,...
602022,978106853,"LINESTRING (90.49298 23.86353, 90.49298 23.86389)","(32459, 7936)",0.0
602023,978106853,"LINESTRING (90.49298 23.86389, 90.49298 23.86401)","(32459, 7936)",0.0
602024,978114132,"LINESTRING (90.51480 23.84409, 90.51482 23.84589)","(32461, 7938)",0.0
602025,978114133,"LINESTRING (90.51498 23.84590, 90.51499 23.84410)","(32461, 7938)",0.0


In [31]:
# List the Aqueduct GeoTIFFs and show the first match.
# NOTE(review): this pattern is relative to the working directory, whereas other
# cells read rasters from os.path.join(data_folder, 'aqueduct') - confirm both
# refer to the same folder. glob() order is not sorted, so fnames[0] may vary.
fnames = glob('../aqueduct/*.tif')
fnames[0]

'../aqueduct/inunriver_rcp8p5_00IPSL-CM5A-LR_2080_rp00050.tif'

In [52]:
def _coastal_record(colname, fname):
    """Build the metadata record for one coastal ('coast') raster.

    `colname` is the file name without extension and is split on "_" as
    inuncoast_{climatescenario}_{subsidence}_{year}_{returnperiod}_{projection}.
    Six tokens give the projection directly; with eight tokens the sixth and
    seventh are discarded and the last one is taken as the projection.

    Raises ValueError when the name has any other number of tokens.
    """
    parts = colname.split("_")
    if len(parts) == 6:
        _, clim, sub, y, rp, proj = parts
    elif len(parts) == 8:
        _, clim, sub, y, rp, _, _, proj = parts
    else:
        raise ValueError(f"unexpected coastal raster name: {colname!r}")
    # A projection token of "0" is recorded as the 95th percentile.
    if proj == "0":
        proj = "95"
    # The historical baseline is recorded as year 2010.
    if y == "hist":
        y = 2010
    return {
        "key": colname,
        "climate_scenario": clim,
        "subsidence": sub,
        "year": int(y),
        "return_period": int(rp[2:]),  # drop the leading "rp"
        "sea_level_rise_percentile": int(proj),
        "filename": fname,
    }


def _river_record(colname, fname):
    """Build the metadata record for one river raster.

    `colname` is split on "_" as
    inunriver_{climatescenario}_{model}_{year}_{returnperiod}.

    Raises ValueError when the name does not have exactly five tokens.
    """
    _, clim, model, y, rp = colname.split("_")
    if y == "hist":
        y = 2010
    return {
        "key": colname,
        "climate_scenario": clim,
        # Strip only the leading zero padding; zeros inside a model name are kept.
        "model": model.lstrip("0"),
        "year": int(y),
        "return_period": int(rp[2:]),  # drop the leading "rp"
        "filename": fname,
    }


# Parse every raster file name into a record, then build one table per hazard.
coastal_records = []
river_records = []
for path in fnames:
    fname = os.path.basename(path)
    colname = os.path.splitext(fname)[0]
    if 'coast' in colname:
        coastal_records.append(_coastal_record(colname, fname))
    else:
        # Anything that is not a coastal layer is parsed as a river layer.
        river_records.append(_river_record(colname, fname))
coastal = pandas.DataFrame(coastal_records)
river = pandas.DataFrame(river_records)

In [53]:
# Save the coastal raster metadata table; the path is relative to the current working directory.
coastal.to_csv('aqueduct_coastal.csv')

In [54]:
# Save the river raster metadata table; the path is relative to the current working directory.
river.to_csv('aqueduct_river.csv')

In [60]:
def associate_raster(df, key, fname, band_number=1):
    """Add column `key` to `df` with the raster value under each row's cell index.

    Opens `fname` with rasterio, reads band `band_number`, and for every entry
    of `df.cell_index` looks up the band value, using element 1 of the entry as
    the first-axis index and element 0 as the second-axis index.

    `df` is modified in place; nothing is returned.
    """
    def value_at(cell):
        first_axis, second_axis = cell[1], cell[0]
        return band_values[first_axis, second_axis]

    with rasterio.open(fname) as source:
        band_values = source.read(band_number)
        df[key] = df.cell_index.apply(value_at)

In [57]:
# Number of river rasters available per year.
river.year.value_counts()

2080    90
2030    90
2050    90
1980     9
Name: year, dtype: int64

In [59]:
# Restrict the river rasters to two years and four return periods.
in_selected_years = river.year.isin((1980, 2080))
in_selected_return_periods = river.return_period.isin((50, 100, 500, 1000))
subset = river[in_selected_years & in_selected_return_periods]
len(subset)

44

In [91]:
%%timeit
# Time one full associate_raster call (open + read + lookup) for a single river raster.
river_key = 'inunriver_rcp8p5_00IPSL-CM5A-LR_2080_rp00050'
river_tif = os.path.join(data_folder, 'aqueduct', f'{river_key}.tif')
associate_raster(core_splits, river_key, river_tif)

9.15 s ± 403 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [92]:
# Do the I/O outside of the timeit loop: read band 1 once into memory.
# The with-block closes the dataset as soon as the band has been read, so no
# file handle is left open for the rest of the session; `dataset` stays bound
# but is closed afterwards.
with rasterio.open(
    os.path.join(data_folder, 'aqueduct', 'inunriver_rcp8p5_00IPSL-CM5A-LR_2080_rp00050.tif')
) as dataset:
    band_data = dataset.read(1)

In [95]:
# Number of rows the lookup below is applied to.
len(core_splits)

602027

In [94]:
%%timeit
def _river_band_value(cell):
    # cell[1] indexes the first axis of the band, cell[0] the second
    return band_data[cell[1], cell[0]]

# Time only the in-memory lookup, using the band read in the previous cell.
core_splits['inunriver_rcp8p5_00IPSL-CM5A-LR_2080_rp00050'] = core_splits.cell_index.apply(_river_band_value)

364 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [86]:
# Add one column per selected river raster, named after the raster's key.
aqueduct_folder = os.path.join(data_folder, 'aqueduct')
for raster_key, raster_filename in zip(subset.key, subset.filename):
    associate_raster(core_splits, raster_key, os.path.join(aqueduct_folder, raster_filename))

KeyboardInterrupt: 

In [87]:
%%timeit
# Benchmark: write the attributes (no geometry) to a .csv.gz file.
csv_path = os.path.join(data_folder, 'outputs', 'core_splits.csv.gz')
core_splits.drop(columns='geometry').to_csv(csv_path)

24.5 s ± 1.82 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [81]:
%%timeit
# Benchmark: write the attributes (no geometry) as a plain pandas Parquet file.
attributes_only = pandas.DataFrame(core_splits.drop(columns=['geometry']))
attributes_only.to_parquet(os.path.join(data_folder, 'outputs', 'core_splits.parquet'))

728 ms ± 25.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [88]:
%%timeit
# Benchmark: write only id and geometry as a GeoPackage.
splits_gpkg_path = os.path.join(data_folder,  'outputs', 'core_splits.gpkg')
core_splits[['id','geometry']].to_file(splits_gpkg_path, driver='GPKG')

1min 52s ± 5.12 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [89]:
%%timeit
# Benchmark: write the full splits table (with geometry) as (Geo)Parquet.
splits_geoparquet_path = os.path.join(data_folder,  'outputs', 'core_splits.geoparquet')
core_splits.to_parquet(splits_geoparquet_path)


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



2.54 s ± 49.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [90]:
%%timeit
# Benchmark: write the full splits table (with geometry) as (Geo)Feather.
splits_geofeather_path = os.path.join(data_folder,  'outputs', 'core_splits.geofeather')
core_splits.to_feather(splits_geofeather_path)


This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



1.79 s ± 63.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
