In [1]:
import os
from glob import glob


import geopandas
import pyrosm
import rasterio
import snail

from snail.intersections import split
from snail.intersections import get_cell_indices
from tqdm.notebook import tqdm

In [2]:
# Name prefix shared by the OSM input extract and the road files written below.
adm1_name = 'bangladesh'

In [8]:
# Root folder holding the 'osm' subfolder that is read from and written to below.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data_folder = '/tmp/mert2014'

In [3]:
# Open the OSM PBF extract for the selected area with pyrosm.
osm_pbf_path = os.path.join(
    data_folder,
    'osm',
    f'{adm1_name}-latest-highway.osm.pbf',
)
osm = pyrosm.OSM(osm_pbf_path)

In [5]:
# Extract the driving network; with nodes=True the call yields both nodes and edges.
nodes, edges = osm.get_network(nodes=True, network_type="driving")

In [12]:
# Highway classes treated as the "core" network: each class from motorway
# down to tertiary, with its "_link" variant listed first.
core = tuple(
    f'{road_class}{suffix}'
    for road_class in ('motorway', 'trunk', 'primary', 'secondary', 'tertiary')
    for suffix in ('_link', '')
)

# Keep only the edges whose highway tag is one of the core classes.
is_core = edges['highway'].isin(core)
core_edges = edges.loc[is_core]

In [13]:
# Number of core edges versus the number of edges in the full driving network.
len(core_edges), len(edges)

(572595, 3915109)

In [14]:
# Columns retained on the core edges; every other column is dropped.
select_columns = [
    'bridge',
    'highway',
    'lanes',
    'maxspeed',
    'oneway',
    'smoothness',
    'surface',
    'tracktype',
    'tunnel',
    'width',
    'id',
    'name',
    'osm_type',
    'geometry',
    'u',
    'v',
    'length',
]
core_edges = core_edges.loc[:, select_columns]

In [None]:
%%timeit
# Benchmark writing the core edges as GeoPackage.
# NOTE: %%timeit executes the statement several times, so the file is rewritten on each run.
core_edges.to_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.gpkg'), driver='GPKG')

In [None]:
%%timeit
# Benchmark writing the core edges as FlatGeobuf.
# NOTE: %%timeit executes the statement several times, so the file is rewritten on each run.
core_edges.to_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.fgb'), driver='FlatGeobuf')

In [None]:
%%timeit
# Benchmark writing the core edges as GeoParquet.
# NOTE: %%timeit executes the statement several times, so the file is rewritten on each run.
core_edges.to_parquet(os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.geoparquet'))

In [None]:
%%timeit
# Benchmark writing the core edges as Feather.
# NOTE: %%timeit executes the statement several times, so the file is rewritten on each run.
core_edges.to_feather(os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.geofeather'))

In [None]:
# Write direct from pyrosm driving
#edges.to_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads.gpkg'), driver='GPKG', layer='edges')
#nodes.to_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads.gpkg'), driver='GPKG', layer='nodes')

In [None]:
# Read from file written above (same path as the GPKG write: under data_folder/osm)
#core_edges = geopandas.read_file(os.path.join(data_folder, 'osm', f'{adm1_name}-roads-core.gpkg'))

In [None]:
# Open one raster whose grid (width, height, transform) is used for the splitting below.
# The dataset handle is left open because later cells keep reading from it.
# NOTE(review): path is relative to the notebook's working directory, not to data_folder.
raster_data = rasterio.open(os.path.join('..', 'aqueduct', 'inuncoast_historical_nosub_hist_rp0050_0.tif'))

In [None]:
%%timeit
core_splits = []
for edge in tqdm(core_edges.itertuples()):
    splits = split(
        edge.geometry,
        raster_data.width,
        raster_data.height,
        list(raster_data.transform),
    )
    for s in splits:
        core_splits.append({
            'id': edge.id,
            'geometry': s
        })
core_splits = geopandas.GeoDataFrame(core_splits)

In [None]:
# Peek at the last five of the first fifty core edges (id and geometry only).
core_edges[['id','geometry']].head(50).tail()

In [None]:
# Register tqdm with pandas so that `progress_apply` is available on Series/DataFrames.
tqdm.pandas()

In [None]:
def _cell_index_of(geom):
    """Look up the raster cell index for one split geometry on the `raster_data` grid."""
    return get_cell_indices(
        geom,
        raster_data.width,
        raster_data.height,
        list(raster_data.transform),
    )


# One cell index per split geometry, with a progress bar.
core_splits['cell_index'] = core_splits.geometry.progress_apply(_cell_index_of)

In [None]:
# Preview the first rows of the split geometries.
core_splits.head()

In [None]:
# Read band 1 of the open raster into memory.
band = raster_data.read(1)

In [None]:
# Look up the raster value for every split: `band` is indexed with the second
# element of the cell index first, then the first element.
sampled_values = core_splits['cell_index'].apply(
    lambda cell: band[cell[1], cell[0]]
)
core_splits['inuncoast_historical_nosub_hist_rp0050_0'] = sampled_values

In [None]:
# Show the raster grid dimensions as (width, height).
raster_data.width, raster_data.height

In [None]:
# Count how many split geometries share each cell index.
core_splits.cell_index.value_counts()

In [None]:
# Inspect the resulting frame.
core_splits

In [None]:
# Collect every GeoTIFF in the aqueduct folder. glob() returns matches in
# arbitrary order, so sort them to keep the derived column order (and the file
# shown below) reproducible across machines and runs.
fnames = sorted(glob('../aqueduct/*.tif'))
fnames[0]

In [None]:
# Sample every raster in `fnames` at each split's cell index, adding one column per raster.
for raster_path in fnames:
    # The column is named after the raster file, without its '.tif' extension.
    layer_name = os.path.splitext(os.path.basename(raster_path))[0]
    print(layer_name)
    with rasterio.open(raster_path) as src:
        band = src.read(1)
        core_splits[layer_name] = core_splits['cell_index'].apply(
            lambda cell: band[cell[1], cell[0]]
        )

In [None]:
# Write the attribute table (everything except the geometry column) as CSV;
# the '.gz' suffix makes pandas gzip-compress the output.
core_splits.drop(columns='geometry').to_csv('core_splits.csv.gz')

In [None]:
# Write only the edge id and split geometry to a GeoPackage in the working directory.
core_splits[['id','geometry']].to_file('core_splits.gpkg', driver='GPKG')