In [None]:
import itertools
import json
import pathlib
import os
from glob import glob

import geopandas
import numpy
import pandas
import pyarrow.parquet as pq

# Add UIDs to networks data

Read each network file and add a `uid` column holding a unique integer id, so that every asset can be
referenced consistently in the database and in the MBTiles for the visualisation tool.

In [None]:
def load_config(config_path=None):
    """Read the project's config.json and return its parsed content.

    Parameters
    ----------
    config_path : str or path-like, optional
        Explicit location of the config file. When omitted, the file is
        looked up at ``../../config.json`` relative to this file, or relative
        to the current working directory when ``__file__`` is not defined.

    Returns
    -------
    dict
        The parsed JSON document.

    Raises
    ------
    FileNotFoundError
        If the config file does not exist.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    if config_path is None:
        try:
            here = os.path.dirname(__file__)
        except NameError:
            # Notebook kernels do not define __file__; fall back to the
            # working directory instead of failing with a NameError.
            here = os.getcwd()
        config_path = os.path.join(here, "..", "..", "config.json")
    with open(config_path, "r") as config_fh:
        return json.load(config_fh)

# Root data directory, taken from the config's paths -> data entry.
base_path = load_config()['paths']['data']

In [None]:
# One row per network layer; later cells read its sector, path, asset_gpkg,
# asset_layer and asset_id_column fields.
layers = pandas.read_csv(f"{base_path}/processed_data/networks/network_layers_hazard_intersections_details.csv")
layers

In [None]:
# Each layer gets its own block of UIDs: layer i uses
# [i * UID_BLOCK_SIZE, i * UID_BLOCK_SIZE + count).
UID_BLOCK_SIZE = 100_000_000

base_ids = []
counts = []
vis_fnames = []
for i, layer in enumerate(layers.itertuples()):
    print(layer.sector, layer.asset_gpkg, layer.asset_layer)

    base_id = i * UID_BLOCK_SIZE
    layer_data = geopandas.read_file(os.path.join("..", layer.path), layer=layer.asset_layer)
    count = len(layer_data)
    # A layer larger than its block would run into the next layer's range and
    # silently produce duplicate UIDs, so fail loudly instead.
    if count > UID_BLOCK_SIZE:
        raise ValueError(
            f"Layer {layer.asset_gpkg}/{layer.asset_layer} has {count} features, "
            f"more than the {UID_BLOCK_SIZE} UIDs reserved per layer"
        )
    # Explicit dtype keeps the uid column the same integer width for every
    # layer, independent of the platform's default integer type.
    layer_data['uid'] = numpy.arange(base_id, base_id + count, dtype="int64")

    # Mirror the input path under networks_uids/ (buildings get an extra
    # networks_uids/ prefix in front of their buildings/ directory).
    out_fname = os.path.join(base_path, 'processed_data', layer.path.replace("networks", "networks_uids"))
    if "buildings" in out_fname:
        out_fname = out_fname.replace("buildings/", "networks_uids/buildings/")
    pathlib.Path(os.path.dirname(out_fname)).mkdir(parents=True, exist_ok=True)

    layer_data.to_file(
        out_fname,
        layer=layer.asset_layer,
        index=False,
        driver='GPKG')

    # Record the bookkeeping for this layer only once it has been written.
    base_ids.append(base_id)
    counts.append(count)
    vis_fnames.append(out_fname)

layers['base_id'] = base_ids
layers['count'] = counts
layers['vis_path'] = vis_fnames
layers

In [None]:
# Persist the layer table (now including base_id, count and vis_path) next to the UID-tagged network files.
layers.to_csv(f"{base_path}/processed_data/networks_uids/network_details.csv", index=False)

In [None]:
# Drop the reference to the last layer left over from the UID loop.
del layer_data

In [None]:
import gc
# Run a full garbage-collection pass now that layer_data has been deleted.
gc.collect()

## Results

Process `direct_damages_summary` results into parquet files with integer UIDs.

In [None]:
# Reload the layer table written earlier; it carries the vis_path column read below.
layers = pandas.read_csv(f"{base_path}/processed_data/networks_uids/network_details.csv")

In [None]:
# File-name endings of the per-layer result files; the ending decides whether
# a file is read as parquet or as csv.
suffixes = ['damages.parquet','exposures.parquet','losses.parquet', 'EAD_EAEL.csv']

In [None]:
def get_id_fname(layer):
    """Return the path of the parquet id-lookup file for ``layer``.

    The file name is built from the layer's ``asset_gpkg`` and
    ``asset_layer`` attributes and lives under
    ``<base_path>/processed_data/networks_uids``.
    """
    lookup_name = f"{layer.asset_gpkg}_{layer.asset_layer}_ids.parquet"
    return f"{base_path}/processed_data/networks_uids/{lookup_name}"

In [None]:
def get_results_fname(layer, suffix, clean=False):
    """Return the path of a direct-damages result file for ``layer``.

    Parameters
    ----------
    layer : object with ``asset_gpkg`` and ``asset_layer`` attributes
    suffix : str
        Trailing part of the file name (e.g. an entry of ``suffixes``).
    clean : bool, default False
        When true, return the location under ``direct_damages_summary_uids``;
        otherwise the location under ``direct_damages_summary``.

    NOTE(review): the two locations sit under different roots
    (``results/`` vs ``processed_data/results/``) - confirm this is intended.
    """
    basename = f"{layer.asset_gpkg}_{layer.asset_layer}_{suffix}"
    if not clean:
        return f"{base_path}/processed_data/results/direct_damages_summary/{basename}"
    return f"{base_path}/results/direct_damages_summary_uids/{basename}"

In [None]:
# For every layer, extract the (original asset id, uid) pairs from the
# UID-tagged network file and store them as a parquet lookup table.
for layer in layers.itertuples():
    id_fname = get_id_fname(layer)
    lookup_cols = [layer.asset_id_column, 'uid']
    layer_data = geopandas.read_file(layer.vis_path, layer=layer.asset_layer)
    id_lookup = layer_data[lookup_cols]
    id_lookup.to_parquet(id_fname, index=False)
    print(id_fname)

In [None]:
def process_layer(layer):
    """Attach integer UIDs to every direct-damages result file of ``layer``.

    For each entry of ``suffixes`` the matching result file is read (parquet
    or csv), joined to the layer's id lookup on ``layer.asset_id_column`` and
    written as parquet to the location given by
    ``get_results_fname(..., clean=True)``.

    Parameters
    ----------
    layer : row object with ``asset_gpkg``, ``asset_layer`` and
        ``asset_id_column`` attributes (e.g. from ``layers.itertuples()``).

    Raises
    ------
    FileNotFoundError
        If the layer's id lookup file is missing.
    AssertionError
        If the join changes the number of rows (the lookup has duplicate ids).

    Notes
    -----
    A missing *input* result file is reported and skipped. Only the read is
    guarded: previously the write was inside the same ``try``, so a missing
    output directory was printed and swallowed and no output was produced.
    """
    id_lookup = pandas.read_parquet(get_id_fname(layer)).set_index(layer.asset_id_column)
    for suffix in suffixes:
        in_fname = get_results_fname(layer, suffix, clean=False)
        print(in_fname)
        try:
            if 'parquet' in suffix:
                data = pandas.read_parquet(in_fname)
            elif 'csv' in suffix:
                # Keep rcp as text so values are not parsed as numbers.
                data = pandas.read_csv(in_fname, dtype={'rcp': object})
            else:
                print(f"WARN Skipping suffix with unhandled filetype: {suffix}")
                continue
        except FileNotFoundError as ex:
            # Best-effort: report the missing input and carry on with the rest.
            print(ex)
            continue

        linked = data.set_index(layer.asset_id_column).join(id_lookup).reset_index()
        # The left join must not multiply rows.
        assert len(data) == len(linked), (len(data), len(linked))

        out_fname = get_results_fname(layer, suffix.replace('csv', 'parquet'), clean=True)
        os.makedirs(os.path.dirname(out_fname), exist_ok=True)
        linked.to_parquet(out_fname)
            
# Buildings are handled by process_buildings, so only the other layers go
# through process_layer here.
for layer in layers.itertuples():
    if 'buildings' not in layer.asset_gpkg:
        process_layer(layer)

In [None]:
# Name fragments matched against result column names to select the columns
# belonging to one hazard / RCP / epoch combination.
hazards = ['coastal', 'cyclone', 'fluvial', 'surface']
rcps = [f"rcp_{scenario}" for scenario in ('2.6', '4.5', '8.5', 'baseline')]
epochs = [f"epoch_{year}" for year in (2010, 2030, 2050, 2070, 2080, 2100)]

def process_buildings(layer):
    """Attach integer UIDs to the direct-damages results of a buildings layer.

    Parquet results are not loaded whole: for every hazard / RCP / epoch
    combination that has matching columns, only the id column, the ``unit``
    columns and that combination's columns are read and written out via
    ``process_subset``. CSV results are read whole, joined to the id lookup
    and written as a single parquet file.

    Parameters
    ----------
    layer : row object with ``asset_gpkg``, ``asset_layer`` and
        ``asset_id_column`` attributes (e.g. from ``layers.itertuples()``).

    Raises
    ------
    FileNotFoundError
        If the id lookup or any input result file is missing.
    """
    # Use the layer's declared id column rather than a hard-coded 'osm_id', so
    # the columns read here agree with the column process_subset indexes on.
    id_col = layer.asset_id_column
    id_lookup = pandas.read_parquet(get_id_fname(layer)).set_index(id_col)

    # Make sure the output directory exists before anything is written to it.
    out_dir = os.path.dirname(get_results_fname(layer, "", clean=True))
    os.makedirs(out_dir, exist_ok=True)

    for suffix in suffixes:
        fname = get_results_fname(layer, suffix, clean=False)
        print(fname)
        if 'parquet' in suffix:
            # Only the schema is needed here; the data is read per subset.
            column_names = pq.ParquetFile(fname).schema.names
            # Same for every combination, so computed once per file.
            base_cols = [id_col] + [col for col in column_names if 'unit' in col and col != id_col]
            for hazard, rcp, epoch in itertools.product(hazards, rcps, epochs):
                data_cols = [
                    col for col in column_names
                    if hazard in col and rcp in col and epoch in col
                ]
                if data_cols:
                    print(base_cols, hazard, rcp, epoch, len(data_cols))
                    process_subset(layer, fname, base_cols, data_cols, id_lookup, hazard, rcp, epoch, suffix)
        elif 'csv' in suffix:
            # Keep rcp as text so values are not parsed as numbers.
            data = pandas.read_csv(fname, dtype={'rcp': object})
            linked = data.set_index(id_col).join(id_lookup).reset_index()
            linked.to_parquet(get_results_fname(layer, suffix.replace('csv', 'parquet'), clean=True))

def process_subset(layer, fname, base_cols, data_cols, id_lookup, hazard, rcp, epoch, suffix):
    """Read a column subset of one result file, add UIDs and write it out.

    Only ``base_cols + data_cols`` are read from ``fname``. The rows are
    joined to ``id_lookup`` on ``layer.asset_id_column`` and saved to the
    clean results location under a name prefixed with
    ``<hazard>__<rcp>__<epoch>__``.
    """
    wanted_cols = base_cols + data_cols
    subset = pandas.read_parquet(fname, columns=wanted_cols)
    indexed = subset.set_index(layer.asset_id_column)
    linked = indexed.join(id_lookup).reset_index()
    out_fname = get_results_fname(layer, f"{hazard}__{rcp}__{epoch}__{suffix}", clean=True)
    linked.to_parquet(out_fname)

# Run the column-subset processing only for layers whose asset_gpkg name contains 'buildings'.
for layer in layers.itertuples():
    if 'buildings' in layer.asset_gpkg:
        process_buildings(layer)