In [170]:
import os
import geopandas as gpd
import pandas as pd
import xarray as xr
import numpy as np 
import shapely 
import csv
import ast

import osm_flex.download as dl
import osm_flex.extract as ex
from osm_flex.simplify import remove_contained_points,remove_exact_duplicates
from osm_flex.config import OSM_DATA_DIR,DICT_GEOFABRIK

from tqdm import tqdm

from lonboard import viz
from lonboard.colormap import apply_continuous_cmap
from palettable.colorbrewer.sequential import Blues_9

from pathlib import Path
import pathlib

In [171]:
#define paths
p = Path('..')
data_path = Path(pathlib.Path.home().parts[0]) / 'Projects' / 'gmhcira' / 'data' #should contain folder 'Vulnerability' with vulnerability data
flood_data_path = Path('//labsdfs.labs.vu.nl/labsdfs/BETA-IVM-BAZIS/eks510/fathom-global') # Flood data
eq_data_path = Path('//labsdfs.labs.vu.nl/data_catalogue/open_street_map/global_hazards/earthquakes/GEM')  # Earthquake data
landslide_data_path = Path('//labsdfs.labs.vu.nl/data_catalogue/open_street_map/global_hazards/landslides')  # Landslide data
cyclone_data_path = Path('//labsdfs.labs.vu.nl/data_catalogue/open_street_map/global_hazards/tropical_cyclones')  # Cyclone data

In [172]:
def country_download(iso3):
    """
    Download OpenStreetMap data for a specific country.
    Arguments:
        *iso3* (str): ISO 3166-1 alpha-3 country code.
    Returns:
        *Path*: The file path of the downloaded OpenStreetMap data file.
    """
    
    dl.get_country_geofabrik(iso3) # Use the download library to get the geofabrik data for the specified country
    data_loc = OSM_DATA_DIR.joinpath(f'{DICT_GEOFABRIK[iso3][1]}-latest.osm.pbf') # Specify the location of the OpenStreetMap (OSM) data file
    return data_loc

def overlay_hazard_assets(df_ds,assets):
    """
    Overlay hazard assets on a dataframe of spatial geometries.
    Arguments:
        *df_ds*: GeoDataFrame containing the spatial geometries of the hazard data. 
        *assets*: GeoDataFrame containing the infrastructure assets.
    Returns:
        *geopandas.GeoSeries*: A GeoSeries containing the spatial geometries of df_ds that intersect with the infrastructure assets.
    """
    
    #overlay 
    hazard_tree = shapely.STRtree(df_ds.geometry.values)
    if (shapely.get_type_id(assets.iloc[0].geometry) == 3) | (shapely.get_type_id(assets.iloc[0].geometry) == 6): # id types 3 and 6 stand for polygon and multipolygon
        return  hazard_tree.query(assets.geometry,predicate='intersects')    
    else:
        return  hazard_tree.query(assets.buffered,predicate='intersects')

def buffer_assets(assets,buffer_size=0.00083):
    """
    Buffer spatial assets in a GeoDataFrame.
    Arguments:
        *assets*: GeoDataFrame containing spatial geometries to be buffered.
        *buffer_size* (float, optional): The distance by which to buffer the geometries. Default is 0.00083.
    Returns:
        *GeoDataFrame*: A new GeoDataFrame with an additional 'buffered' column containing the buffered geometries.
    """
    assets['buffered'] = shapely.buffer(assets.geometry.values,distance=buffer_size)
    return assets

def get_damage_per_asset(asset,hazard_numpified,asset_geom,hazard_intensity,fragility_values,maxdam_asset):
    """
    Calculate damage for a given asset based on hazard information.
    Arguments:
        *asset*: Tuple containing information about the asset. It includes:
            - Index or identifier of the asset (asset[0]).
            - The specific hazard points in which asset is exposed (asset[1]['hazard_point']).
        *hazard_numpified*: NumPy array representing hazard information.
        *asset_geom*: Shapely geometry representing the spatial coordinates of the asset.
        *hazard_intensity*: NumPy array representing the hazard intensities of the curve for the asset type.
        *fragility_values*: NumPy array representing the damage factors of the curve for the asset type.
        *maxdam_asset*: Maximum damage value for asset.
    Returns:
        *float*: The calculated damage for the specific asset.
    """
     
    # find the exact hazard overlays:
    get_hazard_points = hazard_numpified[asset[1]['hazard_point'].values] 
    get_hazard_points[shapely.intersects(get_hazard_points[:,1],asset_geom)]

    # estimate damage
    if len(get_hazard_points) == 0: # no overlay of asset with hazard
        return 0
    
    else:
        if asset_geom.geom_type == 'LineString':
            overlay_meters = shapely.length(shapely.intersection(get_hazard_points[:,1],asset_geom)) # get the length of exposed meters per hazard cell
            return np.sum((np.interp(np.float16(get_hazard_points[:,0]),hazard_intensity,fragility_values))*overlay_meters*maxdam_asset) #return asset number, total damage for asset number (damage factor * meters * max. damage)
        elif asset_geom.geom_type in ['MultiPolygon','Polygon']:
            overlay_m2 = shapely.area(shapely.intersection(get_hazard_points[:,1],asset_geom))
            return np.sum((np.interp(np.float16(get_hazard_points[:,0]),hazard_intensity,fragility_values))*overlay_m2*maxdam_asset) #return asset number, total damage for asset number (damage factor * meters * max. damage)
        elif asset_geom.geom_type == 'Point':
            return np.sum((np.interp(np.float16(get_hazard_points[:,0]),hazard_intensity,fragility_values))*maxdam_asset)

def create_pathway_dict(data_path, flood_data_path, eq_data_path, landslide_data_path, cyclone_data_path): 

    """
    Create a dictionary containing paths to various hazard datasets.
    Arguments:
        *data_path* (Path): Base directory path for general data.
        *flood_data_path* (Path): Path to flood hazard data.
        *eq_data_path* (Path): Path to earthquake hazard data.
        *landslide_data_path* (Path): Path to landslide hazard data.
        *cyclone_data_path* (Path): Path to tropical cyclone hazard data.
    Returns:
        *dict*: A dictionary where keys represent a general pathway and different hazard types and values are corresponding paths.
    """

    #create a dictionary
    pathway_dict = {'data_path': data_path, 
                    'fluvial': flood_data_path, 
                    'pluvial': flood_data_path, 
                    'windstorm': cyclone_data_path, 
                    'earthquake': eq_data_path, 
                    'landslides': landslide_data_path,}

    return pathway_dict

def read_hazard_data(hazard_data_path,hazard_type):
    """
    Read hazard data files for a specific hazard type.
    Arguments:
        *hazard_data_path* (Path): Base directory path where hazard data is stored.
        *hazard_type* (str): Type of hazard for which data needs to be read ('fluvial', 'pluvial', 'windstorm', 'earthquake', 'landslides').
    
    Returns:
        *list*: A list of Path objects representing individual hazard data files for the specified hazard type.
    """  

    if hazard_type == 'fluvial':
        hazard_data = hazard_data_path / 'Jamaica' / 'fluvial_undefended' # need to make country an input
        return list(hazard_data.iterdir())

    elif hazard_type == 'pluvial':
        hazard_data = hazard_data_path / 'Jamaica' / 'pluvial' # need to make country an input
        return list(hazard_data.iterdir())

    elif hazard_type == 'windstorm':
        hazard_data = hazard_data_path 
        return list(hazard_data.iterdir())

    elif hazard_type == 'earthquake':
        hazard_data = hazard_data_path
        return list(hazard_data.iterdir())

    elif hazard_type == 'landslides':
        hazard_data = hazard_data_path 
        return list(hazard_data.iterdir())


def read_vul_maxdam(data_path,hazard_type,infra_type):
    """
    Read vulnerability curves and maximum damage data for a specific hazard and infrastructure type.
    Arguments:
        *data_path*: The base directory path where vulnerability and maximum damage data files are stored.
        *hazard_type*: The type of hazard in string format, such as 'pluvial', 'fluvial', or 'windstorm'.
        *infra_type*: The type of infrastructure in string format for which vulnerability curves and maximum damage data are needed.
    
    Returns:
        *tuple*: A tuple containing two DataFrames:
            - The first DataFrame contains vulnerability curves specific to the given hazard and infrastructure type.
            - The second DataFrame contains maximum damage data for the specified infrastructure type.
    """

    vul_data = data_path / 'Vulnerability'
    
    # Load assumptions file containing curve - maxdam combinations per infrastructure type
    assumptions = pd.read_excel(vul_data / 'S1_Assumptions_Test.xlsx',sheet_name = 'Flooding assumptions',header=[1])
    assumptions['Infrastructure type'] = assumptions['Infrastructure type'].str.lower()
    assump_infra_type = assumptions[assumptions['Infrastructure type'] == infra_type]
    assump_curves = ast.literal_eval(assump_infra_type['Vulnerability ID number'].item())
    assump_maxdams = ast.literal_eval(assump_infra_type['Maximum damage ID number'].item())
    
    # Get curves
    if hazard_type in ['pluvial','fluvial']:  
        curves = pd.read_excel(vul_data / 'Table_D2_Multi-Hazard_Fragility_and_Vulnerability_Curves_V1.0.0.xlsx',sheet_name = 'F_Vuln_Depth',index_col=[0],header=[0,1,2,3,4])
    elif hazard_type == 'windstorm':
        curves = pd.read_excel(vul_data / 'Table_D2_Multi-Hazard_Fragility_and_Vulnerability_Curves_V1.0.0.xlsx',sheet_name = 'W_Vuln_V10m',index_col=[0],header=[0,1,2,3,4])
    
    infra_curves =  curves[assump_curves]
    
    # get maxdam
    maxdam = pd.read_excel(vul_data / 'Table_D3_Costs_V1.0.1.xlsx', sheet_name='Cost_Database',index_col=[0])
    infra_maxdam = maxdam[maxdam.index.isin(assump_maxdams)]['Amount'].dropna()
    infra_maxdam = infra_maxdam[pd.to_numeric(infra_maxdam, errors='coerce').notnull()]

    return infra_curves,infra_maxdam

def read_flood_map(flood_map_path,diameter_distance=0.00083/2):
    """
    Read flood map data from a NetCDF file and process it into a GeoDataFrame.
    Arguments:
        *flood_map_path* (Path): Path to the NetCDF file containing flood map data.
        *diameter_distance* (float, optional): The diameter distance used for creating square geometries around data points. Default is 0.00083/2.
    
    Returns:
        *geopandas.GeoDataFrame*: A GeoDataFrame representing the processed flood map data.
    """
    
    flood_map = xr.open_dataset(flood_map_path, engine="rasterio")

    flood_map_vector = flood_map['band_data'].to_dataframe().reset_index() #transform to dataframe
    
    #remove data that will not be used
    flood_map_vector = flood_map_vector.loc[(flood_map_vector.band_data > 0) & (flood_map_vector.band_data < 100)]
    
    # create geometry values and drop lat lon columns
    flood_map_vector['geometry'] = [shapely.points(x) for x in list(zip(flood_map_vector['x'],flood_map_vector['y']))]
    flood_map_vector = flood_map_vector.drop(['x','y','band','spatial_ref'],axis=1)
    
    # drop all non values to reduce size
    flood_map_vector = flood_map_vector.loc[~flood_map_vector['band_data'].isna()].reset_index(drop=True)
    
    # and turn them into squares again:
    flood_map_vector.geometry= shapely.buffer(flood_map_vector.geometry,distance=diameter_distance,cap_style='square').values 

    return flood_map_vector

def read_windstorm_map(windstorm_map_path,bbox):
     
    # load data from NetCDF file
    with xr.open_dataset(flood_map_path) as ds:
        
        # convert data to WGS84 CRS
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
        #ds['band_data'] = ds['band_data']/0.88*1.11 #convert 10-min sustained wind speed to 3-s gust wind speed
    
        ds_vector = ds['band_data'].to_dataframe().reset_index() #transform to dataframe
        
        #remove data that will not be used
        ds_vector = ds_vector.loc[(ds_vector.band_data > 0) & (ds_vector.band_data < 100)]
        
        # create geometry values and drop lat lon columns
        ds_vector['geometry'] = [shapely.points(x) for x in list(zip(ds_vector['x'],ds_vector['y']))]
        ds_vector = ds_vector.drop(['x','y','band','spatial_ref'],axis=1)
        ds_vector['geometry'] = shapely.buffer(ds_vector.geometry, distance=0.1/2, cap_style='square').values
    
        return ds_vector

def create_damage_csv(damage_output, hazard_type, pathway_dict, country_code, sub_system):
    """
    Create a CSV file containing damage information.
    Arguments:
        damage_output: A dictionary containing damage information.
        hazard_type: The type of hazard (e.g., 'earthquake', 'flood').
        pathway_dict: A dictionary containing file paths for different data.
        country_code: A string containing information about the country code
        sub_system: A string containing information about the subsystem considered

    Returns:
        None
    """
  
    hazard_output_path = pathway_dict['data_path'] / 'damage' / country_code
    
    # Check if the directory exists
    if not hazard_output_path.exists():
        # Create the directory
        hazard_output_path.mkdir(parents=True, exist_ok=True)
    
    csv_file_path = hazard_output_path / '{}_{}_{}.csv'.format(country_code, hazard_type, sub_system)
    
    with open(csv_file_path, 'w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        
        # Write header
        csv_writer.writerow(['Country', 'Return period', 'Subsystem', 'Infrastructure type', 'Curve ID number', 'Damage ID number', 'Damage'])
        
        # Write data
        for key, value in damage_output.items():
            csv_writer.writerow(list(key) + [value])
    
    print(f"CSV file created at: {csv_file_path}")

In [181]:
def country_infrastructure_hazard(pathway_dict, country_code, sub_system, infra_type_lst, hazard_type):

    # get country osm data
    data_loc = country_download(country_code)
    
    # get infrastructure data:
    assets = ex.extract_cis(data_loc, sub_system)
    
    # convert assets to epsg3857 (system in meters)
    assets = gpd.GeoDataFrame(assets).set_crs(4326).to_crs(3857)
    
    if sub_system == 'road':
        assets = assets.loc[assets.geometry.geom_type == 'LineString']
        assets = assets.rename(columns={'highway' : 'asset'})
        list_of_highway_assets_to_keep =["living_street", "motorway", "motorway_link", "primary","primary_link",
                                         "residential","road", "secondary", "secondary_link","tertiary","tertiary_link", "trunk", "trunk_link","unclassified","service"]
        #reclassify assets 
        mapping_dict = {
            "living_street" : "tertiary", 
            "motorway" : "primary", 
            "motorway_link" : "primary", 
            "primary" : "primary", 
            "primary_link" : "primary", 
            "residential" : "tertiary",
            "road" : "secondary", 
            "secondary" : "secondary", 
            "secondary_link" : "secondary", 
            "tertiary" : "tertiary", 
            "tertiary_link" : "tertiary", 
            "trunk" : "primary",
            "trunk_link" : "primary",
            "unclassified" : "tertiary", 
            "service" : "tertiary"
        }
        
        assets['asset'] = assets.asset.apply(lambda x : mapping_dict[x])  #reclassification
    elif sub_system == 'rail':
        assets = assets.loc[assets.geometry.geom_type == 'LineString']
        assets = assets.rename(columns={'railway' : 'asset'})
    elif sub_system == 'education':
        assets = assets.rename(columns={'building' : 'asset'})
        assets = assets.reset_index(drop=True)
        assets = remove_contained_points(assets)

        #convert points to polygons
        assets.loc[assets.geom_type == 'Point','geometry'] = assets.loc[assets.geom_type == 'Point'].buffer(
                                                                        distance=np.sqrt(assets.loc[assets.geom_type == 'MultiPolygon'].area.median())/2, cap_style='square')

    elif sub_system == 'air':
        assets = assets.rename(columns={'aeroway' : 'asset'})

    
    # read hazard data
    hazard_data_path = pathway_dict[hazard_type]
    hazard_data_list = read_hazard_data(hazard_data_path,hazard_type)

    # start analysis 
    print(f'{country_code} runs for {sub_system} for {hazard_type} for {len(hazard_data_list)} maps')

    if hazard_type in ['windstorm','earthquake','landslide']:
        # load country geometry file and create geometry to clip
        ne_countries = gpd.read_file(data_path / "natural_earth" / "ne_10m_admin_0_countries.shp") #https://www.naturalearthdata.com/downloads/10m-cultural-vectors/10m-admin-0-countries/
        bbox = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.envelope.values[0].bounds
        
    collect_output = {}
    for single_footprint in hazard_data_list: #tqdm(hazard_data_list,total=len(hazard_data_list)):
    
        hazard_name = single_footprint.parts[-1].split('.')[0]
        
        # load hazard map
        if hazard_type in ['pluvial','fluvial']:
            hazard_map = read_flood_map(single_footprint)
        elif hazard_type in ['windstorm']:
             hazard_map = read_windstorm_map(single_footprint,bbox)
        elif hazard_type in ['earthquake']:
             hazard_map = read_earthquake_map(single_footprint)
        elif hazard_type in ['landslide']:
             hazard_map = read_landslide_map(single_footprint)
         
        # convert hazard data to epsg 3857
        hazard_map = gpd.GeoDataFrame(hazard_map).set_crs(4326).to_crs(3857)

        # Loop through unique infrastructure types within the subsystem
        for infra_type in infra_type_lst: 
            assets_infra_type = assets[assets['asset'] == infra_type].copy().reset_index(drop=True)
        
            # create dicts for quicker lookup
            geom_dict = assets_infra_type['geometry'].to_dict()
            type_dict = assets_infra_type['asset'].to_dict()

            # read vulnerability and maxdam data:
            data_path = pathway_dict['data_path']
            infra_curves,maxdams = read_vul_maxdam(data_path,hazard_type, infra_type)

            # start analysis 
            print(f'{country_code} runs for {infra_type} for {hazard_type} for {hazard_name} map for {len(infra_curves.T)*len(maxdams)} combinations')
    
            # overlay assets
            overlay_assets = pd.DataFrame(overlay_hazard_assets(hazard_map,buffer_assets(assets_infra_type)).T,columns=['asset','hazard_point'])
    
            # convert dataframe to numpy array
            hazard_numpified = hazard_map.to_numpy() 

            for infra_curve in infra_curves:
                # get curves
                curve = infra_curves[infra_curve[0]]
                hazard_intensity = curve.index.values
                fragility_values = (np.nan_to_num(curve.values,nan=(np.nanmax(curve.values)))).flatten()
                        
                for maxdam in maxdams:
                    
                    collect_inb = []
                    for asset in tqdm(overlay_assets.groupby('asset'),total=len(overlay_assets.asset.unique())): #group asset items for different hazard points per asset and get total number of unique assets
                        if np.max(fragility_values) == 0: #if exposure does not lead to damage
                            collect_inb.append(0)  
                        else:
                            asset_geom = geom_dict[asset[0]]
                            collect_inb.append(get_damage_per_asset(asset,hazard_numpified,asset_geom,hazard_intensity,fragility_values,maxdam)) #get list of damages for specific asset
                    collect_output[country_code, hazard_name, sub_system, infra_type, infra_curve[0], maxdam] = np.sum(collect_inb) # dictionary to store results for various combinations of hazard maps, infrastructure curves, and maximum damage values.
        break #delete after testing
    return collect_output


            

In [183]:
# List of critical infrastructure systems to process
cis_dict = {
    "energy": {"power": ["line","minor_line","cable","plant","substation",
                        "power_tower","power_pole"]},
    "transportation": {"road":  ["primary", "secondary", "tertiary"], 
                        "airports": ["airports"],
                        "railways": ["railway"]},
    "water": {"water_supply": ["water_tower", "water_well", "reservoir_covered",
                                "water_works", "reservoir"]},
    "waste": {"waste_solid": ["landfill","waste_transfer_station"],
            "waste_water": ["wastewater_treatment_plant"]},
    "telecommunication": {"telecom": ["communication_tower", "mast"]},
    "healthcare": {"health": ["clinic", "doctors", "hospital", "dentist", "pharmacy", 
                        "physiotherapist", "alternative", "laboratory", "optometrist", "rehabilitation", 
                        "blood_donation", "birthing_center"]},
    "education": {"education_facilities": ["college", "kindergarten", "library", "school", "university"]}
}

cis_dict = {
    "transportation": {"road":  ["primary", "secondary", "tertiary"]}
                      }

In [175]:
hazard_type='pluvial'
country_codes=['JAM']

# Run analysis

In [184]:
pathway_dict = create_pathway_dict(data_path, flood_data_path, eq_data_path, landslide_data_path, cyclone_data_path)
for country_code in country_codes: 
    for ci_system in cis_dict: 
        for sub_system in cis_dict[ci_system]:
            infra_type_lst = cis_dict[ci_system][sub_system]
            test = country_infrastructure_hazard(pathway_dict, country_code, sub_system, infra_type_lst, hazard_type)
            #create_damage_csv(test, hazard_type, pathway_dict, country_code, sub_system)

extract points: 0it [00:00, ?it/s]
extract multipolygons: 100%|█████████████████████████████████████████████████████████████| 2/2 [00:16<00:00,  8.28s/it]
extract lines: 100%|███████████████████████████████████████████████████████████| 39974/39974 [00:09<00:00, 4063.68it/s]


JAM runs for road for pluvial for 10 maps
JAM runs for primary for pluvial for P_1in50 map for 1 combinations


100%|██████████████████████████████████████████████████████████████████████████████| 853/853 [00:00<00:00, 1313.03it/s]


JAM runs for secondary for pluvial for P_1in50 map for 1 combinations


100%|███████████████████████████████████████████████████████████████████████████████| 377/377 [00:00<00:00, 951.75it/s]


JAM runs for tertiary for pluvial for P_1in50 map for 1 combinations


100%|██████████████████████████████████████████████████████████████████████████| 13337/13337 [00:07<00:00, 1738.97it/s]


In [185]:
test

{('JAM',
  'P_1in50',
  'road',
  'primary',
  'F7.1',
  909.3454827565133): 44278885.94165449,
 ('JAM',
  'P_1in50',
  'road',
  'secondary',
  'F7.1',
  909.3454827565133): 47364030.14625165,
 ('JAM',
  'P_1in50',
  'road',
  'tertiary',
  'F7.1',
  909.3454827565133): 489666812.8877882}

In [None]:
print(f' outcome of total infra road types {sum(list(test.values()))}')

## Code to extract infra

In [209]:
sub_system = 'railway'

In [210]:
    # get country osm data
    data_loc = country_download(country_code)
    
    # get infrastructure data:
    assets = ex.extract_cis(data_loc, sub_system)
    
    # convert assets to epsg3857 (system in meters)
    assets = gpd.GeoDataFrame(assets).set_crs(4326).to_crs(3857)
    
 

AttributeError: You are calling a geospatial method on the GeoDataFrame, but the active geometry column to use has not been set. 
There are no existing columns with geometry data type. You can add a geometry column as the active geometry column with df.set_geometry. 

In [201]:
help(ex.extract_cis)

Help on function extract_cis in module osm_flex.extract:

extract_cis(osm_path, ci_type)
    A wrapper around extract() to conveniently extract map info for a
    selection of  critical infrastructure types from the given osm.pbf file.
    No need to search for osm key/value tags and relevant geometry types.
    Parameters
    ----------
    osm_path : str or Path
        location of osm.pbf file from which to parse
    ci_type : str
        one of DICT_CIS_OSM.keys(), i.e. 'education', 'healthcare',
        'water', 'telecom', 'road', 'rail', 'air', 'gas', 'oil', 'power',
        'wastewater', 'food'
    See also
    -------
    DICT_CIS_OSM for the keys and key/value tags queried for the respective
    CIs. Modify if desired.



In [204]:
help(ex.DICT_CIS_OSM)

Help on dict object:

class dict(object)
 |  dict() -> new empty dictionary
 |  dict(mapping) -> new dictionary initialized from a mapping object's
 |      (key, value) pairs
 |  dict(iterable) -> new dictionary initialized as if via:
 |      d = {}
 |      for k, v in iterable:
 |          d[k] = v
 |  dict(**kwargs) -> new dictionary initialized with the name=value pairs
 |      in the keyword argument list.  For example:  dict(one=1, two=2)
 |  
 |  Built-in subclasses:
 |      StgDict
 |  
 |  Methods defined here:
 |  
 |  __contains__(self, key, /)
 |      True if the dictionary has the specified key, else False.
 |  
 |  __delitem__(self, key, /)
 |      Delete self[key].
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __gt__(self, value, /)
 |      Return self>va

In [188]:
   if sub_system == 'road':
        assets = assets.loc[assets.geometry.geom_type == 'LineString']
        assets = assets.rename(columns={'highway' : 'asset'})
        list_of_highway_assets_to_keep =["living_street", "motorway", "motorway_link", "primary","primary_link",
                                         "residential","road", "secondary", "secondary_link","tertiary","tertiary_link", "trunk", "trunk_link","unclassified","service"]
        #reclassify assets 
        mapping_dict = {
            "living_street" : "tertiary", 
            "motorway" : "primary", 
            "motorway_link" : "primary", 
            "primary" : "primary", 
            "primary_link" : "primary", 
            "residential" : "tertiary",
            "road" : "secondary", 
            "secondary" : "secondary", 
            "secondary_link" : "secondary", 
            "tertiary" : "tertiary", 
            "tertiary_link" : "tertiary", 
            "trunk" : "primary",
            "trunk_link" : "primary",
            "unclassified" : "tertiary", 
            "service" : "tertiary"
        }
        
        assets['asset'] = assets.asset.apply(lambda x : mapping_dict[x])  #reclassification
    elif sub_system == 'rail':
        assets = assets.loc[assets.geometry.geom_type == 'LineString']
        assets = assets.rename(columns={'railway' : 'asset'})
    elif sub_system == 'education':
        assets = assets.rename(columns={'building' : 'asset'})
        assets = assets.reset_index(drop=True)
        assets = remove_contained_points(assets)

        #convert points to polygons
        assets.loc[assets.geom_type == 'Point','geometry'] = assets.loc[assets.geom_type == 'Point'].buffer(
                                                                        distance=np.sqrt(assets.loc[assets.geom_type == 'MultiPolygon'].area.median())/2, cap_style='square')

    elif sub_system == 'air':
        assets = assets.rename(columns={'aeroway' : 'asset'})

Unnamed: 0,osm_id,asset,name,gauge,electrified,voltage,geometry
0,25878434,rail,,1435.0,no,,"LINESTRING (-8597795.959 2025602.436, -8598181..."
1,49406367,rail,,1435.0,no,,"LINESTRING (-8597795.959 2025602.436, -8597689..."
2,49483414,rail,,,,,"LINESTRING (-8586701.725 2025852.765, -8586698..."
3,49483419,rail,,1435.0,no,,"LINESTRING (-8586610.332 2025382.420, -8586593..."
4,49483421,rail,,1435.0,no,,"LINESTRING (-8586379.255 2025094.372, -8586484..."
5,49483422,rail,,1435.0,no,,"LINESTRING (-8586384.031 2025091.985, -8586509..."
6,49483423,rail,,1435.0,no,,"LINESTRING (-8586388.806 2025089.587, -8586534..."
7,49483424,rail,,,,,"LINESTRING (-8586697.284 2025724.939, -8586701..."
8,49483428,rail,,1435.0,no,,"LINESTRING (-8586354.175 2025045.405, -8586491..."
9,49742782,rail,,1435.0,no,,"LINESTRING (-8600712.552 2027079.393, -8600772..."


In [None]:
sub_system = 'rail'