# Process railways from OSM

- current network (filter out disused/abandoned/proposed/under construction)
- connected network (link nodes and split edges)

In [None]:
import os
from glob import glob

import fiona
import geopandas as gpd
gpd._compat.USE_PYGEOS = False
import pandas as pd
import snkit

## Current network

In [None]:
# Input railway GeoPackages. The pattern only matches names ending in
# "rail.gpkg", so the "*_filtered.gpkg" outputs written below are not picked
# up again if this cell is re-run.
files = glob('../incoming_data/OpenStreetMap/railways/*rail.gpkg')

In [None]:
def process_points(df):
    """Keep only stop-like railway points.

    Prints the distinct ``railway`` values found in ``df`` (for inspection),
    then keeps rows tagged as 'stop', 'station', 'halt' or 'yes'.

    Returns the kept rows restricted to the columns ``osm_id``, ``name``,
    ``railway`` and ``geometry``.
    """
    print("Railway", df.railway.unique())

    stop_tags = ('stop', 'station', 'halt', 'yes')
    is_stop = df['railway'].isin(stop_tags)
    stops = df[is_stop]

    columns = ['osm_id', 'name', 'railway', 'geometry']
    return stops[columns]

In [None]:
def process_lines(df):
    """Drop railway lines that are not part of the current heavy-rail network.

    Prints the distinct ``railway`` values found in ``df`` (for inspection),
    then removes rows tagged 'abandoned', 'disused', 'construction',
    'proposed', 'tram' or 'funicular'. Rows with any other value (including
    a missing one) are kept.

    Returns the kept rows restricted to the columns ``osm_id``, ``name``,
    ``railway``, ``bridge`` and ``geometry``.
    """
    print("Railway", df.railway.unique())

    excluded_tags = (
        'abandoned', 'disused', 'construction', 'proposed', 'tram', 'funicular',
    )
    is_excluded = df['railway'].isin(excluded_tags)
    current = df[~is_excluded]

    columns = ['osm_id', 'name', 'railway', 'bridge', 'geometry']
    return current[columns]

In [None]:
def process_polygons(df):
    """Drop railway polygons that are planned, being built, or disused.

    Prints the distinct ``railway`` and ``disused`` values found in ``df``
    (for inspection), then removes rows whose ``railway`` is 'construction'
    or 'proposed', and rows whose ``disused`` is 'yes'.

    Returns the kept rows restricted to the columns ``osm_id``,
    ``osm_way_id``, ``name``, ``railway`` and ``geometry``.
    """
    print("Railway", df.railway.unique())
    print("Disused", df.disused.unique())

    not_built = df['railway'].isin(('construction', 'proposed'))
    is_disused = df['disused'].isin(('yes',))
    # A row is dropped if either condition holds.
    in_use = df[~(not_built | is_disused)]

    columns = ['osm_id', 'osm_way_id', 'name', 'railway', 'geometry']
    return in_use[columns]

In [None]:
def polys_to_points(df):
    """Return a copy of ``df`` whose geometry is each feature's centroid.

    The input frame is left untouched: the centroids are written to a copy,
    so a caller that still holds the polygon frame does not silently end up
    with point geometries.

    Parameters
    ----------
    df : frame with a ``geometry`` attribute exposing ``.centroid`` and a
        ``copy()`` method (a GeoDataFrame in this notebook).

    Returns
    -------
    A new frame with the same rows and attributes, with the geometry
    replaced by centroids.
    """
    # NOTE(review): centroids are computed in whatever CRS the layer carries;
    # confirm that is acceptable if the data is in geographic coordinates.
    points = df.copy()
    points.geometry = df.geometry.centroid
    return points

In [None]:
# Each source layer is filtered by its own function; layers are handled in
# this order so the output file is written points -> lines -> multipolygons.
layer_processors = (
    ('points', process_points),
    ('lines', process_lines),
    ('multipolygons', process_polygons),
)

for fname in files:
    print(f"\n{fname}")
    layers = fiona.listlayers(fname)
    out_fname = fname.replace('.gpkg', '_filtered.gpkg')

    # Start from a clean output file so stale layers from an earlier run
    # cannot survive.
    try:
        os.remove(out_fname)
    except FileNotFoundError:
        pass

    for layer, process in layer_processors:
        if layer not in layers:
            continue

        df = process(gpd.read_file(fname, layer=layer))
        if df.empty:
            # Nothing survived the filter: the layer is not written at all.
            continue
        df.to_file(out_fname, layer=layer, driver="GPKG")

        if layer == 'multipolygons':
            # Polygons are additionally stored as centroid points.
            df = polys_to_points(df)
            df.to_file(out_fname, layer='centroids', driver="GPKG")

## Connected network

In [None]:
# Re-bind `files` to the "*rail_filtered.gpkg" GeoPackages (the naming used
# for the outputs of the filtering loop above).
files = glob('../incoming_data/OpenStreetMap/railways/*rail_filtered.gpkg')

In [None]:
def read_nodes(fname):
    """Read the node features of a filtered railway GeoPackage.

    Nodes are the rows of the 'points' layer followed by the rows of the
    'centroids' layer. Centroid rows with a missing ``osm_id`` take their
    ``osm_way_id`` instead, and the ``osm_way_id`` column is then dropped.

    Either layer may be absent from the file (the filtering step only writes
    layers that are non-empty); whichever layers exist are used.

    Parameters
    ----------
    fname : path of the GeoPackage to read.

    Returns
    -------
    A single frame containing all node rows, with their original indexes.

    Raises
    ------
    ValueError
        If the file has neither a 'points' nor a 'centroids' layer.
    """
    layers = fiona.listlayers(fname)
    frames = []

    if 'points' in layers:
        frames.append(gpd.read_file(fname, layer='points'))

    if 'centroids' in layers:
        centroids = gpd.read_file(fname, layer='centroids')
        # fillna treats both None and NaN as missing, whichever way the
        # reader encoded the null ids.
        centroids['osm_id'] = centroids['osm_id'].fillna(centroids['osm_way_id'])
        frames.append(centroids.drop(columns='osm_way_id'))

    if not frames:
        raise ValueError(
            f"{fname} has neither a 'points' nor a 'centroids' layer"
        )

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat(frames)

In [None]:
def read_edges(fname):
    """Read and return the 'lines' layer of the GeoPackage at ``fname``."""
    return gpd.read_file(fname, layer='lines')

In [None]:
# Show the matched files in sorted order. The result is not assigned, so
# `files` itself keeps the order returned by glob.
sorted(files)

In [None]:
# Lookup from the country name used in the file names to the three-letter
# code used in output names and id prefixes.
country_to_code = dict(
    cambodia='KHM',
    indonesia='IDN',
    laos='LAO',
    myanmar='MMR',
    philippines='PHL',
    thailand='THA',
    vietnam='VNM',
)

In [None]:
# Build one connected network file per filtered GeoPackage.
for fname in files:
    folder, base = os.path.split(fname)
    country = base.replace('-rail_filtered.gpkg', '')
    # Raises KeyError for a file whose country is not in country_to_code.
    code = country_to_code[country]
    print(country, code)

    # Build the output name from the directory plus a new basename, rather
    # than str.replace over the whole path, so that directory components
    # containing 'filtered' or the country name are never rewritten.
    out_fname = os.path.join(folder, f"{code}-rail_network.gpkg")

    # Start from a clean output file so stale layers from an earlier run
    # cannot survive.
    try:
        os.remove(out_fname)
    except FileNotFoundError:
        pass

    nodes = read_nodes(fname)
    edges = read_edges(fname)

    # Link the nodes to the lines and split edges at nodes, then label the
    # result with ids and topology columns.
    network = snkit.Network(nodes, edges)
    network = snkit.network.snap_nodes(network)
    network = snkit.network.split_edges_at_nodes(network)
    network = snkit.network.add_endpoints(network)
    # NOTE(review): nodes and edges are given the same id prefix, so a node
    # id and an edge id may coincide -- confirm this is intended.
    network = snkit.network.add_ids(network, edge_prefix=f"rail_{code}", node_prefix=f"rail_{code}")
    network = snkit.network.add_topology(network, id_col='id')

    network.edges.to_file(out_fname, layer='edges', driver='GPKG')
    network.nodes.to_file(out_fname, layer='nodes', driver='GPKG')