In [3]:
import pyogrio
from ireiat.config import CACHE_PATH,RADIUS_EARTH_MILES, LATLONG_CRS
from ireiat.util.graph import get_coordinates_from_geoframe, get_allowed_node_indices,generate_zero_based_node_maps
import pandas as pd
import numpy as np
pd.set_option('display.max_columns',None)
from typing import Dict, Tuple, Optional, List
import igraph as ig
from collections import Counter
from itertools import chain
import geopandas

In [2]:
%%time
# Load the raw NARN rail link layer from the local cache; Arrow is enabled for the read.
narn_links = pyogrio.read_dataframe(
    CACHE_PATH / "raw/narn_rail_links.zip",
    use_arrow=True,
)

CPU times: total: 3.52 s
Wall time: 3.42 s


In [21]:
# sanity check (disabled): np.all(np.abs(narn_links['shape_Leng'] - narn_links['geometry'].length) < 1e-5)

In [5]:
%%time
# The network must end up strongly connected, so it is rebuilt link by link.
# `get_coordinates_from_geoframe` supplies the endpoint coordinates of each link
# (presumably the first and last point of every line geometry -- see the helper);
# the columns consumed downstream are origin_/destination_ latitude and longitude.
# The link length and the from/to FRA node ids are placed alongside those
# coordinates so every row carries its direction information.
link_coords = pd.concat(
    [
        narn_links[["miles", "frfranode", "tofranode"]],
        get_coordinates_from_geoframe(narn_links),
    ],
    axis=1,
)

CPU times: total: 375 ms
Wall time: 371 ms


In [6]:
# link_coords.groupby(['origin_latitude','origin_longitude'])[['frfranode']].nunique().max()

In [6]:
# Build the inputs for a simple graph whose connectedness is checked afterwards.
# Nodes are keyed by their (latitude, longitude) pair; the helper assigns each key
# an index, and the inverse lookup (index -> coordinate pair) is kept as well.
complete_rail_node_to_idx = generate_zero_based_node_maps(link_coords)
unfiltered_idx_node_dict = {
    idx: node for node, idx in complete_rail_node_to_idx.items()
}

edge_tuples = []
edge_attributes = []
link_records = zip(
    link_coords["origin_latitude"],
    link_coords["origin_longitude"],
    link_coords["destination_latitude"],
    link_coords["destination_longitude"],
    link_coords["miles"],
    link_coords["frfranode"],
    link_coords["tofranode"],
)
for o_lat, o_lon, d_lat, d_lon, miles, from_node, to_node in link_records:
    tail = complete_rail_node_to_idx[(o_lat, o_lon)]
    head = complete_rail_node_to_idx[(d_lat, d_lon)]

    # Every link is inserted in both directions. The attributes (length plus the
    # original from/to node ids, needed for visualization and/or TAP setup) are
    # stored per direction, with the node ids swapped on the reverse edge.
    edge_tuples.extend([(tail, head), (head, tail)])
    edge_attributes.extend(
        [(miles, from_node, to_node), (miles, to_node, from_node)]
    )

In [7]:
# Assemble the directed rail graph from the node map and the paired edge lists.
n_vertices = len(complete_rail_node_to_idx)
print(f"Original number of nodes {n_vertices}, edges {len(edge_tuples)}.")

# Split the (length, from-node, to-node) attribute tuples into per-attribute columns.
edge_lengths = [length for length, _from_node, _to_node in edge_attributes]
edge_from_nodes = [from_node for _length, from_node, _to_node in edge_attributes]
edge_to_nodes = [to_node for _length, _from_node, to_node in edge_attributes]

g = ig.Graph(
    n=n_vertices,
    edges=edge_tuples,
    directed=True,
    # Each vertex remembers its index in the unfiltered node map so it can still
    # be traced back after vertices are dropped.
    vertex_attrs={"original_node_idx": list(complete_rail_node_to_idx.values())},
    edge_attrs={
        "length": edge_lengths,
        "frfranode": edge_from_nodes,
        "tofranode": edge_to_nodes,
    },
)

# Vertex indices to keep; used below to extract the connected subgraph
# (see `get_allowed_node_indices` for the exact selection rule).
allowed_node_indices = get_allowed_node_indices(g)

Original number of nodes 250436, edges 605988.
Number of nodes in largest connected component Counter({2: 112, 3: 44, 4: 38, 5: 18, 6: 12, 7: 9, 11: 8, 8: 7, 12: 4, 9: 4, 18: 3, 17: 3, 15: 2, 13: 2, 21: 2, 60: 1, 79: 1, 31: 1, 48: 1, 24: 1, 34: 1, 36: 1, 19: 1, 35: 1, 33: 1, 23: 1, 248850: 1})
Excluded # of nodes that are not strongly connected 1586


In [8]:
# `g` is directed, so the connectivity check is spelled out as strong connectivity.
g.is_connected(mode="strong")

False

In [9]:
# Restrict the rail graph to the retained vertices and the edges running between them.
connected_subgraph = g.induced_subgraph(allowed_node_indices)

In [10]:
# Confirm that the filtered subgraph is strongly connected.
connected_subgraph.is_connected(mode="strong")

True