In [1]:
import requests
import time

import osmnx as ox
import networkx as nx
import numpy as np

from pathlib import Path
import pandas as pd
import pickle 
import geopy.distance
from geopy.distance import geodesic
from tqdm import tqdm

from file import dump_json, load_json

In [2]:
nodes_data = load_json("../front-end/src/data/full_node_data.json")
len(nodes_data)

301

In [6]:
nodes_data[0]

{'node_id': 2072605244, 'lat': 10.8134736, 'lon': 106.6654239, 'index': 0}

In [12]:
nodes = [node_data["node_id"] for node_data in nodes_data]
nodes[0]

2072605244

In [7]:
node2index = {node["node_id"]: node["index"] for node in nodes_data}
node2index

{2072605244: 0,
 5721686016: 1,
 11405988031: 2,
 9338675727: 3,
 5467207470: 4,
 5721686017: 5,
 2469693281: 6,
 366446893: 7,
 11414869169: 8,
 5467216157: 9,
 11363982568: 10,
 5755175894: 11,
 2036141704: 12,
 411926532: 13,
 411926547: 14,
 411926578: 15,
 411926580: 16,
 411926698: 17,
 10978330103: 18,
 2403047190: 19,
 9843908828: 20,
 9843908827: 21,
 5764817349: 22,
 2393614146: 23,
 4447252158: 24,
 10118771783: 25,
 4672043489: 26,
 4875861250: 27,
 696860153: 28,
 6858588489: 29,
 11389684401: 30,
 5742713535: 31,
 9545277488: 32,
 3640191262: 33,
 5762610094: 34,
 1393358848: 35,
 411925984: 36,
 411925985: 37,
 366476912: 38,
 411926344: 39,
 411926469: 40,
 411926477: 41,
 411926523: 42,
 411926551: 43,
 411926554: 44,
 411926559: 45,
 411926563: 46,
 411926594: 47,
 411926604: 48,
 411926624: 49,
 411926693: 50,
 411926700: 51,
 411926705: 52,
 6794477957: 53,
 8812298790: 54,
 676561493: 55,
 6696632982: 56,
 4604920479: 57,
 10046800715: 58,
 4448584537: 59,
 6751290

In [71]:
with open("results/test_graph.pkl", "rb") as f:
    G = pickle.load(f)

len(G.nodes()), G.is_directed()

(99562, True)

In [72]:
def get_node_data(node, G):
    data = G.nodes(data=True)[node]
    return {
        "node_id": node,
        "lat": data['y'],
        "lon": data['x'],
    }

def get_nodes_data(nodes, G):
    nodes_data = []
    for index, node in enumerate(nodes):
        node_data = get_node_data(node, G)
        node_data["index"] = index
        nodes_data.append(node_data)
    return nodes_data

In [73]:
full_nodes_data = []

for index, node in enumerate(G.nodes()):
    node_data = get_node_data(node, G)
    node_data["index"] = index
    full_nodes_data.append(node_data)

len(full_nodes_data)

99562

In [None]:
# base_nodes_data = [x for x in nodes_data if x["index"] in [0, 30, 134]]
# base_nodes_data

[{'node_id': 2072605244, 'lat': 10.8134736, 'lon': 106.6654239, 'index': 0},
 {'node_id': 11389684401, 'lat': 10.7928951, 'lon': 106.6533154, 'index': 30},
 {'node_id': 1497276524, 'lat': 10.8015651, 'lon': 106.7110915, 'index': 134}]

In [76]:
def is_in_circle(node_data, center_coord, radius):
    node_coord = (node_data['lat'], node_data['lon']) 
    # center_coord = (center_data['lat'], center_data['lon'])
    distance = geodesic(node_coord, center_coord).meters
    return distance <= radius 

In [91]:
def is_on_main_roads(node_data, G, count_thresh=1):
    in_edges = list(G.in_edges(node_data["node_id"], data=True))
    out_edges = list(G.out_edges(node_data["node_id"], data=True))
    
    count = 0
    for u, v, data in in_edges + out_edges: 
        if count >= count_thresh:
            return True
        
        try:
            if data.get("highway") in {"primary", "secondary", "primary_link", "secondary_link"}:
                count += 1
        except TypeError as e:
            print(e)
            print(data)
    return False

In [101]:
tmp_nodes_data = []

In [102]:
center_coord = (10.7779086, 106.6895565)
radius = 1000

tmp_nodes_data += [
    x for x in full_nodes_data 
    if is_in_circle(x, center_coord, radius) and is_on_main_roads(x, G, 2)
]
dump_json("../front-end/src/data/test_node_data.json", tmp_nodes_data)

In [None]:
center_coord = (10.8134736, 106.6654239)

radius = 1000

tmp_nodes_data += [
    x for x in full_nodes_data 
    if is_in_circle(x, center_coord, radius) and is_on_main_roads(x, G)
]
dump_json("../front-end/src/data/test_node_data.json", tmp_nodes_data)

unhashable type: 'list'
{'osmid': [32586234, 32575971], 'highway': ['residential', 'tertiary'], 'oneway': False, 'reversed': False, 'length': 517.508, 'geometry': <LINESTRING (106.655 10.809, 106.655 10.809, 106.655 10.81, 106.655 10.81, 1...>}
unhashable type: 'list'
{'osmid': [32586234, 32575971], 'highway': ['residential', 'tertiary'], 'oneway': False, 'reversed': True, 'length': 517.508, 'geometry': <LINESTRING (106.657 10.813, 106.657 10.813, 106.657 10.812, 106.656 10.812,...>}


In [97]:
center_coord = (10.8026722, 106.6767735)
radius = 2800

tmp_nodes_data += [
    x for x in full_nodes_data 
    if is_in_circle(x, center_coord, radius) and is_on_main_roads(x, G, count_thresh=3)
]
dump_json("../front-end/src/data/test_node_data.json", tmp_nodes_data)

unhashable type: 'list'
{'osmid': [35114264, 211408041, 189165798], 'oneway': True, 'lanes': '5', 'name': ['Cầu Điện Biên Phủ', 'Đường Điện Biên Phủ'], 'highway': ['primary', 'trunk'], 'reversed': False, 'length': 156.053, 'bridge': 'yes', 'geometry': <LINESTRING (106.701 10.794, 106.701 10.794, 106.7 10.793, 106.7 10.793)>}
unhashable type: 'list'
{'osmid': [53553128, 189165785, 53553099], 'oneway': True, 'lanes': '5', 'name': ['Cầu Điện Biên Phủ', 'Đường Điện Biên Phủ'], 'highway': ['primary', 'trunk'], 'maxspeed': '50', 'reversed': False, 'length': 210.118, 'bridge': 'yes', 'geometry': <LINESTRING (106.7 10.793, 106.7 10.793, 106.701 10.794, 106.701 10.794, 106...>}
unhashable type: 'list'
{'osmid': [32586234, 32575971], 'highway': ['residential', 'tertiary'], 'oneway': False, 'reversed': True, 'length': 517.508, 'geometry': <LINESTRING (106.657 10.813, 106.657 10.813, 106.657 10.812, 106.656 10.812,...>}
unhashable type: 'list'
{'osmid': [32586234, 32575971], 'highway': ['residenti

In [60]:
def get_nodes_by_edges(base_nodes_data, kpi = 200):
    current_nodes = set()
    current_edges = set()

    for node_data in base_nodes_data:
        in_edges = list(G.in_edges(node_data["node_id"], data=True))
        out_edges = list(G.out_edges(node_data["node_id"], data=True))
        for u, v, data in in_edges + out_edges:  # Get all edges connected to the node
            current_nodes.add(u)
            current_nodes.add(v)

            print(data) 
            current_edges.add((u, v))        

    return current_nodes, current_edges

In [103]:
with open("results/manual_nodes.txt", "r") as f:
    manual_nodes = f.read().split("\n")
    manual_nodes = list(set([int(x) for x in manual_nodes if x]))

len(manual_nodes)

207

In [104]:
manual_nodes_data = get_nodes_data(manual_nodes, G)
len(manual_nodes_data), manual_nodes_data[0]

(207,
 {'node_id': 5721686016, 'lat': 10.8000091, 'lon': 106.6606224, 'index': 0})

In [105]:
dump_json("../front-end/src/data/test_node_data.json", manual_nodes_data)

## Edges

In [7]:
def get_node2index(nodes_data):
    return {node["node_id"]: node["index"] for node in nodes_data}

def get_index2node(nodes_data):
    return {node["index"]: node["node_id"] for node in nodes_data}

def get_edges_data(edges, node2index, est_capacity):
    edges_data = []
    for edge in edges:
        edges_data.append({
            "src": node2index[edge[0]],
            "dst": node2index[edge[1]],
            "capacity": 1, # TODO: est capacity
        })

    return edges_data

In [118]:
manual_G = G.subgraph(manual_nodes)

adjacency_matrix = nx.to_numpy_array(manual_G)
adjacency_matrix.shape, np.sum(adjacency_matrix)

((207, 207), 204.0)

In [119]:
np.save("./results/manual_adj_matrix.npy", adjacency_matrix)

In [120]:
manual_nodes_data = get_nodes_data(list(manual_G.nodes()), G)
len(manual_nodes_data), manual_nodes_data[0]

(207,
 {'node_id': 5721686016, 'lat': 10.8000091, 'lon': 106.6606224, 'index': 0})

In [122]:
n = len(manual_nodes_data)
manual_edges_data = []

for i in range(n):
    node_i = manual_nodes_data[i]
    for j in range(n):
        node_j = manual_nodes_data[j]
        if adjacency_matrix[i, j] > 0:
            manual_edges_data.append({
                "src": node_i['index'],
                "dst": node_j['index'],
                "capacity": int(adjacency_matrix[i, j]),
            })

len(manual_edges_data)

204

In [123]:
dump_json("../front-end/src/data/test_edge_data.json", manual_edges_data)

In [42]:
main_road_types = {"primary", "secondary"}
connections = []

for node_data in nodes_data:
    node = node_data["node_id"]
    in_edges = list(G.in_edges(node, data=True))
    out_edges = list(G.out_edges(node, data=True))

    for u, v, d in (in_edges + out_edges):  # Get all edges connected to the node
        # if d.get("highway") in main_road_types:  # Check if it's a main road
        if u in node2index and v in node2index: 
            connections.append((u, v))

len(connections)


620

In [35]:
# Having manual nodes and connections

nodes_data = load_json("../front-end/src/data/manual_node_data.json") 

with open("./results/manual_edges.txt", "r") as f:
    manual_edge_connections = f.read().split("\n")

connections = [(int(x.split(",")[0]), int(x.split(",")[1])) for x in manual_edge_connections if x]
len(connections)

261

In [36]:
connections[0]

(192, 160)

In [37]:
n = len(nodes_data)
adjacency_matrix = np.zeros((n, n))

for u, v in connections:
    idx_u, idx_v = u, v
    # idx_u = node2index[u]
    # idx_v = node2index[v]
    adjacency_matrix[idx_u, idx_v] = 1 
    adjacency_matrix[idx_v, idx_u] = 1 

adjacency_matrix.sum()

488.0

In [38]:
from functools import partial

def est_capacity(adj_value, row, col, node_data, est_width=20): 
    if int(adj_value) == 0:
        return 0
    
    row_node = node_data[row]
    col_node = node_data[col]
    row_lat, row_lon = row_node['lat'], row_node['lon']
    col_lat, col_lon = col_node['lat'], col_node['lon']
    distance = geopy.distance.distance((row_lat, row_lon), (col_lat, col_lon)).m
    capacity = adj_value * distance * est_width
    if capacity <= 0:
        print(adj_value, distance)
    return capacity

hehe = partial(est_capacity, node_data=nodes_data)

adjacency_matrix = np.vectorize(hehe)(adjacency_matrix, np.indices(adjacency_matrix.shape)[0], np.indices(adjacency_matrix.shape)[1])
len(adjacency_matrix[adjacency_matrix > 0])

488

In [39]:
capacity_series = pd.Series(adjacency_matrix.ravel())
capacity_series.describe()

count    42849.000000
mean        67.105417
std       1074.028133
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max      82191.000000
dtype: float64

In [40]:
np.save("results/manual_undi_adj_matrix.npy", adjacency_matrix)

In [41]:
adjacency_matrix[80,199]

421

In [42]:
edges = []

for i in range(n):
    node_i = nodes_data[i]
    for j in range(n):
        node_j = nodes_data[j]
        if adjacency_matrix[i, j] > 0:
            edges.append({
                "src": node_i['index'],
                "dst": node_j['index'],
                "capacity": int(adjacency_matrix[i, j]),
            })

len(edges)

488

In [43]:
dump_json("../front-end/src/data/manual_edge_data.json", edges)

In [1]:
from file import load_json

len(load_json("../front-end/src/data/manual_edge_data.json"))

488