In [300]:
import requests as rq
import os
import json
import pandas as pd
import IPython
import osmnx as ox
import numpy as np
import simpy

DATA_BASE = "data"
TIMETABLE_FILE = "timetable"
STOPS_FILE = "stops"
PATHS_FILE = "paths"
FILE_EXT = ".json"

In [2]:
ROUTE_API = lambda id: f"http://apicms.ebms.vn/businfo/getroutebyid/{id}"
TIMETABLE_API = lambda route: f"http://apicms.ebms.vn/businfo/gettimetablebyroute/{route}"
ROUTEVAR_API = lambda route: f"http://apicms.ebms.vn/businfo/getvarsbyroute/{route}"
STOPS_API = lambda id, varId: f"http://apicms.ebms.vn/businfo/getstopsbyvar/{id}/{varId}"
PATHS_API = lambda id, varId: f"http://apicms.ebms.vn/businfo/getpathsbyvar/{id}/{varId}"

def LoadData(route, source, api):
    source += FILE_EXT
    dir_path = os.path.join(os.getcwd(), DATA_BASE, str(route))
    file_path = os.path.join(os.getcwd(), DATA_BASE, str(route), source)

    if not os.path.exists(dir_path):
        os.makedirs(dir_path)

    if not os.path.exists(file_path):
        response = rq.get(api)
        with open(file_path, "w+", encoding="utf-8") as f:
            string = json.dumps(response.json(), ensure_ascii=False)
            f.write(string)
            
    return pd.read_json(file_path, encoding="utf-8")

In [3]:
class StationModel():
    def __init__(self, routeId, varId) -> None:
        src = STOPS_FILE + str(varId)
        api = STOPS_API(id=routeId, varId=varId)
        stops_df = LoadData(route=routeId, source=src, api=api)
        self.stops_df = stops_df[["StopId", "Lat", "Lng"]]
        self.stops_df = self.stops_df.rename(columns={"StopId":"stop_id", "Lat":"lat", "Lng":"lng"})

In [4]:
class BusModel():
    def __init__(self, id) -> None:
        self.stations = dict()
        self.paths_df = dict()
        timetables_df = LoadData(route=id, source=TIMETABLE_FILE, api=TIMETABLE_API(id))
        for varId in timetables_df["RouteVarId"]:
            self.stations[varId] = StationModel(routeId=id, varId=varId)
            self.paths_df[varId] = LoadData(route=id, source=PATHS_FILE, api=PATHS_API(id, varId=varId))

In [5]:
utn = ox.settings.useful_tags_node
oxna = ox.settings.osm_xml_node_attrs
oxnt = ox.settings.osm_xml_node_tags
utw = ox.settings.useful_tags_way
oxwa = ox.settings.osm_xml_way_attrs
oxwt = ox.settings.osm_xml_way_tags
utn = list(set(utn + oxna + oxnt))
utw = list(set(utw + oxwa + oxwt))
ox.settings.all_oneway = False
ox.settings.useful_tags_node = utn
ox.settings.useful_tags_way = utw
ox.settings.timeout=1200

In [6]:
HCM_NETWORK = "./data/hcm.graphml"
if not os.path.exists(HCM_NETWORK):
    G = ox.graph_from_place("Ho Chi Minh City", retain_all=True, truncate_by_edge=True, buffer_dist=1000)
    ox.save_graphml(G, HCM_NETWORK)
else:
    G = ox.load_graphml(HCM_NETWORK)
ox.add_edge_speeds(G)
ox.add_edge_travel_times(G)

<networkx.classes.multidigraph.MultiDiGraph at 0x25eef8e7090>

In [225]:
buses = list()
bus_ids = [8]
for id in bus_ids:
    buses.append(BusModel(id))

stations = buses[0].stations[15].stops_df
stations["edge"] = ox.nearest_edges(G, X=stations["lng"], Y=stations["lat"])
stations.drop_duplicates(subset="edge", inplace=True, ignore_index=True)

def get_routes(station):
    if station.name + 1 != stations.shape[0]:
        return ox.shortest_path(G, station["edge"][0], stations.loc[station.name + 1, "edge"][0], cpus=4)
    else:
        return list(station["edge"][:2])

stations["route"] = stations.apply(get_routes, axis="columns")
stations.reset_index(drop=True, inplace=True)

# Remove looping
loop_filt = stations.apply(
    lambda station: 
    len(set(station["route"]).intersection(
        set(stations.loc[station.name - 1, "route"] if station.name != 0 else []))
    ) > 1
    , axis="columns")
loop_routes = stations[loop_filt]
stations.drop(loop_routes.index.union(loop_routes.index.map(lambda x: x - 1)), inplace=True)

stations.reset_index(inplace=True, drop=True)
stations.drop("route", axis="columns", inplace=True)
stations["route"] = stations.apply(get_routes, axis="columns")

stations

Unnamed: 0,stop_id,lat,lng,edge,route
0,496,10.733545,106.656357,"(4652534427, 2495870411, 0)","[4652534427, 5755051488, 366450563, 5755153785..."
1,502,10.736460,106.656346,"(6433515438, 5755053133, 0)","[6433515438, 6432690983, 5816764351, 575515378..."
2,499,10.738769,106.656427,"(5755153782, 6758003387, 0)","[5755153782, 6758003387, 2040317179, 204031718..."
3,542,10.744513,106.656802,"(6757924465, 5764804979, 0)","[6757924465, 5764804979, 6450096913, 675792447..."
4,501,10.746073,106.659763,"(6449960694, 6450096946, 0)","[6449960694, 6450096946, 4616320836, 366442410..."
...,...,...,...,...,...
59,1919,10.873515,106.809458,"(9339328179, 9417468295, 0)","[9339328179, 9417468295, 6956996792, 695699679..."
60,7644,10.874405,106.806995,"(8372428300, 8372428302, 0)","[8372428300, 8372428302, 5763376688, 576289046..."
61,7648,10.875886,106.804968,"(5762890477, 6768441603, 0)","[5762890477, 6768441603, 6326613470, 632661347..."
62,7623,10.876655,106.802537,"(2949182378, 6768441588, 0)","[2949182378, 6768441590, 2949182366, 294918236..."


In [298]:
buses = list()
bus_ids = [8]
for id in bus_ids:
    buses.append(BusModel(id))

paths = buses[0].paths_df[15]
paths["node"] = ox.nearest_nodes(G, X=paths["lng"], Y=paths["lat"])

stations = buses[0].stations[15].stops_df
stations["node"] = ox.nearest_nodes(G, X=stations["lng"], Y=stations["lat"])

# paths.drop_duplicates(subset="node", inplace=True, ignore_index=True)

def get_routes(path):
    if path.name + 1 != paths.shape[0]:
        return ox.shortest_path(G, path["node"], paths.loc[path.name + 1, "node"], cpus=4)
    else:
        return [path["node"]]

paths["route"] = paths.apply(get_routes, axis="columns")
paths.reset_index(drop=True, inplace=True)
paths

Unnamed: 0,lat,lng,node,route
0,10.733545,106.656357,2495870411,[2495870411.0]
1,10.733545,106.656357,2495870411,"[2495870411.0, 5755051493, 2495872596, 6432690..."
2,10.733556,106.656685,6432690948,"[6432690948.0, 6432690945, 2495872596, 5755051..."
3,10.734046,106.656685,5755051489,"[5755051489.0, 5755051493, 2495870411, 4652534..."
4,10.734031,106.656166,4652534427,"[4652534427.0, 5755051488]"
...,...,...,...,...
609,10.876465,106.801552,6741814224,"[6741814224.0, 6912957463, 7456680192]"
610,10.875196,106.801453,7456680192,"[7456680192.0, 6912957470, 8372509234]"
611,10.874263,106.801407,8372509234,"[8372509234.0, 3342113661, 6768412184, 6768441..."
612,10.874231,106.802055,7961512927,"[7961512927.0, 6839833068, 3342113628]"


In [319]:
match_node = pd.merge(paths["node"], stations["node"], on=["node"], how='left', indicator='exist')
match_node['exist'] = np.where(match_node["exist"] == 'both', True, False)
match_node[match_node['exist'] == True]

Unnamed: 0,node,exist
0,2495870411,True
8,6433515443,True
12,6432751391,True
22,6449960735,True
26,6450096946,True
...,...,...
315,6822974942,True
322,8372428302,True
326,6768441603,True
332,2949182388,True


In [320]:
def find_loop(path):
    index = path.name
    return len(paths[paths.apply(
        lambda path:
        len(
        set(path['route']).intersection(
        set(paths.loc[index, "route"]))
        ) > 1 if path.name != index else False
        , axis="columns"
    )].index) != 0

loop_filt = paths.apply(find_loop, axis="columns")
loop_routes = paths[loop_filt]

while loop_routes.shape[0] > 0:
    paths.drop(loop_routes.index, inplace=True)

    paths.reset_index(inplace=True, drop=True)
    paths.drop("route", axis="columns", inplace=True)
    paths["route"] = paths.apply(get_routes, axis="columns")

    loop_filt = paths.apply(find_loop, axis="columns")
    loop_routes = paths[loop_filt]

paths

Unnamed: 0,lat,lng,node,route
0,10.733545,106.656357,2495870411,"[2495870411.0, 4652534427]"
1,10.734031,106.656166,4652534427,"[4652534427.0, 5755051488]"
2,10.734633,106.656181,5755051488,"[5755051488.0, 366450563]"
3,10.735237,106.656189,366450563,"[366450563.0, 5755153785, 6432690972, 5755153759]"
4,10.735844,106.656204,5755153759,"[5755153759.0, 5755153752, 5755053133, 6433515..."
...,...,...,...,...
159,10.876486,106.803360,2949182362,"[2949182362.0, 2949182357, 2949182355]"
160,10.876592,106.803200,2949182355,"[2949182355.0, 2949182371, 2949182368, 2949182..."
161,10.876607,106.802277,6768441588,"[6768441588.0, 2217934753, 6768441586, 3664575..."
162,10.875196,106.801453,7456680192,"[7456680192.0, 6912957470, 8372509234, 3342113..."


In [321]:
def find_turn_around(path):
    route = path["route"]
    try:
        names = [G.edges[node, route[i + 1], 0]["name"] for i, node in enumerate(route[:-1])]
        return names[0] == names[-1] and len(set(names)) > 1
    except:
        return False

turn_around_filt = paths.apply(find_turn_around, axis="columns")
turn_around_routes = paths[turn_around_filt]
while turn_around_routes.shape[0] > 0:
    paths.drop(turn_around_routes.index, inplace=True)

    paths.reset_index(inplace=True, drop=True)
    paths.drop("route", axis="columns", inplace=True)
    paths["route"] = paths.apply(get_routes, axis="columns")

    turn_around_filt = paths.apply(find_turn_around, axis="columns")
    turn_around_routes = paths[turn_around_filt]

paths

Unnamed: 0,lat,lng,node,route
0,10.733545,106.656357,2495870411,"[2495870411.0, 4652534427]"
1,10.734031,106.656166,4652534427,"[4652534427.0, 5755051488]"
2,10.734633,106.656181,5755051488,"[5755051488.0, 366450563]"
3,10.735237,106.656189,366450563,"[366450563.0, 5755153785, 6432690972, 5755153759]"
4,10.735844,106.656204,5755153759,"[5755153759.0, 5755153752, 5755053133, 6433515..."
...,...,...,...,...
151,10.876486,106.803360,2949182362,"[2949182362.0, 2949182357, 2949182355]"
152,10.876592,106.803200,2949182355,"[2949182355.0, 2949182371, 2949182368, 2949182..."
153,10.876607,106.802277,6768441588,"[6768441588.0, 2217934753, 6768441586, 3664575..."
154,10.875196,106.801453,7456680192,"[7456680192.0, 6912957470, 8372509234, 3342113..."


In [322]:
def find_wander_path(path):
    route = path["route"]
    try:
        names = [G.edges[node, route[i + 1], 0]["name"] for i, node in enumerate(route[:-1])]
        return len(set(names)) > 2
    except:
        return False

wander_filt = paths.apply(find_wander_path, axis="columns")
wander_routes = paths[wander_filt]
while wander_routes.shape[0] > 0:
    paths.drop(wander_routes.index, inplace=True)

    paths.reset_index(inplace=True, drop=True)
    paths.drop("route", axis="columns", inplace=True)
    paths["route"] = paths.apply(get_routes, axis="columns")

    wander_filt = paths.apply(find_wander_path, axis="columns")
    wander_routes = paths[wander_filt]

paths

Unnamed: 0,lat,lng,node,route
0,10.733545,106.656357,2495870411,"[2495870411.0, 4652534427]"
1,10.734031,106.656166,4652534427,"[4652534427.0, 5755051488]"
2,10.734633,106.656181,5755051488,"[5755051488.0, 366450563]"
3,10.735237,106.656189,366450563,"[366450563.0, 5755153785, 6432690972, 5755153759]"
4,10.735844,106.656204,5755153759,"[5755153759.0, 5755153752, 5755053133, 6433515..."
...,...,...,...,...
141,10.875854,106.804939,6768441603,"[6768441603.0, 6326613470, 6326613473, 6768441..."
142,10.876486,106.803360,2949182362,"[2949182362.0, 2949182357, 2949182355]"
143,10.876592,106.803200,2949182355,"[2949182355.0, 2949182371, 2949182368, 2949182..."
144,10.875196,106.801453,7456680192,"[7456680192.0, 6912957470, 8372509234, 3342113..."


In [329]:
match_node = pd.merge(stations["node"], paths["node"], on=["node"], how='left', indicator='exist')
match_node['exist'] = np.where(match_node["exist"] == 'left_only', True, False)
match_node[match_node['exist'] == True].shape

(37, 2)

In [297]:
routes = paths["route"]
route_map = ox.plot_route_folium(G, routes.iloc[0], None, color="#ff0000", tiles="openstreetmap", zoom=10)
for route in routes[1:]:
    try: route_map = ox.plot_route_folium(G, route, route_map, color="#ff0000", tiles="openstreetmap", zoom=10)
    except: 
        display(route) 
        continue
route_map

[3342113628.0]

In [None]:
def get_travel_times(df):
    return df["route"].apply(lambda route: sum(ox.utils_graph.get_route_edge_attributes(G, route, "travel_time")), axis="columns")