In [None]:
!conda install -y -c conda-forge swifter

In [None]:
!conda install -y -c conda-forge momepy

In [1]:
import swifter
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from osmnx import utils_graph
import networkx as nx
import math
import osmnx as ox
pd.set_option('display.max_columns', None)
import momepy
import numpy as np
from geopandas import GeoSeries
import os

os.chdir(os.path.join(os.getcwd(), ".."))
print(os.getcwd())

/home/jovyan/work/code/paratransit-mdp


### Load nodes and edges

In [2]:
# Load nodes file
file_path = os.path.join(os.getcwd(), "data", "travel_time_matrix", "nodes.csv")
nodes = pd.read_csv(file_path)
nodes.index = nodes['node_id']
nodes.head(2)

Unnamed: 0_level_0,node_id,osmid,lat,lon
node_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,66923001,34.984104,-85.145232
1,1,66937537,34.986203,-85.15179


In [3]:
# Load edges file
file_path = os.path.join(os.getcwd(), "data", "travel_time_matrix", "edges.csv")
edges = pd.read_csv(file_path)
print(len(edges.dropna()), len(edges))
edges.head(2)

27518 27518


Unnamed: 0,source_osmid,target_osmid,source_node,target_node,travel_time
0,202724336,2689933658,6607,9595,0.106671
1,2689933658,202724336,9595,6607,0.106671


### Need to add distance back to the graph

In [4]:
# get the OSM graph
G = ox.graph_from_place('Chattanooga, Tennessee',
                        network_type='drive',
                        simplify=True,
                        truncate_by_edge=True,
                        retain_all=False,
                        buffer_dist=500)

# add edge speeds
G = ox.speed.add_edge_speeds(G, fallback=40.2, precision=6)

# add edge travel time
G = ox.speed.add_edge_travel_times(G, precision=6)

# get nodes and edges in a geopandas dataframe
nodes_ref, edges_ref = ox.utils_graph.graph_to_gdfs(G)
edges_ref = edges_ref.reset_index()
edges_ref = edges_ref.rename(columns={'u': 'source_osmid', 'v': 'target_osmid'})
edges_ref = edges_ref[['source_osmid', 'target_osmid', 'key', 'osmid', 'length', 'speed_kph', 'travel_time']]
edges_ref = edges_ref.sort_values(by=['travel_time'])
edges_ref = edges_ref.drop_duplicates(subset=['source_osmid', 'target_osmid'], ignore_index=True)
edges_ref = edges_ref[['source_osmid', 'target_osmid', 'osmid', 'length', 'speed_kph']]
print(f"Number of nodes: {len(nodes)}, number of edges: {len(edges)}")
edges_ref.head(2)

Number of nodes: 10788, number of edges: 27518


Unnamed: 0,source_osmid,target_osmid,osmid,length,speed_kph
0,202724336,2689933658,19510140,1.05,35.826984
1,2689933658,202724336,19510140,1.05,35.826984


In [5]:
print(len(edges), len(edges.dropna()))
edges = edges.merge(edges_ref, left_on=['source_osmid', 'target_osmid'], right_on=['source_osmid', 'target_osmid'], how='left', validate='one_to_one')
print(len(edges), len(edges.dropna()))
edges.head(2)

27518 27518
27518 27518


Unnamed: 0,source_osmid,target_osmid,source_node,target_node,travel_time,osmid,length,speed_kph
0,202724336,2689933658,6607,9595,0.106671,19510140,1.05,35.826984
1,2689933658,202724336,9595,6607,0.106671,19510140,1.05,35.826984


In [6]:
def format_osmid(x):
    if type(x) == int:
        return [x]
    else:
        return x
    
edges['osmid'] = edges['osmid'].apply(lambda x: format_osmid(x))
print(len(edges), len(edges.dropna()))
edges.head(3)

27518 27518


Unnamed: 0,source_osmid,target_osmid,source_node,target_node,travel_time,osmid,length,speed_kph
0,202724336,2689933658,6607,9595,0.106671,[19510140],1.05,35.826984
1,2689933658,202724336,9595,6607,0.106671,[19510140],1.05,35.826984
2,202595928,3639277853,856,9841,0.112868,[19492768],1.111,35.826984


### Add xdsegid to edges

In [7]:
file_path = os.path.join(os.getcwd(), "data", "dubey", "USA_Tennessee.csv")
osm_map = pd.read_csv(file_path)
osm_map['osmid'] = osm_map['OSMWayIDs'].apply(lambda x: [int(y) for y in x.split(";")])
osm_map = osm_map[['XDSegID', 'osmid']]
print(len(osm_map))
osm_map.head(2)

92118


Unnamed: 0,XDSegID,osmid
0,136894283,[19659968]
1,136894316,[827216458]


In [8]:
xds = []
osmids = []
for k, v in osm_map.iterrows():
    for osmid in v['osmid']:
        xds.append(int(v['XDSegID']))
        osmids.append(int(osmid))
osm_map = pd.DataFrame({'XDSegID': xds, 'osmid': osmids})
print(len(osm_map))
osm_map.head(3)

129455


Unnamed: 0,XDSegID,osmid
0,136894283,19659968
1,136894316,827216458
2,136894420,463472790


In [9]:
def get_xd(osm_ways):
    result = []
    for osm_way in osm_ways:
        temp = osm_map[osm_map['osmid']==osm_way]['XDSegID']
        for t in temp.tolist():
            result.append(t)
    return list(set(result))

print(len(edges), len(edges.dropna()))
edges['xdsegid'] = edges['osmid'].apply(lambda x: get_xd(x))
print(len(edges), len(edges.dropna()))
edges.head()

27518 27518
27518 27518


Unnamed: 0,source_osmid,target_osmid,source_node,target_node,travel_time,osmid,length,speed_kph,xdsegid
0,202724336,2689933658,6607,9595,0.106671,[19510140],1.05,35.826984,[]
1,2689933658,202724336,9595,6607,0.106671,[19510140],1.05,35.826984,[]
2,202595928,3639277853,856,9841,0.112868,[19492768],1.111,35.826984,[]
3,3639277853,202595928,9841,856,0.112868,[19492768],1.111,35.826984,[]
4,5863378097,1432394908,10158,8558,0.117527,[37272970],1.709,52.238418,"[450306885, 450304245]"


In [10]:
file_path = os.path.join(os.getcwd(), "data", "dubey", "hamilton_inrix_model_month_day_hour.parquet")
hammodel = pd.read_parquet(file_path)
hammodel['speed_kph'] = hammodel['speed_mean'].apply(lambda x: x * 1.60934)
hammodel['reference_speed_kph'] = hammodel['reference_speed'].apply(lambda x: x * 1.60934)
print(len(hammodel))

unique_xdsegs = []
empty_map = 0
for xdsegids in edges['xdsegid'].tolist():
    if len(xdsegids) == 0:
        empty_map += 1
    unique_xdsegs = unique_xdsegs + xdsegids
print(len(unique_xdsegs), empty_map)
unique_xdsegs = list(set(unique_xdsegs))
print(len(unique_xdsegs))


#hammodel = hammodel[hammodel['xdsegid'].isin(unique_xdsegs)]

hammodel = hammodel[hammodel['day'].isin(['mon', 'tue', 'wed', 'thur', 'fri'])]
print(len(hammodel), len(hammodel.dropna()))

hammodel.head(2)

10539193
56513 19660
4647
7528825 7528825


Unnamed: 0,xdsegid,hour,day,month,speed_mean,reference_speed,speed_kph,reference_speed_kph
4107,429159428,0,mon,1,39.606979,40.0,63.741096,64.3736
4113,429159445,0,mon,1,26.186667,34.0,42.14325,54.71756


In [11]:
# 1 percentile

result = []

for xdsegid in hammodel['xdsegid'].unique():
    temp = hammodel[hammodel['xdsegid']==xdsegid]
    speed_kph = temp['speed_kph'].quantile(0.05)
    reference_speed_kph = temp['reference_speed_kph'].median()
    #multiplier = speed_kph / reference_speed_kph
    multiplier = reference_speed_kph / speed_kph
    #print(xdsegid, speed_kph, reference_speed_kph, multiplier)
    result.append({'multiplier': multiplier, 'xdsegid': xdsegid})
speed_map = pd.DataFrame.from_records(result)
print(speed_map['multiplier'].mean())

def filter_mult(mult):
    if mult < 1.25:
        return 1.25
    else:
        return mult
    
speed_map['multiplier'] = speed_map['multiplier'].apply(lambda x: filter_mult(x))
print(speed_map['multiplier'].mean())
speed_map.tail(5)

1.2399378495791367
1.334105661632214


Unnamed: 0,multiplier,xdsegid
5263,1.25,1386926050
5264,1.772178,461525671
5265,1.25,1386737568
5266,1.25,391175065
5267,1.25,391166097


In [12]:
def get_new_mult(row):
    if len(row['xdsegid']) == 0:
        mult = 1.25
    else:
        temp = speed_map[speed_map['xdsegid'].isin(row['xdsegid'])]['multiplier']
        if len(temp) > 0:
            mult = temp.max()
        else:
            mult = 1.25
        
    #r = row['travel_time'] * mult
    #r = np.float64(r)
    #if np.isnan(r):
    #    print(row)
    return mult

print(len(edges), len(edges.dropna()))
edges['mult'] = edges.apply(lambda row: get_new_mult(row), axis=1)
edges['travel_time_cong'] = edges.apply(lambda row: row['travel_time'] * row['mult'], axis=1)
print(len(edges), len(edges.dropna()))
edges.head()

27518 27518
27518 27518


Unnamed: 0,source_osmid,target_osmid,source_node,target_node,travel_time,osmid,length,speed_kph,xdsegid,mult,travel_time_cong
0,202724336,2689933658,6607,9595,0.106671,[19510140],1.05,35.826984,[],1.25,0.133339
1,2689933658,202724336,9595,6607,0.106671,[19510140],1.05,35.826984,[],1.25,0.133339
2,202595928,3639277853,856,9841,0.112868,[19492768],1.111,35.826984,[],1.25,0.141085
3,3639277853,202595928,9841,856,0.112868,[19492768],1.111,35.826984,[],1.25,0.141085
4,5863378097,1432394908,10158,8558,0.117527,[37272970],1.709,52.238418,"[450306885, 450304245]",1.72973,0.20329


# Get New Travel Time Matrix

In [None]:
edgelist = []
for k, v in edges.iterrows():
    edgelist.append((v['source_node'], v['target_node'], v['travel_time_cong']))
    
G = nx.DiGraph()
G.add_weighted_edges_from(edgelist)
print(f"Is strongly connected: {nx.is_strongly_connected(G)}, is weakly connected: {nx.is_weakly_connected(G)}")


In [13]:
G = nx.DiGraph()
for k, v in edges.iterrows():
    #G.add_edge(v['source_node'], v['target_node'], weight=v['travel_time'])
    G.add_edge(v['source_node'], v['target_node'], weight=v['travel_time_cong'])
    #print(v['travel_time_cong'])
    if v['travel_time_cong'] <= 0:
        print(v['source_node'], v['target_node'], v['travel_time_cong'])

In [14]:
import time
import sys

In [15]:
# generate the nodelist and travel time matrix
weight = 'weight'
nodelist = list(nodes.index)
print(f"len of nodelist: {len(nodelist)}")

results = np.full((len(nodelist), len(nodelist)), -1, dtype=np.float32)

length = nx.all_pairs_dijkstra_path_length(G, weight='weight')
start_time = time.time()
for source in length:
    source_node = source[0]
    source_node_index = nodelist.index(source_node)
    for target_node_index in range(len(nodelist)):
        target_node = nodelist[target_node_index]
        if target_node in source[1].keys():
            results[source_node_index, target_node_index] = source[1][target_node]
    if (source_node_index % 1000) == 0:
        cur_runtime = time.time() - start_time
        print(f"Done with {source_node_index} nodes in {cur_runtime} seconds, size of results is {sys.getsizeof(results)}")

travel_time_matrix = results

len of nodelist: 10788
Done with 8000 nodes in 83.38770961761475 seconds, size of results is 465523896
Done with 9000 nodes in 85.72692036628723 seconds, size of results is 465523896
Done with 1000 nodes in 127.5832130908966 seconds, size of results is 465523896
Done with 0 nodes in 157.3815336227417 seconds, size of results is 465523896
Done with 2000 nodes in 249.02468705177307 seconds, size of results is 465523896
Done with 6000 nodes in 329.1386468410492 seconds, size of results is 465523896
Done with 7000 nodes in 380.29919624328613 seconds, size of results is 465523896
Done with 3000 nodes in 380.8635663986206 seconds, size of results is 465523896
Done with 4000 nodes in 481.14025688171387 seconds, size of results is 465523896
Done with 10000 nodes in 506.6502454280853 seconds, size of results is 465523896
Done with 5000 nodes in 566.3276402950287 seconds, size of results is 465523896


In [18]:
file_path = os.path.join(os.getcwd(), "data", "travel_time_matrix", 'travel_time_matrix_cong_mike.csv')
np.savetxt(file_path, travel_time_matrix, fmt='%f', delimiter=",")

In [22]:
file_path = os.path.join(os.getcwd(), "data", "travel_time_matrix", 'travel_time_matrix_cong.csv')
np.savetxt(file_path, travel_time_matrix, fmt='%f', delimiter=",")

In [34]:
file_path = os.path.join(os.getcwd(), "data", "travel_time_matrix", 'nodes_cong.csv')
nodes.to_csv(file_path, index=False)

In [35]:
file_path = os.path.join(os.getcwd(), "data", "travel_time_matrix", 'edges_cong.csv')
edges.to_csv(file_path, index=False)

# Update Format for MA-RTV Simulator

In [49]:
file_path = os.path.join(os.getcwd(), 'data', 'format_samitha', 'nodes.csv')
columns = ['node_id', 'lat', 'lon']
y_nodes = pd.read_csv(file_path, index_col=False, names=columns, header=None)
y_nodes['node_id'] = y_nodes['node_id'].apply(lambda x: x-1)
y_nodes.head(3)

Unnamed: 0,node_id,lat,lon
0,0,34.984104,-85.145232
1,1,34.986203,-85.15179
2,2,34.985842,-85.151119


In [50]:
def match_node(row):
    temp = nodes[(nodes['lat']==row['lat']) & (nodes['lon']==row['lon'])]
    if len(temp) != 1:
        print(f"issue {row['node_id']}")
    return int(temp.iloc[0]['node_id'])

y_nodes['my_node_id'] = y_nodes.apply(lambda row: match_node(row), axis=1)
y_nodes.head()

Unnamed: 0,node_id,lat,lon,my_node_id
0,0,34.984104,-85.145232,0
1,1,34.986203,-85.15179,1
2,2,34.985842,-85.151119,2
3,3,34.984425,-85.147417,3
4,4,34.979835,-85.144704,4


In [51]:
file_path = os.path.join(os.getcwd(), 'data', 'format_samitha', 'edges.csv')
columns = ['source', 'target', 'weight']
y_edges = pd.read_csv(file_path, index_col=False, names=columns, header=None)
y_edges['source'] = y_edges['source'].apply(lambda x: x -1)
y_edges['target'] = y_edges['target'].apply(lambda x: x -1)
y_edges.head(3)

Unnamed: 0,source,target,weight
0,6602,9573,1
1,9573,6602,1
2,9573,10025,9


In [54]:
def get_new_weight(row):
    #print(row['source'], row['target'])
    source_node = int(y_nodes[y_nodes['node_id']==row['source']].iloc[0]['my_node_id'])
    target_node = int(y_nodes[y_nodes['node_id']==row['target']].iloc[0]['my_node_id'])
    #print(source_node, target_node)
    return edges[(edges['source_node']==source_node) & (edges['target_node']==target_node)].iloc[0]['travel_time_cong']

y_edges['travel_time_cong'] = y_edges.apply(lambda row: get_new_weight(row), axis=1)
y_edges['new_weight'] = y_edges['travel_time_cong'].apply(lambda x: math.ceil(x))
y_edges.head()

Unnamed: 0,source,target,weight,travel_time_cong,new_weight
0,6602,9573,1,0.133339,1
1,9573,6602,1,0.133339,1
2,9573,10025,9,10.512591,11
3,9573,5832,12,14.284186,15
4,9573,6966,18,21.859633,22


In [56]:
file_path = os.path.join(os.getcwd(), 'data', 'format_samitha', 'edges_cong.csv')
y_edges['source'] = y_edges['source'].apply(lambda x: x + 1)
y_edges['target'] = y_edges['target'].apply(lambda x: x + 1)
y_edges = y_edges[['source', 'target', 'new_weight']]
y_edges['source'] = y_edges['source'].astype(int)
y_edges['target'] = y_edges['target'].astype(int)
y_edges['new_weight'] = y_edges['new_weight'].astype(int)
y_edges.to_csv(file_path, header=False, index=False)

In [58]:
file_path = os.path.join(os.getcwd(), 'data', 'format_samitha', 'nodes_cong.csv')
y_nodes = y_nodes[['node_id', 'lat', 'lon']]
y_nodes['node_id'] = y_nodes['node_id'].apply(lambda x: x+1)
y_nodes['node_id'] = y_nodes['node_id'].astype(int)
y_nodes.to_csv(file_path, header=False, index=False)

In [55]:
edges.head()

Unnamed: 0,source_osmid,target_osmid,source_node,target_node,travel_time,osmid,length,speed_kph,xdsegid,mult,travel_time_cong
0,202724336,2689933658,6607,9595,0.106671,[19510140],1.05,35.826984,[],1.25,0.133339
1,2689933658,202724336,9595,6607,0.106671,[19510140],1.05,35.826984,[],1.25,0.133339
2,202595928,3639277853,856,9841,0.112868,[19492768],1.111,35.826984,[],1.25,0.141085
3,3639277853,202595928,9841,856,0.112868,[19492768],1.111,35.826984,[],1.25,0.141085
4,5863378097,1432394908,10158,8558,0.117527,[37272970],1.709,52.238418,"[450306885, 450304245]",1.72973,0.20329


# Update Test and Train Chains

In [70]:
file_path = os.path.join(os.getcwd(), 'data', 'CARTA', 'processed', 'test_chains_r2.csv')
chains = pd.read_csv(file_path, index_col=False)

chains['new_dropoff_time_since_midnight'] = chains.apply(lambda row: row['pickup_time_since_midnight'] + travel_time_matrix[row['pickup_node_id'], row['dropoff_node_id']], axis=1)
chains['new_dropoff_time_since_midnight'] = chains['new_dropoff_time_since_midnight'].apply(lambda x: math.ceil(x))

chains = chains[['pickup_node_id', 'dropoff_node_id', 'pickup_time_since_midnight', 'new_dropoff_time_since_midnight', 'chain_id', 'chain_order']]
file_path = os.path.join(os.getcwd(), 'data', 'CARTA', 'processed', 'test_chains_cong.csv')
chains.to_csv(file_path, index=False)

chains.head()


Unnamed: 0,pickup_node_id,dropoff_node_id,pickup_time_since_midnight,new_dropoff_time_since_midnight,chain_id,chain_order
0,229,2898,18900,19261,0,0
1,5346,2898,19800,19914,0,1
2,1905,10243,23400,23547,0,2
3,5139,4446,26100,26888,0,3
4,8879,1446,26400,27712,0,4


In [73]:
file_path = os.path.join(os.getcwd(), 'data', 'CARTA', 'processed', 'train_chains_r2.csv')
chains = pd.read_csv(file_path, index_col=False)

chains['new_dropoff_time_since_midnight'] = chains.apply(lambda row: row['pickup_time_since_midnight'] + travel_time_matrix[row['pickup_node_id'], row['dropoff_node_id']], axis=1)
chains['new_dropoff_time_since_midnight'] = chains['new_dropoff_time_since_midnight'].apply(lambda x: math.ceil(x))

chains = chains[['pickup_node_id', 'dropoff_node_id', 'pickup_time_since_midnight', 'new_dropoff_time_since_midnight', 'chain_id', 'chain_order']]
file_path = os.path.join(os.getcwd(), 'data', 'CARTA', 'processed', 'train_chains_cong.csv')
chains.to_csv(file_path, index=False)

chains.head()

Unnamed: 0,pickup_node_id,dropoff_node_id,pickup_time_since_midnight,new_dropoff_time_since_midnight,chain_id,chain_order
0,5897,595,14760,14847,0,0
1,5839,595,14790,15623,0,1
2,236,595,15060,15860,0,2
3,5839,595,15300,16133,0,3
4,5897,595,15330,15417,0,4
