In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from shapely.wkt import loads
import matplotlib.pyplot as plt

### fix random seed
#random.seed(0)
np.random.seed(0)

In [2]:
case = 'fairfax'

### read OD
edges = pd.read_csv('{}_links.csv'.format(case), float_precision = 'round_trip')
nodes = pd.read_csv('{}_nodes.csv'.format(case), float_precision = 'round_trip')

### bay area or tokyo
edges['nid_s'] = edges['start_node_id']
edges['nid_e'] = edges['end_node_id']
nodes['nid'] = nodes['node_id']

print(edges.shape, nodes.shape)
display(edges.head(1))
display(nodes.head(1))

(48795, 10) (21332, 5)


Unnamed: 0,link_id,start_node_id,end_node_id,link_type,length,maxmph,lanes,geometry,nid_s,nid_e
0,0,0,7270,motorway,790.94,55,2,"LINESTRING (-122.5152143 37.9653023, -122.5146...",0,7270


Unnamed: 0,node_id,lon,lat,type,nid
0,0,-122.515214,37.965302,real,0


In [3]:
removed_edges = edges[edges['length']<=20].copy()
print(edges.shape, removed_edges.shape)
removed_node_grp = {}
grp_id = 0
for edge in removed_edges.itertuples():
    nid_s = getattr(edge, 'nid_s')
    nid_e = getattr(edge, 'nid_e')
    try:
        nid_s_grp = removed_node_grp[nid_s]
    except KeyError:
        nid_s_grp = grp_id
    try:
        nid_e_grp = removed_node_grp[nid_e]
    except KeyError:
        nid_e_grp = grp_id
    nid_se_grp_id = min(nid_s_grp, nid_e_grp)
    if (nid_s==755916) and (nid_e==938408):
        print(nid_s, nid_e, nid_s_grp, nid_e_grp)
    removed_node_grp[nid_s] = nid_se_grp_id
    removed_node_grp[nid_e] = nid_se_grp_id
    if nid_se_grp_id == grp_id: grp_id += 1
removed_node_grp_df = pd.DataFrame(removed_node_grp.items(), columns=['nid', 'node_grp'])
removed_node_grp_df['node_grp'] = removed_node_grp_df['node_grp'].apply(lambda x: 'g{}'.format(x))
print(nodes.shape, removed_node_grp_df.shape)
removed_node_grp_df.tail()

(48795, 10) (6662, 10)
(21332, 5) (5897, 2)


Unnamed: 0,nid,node_grp
5892,21256,g2465
5893,21257,g2465
5894,21265,g2361
5895,21318,g2466
5896,21317,g2466


In [4]:
new_nodes = pd.merge(nodes, removed_node_grp_df, how='left', on='nid')
new_nodes['node_grp'] = np.where(
    pd.isnull(new_nodes['node_grp']), new_nodes['nid'], new_nodes['node_grp'])
new_nodes = new_nodes.groupby('node_grp').agg({'lon': np.mean, 'lat': np.mean}).reset_index()
print(nodes.shape, new_nodes.shape)

(21332, 5) (17848, 3)


In [5]:
edges.head(1)

Unnamed: 0,link_id,start_node_id,end_node_id,link_type,length,maxmph,lanes,geometry,nid_s,nid_e
0,0,0,7270,motorway,790.94,55,2,"LINESTRING (-122.5152143 37.9653023, -122.5146...",0,7270


In [6]:
new_edges = edges.copy() ### remove those with duplicated new_node_id
new_edges = pd.merge(new_edges, removed_node_grp_df, 
                           how='left', left_on='nid_s', right_on='nid')
new_edges = pd.merge(new_edges, removed_node_grp_df, 
                           how='left', left_on='nid_e', right_on='nid', suffixes=['_ns0', '_ne0'])
new_edges['node_grp_ns0'] = np.where(
    pd.isnull(new_edges['node_grp_ns0']), new_edges['nid_s'], new_edges['node_grp_ns0'])
new_edges['node_grp_ne0'] = np.where(
    pd.isnull(new_edges['node_grp_ne0']), new_edges['nid_e'], new_edges['node_grp_ne0'])
new_edges = new_edges[[
    'nid_s', 'nid_e', 'node_grp_ns0', 'node_grp_ne0', 'length', 'lanes', 'link_type', 'maxmph', 'geometry']]
new_edges = new_edges.loc[new_edges['node_grp_ns0']!=new_edges['node_grp_ne0']]
### update nodes
### first remove those without links
new_nodes = new_nodes.loc[(
    new_nodes['node_grp'].isin(new_edges['node_grp_ns0'])) | 
    (new_nodes['node_grp'].isin(new_edges['node_grp_ne0']))
]
new_nodes['node_id'] = np.arange(new_nodes.shape[0])

new_edges = pd.merge(new_edges, new_nodes, how='left', left_on='node_grp_ns0', right_on='node_grp')
new_edges = pd.merge(new_edges, new_nodes, how='left', left_on='node_grp_ne0', right_on='node_grp', 
                           suffixes=['_ns', '_ne'])

geometry_list = []
for edge in new_edges.itertuples():
    geometry = getattr(edge, 'geometry').replace('LINESTRING(','').replace(')', '').split(', ')
    geometry = [tuple(xy.split(' ')) for xy in geometry]
    lon_ns, lat_ns = getattr(edge, 'lon_ns'), getattr(edge, 'lat_ns')
    lon_ne, lat_ne = getattr(edge, 'lon_ne'), getattr(edge, 'lat_ne')
    geometry = [(lon_ns, lat_ns)] + geometry[1:-2] + [(lon_ne, lat_ne)]
    geometry_list.append('LINESTRING('+', '.join('{} {}'.format(xy[0], xy[1]) for xy in geometry)+')')
new_edges['geometry'] = geometry_list
new_edges['start_nid'] = new_edges['node_id_ns']
new_edges['end_nid'] = new_edges['node_id_ne']
new_edges['nid_s_old'] = new_edges['nid_s']
new_edges['nid_e_old'] = new_edges['nid_e']
new_edges = new_edges[['start_nid', 'end_nid', 'nid_s_old', 'nid_e_old',
                                  'length', 'lanes', 'link_type', 'maxmph', 'geometry']]
new_edges = new_edges.loc[new_edges['start_nid']!=new_edges['end_nid']]

### add attributes
new_edges['fft'] = new_edges['length']/(new_edges['maxmph']*1609/3600)
new_edges = new_edges.sort_values(by='fft', ascending=True).drop_duplicates(subset=['start_nid', 'end_nid'], keep='first')

### add link_id
new_edges['link_id'] = np.arange(new_edges.shape[0])
print(new_edges.shape, new_nodes.shape)
display(new_edges.tail(1))
display(new_nodes.tail(1))

(40194, 11) (17848, 4)


Unnamed: 0,start_nid,end_nid,nid_s_old,nid_e_old,length,lanes,link_type,maxmph,geometry,fft,link_id
38123,13918,2890,19219,3368,11710.86,1,tertiary,25,"LINESTRING(-122.624709 37.9727076, -122.624836...",1048.081939,40193


Unnamed: 0,node_grp,lon,lat,node_id
17847,g999,-122.546796,38.003471,17847


In [7]:
removed_node_grp_df.to_csv('{}_nid_grp_conversion.csv'.format(case), index=False)
new_nodes.to_csv('new_{}_nodes.csv'.format(case), index=False)
new_edges.to_csv('new_{}_links.csv'.format(case), index=False)

In [9]:
node_old_map_new = {}
for n in new_edges.itertuples():
    node_old_map_new[getattr(n, 'nid_s_old')] = getattr(n, 'start_nid')
    node_old_map_new[getattr(n, 'nid_e_old')] = getattr(n, 'end_nid')

In [13]:
for od_file in ['{}_ods_day'.format(case), 
                'background_ods_day_for_Marin']:
    print(od_file)
    sub_od = pd.read_csv(od_file+'.csv', float_precision = 'round_trip')
    sub_od['origin_nid_new'] = sub_od['origin_nid'].map(node_old_map_new)
    sub_od['destin_nid_new'] = sub_od['destin_nid'].map(node_old_map_new)
    sub_od =  sub_od[~pd.isnull(sub_od['origin_nid_new'])]
    sub_od =  sub_od[~pd.isnull(sub_od['destin_nid_new'])]
    sub_od['origin_nid_new'] = sub_od['origin_nid_new'].astype('int')
    sub_od['destin_nid_new'] = sub_od['destin_nid_new'].astype('int')    
    sub_od.to_csv(od_file + '_new.csv', index = False)

fairfax_ods_day
background_ods_day_for_Marin
