In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import cPickle as pickle
from model.utilities import haversine, diff_timestamps

# Reading in static schedule data
stops_full = pd.read_csv('data/google_transit/stops.txt', index_col='stop_id')
routes = pd.read_csv('data/google_transit/routes.txt', index_col='route_id')
trips = pd.read_csv('data/google_transit/trips.txt', index_col='trip_id')
stop_times = pd.read_csv('data/google_transit/stop_times.txt')
shapes = pd.read_csv('data/google_transit/shapes.txt')

# Some of these stops are named "Not a public stop" but are still in trips.
# Luckily, in the few trips they appear in, they're only either at the
# beginning or the end, so we can remove them now and we'll still build
# a nice graph with the connections we expect.
stops = stops_full[~stops_full.index.isin([7520, 7530, 7531, 7540])]
stop_times = stop_times[~stop_times['stop_id'].isin([7520, 7530, 7531, 7540])]

# Oh and some stops are in stops.txt but not used in trips... let's remove 'em
used_stops = set(stop_times['stop_id'].unique())
stops = stops[stops.index.isin(used_stops)]


In [2]:
G_x = nx.read_gpickle('graph_x_5.gpkl')

In [3]:
#foot speeds in m/s
walk_speed = 1.39
run_speed = 4.47
sprint_speed = 6.7

In [4]:
sorted(list((n for n,d in G_x.nodes_iter(data=True) if d['stop_id'] == 3335)))[:20]

['3335_05:27:38',
 '3335_05:27:48',
 '3335_05:47:38',
 '3335_05:57:48',
 '3335_06:02:38',
 '3335_06:15:38',
 '3335_06:17:48',
 '3335_06:38:48',
 '3335_06:48:48',
 '3335_06:54:48',
 '3335_07:10:58',
 '3335_07:19:48',
 '3335_07:27:58',
 '3335_07:42:08',
 '3335_07:44:08',
 '3335_07:51:48',
 '3335_08:00:08',
 '3335_08:17:08',
 '3335_08:22:58',
 '3335_08:29:08']

In [5]:
G_x.edge['4555_12:17:13']

{'4555_12:21:13': {'duration': 240, 'type': 'wait'},
 '4559_12:17:53': {'block_id': 1908,
  'duration': 40,
  'route_id': 11063,
  'service_id': 1,
  'shape_id': 135694,
  'trip_id': 6874997,
  'type': 'ride'}}

In [12]:
# Let's make some sorted stop-timepoint lists for each stop_id to
# make lookup faster in our loop.
all_stop_timepoints = {}
for stopid in used_stops:
    all_stop_timepoints[stopid] = sorted( list( set(stop_times[stop_times['stop_id'] == stopid].\
                                                    apply(lambda x: '{0}_{1}'.\
                                                          format(stopid, \
                                                                 x['arrival_time']), axis=1))))

In [13]:
all_stop_timepoints[7423]

['7423_05:49:43',
 '7423_05:50:33',
 '7423_05:51:23',
 '7423_06:09:23',
 '7423_06:09:43',
 '7423_06:11:33',
 '7423_06:28:23',
 '7423_06:30:43',
 '7423_06:32:33',
 '7423_06:36:23',
 '7423_06:44:23',
 '7423_06:50:43',
 '7423_06:55:43',
 '7423_06:56:23',
 '7423_06:58:23',
 '7423_07:10:43',
 '7423_07:11:23',
 '7423_07:16:23',
 '7423_07:16:43',
 '7423_07:23:23',
 '7423_07:30:43',
 '7423_07:31:43',
 '7423_07:33:23',
 '7423_07:36:23',
 '7423_07:43:23',
 '7423_07:47:54',
 '7423_07:50:54',
 '7423_07:52:23',
 '7423_07:56:23',
 '7423_08:01:23',
 '7423_08:03:54',
 '7423_08:10:23',
 '7423_08:10:54',
 '7423_08:16:23',
 '7423_08:19:44',
 '7423_08:19:54',
 '7423_08:27:44',
 '7423_08:30:54',
 '7423_08:34:54',
 '7423_08:35:44',
 '7423_08:36:23',
 '7423_08:43:44',
 '7423_08:48:54',
 '7423_08:50:54',
 '7423_08:51:44',
 '7423_08:56:23',
 '7423_08:59:44',
 '7423_09:04:04',
 '7423_09:07:44',
 '7423_09:11:54',
 '7423_09:15:44',
 '7423_09:16:23',
 '7423_09:19:04',
 '7423_09:23:44',
 '7423_09:31:44',
 '7423_09:

In [15]:
for row in stops.iterrows():
    stop_id = row[0]
    stop_info = row[1]

    # calculate distance between this stop and every other stop
    dists = stops.apply(lambda x: haversine(x['stop_lon'], \
                        x['stop_lat'], stop_info['stop_lon'],\
                        stop_info['stop_lat']), axis=1)

    # only keep the stops within 200 meters
    dists = dists[dists <= 0.2]

    # don't connect the stop to itself, duh
    dists = dists.drop(row[0])

    # convert to walking times. add 30 seconds for maybe waiting for a stoplight, sure.
    walk_times = np.ceil(1000*dists/walk_speed) + 30

    # for every closest stop, add an edge between each stop-timepoint
    # and the earliest walkable closest stop timepoint
    for close_stop in walk_times.iteritems():
        close_stop_id = close_stop[0]
        walk_time = close_stop[1]
        for node1 in all_stop_timepoints[stop_id]:
            t1 = G_x.node[node1]['arrival_time']
            for node2 in all_stop_timepoints[close_stop_id]:
                t2 = G_x.node[node2]['arrival_time']
                if (t2 > t1) and (diff_timestamps(t1, t2) >= walk_time):
                    if G_x.has_edge(node1, node2):
                        break
                    G_x.add_edge(node1, node2, duration=walk_time, type='walk')
                    break

In [14]:
G_x.node['3335_07:42:08']['arrival_time'] < G_x.node['4555_05:48:48']['arrival_time']

False

In [18]:
G_x['3335_07:42:08']

{'3327_07:43:00': {'block_id': 3309,
  'duration': 52,
  'route_id': 11076,
  'service_id': 1,
  'shape_id': 135779,
  'trip_id': 6887009,
  'type': 'ride'},
 '3335_07:44:08': {'duration': 120, 'type': 'wait'},
 '3336_07:46:47': {'duration': 52.0, 'type': 'walk'},
 '4179_07:55:37': {'duration': 121.0, 'type': 'walk'},
 '4180_07:52:27': {'duration': 112.0, 'type': 'walk'},
 '4185_07:55:24': {'duration': 149.0, 'type': 'walk'}}

In [19]:
nx.write_gpickle(G_x, 'graph_x_6.gpkl')

In [36]:
nx.dijkstra_path_length(G_x, '3335_07:42:08', '4555_09:47:13', weight='duration')

2122.0

In [37]:
2122./60

35.36666666666667

In [23]:
nx.dijkstra_path(G_x, '3335_07:42:08', '4555_09:47:13', weight='duration')

['3335_07:42:08',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '4314_07:51:00',
 '3326_08:05:00',
 '4315_08:07:00',
 '4316_08:07:25',
 '4318_08:07:55',
 '4321_08:08:26',
 '4322_08:08:53',
 '4325_08:09:26',
 '4333_08:09:41',
 '4327_08:10:00',
 '4328_08:10:28',
 '7290_08:11:00',
 '3521_08:11:35',
 '5732_08:12:06',
 '5734_08:12:35',
 '5736_08:13:07',
 '5738_08:13:37',
 '3544_08:14:09',
 '3536_08:14:44',
 '3538_08:15:19',
 '3541_08:16:00',
 '4157_08:16:40',
 '4161_08:17:08',
 '4150_08:17:30',
 '4146_08:17:47',
 '4143_08:18:21',
 '4152_08:18:52',
 '4154_08:19:10',
 '4159_08:19:34',
 '7213_08:20:07',
 '4148_08:21:00',
 '5093_08:21:41',
 '5094_08:22:35',
 '5095_08:23:24',
 '5884_08:24:17',
 '5882_08:24:55',
 '5880_08:25:35',
 '5939_08:42:44',
 '5938_08:59:16',
 '5936_08:59:43',
 '5934_09:00:24',
 '5176_09:15:04',
 '3129_09:25:27',
 '7404_09:30:54',
 '7352_09:31:52',
 '4555_09:47:00',
 '4555_09:47:13']

In [24]:
path = _

In [29]:
G_x['3326_07:49:00']

{'3311_07:54:00': {'duration': 169.0, 'type': 'walk'},
 '3325_07:50:00': {'duration': 52.0, 'type': 'walk'},
 '3326_08:05:00': {'duration': 960, 'type': 'wait'},
 '3340_07:57:02': {'duration': 168.0, 'type': 'walk'},
 '3341_07:49:56': {'block_id': 3301,
  'duration': 56,
  'route_id': 11076,
  'service_id': 1,
  'shape_id': 135773,
  'trip_id': 6886921,
  'type': 'ride'},
 '3341_08:05:56': {'duration': 153.0, 'type': 'walk'},
 '4313_07:51:59': {'duration': 153.0, 'type': 'walk'},
 '4314_07:51:00': {'duration': 36.0, 'type': 'walk'},
 '4315_07:50:44': {'duration': 44.0, 'type': 'walk'},
 '4316_07:55:34': {'duration': 147.0, 'type': 'walk'},
 '4317_07:59:05': {'duration': 165.0, 'type': 'walk'},
 '4334_07:54:10': {'duration': 158.0, 'type': 'walk'},
 '5665_07:54:00': {'duration': 165.0, 'type': 'walk'},
 '7253_08:02:00': {'duration': 169.0, 'type': 'walk'}}

In [28]:
for i in xrange(len(path) - 1):
    print path[i]
    print path[i+1]
    print G_x[path[i]][path[i+1]], '\n'

3335_07:42:08
3336_07:46:47
{'duration': 52.0, 'type': 'walk'} 

3336_07:46:47
3329_07:48:01
{'block_id': 3301, 'trip_id': 6886921, 'route_id': 11076, 'shape_id': 135773, 'duration': 74, 'service_id': 1, 'type': 'ride'} 

3329_07:48:01
3326_07:49:00
{'block_id': 3301, 'trip_id': 6886921, 'route_id': 11076, 'shape_id': 135773, 'duration': 59, 'service_id': 1, 'type': 'ride'} 

3326_07:49:00
4314_07:51:00
{'duration': 36.0, 'type': 'walk'} 

4314_07:51:00
3326_08:05:00
{'duration': 36.0, 'type': 'walk'} 

3326_08:05:00
4315_08:07:00
{'duration': 44.0, 'type': 'walk'} 

4315_08:07:00
4316_08:07:25
{'block_id': 2403, 'type': 'ride', 'route_id': 11067, 'service_id': 2, 'duration': 25, 'shape_id': 135714, 'trip_id': 7044264} 

4316_08:07:25
4318_08:07:55
{'block_id': 2403, 'type': 'ride', 'route_id': 11067, 'service_id': 2, 'duration': 30, 'shape_id': 135714, 'trip_id': 7044264} 

4318_08:07:55
4321_08:08:26
{'block_id': 2403, 'type': 'ride', 'route_id': 11067, 'service_id': 2, 'duration': 3

In [31]:
stops.loc[4555]

stop_name    Evans Ave & Newhall St
stop_desc                          
stop_lat                    37.7419
stop_lon                   -122.386
zone_id                            
stop_url                           
Name: 4555, dtype: object

In [32]:
stops.loc[3335]

stop_name    18th St & Hattie St
stop_desc                       
stop_lat                 37.7606
stop_lon                -122.441
zone_id                         
stop_url                        
Name: 3335, dtype: object

In [33]:
routes.loc[11067]

agency_id                SFMTA
route_short_name            24
route_long_name     DIVISADERO
route_desc                    
route_type                   3
route_url                     
route_color                   
route_text_color              
Name: 11067, dtype: object