In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import cPickle as pickle
from model.utilities import haversine, diff_timestamps

# Reading in static schedule data.
# The feed directory is spelled out once so that pointing the notebook at a
# different copy of the feed is a one-line change.
TRANSIT_DATA_DIR = 'data/google_transit'

# stops, routes and trips are indexed by their id column; stop_times and
# shapes keep pandas' default integer index.
stops_full = pd.read_csv(TRANSIT_DATA_DIR + '/stops.txt', index_col='stop_id')
routes = pd.read_csv(TRANSIT_DATA_DIR + '/routes.txt', index_col='route_id')
trips = pd.read_csv(TRANSIT_DATA_DIR + '/trips.txt', index_col='trip_id')
stop_times = pd.read_csv(TRANSIT_DATA_DIR + '/stop_times.txt')
shapes = pd.read_csv(TRANSIT_DATA_DIR + '/shapes.txt')

# Some of these stops are named "Not a public stop" but are still in trips.
# Luckily, in the few trips they appear in, they're only either at the
# beginning or the end, so we can remove them now and we'll still build
# a nice graph with the connections we expect.
# The ids are listed once so the two filters below cannot drift apart.
NON_PUBLIC_STOP_IDS = [7520, 7530, 7531, 7540]
stops = stops_full[~stops_full.index.isin(NON_PUBLIC_STOP_IDS)]
stop_times = stop_times[~stop_times['stop_id'].isin(NON_PUBLIC_STOP_IDS)]

# Some stops are listed in stops.txt but never appear in stop_times;
# keep only the stops that are actually used.
used_stops = set(stop_times['stop_id'].unique())
stops = stops[stops.index.isin(used_stops)]

# Build, for each stop_id, the sorted list of its distinct
# '<stop_id>_<arrival_time>' timepoint keys, to make lookups faster later.
#
# A single groupby pass replaces filtering the whole stop_times frame once
# per stop (and formatting row by row with apply): same resulting dict, but
# the table is scanned once instead of once per stop.
all_stop_timepoints = {}
for stopid, arrival_times in stop_times.groupby('stop_id')['arrival_time']:
    # The set removes repeated arrival times at the same stop; sorting the
    # formatted keys matches the ordering of the per-stop lists built before.
    all_stop_timepoints[stopid] = sorted(
        set('{0}_{1}'.format(stopid, arrival) for arrival in arrival_times))

In [2]:
# Load the previously built graph from disk.
# The node ids used in the cells below look like '<stop_id>_<HH:MM:SS>'.
# NOTE(review): this is a pickle-based format; loading a pickle can execute
# arbitrary code, so only load graph files from a trusted source.
G_x = nx.read_gpickle('graph_x_6.gpkl')

In [4]:
# Inspect one node's adjacency: neighbour node id -> edge attribute dict.
G_x['3335_07:42:08']

{'3327_07:43:00': {'block_id': 3309,
  'duration': 52,
  'route_id': 11076,
  'service_id': 1,
  'shape_id': 135779,
  'trip_id': 6887009,
  'type': 'ride'},
 '3335_07:44:08': {'duration': 120, 'type': 'wait'},
 '3336_07:46:47': {'duration': 52.0, 'type': 'walk'},
 '4179_07:55:37': {'duration': 121.0, 'type': 'walk'},
 '4180_07:52:27': {'duration': 112.0, 'type': 'walk'},
 '4185_07:55:24': {'duration': 149.0, 'type': 'walk'}}

In [10]:
# Shortest path between two nodes, minimising the summed edge 'duration'.
# Returns the total weight and the list of node ids along the path.
# NOTE(review): the total reported in the next cell (~35 min) is much smaller
# than the wall-clock span between the two node timestamps (07:42:08 to
# 09:47:13). Presumably some edge durations exclude waiting time -- e.g. the
# walk edge 3335_07:42:08 -> 3336_07:46:47 shown above has duration 52 --
# so the result may not be the earliest-arrival path. Confirm against the
# code that builds the graph.
path_length, path = nx.bidirectional_dijkstra(G_x, '3335_07:42:08', '4555_09:47:13', weight='duration')

In [40]:
# Total path weight divided by 60 -- i.e. minutes, assuming edge durations
# are stored in seconds (TODO confirm).
path_length/60.

35.366666666666667

In [31]:
# Show the node ids along the shortest path, in travel order.
path

['3335_07:42:08',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '4314_07:51:00',
 '3326_08:05:00',
 '4315_08:07:00',
 '4316_08:07:25',
 '4318_08:07:55',
 '4321_08:08:26',
 '4322_08:08:53',
 '4325_08:09:26',
 '4333_08:09:41',
 '4327_08:10:00',
 '4328_08:10:28',
 '7290_08:11:00',
 '3521_08:11:35',
 '5732_08:12:06',
 '5734_08:12:35',
 '5736_08:13:07',
 '5738_08:13:37',
 '3544_08:14:09',
 '3536_08:14:44',
 '3538_08:15:19',
 '3541_08:16:00',
 '4157_08:16:40',
 '4161_08:17:08',
 '4150_08:17:30',
 '4146_08:17:47',
 '4143_08:18:21',
 '4152_08:18:52',
 '4154_08:19:10',
 '4159_08:19:34',
 '7213_08:20:07',
 '4148_08:21:00',
 '5093_08:21:41',
 '5094_08:22:35',
 '5095_08:23:24',
 '5884_08:24:17',
 '5882_08:24:55',
 '5880_08:25:35',
 '5939_08:42:44',
 '5938_08:59:16',
 '5936_08:59:43',
 '5934_09:00:24',
 '5176_09:15:04',
 '3129_09:25:27',
 '7404_09:30:54',
 '7352_09:31:52',
 '4555_09:47:00',
 '4555_09:47:13']

In [19]:
# Look up the routes.txt row for route_id 11066 (routes is indexed by route_id).
routes.loc[11066]

agency_id              SFMTA
route_short_name          23
route_long_name     MONTEREY
route_desc                  
route_type                 3
route_url                   
route_color                 
route_text_color            
Name: 11066, dtype: object

In [41]:
# Print one line per transit hop along the shortest path: the origin node's
# 'arrival_time', the route's short name, and the destination node's stop name.
#
# Only edges that carry a 'route_id' are printed. Edges without one (the walk
# and wait edges) are skipped on purpose; this is now an explicit `continue`
# rather than a side effect of the print being nested under the route check.
for origin, destination in zip(path, path[1:]):
    edge = G_x[origin][destination]
    if 'route_id' not in edge:
        continue
    route = routes.loc[edge['route_id']]['route_short_name']
    # A single-argument print(...) behaves the same under Python 2 and 3.
    print('{0}: {1} to {2}'.format(G_x.node[origin]['arrival_time'],
                                   route,
                                   G_x.node[destination]['stop_name']))

07:46:47: 33 to 18th St & Diamond St
07:48:01: 33 to 18th St & Castro St
08:07:00: 24 to Castro St & 19th St
08:07:25: 24 to Castro St & 20th St
08:07:55: 24 to Castro St & 21st St
08:08:26: 24 to Castro St & 22nd St
08:08:53: 24 to Castro St & 23rd St
08:09:26: 24 to Castro St & Elizabeth St
08:09:41: 24 to Castro St & 24th St
08:10:00: 24 to Castro St & 25th St
08:10:28: 24 to Castro St & 26th St
08:11:00: 24 to 26th St & Noe St
08:11:35: 24 to Noe St & 27th St
08:12:06: 24 to Noe St & 28th St
08:12:35: 24 to Noe St & 29th St
08:13:07: 24 to Noe St & 30th St
08:13:37: 24 to 30th St & Sanchez St
08:14:09: 24 to 30th St & Church St
08:14:44: 24 to 30th St & Dolores St
08:15:19: 24 to 30th St & Mission St
08:16:00: 24 to Cortland Ave & Mission St
08:16:40: 24 to Cortland Ave & Prospect Ave
08:17:08: 24 to Cortland Ave & Elsie St
08:17:30: 24 to Cortland Ave & Bocana St
08:17:47: 24 to Cortland Ave & Andover St
08:18:21: 24 to Cortland Ave & Ellsworth St
08:18:52: 24 to Cortland Ave & Fo

In [42]:
# IPython help lookup (shows the docstring); leftover from interactive
# exploration and safe to remove -- it does not change any state.
nx.dijkstra_path?

In [46]:
# Number of distinct timepoint keys recorded for stop_id 5093.
len(all_stop_timepoints[5093])

186

In [51]:
# Micro-benchmark: read a node's arrival time from its attribute dict.
%timeit G_x.node['5880_08:25:35']['arrival_time']

The slowest run took 12.99 times longer than the fastest. This could mean that an intermediate result is being cached 
10000000 loops, best of 3: 147 ns per loop


In [52]:
# Micro-benchmark: the alternative of parsing the time out of the node id string.
%timeit '5880_08:25:35'.split('_')[1]

The slowest run took 16.62 times longer than the fastest. This could mean that an intermediate result is being cached 
1000000 loops, best of 3: 244 ns per loop
