In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import cPickle as pickle
from model.utilities import haversine, diff_timestamps

# Reading in static schedule data
stops_full = pd.read_csv('data/google_transit/stops.txt', index_col='stop_id')
routes = pd.read_csv('data/google_transit/routes.txt', index_col='route_id')
trips = pd.read_csv('data/google_transit/trips.txt', index_col='trip_id')
stop_times = pd.read_csv('data/google_transit/stop_times.txt')
shapes = pd.read_csv('data/google_transit/shapes.txt')

# Some stops are named "Not a public stop" but still appear in trips.
# In the few trips that use them they sit only at the very beginning or
# the end, so dropping them here still yields the connections we expect.
# The ids are listed once so the two filters below cannot drift apart.
NON_PUBLIC_STOP_IDS = [7520, 7530, 7531, 7540]
stops = stops_full[~stops_full.index.isin(NON_PUBLIC_STOP_IDS)]
stop_times = stop_times[~stop_times['stop_id'].isin(NON_PUBLIC_STOP_IDS)]

# Some stops are listed in stops.txt but never used by any trip; drop them.
used_stops = set(stop_times['stop_id'].unique())
stops = stops[stops.index.isin(used_stops)]

# Sorted, de-duplicated '<stop_id>_<arrival_time>' labels for every stop,
# keyed by stop_id, to make later lookups fast.
# A single groupby pass replaces re-filtering the whole stop_times frame
# (plus a row-wise apply) once per stop.
all_stop_timepoints = {}
for stopid, arrivals in stop_times.groupby('stop_id')['arrival_time']:
    all_stop_timepoints[stopid] = sorted(
        '{0}_{1}'.format(stopid, arrival) for arrival in arrivals.unique())

In [2]:
# Load the prebuilt graph from disk. read_gpickle unpickles the file, so
# only point it at graph files that come from a trusted source.
G_x = nx.read_gpickle('graph_x_7.gpkl')

In [3]:
G_x['3335_07:42:08']

{'3327_07:43:00': {'block_id': 3309,
  'duration': 52,
  'route_id': 11076,
  'service_id': 1,
  'shape_id': 135779,
  'trip_id': 6887009,
  'type': 'ride'},
 '3335_07:44:08': {'duration': 120, 'type': 'wait'},
 '3336_07:46:47': {'duration': 279, 'type': 'walk'},
 '4179_07:55:37': {'duration': 809, 'type': 'walk'},
 '4180_07:52:27': {'duration': 619, 'type': 'walk'},
 '4185_07:55:24': {'duration': 796, 'type': 'walk'}}

In [31]:
def earliest_target(source, target, est_time, G):
    """Return the shortest path from `source` to the earliest reachable timepoint of `target`.

    Candidates are the entries of the module-level
    `all_stop_timepoints[target]`, visited in reverse list order. A
    candidate is tried only if its time (the last 8 characters of its
    label) compares greater than the source's time as a string and
    `diff_timestamps(source_time, candidate_time)` is below `est_time`.
    Each successful search overwrites the previous result, and the scan
    stops at the first candidate for which no path exists.

    Args:
        source: node label in `G` whose last 8 characters are its time.
        target: key into `all_stop_timepoints`.
        est_time: exclusive upper bound on the `diff_timestamps` result
            (presumably seconds -- confirm against model.utilities).
        G: graph whose edges carry a 'duration' weight.

    Returns:
        The (length, path) pair from `nx.bidirectional_dijkstra` for the
        last candidate that was reached, or (None, None) if none was.
    """
    t1 = source[-8:]
    p_t, p = None, None
    for n in all_stop_timepoints[target][::-1]:
        t2 = n[-8:]
        if (t2 > t1) and (diff_timestamps(t1, t2) < est_time):
            try:
                p_t, p = nx.bidirectional_dijkstra(G, source, n, weight='duration')
            except nx.NetworkXNoPath:
                # This candidate is unreachable: keep the previous result.
                break
    # Also reached when every candidate was reachable; previously the
    # function fell off the end here and returned None.
    return p_t, p

In [35]:
earliest_target('3335_07:42:08', 4555, 3600, G_x)

(2692,
 ['3335_07:42:08',
  '3336_07:46:47',
  '3329_07:48:01',
  '3326_07:49:00',
  '3341_07:49:56',
  '3345_07:51:00',
  '3323_07:52:13',
  '3331_07:53:17',
  '3334_07:54:23',
  '3349_07:55:27',
  '5553_07:57:00',
  '3292_07:58:40',
  '3293_08:00:00',
  '3299_08:01:32',
  '3289_08:02:42',
  '3282_08:04:00',
  '3296_08:04:55',
  '3297_08:05:39',
  '7761_08:06:39',
  '7762_08:07:39',
  '7763_08:08:38',
  '7764_08:10:43',
  '7324_08:11:14',
  '7360_08:12:52',
  '7361_08:14:30',
  '7362_08:16:00',
  '7363_08:17:48',
  '7364_08:20:00',
  '7365_08:22:08',
  '4555_08:27:00'])

In [4]:
# all_shortest_paths returns a lazy generator: nothing is computed until it
# is iterated, and it can only be consumed once.
foo = nx.all_shortest_paths(G_x, '3335_07:42:08', '4555_09:47:13', weight='duration')

In [8]:
# Collect the shortest paths that visit stop 4555 more than once.
# Paths are appended one at a time so that partial results survive if the
# (potentially very long) enumeration is interrupted.
cand = []
for p in foo:
    # Node labels look like '<stop_id>_<HH:MM:SS>'; take everything before
    # the last underscore instead of assuming a 4-character stop id.
    st = [n.rsplit('_', 1)[0] for n in p]
    if st.count('4555') > 1:
        cand.append(p)

KeyboardInterrupt: 

In [11]:
cand[4]

['3335_07:42:08',
 '3327_07:43:00',
 '3327_07:45:00',
 '3328_07:45:48',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '3341_07:49:56',
 '3345_07:51:00',
 '3323_07:52:13',
 '3331_07:53:17',
 '3334_07:54:23',
 '3349_07:55:27',
 '5553_07:57:00',
 '3292_07:58:40',
 '3293_08:00:00',
 '3299_08:01:32',
 '3289_08:02:42',
 '3282_08:04:00',
 '3296_08:04:55',
 '3297_08:05:39',
 '7761_08:06:39',
 '7762_08:07:39',
 '7763_08:08:38',
 '7764_08:10:43',
 '7324_08:11:14',
 '7360_08:12:52',
 '7361_08:14:30',
 '7362_08:16:00',
 '7363_08:17:48',
 '7364_08:20:00',
 '7365_08:22:08',
 '4555_08:27:00',
 '7365_08:31:08',
 '7428_08:40:16',
 '7352_08:43:52',
 '4560_08:58:33',
 '4547_09:06:02',
 '7365_09:11:08',
 '7428_09:22:16',
 '7772_09:29:00',
 '7428_09:37:20',
 '4547_09:40:56',
 '4547_09:44:50',
 '4555_09:47:13']

In [16]:
# Clear any earlier result first so a failed search cannot leave stale
# values of path_length / path behind for later cells to display.
path_length, path = None, None
try:
    path_length, path = nx.bidirectional_dijkstra(G_x, '3335_07:42:08', '4555_08:27:00', weight='duration')
except nx.NetworkXNoPath:
    print("that ain't no path")

In [17]:
path_length

2692

In [18]:
path

['3335_07:42:08',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '3341_07:49:56',
 '3345_07:51:00',
 '3323_07:52:13',
 '3331_07:53:17',
 '3334_07:54:23',
 '3349_07:55:27',
 '5553_07:57:00',
 '3292_07:58:40',
 '3293_08:00:00',
 '3299_08:01:32',
 '3289_08:02:42',
 '3282_08:04:00',
 '3296_08:04:55',
 '3297_08:05:39',
 '7761_08:06:39',
 '7762_08:07:39',
 '7763_08:08:38',
 '7764_08:10:43',
 '7324_08:11:14',
 '7360_08:12:52',
 '7361_08:14:30',
 '7362_08:16:00',
 '7363_08:17:48',
 '7364_08:20:00',
 '7365_08:22:08',
 '4555_08:27:00']

In [7]:
all_stop_timepoints[4555]

['4555_05:28:00',
 '4555_05:28:48',
 '4555_05:48:00',
 '4555_05:48:48',
 '4555_06:09:00',
 '4555_06:29:00',
 '4555_06:49:00',
 '4555_07:04:00',
 '4555_07:09:00',
 '4555_07:19:00',
 '4555_07:29:00',
 '4555_07:34:13',
 '4555_07:47:00',
 '4555_07:49:13',
 '4555_08:04:13',
 '4555_08:07:00',
 '4555_08:18:13',
 '4555_08:27:00',
 '4555_08:33:13',
 '4555_08:47:00',
 '4555_08:48:13',
 '4555_09:02:13',
 '4555_09:07:00',
 '4555_09:17:13',
 '4555_09:27:00',
 '4555_09:32:13',
 '4555_09:47:00',
 '4555_09:47:13',
 '4555_10:02:13',
 '4555_10:06:00',
 '4555_10:17:13',
 '4555_10:21:00',
 '4555_10:32:13',
 '4555_10:36:00',
 '4555_10:47:13',
 '4555_10:51:00',
 '4555_11:02:13',
 '4555_11:06:00',
 '4555_11:17:13',
 '4555_11:21:00',
 '4555_11:32:13',
 '4555_11:36:00',
 '4555_11:47:13',
 '4555_11:51:00',
 '4555_12:02:13',
 '4555_12:06:00',
 '4555_12:17:13',
 '4555_12:21:13',
 '4555_12:32:13',
 '4555_12:36:13',
 '4555_12:47:13',
 '4555_12:51:13',
 '4555_13:02:13',
 '4555_13:06:13',
 '4555_13:17:13',
 '4555_13:

In [18]:
# Shortest path, weighted by edge 'duration', from node '3335_07:42:08' to
# node '4555_08:27:00'. Raises nx.NetworkXNoPath if the target is unreachable.
path_length, path = nx.bidirectional_dijkstra(G_x, '3335_07:42:08', '4555_08:27:00', weight='duration')

In [21]:
nx.bidirectional_dijkstra?

In [23]:
path

['3335_07:42:08',
 '3336_07:46:47',
 '3329_07:48:01',
 '3326_07:49:00',
 '3341_07:49:56',
 '3345_07:51:00',
 '3323_07:52:13',
 '3331_07:53:17',
 '3334_07:54:23',
 '3349_07:55:27',
 '5553_07:57:00',
 '3292_07:58:40',
 '3293_08:00:00',
 '3299_08:01:32',
 '3289_08:02:42',
 '3282_08:04:00',
 '3296_08:04:55',
 '3297_08:05:39',
 '7761_08:06:39',
 '7762_08:07:39',
 '7763_08:08:38',
 '7764_08:10:43',
 '7324_08:11:14',
 '7360_08:12:52',
 '7361_08:14:30',
 '7362_08:16:00',
 '7363_08:17:48',
 '7364_08:20:00',
 '7365_08:22:08',
 '4555_08:27:00']

In [34]:
def print_itinerary(path, G, routes):
    """Print one line per edge along `path`.

    Each line shows the 'arrival_time' of the edge's start node, a label
    for the leg, the edge's service_id, and the 'stop_name' of the edge's
    end node.

    Edges that carry a 'route_id' are labelled with that route's
    'route_short_name' from `routes`. Edges without one are labelled with
    their own 'type' attribute (e.g. 'walk' or 'wait'), falling back to
    'walk' when no type is present, so waiting at a stop is no longer
    reported as walking.

    Args:
        path: list of node labels, as returned by a networkx path search.
        G: graph the path was computed on.
        routes: routes table indexed by route_id.
    """
    for u, v in zip(path[:-1], path[1:]):
        edge = G[u][v]
        kind = edge.get('type', 'walk')
        if 'route_id' in edge:
            leg = routes.loc[edge['route_id']]['route_short_name']
        else:
            leg = kind
        serv = edge.get('service_id', kind)
        print('{0}: {1}({2}) to {3}'.format(G.node[u]['arrival_time'],
                                            leg, serv, G.node[v]['stop_name']))

print_itinerary(path, G_x, routes)

07:42:08: walk(walk) to 18th St & Hattie St
07:46:47: 33(1) to 18th St & Diamond St
07:48:01: 33(1) to 18th St & Castro St
07:49:00: 33(1) to 18th St & Noe St
07:49:56: 33(1) to 18th St & Sanchez St
07:51:00: 33(1) to 18th St & Church St
07:52:13: 33(1) to 18th St & Dolores St
07:53:17: 33(1) to 18th St & Guerrero St
07:54:23: 33(1) to 18th St & Valencia St
07:55:27: 33(1) to Mission St & 18th St
07:57:00: 33(1) to 16th St & Mission St
07:58:40: walk(walk) to 16th St & Mission St
08:00:00: 55(3) to 16th St & Shotwell St
08:01:32: 55(3) to 16th St & Harrison St
08:02:42: 55(3) to 16th St & Bryant St
08:04:00: 55(3) to 16th St & Potrero Ave
08:04:55: 55(3) to 16th St & San Bruno Ave
08:05:39: 55(3) to 16th St& Rhode Island St
08:06:39: 55(3) to 16th St & Wisconsin St
08:07:39: 55(3) to 16th St & Missouri St
08:08:38: 55(3) to 16th St & 4th St
08:10:43: 55(3) to 3rd St & Gene Friend Way
08:11:14: walk(walk) to UCSF/Mission Bay
08:12:52: KT(2) to Third Street & Mariposa St
08:14:30: KT(2) 

In [33]:
trips.head()

Unnamed: 0_level_0,route_id,service_id,trip_headsign,direction_id,block_id,shape_id
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6858659,11047,2,Geary + 33rd Avenue,0,115,135594
7048008,11047,1,Geary + 33rd Avenue,0,110,135595
7048006,11047,1,Geary + 33rd Avenue,0,116,135595
7048005,11047,1,Geary + 33rd Avenue,0,103,135595
7048004,11047,1,Geary + 33rd Avenue,0,113,135595


In [36]:
nx.shortest_paths.bidirectional_dijkstra?

In [3]:
# (u, v, data) triples for every edge whose 'service_id' is 2; edges with
# no service_id are skipped. Presumably the Saturday schedule, going by
# the variable name -- confirm against calendar.txt.
sat_edges = [edge for edge in G_x.edges_iter(data=True)
             if edge[2].get('service_id', 0) == 2]

In [4]:
# (u, v, data) triples for every edge whose 'service_id' is 3; edges with
# no service_id are skipped. Presumably the Sunday schedule, going by the
# variable name -- confirm against calendar.txt.
sun_edges = [edge for edge in G_x.edges_iter(data=True)
             if edge[2].get('service_id', 0) == 3]

In [5]:
# (u, v, data) triples for every edge whose 'service_id' is 1; edges with
# no service_id are skipped. Presumably the weekday schedule, going by the
# variable name -- confirm against calendar.txt.
w_edges = [edge for edge in G_x.edges_iter(data=True)
           if edge[2].get('service_id', 0) == 1]

In [6]:
G_x.number_of_edges()

6474187

In [7]:
len(sat_edges)

198470

In [8]:
len(sun_edges)

148775

In [9]:
len(w_edges)

401965

In [11]:
# Remove every edge listed in sat_edges from the graph. G_x is modified in
# place, so reload the graph to get these edges back.
G_x.remove_edges_from(sat_edges)

In [12]:
# Remove every edge listed in sun_edges from the graph. G_x is modified in
# place, so reload the graph to get these edges back.
G_x.remove_edges_from(sun_edges)

In [13]:
G_x.number_of_edges()

6126942

In [46]:
# Shortest path, weighted by edge 'duration', from node '3335_07:42:08' to
# node '4555_09:07:00'. Raises nx.NetworkXNoPath if the target is unreachable.
path_length, path = nx.bidirectional_dijkstra(G_x, '3335_07:42:08', '4555_09:07:00', weight='duration')

In [47]:
# Print one line per edge of `path`: the start node's arrival_time, a label
# for the leg, the edge's service_id, and the end node's stop_name.
for u, v in zip(path[:-1], path[1:]):
    edge = G_x[u][v]
    # Edges without a route_id are labelled with their own 'type'
    # ('walk' or 'wait') instead of calling all of them 'walk'.
    fallback = edge.get('type', 'walk')
    route = edge.get('route_id', fallback)
    serv = edge.get('service_id', fallback)
    if 'route_id' in edge:
        route = routes.loc[route]['route_short_name']
    print('{0}: {1}({2}) to {3}'.format(G_x.node[u]['arrival_time'],
                                        route, serv, G_x.node[v]['stop_name']))

07:42:08: walk(walk) to Corbett Ave & Hattie St
07:52:27: walk(walk) to Corbett Ave & Douglass St
07:57:48: 37(3) to Market St & Castro St
07:59:00: walk(walk) to Metro Castro Station/Downtown
08:01:00: KT(3) to Metro Church Station/Downtown
08:02:07: KT(3) to Metro Van Ness Station
08:04:00: walk(walk) to Market St & Van Ness Ave
08:06:00: walk(walk) to 11th St & Market St
08:10:00: 9(2) to 11th St & Mission St
08:10:47: 9(2) to 11th St & Howard St
08:11:29: 9(2) to 11th St & Harrison St
08:12:56: walk(walk) to Bryant St & Division St
08:26:46: walk(walk) to Bryant St & Alameda St
08:37:41: walk(walk) to Bryant St & Alameda St
08:38:19: 27(2) to Bryant St & Division St
08:38:57: 27(2) to Bryant St & 9th St
08:40:04: walk(walk) to 8th St & Brannan St
08:43:00: 19(2) to 8th St & Townsend St
08:43:44: 19(2) to Division St & Rhode Island St
08:44:03: 19(2) to Rhode Island St & Alameda St
08:44:29: 19(2) to Rhode Island St & 15th St
08:44:58: 19(2) to Rhode Island St & 16th St
08:45:27: 19

In [18]:
def get_closest_stop(lat, lon, stops):
    """Return the index label of the row in `stops` nearest to (lat, lon).

    Args:
        lat: latitude of the query point.
        lon: longitude of the query point.
        stops: DataFrame with 'stop_lat' and 'stop_lon' columns; its index
            supplies the returned label (stop_id for the stops table
            loaded in this notebook).

    Returns:
        The index label of the row with the smallest `haversine` distance
        to the query point.
    """
    # haversine is called with longitude first, then latitude, for both points.
    dist = stops.apply(lambda x: haversine(x['stop_lon'],
                                           x['stop_lat'], lon, lat), axis=1)
    # idxmin returns the index label in every pandas version; argmin returns
    # the integer position in current pandas, which is not a stop id.
    return dist.idxmin()

In [15]:
# Distance from every stop to the point (lat 37.781954, lon -122.396262),
# as computed by haversine (units are whatever that helper returns --
# presumably km, confirm in model.utilities).
dist = stops.apply(
    lambda stop: haversine(stop['stop_lon'], stop['stop_lat'],
                           -122.396262, 37.781954),
    axis=1)
dist.head()

stop_id
390     9.679407
913     7.446415
3003    0.379653
3004    0.354342
3008    0.374988
dtype: float64

In [17]:
# idxmin returns the index label (the stop_id) of the nearest stop in every
# pandas version; argmin returns an integer position in current pandas.
dist.idxmin()

3144

In [14]:
stops.head()

Unnamed: 0_level_0,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
390,19th Avenue & Holloway St,,37.72119,-122.475096,,
913,DUBLIN ST & LAGRANDE AVE,,37.719192,-122.425802,,
3003,2nd St & Brannan St,,37.781827,-122.391945,,
3004,2nd St & Brannan St,,37.781854,-122.392232,,
3008,2nd St & Folsom St,,37.785318,-122.396562,,


In [19]:
stops.loc[3144]

stop_name    3rd St & Perry St
stop_desc                     
stop_lat               37.7827
stop_lon              -122.397
zone_id                       
stop_url                      
Name: 3144, dtype: object