In [3]:
import pandas as pd
import numpy as np
import networkx as nx
import requests
from bs4 import BeautifulSoup
import cPickle as pickle

In [2]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """
    Calculate the great circle distance between two points
    on a sphere (coordinates specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere. The default, 6371, is the Earth's radius in
        kilometers; pass 3956 to get the distance in miles.

    Returns
    -------
    float
        Distance along the surface, in the same unit as ``radius``.
        NaN inputs propagate to a NaN result.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2.)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2.)**2
    # Floating-point rounding can leave `a` a hair above 1.0 for
    # (near-)antipodal points, which would make asin() raise ValueError.
    # Only the upper bound is clamped, and with a comparison rather than
    # min(), so that a NaN `a` is left untouched and still yields NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    return c * radius

In [5]:
# Load the pickled graph object into G. Pickle data is binary, so the file
# is opened in 'rb' mode; text mode can corrupt the stream on platforms that
# translate newlines and is rejected outright by Python 3's pickle.
# NOTE(review): unpickling can execute arbitrary code -- only load files
# from a trusted source.
with open('model/graph.pkl', 'rb') as f:
    G = pickle.load(f)

In [7]:
# Shortest path in G from node 3415 to node 4555.
nx.shortest_path(G, source=3415, target=4555)

[3415, 3329, 3311, 4726, 7227, 7520, 7334, 5509, 4555]

In [69]:
# Read the five feed tables under data/google_transit into DataFrames,
# in the same order as the names on the left-hand side.
stops, routes, trips, stop_times, shapes = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/google_transit/stops.txt',
        'data/google_transit/routes.txt',
        'data/google_transit/trips.txt',
        'data/google_transit/stop_times.txt',
        'data/google_transit/shapes.txt',
    )
)

In [33]:
# IPython-only introspection: the trailing `?` displays the docstring of
# nx.shortest_path. Exploration aid; this is not valid plain-Python syntax.
nx.shortest_path?

In [45]:
# Show the rows of `stops` whose stop_name is exactly 'Not a public stop'.
stops.loc[stops['stop_name'] == 'Not a public stop']

Unnamed: 0,stop_id,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url
4377,7520,Not a public stop,,37.709523,-122.404631,,
4386,7530,Not a public stop,,37.755387,-122.386889,,
4387,7531,Not a public stop,,37.755387,-122.386832,,
4392,7540,Not a public stop,,37.755387,-122.386867,,


In [68]:
# First few stop_times rows recorded for stop_id 7520.
stop_times.loc[stop_times['stop_id'] == 7520].head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
126835,6869040,06:55:00,06:55:00,7520,43,,,,
126876,6869041,07:08:00,07:08:00,7520,43,,,,
126917,6869042,07:22:00,07:22:00,7520,43,,,,
126958,6869043,07:35:00,07:35:00,7520,43,,,,
126999,6869044,07:49:00,07:49:00,7520,43,,,,


In [65]:
# All stop_times rows belonging to trip_id 6869266.
stop_times.loc[stop_times['trip_id'] == 6869266]

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled
134805,6869266,15:24:00,15:24:00,7520,16,,,,
134806,6869266,15:25:00,15:25:00,6352,18,,,,
134807,6869266,15:34:00,15:34:00,6364,29,,,,
134808,6869266,15:44:00,15:44:00,6038,36,,,,
134809,6869266,15:51:00,15:51:00,6026,42,,,,
134810,6869266,15:59:00,15:59:00,3244,47,,,,
134811,6869266,16:00:31,16:00:31,5652,48,,,,
134812,6869266,16:01:27,16:01:27,5651,49,,,,
134813,6869266,16:02:49,16:02:49,5650,50,,,,
134814,6869266,16:04:11,16:04:11,5647,51,,,,


In [67]:
# Preview the first five rows of the trips table.
trips.head(n=5)

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id,block_id,shape_id
0,11047,2,6858659,Geary + 33rd Avenue,0,115,135594
1,11047,1,7048008,Geary + 33rd Avenue,0,110,135595
2,11047,1,7048006,Geary + 33rd Avenue,0,116,135595
3,11047,1,7048005,Geary + 33rd Avenue,0,103,135595
4,11047,1,7048004,Geary + 33rd Avenue,0,113,135595


In [48]:
# Remove the nodes for the four stop_ids listed as 'Not a public stop' so
# that paths can no longer pass through them. remove_nodes_from() skips
# nodes that are already absent, so re-running this cell is harmless
# (a second remove_node() call on the same node would raise NetworkXError).
G.remove_nodes_from([7520, 7530, 7531, 7540])

In [49]:
# Recompute the shortest path from node 3415 to node 4555 on the current
# state of G.
nx.shortest_path(G, source=3415, target=4555)

[3415, 3329, 3311, 4726, 7227, 6352, 7334, 5509, 4555]

In [66]:
# Attribute dict stored on the edge between nodes 7227 and 6352.
# Subscript access (G[u][v]) is used -- the same form the walking-distance
# cell uses -- instead of the `G.edge` attribute, which networkx 2.0 removed.
G[7227][6352]

{'trips': [{'arr': '05:00:00', 'route': 1900, 'trip_id': 6869556},
  {'arr': '05:20:00', 'route': 1900, 'trip_id': 6869555},
  {'arr': '05:40:00', 'route': 1900, 'trip_id': 6869554},
  {'arr': '06:00:00', 'route': 1900, 'trip_id': 6869553},
  {'arr': '05:00:00', 'route': 1900, 'trip_id': 6869389},
  {'arr': '05:20:00', 'route': 1900, 'trip_id': 6869388},
  {'arr': '05:40:00', 'route': 1900, 'trip_id': 6869387},
  {'arr': '06:00:00', 'route': 1900, 'trip_id': 6869386},
  {'arr': '05:24:00', 'route': 1900, 'trip_id': 6869114},
  {'arr': '05:10:00', 'route': 1900, 'trip_id': 6869113},
  {'arr': '04:55:00', 'route': 1900, 'trip_id': 6869112},
  {'arr': '05:51:00', 'route': 1900, 'trip_id': 6869116}]}

In [51]:
# IPython-only introspection: the trailing `?` displays the docstring of
# nx.write_gpickle. Exploration aid; this is not valid plain-Python syntax.
nx.write_gpickle?

In [None]:
# Adding edges between stops that are within walking distance (250 meters).
# For every such pair the distance in kilometers is stored under the edge
# attribute 'd'; an edge that already exists keeps its other attributes.
WALKING_DISTANCE_KM = 0.250

for stop_idx, stop in stops.iterrows():
    stop_id = stop['stop_id']
    # Great-circle distance (km) from this stop to every row of `stops`.
    dists = stops.apply(
        lambda other: haversine(other['stop_lon'], other['stop_lat'],
                                stop['stop_lon'], stop['stop_lat']),
        axis=1)
    # Keep only walkable neighbours, excluding the stop itself by index
    # label. Masking (rather than dropping the label afterwards) cannot
    # raise KeyError when the stop's own distance is NaN and was therefore
    # already filtered out by the threshold comparison.
    nearby = dists[(dists < WALKING_DISTANCE_KM) & (dists.index != stop_idx)]
    # zip() over index and values replaces Series.iteritems(), which was
    # removed in pandas 2.0, and behaves the same on older versions.
    for close_idx, dist in zip(nearby.index, nearby):
        close_stop_id = stops.loc[close_idx]['stop_id']
        if G.has_edge(stop_id, close_stop_id):
            G[stop_id][close_stop_id]['d'] = dist
        else:
            # Keyword form works on networkx 1.x and 2.x; the positional
            # attribute-dict argument is rejected by 2.x.
            G.add_edge(stop_id, close_stop_id, d=dist)

In [71]:
# IPython-only introspection: the trailing `?` displays the docstring of
# G.subgraph. Exploration aid; this is not valid plain-Python syntax.
G.subgraph?