In [59]:
%matplotlib inline

# London tube dataset provided by: markdunne (github.com/markdunne)
# Bike dataset provided by: Transport for London

import colorsys
import random
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
from bokeh.plotting import figure, show
from bokeh.resources import CDN
from bokeh.io import output_notebook
output_notebook( resources=CDN )
import math

pd.set_option('max_colwidth', 200)

In [60]:
# Tube
lines       = pd.read_csv('london.lines.csv', index_col=0)
stations    = pd.read_csv('london.stations.csv', index_col=0)
stationsRein= pd.read_csv('london.stations.reindexed.csv', index_col=0)
connections = pd.read_csv('london.connections.csv')

# Bikes
bikeTripData = pd.read_csv('bikes/london-bikes-sorted.csv', index_col=0)     # http://cycling.data.tfl.gov.uk
bikeDockingStations = pd.read_csv('santander-cycle-coords.csv', index_col=0)   # https://tfl.gov.uk/tfl/syndication/feeds/cycle-hire/livecyclehireupdates.xml 

In [61]:
# Divide bike data into different files for quicker processing
# or rather, store indices of start/stop of data.
startOfBikeDataStartStationSection = {}
startOfBikeDataStartStationSection[1] = 0

currId = 1
for station_id, station in bikeTripData.iterrows():
    if currId == station["StartStation Id"]:
        continue
    else:
        startOfBikeDataStartStationSection[currId+1] = station_id
        currId = currId + 1
        

{1: 0, 2: 284, 3: 751, 4: 1322, 5: 1610, 6: 2264, 7: 2772, 8: 2969, 9: 3006, 10: 3466, 11: 3703, 12: 4156, 13: 5105, 14: 5428, 15: 8544, 16: 9037, 17: 9616, 18: 10346, 19: 11152, 20: 11573, 21: 12081, 22: 12249, 23: 12774, 24: 13113, 25: 13827, 26: 14494, 27: 14848, 28: 15415, 29: 15958, 30: 16315, 31: 16790, 32: 17251, 33: 18121, 34: 18642, 35: 18923, 36: 18924, 37: 19344, 38: 19800, 39: 20040, 40: 21077, 41: 21432, 42: 22168, 43: 22699, 44: 23115, 45: 23422, 46: 24042, 47: 24810, 48: 25299, 49: 26104, 50: 26372, 51: 26774, 52: 27100, 53: 27477, 54: 27734, 55: 28212, 56: 29514, 57: 29841, 58: 30360, 59: 30928, 60: 30929, 61: 31114, 62: 31470, 63: 31692, 64: 31997, 65: 32851, 66: 33223, 67: 34181, 68: 34718, 69: 35276, 70: 35624, 71: 35822, 72: 36818, 73: 37230, 74: 38299, 75: 39167, 76: 39624, 77: 39857, 78: 40336, 79: 40905, 80: 40919, 81: 41305, 82: 41822, 83: 42122, 84: 42626, 85: 42956, 86: 43519, 87: 43751, 88: 44246, 89: 44839, 90: 45298, 91: 45423, 92: 45696, 93: 45968, 94: 463

In [62]:
tubeGraph = nx.Graph()

for connection_id, connection in connections.iterrows():
    station1_name = stations.ix[connection['station1']]['name']
    station2_name = stations.ix[connection['station2']]['name']
    tubeGraph.add_edge(station1_name, station2_name, time = connection['time'])
    
#add the connection between Bank and Monument manually
tubeGraph.add_edge('Bank', 'Monument', time = 1)
# nx.draw(tubeGraph)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """


In [63]:
# Example Shortest Path
nx.shortest_path(tubeGraph, 'Holborn', 'Rotherhithe', weight='time')

['Holborn',
 'Chancery Lane',
 "St. Paul's",
 'Bank',
 'Shadwell',
 'Wapping',
 'Rotherhithe']

In [64]:
# Construct graph. Code from: github.com/markdunne
normed = stations[['longitude', 'latitude']]
normed = normed - normed.min()
normed = normed / normed.max()
locations = dict(zip(stations['name'], normed[['longitude', 'latitude']].values))

p = figure(
    x_range = (.4,.7),
    y_range = (.2,.5),
    height= 700,
    width= 900,
)
for edge in tubeGraph.edges():
    p.line( 
        x= [locations[pt][0] for pt in edge],
        y= [locations[pt][1] for pt in edge],
    )

for node in tubeGraph.nodes():
    x = [locations[node][0]]
    y = [locations[node][1]]
    p.circle(
        x, y, 
        line_alpha=0)
    p.text(
        x, y, 
        text = {'value':node}, 
        text_font_size = str(10) +"pt",
        text_align='center',
        text_font_style='bold')
    
show(p)

In [65]:
# You can do graphs things on the graph.
nx.algorithms.connectivity.cuts.minimum_node_cut(tubeGraph)

{'Paddington'}

In [66]:
bikeDockingStations
#bikeDockingStations.get_value(4,"station__name")
#bikeTripData

Unnamed: 0_level_0,station__name,station__terminalName,station__lat,station__long,station__installed,station__locked,station__installDate,station__removalDate,station__temporary
station__id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,"River Street , Clerkenwell",1023,51.529163,-0.109971,True,False,1.278950e+12,,False
2,"Phillimore Gardens, Kensington",1018,51.499607,-0.197574,True,False,1.278590e+12,,False
3,"Christopher Street, Liverpool Street",1012,51.521284,-0.084606,True,False,1.278240e+12,,False
4,"St. Chad's Street, King's Cross",1013,51.530059,-0.120974,True,False,1.278240e+12,,False
5,"Sedding Street, Sloane Square",3420,51.493130,-0.156876,True,False,1.278240e+12,,False
6,"Broadcasting House, Marylebone",3424,51.518118,-0.144229,True,False,1.278240e+12,,False
7,"Charlbert Street, St. John's Wood",3422,51.534300,-0.168074,True,False,1.278240e+12,,False
8,"Maida Vale, Maida Vale",3423,51.529857,-0.183486,True,False,1.540980e+12,,False
9,"New Globe Walk, Bankside",1015,51.507385,-0.096441,True,False,1.278240e+12,,False
10,"Park Street, Bankside",1024,51.505974,-0.092754,True,False,1.278240e+12,,False


In [67]:
# Calculate the farthest bike station from the center.
maxDistance = 0
maxStationName = ''
maxStationId = -1

# Center of London
center = (51.510776, -0.115638)

for id, station in bikeDockingStations.iterrows():
    
    Lat = station["station__lat"]
    Long = station["station__long"]
    Distance = math.sqrt((Lat*(11/7))**2 + (Long)**2)
    
    if Distance > maxDistance:
        maxStationName = station["station__name"]
        maxStationId = id
        maxDistance = Distance
        
print("Furthest Docking Station:",maxStationName, " - ",maxStationId)
print(bikeDockingStations.at[maxStationId,"station__lat"], bikeDockingStations.at[maxStationId,"station__long"])



Furthest Docking Station: Lee Valley VeloPark, Queen Elizabeth Olympic Park  -  786
51.549369 -0.015717613999999998


In [99]:
###################### START OF THE COMPUTATION #####################

In [100]:
# ------------- Get Random value and Mean -----------------

def randomCoordinate():
    # Calcuates mean coordinates based on Tube Stations
    latcoors = stationsRein['latitude']
    longcoors = stationsRein['longitude']
    latmean = latcoors.mean()
    longmean = longcoors.mean()

    meancoors = (latmean, longmean)
    meancoors = (51.510776, -0.115638) # Center of London
    # print("Mean Coordinates: ", meancoors)

    latitudeBoundSize = 0.03  # Mess with these for bigger radius
    longitudeBoundSize = 0.01 # Mess with these for bigger radius

    randLat1 = meancoors[0] + (random.uniform(0,1)*latitudeBoundSize - (latitudeBoundSize/2))
    randLong1 = meancoors[1] + (random.uniform(0,1)*longitudeBoundSize - (longitudeBoundSize/2))
    randLat2 = meancoors[0] + (random.uniform(0,1)*latitudeBoundSize - (latitudeBoundSize/2))
    randLong2 = meancoors[1] + (random.uniform(0,1)*longitudeBoundSize - (longitudeBoundSize/2))

    print("Random origin location:        ", randLat1,"," ,randLong1)
    print("Random destination location:   ", randLat2,",", randLong2)
    
    return randLat1, randLong1, randLat2, randLong2

In [101]:
def getRandomBikeTrip():
    # Get random index between 0 and size of bikeTripData
    randIdx = random.randint(0,len(bikeTripData))
    
    # Choose a random bike trip
    randomStationId1 = int(bikeTripData.at[int(randIdx),"StartStation Id"])
    randomStationId2 = int(bikeTripData.at[int(randIdx),"EndStation Id"])
    randomTripDuration = int(bikeTripData.at[int(randIdx),"Duration"])

    
    # Get the latitude and longitude of a trip
    randLat1 =   bikeDockingStations.at[int(randomStationId1), "station__lat"]
    randLong1 =  bikeDockingStations.at[int(randomStationId1), "station__long"]
    randLat2 =   bikeDockingStations.at[int(randomStationId2), "station__lat"]
    randLong2 =  bikeDockingStations.at[int(randomStationId2), "station__long"]
    
    print(randomStationId1)
    print(randomStationId2)

    print(randomTripDuration/60)
    
    
    return randLat1, randLong1, randLat2, randLong2, randomStationId1, randomStationId2
    
    
getRandomBikeTrip()
    

125
134
14.0


(51.50069491, -0.09452431900000001, 51.504903999999996, -0.06797, 125, 134)

In [102]:
# --------------- Calculates the closest TUBE stations to the random coordinates --------------

def closestTubeStation(randLat1, randLong1, randLat2, randLong2):
    #calculate distances to all stations in array from co-ordinates
    tubeDistances1 = []
    tubeDistances2 = []
    for i in range(1, len(stationsRein['longitude'])):
        distance1 = math.sqrt(((randLat1 - stationsRein['latitude'].iloc[i-1])*(11/7))**2 + (randLong1 - stationsRein['longitude'].iloc[i-1])**2)
        distance2 = math.sqrt(((randLat2 - stationsRein['latitude'].iloc[i-1])*(11/7))**2 + (randLong2 - stationsRein['longitude'].iloc[i-1])**2)

        tubeDistances1.append(distance1)
        tubeDistances2.append(distance2)

    #pick shortest for each co-ordinate
    station1 = tubeDistances1.index(min(tubeDistances1))
    station2 = tubeDistances2.index(min(tubeDistances2))
    
    oName = stationsRein['name'][station1+1]
    oLat  = stationsRein['latitude'][station1+1]
    oLong = stationsRein['longitude'][station1+1]
    
    dName = stationsRein['name'][station2+1]
    dLat  = stationsRein['latitude'][station2+1]
    dLong = stationsRein['longitude'][station2+1]

    print("Nearest train station to Random Origin:",oName,oLat,oLong)
    print("Nearest train station to Random Destination:",dName,dLat,dLong)
    
    return oName, dName
    #return oName,oLat,oLong,dName,dLat,dLong

In [103]:
# ---------------- Calculate the closest BIKE station to the random coordinates ---------------

def closestBikeStation(randLat1, randLong1, randLat2, randLong2):

    shortestOriginID = -1
    shortestDestID = -1
    
    shortestBikeOriginValue = 9999
    shortestBikeDestinationValue = 9999

    for st_id, dockingStation in bikeDockingStations.iterrows():
        dockingID = st_id
        dockingLat  = dockingStation["station__lat"]
        dockingLong = dockingStation["station__long"]
        distanceFromRandomToOrigin      = math.sqrt(((randLat1 - dockingLat)*11/7)**2 + (randLong1 - dockingLong)**2)
        distanceFromRandomToDestination = math.sqrt(((randLat2 - dockingLat)*11/7)**2 + (randLong2 - dockingLong)**2)

        if distanceFromRandomToOrigin < shortestBikeOriginValue:
            shortestOriginID = dockingID
            shortestBikeOriginValue = distanceFromRandomToOrigin

        if distanceFromRandomToDestination < shortestBikeDestinationValue:
            shortestDestID = dockingID
            shortestBikeDestinationValue = distanceFromRandomToDestination

    print("Nearest bike station to Random Origin:", shortestOriginID)
    print("Nearest bike station to Random Destination:", shortestDestID)
    
    return shortestOriginID, shortestDestID

In [121]:
# ------------- Calculate BIKE travel time -----------------------
# Calculates the mean of the closest bike ports, as well as ones that are really close.

def calcBikeTravelTimeMins(oBikeStationID, dBikeStationID):
    exactMatches = 0
    #relativeMatches = 0
    totalDurationAllTrips = 0
    
    # Get the range of samples to look at 
    # As an optimization, only looked at the subset of data with the curent station station.
    lowerBound = startOfBikeDataStartStationSection[oBikeStationID]
    upperBound = startOfBikeDataStartStationSection[oBikeStationID +1] - 1
    
    print(oBikeStationID,lowerBound,upperBound)
    
    for i in range(lowerBound, upperBound):

        # Extract the trip data.
        tStartName =  bikeTripData.at[int(i),"StartStation Name"]
        tStartID =    bikeTripData.at[int(i),"StartStation Id"]

        tEndName   =  bikeTripData.at[int(i),"EndStation Name"]
        tEndID =  bikeTripData.at[int(i),"EndStation Id"]
        
        tDuration = bikeTripData.at[int(i),"Duration"]
        
        # If this trip exactly matches the given origin and dest.
        if oBikeStationID == tStartID and dBikeStationID == tEndID:
            exactMatches += 1
            totalDurationAllTrips += tDuration
            
            
    print("For bikes there were:",exactMatches, "exact matches.")
#   print("For bikes there were:",relativeMatches, "relative matches.")
    
#     if exactMatches + relativeMatches == 0:
#         return -1 # ERROR! No matches...
    
    bikeMins = (totalDurationAllTrips / (exactMatches))/60
    print("The average time of this BIKE trip is:", bikeMins, "minutes.")
    
    
    return bikeMins

In [122]:
# --------------- Calculate TRAIN travel time --------------
# Calculates with dijkstra's shortest path algorithm (weighted by TIME)
def calcTrainTravelTimeMins(oName, dName):
    
    #nx.algorithms.shortest_paths.dijkstra_path(tubeGraph, oName, dName)

    quickestTrainTime = nx.algorithms.shortest_paths.dijkstra_path_length(tubeGraph,oName,dName, weight = "time")

    print("The quickest time for this TRAIN trip is ",quickestTrainTime, "minutes.")
    
    return quickestTrainTime

In [123]:
#### ------ Calculate how far the trip actually is------- ####

#distance1 = math.sqrt(((randLat1 - stationsRein['latitude'].iloc[i-1])*(11/7))**2 + (randLong1 - stationsRein['longitude'].iloc[i-1])**2)
#distance2 = math.sqrt(((randLat2 - stationsRein['latitude'].iloc[i-1])*(11/7))**2 + (randLong2 - stationsRein['longitude'].iloc[i-1])**2)

In [124]:
########## RUN ME ############
numIterations = 5


##############################

output = {}
for i in range(0, numIterations):
    print("[!] Iteration", i)
    # Compute random coordinates in our range
    #(randLat1, randLong1, randLat2, randLong2) = randomCoordinate()
    (randLat1, randLong1, randLat2, randLong2, oBike, dBike) = getRandomBikeTrip()

    # Calculate the nearest origin and destination stations, given the random coordinates.
    (oTrain,dTrain) = closestTubeStation(randLat1, randLong1, randLat2, randLong2)
##    (oBike, dBike) = closestBikeStation(randLat1, randLong1, randLat2, randLong2)

    # Calculate the minimum/average travel time (with our data) given an origin and a destination.
    trainMins = calcTrainTravelTimeMins(oTrain,dTrain)
    bikeMins = calcBikeTravelTimeMins(oBike, dBike)

##    if bikeMins == -1 or trainMins == -1:
##        print("error")
##        continue
        
    output[i] = {"oRandomLat": randLat1,
                 "oRandomLong": randLong1,
                 "dRandomLat": randLat2,
                 "dRandomLong": randLong2,
                 "oTrain": oTrain, 
                 "dTrain": dTrain,
                 "oBike": oBike,
                 "dBike": dBike,
                 "bikeMins": bikeMins,
                 "trainMins": trainMins}
    



[!] Iteration 0
81
192
12.0
Nearest train station to Random Origin: Great Portland Street 51.5238 -0.1439
Nearest train station to Random Destination: Picadilly Circus 51.5098 -0.1342
The quickest time for this TRAIN trip is  8 minutes.
81 41305 41821
For bikes there were: 2 exact matches.
The average time of this BIKE trip is: 10.5 minutes.
[!] Iteration 1
803
645
3.0
Nearest train station to Random Origin: Borough 51.5011 -0.0943
Nearest train station to Random Destination: Southwark 51.501000000000005 -0.1052
The quickest time for this TRAIN trip is  4 minutes.
803 305986 306291
For bikes there were: 3 exact matches.
The average time of this BIKE trip is: 3.0 minutes.
[!] Iteration 2
587
262
10.0
Nearest train station to Random Origin: Monument 51.5108 -0.0863
Nearest train station to Random Destination: Southwark 51.501000000000005 -0.1052
The quickest time for this TRAIN trip is  5 minutes.
587 241321 241932
For bikes there were: 2 exact matches.
The average time of this BIKE trip

In [125]:
dataFrame = pd.DataFrame.from_dict(output, orient="index")
dataFrame

Unnamed: 0,oRandomLat,oRandomLong,dRandomLat,dRandomLong,oTrain,dTrain,oBike,dBike,bikeMins,trainMins
0,51.520253,-0.141327,51.512515,-0.133202,Great Portland Street,Picadilly Circus,81,192,10.5,8
1,51.50541,-0.098341,51.501732,-0.100292,Borough,Southwark,803,645,3.0,4
2,51.50964,-0.08497,51.498745,-0.103133,Monument,Southwark,587,262,11.0,5
3,51.504628,-0.091774,51.504043,-0.105312,London Bridge,Southwark,194,420,7.428571,2
4,51.510485,-0.08299,51.512484,-0.099141,Monument,St. Paul's,199,48,8.884615,3


In [82]:
dataFrame.to_csv("./londonComputedData/computedData.csv")


In [129]:
#calcBikeTravelTimeMins(751,95)


In [126]:
#bikeDockingStations.at[int(4),"station__lat"]


In [128]:
#bikeTripData