In [1]:
#We want to search the shortest tube path from kcl to ucl

In [2]:
#packages we need
import geopandas as gpd
%matplotlib inline  
from shapely.geometry import Point
import numpy as np
import pandas as pd
import networkx as nx
from geopy.distance import distance
from geopy.geocoders import Nominatim

In [3]:
#london geographic dataset we need
london = gpd.read_file("data/London_Ward.shp")

In [4]:
#tube geographic data we need
station=pd.read_csv('station.csv')
station=station.drop_duplicates(subset='Name')

# Read in the full zone 1 csv
zone1=pd.read_csv('zone1.csv')
#Read it into a network
tubeZone1=nx.from_pandas_dataframe(zone1,'station_name1','station_name2', create_using=nx.Graph())

In [5]:
#match network data with location data
#read station names seperately
df1=zone1['station_name1']
df2=zone1['station_name2']

#concat two columns into one
df3=pd.concat([df1,df2], axis=0)

#choose the unique names
df4=df3.unique()

#turn it into dataframe format
data = {'Name':df4}
frame = pd.DataFrame(data)

#change the station's name from Edgware Road (B) to Edgware Road (Bakerloo Line)
frame['Name'][2]='Edgware Road (Bakerloo Line)'

In [6]:
frame.head()

Unnamed: 0,Name
0,Baker Street
1,Charing Cross
2,Edgware Road (Bakerloo Line)
3,Embankment
4,Lambeth North


In [7]:
#merge location data with network data on common column 'name'
frame1=pd.merge(frame, station, on='Name')

In [58]:
#kcl geographic data
geolocator = Nominatim()
location = geolocator.geocode("WC2R 2LS")
print location

WC2R 2LS, United Kingdom


In [59]:
kcl_coords=(location.longitude, location.latitude)
kings_loc = gpd.GeoDataFrame([{"name": "KCL","geometry": Point(kcl_coords)}],crs={'init' :'epsg:4326'})
kings_loc = kings_loc.to_crs(london.crs)

In [60]:
#the buffer we use is to limit the area of searching the stations within 500 meters.
kings_buffer = kings_loc.copy() # copy the GeoDataFrame
# create a buffer of 500 meters to convert the POINTs to POLYGONs 
#   in the 'geometry' column
kings_buffer['geometry'] = kings_buffer.buffer(500)

In [61]:
#get the individual column from location-network dataset (frame1), and turn it into geodataframe
name=[frame1['Name'][i] for i in range(len(frame1))]
lon=[float(frame1['Longitude'][i]) for i in range(len(frame1))]
lat=[float(frame1['Latitude'][i]) for i in range(len(frame1))]
point2=[Point(lon[i],lat[i]) for i in range(len(lat))]
frame2 = gpd.GeoDataFrame({"name": name,"geometry":point2},crs={'init' :'epsg:4326'})
#change the crs into london crs
frame2 = frame2.to_crs(london.crs)

In [62]:
#intersect to find common field
stations_near_kings = gpd.sjoin(frame2, kings_buffer, how="inner",op='intersects')

In [63]:
stations_near_kings

Unnamed: 0,geometry,name_left,index_right,name_right
8,POINT (531152.7220491403 181628.569548202),Chancery Lane,0,KCL
50,POINT (531016.6251926103 180821.6366513587),Temple,0,KCL


In [64]:
station_names_kings=stations_near_kings[['name_left']]
station_names_kings.columns=['Name']
station_names_kings

Unnamed: 0,Name
8,Chancery Lane
50,Temple


In [65]:
#we get the station names closed to kings, now we combine the frame1 
#(the dataframe contains all the stations geographic informarion) with she station names closed with kings
newframe=pd.merge(station_names_kings, frame1, on='Name')

In [66]:
# we add a new column distance which can be used to store the distance between kings with each station
distance_data=[]
for i in range(len(newframe)):
    distance_data.append(round(distance((float(newframe.loc[i]['Longitude']),
                                         float(newframe.loc[i]['Latitude'])),kcl_coords).km,4))
newframe['distance']=distance_data

In [67]:
#we choose the closest station
chosen_station=newframe.loc[newframe['distance'].idxmin()]
kcl_chosen_station=chosen_station['Name']
print kcl_chosen_station

Temple


In [68]:
geolocator = Nominatim()
location = geolocator.geocode("WC1E 6BT")
print location

WC1E 6BT, United Kingdom


In [69]:
ucl_coords=(location.longitude, location.latitude)
ucl_loc = gpd.GeoDataFrame([{"name": "ucl", 
                               "geometry": Point(ucl_coords)}],
                             crs={'init' :'epsg:4326'})
ucl_loc = ucl_loc.to_crs(london.crs)
ucl_buffer = ucl_loc.copy() 
ucl_buffer['geometry'] = ucl_buffer.buffer(500)
try:
    stations_near_ucl = gpd.sjoin(frame2, ucl_buffer, how="inner",op='intersects')
    
except ValueError:
    print "There is not station within 500 meters' distance"

station_names_ucl=stations_near_ucl[['name_left']]
station_names_ucl.columns=['Name']
newframe=pd.merge(station_names_ucl, frame1, on='Name')
distance_data=[]
for i in range(len(newframe)):
    distance_data.append(round(distance((float(newframe.loc[i]['Longitude']),
                                         float(newframe.loc[i]['Latitude'])),ucl_coords).km,4))
newframe['distance']=distance_data
chosen_station=newframe.loc[newframe['distance'].idxmin()]
ucl_chosen_station=chosen_station['Name']
print ucl_chosen_station

Euston Square


In [70]:
#Find the shortest route from liverpool street to Bond street
nx.shortest_path(tubeZone1,kcl_chosen_station,ucl_chosen_station)

['Temple',
 'Embankment',
 'Westminster',
 'Green Park',
 'Bond Street',
 'Baker Street',
 'Great Portland Street',
 'Euston Square']

In [82]:
def starting_point(place_name):
    geolocator = Nominatim()
    location = geolocator.geocode(str(place_name))
    ucl_coords=(location.longitude, location.latitude)
    ucl_loc = gpd.GeoDataFrame([{"name": "ucl","geometry": Point(ucl_coords)}],crs={'init' :'epsg:4326'})
    ucl_loc = ucl_loc.to_crs(london.crs)
    ucl_buffer = ucl_loc.copy() 
    ucl_buffer['geometry'] = ucl_buffer.buffer(500)
    try:
        stations_near_ucl = gpd.sjoin(frame2, ucl_buffer, how="inner",op='intersects')
    
    except ValueError:
        print "There is not station within 500 meters' distance"

    station_names_ucl=stations_near_ucl[['name_left']]
    station_names_ucl.columns=['Name']
    newframe=pd.merge(station_names_ucl, frame1, on='Name')
    distance_data=[]
    for i in range(len(newframe)):
        distance_data.append(round(distance((float(newframe.loc[i]['Longitude']),
                                         float(newframe.loc[i]['Latitude'])),ucl_coords).km,4))
    newframe['distance']=distance_data
    chosen_station=newframe.loc[newframe['distance'].idxmin()]
    starting_station=chosen_station['Name']
    return starting_station

In [83]:
def destination(place_name):
    geolocator = Nominatim()
    location = geolocator.geocode(str(place_name))
    ucl_coords=(location.longitude, location.latitude)
    ucl_loc = gpd.GeoDataFrame([{"name": "ucl","geometry": Point(ucl_coords)}],crs={'init' :'epsg:4326'})
    ucl_loc = ucl_loc.to_crs(london.crs)
    ucl_buffer = ucl_loc.copy() 
    ucl_buffer['geometry'] = ucl_buffer.buffer(500)
    try:
        stations_near_ucl = gpd.sjoin(frame2, ucl_buffer, how="inner",op='intersects')
    
    except ValueError:
        print "There is not station within 500 meters' distance"

    station_names_ucl=stations_near_ucl[['name_left']]
    station_names_ucl.columns=['Name']
    newframe=pd.merge(station_names_ucl, frame1, on='Name')
    distance_data=[]
    for i in range(len(newframe)):
        distance_data.append(round(distance((float(newframe.loc[i]['Longitude']),
                                         float(newframe.loc[i]['Latitude'])),ucl_coords).km,4))
    newframe['distance']=distance_data
    chosen_station=newframe.loc[newframe['distance'].idxmin()]
    destination_station=chosen_station['Name']
    return destination_station

In [84]:
starting_point('WC2R 2LS')

'Temple'

In [85]:
destination('WC1E 6BT')

'Euston Square'

In [86]:
nx.shortest_path(tubeZone1,starting_point('WC2R 2LS'),destination('WC1E 6BT'))

['Temple',
 'Embankment',
 'Westminster',
 'Green Park',
 'Bond Street',
 'Baker Street',
 'Great Portland Street',
 'Euston Square']

In [None]:
# Read in the full zone 1 csv
zone1=pd.read_csv('zone1.csv')
#Read it into a network
tubeZone1=nx.from_pandas_dataframe(zone1,'station_name1','station_name2', create_using=nx.Graph())

In [12]:
def naive_citymapper(starting_name,destination_name):
    import geopandas as gpd
    %matplotlib inline  
    from shapely.geometry import Point
    import numpy as np
    import pandas as pd
    import networkx as nx
    from geopy.distance import distance
    from geopy.geocoders import Nominatim

    zone1=pd.read_csv('zone1.csv')
    tubeZone1=nx.from_pandas_dataframe(zone1,'station_name1','station_name2', create_using=nx.Graph())
    df1=zone1['station_name1']
    df2=zone1['station_name2']
    df3=pd.concat([df1,df2], axis=0)
    df4=df3.unique()
    data = {'Name':df4}
    frame = pd.DataFrame(data)
    frame['Name'][2]='Edgware Road (Bakerloo Line)'
    frame1=pd.merge(frame, station, on='Name')
    name=[frame1['Name'][i] for i in range(len(frame1))]
    lon=[float(frame1['Longitude'][i]) for i in range(len(frame1))]
    lat=[float(frame1['Latitude'][i]) for i in range(len(frame1))]
    point2=[Point(lon[i],lat[i]) for i in range(len(lat))]
    frame2 = gpd.GeoDataFrame({"name": name,"geometry":point2},crs={'init' :'epsg:4326'})
    frame2 = frame2.to_crs(london.crs)

    def starting_point(starting_name):
        geolocator = Nominatim()
        location = geolocator.geocode(str(starting_name))
        ucl_coords=(location.longitude, location.latitude)
        ucl_loc = gpd.GeoDataFrame([{"name": "ucl","geometry": Point(ucl_coords)}],crs={'init' :'epsg:4326'})
        ucl_loc = ucl_loc.to_crs(london.crs)
        ucl_buffer = ucl_loc.copy() 
        ucl_buffer['geometry'] = ucl_buffer.buffer(500)
        try:
            stations_near_ucl = gpd.sjoin(frame2, ucl_buffer, how="inner",op='intersects')
    
        except ValueError:
            print "There is not station within 500 meters' distance"

        station_names_ucl=stations_near_ucl[['name_left']]
        station_names_ucl.columns=['Name']
        newframe=pd.merge(station_names_ucl, frame1, on='Name')
        distance_data=[]
        for i in range(len(newframe)):
            distance_data.append(round(distance((float(newframe.loc[i]['Longitude']),
                                         float(newframe.loc[i]['Latitude'])),ucl_coords).km,4))
        newframe['distance']=distance_data
        chosen_station=newframe.loc[newframe['distance'].idxmin()]
        starting_station=chosen_station['Name']
        #distance between starting point to station
        global distance1
        distance1=round(newframe.loc[newframe['distance'].idxmin()]['distance'],2)
        return starting_station# Creates tuple automatically



    def destination(destination_name):
        geolocator = Nominatim()
        location = geolocator.geocode(str(destination_name))
        ucl_coords=(location.longitude, location.latitude)
        ucl_loc = gpd.GeoDataFrame([{"name": "ucl","geometry": Point(ucl_coords)}],crs={'init' :'epsg:4326'})
        ucl_loc = ucl_loc.to_crs(london.crs)
        ucl_buffer = ucl_loc.copy() 
        ucl_buffer['geometry'] = ucl_buffer.buffer(500)
        try:
            stations_near_ucl = gpd.sjoin(frame2, ucl_buffer, how="inner",op='intersects')
    
        except ValueError:
            print "There is not station within 500 meters' distance"

        station_names_ucl=stations_near_ucl[['name_left']]
        station_names_ucl.columns=['Name']
        newframe=pd.merge(station_names_ucl, frame1, on='Name')
        distance_data=[]
        for i in range(len(newframe)):
            distance_data.append(round(distance((float(newframe.loc[i]['Longitude']),
                                         float(newframe.loc[i]['Latitude'])),ucl_coords).km,4))
        newframe['distance']=distance_data
        chosen_station=newframe.loc[newframe['distance'].idxmin()]
        destination_station=chosen_station['Name']
        #distance between destination to station
        global distance2
        distance2=round(newframe.loc[newframe['distance'].idxmin()]['distance'],2)
        return destination_station
         

    shortest_path=nx.shortest_path(tubeZone1,starting_point(starting_name),destination(destination_name))
    arrow=[]
    for i in shortest_path:
        if i != shortest_path[-1]:
            arrow.append(i+'-->')
        else:
            arrow.append(i)
    
    path_result=''.join(arrow)
    print 'From %s to %s.\nFirst, walk %skm to %s station.\nPass through %s stations, terminated at %s station.\
    \nAnd then walk %skm to destination. \nThe tube path is \n%s'%(starting_name,destination_name,distance1,
                                                              shortest_path[0],len(shortest_path)-1,
                                                              shortest_path[-1],distance2,path_result)   


In [13]:
naive_citymapper('WC2R 2LS','WC1E 6BT')

From WC2R 2LS to WC1E 6BT.
First, walk 0.27km to Temple station.
Pass through 7 stations, terminated at Euston Square station.    
And then walk 0.17km to destination. 
The tube path is 
Temple-->Embankment-->Westminster-->Green Park-->Bond Street-->Baker Street-->Great Portland Street-->Euston Square
