# Finding the nearest train/tube station

The purpose of this project is to create a module that takes a London postcode as input (e.g. N4 4AF) and outputs the nearest train/tube station to it.

In [18]:
import pandas as pd
import requests
from geopy import distance

In [2]:
# A csv file of london stations can be downloaded from this link
# https://www.doogal.co.uk/london_stations.php
# TODO: change to all GB stations so it is not London-centric

london_stations = pd.read_csv('london_stations.csv')

In [3]:
london_stations.head()

Unnamed: 0,Station,OS X,OS Y,Latitude,Longitude,Zone,Postcode
0,Abbey Road,539081,183352,51.531952,0.003723,3,E15 3NB
1,Abbey Wood,547297,179002,51.490784,0.120272,4,SE2 9RH
2,Acton Central,520613,180299,51.508758,-0.26343,2,W3 6BH
3,Acton Main Line,520296,181196,51.516887,-0.26769,3,W3 9EH
4,Acton Town,519457,179639,51.503071,-0.280303,3,W3 8HN


In [4]:
test_postcodes = ['N15 3AD', 'N4 4AF', 'N10 3QS', 'N15 4AR', 'N15 4JF']

In [5]:
# 1. Geocode postcode data using the postcodes.io API

# create endpoint
def create_endpoint(postcode):
    """Build the postcodes.io lookup URL for a single postcode.

    The postcode is normalised by removing all whitespace and lower-casing
    it, then percent-encoded so that stray characters (e.g. '/', '?', '#')
    cannot change the path or query of the resulting URL.

    Parameters
    ----------
    postcode : str
        A postcode such as 'N15 3AD'. Spacing and letter case are ignored.

    Returns
    -------
    str
        The endpoint URL, e.g. 'http://api.postcodes.io/postcodes/n153ad'.
    """
    # Local import so the function works even if the cell is run on its own.
    from urllib.parse import quote

    # str.split() with no arguments splits on any run of whitespace, so tabs,
    # newlines and leading/trailing blanks are removed as well as plain spaces.
    normalised = ''.join(postcode.split()).lower()
    # safe='' also escapes '/', which would otherwise add an extra path segment.
    encoded = quote(normalised, safe='')
    return f'http://api.postcodes.io/postcodes/{encoded}'

In [6]:
create_endpoint(test_postcodes[0])

'http://api.postcodes.io/postcodes/n153ad'

In [7]:
# get data from endpoint
r = requests.get(create_endpoint(test_postcodes[0]))

In [8]:
r.status_code

200

In [9]:
r.json()

{'status': 200,
 'result': {'postcode': 'N15 3AD',
  'quality': 1,
  'eastings': 532236,
  'northings': 188920,
  'country': 'England',
  'nhs_ha': 'London',
  'longitude': -0.092808,
  'latitude': 51.583631,
  'european_electoral_region': 'London',
  'primary_care_trust': 'Haringey Teaching',
  'region': 'London',
  'lsoa': 'Haringey 026A',
  'msoa': 'Haringey 026',
  'incode': '3AD',
  'outcode': 'N15',
  'parliamentary_constituency': 'Tottenham',
  'admin_district': 'Haringey',
  'parish': 'Haringey, unparished area',
  'admin_county': None,
  'admin_ward': "St Ann's",
  'ced': None,
  'ccg': 'NHS North Central London',
  'nuts': 'Haringey and Islington',
  'codes': {'admin_district': 'E09000014',
   'admin_county': 'E99999999',
   'admin_ward': 'E05000277',
   'parish': 'E43000204',
   'parliamentary_constituency': 'E14001002',
   'ccg': 'E38000240',
   'ccg_id': '93C',
   'ced': 'E99999999',
   'nuts': 'UKI43',
   'lsoa': 'E01002043',
   'msoa': 'E02000422',
   'lau2': 'E05000277'

In [10]:
r.json()['result']['postcode']

'N15 3AD'

In [11]:
r.json()['result']['longitude']

-0.092808

In [12]:
r.json()['result']['latitude']

51.583631

In [46]:
# TODO: for each postcode, request the API and
# create a dictionary that looks like {'postcode': 'N15 3AD', 'lat_lon': (51.583631, -0.092808)}
# define a function to do this
def get_long_lat(endpoint, timeout=10):
    """Fetch a postcode lookup and return its postcode and coordinates.

    Parameters
    ----------
    endpoint : str
        URL to request; the JSON response is expected to contain a 'result'
        object with 'postcode', 'latitude' and 'longitude' entries.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        {'postcode': <postcode>, 'lat_lon': (<latitude>, <longitude>)}.
        Note the tuple order is (latitude, longitude).

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status.
    """
    r = requests.get(endpoint, timeout=timeout)
    # Fail loudly on an error response rather than with an opaque KeyError
    # further down (an error body may not contain a 'result' entry).
    r.raise_for_status()
    # Decode the body once and reuse it instead of re-parsing for every field.
    result = r.json()['result']
    return {
        'postcode': result['postcode'],
        'lat_lon': (result['latitude'], result['longitude']),
    }

# Once this is done, you will have successfully geocoded the data. Woohoo!

In [47]:
test_dict = get_long_lat(create_endpoint(test_postcodes[0]))

In [48]:
test_dict

{'postcode': 'N15 3AD', 'lat_lon': (51.583631, -0.092808)}

In [49]:
test_dict['lat_lon']

(51.583631, -0.092808)

In [16]:
# TODO: Combine endpoint creation with getting long and lat so that get_long_lat takes a postcode as an input

In [55]:
london_stations[london_stations['Station'].isin(['Blackhorse Road', 'Walthamstow Central', 'Tottenham Hale'])]

Unnamed: 0,Station,OS X,OS Y,Latitude,Longitude,Zone,Postcode
60,Blackhorse Road,535914,189256,51.585777,-0.039626,3,E17 6JJ
562,Tottenham Hale,534483,189465,51.587998,-0.060188,3,N17 9LR
586,Walthamstow Central,537286,188986,51.583018,-0.019941,3,E17 7LP


In [57]:
# measure distance between two points

# test with postcode
# distance.distance((lat, lon), (lat, lon))

# Distance from the test postcode to the Tottenham Hale coordinates,
# measured once and reported in miles, then kilometres.
postcode_to_station = distance.distance(test_dict['lat_lon'], (51.587998, -0.060188))
print(postcode_to_station.miles)
print(postcode_to_station.km)

1.4369277331214914
2.3125110257326735


In [36]:
distance.distance((51.599983, -0.037353), (51.585777, -0.039626)).miles

0.9869758121665563

In [37]:
distance.distance((51.599983, -0.037353), (51.585777, -0.039626)).km

1.5883836014553745