In [1]:
import requests
import os
import pandas as pd
import time
import concurrent.futures
from collections import namedtuple

In [2]:
# TODO Documentation

class TflStopPoint:
    def __init__(self,mode):
        self.mode = mode
        
    @property
    def endpoint(self):
        return f"https://api.tfl.gov.uk/StopPoint/Mode/{self.mode}"
    
    @property
    def stopPoint_json(self):
        try:
            r = requests.get(self.endpoint, timeout = 30)
        except:
            return None
            
        if r.status_code == 200:
            return r.json()
        else:
            return None
    
    @property
    def stopPoint_df(self):
        # dict keys of items to retrieve from json
        desired_keys = [ 'indicator','naptanId', 'stationNaptan', 'lines', 'lineGroup', 'placeType'
                       , 'stopType', 'commonName', 'modes', 'lat', 'lon']

        data=[]
        if self.stopPoint_json != None:
            for i, value in enumerate(self.stopPoint_json['stopPoints']):

                stopPoint = {}

                for key in desired_keys:
                    if key in value:
                        stopPoint[key] = value[key]
                    else:
                        stopPoint[key] = None
                data.append(stopPoint)
        else:
            return None
            
        return pd.DataFrame(data)


In [3]:
tfl_modes = ['dlr', 'national-rail', 'overground', 'tflrail', 'tube']

In [4]:
def get_stopPoint_df(mode):
    return TflStopPoint(mode).stopPoint_df

In [5]:
# TODO currently takes 60 seconds to do, can we optimise?
# Got it down to 30 seconds, can you further reduce that?

start = time.time()

futures = []
results = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    
    for mode in tfl_modes:
        futures.append(executor.submit(get_stopPoint_df, mode=mode))
        
    for future in concurrent.futures.as_completed(futures):
        results.append(future.result())

end = time.time()

print("Time Taken: {:.6f}s".format(end-start))

Time Taken: 30.145397s


In [6]:
len(results)

5

In [7]:
# Remove none type from from results
results = [x for x in results if x is not None]

In [8]:
len(results)

3

In [9]:
df = pd.concat(results, ignore_index=True)

In [10]:
# Check total df len is equal to that of the individual results
sum([len(x) for x in results]) == len(df)

True

In [11]:
df.tail()

Unnamed: 0,commonName,indicator,lat,lineGroup,lines,lon,modes,naptanId,placeType,stationNaptan,stopType
860,West Hampstead,,51.547533,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.191357,"[bus, national-rail, overground, tube]",HUBWHD,StopPoint,,TransportInterchange
861,Willesden Junction,,51.532556,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.243006,"[bus, overground, tube]",HUBWIJ,StopPoint,,TransportInterchange
862,Wembley Central,,51.55232,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.296642,"[bus, national-rail, overground, tube]",HUBWMB,StopPoint,,TransportInterchange
863,Canada Water,,51.498053,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.049667,"[bus, overground, tube]",HUBZCW,StopPoint,,TransportInterchange
864,Whitechapel,,51.519498,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.059858,"[bus, overground, tube]",HUBZWL,StopPoint,,TransportInterchange


In [12]:
# created a named tuple object
Station = namedtuple('Station', ['name', 'latitude', 'longitude'])

In [13]:
# TODO create array for use in the finding nearest stations script
LONDON_STATIONS = [Station(s[0], s[1], s[2]) for s in df[['commonName', 'lat', 'lon']].values]

In [14]:
# check lengths match
len(LONDON_STATIONS) == len(df)

True

In [15]:
LONDON_STATIONS[0]

Station(name='Acton Main Line', latitude=51.517069, longitude=-0.267121)

In [16]:
LONDON_STATIONS[0].name

'Acton Main Line'

In [17]:
LONDON_STATIONS[0][0]

'Acton Main Line'

In [19]:
for station in LONDON_STATIONS[:5]:
    print(station.name)
    print(station.latitude)
    print(station.longitude)
    print('\n')

Acton Main Line
51.517069
-0.267121


Burnham (Berks) Rail Station
51.523506
-0.646374


Brentwood Rail Station
51.613684
0.300153


Chadwell Heath
51.568094
0.129394


Ealing Broadway
51.514643
-0.30173


