In [1]:
import requests
import os
import pandas as pd
import time
import concurrent.futures
from collections import namedtuple

In [2]:
# TODO Documentation

class TflStopPoint:
    def __init__(self,mode):
        self.mode = mode
        
    @property
    def endpoint(self):
        return f"https://api.tfl.gov.uk/StopPoint/Mode/{self.mode}"
    
    @property
    def stopPoint_json(self):
        try:
            r = requests.get(self.endpoint, timeout = 30)
        except:
            return None
            
        if r.status_code == 200:
            return r.json()
        else:
            return None
    
    @property
    def stopPoint_df(self):
        # dict keys of items to retrieve from json
        desired_keys = [ 'indicator','naptanId', 'stationNaptan', 'lines', 'lineGroup', 'placeType'
                       , 'stopType', 'commonName', 'modes', 'lat', 'lon']

        data=[]
        if self.stopPoint_json != None:
            for i, value in enumerate(self.stopPoint_json['stopPoints']):

                stopPoint = {}

                for key in desired_keys:
                    if key in value:
                        stopPoint[key] = value[key]
                    else:
                        stopPoint[key] = None
                data.append(stopPoint)
        else:
            return None
            
        return pd.DataFrame(data)


In [3]:
tfl_modes = ['dlr', 'national-rail', 'overground', 'tflrail', 'tube']

In [4]:
def get_stopPoint_df(mode):
    return TflStopPoint(mode).stopPoint_df

In [5]:
# TODO currently takes 60 seconds to do, can we optimise?
# Got it down to 30 seconds, can you further reduce that?

start = time.time()

futures = []
results = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    
    for mode in tfl_modes:
        futures.append(executor.submit(get_stopPoint_df, mode=mode))
        
    for future in concurrent.futures.as_completed(futures):
        results.append(future.result())

end = time.time()

print("Time Taken: {:.6f}s".format(end-start))

Time Taken: 30.112368s


In [6]:
len(results)

5

In [7]:
# Remove none type from from results
results = [x for x in results if x is not None]

In [8]:
len(results)

4

In [23]:
df = pd.concat(results, ignore_index=True)

In [24]:
# Check total df len is equal to that of the individual results
sum([len(x) for x in results]) == len(df)

True

In [25]:
df.tail()

Unnamed: 0,commonName,indicator,lat,lineGroup,lines,lon,modes,naptanId,placeType,stationNaptan,stopType
2558,Westminster,,51.501603,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.125984,"[bus, tube]",HUBWSM,StopPoint,,TransportInterchange
2559,Canada Water,,51.498053,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.049667,"[bus, overground, tube]",HUBZCW,StopPoint,,TransportInterchange
2560,Farringdon,,51.520214,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.105054,"[tube, national-rail, bus]",HUBZFD,StopPoint,,TransportInterchange
2561,Moorgate,,51.518338,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.088627,"[tube, bus, national-rail]",HUBZMG,StopPoint,,TransportInterchange
2562,Whitechapel,,51.519498,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.059858,"[bus, overground, tube]",HUBZWL,StopPoint,,TransportInterchange


In [15]:
# created a named tuple object
Station = namedtuple('Station', ['station_name', 'latitude', 'longitude'])

In [20]:
Station(df['commonName'][0], df['lat'][0], df['lon'][0])

Station(station_name=0                 Acton Main Line
0             Bushey Rail Station
0          Abbey Road DLR Station
0    Amersham Underground Station
Name: commonName, dtype: object, latitude=0    51.517069
0    51.645629
0    51.532087
0    51.674203
Name: lat, dtype: float64, longitude=0   -0.267121
0   -0.385600
0    0.003830
0   -0.607365
Name: lon, dtype: float64)

In [17]:
# TODO create array for use in the finding nearest stations script
LONDON_STATIONS = [Station(s[0], s[1], s[2]) for s in df[['commonName', 'lat', 'lon']]]

In [18]:
len(LONDON_STATIONS)

3

In [19]:
LONDON_STATIONS[0]

Station(station_name='c', latitude='o', longitude='m')