In [1]:
import requests
import os
import pandas as pd
import time
import concurrent.futures
from collections import namedtuple

In [2]:
# TODO Documentation

class TflStopPoint:
    def __init__(self,mode):
        self.mode = mode
        
    @property
    def endpoint(self):
        return f"https://api.tfl.gov.uk/StopPoint/Mode/{self.mode}"
    
    @property
    def stopPoint_json(self):
        try:
            r = requests.get(self.endpoint, timeout = 30)
        except:
            return None
            
        if r.status_code == 200:
            return r.json()
        else:
            return None
    
    @property
    def stopPoint_df(self):
        # dict keys of items to retrieve from json
        desired_keys = [ 'indicator','naptanId', 'stationNaptan', 'lines', 'lineGroup', 'placeType'
                       , 'stopType', 'commonName', 'modes', 'lat', 'lon']

        data=[]
        if self.stopPoint_json != None:
            for i, value in enumerate(self.stopPoint_json['stopPoints']):

                stopPoint = {}

                for key in desired_keys:
                    if key in value:
                        stopPoint[key] = value[key]
                    else:
                        stopPoint[key] = None
                data.append(stopPoint)
        else:
            return None
            
        return pd.DataFrame(data)


In [3]:
tfl_modes = ['dlr', 'national-rail', 'overground', 'tflrail', 'tube']

In [4]:
def get_stopPoint_df(mode):
    return TflStopPoint(mode).stopPoint_df

In [5]:
# TODO currently takes 60 seconds to do, can we optimise?
# Got it down to 30 seconds, can you further reduce that?

start = time.time()

futures = []
results = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    
    for mode in tfl_modes:
        futures.append(executor.submit(get_stopPoint_df, mode=mode))
        
    for future in concurrent.futures.as_completed(futures):
        results.append(future.result())

end = time.time()

print("Time Taken: {:.6f}s".format(end-start))

Time Taken: 30.207344s


In [6]:
len(results)

5

In [7]:
# Remove none type from from results
results = [x for x in results if x is not None]

In [8]:
len(results)

4

In [9]:
df = pd.concat(results, ignore_index=True)

In [10]:
# Check total df len is equal to that of the individual results
sum([len(x) for x in results]) == len(df)

True

In [11]:
df.tail()

Unnamed: 0,commonName,indicator,lat,lineGroup,lines,lon,modes,naptanId,placeType,stationNaptan,stopType
2557,Shadwell,,51.511492,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.056782,"[bus, dlr, overground]",HUBSDE,StopPoint,,TransportInterchange
2558,Stratford,,51.541508,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.00241,"[bus, dlr, international-rail, national-rail, ...",HUBSRA,StopPoint,,TransportInterchange
2559,Tower Gateway,,51.510621,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,-0.074813,"[bus, dlr]",HUBTOG,StopPoint,,TransportInterchange
2560,West Ham,,51.528178,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,0.004997,"[bus, dlr, national-rail, tube]",HUBWEH,StopPoint,,TransportInterchange
2561,Woolwich Arsenal,,51.489962,[{'$type': 'Tfl.Api.Presentation.Entities.Line...,[{'$type': 'Tfl.Api.Presentation.Entities.Iden...,0.06917,"[bus, dlr, national-rail]",HUBWWA,StopPoint,,TransportInterchange


In [12]:
# created a named tuple object
Station = namedtuple('Station', ['name', 'lat_lon'])

In [13]:
# TODO create array for use in the finding nearest stations script
LONDON_STATIONS = [Station(s[0], (s[1], s[2])) for s in df[['commonName', 'lat', 'lon']].values]

In [14]:
# check lengths match
len(LONDON_STATIONS) == len(df)

True

In [15]:
LONDON_STATIONS[0]

Station(name='Bushey Rail Station', lat_lon=(51.645629, -0.3856))

In [16]:
LONDON_STATIONS[0].name

'Bushey Rail Station'

In [17]:
LONDON_STATIONS[0][0]

'Bushey Rail Station'

In [20]:
for station in LONDON_STATIONS[:5]:
    print(station.name)
    print(station.lat_lon)
#     print(station.longitude)
#     print('\n')

Bushey Rail Station
(51.645629, -0.3856)
Bushey Rail Station
(51.645756, -0.384367)
Bushey Rail Station
(51.645693, -0.384355)
Bushey Rail Station
(51.645692, -0.385612)
Cheshunt Rail Station
(51.702951, -0.024101)
