# Visualizing Isochrone data
We'll be looking at the average data of walking area accessibility and the population coverage of the train stations. We'll compare different Klang Valley lines and then different cities with KL

In [2]:
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


In [3]:
file_kl = '../resources/data/train_stations_iso.csv'
file_sg = '../resources/data/mrtsg_iso.csv'
file_mtl = '../resources/data/montreal_metro_iso.csv'

data_kl = pd.read_csv(file_kl)
data_sg = pd.read_csv(file_sg)
data_mtl = pd.read_csv(file_mtl)


In [13]:
print(data_kl.columns)
print(data_sg.columns)
print(data_mtl.columns)
data_kl['City']="Kuala Lumpur"
data_sg['City']="Singapore"
data_mtl['City']="Montreal"
data_sg['Service Provider Name']="SMRT"
data_mtl['Service Provider Name']="STM"
columns = ['Name','Route Name','Latitude','Longitude','Line Colour',
            '5 Minute Range Area', '10 Minute Range Area','15 Minute Range Area', 
            '5 Minute Reach Factor','10 Minute Reach Factor', '15 Minute Reach Factor',
            '5 Minute Population', '10 Minute Population', '15 Minute Population','City','Service Provider Name']
data_all = pd.concat([data_kl[columns],data_sg[columns],data_mtl[columns]])

Index(['Unnamed: 0', 'Stop ID', 'Name', 'Service Provider Name', 'Latitude',
       'Longitude', 'ROUTE ID', 'Route Name', 'Line Number', 'Line Colour',
       'Colour Hex Code', 'iso', '5 Minute Range Area', '10 Minute Range Area',
       '15 Minute Range Area', '5 Minute Reach Factor',
       '10 Minute Reach Factor', '15 Minute Reach Factor',
       '5 Minute Population', '10 Minute Population', '15 Minute Population',
       'City'],
      dtype='object')
Index(['OBJECTID', 'Name', 'STN_NO', 'X', 'Y', 'Latitude', 'Longitude',
       'Line Colour', 'Colour Hex Code', 'Route Name', 'Unnamed: 10', 'iso',
       '5 Minute Range Area', '10 Minute Range Area', '15 Minute Range Area',
       '5 Minute Reach Factor', '10 Minute Reach Factor',
       '15 Minute Reach Factor', '5 Minute Population', '10 Minute Population',
       '15 Minute Population', 'City', 'Service Provider Name'],
      dtype='object')
Index(['Unnamed: 0', 'Stop ID', 'Object ID', 'Name', 'Odonym', 'Namesake',
       'O

## Visualize walkable area within different timeframes from KL train stations

In [69]:
# draws histograms for RapidKL lines' 5 minute walking area coverage
fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur') &(data_all["Service Provider Name"]=='Rapid KL')]
             ,x='Route Name'
             ,y='5 Minute Range Area'
             , barmode = 'group'
             , title="Area coverage within 5 minutes walk from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", '5 Minute Range Area' : "5 Minutes Walk Area(km^2)"}
            ).update_xaxes(categoryorder='total ascending')
fig.update_layout(showlegend=False)
fig.show() 

# draws histograms for RapidKL lines' 10 minute walking area coverage
fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur') &(data_all["Service Provider Name"]=='Rapid KL')]
             ,x='Route Name'
             ,y='10 Minute Range Area'
             , barmode = 'group'
             , title="Area coverage within 10 minutes walk from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", '10 Minute Range Area' : "10 Minutes Walk Area(km^2)"}
            ).update_xaxes(categoryorder='total ascending')
fig.update_layout(showlegend=False)
fig.show()

# draws histograms for RapidKL lines' 15 minute walking area coverage
fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur') &(data_all["Service Provider Name"]=='Rapid KL')]
             ,x='Route Name'
             ,y='15 Minute Range Area'
             , barmode = 'group'
             , title="Area coverage within 15 minutes walk from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", '15 Minute Range Area' : "15 Minutes Walk Area(km^2)"}
            ).update_xaxes(categoryorder='total ascending')
fig.update_layout(showlegend=False)
fig.show() 

# draws all coverage in one chart along with all train lines
fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur') ]
             ,x='Route Name'
             ,y=['5 Minute Range Area','10 Minute Range Area','15 Minute Range Area']
             , barmode = 'group'
             , title="Area coverage within walking times from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines",'value' : "Walk Area Coverage(km^2)"}
            ).update_xaxes(categoryorder='total ascending')

fig.show()       

In [68]:
# drawshistograms for RapidKL lines' 5 minute walking population coverage

fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur')&(data_all["Service Provider Name"]=='Rapid KL')]
             ,x='Route Name'
             ,y='5 Minute Population'
             , barmode = 'group'
             , title="Population covered within 5 minutes walk from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", '5 Minute Population' : "Population Coverage"}
            ).update_xaxes(categoryorder='total ascending')
fig.update_layout(showlegend=False)
fig.show() 

# draws histograms for RapidKL lines' 10 minute walking population coverage

fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur')&(data_all["Service Provider Name"]=='Rapid KL')]
             ,x='Route Name'
             ,y='10 Minute Population'
             , barmode = 'group'
             , title="Population coverage within 10 minutes walk from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", '10 Minute Population' : "Population Coverage"}
            ).update_xaxes(categoryorder='total ascending')
fig.update_layout(showlegend=False)
fig.show() 

# draws histograms for RapidKL lines' 15 minute walking population coverage

fig = px.histogram(data_all[(data_all["City"]=='Kuala Lumpur')&(data_all["Service Provider Name"]=='Rapid KL')]
             ,x='Route Name'
             ,y='15 Minute Population'
             , barmode = 'group'
             , title="Population coverage within 15 minutes walk from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", '15 Minute Population' : "Population Coverage"}
            ).update_xaxes(categoryorder='total ascending')
fig.update_layout(showlegend=False)
fig.show() 

# draws  histograms for RapidKL lines' 5,10,15 minute walking population coverage

fig = px.histogram(data_all[data_all["City"]=='Kuala Lumpur']
             ,x='Route Name'
             ,y=['5 Minute Population','10 Minute Population','15 Minute Population']
             , barmode = 'group'
             , title="Population coverage within walking times from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines",'value' : "Population Coverage"}
            ).update_xaxes(categoryorder='total ascending')

fig.show()       

In [66]:
#Thought it would be fun to draw the 15 minutes walking area coverage for each KL train station, Bukit Bintang is the best, Sg Besi the worst 
fig = px.histogram(data_all[data_all["City"]=='Kuala Lumpur']
             ,x='Name'
             ,y='15 Minute Range Area'
             #,color='Route Name'
             , labels={'count':'Count of Heroes'}
             , title="Average Area covered by 15 minute Walk"
             , template='plotly' 
             , histfunc = 'avg'
            ).update_xaxes(categoryorder='total ascending')
        
fig.show()

The Singapore and Montreal dataset doesn't really include the equivalent of Komuter trains like KL so i thought it would be unfair to bundle them together because generally the coverage for those stations are not as great due to low density. So i excluded Komuter in the following averages so that we get purely mass transit styled train stations

In [32]:
data_kl['Service Provider Name'].unique()

array(['Keretapi Tanah Melayu', 'Rapid KL', 'Express Rail Link',
       'Rapid Bus'], dtype=object)

In [59]:
non_rapidkl =['Keretapi Tanah Melayu', 'Express Rail Link','Rapid Bus']
data_temp = data_all[(data_all["Service Provider Name"]=='STM')|(data_all["Service Provider Name"]=='Rapid KL')|(data_all["Service Provider Name"]=='SMRT')]

In [60]:
data_temp

Unnamed: 0,Name,Route Name,Latitude,Longitude,Line Colour,5 Minute Range Area,10 Minute Range Area,15 Minute Range Area,5 Minute Reach Factor,10 Minute Reach Factor,15 Minute Reach Factor,5 Minute Population,10 Minute Population,15 Minute Population,City,Service Provider Name
60,PWTC,Ampang Line,3.166563,101.693594,Orange,0.269062,0.924177,2.326002,0.4933,0.4236,0.4738,1359.0,7026.0,16752.0,Kuala Lumpur,Rapid KL
61,SULTAN ISMAIL,Ampang Line,3.161185,101.694127,Orange,0.154505,0.798401,2.067845,0.2833,0.3660,0.4213,1273.0,5538.0,13620.0,Kuala Lumpur,Rapid KL
62,BANDARAYA,Ampang Line,3.155548,101.694406,Orange,0.338725,1.364728,3.034092,0.6210,0.6255,0.6181,2717.0,9469.0,19064.0,Kuala Lumpur,Rapid KL
63,TITIWANGSA,Ampang Line,3.173591,101.695273,Orange,0.405508,1.342984,3.347849,0.7435,0.6156,0.6820,3121.0,9825.0,23762.0,Kuala Lumpur,Rapid KL
64,SENTUL TIMUR,Ampang Line,3.185821,101.695335,Orange,0.157145,0.980905,2.277380,0.2881,0.4496,0.4639,1245.0,6512.0,16646.0,Kuala Lumpur,Rapid KL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,D'Iberville,Blue Line,45.553078,-73.602270,Blue,0.392088,1.532228,3.482008,0.7189,0.7023,0.7093,4854.0,16359.0,40443.0,Montreal,STM
69,Saint-Michel,Blue Line,45.559813,-73.599940,Blue,0.375009,1.528195,3.478716,0.6876,0.7005,0.7087,3847.0,16371.0,30414.0,Montreal,STM
70,Berri–UQAM,Yellow Line,45.515027,-73.561260,Yellow,0.395714,1.521756,3.442349,0.7255,0.6975,0.7013,1952.0,16004.0,33577.0,Montreal,STM
71,Jean-Drapeau,Yellow Line,45.512435,-73.533170,Yellow,0.357867,1.249158,2.073656,0.6561,0.5726,0.4224,0.0,0.0,0.0,Montreal,STM


In [73]:
#comparing walking area coverage averaged out for different cities this includes all our train lines
fig = px.histogram(data_temp
             ,x='City'
             ,y=['5 Minute Range Area','10 Minute Range Area','15 Minute Range Area']
             , barmode = 'group'
             , title="Area coverage within walking times from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines", 'value':'Walk Able Area Coverage(km^2)'}
            ).update_xaxes(categoryorder='total ascending')

fig.show() 

In [74]:
fig = px.histogram(data_temp
             ,x='City'
             ,y=['5 Minute Population','10 Minute Population','15 Minute Population']
             , barmode = 'group'
             , title="Area coverage within walking times from station"
             , template='plotly'
             , histfunc = 'avg'
             ,labels={'Route Name': "Lines",'value':'Population Coverage'}
            ).update_xaxes(categoryorder='total ascending')

fig.show() 

# Conclusions
Well as we suspect we dont seem to maximize our train stations for the users compared to other cities like Montreal where our population number is similar or Singapore where our culture and weather are similar. The reasons for that could definitely be further analyzed by looking at individual stations and understanding why it may be so.