In [1]:
import pandas as pd
import datetime
import requests
import plotly.express as px
import dash_leaflet as dl
from dash import dcc, html, Dash
from dash.dependencies import Input, Output
from simplekml import Kml
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Not used in the final version of the notebook (left over from earlier experiments)
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from plotly.tools import mpl_to_plotly

### 1. Data intake and cleaning

In [2]:
# Load the seven raw tables. Target names and file paths are paired by
# position, so keep both sequences in the same order when editing.
(cities, stations, tracks, lines,
 track_lines, station_lines, systems) = map(
    pd.read_csv,
    [
        "datasets/cities.csv",
        "datasets/stations.csv",
        "datasets/tracks.csv",
        "datasets/lines.csv",
        "datasets/track_lines.csv",
        "datasets/station_lines.csv",
        "datasets/systems.csv",
    ],
)

In [3]:
# Give the primary keys of the two main tables unambiguous names so they can
# serve as join keys without clashing with the 'id'/'name' of other tables.
stations = stations.rename(columns={'id': 'station_id', 'name': 'station_name'})
tracks = tracks.rename(columns={'id': 'section_id'})

# Slim lookup tables: keep only the columns used downstream, renamed so that
# every join key carries the same name on both sides of the merge.
cities_simpl = cities[['id', 'country', 'name']].rename(
    columns={'id': 'city_id', 'name': 'city'})
station_lines_simpl = station_lines[['station_id', 'line_id']].copy()
lines_simpl = lines[['id', 'name', 'color', 'system_id']].rename(
    columns={'id': 'line_id', 'name': 'line_name', 'color': 'line_color'})
systems_simpl = systems[['id', 'name']].rename(
    columns={'id': 'system_id', 'name': 'system_name'})
track_lines_simpl = track_lines[['section_id', 'line_id']].copy()

In [4]:
# Enrich STATIONS with city, line and system attributes.
# Every step is a left join, so stations without a match are kept (with NaN
# in the added columns); a station listed on several lines yields one row per line.
stations = (
    stations
    .merge(cities_simpl, how='left', on='city_id')
    .merge(station_lines_simpl, how='left', on='station_id')
    .merge(lines_simpl, how='left', on='line_id')
    .merge(systems_simpl, how='left', on='system_id')
)

In [5]:
# 'geometry' holds text of the form 'POINT(<lon> <lat>)'.
# Cut off everything up to and including 'POINT(' once, then take the two
# space-separated numbers (dropping the closing bracket from the second).
_point_text = stations['geometry'].map(lambda wkt: wkt.split('POINT(')[1])
stations['longitude'] = _point_text.map(lambda body: body.split(' ')[0]).astype(float)
stations['latitude'] = _point_text.map(lambda body: body.split(' ')[1].split(')')[0]).astype(float)

In [6]:
# Put the STATIONS columns into reading order: identity, position, lifetime,
# then city / line / system. Columns not listed here are dropped.
station_columns = [
    'station_id', 'station_name', 'geometry', 'longitude', 'latitude',
    'opening', 'closure',
    'city_id', 'city', 'country',
    'line_id', 'line_name', 'system_id', 'system_name',
]
stations = stations[station_columns]
stations.head()

Unnamed: 0,station_id,station_name,geometry,longitude,latitude,opening,closure,city_id,city,country,line_id,line_name,system_id,system_name
0,7694,Keisei Tsudanuma,POINT(140.024812197129 35.6837744784723),140.024812,35.683774,1921.0,999999.0,114,Tokyo,Japan,629.0,Chiba Line,308.0,Keisei
1,6003,Kossuth Lajos tér,POINT(19.0462376564033 47.5054880717671),19.046238,47.505488,0.0,999999.0,29,Budapest,Hungary,528.0,M2,22.0,Metro
2,7732,Saint-Charles,POINT(5.3801556 43.3024646),5.380156,43.302465,1977.0,999999.0,74,Marseilles,France,570.0,M1,63.0,Métro de Marseille
3,7695,Keisei Makuhari-Hongo,POINT(140.042146725175 35.6726021159981),140.042147,35.672602,1991.0,999999.0,114,Tokyo,Japan,629.0,Chiba Line,308.0,Keisei
4,7726,Chartreux,POINT(5.4014815 43.309129),5.401482,43.309129,1977.0,999999.0,74,Marseilles,France,570.0,M1,63.0,Métro de Marseille


In [7]:
# Enrich TRACKS with line, city and system attributes.
# Left joins keep every track section even when a lookup has no match.
tracks = (
    tracks
    .merge(track_lines_simpl, how='left', on='section_id')
    .merge(lines_simpl, how='left', on='line_id')
    .merge(cities_simpl, how='left', on='city_id')
    .merge(systems_simpl, how='left', on='system_id')
)

In [8]:
# Parse WKT line geometries into coordinate lists - two orderings for different applications
def _wkt_points(wkt):
    """Return every coordinate of a WKT (MULTI)LINESTRING as a list of float lists.

    The geometry keyword (everything before the first bracket) is discarded and
    all brackets are treated as plain separators, so the parser accepts
    'LINESTRING(...)', 'MULTILINESTRING((...),(...))', optional whitespace after
    commas, and returns [] for bracket-less text such as 'LINESTRING EMPTY'.

    NOTE(review): the parts of a multi-part geometry are flattened into one
    point list, i.e. part boundaries are not preserved - confirm this is
    acceptable for multi-part sections.
    """
    first_bracket = wkt.find('(')
    if first_bracket == -1:
        return []

    body = wkt[first_bracket:].replace('(', ' ').replace(')', ' ')
    points = []
    for chunk in body.split(','):
        numbers = chunk.split()  # split on any whitespace; ignores padding
        if numbers:
            points.append([float(number) for number in numbers])
    return points


def split_coord_lonlat(x):
    """Parse WKT line text into ``[[lon, lat], ...]`` (the order used in the WKT).

    Parameters
    ----------
    x : str
        WKT text, e.g. ``'LINESTRING(19.08 47.50,19.09 47.51)'``.

    Returns
    -------
    list of list of float
        One inner list per vertex, in source order.

    Raises
    ------
    ValueError
        If a coordinate cannot be converted to float.
    """
    return _wkt_points(x)


def split_coord_latlon(x):
    """Parse WKT line text into ``[[lat, lon], ...]`` (first two values swapped).

    Parameters
    ----------
    x : str
        WKT text, e.g. ``'LINESTRING(19.08 47.50,19.09 47.51)'``.

    Returns
    -------
    list of list of float
        One inner list per vertex with the first two values exchanged; any
        further values of a vertex are kept after them.

    Raises
    ------
    ValueError
        If a coordinate cannot be converted to float.
    IndexError
        If a vertex has fewer than two values.
    """
    return [[point[1], point[0], *point[2:]] for point in _wkt_points(x)]

# Materialise both coordinate orderings of every section geometry:
# [lat, lon] feeds the Leaflet polylines in map_it, [lon, lat] feeds the
# scatter plot in plot_it and the KML export in export_it.
tracks['linestring_latlon'] = tracks['geometry'].map(split_coord_latlon)
tracks['linestring_lonlat'] = tracks['geometry'].map(split_coord_lonlat)

In [9]:
# Put the TRACKS columns into reading order: identity and geometry, lifetime
# and length, then line / system / city. Columns not listed here are dropped.
track_columns = [
    'section_id', 'geometry', 'linestring_latlon', 'linestring_lonlat',
    'opening', 'closure', 'length',
    'line_id', 'line_name', 'line_color',
    'system_id', 'system_name',
    'city_id', 'city', 'country',
]
tracks = tracks[track_columns]
tracks.head()

Unnamed: 0,section_id,geometry,linestring_latlon,linestring_lonlat,opening,closure,length,line_id,line_name,line_color,system_id,system_name,city_id,city,country
0,1911,"LINESTRING(19.0817752 47.5005079,19.0817355 47...","[[47.5005079, 19.0817752], [47.5004893, 19.081...","[[19.0817752, 47.5005079], [19.0817355, 47.500...",0.0,999999.0,6719,530.0,M4,#71be1c,22.0,Metro,29,Budapest,Hungary
1,2563,"LINESTRING(16.4151057 48.1907238,16.4156455 48...","[[48.1907238, 16.4151057], [48.190389, 16.4156...","[[16.4151057, 48.1907238], [16.4156455, 48.190...",0.0,999999.0,199,154.0,U3,#f5a623,251.0,U-Bahn,118,Vienna,Austria
2,2557,"LINESTRING(16.4164437 48.1839655,16.4161534 48...","[[48.1839655, 16.4164437], [48.1836515, 16.416...","[[16.4164437, 48.1839655], [16.4161534, 48.183...",0.0,999999.0,925,154.0,U3,#f5a623,251.0,U-Bahn,118,Vienna,Austria
3,2558,"LINESTRING(16.4164901 48.1839473,16.416198 48....","[[48.1839473, 16.4164901], [48.1836313, 16.416...","[[16.4164901, 48.1839473], [16.416198, 48.1836...",0.0,999999.0,881,154.0,U3,#f5a623,251.0,U-Bahn,118,Vienna,Austria
4,2564,"LINESTRING(16.415259 48.1908074,16.4153634 48....","[[48.1908074, 16.415259], [48.190746, 16.41536...","[[16.415259, 48.1908074], [16.4153634, 48.1907...",0.0,999999.0,213,154.0,U3,#f5a623,251.0,U-Bahn,118,Vienna,Austria


##### 1.3 Data cleanup

In [10]:
# Replace missing values with explicit placeholders, one mapping per table:
#   names        -> 'N.A.'
#   line colour  -> black
#   closure year -> 999999 (the value the raw data already uses for "still open")
#   line id      -> 0
stations = stations.fillna({
    'station_name': 'N.A.',
    'closure': 999999,
    'line_id': 0,
    'line_name': 'N.A.',
})
tracks = tracks.fillna({
    'line_color': '#000000',
    'closure': 999999,
    'line_id': 0,
    'line_name': 'N.A.',
})

In [11]:
# Normalise opening years: a missing year, or one later than 2040, becomes 0
# (the "unknown" marker that the year filters below treat as "always open").
stations['opening'] = stations['opening'].fillna(0).mask(lambda opened: opened > 2040, 0)
tracks['opening'] = tracks['opening'].fillna(0).mask(lambda opened: opened > 2040, 0)

---------------------------------
### 2. Mapping and plotting functions that take a city and a year as input

In [14]:
# Current calendar year as a float; used as the default `year` argument of the
# functions defined below.
currentDateTime = datetime.datetime.now()
currentDate = currentDateTime.date()
currentYear = float(currentDate.year)

In [15]:
# Get geocoords from city input, to help center the plot and map
# Read the geocoding API key from a local text file. Surrounding whitespace is
# stripped because text files usually end with a newline, which would
# otherwise become part of the request URL.
with open('geo_api_key.txt') as key:
    api_key = key.read().strip()

def get_geocode(city):
    """Look up the coordinates of a city with the positionstack forward-geocoding API.

    Parameters
    ----------
    city : str
        Free-text place name to geocode.

    Returns
    -------
    list
        ``[latitude, longitude]`` of the first result.

    Raises
    ------
    requests.RequestException
        On network failure, timeout, or an HTTP error status.
    LookupError
        If the service returns no result for ``city``.
    """
    # One request only (the previous version fetched the same URL twice).
    # `params` URL-encodes the query, so names with spaces or accents are safe.
    response = requests.get(
        'http://api.positionstack.com/v1/forward',
        params={'access_key': api_key.strip(), 'query': city, 'limit': 1},
        timeout=10,
    )
    response.raise_for_status()

    results = response.json().get('data') or []
    if not results:
        raise LookupError(f'No geocoding result for {city!r}')

    first_hit = results[0]
    return [first_hit['latitude'], first_hit['longitude']]            # return lat_lon geocode

In [87]:
def plot_it(city='Madrid',year=currentYear):
    """Draw the track geometry of a city's network as a bare scatter plot.

    Parameters
    ----------
    city : str
        City name; compared with ``tracks.city`` after ``str.title()``.
    year : int or float
        Only sections with ``opening <= year < closure`` are drawn.

    Returns
    -------
    plotly.graph_objects.Figure
        1000x1000 scatter with one marker per track vertex, axes and legend
        hidden, each marker drawn in the colour stored in ``line_color``.
    """
    my_tracks = tracks[(tracks.city == city.title())
                       & (tracks.opening <= year)
                       & (tracks.closure > year)]

    # Flatten every section into (lon, lat, colour) rows in a single pass.
    vertices = [
        (point[0], point[1], color)
        for segment, color in zip(my_tracks.linestring_lonlat, my_tracks.line_color)
        for point in segment
    ]
    plot = pd.DataFrame(vertices, columns=['x', 'y', 'z'])
    plot['x'] = plot['x'].astype(float)
    plot['y'] = plot['y'].astype(float)

    # "identity" makes plotly use the values in 'z' as the actual colours;
    # without it they are only category labels and the default palette is used.
    fig = px.scatter(plot,
                     x="x",
                     y="y",
                     color="z",
                     color_discrete_map="identity",
                     template="simple_white",
                     width=1000, height=1000)

    # Same "invisible axis" settings for both axes.
    hidden_axis = dict(title_text="", showgrid=False, showline=False, mirror=True,
                       showticklabels=False, ticks='', automargin=True)
    fig.update_yaxes(**hidden_axis)
    fig.update_xaxes(**hidden_axis)
    fig.update_layout(showlegend=False)

    return fig     # return plotly graph

In [17]:
def map_it(city='Madrid',year=currentYear):
    """Build an interactive Leaflet map of a city's stations and tracks.

    Parameters
    ----------
    city : str
        City name; compared with the ``city`` column after ``str.title()`` and
        also passed to ``get_geocode`` to centre the map.
    year : int or float
        Only stations and sections with ``opening <= year < closure`` are shown.

    Returns
    -------
    dash_leaflet.Map
        Map with a dark and a light base layer plus two toggleable overlays,
        "markers" (stations) and "lines" (track sections).
    """
    my_stations = stations[(stations.city == city.title())
                           & (stations.opening <= year)
                           & (stations.closure > year)]

    my_tracks = tracks[(tracks.city == city.title())
                       & (tracks.opening <= year)
                       & (tracks.closure > year)]

    km_zero = get_geocode(city)
    url = 'https://tiles.stadiamaps.com/tiles/alidade_smooth_dark/{z}/{x}/{y}{r}.png'
    attribution = '&copy; <a href="https://stadiamaps.com/">Stadia Maps</a> '

    # Convert the coordinate columns once; doing it inside the loop rebuilt the
    # full list for every station (quadratic in the number of stations).
    station_positions = my_stations[['latitude', 'longitude']].values.tolist()
    markers = [dl.Marker(position=position) for position in station_positions]

    lines = [dl.Polyline(positions=linesegment)
             for linesegment in my_tracks.linestring_latlon]

    base_layers = [
        dl.BaseLayer(
            dl.TileLayer(url=url, maxZoom=20, attribution=attribution),
            name='Dark mode',
            checked=True
        ),
        dl.BaseLayer(
            dl.TileLayer(),
            name="Light mode",
            checked=False
        ),
    ]
    overlays = [
        dl.Overlay(dl.LayerGroup(markers), name="markers", checked=True),
        dl.Overlay(dl.LayerGroup(lines), name="lines", checked=True)
    ]

    my_map = dl.Map([
        dl.LayersControl(base_layers + overlays)
    ], id='map',center=km_zero,zoom = 12)

    return my_map             # return leaflet map object

In [18]:
def count_it(city='Madrid',year=currentYear):
    """Summarise a city's network in one sentence.

    Counts the stations and sums the track length (column ``length`` divided
    by 1000) of everything with ``opening <= year < closure`` in ``city``.

    Returns
    -------
    str
        Human-readable sentence containing both figures.
    """
    wanted_city = city.title()

    def _in_service(frame):
        # Rows of `frame` that belong to the city and are open in `year`.
        open_now = (frame.opening <= year) & (frame.closure > year)
        return frame[(frame.city == wanted_city) & open_now]

    num_stations = len(_in_service(stations))
    track_length_km = _in_service(tracks).length.sum()/1000
    return f'{city}\'s transit system in {year} has {num_stations} stations and {track_length_km} km total track lengths'

In [102]:
def summarize_it(city='Madrid'):
    """Plot how a city's network grew: cumulative stations and track length per year.

    Parameters
    ----------
    city : str
        City name; compared with the ``city`` column after ``str.title()``.

    Returns
    -------
    plotly.graph_objects.Figure
        Two line traces over a shared year axis: number of stations (primary
        y-axis) and track length, i.e. ``length`` sum / 1000 (secondary y-axis).
        The figure has empty traces when the city has no dated openings.

    Notes
    -----
    Entries whose opening year is 0 (the placeholder written by the cleanup
    cell for unknown years) do not define the year range, but they are counted
    in every year because the cumulative filter is ``opening <= year``.
    Closures are not taken into account.
    """
    my_stations = stations[(stations.city == city.title())]
    my_tracks = tracks[(tracks.city == city.title())]

    joint_df = pd.concat([my_tracks.opening, my_stations.opening])

    # Year range = first to last *known* opening. Filtering on > 0 replaces the
    # old "second smallest unique value" trick, which skipped a real year when
    # no placeholder 0 was present and failed with a single unique value.
    known_openings = joint_df[joint_df > 0]
    if known_openings.empty:
        years = []
    else:
        # +1: range() excludes its stop value, and the last opening year must
        # be part of the chart.
        years = list(range(int(known_openings.min()), int(known_openings.max()) + 1))

    dataset = pd.DataFrame({
        'year': years,
        'track_length': [my_tracks.loc[my_tracks.opening <= y, 'length'].sum() / 1000
                         for y in years],
        'stations_num': [float((my_stations.opening <= y).sum()) for y in years],
    })

    # Create figure with secondary y-axis
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    fig.add_trace(
        go.Scatter(x=dataset.year, y=dataset.stations_num, name="stations"),
        secondary_y=False,
    )

    fig.add_trace(
        go.Scatter(x=dataset.year, y=dataset.track_length, name="tracks"),
        secondary_y=True,
    )

    fig.update_xaxes(title_text="Year")
    fig.update_yaxes(title_text="Number of stations", secondary_y=False,showgrid=False,zeroline=False) #Prim
    fig.update_yaxes(title_text="Track length", secondary_y=True,showgrid=False,zeroline=False) #Sec

    # Legend in the top-left corner of the plotting area.
    fig.update_layout(legend={
        'yanchor':"top",
        'y':0.99,
        'xanchor':"left",
        'x':0.01
    })

    return fig         # return plotly graph

In [98]:
def export_it(city='Madrid',year=currentYear):
    """Write the stations and tracks of a city/year to two KML files.

    This is a plain function called directly by the launcher. The former
    ``@app.callback`` decorator was removed: no ``app`` exists at module level
    and ``State`` is never imported, so defining the function raised
    ``NameError``, and the callback signature did not match ``(city, year)``.

    Parameters
    ----------
    city : str
        City name; compared with the ``city`` column after ``str.title()`` and
        used verbatim in the output file names.
    year : int or float
        Only stations and sections with ``opening <= year < closure`` are exported.

    Returns
    -------
    None
        Side effect: writes ``export/stations_<city>_<year>.kml`` (one point per
        station, described by its line name) and ``export/tracks_<city>_<year>.kml``
        (one linestring per section, described by its opening year).
    """
    import os  # local import: only needed to create the output directory

    my_stations = stations[(stations.city == city.title())
                           & (stations.opening <= year)
                           & (stations.closure > year)]
    my_tracks = tracks[(tracks.city == city.title())
                       & (tracks.opening <= year)
                       & (tracks.closure > year)]

    # Saving fails if the target directory is missing, so create it up front.
    os.makedirs('export', exist_ok=True)

    #stations
    kml_st = Kml(name='stations')
    for station_name, line_name, latitude, longitude in zip(
            my_stations.station_name, my_stations.line_name,
            my_stations.latitude, my_stations.longitude):
        # KML coordinates are (lon, lat)
        kml_st.newpoint(name=station_name, description=line_name,
                        coords=[(longitude, latitude)])
    kml_st.save(f"export/stations_{city}_{year:g}.kml")

    #tracks
    kml_tr = Kml(name='tracks')
    for line_name, linestring, opening in zip(
            my_tracks.line_name, my_tracks.linestring_lonlat, my_tracks.opening):
        kml_tr.newlinestring(name=line_name, description=f'{opening:g}', coords=linestring)
    kml_tr.save(f"export/tracks_{city}_{year:g}.kml")

### 3. Create Dash (no callbacks)

In [97]:
def dash(city='Madrid',year=currentYear):
    """Assemble the dashboard for one city/year and start the Dash server.

    The page contains, top to bottom: a title, the ``count_it`` sentence, the
    ``summarize_it`` growth chart, the ``plot_it`` scatter plot, the ``map_it``
    Leaflet map, and an export button with a text placeholder below it.

    Parameters
    ----------
    city : str
        City to display; forwarded to all helper functions.
    year : int or float
        Year to display; forwarded to all year-dependent helper functions.

    Notes
    -----
    The layout is static: no callbacks are registered, so the export button
    does nothing when clicked. The server is only started when the code runs
    as ``__main__`` (notebook or script).
    """
    app = Dash(__name__)
    app.layout = html.Div(children=[
        html.H1(children=[f'{city} in {year}']),

        html.H4(children=['Did you know...?']),
        html.Plaintext(children=[count_it(city,year)]),

        html.H4(children=['System growth over time']),
        dcc.Graph(id='summarize',figure=summarize_it(city)),

        html.H4(children=['Here\'s a pretty scatterplot for you']),
        dcc.Graph(id='plot',figure=plot_it(city,year)),

        html.H4(children=[f'Look, a map of {city} in {year} you can play with!']),
        html.Div(
            map_it(city,year),style={'width': '80%', 'height': '80vh', 'margin': "auto", "display": "block"}
        ),

        # The comma after the button was missing, which made the whole cell a
        # SyntaxError.
        html.Button('Export this to KML', id='export'),
        html.Div(id='confirmation',
             children='Enter a value and press submit'),

    ], style={'align-items': 'center','justify-content': 'center'})

    if __name__ == '__main__':
        # use_reloader=False: presumably to avoid the reloader re-running the
        # interactive prompts - confirm.
        app.run_server(debug=True,use_reloader=False)

### Launch from the command line

In [96]:
# Interactive launcher: ask for a city and a year, optionally export KML files,
# then start the dashboard.
city = None
year = None

print('Welcome to Metromania!\n')
print('Happy you are here, taking your first step towards becoming a true metro historian!')
print(f'Let\'s get started.\n')

# Keep asking until the city is one we have station data for.
while city is None:
    i = input('First, pick a city: ')
    if i.title() in stations.city.unique():
        city = i.title()
    else:
        print('ERROR: Hmmm... we don\'t seem to have data for this city. Choose another one')

# Keep asking until the year is an integer within the supported range.
while year is None:
    try:
        y = input('Second, pick a year from 1840 to 2040: ')
        y = int(y)
        if y >= 1840 and y <= 2040:
            year = y
        else:
            print('ERROR: That number is not within the 1840-2040 range')
    except ValueError as err:
        print("ERROR: A number please!")

# The question is asked inside the loop: previously it was asked once before
# the loop, so any answer other than y/n printed the error forever.
export = 0
while export == 0:
    ex = input(f'Would you like to export 2 kml files stations and tracks for {city} in {year}? (y/n)')
    answer = ex.strip().lower()
    if answer == 'y':
        export_it(city,year)
        print("Two files exported...")   # reported only after the export ran
        export +=1
    elif answer == 'n':
        print("That's OK...")
        export +=1
    else:
        print("ERROR: Only Y or N please!")

print('\nHere you go!\n')
dash(city,year)

Welcome to Metromania!

Happy you are here, taking your first step towards becoming a true metro historian!
Let's get started.

First, pick a city: beijing
Second, pick a year from 1840 to 2040: 2030
Would you like to export 2 kml files: stations and tracks for Beijing in 2030? (y/n)n
That's OK...

Here you go!

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
