# Find Tokyo Metro Lines and Coordinates

In [3]:
import pandas as pd
import numpy as np

In [4]:
import json

In [5]:
df_train_data = pd.read_json('../_data/raw-data.json')

In [6]:
list_train_lines_tokyo = df_train_data.iloc[12]['lines']

In [7]:
filter_tokyo_subway = np.array(list(map(lambda x: ('Toei' in x['name']['en']) or ('Metro' in x['name']['en']), list_train_lines_tokyo)))

In [8]:
list_subway_lines_tokyo = list(np.array(list_train_lines_tokyo)[filter_tokyo_subway])

## Setup data storage object

In [9]:
dict_subways = {}

In [10]:
list_subway_lines_tokyo[0]['id']

28001

In [11]:
dict_subway_data = {}
for line in list_subway_lines_tokyo:
    name = line['name']['en']
    stations = line['stations']
    list_data = []
    for station in stations:
        list_data.append({'id': station['id'], 'name': station['name']['en'], 
                          'line_id': line['id'], 'line_name': name,
                          'latitude': station['location']['lat'], 'longitude': station['location']['lng']})
    dict_subway_data[name] = list_data

### Convert lists to dataframes

In [12]:
for line in dict_subway_data:
    dict_subway_data[line] = pd.DataFrame(dict_subway_data[line])

In [13]:
list(dict_subway_data.keys())

['Tokyo Metro Ginza Line',
 'Tokyo Metro Marunouchi Line',
 'Tokyo Metro Hibiya Line',
 'Tokyo Metro Tozai Line',
 'Tokyo Metro Chiyoda Line',
 'Tokyo Metro Yurakucho Line',
 'Tokyo Metro Hanzomon Line',
 'Tokyo Metro Namboku Line',
 'Tokyo Metro Fukutoshin Line',
 'Toei Oedo Line',
 'Toei Asakusa Line',
 'Toei Mita Line',
 'Toei Shinjuku Line']

### Add neighboring station data

In [14]:
for line in dict_subway_data:
    dict_subway_data[line]['next_0_name'] = dict_subway_data[line].shift(1)['name']
    dict_subway_data[line]['next_0_id'] = dict_subway_data[line].shift(1)['id']
    dict_subway_data[line]['next_1_name'] = dict_subway_data[line].shift(-1)['name']
    dict_subway_data[line]['next_1_id'] = dict_subway_data[line].shift(-1)['id']
    dict_subway_data[line]['next_0_id'].fillna(0, inplace=True)
    dict_subway_data[line]['next_1_id'].fillna(0, inplace=True)
    dict_subway_data[line]['next_0_id'] = dict_subway_data[line]['next_0_id'].apply(lambda x: int(x))
    dict_subway_data[line]['next_1_id'] = dict_subway_data[line]['next_1_id'].apply(lambda x: int(x))

In [15]:
dict_subway_data['Toei Mita Line']

Unnamed: 0,id,latitude,line_id,line_name,longitude,name,next_0_name,next_0_id,next_1_name,next_1_id
0,9930301,35.633272,99303,Toei Mita Line,139.7155,Meguro,,0,Shirokanedai,9930302
1,9930302,35.637917,99303,Toei Mita Line,139.726133,Shirokanedai,Meguro,9930301,Shinkan Takanawa,9930303
2,9930303,35.642903,99303,Toei Mita Line,139.734104,Shinkan Takanawa,Shirokanedai,9930302,Mita,9930304
3,9930304,35.64818,99303,Toei Mita Line,139.748775,Mita,Shinkan Takanawa,9930303,Shibakoen,9930305
4,9930305,35.654074,99303,Toei Mita Line,139.749824,Shibakoen,Mita,9930304,The gate,9930306
5,9930306,35.661215,99303,Toei Mita Line,139.751535,The gate,Shibakoen,9930305,Uchisaiwasemachi,9930307
6,9930307,35.66975,99303,Toei Mita Line,139.75561,Uchisaiwasemachi,The gate,9930306,Hibiya,9930308
7,9930308,35.676036,99303,Toei Mita Line,139.759998,Hibiya,Uchisaiwasemachi,9930307,Otemachi,9930309
8,9930309,35.684856,99303,Toei Mita Line,139.762959,Otemachi,Hibiya,9930308,Jimbocho,9930310
9,9930310,35.695492,99303,Toei Mita Line,139.75812,Jimbocho,Otemachi,9930309,Aqueduct,9930311


### Generate geojson

In [26]:
def row_to_geojson(row, json_list=[]):
    dict = {"type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [row['longitude'], row['latitude']]
            },
            "properties": {
                "next_0_name": row['next_0_name'],
                "next_1_name": row['next_1_name']
            }}
    json_list.append(dict)

In [78]:
json_list_ginza = []
dict_subway_data['Tokyo Metro Ginza Line'].apply(lambda x: row_to_geojson(x, json_list=json_list_ginza), axis=1)

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
dtype: object

In [79]:
json.dumps(json_list_ginza)

'[{"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.797592, 35.710733]}, "properties": {"next_0_name": NaN, "next_1_name": "Tahara-cho"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.790897, 35.709897]}, "properties": {"next_0_name": "Asakusa", "next_1_name": "Inaricho"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.782593, 35.711273]}, "properties": {"next_0_name": "Tahara-cho", "next_1_name": "Ueno"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.777122, 35.711482]}, "properties": {"next_0_name": "Inaricho", "next_1_name": "Ueno Hirokoji"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.772877, 35.70768]}, "properties": {"next_0_name": "Ueno", "next_1_name": "Suehiro cho"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.771713, 35.702972]}, "properties": {"next_0_name": "Ueno Hirokoji", "next_1_name": "Kanda"}}, {"type": "Feature", "geometry

### Combine all station info into single dataframe

In [15]:
dfs = []
for line in dict_subway_data:
    dfs.append(dict_subway_data[line])
df_subway_data = pd.concat(dfs).reset_index().drop('index', axis=1)

### Color Mapping

In [16]:
line_colors = {'Toei Mita Line': '#0080c6', 'Tokyo Metro Yurakucho Line': '#bb8b38', 
               'Tokyo Metro Marunouchi Line': '#ed1c24', 'Tokyo Metro Tozai Line': '#00b2dd',
               'Tokyo Metro Hanzomon Line': '#937cb9', 'Toei Shinjuku Line': '#6dc067',
               'Tokyo Metro Hibiya Line': '#8ba2ae', 'Tokyo Metro Chiyoda Line': '#00a650',
               'Toei Oedo Line': '#b51e82', 'Tokyo Metro Ginza Line': '#f7931d', 
               'Toei Asakusa Line': '#ef5ba1', 'Tokyo Metro Namboku Line': '#00b5ad', 
               'Tokyo Metro Fukutoshin Line': '#ba6831'}

In [17]:
df_subway_data['line_color'] = df_subway_data['line_name'].apply(lambda x: line_colors[x])

In [18]:
df_subway_data.head()

Unnamed: 0,id,latitude,line_id,line_name,longitude,name,next_0_name,next_0_id,next_1_name,next_1_id,line_color
0,2800101,35.710733,28001,Tokyo Metro Ginza Line,139.797592,Asakusa,,0,Tahara-cho,2800102,#f7931d
1,2800102,35.709897,28001,Tokyo Metro Ginza Line,139.790897,Tahara-cho,Asakusa,2800101,Inaricho,2800103,#f7931d
2,2800103,35.711273,28001,Tokyo Metro Ginza Line,139.782593,Inaricho,Tahara-cho,2800102,Ueno,2800104,#f7931d
3,2800104,35.711482,28001,Tokyo Metro Ginza Line,139.777122,Ueno,Inaricho,2800103,Ueno Hirokoji,2800105,#f7931d
4,2800105,35.70768,28001,Tokyo Metro Ginza Line,139.772877,Ueno Hirokoji,Ueno,2800104,Suehiro cho,2800106,#f7931d


### Save as CSV

In [21]:
df_subway_data.to_csv('../_data/df_subway_stations.csv')