# Find Tokyo Metro Lines and Coordinates

In [1]:
import pandas as pd
import numpy as np

In [6]:
import json

In [4]:
df_train_data = pd.read_json('../_data/raw-data.json')

In [23]:
list_train_lines_tokyo = df_train_data.iloc[12]['lines']

In [38]:
filter_tokyo_subway = np.array(list(map(lambda x: ('Toei' in x['name']['en']) or ('Metro' in x['name']['en']), list_train_lines_tokyo)))

In [41]:
list_subway_lines_tokyo = list(np.array(list_train_lines_tokyo)[filter_tokyo_subway])

## Setup data storage object

In [47]:
import pickle

In [48]:
dict_subways = {}

In [81]:
list_subway_lines_tokyo[0]['id']

28001

In [91]:
dict_subway_data = {}
for line in list_subway_lines_tokyo:
    name = line['name']['en']
    stations = line['stations']
    list_data = []
    for station in stations:
        list_data.append({'id': station['id'], 'name': station['name']['en'], 
                          'line_id': line['id'], 'line_name': name,
                          'latitude': station['location']['lat'], 'longitude': station['location']['lng']})
    dict_subway_data[name] = list_data

### Convert lists to dataframes

In [92]:
for line in dict_subway_data:
    dict_subway_data[line] = pd.DataFrame(dict_subway_data[line])

In [93]:
list(dict_subway_data.keys())

['Tokyo Metro Ginza Line',
 'Tokyo Metro Marunouchi Line',
 'Tokyo Metro Hibiya Line',
 'Tokyo Metro Tozai Line',
 'Tokyo Metro Chiyoda Line',
 'Tokyo Metro Yurakucho Line',
 'Tokyo Metro Hanzomon Line',
 'Tokyo Metro Namboku Line',
 'Tokyo Metro Fukutoshin Line',
 'Toei Oedo Line',
 'Toei Asakusa Line',
 'Toei Mita Line',
 'Toei Shinjuku Line']

### Add neighboring station data

In [94]:
for line in dict_subway_data:
    dict_subway_data[line]['next_0_name'] = dict_subway_data[line].shift(1)['name']
    dict_subway_data[line]['next_1_name'] = dict_subway_data[line].shift(-1)['name']

In [95]:
dict_subway_data['Tokyo Metro Ginza Line']

Unnamed: 0,id,latitude,line_id,line_name,longitude,name,next_0_name,next_1_name
0,2800101,35.710733,28001,Tokyo Metro Ginza Line,139.797592,Asakusa,,Tahara-cho
1,2800102,35.709897,28001,Tokyo Metro Ginza Line,139.790897,Tahara-cho,Asakusa,Inaricho
2,2800103,35.711273,28001,Tokyo Metro Ginza Line,139.782593,Inaricho,Tahara-cho,Ueno
3,2800104,35.711482,28001,Tokyo Metro Ginza Line,139.777122,Ueno,Inaricho,Ueno Hirokoji
4,2800105,35.70768,28001,Tokyo Metro Ginza Line,139.772877,Ueno Hirokoji,Ueno,Suehiro cho
5,2800106,35.702972,28001,Tokyo Metro Ginza Line,139.771713,Suehiro cho,Ueno Hirokoji,Kanda
6,2800107,35.693587,28001,Tokyo Metro Ginza Line,139.770899,Kanda,Suehiro cho,Before Mitsukoshi
7,2800108,35.687101,28001,Tokyo Metro Ginza Line,139.773594,Before Mitsukoshi,Kanda,Nihonbashi
8,2800109,35.682078,28001,Tokyo Metro Ginza Line,139.773516,Nihonbashi,Before Mitsukoshi,Kyobashi
9,2800110,35.676856,28001,Tokyo Metro Ginza Line,139.770126,Kyobashi,Nihonbashi,Ginza


### Generate geojson

In [77]:
def row_to_geojson(row, json_list=[]):
    dict = {"type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [row['longitude'], row['latitude']]
            },
            "properties": {
                "next_0_name": row['next_0_name'],
                "next_1_name": row['next_1_name']
            }}
    json_list.append(dict)

In [78]:
json_list_ginza = []
dict_subway_data['Tokyo Metro Ginza Line'].apply(lambda x: row_to_geojson(x, json_list=json_list_ginza), axis=1)

0     None
1     None
2     None
3     None
4     None
5     None
6     None
7     None
8     None
9     None
10    None
11    None
12    None
13    None
14    None
15    None
16    None
17    None
18    None
dtype: object

In [79]:
json.dumps(json_list_ginza)

'[{"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.797592, 35.710733]}, "properties": {"next_0_name": NaN, "next_1_name": "Tahara-cho"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.790897, 35.709897]}, "properties": {"next_0_name": "Asakusa", "next_1_name": "Inaricho"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.782593, 35.711273]}, "properties": {"next_0_name": "Tahara-cho", "next_1_name": "Ueno"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.777122, 35.711482]}, "properties": {"next_0_name": "Inaricho", "next_1_name": "Ueno Hirokoji"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.772877, 35.70768]}, "properties": {"next_0_name": "Ueno", "next_1_name": "Suehiro cho"}}, {"type": "Feature", "geometry": {"type": "Point", "coordinates": [139.771713, 35.702972]}, "properties": {"next_0_name": "Ueno Hirokoji", "next_1_name": "Kanda"}}, {"type": "Feature", "geometry

### Combine all station info into single dataframe

In [108]:
dfs = []
for line in dict_subway_data:
    dfs.append(dict_subway_data[line])
df_subway_data = pd.concat(dfs).reset_index().drop('index', axis=1)

### Save as CSV

In [110]:
df_subway_data.to_csv('../_data/df_subway_stations')