## ZVV Transit Notebook

The point of this notebook is to come up with a structured file, which includes the necessary stations and lines needed for the Mister X Zürich application.

In [1]:
# Imports
import pandas as pd
import numpy as np

At first we want to specify, which lines we want to look and and get their corresponding route ids and their type of transportation.

In [2]:
routes = pd.read_csv(r'routes.txt')
routes = routes[["route_id", "route_short_name"]]
line_labels = np.array(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "31", "32", "33"])
routes = routes[routes.route_short_name.isin(line_labels)]

tram_routes = routes[routes.route_id.str.startswith("1-")].assign(route_type="tram")
bus_routes = routes[routes.route_id.str.startswith("2-")].assign(route_type="bus")

routes = pd.concat([tram_routes, bus_routes], sort=False)
routes

Unnamed: 0,route_id,route_short_name,route_type
6,1-10-P-j21-1,10,tram
9,1-11-P-j21-1,11,tram
10,1-12-P-j21-1,12,tram
11,1-13-P-j21-1,13,tram
12,1-14-P-j21-1,14,tram
13,1-15-P-j21-1,15,tram
17,1-2-P-j21-1,2,tram
18,1-3-P-j21-1,3,tram
20,1-4-P-j21-1,4,tram
23,1-5-P-j21-1,5,tram


In [3]:
trips = pd.read_csv(r'trips.txt')
trips = trips[["route_id","trip_id"]]
trips = trips[trips.route_id.isin(routes.route_id)]
trips
trips_by_route_id = trips.groupby(["route_id"])["trip_id"].apply(list).reset_index(name='trip_ids')
trips_by_route_id

Unnamed: 0,route_id,trip_ids
0,1-10-P-j21-1,"[10.T3.1-10-P-j21-1.2.R, 100.T0.1-10-P-j21-1.4..."
1,1-11-P-j21-1,"[100.T0.1-11-P-j21-1.13.R, 1004.T0.1-11-P-j21-..."
2,1-12-P-j21-1,"[1.T2.1-12-P-j21-1.1.R, 10.T2.1-12-P-j21-1.1.R..."
3,1-13-P-j21-1,"[1.T2.1-13-P-j21-1.2.R, 1.T3.1-13-P-j21-1.2.R,..."
4,1-14-P-j21-1,"[10.T2.1-14-P-j21-1.4.R, 10.T3.1-14-P-j21-1.2...."
5,1-15-P-j21-1,"[1.T2.1-15-P-j21-1.5.R, 1.T3.1-15-P-j21-1.5.R,..."
6,1-2-P-j21-1,"[1.T2.1-2-P-j21-1.1.R, 10.T2.1-2-P-j21-1.4.R, ..."
7,1-3-P-j21-1,"[1.T2.1-3-P-j21-1.7.R, 10.T2.1-3-P-j21-1.8.R, ..."
8,1-4-P-j21-1,"[1.T2.1-4-P-j21-1.5.R, 1.T3.1-4-P-j21-1.5.R, 1..."
9,1-5-P-j21-1,"[1.T3.1-5-P-j21-1.1.R, 10.T2.1-5-P-j21-1.2.R, ..."


In [4]:
stop_times = pd.read_csv(r'stop_times.txt')
stop_times = stop_times[["trip_id","stop_id","stop_sequence"]]
stop_times = stop_times[stop_times.trip_id.isin(trips.trip_id)]
stop_times

Unnamed: 0,trip_id,stop_id,stop_sequence
0,2.T2.1-9-P-j21-1.14.R,gen:23026:1643:1:1,1
1,2.T2.1-9-P-j21-1.14.R,gen:23026:2666:1:1,2
2,2.T2.1-9-P-j21-1.14.R,gen:23026:2814:1:1,3
3,2.T2.1-9-P-j21-1.14.R,gen:23026:2326:1:1,4
4,2.T2.1-9-P-j21-1.14.R,gen:23026:2294:1:1,5
...,...,...,...
513757,497.T0.2-33-P-j21-1.8.H,gen:23026:1252:1:1,3
513758,497.T0.2-33-P-j21-1.8.H,gen:23026:2147:1:10,4
513759,497.T0.2-33-P-j21-1.8.H,gen:23026:3155:1:10,5
513760,497.T0.2-33-P-j21-1.8.H,gen:23026:1320:1:10,6


In [5]:
ordered_stops_by_trip_id = stop_times.groupby(["trip_id"])["stop_id"].apply(list).reset_index(name='ordered_stop_ids')
ordered_stops_by_trip_id

Unnamed: 0,trip_id,ordered_stop_ids
0,1.T2.1-12-P-j21-1.1.R,"[gen:23026:2575:1:1, gen:23026:3785:3:1, gen:2..."
1,1.T2.1-13-P-j21-1.2.R,"[gen:23026:2950:1:0, gen:23026:586:1:1, gen:23..."
2,1.T2.1-15-P-j21-1.5.R,"[gen:23026:3039:1:1, gen:23026:440:1:3, gen:23..."
3,1.T2.1-2-P-j21-1.1.R,"[gen:23026:1357:1:1, gen:23026:1579:1:1, gen:2..."
4,1.T2.1-3-P-j21-1.7.R,"[gen:23026:1357:1:1, gen:23026:1579:1:1, gen:2..."
...,...,...
20391,999.T0.1-4-P-j21-1.27.H,"[gen:23026:5001:2:0, gen:23026:6648:1:0, gen:2..."
20392,999.T0.1-6-P-j21-1.27.H,"[gen:23026:3005:1:0, gen:23026:2618:1:0, gen:2..."
20393,999.T0.1-8-P-j21-1.22.R,"[gen:23026:1430:1:1, gen:23026:1278:1:1, gen:2..."
20394,999.T2.1-13-P-j21-1.37.H,"[gen:23026:872:1:0, gen:23026:2935:1:0, gen:23..."


In [6]:
ordered_stops_by_trip_id['length'] = ordered_stops_by_trip_id["ordered_stop_ids"].apply(len)
ordered_stops_by_trip_id

Unnamed: 0,trip_id,ordered_stop_ids,length
0,1.T2.1-12-P-j21-1.1.R,"[gen:23026:2575:1:1, gen:23026:3785:3:1, gen:2...",18
1,1.T2.1-13-P-j21-1.2.R,"[gen:23026:2950:1:0, gen:23026:586:1:1, gen:23...",31
2,1.T2.1-15-P-j21-1.5.R,"[gen:23026:3039:1:1, gen:23026:440:1:3, gen:23...",13
3,1.T2.1-2-P-j21-1.1.R,"[gen:23026:1357:1:1, gen:23026:1579:1:1, gen:2...",17
4,1.T2.1-3-P-j21-1.7.R,"[gen:23026:1357:1:1, gen:23026:1579:1:1, gen:2...",9
...,...,...,...
20391,999.T0.1-4-P-j21-1.27.H,"[gen:23026:5001:2:0, gen:23026:6648:1:0, gen:2...",26
20392,999.T0.1-6-P-j21-1.27.H,"[gen:23026:3005:1:0, gen:23026:2618:1:0, gen:2...",26
20393,999.T0.1-8-P-j21-1.22.R,"[gen:23026:1430:1:1, gen:23026:1278:1:1, gen:2...",20
20394,999.T2.1-13-P-j21-1.37.H,"[gen:23026:872:1:0, gen:23026:2935:1:0, gen:23...",31


In [7]:
def get_trip_length(x):
    return ordered_stops_by_trip_id.loc[ordered_stops_by_trip_id["trip_id"] == x]["length"].to_numpy()[0]

def choose_element_with_max_length_list(x):
    max_trip = x[0]
    for trip in x:
        if get_trip_length(trip) > get_trip_length(max_trip):
            max_trip = trip
    return max_trip

In [8]:
trips_by_route_id["max_trip_id"] = trips_by_route_id["trip_ids"].apply(choose_element_with_max_length_list)

In [9]:
trips_by_route_id

Unnamed: 0,route_id,trip_ids,max_trip_id
0,1-10-P-j21-1,"[10.T3.1-10-P-j21-1.2.R, 100.T0.1-10-P-j21-1.4...",1010.T0.1-10-P-j21-1.24.H
1,1-11-P-j21-1,"[100.T0.1-11-P-j21-1.13.R, 1004.T0.1-11-P-j21-...",100.T0.1-11-P-j21-1.13.R
2,1-12-P-j21-1,"[1.T2.1-12-P-j21-1.1.R, 10.T2.1-12-P-j21-1.1.R...",1.T2.1-12-P-j21-1.1.R
3,1-13-P-j21-1,"[1.T2.1-13-P-j21-1.2.R, 1.T3.1-13-P-j21-1.2.R,...",1.T2.1-13-P-j21-1.2.R
4,1-14-P-j21-1,"[10.T2.1-14-P-j21-1.4.R, 10.T3.1-14-P-j21-1.2....",10.T2.1-14-P-j21-1.4.R
5,1-15-P-j21-1,"[1.T2.1-15-P-j21-1.5.R, 1.T3.1-15-P-j21-1.5.R,...",117.T2.1-15-P-j21-1.6.R
6,1-2-P-j21-1,"[1.T2.1-2-P-j21-1.1.R, 10.T2.1-2-P-j21-1.4.R, ...",10.T2.1-2-P-j21-1.4.R
7,1-3-P-j21-1,"[1.T2.1-3-P-j21-1.7.R, 10.T2.1-3-P-j21-1.8.R, ...",10.T2.1-3-P-j21-1.8.R
8,1-4-P-j21-1,"[1.T2.1-4-P-j21-1.5.R, 1.T3.1-4-P-j21-1.5.R, 1...",10.T2.1-4-P-j21-1.16.R
9,1-5-P-j21-1,"[1.T3.1-5-P-j21-1.1.R, 10.T2.1-5-P-j21-1.2.R, ...",313.T2.1-5-P-j21-1.22.H


In [31]:
def get_stops(route_id):
    max_trip_id = trips_by_route_id.loc[trips_by_route_id["route_id"] == route_id]["max_trip_id"].to_numpy()[0]
    ordered_stops = ordered_stops_by_trip_id.loc[ordered_stops_by_trip_id["trip_id"] == max_trip_id]["ordered_stop_ids"].to_numpy()[0]
    return ordered_stops
    
lines_tram_bus = routes
lines_tram_bus["ordered_stops"] = routes["route_id"].apply(get_stops)
lines_tram_bus = lines_tram_bus.reset_index(drop=True)
lines_tram_bus

Unnamed: 0,route_id,route_short_name,route_type,ordered_stops
0,1-10-P-j21-1,10,tram,"[gen:23026:6232:2:0, gen:23026:3027:3:0, gen:2..."
1,1-11-P-j21-1,11,tram,"[gen:23026:2549:1:0, gen:23026:894:1:1, gen:23..."
2,1-12-P-j21-1,12,tram,"[gen:23026:2575:1:1, gen:23026:3785:3:1, gen:2..."
3,1-13-P-j21-1,13,tram,"[gen:23026:2950:1:0, gen:23026:586:1:1, gen:23..."
4,1-14-P-j21-1,14,tram,"[gen:23026:2680:1:1, gen:23026:2160:1:1, gen:2..."
5,1-15-P-j21-1,15,tram,"[gen:23026:3039:1:1, gen:23026:440:1:3, gen:23..."
6,1-2-P-j21-1,2,tram,"[gen:23026:3041:1:0, gen:23026:2907:1:1, gen:2..."
7,1-3-P-j21-1,3,tram,"[gen:23026:1430:1:0, gen:23026:1278:1:1, gen:2..."
8,1-4-P-j21-1,4,tram,"[gen:23026:3041:1:1, gen:23026:2907:1:1, gen:2..."
9,1-5-P-j21-1,5,tram,"[gen:23026:1334:1:0, gen:23026:1507:1:0, gen:2..."


In [34]:
# Adding simple Train lines
simple_train_lines = {'route_id':['3-S4-P-j21-1', '3-S6-P-j21-1', '3-S9-P-j21-1', '3-S10-P-j21-1', '3-S16-P-j21-1', '3-S24-P-j21-1'],
                     'route_short_name':['S4', 'S6', 'S9', 'S10', 'S16', 'S24'],
                     'route_type':['train', 'train', 'train', 'train', 'train', 'train'],
                     'ordered_stops':[['gen:23026:2550:1:0', 'gen:23026:2551:1:0'],
                                     ['gen:23026:2346:1:1', 'gen:23026:3034:1:1'],
                                     ['gen:23026:3039:1:0', 'gen:23026:2575:1:0'],
                                     ['gen:23026:2680:1:0','gen:23026:2551:1:0','gen:23026:412:1:0'],
                                     ['gen:23026:3041:1:0', 'gen:23026:3039:1:0', 'gen:23026:412:1:0', 'gen:23026:3029:1:11', 'gen:23026:3034:1:1', 'gen:23026:3027:1:50'],
                                     ['gen:23026:3044:1:0', 'gen:23026:3026:1:0','gen:23026:3042:1:0','gen:23026:412:1:0','gen:23026:3043:1:10']]}
simple_train_lines = [pd.Series(['3-S4-P-j21-1', 'S4', 'train', ['gen:23026:2550:1:0', 'gen:23026:2551:1:0']], index=lines_tram_bus.columns ) ,
                      pd.Series(['3-S6-P-j21-1', 'S6', 'train', ['gen:23026:2346:1:1', 'gen:23026:3034:1:1']], index=lines_tram_bus.columns ) ,
                      pd.Series(['3-S9-P-j21-1', 'S9', 'train', ['gen:23026:3039:1:0', 'gen:23026:2575:1:0']], index=lines_tram_bus.columns ) ,
                      pd.Series(['3-S10-P-j21-1', 'S10', 'train', ['gen:23026:2680:1:0','gen:23026:2551:1:0','gen:23026:412:1:0']], index=lines_tram_bus.columns ) ,
                      pd.Series(['3-S16-P-j21-1', 'S16', 'train', ['gen:23026:3041:1:0', 'gen:23026:3039:1:0', 'gen:23026:412:1:0', 'gen:23026:3029:1:11', 'gen:23026:3034:1:1', 'gen:23026:3027:1:50']], index=lines_tram_bus.columns ) ,
                      pd.Series(['3-S24-P-j21-1', 'S24', 'train', ['gen:23026:3044:1:0', 'gen:23026:3026:1:0','gen:23026:3042:1:0','gen:23026:412:1:0','gen:23026:3043:1:10']], index=lines_tram_bus.columns )]

lines_tram_bus_train = lines_tram_bus.append(simple_train_lines, ignore_index=True)
lines_tram_bus_train

Unnamed: 0,route_id,route_short_name,route_type,ordered_stops
0,1-10-P-j21-1,10,tram,"[gen:23026:6232:2:0, gen:23026:3027:3:0, gen:2..."
1,1-11-P-j21-1,11,tram,"[gen:23026:2549:1:0, gen:23026:894:1:1, gen:23..."
2,1-12-P-j21-1,12,tram,"[gen:23026:2575:1:1, gen:23026:3785:3:1, gen:2..."
3,1-13-P-j21-1,13,tram,"[gen:23026:2950:1:0, gen:23026:586:1:1, gen:23..."
4,1-14-P-j21-1,14,tram,"[gen:23026:2680:1:1, gen:23026:2160:1:1, gen:2..."
5,1-15-P-j21-1,15,tram,"[gen:23026:3039:1:1, gen:23026:440:1:3, gen:23..."
6,1-2-P-j21-1,2,tram,"[gen:23026:3041:1:0, gen:23026:2907:1:1, gen:2..."
7,1-3-P-j21-1,3,tram,"[gen:23026:1430:1:0, gen:23026:1278:1:1, gen:2..."
8,1-4-P-j21-1,4,tram,"[gen:23026:3041:1:1, gen:23026:2907:1:1, gen:2..."
9,1-5-P-j21-1,5,tram,"[gen:23026:1334:1:0, gen:23026:1507:1:0, gen:2..."


In [35]:
stops = pd.read_csv(r'stops.txt')
stops = stops[["stop_id","stop_name","stop_lat","stop_lon"]]
stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon
0,ch:23026:1198:1:52,"Wiesendangen, Hinterdorf",47.522019,8.794354
1,ch:23026:1902:1:80,"Benken ZH, Dorf",47.652405,8.655322
2,ch:23026:1902:1:81,"Benken ZH, Dorf",47.652423,8.655474
3,ch:23026:2095:1:52,"Dübendorf, Ringstrasse",47.400040,8.607379
4,ch:23026:2283:2:12,"Winterthur, Schulhaus Seen",47.483295,8.761592
...,...,...,...,...
5631,Parent971,Germaniastrasse,47.387130,8.552222
5632,Parent974,Gewerbeschule,47.498039,8.732568
5633,Parent983,Industriepark,47.501371,8.761826
5634,Parent994,Bahnhof,47.430567,8.559355


In [37]:
def get_station_names(ordered_stops):
    ordered_stop_names = []
    for stop_id in ordered_stops:
        stop_name = stops.loc[stops["stop_id"] == stop_id]["stop_name"].to_numpy()[0]
        ordered_stop_names.append(stop_name)
    return ordered_stop_names

lines_tram_bus_train["ordered_stop_names"] = lines_tram_bus_train["ordered_stops"].apply(get_station_names)
lines_tram_bus_train

Unnamed: 0,route_id,route_short_name,route_type,ordered_stops,ordered_stop_names
0,1-10-P-j21-1,10,tram,"[gen:23026:6232:2:0, gen:23026:3027:3:0, gen:2...","[Zürich Flughafen, Fracht, Zürich Flughafen, B..."
1,1-11-P-j21-1,11,tram,"[gen:23026:2549:1:0, gen:23026:894:1:1, gen:23...","[Zürich, Rehalp, Zürich, Friedhof Enzenbühl, Z..."
2,1-12-P-j21-1,12,tram,"[gen:23026:2575:1:1, gen:23026:3785:3:1, gen:2...","[Zürich, Bahnhof Stettbach, Dübendorf, Ringwie..."
3,1-13-P-j21-1,13,tram,"[gen:23026:2950:1:0, gen:23026:586:1:1, gen:23...","[Zürich, Wollishoferplatz, Zürich, Butzenstras..."
4,1-14-P-j21-1,14,tram,"[gen:23026:2680:1:1, gen:23026:2160:1:1, gen:2...","[Zürich, Triemli, Zürich, Schaufelbergerstrass..."
5,1-15-P-j21-1,15,tram,"[gen:23026:3039:1:1, gen:23026:440:1:3, gen:23...","[Zürich, Bahnhof Stadelhofen, Zürich, Bellevue..."
6,1-2-P-j21-1,2,tram,"[gen:23026:3041:1:0, gen:23026:2907:1:1, gen:2...","[Zürich, Bahnhof Tiefenbrunnen, Zürich, Wildba..."
7,1-3-P-j21-1,3,tram,"[gen:23026:1430:1:0, gen:23026:1278:1:1, gen:2...","[Zürich, Klusplatz, Zürich, Hölderlinstrasse, ..."
8,1-4-P-j21-1,4,tram,"[gen:23026:3041:1:1, gen:23026:2907:1:1, gen:2...","[Zürich, Bahnhof Tiefenbrunnen, Zürich, Wildba..."
9,1-5-P-j21-1,5,tram,"[gen:23026:1334:1:0, gen:23026:1507:1:0, gen:2...","[Zürich, Universität Irchel, Zürich, Langmauer..."


In [39]:
lines_tram_bus_train_csv = lines_tram_bus_train.to_csv("lines_tram_bus_train", index=False)

In [40]:
list_of_stations = []
for station_list in lines_tram_bus_train["ordered_stop_names"]:
    list_of_stations += station_list
list_of_stations = list(set(list_of_stations))
list_of_stations
len(list_of_stations)

249

In [41]:
station_info = pd.DataFrame(columns = ["name", "stop_lat", "stop_lon", "reachable_by_tram", "reachable_by_bus", "reachable_by_train"])
station_info["name"] = list_of_stations

def get_stop_lat(stop_name):
    return stops.loc[stops["stop_name"] == stop_name]["stop_lat"].to_numpy()[0]

def get_stop_lon(stop_name):
    return stops.loc[stops["stop_name"] == stop_name]["stop_lon"].to_numpy()[0]

def get_stop_id(stop_name):
    return stops.loc[stops["stop_name"] == stop_name]["stop_id"].to_numpy()[0]

station_info["stop_lat"] = station_info["name"].apply(get_stop_lat)
station_info["stop_lon"] = station_info["name"].apply(get_stop_lon)
#station_info["stop_id"] = station_info["name"].apply(get_stop_id)

station_info["reachable_by_tram"] = np.empty((len(station_info), 0)).tolist()
station_info["reachable_by_bus"] = np.empty((len(station_info), 0)).tolist()
station_info["reachable_by_train"] = np.empty((len(station_info), 0)).tolist()

station_info = station_info.set_index(["name"])

station_info

Unnamed: 0_level_0,stop_lat,stop_lon,reachable_by_tram,reachable_by_bus,reachable_by_train
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Zürich, Kronenstrasse",47.387926,8.539358,[],[],[]
"Zürich, Kreuzstrasse",47.363374,8.549518,[],[],[]
"Rümlang, Bäuler",47.434906,8.557289,[],[],[]
"Zürich, Grünaustrasse",47.395036,8.489942,[],[],[]
"Zürich, Holzerhurd",47.423773,8.496643,[],[],[]
...,...,...,...,...,...
"Zürich, Hungerbergstrasse",47.420916,8.502168,[],[],[]
"Zürich, Sihlquai/HB",47.379764,8.537400,[],[],[]
"Zch, Bhf.Wollishofen/Staubstr.",47.346882,8.532917,[],[],[]
"Zürich, Brunaustrasse",47.355926,8.532199,[],[],[]


In [46]:

for i in range(lines_tram_bus_train.shape[0]):
    ordered_stop_names = lines_tram_bus_train.loc[i]["ordered_stop_names"]
    if lines_tram_bus_train.loc[i]["route_type"]=="tram":
        station_info.loc[ordered_stop_names[0],"reachable_by_tram"].extend([ordered_stop_names[1]])
        for j in range(1,len(lines_tram_bus_train.loc[i]["ordered_stop_names"])-1):
            station_info.loc[ordered_stop_names[j],"reachable_by_tram"].extend([ordered_stop_names[j-1],ordered_stop_names[j+1]])
        station_info.loc[ordered_stop_names[-1],"reachable_by_tram"].extend([ordered_stop_names[-2]])
    elif lines_tram_bus_train.loc[i]["route_type"]=="bus":
        station_info.loc[ordered_stop_names[0],"reachable_by_bus"].extend([ordered_stop_names[1]])
        for j in range(1,len(lines_tram_bus.loc[i]["ordered_stop_names"])-1):
            station_info.loc[ordered_stop_names[j],"reachable_by_bus"].extend([ordered_stop_names[j-1],ordered_stop_names[j+1]])
        station_info.loc[ordered_stop_names[-1],"reachable_by_bus"].extend([ordered_stop_names[-2]])
    elif lines_tram_bus_train.loc[i]["route_type"]=="train":
        station_info.loc[ordered_stop_names[0],"reachable_by_train"].extend([ordered_stop_names[1]])
        for j in range(1,len(lines_tram_bus_train.loc[i]["ordered_stop_names"])-1):
            station_info.loc[ordered_stop_names[j],"reachable_by_train"].extend([ordered_stop_names[j-1],ordered_stop_names[j+1]])
        station_info.loc[ordered_stop_names[-1],"reachable_by_train"].extend([ordered_stop_names[-2]])
    else:
        print("ERROR")
 
station_info

Unnamed: 0_level_0,stop_lat,stop_lon,reachable_by_tram,reachable_by_bus,reachable_by_train
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Zürich, Kronenstrasse",47.387926,8.539358,"[Zürich, Schaffhauserplatz, Zürich, Beckenhof,...",[],[]
"Zürich, Kreuzstrasse",47.363374,8.549518,"[Zürich, Opernhaus, Zürich, Feldeggstrasse, Zü...",[],[]
"Rümlang, Bäuler",47.434906,8.557289,"[Glattbrugg, Bahnhof, Glattbrugg, Unterriet, G...",[],[]
"Zürich, Grünaustrasse",47.395036,8.489942,"[Zürich, Bändliweg, Zürich, Tüffenwies, Zürich...",[],[]
"Zürich, Holzerhurd",47.423773,8.496643,[],"[Zürich, Hungerbergstrasse, Zürich, Hungerberg...",[]
...,...,...,...,...,...
"Zürich, Hungerbergstrasse",47.420916,8.502168,[],"[Zürich, Zehntenhausplatz, Zürich, Holzerhurd,...",[]
"Zürich, Sihlquai/HB",47.379764,8.537400,"[Zürich, Museum für Gestaltung, Zürich, Bahnho...",[],[]
"Zch, Bhf.Wollishofen/Staubstr.",47.346882,8.532917,"[Zürich, Billoweg, Zürich, Post Wollishofen, Z...",[],"[Zürich, Bahnhof Enge]"
"Zürich, Brunaustrasse",47.355926,8.532199,"[Zürich, Billoweg, Zürich, Museum Rietberg, Zü...",[],[]


In [47]:
def make_values_unique(l):
    return list(set(l))

    
station_info["reachable_by_tram"] = station_info["reachable_by_tram"].apply(make_values_unique)
station_info["reachable_by_bus"] = station_info["reachable_by_bus"].apply(make_values_unique)
station_info["reachable_by_train"] = station_info["reachable_by_train"].apply(make_values_unique)
station_info

Unnamed: 0_level_0,stop_lat,stop_lon,reachable_by_tram,reachable_by_bus,reachable_by_train
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Zürich, Kronenstrasse",47.387926,8.539358,"[Zürich, Schaffhauserplatz, Zürich, Beckenhof]",[],[]
"Zürich, Kreuzstrasse",47.363374,8.549518,"[Zürich, Opernhaus, Zürich, Feldeggstrasse]",[],[]
"Rümlang, Bäuler",47.434906,8.557289,"[Glattbrugg, Bahnhof, Glattbrugg, Unterriet]",[],[]
"Zürich, Grünaustrasse",47.395036,8.489942,"[Zürich, Bändliweg, Zürich, Tüffenwies]",[],[]
"Zürich, Holzerhurd",47.423773,8.496643,[],"[Zürich, Hungerbergstrasse]",[]
...,...,...,...,...,...
"Zürich, Hungerbergstrasse",47.420916,8.502168,[],"[Zürich, Zehntenhausplatz, Zürich, Holzerhurd]",[]
"Zürich, Sihlquai/HB",47.379764,8.537400,"[Zürich, Museum für Gestaltung, Zürich, Bahnho...",[],[]
"Zch, Bhf.Wollishofen/Staubstr.",47.346882,8.532917,"[Zürich, Billoweg, Zürich, Post Wollishofen]",[],"[Zürich, Bahnhof Enge]"
"Zürich, Brunaustrasse",47.355926,8.532199,"[Zürich, Billoweg, Zürich, Museum Rietberg]",[],[]


In [48]:
station_info_csv = station_info.to_csv("station_info", index=True)

In [51]:
station_dict = station_info.reset_index().to_dict('records')
for idx,station in enumerate(station_dict):
    station["id"]= idx+1

def find_station_id(name):
    for station in station_dict:
        if station["name"]== name:
            return station["id"]
        
for station in station_dict:
    temp_bus = []
    temp_tram = []
    temp_train = []
    for bus in station["reachable_by_bus"]:
        temp_bus.append(find_station_id(bus))
    for tram in station["reachable_by_tram"]:
        temp_tram.append(find_station_id(tram))
    for train in station["reachable_by_train"]:
        temp_train.append(find_station_id(train))
    station["reachable_by_bus"] = temp_bus
    station["reachable_by_tram"] = temp_tram
    station["reachable_by_train"] = temp_train
    

print(station_dict)
import json
with open('station_dict.json', 'w', encoding='utf-8') as outfile:
    json.dump(station_dict, outfile, ensure_ascii=False)

[{'name': 'Zürich, Kronenstrasse', 'stop_lat': 47.3879263489224, 'stop_lon': 8.53935814257034, 'reachable_by_tram': [73, 236], 'reachable_by_bus': [], 'reachable_by_train': [], 'id': 1}, {'name': 'Zürich, Kreuzstrasse', 'stop_lat': 47.36337419139621, 'stop_lon': 8.54951808843514, 'reachable_by_tram': [193, 99], 'reachable_by_bus': [], 'reachable_by_train': [], 'id': 2}, {'name': 'Rümlang, Bäuler', 'stop_lat': 47.43490611329371, 'stop_lon': 8.55728851564386, 'reachable_by_tram': [19, 74], 'reachable_by_bus': [], 'reachable_by_train': [], 'id': 3}, {'name': 'Zürich, Grünaustrasse', 'stop_lat': 47.3950355796226, 'stop_lon': 8.48994181878404, 'reachable_by_tram': [21, 231], 'reachable_by_bus': [], 'reachable_by_train': [], 'id': 4}, {'name': 'Zürich, Holzerhurd', 'stop_lat': 47.42377287817099, 'stop_lon': 8.49664325080451, 'reachable_by_tram': [], 'reachable_by_bus': [245], 'reachable_by_train': [], 'id': 5}, {'name': 'Zürich, Berghaldenstrasse', 'stop_lat': 47.3586826744651, 'stop_lon': 8