In [201]:
#!pip install plotly
#!pip install folium

In [202]:
import pandas as pd
import numpy as np

import pickle

import plotly.express as px

import folium

In [203]:
pkl_file = open('stations.pkl', 'rb')
df_stations = pickle.load(pkl_file)
pkl_file.close()

In [204]:
pkl_file = open('stopplaces.pkl', 'rb')
df_stopplaces = pickle.load(pkl_file)
pkl_file.close()

In [205]:
#df_stations = pd.read_csv('stations.csv')
#df_stopplaces = pd.read_csv('stopplaces.csv')

In [206]:
print(f'Stations: {df_stations.shape}')
print(f'Stopplaces: {df_stopplaces.shape}')

Stations: (5690, 16)
Stopplaces: (4208, 9)


In [207]:
df_stations.isna().sum()

id                      0
name                    0
metropolis              0
street                  8
houseNumber           893
postalCode              7
city                    4
state                   0
country                 0
stationCategory        12
owner                   0
organisationalUnit      0
countryCode             0
latitude              282
longitude             282
timeZone                0
dtype: int64

In [208]:
df_stopplaces.isna().sum()

id                       0
name                     0
availableTransports      0
transportAssociations    0
countryCode              0
state                    6
timeZone                 0
latitude                 0
longitude                0
dtype: int64

In [209]:
#missing_values = df_stations[df_stations.isna().any(axis=1)]
#print(missing_values)

In [210]:
px.bar(df_stations, x=['owner'], barmode='group')

In [211]:
df_stations.groupby(by='state').count()['id'].sort_values(ascending=False)

state
Bayern                    1025
Baden-Württemberg          720
Nordrhein-Westfalen        711
Hessen                     479
Sachsen                    478
Rheinland-Pfalz            419
Niedersachsen              357
Brandenburg                310
Sachsen-Anhalt             289
Thüringen                  289
Mecklenburg-Vorpommern     180
Schleswig-Holstein         137
Berlin                     133
Saarland                    77
Hamburg                     58
Bremen                      16
Schweiz CH                  12
Name: id, dtype: int64

## Join Stations and Stop Places

In [212]:
df_stopplaces.drop(columns=['name', 'state', 'countryCode', 'latitude', 'longitude','timeZone'], inplace=True)

In [213]:
df_stations['id'] = df_stations['id'].astype(int)
df_stations['latitude'] = df_stations['latitude'].astype(float)
df_stations['longitude'] = df_stations['longitude'].astype(float)

df_stations.dtypes

id                      int32
name                   object
metropolis             object
street                 object
houseNumber            object
postalCode             object
city                   object
state                  object
country                object
stationCategory        object
owner                  object
organisationalUnit     object
countryCode            object
latitude              float64
longitude             float64
timeZone               object
dtype: object

In [214]:
df_stopplaces['id'] = df_stopplaces['id'].astype(int)
df_stopplaces.dtypes

id                        int32
availableTransports      object
transportAssociations    object
dtype: object

In [215]:
df = df_stations.join(df_stopplaces, on='id', how='left', rsuffix='_s')
df.drop(columns=['id_s'], inplace=True)

In [216]:
df['availableTransports']

0                        [REGIONAL_TRAIN]
1       [CITY_TRAIN, BUS, REGIONAL_TRAIN]
2                        [REGIONAL_TRAIN]
3                   [BUS, REGIONAL_TRAIN]
4                        [REGIONAL_TRAIN]
                      ...                
5685                     [REGIONAL_TRAIN]
5686                     [REGIONAL_TRAIN]
5687                     [REGIONAL_TRAIN]
5688                [BUS, REGIONAL_TRAIN]
5689                [BUS, REGIONAL_TRAIN]
Name: availableTransports, Length: 5690, dtype: object

In [217]:
transports = []
for entry in df['transportAssociations']:
    try:
        for e in entry:
            transports.append(e)
    except:
        pass

transportAssociations = pd.Series(transports).value_counts()

In [218]:
transports = []
for entry in df['availableTransports']:
    try:
        for e in entry:
            transports.append(e)
    except:
        pass

availableTransports = pd.Series(transports).value_counts()

In [219]:
px.bar(transportAssociations)

In [220]:
px.bar(availableTransports)

## Display map

In [272]:
df_stations.dropna(subset = ['latitude'], inplace=True)

In [271]:
pkl_file = open('station_images.pkl', 'rb')
df_station_images = pickle.load(pkl_file)
pkl_file.close()

In [295]:
df_images = pd.DataFrame.from_dict({k: v for k, v in df_station_images.items() if v and len(v) == 1}).T
df_images.columns = ['image']
df_images = df_images.reset_index()
df_images['index'] = df_images['index'].astype(int)

In [296]:
df_images.dtypes

index     int32
image    object
dtype: object

In [297]:
df_stations.dtypes

id                      int32
name                   object
metropolis             object
street                 object
houseNumber            object
postalCode             object
city                   object
state                  object
country                object
stationCategory        object
owner                  object
organisationalUnit     object
countryCode            object
latitude              float64
longitude             float64
timeZone               object
dtype: object

In [301]:
df_station_and_images = pd.merge(left=df_stations, right=df_images, left_on=['id'], right_on=['index'], how='left')

In [302]:
df_station_and_images

Unnamed: 0,id,name,metropolis,street,houseNumber,postalCode,city,state,country,stationCategory,owner,organisationalUnit,countryCode,latitude,longitude,timeZone,index,image
0,1,Aachen Hbf,{},Bahnhofstr.,2a,52064,Aachen,Nordrhein-Westfalen,DE,CATEGORY_2,DB S&S,RB West,DE,50.767800,6.091499,Europe/Berlin,1.0,https://api.railway-stations.org/photos/de/1_1...
1,1000,Burkhardswalde-Maxen,{},Gesundbrunnen,60c,01809,Müglitztal-Burkhardswalde,Sachsen,DE,CATEGORY_7,DB S&S,RB Südost,DE,50.925146,13.838369,Europe/Berlin,1000.0,https://api.railway-stations.org/photos/de/100...
2,1002,Bürstadt,{},Bahnhofsallee,17,68642,Bürstadt,Hessen,DE,CATEGORY_6,DB S&S,RB Mitte,DE,49.645769,8.458188,Europe/Berlin,1002.0,https://api.railway-stations.org/photos/de/100...
3,1005,Buschow,{},Bahnhofstr.,28,14715,Märkisch Luch OT Buschow,Brandenburg,DE,CATEGORY_6,DB S&S,RB Ost,DE,52.592203,12.628996,Europe/Berlin,1005.0,https://api.railway-stations.org/photos/de/100...
4,1006,Büsenbachtal,{},Am Büsenbach,16l,21256,Handeloh-Wörme,Niedersachsen,DE,CATEGORY_7,DB S&S,RB Nord,DE,53.269595,9.858592,Europe/Berlin,1006.0,https://api.railway-stations.org/photos/de/100...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5403,995,Burgstädt,{},Bahnhofstr.,1,09217,Burgstädt,Sachsen,DE,CATEGORY_6,DB S&S,RB Südost,DE,50.915817,12.812707,Europe/Berlin,,
5404,996,Burgstall (Murr),{},Bahnhofstr.,1,71576,Burgstetten,Baden-Württemberg,DE,CATEGORY_6,DB S&S,RB Südwest,DE,48.928647,9.369932,Europe/Berlin,,
5405,997,Steinfurt-Burgsteinfurt,{},Bahnhofsplatz,6,48565,Steinfurt-Burgsteinfurt,Nordrhein-Westfalen,DE,CATEGORY_6,DB S&S,RB West,DE,52.147384,7.329340,Europe/Berlin,,
5406,998,Burgthann,{},Bahnhofstr.,40,90559,Burgthann,Bayern,DE,CATEGORY_5,DB S&S,RB Süd,DE,49.342474,11.309307,Europe/Berlin,,


In [303]:
df_station_and_images = df_station_and_images.join(df_stopplaces, on='id', how='left', rsuffix='_sp')

In [304]:
df_station_and_images

Unnamed: 0,id,name,metropolis,street,houseNumber,postalCode,city,state,country,stationCategory,...,organisationalUnit,countryCode,latitude,longitude,timeZone,index,image,id_sp,availableTransports,transportAssociations
0,1,Aachen Hbf,{},Bahnhofstr.,2a,52064,Aachen,Nordrhein-Westfalen,DE,CATEGORY_2,...,RB West,DE,50.767800,6.091499,Europe/Berlin,1.0,https://api.railway-stations.org/photos/de/1_1...,1000.0,[REGIONAL_TRAIN],[VVO]
1,1000,Burkhardswalde-Maxen,{},Gesundbrunnen,60c,01809,Müglitztal-Burkhardswalde,Sachsen,DE,CATEGORY_7,...,RB Südost,DE,50.925146,13.838369,Europe/Berlin,1000.0,https://api.railway-stations.org/photos/de/100...,2145.0,"[CITY_TRAIN, BUS, REGIONAL_TRAIN]",[VRS]
2,1002,Bürstadt,{},Bahnhofsallee,17,68642,Bürstadt,Hessen,DE,CATEGORY_6,...,RB Mitte,DE,49.645769,8.458188,Europe/Berlin,1002.0,https://api.railway-stations.org/photos/de/100...,2149.0,"[BUS, REGIONAL_TRAIN]",[]
3,1005,Buschow,{},Bahnhofstr.,28,14715,Märkisch Luch OT Buschow,Brandenburg,DE,CATEGORY_6,...,RB Ost,DE,52.592203,12.628996,Europe/Berlin,1005.0,https://api.railway-stations.org/photos/de/100...,2153.0,[REGIONAL_TRAIN],[]
4,1006,Büsenbachtal,{},Am Büsenbach,16l,21256,Handeloh-Wörme,Niedersachsen,DE,CATEGORY_7,...,RB Nord,DE,53.269595,9.858592,Europe/Berlin,1006.0,https://api.railway-stations.org/photos/de/100...,2158.0,[REGIONAL_TRAIN],[RMV]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5403,995,Burgstädt,{},Bahnhofstr.,1,09217,Burgstädt,Sachsen,DE,CATEGORY_6,...,RB Südost,DE,50.915817,12.812707,Europe/Berlin,,,2139.0,[REGIONAL_TRAIN],[VVO]
5404,996,Burgstall (Murr),{},Bahnhofstr.,1,71576,Burgstetten,Baden-Württemberg,DE,CATEGORY_6,...,RB Südwest,DE,48.928647,9.369932,Europe/Berlin,,,2140.0,[REGIONAL_TRAIN],[RMV]
5405,997,Steinfurt-Burgsteinfurt,{},Bahnhofsplatz,6,48565,Steinfurt-Burgsteinfurt,Nordrhein-Westfalen,DE,CATEGORY_6,...,RB West,DE,52.147384,7.329340,Europe/Berlin,,,2141.0,[REGIONAL_TRAIN],[VVO]
5406,998,Burgthann,{},Bahnhofstr.,40,90559,Burgthann,Bayern,DE,CATEGORY_5,...,RB Süd,DE,49.342474,11.309307,Europe/Berlin,,,2142.0,"[BUS, REGIONAL_TRAIN]",[VMS]


In [305]:
df_bw = df_station_and_images[df_station_and_images['state']=='Baden-Württemberg']

In [306]:
df_bw

Unnamed: 0,id,name,metropolis,street,houseNumber,postalCode,city,state,country,stationCategory,...,organisationalUnit,countryCode,latitude,longitude,timeZone,index,image,id_sp,availableTransports,transportAssociations
18,1025,Calw,{},Bischofstr.,10,75365,Calw,Baden-Württemberg,DE,CATEGORY_6,...,RB Südwest,DE,48.714682,8.741854,Europe/Berlin,1025.0,https://api.railway-stations.org/photos/de/102...,2182.0,[REGIONAL_TRAIN],[VBB]
58,1079,Crailsheim,{},Bahnhof,1,74564,Crailsheim,Baden-Württemberg,DE,CATEGORY_3,...,RB Südwest,DE,49.137872,10.064326,Europe/Berlin,1079.0,https://api.railway-stations.org/photos/de/107...,2257.0,[REGIONAL_TRAIN],[RVL]
84,1115,Dallau,{},Bahnhofstrasse,25,74834,Elztal-Dallau,Baden-Württemberg,DE,CATEGORY_6,...,RB Südwest,DE,49.388078,9.187550,Europe/Berlin,1115.0,https://api.railway-stations.org/photos/de/111...,2300.0,"[CITY_TRAIN, BUS, REGIONAL_TRAIN]",[RMV]
109,115,Altglashütten-Falkau,{},Falkauerstr.,18,79868,Feldberg-Altglashütten,Baden-Württemberg,DE,CATEGORY_7,...,RB Südwest,DE,47.859392,8.114113,Europe/Berlin,115.0,https://api.railway-stations.org/photos/de/115...,1141.0,[REGIONAL_TRAIN],[WT]
123,1165,Denzlingen,{},Bahnhofstr.,1,79211,Denzlingen,Baden-Württemberg,DE,CATEGORY_4,...,RB Südwest,DE,48.068625,7.881434,Europe/Berlin,1165.0,https://api.railway-stations.org/photos/de/116...,2367.0,"[BUS, REGIONAL_TRAIN]","[NASA, MDV]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5341,911,Brucken,{},Am Mühlbach,30,73252,Lenningen,Baden-Württemberg,DE,CATEGORY_7,...,RB Südwest,DE,48.573918,9.453061,Europe/Berlin,,,2039.0,[REGIONAL_TRAIN],[]
5358,939,Buchholz (Baden),{},Eisenbahnstr.,6,79183,Waldkirch,Baden-Württemberg,DE,CATEGORY_6,...,RB Südwest,DE,48.083581,7.915166,Europe/Berlin,,,207.0,[REGIONAL_TRAIN],[WT]
5374,959,Buggingen,{},Grissheimerstr.,1,79426,Buggingen,Baden-Württemberg,DE,CATEGORY_6,...,RB Südwest,DE,47.855254,7.628161,Europe/Berlin,,,2098.0,"[BUS, REGIONAL_TRAIN]",[VRN]
5376,960,Bühl (Baden),{},Güterstr.,7,77815,Bühl,Baden-Württemberg,DE,CATEGORY_5,...,RB Südwest,DE,48.696676,8.129299,Europe/Berlin,,,2099.0,"[BUS, REGIONAL_TRAIN]","[VRS, VRT]"


In [307]:
len(df_bw)

691

In [313]:
df_bw['transportAssociations']

18            [VBB]
58            [RVL]
84            [RMV]
109            [WT]
123     [NASA, MDV]
           ...     
5341             []
5358           [WT]
5374          [VRN]
5376     [VRS, VRT]
5404          [RMV]
Name: transportAssociations, Length: 691, dtype: object

In [372]:
[item for item in df_bw['availableTransports'] if item is not np.nan]

[['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['CITY_TRAIN', 'BUS', 'REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['BUS', 'REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['BUS', 'REGIONAL_TRAIN'],
 ['CITY_TRAIN', 'BUS'],
 ['CITY_TRAIN', 'REGIONAL_TRAIN'],
 ['BUS', 'REGIONAL_TRAIN'],
 ['CITY_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['BUS', 'REGIONAL_TRAIN'],
 ['CITY_TRAIN', 'REGIONAL_TRAIN'],
 ['CITY_TRAIN', 'BUS'],
 ['INTERCITY_TRAIN', 'REGIONAL_TRAIN'],
 ['CITY_TRAIN', 'BUS', 'REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['CITY_TRAIN', 'REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['BUS', 'REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['BUS', 'REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['CITY_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['REGIONAL_TRAIN'],
 ['BUS', 'REGIONA

In [315]:
m = folium.Map(location=[50.111, 8.682],zoom_start=6)
for i in df_bw.index:
    html=f"""
    <img src="{df_bw['image'][i]}" width="500px">
    <br/>
    <b><p>{df_bw['id'][i]}: {df_bw['name'][i]}</b></p>
    <p>Transports: {','.join([str(item) for item in df_bw['availableTransports'][i] if df_bw['availableTransports'].any()])}</p>
    <p>Associations: {','.join(df_bw['transportAssociations'][i])}</p>
    """

    test = folium.Html(html, script=True)
    popup = folium.Popup(test, max_width=2650)

    folium.Marker(location=[ df_bw['latitude'][i], df_bw['longitude'][i] ], fill_color='#43d9de', radius=8,tooltip=df_bw['name'][i], popup=popup).add_to(m)

m

TypeError: 'float' object is not iterable