In [21]:
#!pip install plotly
#!pip install folium

In [112]:
import pandas as pd
import plotly.express as px

import ast
from collections import Counter
import json

import folium

In [119]:
df_stations = pd.read_csv('stations.csv')
df_stopplaces = pd.read_csv('stopplaces.csv')

In [9]:
print(f'Stations: {df_stations.shape}')
print(f'Stopplaces: {df_stopplaces.shape}')

Stations: (5690, 16)
Stopplaces: (5727, 9)


In [10]:
df_stations.isna().sum()

id                      0
name                    0
metropolis              0
street                  8
houseNumber           893
postalCode              7
city                    4
state                   0
country                 0
stationCategory        12
owner                   0
organisationalUnit      0
countryCode             0
latitude              282
longitude             282
timeZone                0
dtype: int64

In [11]:
df_stopplaces.isna().sum()

id                        0
name                      0
availableTransports       0
transportAssociations     0
countryCode               0
state                    11
timeZone                  0
latitude                  0
longitude                 0
dtype: int64

In [12]:
#missing_values = df_stations[df_stations.isna().any(axis=1)]
#print(missing_values)

In [14]:
px.bar(df_stations, x=['owner'], barmode='group')

In [120]:
df_stations.groupby(by='state').count()['id'].sort_values(ascending=False)

state
Bayern                    1025
Baden-Württemberg          720
Nordrhein-Westfalen        711
Hessen                     479
Sachsen                    478
Rheinland-Pfalz            419
Niedersachsen              357
Brandenburg                310
Sachsen-Anhalt             289
Thüringen                  289
Mecklenburg-Vorpommern     180
Schleswig-Holstein         137
Berlin                     133
Saarland                    77
Hamburg                     58
Bremen                      16
Schweiz CH                  12
Name: id, dtype: int64

## Join Stations and Stop Places

In [121]:
df_stopplaces.drop(columns=['name', 'state', 'countryCode', 'latitude', 'longitude','timeZone'], inplace=True)

In [122]:
df = df_stations.join(df_stopplaces, on='id', how='left', rsuffix='_s')
df.drop(columns=['id_s'], inplace=True)

In [123]:
df

Unnamed: 0,id,name,metropolis,street,houseNumber,postalCode,city,state,country,stationCategory,owner,organisationalUnit,countryCode,latitude,longitude,timeZone,availableTransports,transportAssociations
0,1,Aachen Hbf,{},Bahnhofstr.,2a,52064.0,Aachen,Nordrhein-Westfalen,DE,CATEGORY_2,DB S&S,RB West,DE,50.767800,6.091499,Europe/Berlin,['REGIONAL_TRAIN'],['VVO']
1,1000,Burkhardswalde-Maxen,{},Gesundbrunnen,60c,1809.0,Müglitztal-Burkhardswalde,Sachsen,DE,CATEGORY_7,DB S&S,RB Südost,DE,50.925146,13.838369,Europe/Berlin,"['CITY_TRAIN', 'REGIONAL_TRAIN', 'BUS']",['VRS']
2,1001,Burkhardtsdorf,{},Bahnhofstraße,,9235.0,Burkhardtsdorf,Sachsen,DE,CATEGORY_6,DB Regio-Netze,Erzgebirgsbahn (EGB),DE,,,Europe/Berlin,['REGIONAL_TRAIN'],"['NASA', 'VBB']"
3,1002,Bürstadt,{},Bahnhofsallee,17,68642.0,Bürstadt,Hessen,DE,CATEGORY_6,DB S&S,RB Mitte,DE,49.645769,8.458188,Europe/Berlin,"['REGIONAL_TRAIN', 'BUS']",[]
4,1005,Buschow,{},Bahnhofstr.,28,14715.0,Märkisch Luch OT Buschow,Brandenburg,DE,CATEGORY_6,DB S&S,RB Ost,DE,52.592203,12.628996,Europe/Berlin,['REGIONAL_TRAIN'],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5685,995,Burgstädt,{},Bahnhofstr.,1,9217.0,Burgstädt,Sachsen,DE,CATEGORY_6,DB S&S,RB Südost,DE,50.915817,12.812707,Europe/Berlin,['REGIONAL_TRAIN'],['VVO']
5686,996,Burgstall (Murr),{},Bahnhofstr.,1,71576.0,Burgstetten,Baden-Württemberg,DE,CATEGORY_6,DB S&S,RB Südwest,DE,48.928647,9.369932,Europe/Berlin,['REGIONAL_TRAIN'],['RMV']
5687,997,Steinfurt-Burgsteinfurt,{},Bahnhofsplatz,6,48565.0,Steinfurt-Burgsteinfurt,Nordrhein-Westfalen,DE,CATEGORY_6,DB S&S,RB West,DE,52.147384,7.329340,Europe/Berlin,['REGIONAL_TRAIN'],['VVO']
5688,998,Burgthann,{},Bahnhofstr.,40,90559.0,Burgthann,Bayern,DE,CATEGORY_5,DB S&S,RB Süd,DE,49.342474,11.309307,Europe/Berlin,"['REGIONAL_TRAIN', 'BUS']",['VMS']


In [124]:
transports = []
for entry in df['transportAssociations']:
    if (entry != '[]'):
        try:
            #print(entry)
            for e in ast.literal_eval(entry):
                transports.append(e)
        except:
            #print(f'Unable to convert {entry}.')
            pass

pd.Series(transports).value_counts()

VBB       360
IAM       342
RMV       336
NASA      325
WT        298
VRN       258
VRR       248
VRS       213
VGN       183
MDV       175
SH        155
MVV       127
VRM       118
HVV       113
VMS       112
VVS        99
VVO        93
VMT        89
NVV        77
MAR        75
KVV        68
RNN        67
SAARVV     62
HNV        58
VBN        56
NALDO      54
DING       51
GVH        44
MOVE       44
RVF        40
ZVON       39
VVW        39
VRT        39
VRB        37
BODO       36
RVL        31
VSN        30
VAB        30
AVV        29
TGO        27
AAV        22
OAM        18
VHB        17
VGF        15
VPE        15
VGC        12
KVSHA      12
HTV        11
WTV        11
dtype: int64

In [125]:
transports = []
for entry in df['availableTransports']:
    if (entry != '[]'):
        try:
            #print(entry)
            for e in ast.literal_eval(entry):
                transports.append(e)
        except:
            #print(f'Unable to convert {entry}.')
            pass

pd.Series(transports).value_counts()

REGIONAL_TRAIN          3520
BUS                     1522
CITY_TRAIN              1245
INTERCITY_TRAIN          253
HIGH_SPEED_TRAIN         179
INTER_REGIONAL_TRAIN      71
TRAM                       9
SHUTTLE                    2
FERRY                      1
SUBWAY                     1
dtype: int64

In [62]:
df_group = df.groupby('transportAssociations').count().sort_values(by='id', ascending=False)
px.bar(x = df_group.index, y = df_group['id'])

In [65]:
df_group = df.groupby('organisationalUnit').count().sort_values(by='id', ascending=False)
px.bar(x = df_group.index, y = df_group['id'])

In [66]:
df_group = df.groupby('availableTransports').count().sort_values(by='id', ascending=False)
px.bar(x = df_group.index, y = df_group['id'])

## Display map

In [16]:
df_stations.dropna(subset = ['latitude'], inplace=True)

In [17]:
df_bw = df_stations[df_stations['state']=='Baden-Württemberg']

In [18]:
len(df_bw)

691

In [23]:
m = folium.Map(location=[50.111, 8.682],zoom_start=6)
for i in df_bw.index:
    folium.Marker(location=[ df_bw['latitude'][i], df_bw['longitude'][i] ], fill_color='#43d9de', radius=8,tooltip=df_bw['name'][i]).add_to(m)

#m