# Bahn API to rail network conversion

General setup and initialization:

In [1]:
from datetime import datetime
import json

import requests

date_format = "%Y-%m-%d"
today = datetime.now().strftime(date_format)
today_short = datetime.now().strftime("%y%m%d")


# stations with stops for long distance connections
ld_location_base_url = "https://api.deutschebahn.com/fahrplan-plus/v1/location/"

# arrival board for long distance connections
ld_arrival_base_url = "https://api.deutschebahn.com/fahrplan-plus/v1/arrivalBoard/"

# all stations, no matter what connections are offered
station_overview_url = "https://api.deutschebahn.com/stada/v2/stations/"

timetable_base_url = "https://api.deutschebahn.com/timetables/v1/plan/"


auth_token = ""
with open("auth.json", "r") as auth_file:
    auth_data = json.load(auth_file)
    auth_token = auth_data["token"]
base_headers = {"Authorization": auth_token, "Accept": "application/json"}


Bearer f26a780b3a3767f808d7f1c39c4be75d


## Long-distance connection checks

Get all stations in Dresden, which provide access to long distance connections

In [69]:
r = requests.get(ld_location_base_url + "dresden", headers=base_headers)
r

<Response [200]>

In [70]:
matches = r.json()
matches

[{'name': 'Dresden Hbf', 'lon': 13.732039, 'lat': 51.040562, 'id': 8010085},
 {'name': 'Dresden-Neustadt',
  'lon': 13.740704,
  'lat': 51.065903,
  'id': 8010089}]

We will look at Dresden main station in more detail now.

In [60]:
dd_hbf = next(station for station in matches if "Hbf" in station["name"])
dd_hbf

{'name': 'Dresden Hbf', 'lon': 13.732039, 'lat': 51.040562, 'id': 8010085}

Query for all long distance connections for today:

In [71]:
r = requests.get(ld_arrival_base_url + str(dd_hbf["id"]), headers=base_headers, params={"date": today})
r

<Response [200]>

In [72]:
r.json()

[{'name': 'IC 2249',
  'type': 'IC',
  'boardId': 8010085,
  'stopId': 8010085,
  'stopName': 'Dresden Hbf',
  'dateTime': '2021-10-27T07:37',
  'origin': 'Leipzig Hbf',
  'track': '1',
  'detailsId': '29493%2F17213%2F162576%2F71457%2F80%3fstation_evaId%3D8010085'},
 {'name': 'RJ 257',
  'type': 'RJ',
  'boardId': 8010085,
  'stopId': 8010085,
  'stopName': 'Dresden Hbf',
  'dateTime': '2021-10-27T08:07',
  'origin': 'Berlin Hbf &#x0028;tief&#x0029;',
  'track': '1',
  'detailsId': '249381%2F103607%2F271114%2F52430%2F80%3fstation_evaId%3D8010085'},
 {'name': 'IC 2449',
  'type': 'IC',
  'boardId': 8010085,
  'stopId': 8010085,
  'stopName': 'Dresden Hbf',
  'dateTime': '2021-10-27T08:44',
  'origin': 'Magdeburg Hbf',
  'track': '3',
  'detailsId': '525297%2F183907%2F742436%2F196119%2F80%3fstation_evaId%3D8010085'},
 {'name': 'EC 178',
  'type': 'EC',
  'boardId': 8010085,
  'stopId': 8010085,
  'stopName': 'Dresden Hbf',
  'dateTime': '2021-10-27T08:52',
  'origin': 'Praha hl.n.',
  't

## All stations in Saxony

Extract all stations in Saxony, alongside with their EVA number. That number is used as an ID in the remainder of this application.

In [73]:
r = requests.get(station_overview_url, headers=base_headers, params={"federalstate": "sachsen"})
r

<Response [200]>

In [77]:
station_data = r.json()["result"]
stations = {s["name"]: s["evaNumbers"][0]["number"] for s in station_data}
stations

{'Adorf (Vogtl)': 8010001,
 'Altenbach': 8011018,
 'Kurort Altenberg (Erzgeb)': 8011019,
 'Altmittweida': 8011033,
 'Amtshainersdorf': 8011039,
 'Leipzig Anger-Crottendorf': 8010008,
 'Arnsdorf (b Dresden)': 8010006,
 'Auerbach (Vogtl) Hp': 8011059,
 'Auerbach (Vogtl) ob Bf': 8010011,
 'Auerbach (Vogtl) unt Bf': 8010012,
 'Bad Brambach': 8011068,
 'Bad Elster': 8011072,
 'Bad Lausick': 8011077,
 'Bad Schandau': 8010022,
 'Bärenhecke-Johnsbach': 8011091,
 'Bärenstein (b Glashütte/Sachs)': 8011095,
 'Barthmühle': 8011115,
 'Bautzen': 8010026,
 'Beilrode': 8011131,
 'Belgershain': 8011132,
 'Berthelsdorf (Erzgeb)': 8010044,
 'Beucha': 8010046,
 'Bienenmühle': 8011185,
 'Bischheim-Gersdorf': 8011199,
 'Bischofswerda': 8010049,
 'Böhlen (b Leipzig)': 8013471,
 'Böhlen Werke': 8011222,
 'Borna (b Leipzig)': 8011242,
 'Borsdorf (Sachs)': 8010059,
 'Braunsdorf-Lichtenwalde': 8011257,
 'Breitendorf': 8011264,
 'Burgstädt': 8011297,
 'Burkhardswalde-Maxen': 8011298,
 'Chemnitz Hbf': 8010184,
 'C

In [79]:
len(stations)

400

The total number of requests to fetch the complete timetables for all stations:

In [81]:
hours_per_day = 24
requests_per_minute = 20
total_requests = len(stations) * hours_per_day
total_fetch_time = total_requests / requests_per_minute

## Fetch all connections of all stations

In [7]:
import itertools
import os
import time

hours = list(range(0, 24))
rate_limit = 20
base_dir = "timetables"
outfile_format = "timetable_{station}_{hour:02}.xml"
timetable_url_pattern = "{base}{station}/{date}/{hour:02}"

xml_headers = base_headers.copy()
xml_headers["Accept"] = "application/xml"

os.makedirs(base_dir, exist_ok=True)

In [None]:
station_evas = list(stations.values())

In [134]:
test_stations = [station_evas[i] for i in range(7)]
test_hours = list(range(15, 18))

In [135]:
timetable_idx = 0
timetable_combinations = list(itertools.product(station_evas, hours))
n_timetable_combinations = len(timetable_combinations)
error_combinations = []
for timetable in timetable_combinations:
    stat, hour = timetable
    timetable_idx += 1
    
    req_url = timetable_url_pattern.format(base=timetable_base_url, station=stat, date=today_short, hour=hour)
    r = requests.get(req_url, headers=xml_headers)
    
    if r.status_code != 200:
        print("Error on request", timetable_idx, "(station={}, hour={})".format(stat, hour), " code =", r.status_code)
        error_combinations.append({"station": stat, "hour": hour, "status": r.status_code})
    
    outfile_name = base_dir + "/" + outfile_format.format(station=stat, hour=hour)
    with open(outfile_name, "w") as outfile:
        outfile.write(r.text)
    
    if timetable_idx % rate_limit == 0:
        print(timetable_idx, "timetables (of", n_timetable_combinations, ") so far. Taking a break.")
        time.sleep(60)

20 timetables (of 9600 ) so far. Taking a break.
40 timetables (of 9600 ) so far. Taking a break.
60 timetables (of 9600 ) so far. Taking a break.
80 timetables (of 9600 ) so far. Taking a break.
100 timetables (of 9600 ) so far. Taking a break.
120 timetables (of 9600 ) so far. Taking a break.
140 timetables (of 9600 ) so far. Taking a break.
160 timetables (of 9600 ) so far. Taking a break.
180 timetables (of 9600 ) so far. Taking a break.
200 timetables (of 9600 ) so far. Taking a break.
220 timetables (of 9600 ) so far. Taking a break.
240 timetables (of 9600 ) so far. Taking a break.
260 timetables (of 9600 ) so far. Taking a break.
280 timetables (of 9600 ) so far. Taking a break.
300 timetables (of 9600 ) so far. Taking a break.
320 timetables (of 9600 ) so far. Taking a break.
340 timetables (of 9600 ) so far. Taking a break.
360 timetables (of 9600 ) so far. Taking a break.
380 timetables (of 9600 ) so far. Taking a break.
400 timetables (of 9600 ) so far. Taking a break.
420 

1600 timetables (of 9600 ) so far. Taking a break.
Error on request 1609 (station=8011422, hour=0)  code = 410
Error on request 1610 (station=8011422, hour=1)  code = 410
1620 timetables (of 9600 ) so far. Taking a break.
Error on request 1633 (station=8011423, hour=0)  code = 410
Error on request 1634 (station=8011423, hour=1)  code = 410
1640 timetables (of 9600 ) so far. Taking a break.
Error on request 1657 (station=8013476, hour=0)  code = 410
Error on request 1658 (station=8013476, hour=1)  code = 410
1660 timetables (of 9600 ) so far. Taking a break.
1680 timetables (of 9600 ) so far. Taking a break.
Error on request 1681 (station=8011424, hour=0)  code = 410
Error on request 1682 (station=8011424, hour=1)  code = 410
1700 timetables (of 9600 ) so far. Taking a break.
Error on request 1705 (station=8011425, hour=0)  code = 410
Error on request 1706 (station=8011425, hour=1)  code = 410
1720 timetables (of 9600 ) so far. Taking a break.
Error on request 1729 (station=8011426, hou

Error on request 2689 (station=8011705, hour=0)  code = 410
Error on request 2690 (station=8011705, hour=1)  code = 410
2700 timetables (of 9600 ) so far. Taking a break.
Error on request 2713 (station=8011709, hour=0)  code = 410
Error on request 2714 (station=8011709, hour=1)  code = 410
2720 timetables (of 9600 ) so far. Taking a break.
Error on request 2737 (station=8011712, hour=0)  code = 410
Error on request 2738 (station=8011712, hour=1)  code = 410
Error on request 2739 (station=8011712, hour=2)  code = 410
2740 timetables (of 9600 ) so far. Taking a break.
2760 timetables (of 9600 ) so far. Taking a break.
Error on request 2761 (station=8013447, hour=0)  code = 410
Error on request 2762 (station=8013447, hour=1)  code = 410
Error on request 2763 (station=8013447, hour=2)  code = 410
2780 timetables (of 9600 ) so far. Taking a break.
Error on request 2785 (station=8010144, hour=0)  code = 410
Error on request 2786 (station=8010144, hour=1)  code = 410
Error on request 2787 (st

Error on request 3507 (station=8010174, hour=2)  code = 410
3520 timetables (of 9600 ) so far. Taking a break.
Error on request 3529 (station=8010177, hour=0)  code = 410
Error on request 3530 (station=8010177, hour=1)  code = 410
Error on request 3531 (station=8010177, hour=2)  code = 410
3540 timetables (of 9600 ) so far. Taking a break.
Error on request 3553 (station=8011922, hour=0)  code = 410
Error on request 3554 (station=8011922, hour=1)  code = 410
Error on request 3555 (station=8011922, hour=2)  code = 410
3560 timetables (of 9600 ) so far. Taking a break.
Error on request 3577 (station=8011928, hour=0)  code = 410
Error on request 3578 (station=8011928, hour=1)  code = 410
Error on request 3579 (station=8011928, hour=2)  code = 410
3580 timetables (of 9600 ) so far. Taking a break.
3600 timetables (of 9600 ) so far. Taking a break.
Error on request 3601 (station=8011937, hour=0)  code = 410
Error on request 3602 (station=8011937, hour=1)  code = 410
Error on request 3603 (st

Error on request 4131 (station=8012115, hour=2)  code = 410
Error on request 4132 (station=8012115, hour=3)  code = 410
4140 timetables (of 9600 ) so far. Taking a break.
Error on request 4153 (station=8012117, hour=0)  code = 410
Error on request 4154 (station=8012117, hour=1)  code = 410
Error on request 4155 (station=8012117, hour=2)  code = 410
Error on request 4156 (station=8012117, hour=3)  code = 410
4160 timetables (of 9600 ) so far. Taking a break.
Error on request 4177 (station=8010040, hour=0)  code = 410
Error on request 4178 (station=8010040, hour=1)  code = 410
Error on request 4179 (station=8010040, hour=2)  code = 410
Error on request 4180 (station=8010040, hour=3)  code = 410
4180 timetables (of 9600 ) so far. Taking a break.
4200 timetables (of 9600 ) so far. Taking a break.
Error on request 4201 (station=8012138, hour=0)  code = 410
Error on request 4202 (station=8012138, hour=1)  code = 410
Error on request 4203 (station=8012138, hour=2)  code = 410
Error on request

4800 timetables (of 9600 ) so far. Taking a break.
Error on request 4801 (station=8012220, hour=0)  code = 410
Error on request 4802 (station=8012220, hour=1)  code = 410
Error on request 4803 (station=8012220, hour=2)  code = 410
Error on request 4804 (station=8012220, hour=3)  code = 410
4820 timetables (of 9600 ) so far. Taking a break.
Error on request 4825 (station=8012223, hour=0)  code = 410
Error on request 4826 (station=8012223, hour=1)  code = 410
Error on request 4827 (station=8012223, hour=2)  code = 410
Error on request 4828 (station=8012223, hour=3)  code = 410
4840 timetables (of 9600 ) so far. Taking a break.
Error on request 4849 (station=8012224, hour=0)  code = 410
Error on request 4850 (station=8012224, hour=1)  code = 410
Error on request 4851 (station=8012224, hour=2)  code = 410
Error on request 4852 (station=8012224, hour=3)  code = 410
4860 timetables (of 9600 ) so far. Taking a break.
Error on request 4873 (station=8012228, hour=0)  code = 410
Error on request

Error on request 5357 (station=8010234, hour=4)  code = 410
5360 timetables (of 9600 ) so far. Taking a break.
Error on request 5377 (station=8012369, hour=0)  code = 410
Error on request 5378 (station=8012369, hour=1)  code = 410
Error on request 5379 (station=8012369, hour=2)  code = 410
Error on request 5380 (station=8012369, hour=3)  code = 410
5380 timetables (of 9600 ) so far. Taking a break.
Error on request 5381 (station=8012369, hour=4)  code = 410
5400 timetables (of 9600 ) so far. Taking a break.
Error on request 5401 (station=8012371, hour=0)  code = 410
Error on request 5402 (station=8012371, hour=1)  code = 410
Error on request 5403 (station=8012371, hour=2)  code = 410
Error on request 5404 (station=8012371, hour=3)  code = 410
Error on request 5405 (station=8012371, hour=4)  code = 410
5420 timetables (of 9600 ) so far. Taking a break.
Error on request 5425 (station=8012375, hour=0)  code = 410
Error on request 5426 (station=8012375, hour=1)  code = 410
Error on request

Error on request 5750 (station=8010400, hour=13)  code = 503
Error on request 5751 (station=8010400, hour=14)  code = 503
Error on request 5752 (station=8010400, hour=15)  code = 503
Error on request 5753 (station=8010400, hour=16)  code = 503
Error on request 5754 (station=8010400, hour=17)  code = 503
Error on request 5755 (station=8010400, hour=18)  code = 503
Error on request 5756 (station=8010400, hour=19)  code = 503
Error on request 5757 (station=8010400, hour=20)  code = 503
Error on request 5758 (station=8010400, hour=21)  code = 503
Error on request 5759 (station=8010400, hour=22)  code = 503
Error on request 5760 (station=8010400, hour=23)  code = 503
5760 timetables (of 9600 ) so far. Taking a break.
Error on request 5761 (station=8012463, hour=0)  code = 410
Error on request 5762 (station=8012463, hour=1)  code = 410
Error on request 5763 (station=8012463, hour=2)  code = 410
Error on request 5764 (station=8012463, hour=3)  code = 410
Error on request 5765 (station=8012463

Error on request 6221 (station=8012535, hour=4)  code = 410
Error on request 6222 (station=8012535, hour=5)  code = 410
6240 timetables (of 9600 ) so far. Taking a break.
Error on request 6241 (station=8012540, hour=0)  code = 410
Error on request 6242 (station=8012540, hour=1)  code = 410
Error on request 6243 (station=8012540, hour=2)  code = 410
Error on request 6244 (station=8012540, hour=3)  code = 410
Error on request 6245 (station=8012540, hour=4)  code = 410
Error on request 6246 (station=8012540, hour=5)  code = 410
6260 timetables (of 9600 ) so far. Taking a break.
Error on request 6265 (station=8012550, hour=0)  code = 410
Error on request 6266 (station=8012550, hour=1)  code = 410
Error on request 6267 (station=8012550, hour=2)  code = 410
Error on request 6268 (station=8012550, hour=3)  code = 410
Error on request 6269 (station=8012550, hour=4)  code = 410
Error on request 6270 (station=8012550, hour=5)  code = 410
6280 timetables (of 9600 ) so far. Taking a break.
Error o

Error on request 6699 (station=8012660, hour=2)  code = 410
Error on request 6700 (station=8012660, hour=3)  code = 410
6700 timetables (of 9600 ) so far. Taking a break.
Error on request 6701 (station=8012660, hour=4)  code = 410
Error on request 6702 (station=8012660, hour=5)  code = 410
6720 timetables (of 9600 ) so far. Taking a break.
Error on request 6721 (station=8012656, hour=0)  code = 410
Error on request 6722 (station=8012656, hour=1)  code = 410
Error on request 6723 (station=8012656, hour=2)  code = 410
Error on request 6724 (station=8012656, hour=3)  code = 410
Error on request 6725 (station=8012656, hour=4)  code = 410
Error on request 6726 (station=8012656, hour=5)  code = 410
6740 timetables (of 9600 ) so far. Taking a break.
Error on request 6745 (station=8012662, hour=0)  code = 410
Error on request 6746 (station=8012662, hour=1)  code = 410
Error on request 6747 (station=8012662, hour=2)  code = 410
Error on request 6748 (station=8012662, hour=3)  code = 410
Error o

Error on request 7158 (station=8012751, hour=5)  code = 410
7160 timetables (of 9600 ) so far. Taking a break.
Error on request 7177 (station=8012759, hour=0)  code = 410
Error on request 7178 (station=8012759, hour=1)  code = 410
Error on request 7179 (station=8012759, hour=2)  code = 410
Error on request 7180 (station=8012759, hour=3)  code = 410
7180 timetables (of 9600 ) so far. Taking a break.
Error on request 7181 (station=8012759, hour=4)  code = 410
Error on request 7182 (station=8012759, hour=5)  code = 410
Error on request 7183 (station=8012759, hour=6)  code = 410
7200 timetables (of 9600 ) so far. Taking a break.
Error on request 7201 (station=8010297, hour=0)  code = 410
Error on request 7202 (station=8010297, hour=1)  code = 410
Error on request 7203 (station=8010297, hour=2)  code = 410
Error on request 7204 (station=8010297, hour=3)  code = 410
Error on request 7205 (station=8010297, hour=4)  code = 410
Error on request 7206 (station=8010297, hour=5)  code = 410
Error o

Error on request 7567 (station=8012946, hour=6)  code = 410
7580 timetables (of 9600 ) so far. Taking a break.
Error on request 7585 (station=8012947, hour=0)  code = 410
Error on request 7586 (station=8012947, hour=1)  code = 410
Error on request 7587 (station=8012947, hour=2)  code = 410
Error on request 7588 (station=8012947, hour=3)  code = 410
Error on request 7589 (station=8012947, hour=4)  code = 410
Error on request 7590 (station=8012947, hour=5)  code = 410
Error on request 7591 (station=8012947, hour=6)  code = 410
7600 timetables (of 9600 ) so far. Taking a break.
Error on request 7609 (station=8012959, hour=0)  code = 410
Error on request 7610 (station=8012959, hour=1)  code = 410
Error on request 7611 (station=8012959, hour=2)  code = 410
Error on request 7612 (station=8012959, hour=3)  code = 410
Error on request 7613 (station=8012959, hour=4)  code = 410
Error on request 7614 (station=8012959, hour=5)  code = 410
Error on request 7615 (station=8012959, hour=6)  code = 41

7980 timetables (of 9600 ) so far. Taking a break.
Error on request 7993 (station=8013080, hour=0)  code = 410
Error on request 7994 (station=8013080, hour=1)  code = 410
Error on request 7995 (station=8013080, hour=2)  code = 410
Error on request 7996 (station=8013080, hour=3)  code = 410
Error on request 7997 (station=8013080, hour=4)  code = 410
Error on request 7998 (station=8013080, hour=5)  code = 410
Error on request 7999 (station=8013080, hour=6)  code = 410
8000 timetables (of 9600 ) so far. Taking a break.
Error on request 8017 (station=8013087, hour=0)  code = 410
Error on request 8018 (station=8013087, hour=1)  code = 410
Error on request 8019 (station=8013087, hour=2)  code = 410
Error on request 8020 (station=8013087, hour=3)  code = 410
8020 timetables (of 9600 ) so far. Taking a break.
Error on request 8021 (station=8013087, hour=4)  code = 410
Error on request 8022 (station=8013087, hour=5)  code = 410
Error on request 8023 (station=8013087, hour=6)  code = 410
8040 ti

Error on request 8380 (station=8010367, hour=3)  code = 410
8380 timetables (of 9600 ) so far. Taking a break.
Error on request 8381 (station=8010367, hour=4)  code = 410
Error on request 8382 (station=8010367, hour=5)  code = 410
Error on request 8383 (station=8010367, hour=6)  code = 410
Error on request 8384 (station=8010367, hour=7)  code = 410
8400 timetables (of 9600 ) so far. Taking a break.
Error on request 8401 (station=8010370, hour=0)  code = 410
Error on request 8402 (station=8010370, hour=1)  code = 410
Error on request 8403 (station=8010370, hour=2)  code = 410
Error on request 8404 (station=8010370, hour=3)  code = 410
Error on request 8405 (station=8010370, hour=4)  code = 410
Error on request 8406 (station=8010370, hour=5)  code = 410
Error on request 8407 (station=8010370, hour=6)  code = 410
Error on request 8408 (station=8010370, hour=7)  code = 410
8420 timetables (of 9600 ) so far. Taking a break.
Error on request 8425 (station=8013268, hour=0)  code = 410
Error o

Error on request 8741 (station=8013381, hour=4)  code = 410
Error on request 8742 (station=8013381, hour=5)  code = 410
Error on request 8743 (station=8013381, hour=6)  code = 410
Error on request 8744 (station=8013381, hour=7)  code = 410
8760 timetables (of 9600 ) so far. Taking a break.
Error on request 8761 (station=8010393, hour=0)  code = 410
Error on request 8762 (station=8010393, hour=1)  code = 410
Error on request 8763 (station=8010393, hour=2)  code = 410
Error on request 8764 (station=8010393, hour=3)  code = 410
Error on request 8765 (station=8010393, hour=4)  code = 410
Error on request 8766 (station=8010393, hour=5)  code = 410
Error on request 8767 (station=8010393, hour=6)  code = 410
Error on request 8768 (station=8010393, hour=7)  code = 410
8780 timetables (of 9600 ) so far. Taking a break.
Error on request 8785 (station=8013413, hour=0)  code = 410
Error on request 8786 (station=8013413, hour=1)  code = 410
Error on request 8787 (station=8013413, hour=2)  code = 41

Error on request 9103 (station=8012869, hour=6)  code = 410
Error on request 9104 (station=8012869, hour=7)  code = 410
9120 timetables (of 9600 ) so far. Taking a break.
Error on request 9121 (station=8012641, hour=0)  code = 410
Error on request 9122 (station=8012641, hour=1)  code = 410
Error on request 9123 (station=8012641, hour=2)  code = 410
Error on request 9124 (station=8012641, hour=3)  code = 410
Error on request 9125 (station=8012641, hour=4)  code = 410
Error on request 9126 (station=8012641, hour=5)  code = 410
Error on request 9127 (station=8012641, hour=6)  code = 410
Error on request 9128 (station=8012641, hour=7)  code = 410
9140 timetables (of 9600 ) so far. Taking a break.
Error on request 9145 (station=8011431, hour=0)  code = 410
Error on request 9146 (station=8011431, hour=1)  code = 410
Error on request 9147 (station=8011431, hour=2)  code = 410
Error on request 9148 (station=8011431, hour=3)  code = 410
Error on request 9149 (station=8011431, hour=4)  code = 41

Error on request 9461 (station=8012318, hour=4)  code = 410
Error on request 9462 (station=8012318, hour=5)  code = 410
Error on request 9463 (station=8012318, hour=6)  code = 410
Error on request 9464 (station=8012318, hour=7)  code = 410
Error on request 9465 (station=8012318, hour=8)  code = 410
9480 timetables (of 9600 ) so far. Taking a break.
Error on request 9481 (station=8017442, hour=0)  code = 410
Error on request 9482 (station=8017442, hour=1)  code = 410
Error on request 9483 (station=8017442, hour=2)  code = 410
Error on request 9484 (station=8017442, hour=3)  code = 410
Error on request 9485 (station=8017442, hour=4)  code = 410
Error on request 9486 (station=8017442, hour=5)  code = 410
Error on request 9487 (station=8017442, hour=6)  code = 410
Error on request 9488 (station=8017442, hour=7)  code = 410
Error on request 9489 (station=8017442, hour=8)  code = 410
9500 timetables (of 9600 ) so far. Taking a break.
Error on request 9505 (station=8012686, hour=0)  code = 41

In [136]:
len(error_combinations)

1844

In [1]:
import json

In [139]:
with open("timetables/errors.json", "w") as error_file:
    json.dump(error_combinations, error_file)

In [4]:
errors = None
with open("timetables/errors.json", "r") as error_file:
    errors = json.load(error_file)
errors

[[8011132, 0],
 [8010044, 0],
 [8010046, 0],
 [8011185, 0],
 [8011199, 0],
 [8010049, 0],
 [8013471, 0],
 [8011222, 0],
 [8011242, 0],
 [8010059, 0],
 [8011257, 0],
 [8011264, 0],
 [8011297, 0],
 [8011298, 0],
 [8010184, 0],
 [8011970, 0],
 [8011971, 0],
 [8010185, 0],
 [8011974, 0],
 [8013450, 0],
 [8011979, 0],
 [8011980, 0],
 [8011315, 0],
 [8080840, 0],
 [8011320, 0],
 [8010072, 0],
 [8010074, 0],
 [8011325, 0],
 [8011339, 0],
 [8080280, 0],
 [8010076, 0],
 [8011354, 0],
 [8011371, 0],
 [8011385, 0],
 [8010080, 0],
 [8011387, 0],
 [8011398, 0],
 [8011407, 0],
 [8011407, 1],
 [8011407, 2],
 [8011407, 3],
 [8011407, 4],
 [8011407, 5],
 [8011407, 6],
 [8011407, 7],
 [8011407, 8],
 [8011407, 9],
 [8011407, 10],
 [8011407, 11],
 [8011407, 12],
 [8011407, 13],
 [8011407, 14],
 [8011407, 15],
 [8011407, 16],
 [8011407, 17],
 [8011407, 18],
 [8011407, 19],
 [8011407, 20],
 [8011407, 21],
 [8011407, 22],
 [8011407, 23],
 [8011417, 0],
 [8010085, 0],
 [8011418, 0],
 [8013444, 0],
 [8011419, 

In [8]:
err1_stat, err1_hour = errors[0]
err1_stat, err1_hour

(8011132, 0)

In [9]:
req_url = timetable_url_pattern.format(base=timetable_base_url, station=err1_stat, date=today_short, hour=err1_hour)
req_url

'https://api.deutschebahn.com/timetables/v1/plan/8011132/211028/00'

In [10]:
r = requests.get(req_url, headers=xml_headers)
r

<Response [200]>

In [11]:
r.text

'<?xml version=\'1.0\' encoding=\'UTF-8\'?><timetable station=\'Belgershain\'><s id="8575858415775127007-2110280010-9"><tl f="N" t="p" o="800413" c="RB" n="16661"/><ar pt="2110280034" pp="2" l="113" ppth="Leipzig Hbf|Leipzig-Paunsdorf|Leipzig Werkst&#228;ttenstra&#223;e|Leipzig-M&#246;lkau|Leipzig-Holzhausen|Leipzig-Liebertwolkwitz|Gro&#223;p&#246;sna|Oberholz"/><dp pt="2110280035" pp="2" l="113" ppth="Otterwisch|Lauterbach-Steinbach|Bad Lausick|Hopfgarten(Sachs)|Tautenhain|Geithain"/></s></timetable>'

In [20]:
timetable_idx = 0
n_timetable_combinations = len(errors)
error_combinations = []
for timetable in errors:
    stat, hour = timetable
    timetable_idx += 1
    
    req_url = timetable_url_pattern.format(base=timetable_base_url, station=stat, date=today_short, hour=hour)
    r = requests.get(req_url, headers=xml_headers)
    
    if r.status_code != 200:
        print("Error on request", timetable_idx, "(station={}, hour={})".format(stat, hour), " code =", r.status_code)
        error_combinations.append({"station": stat, "hour": hour, "status": r.status_code})
    
    outfile_name = base_dir + "/" + outfile_format.format(station=stat, hour=hour)
    with open(outfile_name, "w") as outfile:
        outfile.write(r.text)
    
    if timetable_idx % rate_limit == 0:
        print(timetable_idx, "timetables (of", n_timetable_combinations, ") so far. Taking a break.")
        time.sleep(60)

Error on request 2 (station=8011407, hour=0)  code = 400
Error on request 3 (station=8011407, hour=1)  code = 400
Error on request 4 (station=8011407, hour=2)  code = 400
Error on request 5 (station=8011407, hour=3)  code = 400
Error on request 6 (station=8011407, hour=4)  code = 400
Error on request 7 (station=8011407, hour=5)  code = 400
Error on request 8 (station=8011407, hour=6)  code = 400
Error on request 9 (station=8011407, hour=7)  code = 400
Error on request 10 (station=8011407, hour=8)  code = 400
Error on request 11 (station=8011407, hour=9)  code = 400
Error on request 12 (station=8011407, hour=10)  code = 400
Error on request 13 (station=8011407, hour=11)  code = 400
Error on request 14 (station=8011407, hour=12)  code = 400
Error on request 15 (station=8011407, hour=13)  code = 400
Error on request 16 (station=8011407, hour=14)  code = 400
Error on request 17 (station=8011407, hour=15)  code = 400
Error on request 18 (station=8011407, hour=16)  code = 400
Error on reques

In [15]:
with open("timetables/errors_rerun2.json", "w") as error_file:
    json.dump(error_combinations, error_file)

## Merge and convert all timetables into a more usefull data structure

In [9]:
import lxml
import lxml.etree
import lxml.objectify

In [10]:
def getchildtags(node):
    return [c.tag for c in node.getchildren()]

In [11]:
def joinDropNone(iterable, delim):
    return delim.join([x for x in iterable if x is not None])

In [12]:
test_file = open("timetables/timetable_8000273_05.xml", "r")
parsed = lxml.objectify.parse(test_file)
parsed

<lxml.etree._ElementTree at 0x7fa52c2df440>

In [13]:
lxml.objectify.dump(parsed.getroot())

"timetable = None [ObjectifiedElement]\n  * station = 'Muldenberg Floßplatz'\n    s = None [ObjectifiedElement]\n      * id = '3991938303171863868-2110270532-4'\n        tl = '' [StringElement]\n          * c = 'VBG'\n          * f = 'D'\n          * n = '20802'\n          * o = 'RD'\n          * t = 'p'\n        ar = '' [StringElement]\n          * l = 'RB1'\n          * pp = '1'\n          * ppth = 'Zwotental|Schöneck(Vogtl) Ferienpark|Schöneck(Vogtl)'\n          * pt = '2110270544'\n        dp = '' [StringElement]\n          * l = 'RB1'\n          * pp = '1'\n          * ppth = 'Grünbach(Vogtl)|Falkenstein(Vogtl)|Ellefeld|Auerbach(V) unt Bf|Rodewisch|Lengenfeld(Vogtl)|Irfersgrün|Voigtsgrün|Ebersbrunn|Stenn|Zwickau(Sachs)Hbf|Zwickau Stadthalle|Zwickau Zentrum'\n          * pt = '2110270544'\n    s = None [ObjectifiedElement]\n      * id = '4048785167705909746-2110270503-3'\n        tl = '' [StringElement]\n          * c = 'VBG'\n          * f = 'D'\n          * n = '20801'\n         

In [14]:
timetable_node = parsed.getroot()

In [18]:
timetable_node.s.get("id")

'3991938303171863868-2110270532-4'

In [8]:
station = timetable_node.get("station"), timetable_node.get("eva")
station

('Muldenberg Floßplatz', None)

In [44]:
connection_nodes = timetable_node.getchildren()

In [78]:
connections = []
for cn in connection_nodes:
    line = {"train_type": cn.tl.get("c")}
    route_before = None
    route_after = None
    if "ar" in getchildtags(cn):
        line["line"] = cn.ar.get("l")
    elif "dp" in getchildtags(cn):
        line["line"] = cn.dp.get("l")
    else:
        line["line"] = ""
        
    if "ar" in getchildtags(cn):
        route_before = cn.ar.get("ppth")
    if "dp" in getchildtags(cn):
        route_after = cn.dp.get("ppth")
    complete_route = joinDropNone([route_before, station[0], route_after], "|")
    line["route"] = complete_route
    
    connections.append(line)

connections

[{'train_type': 'VBG',
  'line': 'RB1',
  'route': 'Zwotental|Schöneck(Vogtl) Ferienpark|Schöneck(Vogtl)|Muldenberg Floßplatz|Grünbach(Vogtl)|Falkenstein(Vogtl)|Ellefeld|Auerbach(V) unt Bf|Rodewisch|Lengenfeld(Vogtl)|Irfersgrün|Voigtsgrün|Ebersbrunn|Stenn|Zwickau(Sachs)Hbf|Zwickau Stadthalle|Zwickau Zentrum'},
 {'train_type': 'VBG',
  'line': 'RB1',
  'route': 'Falkenstein(Vogtl)|Grünbach(Vogtl)|Muldenberg Floßplatz|Schöneck(Vogtl)|Schöneck(Vogtl) Ferienpark|Zwotental'}]

In [5]:
def extract_connection_data(timetable):
    timetable_node = parsed.getroot()
    station = timetable_node.get("station"), timetable_node.get("eva")
    
    connection_nodes = timetable_node.getchildren()
    connections = []
    
    for cn in connection_nodes:
        line = {"train_type": cn.tl.get("c"), "extracted_from": station[0]}
        route_before = None
        route_after = None
        if "ar" in getchildtags(cn):
            line["line"] = cn.ar.get("l")
        elif "dp" in getchildtags(cn):
            line["line"] = cn.dp.get("l")
        else:
            line["line"] = ""

        if "ar" in getchildtags(cn):
            route_before = cn.ar.get("ppth")
        if "dp" in getchildtags(cn):
            route_after = cn.dp.get("ppth")
        complete_route = joinDropNone([route_before, station[0], route_after], "|")
        line["route"] = complete_route

        connections.append(line)
    
    return connections

In [82]:
extract_connection_data(parsed)

[{'train_type': 'VBG',
  'extracted_from': 'Muldenberg Floßplatz',
  'line': 'RB1',
  'route': 'Zwotental|Schöneck(Vogtl) Ferienpark|Schöneck(Vogtl)|Muldenberg Floßplatz|Grünbach(Vogtl)|Falkenstein(Vogtl)|Ellefeld|Auerbach(V) unt Bf|Rodewisch|Lengenfeld(Vogtl)|Irfersgrün|Voigtsgrün|Ebersbrunn|Stenn|Zwickau(Sachs)Hbf|Zwickau Stadthalle|Zwickau Zentrum'},
 {'train_type': 'VBG',
  'extracted_from': 'Muldenberg Floßplatz',
  'line': 'RB1',
  'route': 'Falkenstein(Vogtl)|Grünbach(Vogtl)|Muldenberg Floßplatz|Schöneck(Vogtl)|Schöneck(Vogtl) Ferienpark|Zwotental'}]

In [6]:
import pathlib

In [7]:
all_connections = []
for timetable_xml in pathlib.Path("timetables").glob("*.xml"):
    with open(timetable_xml, "r") as timetable_file:
        try:
            parsed = lxml.objectify.parse(timetable_file)
            all_connections += extract_connection_data(parsed)
        except lxml.etree.XMLSyntaxError:
            print("Could not parse", timetable_xml)
len(all_connections)

Could not parse timetables\timetable_8011407_00.xml
Could not parse timetables\timetable_8011407_01.xml
Could not parse timetables\timetable_8011407_02.xml
Could not parse timetables\timetable_8011407_03.xml
Could not parse timetables\timetable_8011407_04.xml
Could not parse timetables\timetable_8011407_05.xml
Could not parse timetables\timetable_8011407_06.xml
Could not parse timetables\timetable_8011407_07.xml
Could not parse timetables\timetable_8011407_08.xml
Could not parse timetables\timetable_8011407_09.xml
Could not parse timetables\timetable_8011407_10.xml
Could not parse timetables\timetable_8011407_11.xml
Could not parse timetables\timetable_8011407_12.xml
Could not parse timetables\timetable_8011407_13.xml
Could not parse timetables\timetable_8011407_14.xml
Could not parse timetables\timetable_8011407_15.xml
Could not parse timetables\timetable_8011407_16.xml
Could not parse timetables\timetable_8011407_17.xml
Could not parse timetables\timetable_8011407_18.xml
Could not pa

28890

In [8]:
all_connections[-4:]

[{'train_type': 'RB',
  'extracted_from': 'Leipzig-Sellerhausen',
  'line': '110',
  'route': 'Leipzig Hbf|Leipzig-Sellerhausen|Leipzig-Paunsdorf|Leipzig-Engelsdorf|Borsdorf(Sachs)|Beucha|Naunhof|Großsteinberg|Grimma ob Bf|Großbothen|Tanndorf|Leisnig|Klosterbuch|Westewitz-Hochweitzschen|Döbeln Hbf'},
 {'train_type': 'RB',
  'extracted_from': 'Leipzig-Sellerhausen',
  'line': '110',
  'route': 'Grimma ob Bf|Großsteinberg|Naunhof|Beucha|Borsdorf(Sachs)|Leipzig-Engelsdorf|Leipzig-Paunsdorf|Leipzig-Sellerhausen|Leipzig Hbf'},
 {'train_type': 'RB',
  'extracted_from': 'Leipzig-Sellerhausen',
  'line': '110',
  'route': 'Leipzig Hbf|Leipzig-Sellerhausen|Leipzig-Paunsdorf|Leipzig-Engelsdorf|Borsdorf(Sachs)|Beucha|Naunhof|Großsteinberg|Grimma ob Bf'},
 {'train_type': 'RB',
  'extracted_from': 'Leipzig-Sellerhausen',
  'line': '110',
  'route': 'Grimma ob Bf|Großsteinberg|Naunhof|Beucha|Borsdorf(Sachs)|Leipzig-Engelsdorf|Leipzig-Paunsdorf|Leipzig-Sellerhausen|Leipzig Hbf'}]

In [112]:
with open("timetables/complete_timetables.json", "w") as timetables_output:
    json.dump(all_connections, timetables_output)

In [10]:
import pandas as pd

In [12]:
tt_df = pd.DataFrame(all_connections)
tt_df

Unnamed: 0,train_type,extracted_from,line,route
0,VBG,Muldenberg Floßplatz,RB1,Zwotental|Schöneck(Vogtl) Ferienpark|Schöneck(...
1,VBG,Muldenberg Floßplatz,RB1,Falkenstein(Vogtl)|Grünbach(Vogtl)|Muldenberg ...
2,VBG,Muldenberg Floßplatz,RB1,Zwotental|Schöneck(Vogtl) Ferienpark|Schöneck(...
3,VBG,Muldenberg Floßplatz,RB5,Plauen(Vogtl) ob Bf|Jößnitz|Jocketa|Ruppertsgr...
4,VBG,Muldenberg Floßplatz,RB5,Zwotental|Schöneck(Vogtl) Ferienpark|Schöneck(...
...,...,...,...,...
28885,RB,Leipzig-Sellerhausen,110,Döbeln Hbf|Westewitz-Hochweitzschen|Klosterbuc...
28886,RB,Leipzig-Sellerhausen,110,Leipzig Hbf|Leipzig-Sellerhausen|Leipzig-Pauns...
28887,RB,Leipzig-Sellerhausen,110,Grimma ob Bf|Großsteinberg|Naunhof|Beucha|Bors...
28888,RB,Leipzig-Sellerhausen,110,Leipzig Hbf|Leipzig-Sellerhausen|Leipzig-Pauns...


In [14]:
tt_df.sort_values(by=["line", "train_type"])

Unnamed: 0,train_type,extracted_from,line,route
363,S,Bad Schandau,1,Bad Schandau|Königstein(Sächs Schw)|Kurort Rat...
364,S,Bad Schandau,1,Meißen Triebischtal|Meißen Altstadt|Meißen|Neu...
365,S,Bad Schandau,1,Schöna|Schmilka-Hirschmühle|Krippen|Bad Schand...
366,S,Bad Schandau,1,Dresden Hbf|Dresden-Strehlen|Dresden-Reick|Dre...
368,S,Bad Schandau,1,Schöna|Schmilka-Hirschmühle|Krippen|Bad Schand...
...,...,...,...,...
8560,SOE,Zittau,,Zittau|Zittau Hp|Zittau Süd|Zittau Vorstadt|Ol...
8566,SOE,Zittau,,Kurort Oybin|Teufelsmühle|Kurort Oybin-Niederd...
8572,SOE,Zittau,,Zittau|Zittau Hp|Zittau Süd|Zittau Vorstadt|Ol...
8584,SOE,Zittau,,Kurort Oybin|Teufelsmühle|Kurort Oybin-Niederd...


In [33]:
def adjust_route_order(stops):
    if len(stops) < 2:
        return stops
    first_stop = stops[0]
    last_stop = stops[-1]
    if first_stop > last_stop:
        return list(reversed(stops))
    else:
        return stops

In [34]:
tt_df["route_normalized"] = tt_df.route.str.split("|").apply(adjust_route_order)
tt_df["route_normalized_str"] = tt_df.route_normalized.str.join("|")

In [35]:
tt_route_grp = tt_df.groupby("route_normalized_str")
len(tt_route_grp)

328

In [41]:
tt_unique_route = tt_route_grp.first().reset_index()

In [48]:
tt_unique_route[tt_unique_route.line == "S5"]

Unnamed: 0,route_normalized_str,train_type,extracted_from,line,route,route_normalized
4,Altenburg|Treben-Lehma|Regis-Breitingen|Deutze...,Bus,Neukieritzsch,S5,Altenburg|Treben-Lehma|Regis-Breitingen|Deutze...,"[Altenburg, Treben-Lehma, Regis-Breitingen, De..."
44,Böhlen(Leipzig)|Böhlen Werke|Neukieritzsch|Deu...,Bus,Neukieritzsch,S5,Gößnitz|Lehndorf(Altenburg)|Altenburg|Treben-L...,"[Böhlen(Leipzig), Böhlen Werke, Neukieritzsch,..."
323,Werdau|Steinpleis|Lichtentanne(Sachs)|Zwickau(...,Bus,Steinpleis,S5,Werdau|Steinpleis|Lichtentanne(Sachs)|Zwickau(...,"[Werdau, Steinpleis, Lichtentanne(Sachs), Zwic..."


In [49]:
tt_unique_route[tt_unique_route.line == "S5"].route.values

array(['Altenburg|Treben-Lehma|Regis-Breitingen|Deutzen Markt|Neukieritzsch|Böhlen Werke|Böhlen(Leipzig)',
       'Gößnitz|Lehndorf(Altenburg)|Altenburg|Treben-Lehma|Regis-Breitingen|Deutzen Markt|Neukieritzsch|Böhlen Werke|Böhlen(Leipzig)',
       'Werdau|Steinpleis|Lichtentanne(Sachs)|Zwickau(Sachs)Hbf'],
      dtype=object)

In [51]:
tt_projected = tt_unique_route[["train_type", "extracted_from", "line", "route_normalized"]]
tt_projected

Unnamed: 0,train_type,extracted_from,line,route_normalized
0,VBG,Adorf(Vogtl),RB4,"[Adorf(Vogtl), Hundsgrün, Oelsnitz(Vogtl), Pir..."
1,VBG,Adorf(Vogtl),RB2,"[Adorf(Vogtl), Hundsgrün, Oelsnitz(Vogtl), Pir..."
2,VBG,Adorf(Vogtl),RB2,"[Adorf(Vogtl), Hundsgrün, Oelsnitz(Vogtl), Pir..."
3,S,Steinpleis,5,"[Altenburg, Lehndorf(Altenburg), Gößnitz, Poni..."
4,Bus,Neukieritzsch,S5,"[Altenburg, Treben-Lehma, Regis-Breitingen, De..."
...,...,...,...,...
323,Bus,Steinpleis,S5,"[Werdau, Steinpleis, Lichtentanne(Sachs), Zwic..."
324,VBG,Muldenberg Floßplatz,RB1,"[Zwickau Stadthalle, Zwickau(Sachs)Hbf, Stenn,..."
325,VBG,Muldenberg Floßplatz,RB1,"[Zwickau Zentrum, Zwickau Stadthalle, Zwickau(..."
326,VBG,Muldenberg Floßplatz,RB1,"[Zwickau Zentrum, Zwickau Stadthalle, Zwickau(..."


In [55]:
tt_projected.to_json("timetables/timetables.json", index=False, orient="table")