In [1]:
import subprocess
import sys
from pathlib import Path

import overpy
import pandas as pd
from shapely.geometry import Polygon

# Make the project sources importable. The directory is resolved relative to
# the notebook's working directory -- the same convention the other cells use
# when they call "../src/main.py" -- instead of a user-specific absolute path.
SRC_DIR = str(Path("../src").resolve())
if SRC_DIR not in sys.path:  # keep re-runs of this cell from piling up entries
    sys.path.append(SRC_DIR)
from package.geometa import GeoMeta

In [2]:
# Identifier of the GTFS feed passed to the `gtfs download` command.
gtfs_key = 781
# Name used for the Overpass boundary lookup.
city_name_german = "Berlin"
# Name used (lower-cased) when building the data file paths.
city_name = "Berlin"
# (lat, lon) reference point; the bounding box below is derived from it.
center = (52.5170365, 13.3888599)
# First and last day (both inclusive) of the per-day processing loop.
start_date = "2024-05-27"
end_date = "2024-06-02"
# Rectangle around `center`, offsets in degrees. Derived from `center` rather
# than repeating its literals so the two cannot drift apart. Note that the
# longitude offsets are intentionally not symmetric (+0.04 / -0.07).
bbox = {
    "max_lat": center[0] + 0.05,
    "min_lat": center[0] - 0.05,
    "max_lon": center[1] + 0.04,
    "min_lon": center[1] - 0.07,
}

In [3]:
# File the GeoMeta boundary is saved to; the same path is later handed to
# `gtfs crop` via --geometa-path.
geometa_path = f"../data/stateful_variables/{city_name.lower()}_geometa.pkl"
# Current date as YYYYMMDD; it becomes part of the raw GTFS download's file
# name, so the crop step only finds a download made with the same `today`.
today = pd.Timestamp.today().strftime("%Y%m%d")

In [4]:
# Download the raw GTFS feed through the project CLI.
# - sys.executable runs the CLI with the interpreter of this kernel rather
#   than whatever "python" happens to be first on PATH.
# - check=True raises CalledProcessError on a non-zero exit code, so a failed
#   download stops the notebook instead of being silently ignored.
subprocess.run(
    [
        sys.executable,
        "../src/main.py",
        "gtfs",
        "download",
        str(gtfs_key),
        f"../data/gtfs-raw/{city_name.lower()}_{today}.zip",
    ],
    check=True,
)

[2;36m[13:14:44][0m[2;36m [0m[34mINFO    [0m Downloading GTFS feed with ID [1;36m781[0m             ]8;id=242752;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py\[2mcatalog.py[0m]8;;\[2m:[0m]8;id=324474;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py#155\[2m155[0m]8;;\
[2;36m[13:14:56][0m[2;36m [0m[34mINFO    [0m Downloading GTFS feed with ID [1;36m781[0m done [1m([0m[1;36m12.75[0m ]8;id=236405;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py\[2mcatalog.py[0m]8;;\[2m:[0m]8;id=600046;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py#155\[2m155[0m]8;;\
[2;36m           [0m         seconds[1m)[0m                                      [2m              [0m


CompletedProcess(args=['python', '../src/main.py', 'gtfs', 'download', '781', '../data/gtfs-raw/berlin_20240507.zip'], returncode=0)

In [5]:
# Rectangular boundary built from `bbox`; corners are given as (lon, lat) in
# the order SW -> NW -> NE -> SE.
polygon = Polygon(
    [
        (bbox[lon_key], bbox[lat_key])
        for lon_key, lat_key in (
            ("min_lon", "min_lat"),
            ("min_lon", "max_lat"),
            ("max_lon", "max_lat"),
            ("max_lon", "min_lat"),
        )
    ]
)

# NOTE: a later cell saves an Overpass-based GeoMeta to this same
# `geometa_path`, so on a full run the file written there ends up being the
# administrative boundary rather than this rectangle.
geometa = GeoMeta(boundary=polygon)
geometa.save(geometa_path)

In [6]:
def order_ways_and_nodes(result):
    """Chain the ways of ``result`` end-to-end into one ordered node sequence.

    Starting from an arbitrary way (the last one inserted), the function keeps
    appending whichever remaining way begins or ends at the current tail node,
    reversing the way when it is attached by its last node. The shared node is
    not repeated.

    Args:
        result: Object exposing ``ways``; each way needs an ``id`` and a
            ``nodes`` sequence whose items have ``lat`` and ``lon``.

    Returns:
        List of ``(lat, lon)`` tuples in chained order. If no remaining way
        connects to the tail, chaining stops and the ways that were not
        attached are left out.
    """
    remaining = {way.id: list(way.nodes) for way in result.ways}
    _, chain = remaining.popitem()

    while remaining:
        tail = chain[-1]
        for way_id, way_nodes in remaining.items():
            if tail == way_nodes[0]:
                # Way continues the chain in its stored direction.
                chain.extend(way_nodes[1:])
                break
            if tail == way_nodes[-1]:
                # Way touches the tail with its end: attach it backwards.
                chain.extend(reversed(way_nodes[:-1]))
                break
        else:
            # Nothing connects to the tail any more; stop with what we have.
            break
        del remaining[way_id]

    return [(node.lat, node.lon) for node in chain]


def fetch_boundary_coords(city_name, admin_level=4):
    """Fetch a city's administrative boundary from the Overpass API.

    Args:
        city_name: Value matched against the ``name`` tag of the Overpass
            search area.
        admin_level: OSM ``admin_level`` of the boundary relation to fetch.
            The right value depends on the city, e.g. Berlin -> 4 (the
            default, matching the previous hard-coded value), Koeln -> 6.

    Returns:
        The boundary as a list of ``(lat, lon)`` tuples, ordered by
        ``order_ways_and_nodes``.
    """
    api = overpy.Overpass()

    query = f"""
    [out:json][timeout:50];
    area["name"="{city_name}"]->.searchArea;
    relation["boundary"="administrative"]["admin_level"="{admin_level}"](area.searchArea);
    out body;
    >;
    out skel qt;
    """

    result = api.query(query)
    return order_ways_and_nodes(result)


# Build the GeoMeta from the administrative boundary returned by Overpass.
boundary_coords = fetch_boundary_coords(city_name_german)
# The fetched pairs are (lat, lon); the polygon is built from (lon, lat).
polygon = Polygon([(pair[1], pair[0]) for pair in boundary_coords])
geometa = GeoMeta(boundary=polygon)
# Saved to the same `geometa_path` as the bounding-box GeoMeta.
geometa.save(geometa_path)

In [7]:
def _run_main(*cli_args):
    """Run ``../src/main.py`` with ``cli_args`` using this kernel's interpreter.

    Raises ``subprocess.CalledProcessError`` when the command exits non-zero,
    so a failed step is not silently followed by the next one.
    """
    return subprocess.run([sys.executable, "../src/main.py", *cli_args], check=True)


city_slug = city_name.lower()
raw_zip = f"../data/gtfs-raw/{city_slug}_{today}.zip"

# One crop -> clean -> build-structures run per day in [start_date, end_date].
for start_date_ in pd.date_range(start_date, end_date, freq="D"):
    # Each run covers exactly one day: [start 00:00:00, next day 00:00:00].
    end_date_ = start_date_ + pd.Timedelta(days=1)
    day_tag = start_date_.strftime("%Y%m%d")
    reduced_zip = f"../data/gtfs-reduced/{city_slug}_gtfs_{day_tag}.zip"
    cleaned_dir = f"../data/gtfs-cleaned/{city_slug}_{day_tag}"

    _run_main(
        "gtfs",
        "crop",
        raw_zip,
        reduced_zip,
        "--geometa-path",
        geometa_path,
        "--time-start",
        start_date_.strftime("%d.%m.%Y-00:00:00"),
        "--time-end",
        end_date_.strftime("%d.%m.%Y-00:00:00"),
    )
    _run_main("gtfs", "clean", reduced_zip, cleaned_dir)
    _run_main("build-structures", cleaned_dir, f"{cleaned_dir}/structs.pkl")

[2;36m[13:14:59][0m[2;36m [0m[34mINFO    [0m Cropping GTFS data                               ]8;id=229424;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=225645;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m          [0m[2;36m [0m[34mINFO    [0m Reading GTFS data                                 ]8;id=875830;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=945065;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\


  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:15:04][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m5.09[0m seconds[1m)[0m             ]8;id=424036;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=826055;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:15:08][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=206367;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=991088;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:18:46][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m5.02[0m seconds[1m)[0m             ]8;id=883099;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=489796;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:18:49][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=378565;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=514498;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:22:29][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m5.03[0m seconds[1m)[0m             ]8;id=686693;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=508825;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:22:33][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=202186;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=318339;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:26:07][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.90[0m seconds[1m)[0m             ]8;id=436261;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=751446;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:26:11][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=910244;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=335248;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:29:48][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m5.00[0m seconds[1m)[0m             ]8;id=436980;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=600950;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:29:51][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=294101;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=10626;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m 

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:33:29][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m5.02[0m seconds[1m)[0m             ]8;id=325379;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=472158;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:33:33][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=752823;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=378843;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[13:37:11][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.92[0m seconds[1m)[0m             ]8;id=649728;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=629258;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[13:37:14][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=11719;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=258388;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m 