In [1]:
import subprocess
import sys
import warnings

import overpy
import pandas as pd
from shapely.geometry import Polygon

sys.path.append("/home/ppeter/repo/mcr-py/src/")
from package.geometa import GeoMeta

In [2]:
# Run configuration: everything a reader might tune lives in this cell.
gtfs_key = 781  # feed ID handed to the `gtfs download` command
city_name_german = "Berlin"  # name used when querying the boundary
city_name = "Berlin"  # lower-cased to build the data file names
center = (52.5170365, 13.3888599)  # (lat, lon)
start_date, end_date = "2024-05-28", "2024-06-02"

# Bounding box spanned around `center`; the offsets are in degrees.
_center_lat, _center_lon = center
bbox = dict(
    max_lat=_center_lat + 0.05,
    min_lat=_center_lat - 0.05,
    max_lon=_center_lon + 0.04,
    min_lon=_center_lon - 0.07,
)

In [3]:
# Date stamp (YYYYMMDD) of this run; used to name the raw GTFS download.
today = pd.Timestamp.today().strftime("%Y%m%d")
# Pickle file in which the GeoMeta boundary for the selected city is stored.
geometa_path = f"../data/stateful_variables/{city_name.lower()}_geometa.pkl"

In [4]:
# Download the raw GTFS feed into a date-stamped zip file.
# sys.executable runs the CLI with the kernel's own interpreter, and
# check=True raises CalledProcessError if the download exits non-zero so the
# following cells never run against a missing or partial file.
subprocess.run(
    [
        sys.executable,
        "../src/main.py",
        "gtfs",
        "download",
        str(gtfs_key),
        f"../data/gtfs-raw/{city_name.lower()}_{today}.zip",
    ],
    check=True,
)

[2;36m[15:26:41][0m[2;36m [0m[34mINFO    [0m Downloading GTFS feed with ID [1;36m781[0m             ]8;id=263360;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py\[2mcatalog.py[0m]8;;\[2m:[0m]8;id=37998;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py#155\[2m155[0m]8;;\
[2;36m[15:26:48][0m[2;36m [0m[34mINFO    [0m Downloading GTFS feed with ID [1;36m781[0m done [1m([0m[1;36m6.97[0m  ]8;id=959567;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py\[2mcatalog.py[0m]8;;\[2m:[0m]8;id=658191;file:///home/ppeter/repo/mcr-py/src/package/gtfs/catalog.py#155\[2m155[0m]8;;\
[2;36m           [0m         seconds[1m)[0m                                      [2m              [0m


CompletedProcess(args=['python', '../src/main.py', 'gtfs', 'download', '781', '../data/gtfs-raw/berlin_20240506.zip'], returncode=0)

In [5]:
# Rectangular boundary spanned by the configured bounding box.
# Vertices are listed as (lon, lat), walking the four corners in order.
# NOTE: the boundary-fetching cell further down saves to the same
# `geometa_path`, so on a top-to-bottom run this file gets overwritten.
west, east = bbox["min_lon"], bbox["max_lon"]
south, north = bbox["min_lat"], bbox["max_lat"]
bbox_corners = [
    (west, south),
    (west, north),
    (east, north),
    (east, south),
]
polygon = Polygon(bbox_corners)

geometa = GeoMeta(boundary=polygon)
geometa.save(geometa_path)

In [6]:
def order_ways_and_nodes(result):
    """Chain the ways of an Overpass result into one ordered coordinate list.

    Starting from one way, repeatedly appends whichever remaining way shares
    an end node with the current tail of the chain (reversing the way when it
    is attached by its last node) until no ways are left or none connects.

    Args:
        result: Object exposing ``ways``; every way needs ``id`` and
            ``nodes``, every node needs ``lat`` and ``lon`` (for example an
            ``overpy.Result``).

    Returns:
        List of ``(lat, lon)`` tuples in chained order.

    Raises:
        ValueError: If the result holds no way with at least one node.
    """
    # Copy each node list so the chain can be extended without mutating the
    # result; ways without nodes cannot be chained and are skipped.
    ways_dict = {}
    for way in result.ways:
        way_nodes = list(way.nodes)
        if way_nodes:
            ways_dict[way.id] = way_nodes

    if not ways_dict:
        raise ValueError("Overpass result contains no ways with nodes to order")

    # Seed the chain with the most recently inserted way.
    _, ordered_nodes = ways_dict.popitem()

    while ways_dict:
        tail = ordered_nodes[-1]
        for way_id, way_nodes in ways_dict.items():
            if tail == way_nodes[0]:
                # Way continues the chain in its stored direction.
                ordered_nodes.extend(way_nodes[1:])
                break
            if tail == way_nodes[-1]:
                # Way is attached by its last node: append it reversed.
                ordered_nodes.extend(reversed(way_nodes[:-1]))
                break
        else:
            # No remaining way touches the tail; report the leftovers instead
            # of silently returning a truncated outline.
            warnings.warn(
                f"{len(ways_dict)} way(s) could not be connected to the "
                "ordered chain and were left out",
                stacklevel=2,
            )
            break
        del ways_dict[way_id]

    return [(node.lat, node.lon) for node in ordered_nodes]


def fetch_boundary_coords(city_name, admin_level=4):
    """Fetch a city's administrative boundary from the Overpass API.

    Args:
        city_name: Value of the ``name`` tag that selects the search area.
        admin_level: ``admin_level`` of the boundary relation to fetch.
            Defaults to 4, which is what Berlin needs; Koeln needs 6.

    Returns:
        The boundary as a list of ``(lat, lon)`` tuples, as produced by
        ``order_ways_and_nodes``.
    """
    api = overpy.Overpass()

    # Select the administrative relation inside the named area, then recurse
    # down (">") so the relation's ways and nodes are part of the response.
    query = f"""
    [out:json][timeout:50];
    area["name"="{city_name}"]->.searchArea;
    relation["boundary"="administrative"]["admin_level"="{admin_level}"](area.searchArea);
    out body;
    >;
    out skel qt;
    """

    return order_ways_and_nodes(api.query(query))


# Fetch the administrative outline of the city and persist it as the boundary.
boundary_coords = fetch_boundary_coords(city_name_german)
# The coordinates arrive as (lat, lon); the polygon is built from (lon, lat).
lon_lat_ring = [(point[1], point[0]) for point in boundary_coords]
polygon = Polygon(lon_lat_ring)
geometa = GeoMeta(boundary=polygon)
geometa.save(geometa_path)

In [7]:
# Build one cropped, cleaned and pre-structured GTFS data set per service day.
# pd.date_range includes both endpoints, so `end_date` itself is processed too.
city_slug = city_name.lower()
raw_gtfs_zip = f"../data/gtfs-raw/{city_slug}_{today}.zip"
# Run the CLI with the kernel's own interpreter so it uses the same environment.
main_cmd = [sys.executable, "../src/main.py"]

for start_date_ in pd.date_range(start_date, end_date, freq="D"):
    # Each crop window covers exactly one day: [start_date_, start_date_ + 1 day].
    end_date_ = start_date_ + pd.Timedelta(days=1)
    day_stamp = start_date_.strftime("%Y%m%d")
    reduced_zip = f"../data/gtfs-reduced/{city_slug}_gtfs_{day_stamp}.zip"
    cleaned_dir = f"../data/gtfs-cleaned/{city_slug}_{day_stamp}"

    # check=True stops the pipeline at the first failing stage instead of
    # feeding a missing or partial artefact into the next one.
    subprocess.run(
        [
            *main_cmd,
            "gtfs",
            "crop",
            raw_gtfs_zip,
            reduced_zip,
            "--geometa-path",
            geometa_path,
            "--time-start",
            start_date_.strftime("%d.%m.%Y-00:00:00"),
            "--time-end",
            end_date_.strftime("%d.%m.%Y-00:00:00"),
        ],
        check=True,
    )
    subprocess.run(
        [
            *main_cmd,
            "gtfs",
            "clean",
            reduced_zip,
            cleaned_dir,
        ],
        check=True,
    )
    subprocess.run(
        [
            *main_cmd,
            "build-structures",
            cleaned_dir,
            f"{cleaned_dir}/structs.pkl",
        ],
        check=True,
    )

[2;36m[15:26:51][0m[2;36m [0m[34mINFO    [0m Cropping GTFS data                               ]8;id=724977;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=734542;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m          [0m[2;36m [0m[34mINFO    [0m Reading GTFS data                                 ]8;id=499735;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=758208;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\


  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[15:26:56][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m5.03[0m seconds[1m)[0m             ]8;id=377753;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=528602;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[15:26:59][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=525416;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=915570;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[15:30:46][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.99[0m seconds[1m)[0m             ]8;id=98064;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=849893;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[15:30:49][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=396333;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=99748;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m  

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[15:34:30][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.93[0m seconds[1m)[0m             ]8;id=378162;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=446513;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[15:34:33][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=400673;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=639677;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[15:38:11][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.92[0m seconds[1m)[0m             ]8;id=275620;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=493989;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[15:38:15][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=796109;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=767353;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[15:41:48][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.91[0m seconds[1m)[0m             ]8;id=33079;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=26115;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[15:41:52][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=652485;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=90496;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m   

  df = pd.read_csv(f, dtype=dtypes.GTFS_DTYPES)  # type: ignore


[2;36m[15:45:25][0m[2;36m [0m[34mINFO    [0m Reading GTFS data done [1m([0m[1;36m4.87[0m seconds[1m)[0m             ]8;id=15285;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py\[2mcrop.py[0m]8;;\[2m:[0m]8;id=462501;file:///home/ppeter/repo/mcr-py/src/package/gtfs/crop.py#19\[2m19[0m]8;;\
[2;36m[15:45:28][0m[2;36m [0m[34mINFO    [0m         Crop results:                            ]8;id=273776;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py\[2mgtfs.py[0m]8;;\[2m:[0m]8;id=126382;file:///home/ppeter/repo/mcr-py/src/command/gtfs/gtfs.py#139\[2m139[0m]8;;\
[2;36m           [0m                 # of trips: [1;36m224212[0m [1m([0m[1;36m86.10[0m%[1m)[0m              [2m           [0m
[2;36m           [0m                 # of stop times: [1;36m5260481[0m [1m([0m[1;36m87.66[0m%[1m)[0m        [2m           [0m
[2;36m           [0m                 # of stops: [1;36m9799[0m [1m([0m[1;36m23.62[0m%[1m)[0m 