# Exploring relations to shapes

In [None]:
from pathlib import Path

import duckdb
import folium
import geopandas

In [None]:
# Directory holding the aggregated GTFS parquet files, located relative to the
# parent of the current working directory. Stop early if it is not there.
gtfs_root = (
    Path().resolve().parent / "data" / "gtfs" / "rb_norway-aggregated-gtfs"
)
assert gtfs_root.exists()

In [None]:
def _read_table(name):
    """Open ``<gtfs_root>/<name>.parquet`` with ``duckdb.read_parquet``."""
    return duckdb.read_parquet(str(gtfs_root / f"{name}.parquet"))


# One module-level relation per parquet file; the SQL cells further down refer
# to these variables by name.
agency = _read_table("agency")
shapes = _read_table("shapes")
trips = _read_table("trips")
calendar = _read_table("calendar")
calendar_dates = _read_table("calendar_dates")
stop_times = _read_table("stop_times")
routes = _read_table("routes")
stops = _read_table("stops")
shapes_linestring_simple = _read_table("shapes_linestring_simple")

In [None]:
# Example identifiers used by the lookups in this notebook.
shape_id = "GJB:JourneyPattern:R30-256"
vehicle_ref = "224-2024-07-27"
# Matched against GTFS trip_id in the cells below.
dated_vehicle_journey_ref = "GJB:ServiceJourney:224-ROA_204613-R"
# Matched against GTFS route_id in the cells below.
line_ref = "GJB:Line:R30"

# NOTE(review): the link below points at journey 225-OSL_204615-R, not the
# 224-ROA_204613-R journey set above -- confirm which one is intended.
# http://172.18.73.229:8501/histmapone?data_frame_ref=2024-07-27&dated_vehicle_journey_ref=GJB:ServiceJourney:225-OSL_204615-R

In [None]:
# Materialise the routes and trips relations as pandas DataFrames for the
# string-matching lookups below.
df_routes, df_trips = routes.to_df(), trips.to_df()

## Relationships between Entur Real-Time data and Entur GTFS data
| Real Time                 | GTFS        |
| ------------------------- | ----------- |
| dated_vehicle_journey_ref | trip_id     |
| line_ref                  | route_id    |

In [None]:
# Routes whose route_id contains the real-time line_ref (regex match, as with
# str.contains defaults).
df_routes[df_routes["route_id"].str.contains(line_ref)]

In [None]:
# Trips whose route_id contains the real-time line_ref.
df_trips[df_trips["route_id"].str.contains(line_ref)]

In [None]:
# Trips whose trip_id contains the real-time dated_vehicle_journey_ref.
df_trips[df_trips["trip_id"].str.contains(dated_vehicle_journey_ref)]

## Annotating stop times with stop details using DuckDB

In [None]:
# Stops served by the selected trip, in stop_sequence order.
# stop_lat / stop_lon are selected too: the cell that turns this result into a
# GeoDataFrame reads both columns and fails without them.
res = duckdb.sql(
    f"""
        SELECT
            stops.stop_id,
            stops.stop_name,
            stops.stop_lat,
            stops.stop_lon,
            stop_times.arrival_time,
            stop_times.shape_dist_traveled
        FROM stop_times
        JOIN stops ON stop_times.stop_id = stops.stop_id
        WHERE stop_times.trip_id = '{dated_vehicle_journey_ref}'
        ORDER BY stop_times.stop_sequence
        ;
    """
)
res.show(max_width=500)

In [None]:
# Convert the query result to pandas, then attach one point geometry per stop
# built from its longitude/latitude columns (x = lon, y = lat, EPSG:4326).
df_raw = res.to_df()
stop_points = geopandas.points_from_xy(x=df_raw.stop_lon, y=df_raw.stop_lat)
gdf = geopandas.GeoDataFrame(df_raw, geometry=stop_points, crs="EPSG:4326")
gdf.explore()

In [None]:
# Centre the map on the centroid of the convex hull around all stops.
# dissolve() collapses the frame to a single row, so the centroid GeoSeries
# holds exactly one point; extract it so folium receives plain floats rather
# than one-element Series (implicit float(Series) is deprecated in pandas).
location = gdf.dissolve().convex_hull.centroid.iloc[0]
# location = gdf.geometry.centroid
# NOTE(review): `map` shadows the Python builtin; the name is kept in case
# other cells rely on it -- consider renaming across the notebook.
map = folium.Map(location=[location.y, location.x], zoom_start=9)
# One marker per stop; folium expects [lat, lon], i.e. [y, x].
for point in gdf.geometry:
    map.add_child(
        folium.Marker(
            location=[point.y, point.x],
        )
    )

map

## Stop times statistics

In [None]:
# Per-trip summary: distance covered, first and last arrival, and duration.
#
# arrival_time is handled as an "HH:MM:SS" string whose hour can be 24 or more
# when a trip ends after midnight, so strptime cannot parse it. Each value is
# converted to seconds since the start of the service day instead
# (hours * 3600 + minutes * 60 + seconds), and the duration is the difference
# between the largest and smallest value within the trip.
res = duckdb.sql(
    f"""
        WITH timed AS (
            SELECT
                trip_id,
                shape_dist_traveled,
                arrival_time,
                TRY_CAST(split_part(arrival_time, ':', 1) AS INTEGER) * 3600
                + TRY_CAST(split_part(arrival_time, ':', 2) AS INTEGER) * 60
                + TRY_CAST(split_part(arrival_time, ':', 3) AS INTEGER)
                    AS arrival_seconds
            FROM stop_times
--            WHERE trip_id = '{dated_vehicle_journey_ref}'
        )
        SELECT
            trip_id,
            max(shape_dist_traveled) AS total_distance,
            -- Pick start/end by numeric time, not by string ordering.
            arg_min(arrival_time, arrival_seconds) AS start_time,
            arg_max(arrival_time, arrival_seconds) AS end_time,
            to_seconds(max(arrival_seconds) - min(arrival_seconds)) AS total_time
        FROM timed
        GROUP BY trip_id;
    """
)

res.to_df()