# Make a map of GTFS data

In [None]:
from pathlib import Path

import duckdb
import geopandas as gpd
from shapely.geometry import LineString

In [None]:
# Locate the GTFS shapes table relative to the parent of the current working
# directory (Path() resolves against the directory the notebook runs in).
shapes_parquet = (
    Path().resolve().parent / "data/gtfs/rb_norway-aggregated-gtfs/shapes.parquet"
)
print(shapes_parquet)
# Fail with an explicit, descriptive error instead of a bare assert: an assert
# carries no message here and is skipped entirely when Python runs with -O.
if not shapes_parquet.exists():
    raise FileNotFoundError(f"GTFS shapes file not found: {shapes_parquet}")
shapes = duckdb.read_parquet(str(shapes_parquet))

# Pull only a window of rows into pandas (5.1M rows, starting after the first
# 20M) rather than materialising the whole table.
df_raw = shapes.limit(n=5_100_000, offset=20_000_000).to_df()
df_raw

In [None]:
# Attach a point geometry to every shape-point row, built from its
# longitude/latitude columns and tagged as EPSG:4326.
gdf = gpd.GeoDataFrame(
    data=df_raw,
    geometry=gpd.points_from_xy(
        x=df_raw["shape_pt_lon"],
        y=df_raw["shape_pt_lat"],
    ),
    crs="EPSG:4326",
)
gdf

In [None]:
# Build one LineString per shape_id and simplify it to reduce the vertex count.
#
# The vertex order of a GTFS shape is defined by shape_pt_sequence, not by the
# row order of the table, so sort on it before joining points into lines.
# NOTE(review): assumes the table has a shape_pt_sequence column holding integer
# values (possibly stored as text) - confirm against the parquet schema.
ordered_points = gdf.sort_values(
    "shape_pt_sequence",
    key=lambda seq: seq.astype("int64"),  # numeric order even if stored as text
    kind="stable",
)

# A LineString needs at least two coordinates. The row window used when loading
# can cut a shape down to a single point, so drop such shapes instead of
# letting LineString raise.
points_per_shape = ordered_points.groupby("shape_id")["shape_id"].transform("size")
ordered_points = ordered_points[points_per_shape >= 2]

df_shape_lines = (
    ordered_points.groupby("shape_id")["geometry"]
    .apply(lambda x: LineString(x.tolist()))
    .to_frame()
    # assign() evaluates its keyword arguments in order, so org_length sees the
    # full-resolution line and simplified_length sees the simplified one.
    .assign(
        org_length=lambda df1: df1.geometry.apply(lambda x: len(x.coords)),
        # Tolerance is in coordinate units (degrees, since the points were
        # built from lon/lat values).
        geometry=lambda df1: df1.geometry.apply(lambda x: x.simplify(0.0001)),
        simplified_length=lambda df1: df1.geometry.apply(lambda x: len(x.coords)),
    )
)
df_shape_lines

In [None]:
# Merge all shape lines into a single geometry, wrap it in its convex hull and
# take the hull's centroid as a representative centre of the data.
location = (
    df_shape_lines
    .dissolve()
    .convex_hull
    .centroid
)
location

In [None]:
# Quick visual check: show the first ten shape lines on an interactive map.
(
    df_shape_lines.iloc[:10]
    .set_crs("epsg:4326")
    .explore()
)