# Creating Vector files from CSVs

In [1]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon

### Declaring Paths

In [2]:
# Inputs are read from <cwd>/data/02_data and results are written to
# <cwd>/output/02_output, so the notebook has to be launched from the
# directory that contains the `data` folder.
data_path = os.path.join(os.getcwd(), 'data', '02_data')
output_path = os.path.join(os.getcwd(), 'output', '02_output')

# Every later cell writes a GeoPackage into this folder. On a fresh checkout
# it may not exist yet, so create it up front (no-op when already present).
os.makedirs(output_path, exist_ok=True)

# Source CSV files, one per geometry type.
point_path = os.path.join(data_path, 'bus_stops.csv')
routes_path = os.path.join(data_path, 'bus_routes.csv')
zones_path = os.path.join(data_path, 'bus_zones.csv')

### Creating Point Data

In [3]:
# Load the bus-stop table; it must provide `longitude` and `latitude` columns.
points_data = pd.read_csv(point_path)

# Build one point per row (x = longitude, y = latitude) and attach the
# geometries to the table, declaring the coordinates as EPSG:4326.
stop_points = gpd.points_from_xy(
    x=points_data['longitude'],
    y=points_data['latitude'],
)
points_gdf = gpd.GeoDataFrame(points_data, geometry=stop_points, crs="EPSG:4326")

# Write the stops out as a GeoPackage.
points_name = os.path.join(output_path, 'bus_stops.gpkg')
points_gdf.to_file(points_name, driver='GPKG')

### Lines (`groupby().apply()` keeps only the route key and the geometry; all other columns are discarded)

In [4]:
routes_data = pd.read_csv(routes_path)

# Build one LineString per route. Vertices are taken in the order the rows
# appear within each route (groupby preserves row order inside a group), so
# this assumes the CSV is already ordered along the route -- TODO confirm.
#
# Only the coordinate columns are handed to `apply`: letting it operate on the
# grouping column as well is deprecated since pandas 2.2, and the lambda never
# needs it. The result is unchanged: a Series indexed by `route`.
#
# NOTE(review): a route with a single row cannot form a LineString and will
# make shapely raise here.
route_gdf = (
    routes_data
    .groupby('route')[['longitude', 'latitude']]
    .apply(lambda route: LineString(zip(route.longitude, route.latitude)))
    .reset_index(name='geom')
)

# `route_gdf` now has exactly two columns: `route` and `geom`.
line_gdf = gpd.GeoDataFrame(
    route_gdf, crs='EPSG:4326', geometry='geom'
)

line_name = os.path.join(output_path, 'bus_routes.gpkg')
line_gdf.to_file(line_name, driver='GPKG')

### Lines (attribute columns kept by aggregating them with `agg` while grouping)

In [5]:
# Collapse each route into a single row: keep the first street value seen for
# the route and gather its coordinates into per-route lists.
route_aggregations = {
    'street': 'first',
    'longitude': list,
    'latitude': list,
}
route_gdf_2 = routes_data.groupby('route').agg(route_aggregations).reset_index()


def _row_to_linestring(row):
    """Build a LineString from one aggregated row's coordinate lists."""
    return LineString(zip(row.longitude, row.latitude))


route_gdf_2['geometry'] = route_gdf_2.apply(_row_to_linestring, axis=1)

# The coordinate lists are now encoded in the geometry, so drop them.
route_gdf_2 = route_gdf_2.drop(columns=['longitude', 'latitude'])

line_2 = gpd.GeoDataFrame(route_gdf_2, geometry="geometry", crs="EPSG:4326")

# Write the attributed routes out as a GeoPackage.
line_2_name = os.path.join(output_path, 'bus_routes_2.gpkg')
line_2.to_file(line_2_name, driver='GPKG')

### Polygons

In [6]:
zone_data = pd.read_csv(zones_path)

# Build one Polygon per zone. The exterior ring uses the zone's rows in the
# order they appear in the CSV (groupby preserves row order inside a group),
# so this assumes the vertices are already listed in ring order -- TODO confirm.
#
# Only the coordinate columns are handed to `apply`: letting it operate on the
# grouping column as well is deprecated since pandas 2.2, and the lambda never
# needs it. The result is unchanged: a Series indexed by `zone_name`.
#
# NOTE(review): a zone with too few vertices cannot form a valid ring and will
# make shapely raise here.
zone_gdf = (
    zone_data
    .groupby('zone_name')[['longitude', 'latitude']]
    .apply(lambda x: Polygon(zip(x.longitude, x.latitude)))
    .reset_index(name='geom')
)

# `zone_gdf` now has exactly two columns: `zone_name` and `geom`.
zones = gpd.GeoDataFrame(
    zone_gdf, crs="EPSG:4326", geometry="geom"
)

zones_name = os.path.join(output_path, 'bus_zones.gpkg')
zones.to_file(zones_name, driver='GPKG')