In [4]:
import geopandas as gpd
import pandas as pd
from shapely import wkt

In [5]:
def load_gdf(df: pd.DataFrame, geometry_col: str = 'geometry', crs=None) -> "gpd.GeoDataFrame":
    """
    Build a GeoDataFrame from a DataFrame whose geometry is stored as WKT text.

    Parameters
    ----------
    df : pd.DataFrame
        Input table. It is copied, so the caller's frame is not modified.
    geometry_col : str, default 'geometry'
        Name of the column holding WKT strings. Every value is passed to
        ``shapely.wkt.loads``.
    crs : optional
        Coordinate reference system forwarded to ``gpd.GeoDataFrame``.
        The default (None) leaves the result without a CRS.

    Returns
    -------
    gpd.GeoDataFrame
        Copy of ``df`` with ``geometry_col`` parsed into shapely geometries
        and set as the active geometry column.
    """
    parsed = df.copy()
    # Index by column name rather than attribute access so any column name works.
    parsed[geometry_col] = parsed[geometry_col].apply(wkt.loads)
    return gpd.GeoDataFrame(parsed, geometry=geometry_col, crs=crs)

In [18]:
def compute_cluster_centroids(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Aggregates points in each cluster and computes the centroid.

    The centroid is the arithmetic mean of the x and y coordinates of every
    row in the cluster. Coincident points therefore each contribute to the
    centroid, consistent with ``point_count``, which also counts every row.

    Parameters
    ----------
    gdf : gpd.GeoDataFrame
        Input GeoDataFrame with Point geometries and a 'cluster_id' column.
        An empty GeoDataFrame is accepted and yields an empty result.

    Returns
    -------
    gpd.GeoDataFrame
        GeoDataFrame with one row per cluster (ordered by cluster_id),
        carrying the CRS of ``gdf`` and containing:
        - cluster_id
        - point_count: number of points in the cluster
        - geometry: the centroid (as Point) of all points in the cluster

    Raises
    ------
    ValueError
        If 'cluster_id' is missing or any geometry is not a Point.
    """
    if 'cluster_id' not in gdf.columns:
        raise ValueError("GeoDataFrame must contain a 'cluster_id' column.")

    # Use the active geometry column, whatever it is named.
    points = gdf.geometry
    if not (points.geom_type == 'Point').all():
        raise ValueError("All geometries must be Points.")

    # Work on plain arrays so the input's index (even a non-unique one)
    # cannot interfere with alignment.
    coords = pd.DataFrame({
        'cluster_id': gdf['cluster_id'].to_numpy(),
        'x': points.x.to_numpy(),
        'y': points.y.to_numpy(),
    })
    grouped = coords.groupby('cluster_id')

    # Averaging coordinates weights every row. Unioning the points first
    # would merge coincident points and bias the centroid.
    means = grouped[['x', 'y']].mean()
    counts = grouped.size()

    # Both aggregates come from the same groupby, so they share group order.
    return gpd.GeoDataFrame(
        {
            'cluster_id': means.index,
            'point_count': counts.to_numpy(),
        },
        geometry=gpd.points_from_xy(means['x'], means['y']),
        crs=gdf.crs,
    )


In [23]:
# CSV inputs, one per point-unification method (H3, bounding box, DBSCAN).
h3_results_path, bbox_results_path, dbscan_results_path = (
    '../data/point_unification_results/h3_results.csv',
    '../data/point_unification_results/bbox_results.csv',
    '../data/point_unification_results/dbscan_results.csv',
)

In [24]:
# Read each results CSV and convert it with load_gdf, in the same order
# as before: H3, then bounding box, then DBSCAN.
h3_results, bbox_results, dbscan_results = (
    load_gdf(pd.read_csv(csv_path))
    for csv_path in (h3_results_path, bbox_results_path, dbscan_results_path)
)

In [25]:
# One centroid row per cluster_id for the DBSCAN and bounding-box results.
dbscan_clusters, bbox_clusters = map(
    compute_cluster_centroids,
    (dbscan_results, bbox_results),
)

In [None]:
# Write the per-cluster centroid tables to the current working directory.
dbscan_clusters.to_csv(path_or_buf='dbclusters.csv')
bbox_clusters.to_csv(path_or_buf='bboxclusters.csv')

# No centroid is computed for the H3 results in this notebook; only the
# number of rows per hex_id is exported.
(
    h3_results
    .groupby('hex_id')
    .size()
    .to_frame(name='point_count')
    .to_csv(path_or_buf='h3_clusters.csv')
)