In [1]:
# 1. CARGA DE LIBRERÍAS
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import hdbscan
import shapely
import alphashape
from shapely.geometry import MultiPoint, Point, Polygon, MultiPolygon
from shapely import concave_hull
# scaler = StandardScaler()

In [2]:
# 2. GLOBAL SETTINGS
# Region code; it prefixes the input layer name and both output layer names.
region = "R12"

# Input and output file geodatabases (relative paths).
gdb_in = f"datos_input/APC2023_{region}.gdb"
gdb_out = "datos_output/GIS_resultados_entrega_1.gdb"

# Layer names: input points, output dwellings, output cluster hulls.
fc_in = f"{region}_puntos_edificacion_rural"
fc_viviendas_out = f"{region}_viviendas_py"
fc_clusters_out = f"{region}_cl_hull_py"

# Passed to HDBSCAN as min_cluster_size and min_samples; also appended to
# the exported layer names.
minViv = 50

In [3]:
# 3. LOAD DATA
# Read the region's input layer from the input geodatabase.
viviendas = gpd.read_file(gdb_in, layer=fc_in)

In [4]:
# 4. SELECT CLUSTERING VARIABLES
# Independent copy of the two coordinate attribute columns used for clustering.
xy_data = viviendas.loc[:, ["POINT_X", "POINT_Y"]].copy()

In [None]:
# 5. RUN HDBSCAN
# minViv drives both min_cluster_size and min_samples.
# NOTE(review): core_dist_n_jobs is fixed at 22 — presumably matched to the
# authoring machine; confirm before running elsewhere.
# prediction_data=True stays disabled, as in the original call.
cluster_hdb = hdbscan.HDBSCAN(
    min_cluster_size=minViv,
    min_samples=minViv,
    core_dist_n_jobs=22,
)
cluster_hdb = cluster_hdb.fit(xy_data)

In [None]:
# 6. ATTACH CLUSTERING RESULTS TO THE DWELLINGS LAYER
# Label -1 becomes NaN; every other label is shifted by +1.
viviendas["id_cluster"] = np.where(
    cluster_hdb.labels_ != -1,
    cluster_hdb.labels_ + 1,
    np.nan,
)
# Per-point scores reported by the fitted model.
viviendas["membership_prob"] = cluster_hdb.probabilities_
viviendas["outlier_score"] = cluster_hdb.outlier_scores_

In [None]:
# 7. ADJUST CLUSTERS BY OUTLIER SCORE
# Rows whose outlier_score exceeds 0.9 lose their cluster id (set to NaN);
# all other rows keep id_cluster unchanged.
# NOTE(review): the comparison uses the fixed value 0.9 on the raw score,
# not the 0.9 quantile of the scores — confirm which one is intended.
viviendas["id_cluster_final"] = viviendas["id_cluster"].mask(
    viviendas["outlier_score"] > 0.9
)

# Disabled alternative, kept for reference: threshold at the 90th percentile
# of the outlier scores.
# umbral = viviendas["outlier_score"].quantile(0.9)
#
# Flag a row as outlier when it exceeds that threshold
# viviendas["es_outlier"] = viviendas["outlier_score"] > umbral
# viviendas["umbral_region"] = umbral
#
# Set id_cluster to NaN where the threshold is exceeded
# viviendas["id_cluster_final"] = np.where(
#     viviendas["outlier_score"] > umbral,
#     np.nan,
#     viviendas["id_cluster"]
# )

In [None]:
# 8. COMPUTE A CONCAVE HULL PER CLUSTER
# Only dwellings that still have a cluster id after the outlier filter take part.
viviendas_cl = viviendas.dropna(subset=["id_cluster_final"]).copy()

# One record (cluster id + hull geometry) per cluster that yields a polygon.
poligonos2 = []

for group_id, group in viviendas_cl.groupby("id_cluster_final"):
    # Keep non-empty Point geometries only; an empty point would make the
    # MultiPoint construction fail for the whole group.
    puntos = [
        geom
        for geom in group.geometry
        if isinstance(geom, Point) and not geom.is_empty
    ]
    # Groups with fewer than 4 usable points are skipped.
    if len(puntos) < 4:
        continue
    try:
        hull = concave_hull(MultiPoint(puntos), ratio=0.5)
    except Exception as e:
        # Best effort: report the failing group and carry on with the rest.
        print(f"Error en grupo {group_id}: {e}")
        continue
    # Degenerate inputs (e.g. collinear points) can give a non-polygon hull,
    # which must not end up in the polygon layer.
    if not isinstance(hull, (Polygon, MultiPolygon)):
        print(f"Grupo {group_id} omitido: el hull no es un polígono ({hull.geom_type})")
        continue
    poligonos2.append({"id_cluster_final": group_id, "geometry": hull})

# Always define cluster_hulls (possibly empty) so later cells never hit a
# NameError when no polygon was generated.
cluster_hulls = gpd.GeoDataFrame(
    poligonos2,
    columns=["id_cluster_final", "geometry"],
    geometry="geometry",
    crs=viviendas.crs,
)
# Keep the join key numeric even when the frame is empty.
cluster_hulls["id_cluster_final"] = cluster_hulls["id_cluster_final"].astype("float64")

if cluster_hulls.empty:
    print("No se generaron polígonos.")

In [None]:
# 9. COMPUTE PER-CLUSTER STATISTICS
# Dwelling count plus mean/std/min/max of membership probability and outlier
# score, over the rows that kept a final cluster id.
stats_cluster = (
    viviendas.dropna(subset=["id_cluster_final"])
    .groupby("id_cluster_final")
    .agg(
        n_viviendas=("id_cluster_final", "count"),
        membership_mean=("membership_prob", "mean"),
        membership_std=("membership_prob", "std"),
        membership_min=("membership_prob", "min"),
        membership_max=("membership_prob", "max"),
        outlier_mean=("outlier_score", "mean"),
        outlier_std=("outlier_score", "std"),
        outlier_min=("outlier_score", "min"),
        outlier_max=("outlier_score", "max"),
    )
    .reset_index()
)

# Cluster ids were built as HDBSCAN label + 1, so (id - 1) is the label that
# indexes cluster_persistence_. Looking the value up by label, rather than
# assigning the whole array positionally, keeps each persistence on its own
# cluster even if some cluster is absent from stats_cluster.
stats_cluster["persistence"] = np.asarray(cluster_hdb.cluster_persistence_)[
    stats_cluster["id_cluster_final"].astype(int).to_numpy() - 1
]

In [None]:
# 10. JOIN STATISTICS ONTO THE CONCAVE HULLS
# Left join: every hull is kept and receives its cluster's statistics.
cluster_hulls_final = pd.merge(
    left=cluster_hulls,
    right=stats_cluster,
    how="left",
    on="id_cluster_final",
)

In [None]:
# Preview the first five rows of the joined hull layer.
cluster_hulls_final.head(n=5)

In [None]:
# 11. EXPORT TO GDB — dwellings layer
# Layer name is the dwellings base name followed by minViv.
viviendas.to_file(
    gdb_out,
    layer=f"{fc_viviendas_out}{minViv}",
    driver="OpenFileGDB",
)

In [None]:
# 11 (cont.). EXPORT TO GDB — cluster hull layer
# Layer name is the hull base name followed by minViv.
cluster_hulls_final.to_file(
    gdb_out,
    layer=f"{fc_clusters_out}{minViv}",
    driver="OpenFileGDB",
)

In [None]:
# Write both layers again under "TEST"-suffixed layer names.
viviendas.to_file(
    gdb_out,
    layer=f"{fc_viviendas_out}{minViv}TEST",
    driver="OpenFileGDB",
)
cluster_hulls_final.to_file(
    gdb_out,
    layer=f"{fc_clusters_out}{minViv}TEST",
    driver="OpenFileGDB",
)