In [2]:
import os
import sys
import geopandas as gpd
import pandas as pd
import networkx as nx
from tqdm import tqdm
import re

sys.path.append(r"D:\Data Analytical\SERVICE\API")

# from modules.h3_route import retrieve_building, retrieve_roads, build_graph
# from modules.clustering import main_clustering

In [4]:
# Load the road shapefile and reproject it to EPSG:3857 in one step, then
# store each feature's geometry length (in the projected CRS units).
compile_road = gpd.read_file(
    r"D:\Data Analytical\PROJECT\TASK\SEPTEMBER\Week 5\NEAREST LOC ROAD CLASSIFICATION\Jalan Nasional Jawa Bali_Identified_V1_02102025.shp"
).to_crs(epsg=3857)
compile_road['length'] = compile_road.geometry.length

# Export the attribute table only (geometry column dropped) to Excel.
(
    compile_road
    .drop(columns='geometry')
    .to_excel(r"D:\Data Analytical\PROJECT\TASK\SEPTEMBER\Week 5\NEAREST LOC ROAD CLASSIFICATION\Jalan Nasional Jawa Bali_Identified_V1_02102025.xlsx")
)

In [None]:
def validate_poligonize(excel_path:str):
    """Load and validate the 'sitelist' and 'hubs' sheets of an Excel workbook.

    Args:
        excel_path: Path to the workbook. It must contain the sheets
            'sitelist' and 'hubs', each with the columns 'site_id', 'long',
            'lat' and 'site_type'.

    Returns:
        A ``(sitelist_gdf, hubs_gdf)`` tuple of GeoDataFrames whose point
        geometry is built from the 'long'/'lat' columns in EPSG:4326. The
        'site_type' column of ``hubs_gdf`` is overwritten with "FO Hub".

    Raises:
        ValueError: If a required sheet or a required column is missing.
    """
    required_sheets = ('sitelist', 'hubs')
    required_columns = ('site_id', 'long', 'lat', 'site_type')

    with pd.ExcelFile(excel_path) as workbook:
        available_sheets = workbook.sheet_names
        for sheet in required_sheets:
            if sheet not in available_sheets:
                raise ValueError(f"Sheet {sheet} not found in Excel file. Check your input.")

        sitelist = pd.read_excel(workbook, sheet_name='sitelist')
        hubs = pd.read_excel(workbook, sheet_name='hubs')

        print(f"‚ÑπÔ∏è Total sitelist: {len(sitelist):,}")
        print(f"‚ÑπÔ∏è Total hubs    : {len(hubs):,}")

        # Column by column, sitelist is checked before hubs, so the first
        # problem reported is the same one a sequential check would hit.
        for col in required_columns:
            for label, frame in (("Sitelist", sitelist), ("Hubs", hubs)):
                if col not in frame.columns:
                    raise ValueError(f"Column {col} not found in {label}. Check your input.")

        sitelist_gdf = gpd.GeoDataFrame(
            sitelist,
            geometry=gpd.points_from_xy(sitelist['long'], sitelist['lat'], crs="EPSG:4326"),
        )
        hubs_gdf = gpd.GeoDataFrame(
            hubs,
            geometry=gpd.points_from_xy(hubs['long'], hubs['lat'], crs='EPSG:4326'),
        )
        # Every row of the hubs sheet is labelled as a hub, whatever it held.
        hubs_gdf['site_type'] = "FO Hub"

    return sitelist_gdf, hubs_gdf

def polygonize_ring(sitelist_gdf:gpd.GeoDataFrame, hubs_gdf:gpd.GeoDataFrame, polygon_gdf:gpd.GeoDataFrame, project_name:str):
    """Group sites and hubs by the polygon they intersect and label each group as a ring.

    Each polygon is numbered ``index + 1``. For every polygon the intersecting
    hubs and sites are collected; the first hub is flagged "Start" and, when a
    second hub exists, it is flagged "End" (hubs beyond the second are
    ignored). The ring rows are ordered start hub, sites, end hub, and get a
    'region' column (most frequent 'region' among the ring's hubs) and a
    'ring_name' column built from the first hub's 'site_id'.

    Args:
        sitelist_gdf: Site points; reprojected to EPSG:3857 internally.
        hubs_gdf: Hub points; must provide 'site_id' and 'region' columns.
        polygon_gdf: Ring polygons. The caller's frame is not modified.
        project_name: Text embedded in every generated ring name.

    Returns:
        A GeoDataFrame with all ring rows (index reset), or an empty list when
        no polygon contained a hub (kept for backward compatibility).
    """
    # Compiled once instead of on every ring.
    code_pattern = re.compile(r"(?P<code>\w+)")

    def define_ringname(hub_id:str, ring_num:str, project_name:str):
        """Build 'TBG-<code>-<project>-DF<ring, zero-padded to 4>' or None."""
        if pd.isna(hub_id):
            return None
        # str() keeps numeric ids (e.g. parsed from Excel) from raising TypeError.
        found = code_pattern.search(str(hub_id))
        if found is None:
            return None
        code = found.group("code")
        return f"TBG-{code}-{project_name}-DF{str(ring_num).zfill(4)}"

    print(f"‚ÑπÔ∏è Total Polygon: {len(polygon_gdf):,}")
    # Work on a copy so the caller's GeoDataFrame does not gain a 'name' column.
    polygon_gdf = polygon_gdf.copy()
    polygon_gdf['name'] = polygon_gdf.index + 1

    # CONVERT CRS
    sitelist_gdf = sitelist_gdf.to_crs(epsg=3857)
    hubs_gdf = hubs_gdf.to_crs(epsg=3857)
    polygon_gdf = polygon_gdf.to_crs(epsg=3857)

    # JOIN POLYGON
    polygon_keys = polygon_gdf[['name', 'geometry']]
    sites_joined = gpd.sjoin(sitelist_gdf, polygon_keys, predicate="intersects").drop(columns='index_right')
    hubs_joined = gpd.sjoin(hubs_gdf, polygon_keys, predicate="intersects").drop(columns='index_right')
    sites_joined = sites_joined.rename(columns={'name':'group'})
    hubs_joined = hubs_joined.rename(columns={'name':'group'})
    polygon_list = polygon_gdf['name'].unique().tolist()

    identified_ring = []
    for ring in tqdm(polygon_list, total=len(polygon_list), desc="Polygonize Ring"):
        sites_ring = sites_joined[sites_joined['group'] == ring].copy()
        hubs_ring = hubs_joined[hubs_joined['group'] == ring].copy()
        total_hub = len(hubs_ring)

        if total_hub == 0:
            print(f"üî¥ Ring {ring} | No hubs found in this polygon.")
            continue

        if total_hub > 2:
            print(f"üü† Ring {ring} | Hubs more than 2, selecting first 2 hubs only")
            hubs_ring = hubs_ring.iloc[:2].copy()

        if "flag" not in hubs_ring.columns:
            hubs_ring["flag"] = None
        flag_pos = hubs_ring.columns.get_loc("flag")

        # First hub opens the ring; a second hub (if any) closes it.
        hubs_ring.iloc[0, flag_pos] = "Start"
        parts = [hubs_ring.iloc[[0]], sites_ring]
        if len(hubs_ring) == 2:
            hubs_ring.iloc[-1, flag_pos] = "End"
            parts.append(hubs_ring.iloc[[-1]])
        ring_data = pd.concat(parts)

        # mode() is empty when every hub region is missing; fall back to None
        # instead of raising IndexError.
        region_modes = hubs_ring['region'].mode()
        region = region_modes.iat[0] if not region_modes.empty else None
        hub_id = hubs_ring['site_id'].iat[0]

        ring_data['region'] = region
        ring_data['ring_name'] = define_ringname(hub_id, ring_num=ring, project_name=project_name)
        identified_ring.append(ring_data)

    if len(identified_ring) > 0:
        identified_ring = pd.concat(identified_ring)
        identified_ring = identified_ring.reset_index(drop=True)
        print(f"‚úÖ Ring polygonize completed.")
    else:
        print(f"üî¥ Ring data empty.")
    return identified_ring

In [None]:
# --- Inputs for this ring-polygonization run ---
project_name = "NewSite2026"
excel_file = r"D:\Data Analytical\PROJECT\TASK\SEPTEMBER\Week 5\IOH RING PROCESS\Template_Unsupervised_New site 2026 v1.2 - Combined.xlsx"
polygon_file = r"D:\Data Analytical\PROJECT\TASK\SEPTEMBER\Week 5\IOH RING PROCESS\Polygon Part 1.parquet"

# Validate the workbook first, then read the ring polygons.
sitelist_gdf, hubs_gdf = validate_poligonize(excel_path=excel_file)
polygon_gdf = gpd.read_parquet(polygon_file)

# Assign sites and hubs to polygons and label every ring.
poligonized = polygonize_ring(
    sitelist_gdf=sitelist_gdf,
    hubs_gdf=hubs_gdf,
    polygon_gdf=polygon_gdf,
    project_name=project_name,
)


‚ÑπÔ∏è Total sitelist: 11,972
‚ÑπÔ∏è Total hubs    : 3,526
‚ÑπÔ∏è Total Polygon: 200


Polygonize Ring:  10%|‚ñâ         | 19/200 [00:00<00:00, 183.89it/s]

üî¥ Ring 2 | No hubs found in this polygon.


Polygonize Ring: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 250.06it/s]

‚úÖ Ring polygonize completed.





In [None]:
# Show the result returned by polygonize_ring for inspection.
poligonized

Unnamed: 0,site_id,site_name,long,lat,region,site_type,geometry,group,flag,ring_name
0,11RKB0019,CILEGONG,106.000417,-6.555411,BRO,FO Hub,POINT (11799912.444 -731342.36),1,Start,TBG-11RKB0019-NewSite2026-DF0001
1,new site 41GUNUNG KENCANA,new site 41GUNUNG KENCANA,106.020000,-6.551290,BRO,New Site,POINT (11802092.414 -730880.595),1,,TBG-11RKB0019-NewSite2026-DF0001
2,11RKB0029,CIGINGGANG_MT,106.041010,-6.553880,BRO,Existing Site,POINT (11804431.236 -731170.808),1,,TBG-11RKB0019-NewSite2026-DF0001
3,11RKB0040,GUNUNGKENCANA-2,106.075000,-6.574810,BRO,FO Hub,POINT (11808214.986 -733516.101),1,End,TBG-11RKB0019-NewSite2026-DF0001
4,11PDG0090,BINUANGEUN_SMI-1,105.879000,-6.833990,BRO,FO Hub,POINT (11786396.366 -762566.568),3,Start,TBG-11PDG0090-NewSite2026-DF0003
...,...,...,...,...,...,...,...,...,...,...
1135,DBCJ_5_516,DBCJ_5_516,109.068674,-7.052219,CJRO,New Site,POINT (12141469.251 -787039.19),200,,TBG-14BRS0172-NewSite2026-DF0200
1136,BottomUp_Tegal_1,BottomUp_Tegal_1,109.085332,-7.003429,CJRO,New Site,POINT (12143323.622 -781566.786),200,,TBG-14BRS0172-NewSite2026-DF0200
1137,New_Nominal_155,New_Nominal_155,109.102056,-6.990611,CJRO,New Site,POINT (12145185.318 -780129.197),200,,TBG-14BRS0172-NewSite2026-DF0200
1138,BottomUp_Tegal_8,BottomUp_Tegal_8,109.109853,-6.970771,CJRO,New Site,POINT (12146053.276 -777904.124),200,,TBG-14BRS0172-NewSite2026-DF0200


: 