In [1]:
import os
# Must be set before geopandas is imported below: it tells geopandas not to
# use the PyGEOS backend.
os.environ['USE_PYGEOS'] = '0'

import pandas as pd
import geopandas as gpd
import json
import warnings

from pyproj import CRS, Transformer
from shapely.geometry import Polygon
from shapely.wkb import loads, dumps

# NOTE(review): this silences every warning for the rest of the session,
# including pandas/geopandas deprecation and chained-assignment warnings.
warnings.filterwarnings('ignore')

In [2]:
# # Create a city-level shapefile from the national-level shapefile
# # Uncomment when needed (the plotting lines at the end also require `import matplotlib.pyplot as plt`)

# # The city you want to filter by
# target_city = 'Kota Bandung'

# # Create directory for the new shapefile
# target_path = f'G:/My Drive/Spatial data/SHP Batas Desa/{target_city}'
# # os.mkdir(target_path)

# # Read the shapefile into a GeoDataFrame
# shapefile_path = r'G:/My Drive/Spatial data/SHP Batas Desa/Batas Desa terbaru Maret 2020/Batas Desa terbaru Maret 2020.shp'
# shp = gpd.read_file(shapefile_path) # runtime ~9 minutes

# # Filter the GeoDataFrame to include only the rows where the 'WADMKK' column matches the target city
# shp_filtered = shp[shp.WADMKK==target_city]

# # Save the filtered GeoDataFrame to a new shapefile
# filtered_shapefile_path = os.path.join(target_path, 'admDesa.shp').replace('\\', '/')
# shp_filtered.to_file(filtered_shapefile_path)

# # Plot the filtered GeoDataFrame
# shp_filtered.plot()
# plt.show()

In [3]:
# Read the shapefiles (each gpd.read_file call loads one layer from disk)

# Administrative boundary layers: the first four are read from the "SHP Batas Desa" folder, the two Lombok layers from the "SHP RBI" folder
pidie = gpd.read_file(r'G:/My Drive/Spatial data/SHP Batas Desa/Pidie/admDesa.shp')
garut = gpd.read_file(r'G:/My Drive/Spatial data/SHP Batas Desa/Garut/admDesa.shp')
badung = gpd.read_file(r'G:/My Drive/Spatial data/SHP Batas Desa/Badung/admDesa.shp')
sumbawabarat = gpd.read_file(r'G:/My Drive/Spatial data/SHP Batas Desa/SumbawaBarat/admDesa.shp')
lombokbarat = gpd.read_file(r'G:/My Drive/Spatial data/SHP RBI/LombokBarat/ADMINISTRASIDESA_AR.shp')
lomboktimur = gpd.read_file(r'G:/My Drive/Spatial data/SHP RBI/LombokTimur/ADMINISTRASIDESA_AR.shp')

# AccessMod output (travel time statistics by cost allocation zones)
pidie_am = gpd.read_file(r'G:/My Drive/Spatial data/Pidie/cost_allocation/cost_allocation_travel_time_to_pkm.shp')

In [4]:
def convert_polygon_coordinates(polygon, transformer):
    """
    Transforms the coordinates of a polygon from one coordinate reference system (CRS) to another
    and converts the polygon to GeoJSON format.

    The exterior ring and every interior ring (hole) are transformed, each vertex
    exactly once, and the resulting coordinates are rounded to 6 decimal places.
    Only the first two ordinates of each vertex are used, so a Z value, if present,
    is dropped.

    Parameters:
    polygon (shapely.geometry.Polygon): The input polygon to be transformed. The geometry
        must expose ``exterior`` and ``interiors``; multi-part geometries are not handled here.
    transformer (pyproj.Transformer): The transformer object used to convert coordinates.
        It is called as ``transformer.transform(x, y)`` and must return the converted pair.

    Returns:
    dict: The transformed polygon as a GeoJSON feature (the first entry of the
        ``features`` list of a one-element GeoSeries' ``__geo_interface__``).
    """
    def _transform_ring(ring):
        # Transform each vertex a single time and round both ordinates.
        ring_coords = []
        for coord in ring.coords:
            new_x, new_y = transformer.transform(coord[0], coord[1])
            ring_coords.append((round(new_x, 6), round(new_y, 6)))
        return ring_coords

    # Rebuild the polygon with its holes so that enclaves are not silently filled in.
    transformed_polygon = Polygon(
        _transform_ring(polygon.exterior),
        [_transform_ring(hole) for hole in polygon.interiors],
    )

    # Convert to GeoJSON format
    geojson = gpd.GeoSeries([transformed_polygon]).__geo_interface__
    geojson_dict = geojson['features'][0]

    return geojson_dict

def transform_and_convert_geometry_to_geojson(gdf):
    """
    Transforms the geometries in a GeoDataFrame to WGS84 (EPSG:4326) and converts them to GeoJSON format.

    The input GeoDataFrame is left untouched: the result is built on an explicit copy.

    Parameters:
    gdf (geopandas.GeoDataFrame): The input GeoDataFrame with geometries to be transformed.
        Its ``crs`` is used as the source CRS, and each geometry is passed to
        ``convert_polygon_coordinates``.

    Returns:
    pandas.DataFrame: A DataFrame with the same columns as ``gdf`` whose 'geometry' column
        holds the transformed geometries as GeoJSON feature dicts.
    """
    # Define the source CRS from the input GeoDataFrame
    source_crs = CRS.from_user_input(gdf.crs)

    # Create a transformer object to convert from the source CRS to WGS84.
    # always_xy=True keeps the (x, y) axis order for input and output.
    transformer = Transformer.from_crs(source_crs, 'EPSG:4326', always_xy=True)

    # pd.DataFrame(gdf) on its own is not guaranteed to copy the underlying data, so
    # assigning a column to it could overwrite the caller's geometry. Copy explicitly.
    df = pd.DataFrame(gdf).copy()

    # Apply the transformation and conversion to GeoJSON for each geometry in the GeoDataFrame
    df['geometry'] = gdf.geometry.apply(
        lambda geom: convert_polygon_coordinates(geom, transformer)
    )

    return df

In [5]:
def convert_geometry_to_geojson(geometry):
    """
    Serialise a Shapely geometry as a GeoJSON feature string.

    Parameters:
    - geometry: Shapely geometry object

    Returns:
    - str: JSON text of the single feature that a one-element GeoSeries
      exposes through ``__geo_interface__``
    """
    # Round-trip through WKB with output_dimension=2 to obtain a 2D geometry.
    flat_wkb = dumps(geometry, output_dimension=2)
    flat_geometry = loads(flat_wkb)

    # Wrap the geometry in a GeoSeries and take its only feature.
    feature_collection = gpd.GeoSeries(flat_geometry).__geo_interface__
    only_feature = feature_collection['features'][0]

    return json.dumps(only_feature)

In [6]:
def data_preprocessing(gdf):
    """
    Cleans the name columns of a boundary layer, merges features by name and
    returns one row per name with its largest polygon as a GeoJSON string.

    The input is not modified: all work is done on a copy, so the function can be
    called safely on a filtered slice and re-run on the same frame.

    Parameters:
    gdf (geopandas.GeoDataFrame): Layer with the columns 'WADMKD', 'WADMKC', 'WADMKK',
        'NAMOBJ' and a geometry column named 'geometry'. A row in which both 'WADMKD'
        and 'NAMOBJ' are missing raises AttributeError, because the value left after
        ``fillna`` is not a string.

    Returns:
    pandas.DataFrame: Indexed by 'NAMOBJ' (``WADMKC`` and ``WADMKD`` joined, spaces
        replaced with underscores) with the columns 'WADMKD', 'WADMKC', 'WADMKK',
        'SHAPE_Area' and 'geometry' (GeoJSON feature string from
        ``convert_geometry_to_geojson``).
    """
    # Work on a copy so that the caller's GeoDataFrame (or a filtered view of it)
    # is never mutated by the column assignments below.
    gdf = gdf.copy()

    # Fill missing values in 'WADMKD' with corresponding values in 'NAMOBJ',
    # then take the first part of the string before the first '/'.
    gdf['WADMKD'] = gdf['WADMKD'].fillna(gdf['NAMOBJ']).apply(lambda x: x.split('/')[0])

    # Fill missing values in 'WADMKC' with corresponding values in 'WADMKD',
    # then take the first part of the string before the first '/'.
    gdf['WADMKC'] = gdf['WADMKC'].fillna(gdf['WADMKD']).apply(lambda x: x.split('/')[0])

    # Combine 'WADMKC' and 'WADMKD' into 'NAMOBJ', separated by a space.
    gdf['NAMOBJ'] = gdf['WADMKC'] + ' ' + gdf['WADMKD']

    # Replace spaces in 'NAMOBJ' with underscores.
    gdf['NAMOBJ'] = gdf['NAMOBJ'].str.replace(' ', '_')

    # Dissolve geometries by 'NAMOBJ', aggregating all geometries that share the same 'NAMOBJ' value,
    # then explode multi-part results so that every part becomes its own row.
    gdf = gdf.dissolve(by='NAMOBJ', as_index=False).explode(index_parts=False)

    # Calculate the area of each geometry and store it in 'SHAPE_Area'.
    # NOTE(review): the area is expressed in the units of the layer's CRS; it is only
    # used here to rank the parts of one 'NAMOBJ' against each other.
    gdf['SHAPE_Area'] = gdf['geometry'].area

    # Sort the DataFrame by 'NAMOBJ' and 'SHAPE_Area', and drop duplicates, keeping the largest area for each 'NAMOBJ'.
    gdf = gdf.sort_values(['NAMOBJ', 'SHAPE_Area']).drop_duplicates('NAMOBJ', keep='last').set_index('NAMOBJ')

    # Create a new, independent DataFrame with selected columns.
    df = pd.DataFrame(gdf[['WADMKD', 'WADMKC', 'WADMKK', 'SHAPE_Area']]).copy()

    # Convert the geometry to GeoJSON format and store it in a new 'geometry' column.
    df['geometry'] = gdf['geometry'].apply(convert_geometry_to_geojson)

    return df

In [7]:
# Layers that are preprocessed as loaded.
df_pidie, df_garut = (data_preprocessing(layer) for layer in (pidie, garut))

# Layers that are first restricted to the rows of one 'WADMKK' value.
df_badung, df_lombokbarat, df_lomboktimur = (
    data_preprocessing(layer[layer['WADMKK']==regency_name])
    for layer, regency_name in (
        (badung, 'Badung'),
        (lombokbarat, 'Lombok Barat'),
        (lomboktimur, 'Lombok Timur'),
    )
)

df_sumbawabarat = data_preprocessing(sumbawabarat)

# AccessMod cost-allocation zones: reprojected to EPSG:4326 and converted to GeoJSON dicts.
df_pidie_am = transform_and_convert_geometry_to_geojson(pidie_am)

# Save as CSV with "," separator