In [1]:
import os

from helpers_clean import *
from helpers_difference import *
from helpers_buffer import *
from helpers_merge import *
from helpers_read_data import *


In [None]:
# --- Tunable parameters ------------------------------------------------------
buffer = 10                   # buffer distance used for the settlement layer, in meters
min_area_removed = 25         # area threshold (sq.m) passed to explode_and_filter_features
inset_buffer_for_AOI = 0.2    # inset distance applied to the AOI, in meters
buffer_for_roads = 0.2        # buffer distance used for the road network layer

# --- Data locations ----------------------------------------------------------
# Folder holding the input shapefiles.
input_folder = "D:/2_Analytics/9_LULC_classification/automation/data_3/Chirmiri OCM_Fix_Geometry_Mansi_Final_Input/Chirmiri OCM_Fix_Geometry_Mansi_Final"

# Folder that receives every output; created here if it does not exist yet.
output_folder_path = "D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new"
os.makedirs(output_folder_path, exist_ok=True)

In [3]:

## Locate the input shapefiles inside `input_folder`.
aoi_path, settlement_path, road_network_path, other_shps = fetch_shapefile_paths(input_folder)

# Echo what was found so a wrong or missing layer is obvious before processing.
for _label, _found in (
    ("AOI file:", aoi_path),
    ("Settlement file:", settlement_path),
    ("road network file:", road_network_path),
    ("Other shapefiles:", other_shps),
):
    print(_label, _found)

# Pair up the remaining shapefiles for the class merge step.
pairs = make_shapefile_pairs(other_shps)
print(pairs)


AOI file: D:\2_Analytics\9_LULC_classification\automation\data_3\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\Chirmiri OCM_Fix_Geometry_Mansi_Final\aoi.shp
Settlement file: D:\2_Analytics\9_LULC_classification\automation\data_3\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\Chirmiri OCM_Fix_Geometry_Mansi_Final\settlement.shp
road network file: D:\2_Analytics\9_LULC_classification\automation\data_3\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\Chirmiri OCM_Fix_Geometry_Mansi_Final\Road Network.shp
Other shapefiles: ['D:\\2_Analytics\\9_LULC_classification\\automation\\data_3\\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\\Chirmiri OCM_Fix_Geometry_Mansi_Final\\C__Agricultural_Land.shp', 'D:\\2_Analytics\\9_LULC_classification\\automation\\data_3\\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\\Chirmiri OCM_Fix_Geometry_Mansi_Final\\C__Barren_Area.shp', 'D:\\2_Analytics\\9_LULC_classification\\automation\\data_3\\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\\Chirmiri OCM_Fix_Geometry_Mansi_Final\\C

In [4]:
## operation 1
# Buffer the settlement layer, dissolve it, and write a hole-filled version.
settlement_folder_path = os.path.join(output_folder_path, "related_files_settlement")
os.makedirs(settlement_folder_path, exist_ok=True)

# The three output shapefiles all live in the settlement folder.
(
    buffered_settlement_path,
    dissolved_settlement_path,
    dissolved_and_gap_filled_settlement_path,
) = (
    os.path.join(settlement_folder_path, shp_name)
    for shp_name in (
        "buffered_settlement_shp.shp",
        "dissolved_settlement_shp.shp",
        "dissoved_and_gap_filled_settlement_shp.shp",
    )
)

# min_area_remove=0: nothing is filtered by area at this stage.
gdf_buf, gdf_diss, gdf_diss_filled = buffer_and_dissolve(
    input_shp=settlement_path,
    buffer_m=buffer,
    out_buffered=buffered_settlement_path,
    out_dissolved=dissolved_settlement_path,
    out_filled_dissolved=dissolved_and_gap_filled_settlement_path,
    min_area_remove=0,
)


Loaded: D:\2_Analytics\9_LULC_classification\automation\data_3\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\Chirmiri OCM_Fix_Geometry_Mansi_Final\settlement.shp
Original CRS: EPSG:32644
Created 7174 buffered geometries (buffer=10 m).
Dissolved all buffered geometries into one.
Filled holes inside dissolved geometry (interiors removed).
Saved buffered: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_settlement\buffered_settlement_shp.shp
Saved dissolved: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_settlement\dissolved_settlement_shp.shp
Saved filled dissolved: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_settlement\dissoved_and_gap_filled_settlement_shp.shp


In [5]:
## operation 2
# Output locations used by this step.
classes_folder_path = os.path.join(output_folder_path, "related_files_classes")
os.makedirs(classes_folder_path, exist_ok=True)

merged_classes_without_settlement_path = os.path.join(
    classes_folder_path, "merged_classes_except_settlement_shp.shp"
)
cropped_dissolved_settlement_path = os.path.join(
    settlement_folder_path, "cropped_dissolved_settlement_shp.shp"
)
exploded_path = os.path.join(settlement_folder_path, "exploded_settlement_shp.shp")
filtered_settlement_path = os.path.join(settlement_folder_path, "final_cropped_settlement_shp.shp")

# Merge all classes (the dissolved settlement is not part of `pairs`).
merged_classes_without_settlement_gdf = merge_shapefiles_with_class(
    pairs,
    merged_classes_without_settlement_path,
    dissolve=False,
)

# Subtract the merged classes from the dissolved, gap-filled settlement.
cropped_dissolved_settlement_gdf = subtract_shapefiles(
    dissolved_and_gap_filled_settlement_path,
    merged_classes_without_settlement_path,
    cropped_dissolved_settlement_path,
)

# Explode the cropped settlement and filter it using `min_area_removed`.
exploded, filtered = explode_and_filter_features(
    cropped_dissolved_settlement_path,
    exploded_path,
    filtered_settlement_path,
    min_area_sqm=min_area_removed,
)

Saved difference shapefile to: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_settlement\cropped_dissolved_settlement_shp.shp
Area (dissolved): 1423800.43
Area (merged to subtract): 7585349.81
Area (difference/out): 1165537.12


In [6]:
## operation 3
# Output locations used by this step.
merged_classes_path = os.path.join(classes_folder_path, "merged_classes_except_unclassified.shp")
unclassified_path = os.path.join(output_folder_path, "unclassified_shp.shp")
merged_with_unclassified_path = os.path.join(
    classes_folder_path, "all_classes_including_unclassified_shp.shp"
)
output_path = os.path.join(output_folder_path, "final_classes_shp.shp")

# Combine the filtered settlement with the other merged classes.
merged_gdf = merge_two_shapefiles_keep_class(
    filtered_settlement_path,
    merged_classes_without_settlement_path,
    merged_classes_path,
)

# Derive the unclassified layer from the inset AOI and write the combined layer.
# NOTE: `merged_gdf` is rebound here to the second returned frame.
unclassified_gdf, merged_gdf = subtract_using_aoi_inset_and_write(
    aoi_shp=aoi_path,
    classes_shp=merged_classes_path,
    unclassified_shp=unclassified_path,
    merged_shp=merged_with_unclassified_path,
    inset_m=inset_buffer_for_AOI,
    reproject_classes_to_aoi=True,
    explode_result=True,
    min_area_sqm=0.1,
    dissolve_by_class=False,
)

# Final attribute clean-up; the result is written to `output_path`.
cleaned_gdf = clean_attributes_with_area(
    input_shp=merged_with_unclassified_path,
    output_shp=output_path,
)


Merged 66 + 393 features -> 459 written to: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_classes\merged_classes_except_unclassified.shp
AOI inset by 0.2 m: features 1
Unclassified features: 163, total area (units²): 270337.91
Merged (clipped to AOI) features: 621, total area (units²): 9001824.86
Saved unclassified -> D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\unclassified_shp.shp
Saved merged (classes + unclassified, clipped to AOI) -> D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_classes\all_classes_including_unclassified_shp.shp
Saved cleaned shapefile to D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\final_classes_shp.shp with 621 features.


In [11]:
## operation 4:
# Folder and output shapefiles for the road-network step.
road_folder_path = os.path.join(output_folder_path, "related_files_road_network")
os.makedirs(road_folder_path, exist_ok=True)

buffered_road_path, dissolved_road_path = (
    os.path.join(road_folder_path, shp_name)
    for shp_name in ("buffered_road_shp.shp", "dissolved_road_shp.shp")
)

def buffer_and_dissolve_roads(
    input_shp: str,
    buffer_m: float,
    out_buffered: str,
    out_dissolved: str,
    min_area_remove: float = 0.0
) -> Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Buffer every road feature and dissolve the buffers into a single geometry.

    Unlike the settlement variant, no hole filling is performed here.

    Parameters:
      input_shp       : path to the road shapefile (polygons, lines or points).
      buffer_m        : buffer distance in CRS units (meters for a projected CRS).
      out_buffered    : path the per-feature buffered layer is written to.
      out_dissolved   : path the single dissolved geometry is written to.
      min_area_remove : buffered features with a smaller area are dropped
                        before dissolving; 0 disables the filter.

    Returns:
      gdf_buffered, dissolved_gdf

    Raises:
      FileNotFoundError : input_shp does not exist.
      ValueError        : input_shp has no CRS.
    """
    shp = Path(input_shp)
    if not shp.exists():
        raise FileNotFoundError(f"Input file not found: {shp}")

    gdf = gpd.read_file(str(shp))
    print("Loaded:", shp)
    print("Original CRS:", gdf.crs)

    if gdf.crs is None:
        raise ValueError("Input shapefile has no CRS. Assign a CRS (e.g., EPSG:4326) and re-run.")

    # A geographic CRS measures in degrees, so a "meter" buffer would be wrong.
    if getattr(gdf.crs, "is_geographic", False):
        print(f"Warning: CRS {gdf.crs} is geographic; buffer_m={buffer_m} will be "
              f"interpreted in degrees, not meters.")

    # Buffer with sharp corners (mitre joins); caps are round (lines only).
    join_style = 2  # mitre
    cap_style = 1   # round (for lines only)
    resolution = 16
    polygon_types = ("Polygon", "MultiPolygon")

    buffered_geoms = []
    for geom in gdf.geometry:
        if geom is None or geom.is_empty:
            buffered_geoms.append(None)
            continue
        # buffer(0) repairs invalid polygons, but it collapses lines and points
        # to an empty polygon, so it is applied to polygonal input only.
        if geom.geom_type in polygon_types:
            geom = geom.buffer(0)
            if geom.is_empty:
                buffered_geoms.append(None)
                continue
        buffered_geoms.append(
            geom.buffer(buffer_m, resolution=resolution,
                        join_style=join_style, cap_style=cap_style)
        )

    gdf_buffered = gdf.copy()
    gdf_buffered['geometry'] = buffered_geoms
    gdf_buffered = gdf_buffered[~(gdf_buffered.geometry.is_empty | gdf_buffered.geometry.isnull())].reset_index(drop=True)
    print(f"Created {len(gdf_buffered)} buffered geometries (buffer={buffer_m} m).")

    # Remove tiny features (mask-based, no temporary column needed)
    if min_area_remove > 0:
        before = len(gdf_buffered)
        keep = gdf_buffered.geometry.area >= min_area_remove
        gdf_buffered = gdf_buffered[keep].copy()
        after = len(gdf_buffered)
        print(f"Removed {before-after} features smaller than {min_area_remove} m²")

    # Dissolve into single geometry
    dissolved_geom = unary_union(gdf_buffered.geometry.values)
    dissolved_gdf = gpd.GeoDataFrame(geometry=[dissolved_geom], crs=gdf_buffered.crs)
    print("Dissolved all buffered geometries into one.")

    # Save outputs
    Path(out_buffered).parent.mkdir(parents=True, exist_ok=True)
    Path(out_dissolved).parent.mkdir(parents=True, exist_ok=True)
    gdf_buffered.to_file(out_buffered)
    dissolved_gdf.to_file(out_dissolved)
    print("Saved buffered:", out_buffered)
    print("Saved dissolved:", out_dissolved)

    return gdf_buffered, dissolved_gdf
 
# Buffer the road network by `buffer_for_roads` and dissolve the result.
# Both layers are written into `road_folder_path`; min_area_remove=0 disables
# the small-feature filter.
gdf_buff_road, gdf_diss_road = buffer_and_dissolve_roads(
    input_shp=road_network_path,
    buffer_m=buffer_for_roads,
    out_buffered=buffered_road_path,
    out_dissolved=dissolved_road_path ,
    min_area_remove=0
)





Loaded: D:\2_Analytics\9_LULC_classification\automation\data_3\Chirmiri OCM_Fix_Geometry_Mansi_Final_Input\Chirmiri OCM_Fix_Geometry_Mansi_Final\Road Network.shp
Original CRS: EPSG:32644
Created 19 buffered geometries (buffer=0.2 m).
Dissolved all buffered geometries into one.
Saved buffered: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_road_network\buffered_road_shp.shp
Saved dissolved: D:/2_Analytics/9_LULC_classification/automation/data_3/output_folder_new\related_files_road_network\dissolved_road_shp.shp


In [14]:
## Subtract the dissolved road polygons from the final classes layer


def _polygonal_part(geom):
    """Return the Polygon/MultiPolygon content of `geom`, or None if it has none."""
    if geom is None or geom.is_empty:
        return None
    if geom.geom_type in ("Polygon", "MultiPolygon"):
        return geom
    if geom.geom_type == "GeometryCollection":
        # A difference can leave line/point slivers next to the polygons;
        # keep only the polygon members instead of discarding the feature.
        parts = [
            part for part in geom.geoms
            if part.geom_type in ("Polygon", "MultiPolygon") and not part.is_empty
        ]
        if parts:
            return unary_union(parts)
    return None


def subtract_dissolved_roads_from_classes(
    final_classes_shp: Union[str, Path],
    road_shp: Union[str, Path],
    out_shp: Union[str, Path]
) -> gpd.GeoDataFrame:
    """
    Subtract the unioned road geometry from each feature of final_classes_shp.

    - final_classes_shp: path to classes shapefile (many polygons, attributes are kept)
    - road_shp: path to road shapefile (can be many features; they are unioned first)
    - out_shp: path to write the result. The part of each class feature that
      overlaps the roads is cut away; features left with no polygon area are dropped.

    Returns: GeoDataFrame of the difference (also written to out_shp)

    Raises:
      FileNotFoundError : either input path does not exist.
      ValueError        : the classes layer is empty, or an input has no CRS.
    """
    final_classes_shp = Path(final_classes_shp)
    road_shp = Path(road_shp)
    out_shp = Path(out_shp)

    if not final_classes_shp.exists():
        raise FileNotFoundError(f"final_classes file not found: {final_classes_shp}")
    if not road_shp.exists():
        raise FileNotFoundError(f"road file not found: {road_shp}")

    # read inputs
    classes_gdf = gpd.read_file(str(final_classes_shp))
    roads_gdf = gpd.read_file(str(road_shp))

    if classes_gdf.empty:
        raise ValueError("final_classes shapefile contains no features.")
    if roads_gdf.empty:
        # nothing to subtract, just copy classes to output
        out_gdf = classes_gdf.copy()
        out_shp.parent.mkdir(parents=True, exist_ok=True)
        out_gdf.to_file(str(out_shp))
        return out_gdf

    # ensure same CRS (reproject roads to classes CRS)
    if classes_gdf.crs is None or roads_gdf.crs is None:
        raise ValueError("Both inputs must have a defined CRS.")
    if roads_gdf.crs != classes_gdf.crs:
        roads_gdf = roads_gdf.to_crs(classes_gdf.crs)

    # clean invalid geometries
    classes_gdf['geometry'] = classes_gdf.geometry.buffer(0)
    roads_gdf['geometry'] = roads_gdf.geometry.buffer(0)

    # dissolve/union all roads into single geometry
    road_union = unary_union(roads_gdf.geometry.values)

    # subtract road_union from each class feature, remembering which rows survive
    kept_index = []
    kept_geoms = []
    n_empty_input = 0
    n_failed = 0
    n_fully_removed = 0
    for idx, geom in zip(classes_gdf.index, classes_gdf.geometry):
        if geom is None or geom.is_empty:
            n_empty_input += 1
            continue
        try:
            diff = geom.difference(road_union)
        except Exception as exc:
            # best effort: skip the feature, but say so instead of hiding it
            n_failed += 1
            print(f"Warning: difference failed for feature {idx}; feature skipped ({exc})")
            continue
        diff = _polygonal_part(diff)
        if diff is None:
            # no polygon area left after subtraction -> skip
            n_fully_removed += 1
            continue
        kept_index.append(idx)
        kept_geoms.append(diff)

    # Slice the surviving rows from the original frame (keeps column order and
    # dtypes) and swap in the new geometries.
    out_gdf = classes_gdf.loc[kept_index].copy()
    out_gdf['geometry'] = gpd.GeoSeries(kept_geoms, index=out_gdf.index, crs=classes_gdf.crs)

    # optional: explode multiparts into single features (uncomment if desired)
    # out_gdf = out_gdf.explode(index_parts=False).reset_index(drop=True)

    # save output
    out_shp.parent.mkdir(parents=True, exist_ok=True)
    out_gdf.to_file(str(out_shp))

    print(f"Saved result to {out_shp}. Kept {len(out_gdf)} features (dropped {n_fully_removed} fully removed).")
    if n_failed or n_empty_input:
        print(f"Also skipped {n_failed} features whose difference failed and "
              f"{n_empty_input} features with empty input geometry.")
    return out_gdf


# Destination for the classes layer with the road footprint cut out.
try_output_path = os.path.join(output_folder_path, "try_1.shp")

# `road_shp` may also point at an undissolved road layer; the function unions it.
res = subtract_dissolved_roads_from_classes(
    final_classes_shp=output_path,
    road_shp=dissolved_road_path,
    out_shp=try_output_path,
)

Saved result to D:\2_Analytics\9_LULC_classification\automation\data_3\output_folder_new\try_1.shp. Kept 594 features (dropped 27 fully removed).
