# In [4]:
import os

import geopandas as gpd
import numpy as np
import rasterio
from rasterio.features import geometry_mask
from shapely.geometry import mapping
from sklearn.cluster import DBSCAN

# Input locations: the labeled segmentation polygons and the raster image
shp_file = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_shp_labeled\\DNDF101_clip_seg_labeled.shp'
tif_file = 'h:\\Yehmh\\DNDF\\101_1_focus\\DNDF101_clip.tif'

# Load the polygon features into a GeoDataFrame
polygons = gpd.read_file(shp_file)

# Grab band 1 and the georeferencing transform while the dataset is open;
# both are needed later to map polygons onto pixel positions
with rasterio.open(tif_file) as dataset:
    image_data, transform = dataset.read(1), dataset.transform

# Function to extract image data within a polygon
def extract_polygon_data(polygon, image_data, transform):
    """Return the raster values of the pixels covered by ``polygon``.

    Args:
        polygon: Shapely geometry expressed in the raster's coordinate
            system, or ``None`` for a feature that has no geometry.
        image_data: 2-D array holding a single raster band.
        transform: Affine transform of the raster (pixel -> map coordinates).

    Returns:
        A 1-D array with the values of the covered pixels. The array is
        empty when the geometry is missing or empty, or when no pixel is
        selected by the mask.
    """
    # Null/empty geometries cannot be rasterized. Report "no pixels" instead
    # of raising, so the caller can treat them like any polygon without data.
    if polygon is None or polygon.is_empty:
        return np.empty(0, dtype=image_data.dtype)

    # invert=True flips the default mask so that pixels INSIDE the shape
    # are True and can be used directly as a boolean index.
    inside = geometry_mask(
        [mapping(polygon)],
        out_shape=image_data.shape,
        transform=transform,
        invert=True,
    )
    return image_data[inside]

# DBSCAN settings; adjust them to the value range of the raster band.
DBSCAN_EPS = 0.3
DBSCAN_MIN_SAMPLES = 10

# Label stored for polygons from which no pixels were extracted.
# NOTE: DBSCAN itself labels noise samples -1, so in the output a value of -1
# means either "no pixels" or "most pixels of the polygon were noise".
NO_DATA_LABEL = -1

output_shp = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_shp_labeled_clustered\\DNDF101_clip_seg_labeled_clustered.shp'

# fit() recomputes everything from scratch on each call, so a single
# estimator instance can be reused for every polygon.
dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)

# One dominant cluster label per polygon, in the same order as the rows
cluster_labels = []

# Extract image data for each polygon and perform clustering.
# NOTE: every polygon is clustered independently, so the same label value in
# two different polygons does not refer to the same cluster.
for polygon in polygons.geometry:
    data = extract_polygon_data(polygon, image_data, transform)
    data = data.reshape(-1, 1)  # DBSCAN expects (n_samples, n_features)

    if len(data) > 0:
        dbscan.fit(data)
        # Take the most common cluster label among the polygon's pixels
        labels, counts = np.unique(dbscan.labels_, return_counts=True)
        # Cast to a plain int so the column holds one consistent type
        cluster_label = int(labels[np.argmax(counts)])
    else:
        cluster_label = NO_DATA_LABEL

    cluster_labels.append(cluster_label)

# Add cluster labels to the GeoDataFrame
polygons['cluster'] = cluster_labels

# Make sure the destination folder exists; otherwise the write would fail
# only after the whole clustering run has already been paid for.
os.makedirs(os.path.dirname(output_shp), exist_ok=True)

# Save the new shapefile with cluster information
polygons.to_file(output_shp)
