# In [4]:
import torch
import torch.nn as nn
import geopandas as gpd
from shapely.geometry import mapping
import rasterio
from rasterio.features import geometry_mask
import numpy as np
import torchvision.transforms as T
from PIL import Image
import h5py
import cv2
from skimage.color import rgb2lab, rgb2gray
from skimage.filters import roberts, sobel
from skimage.filters.rank import entropy
from skimage.morphology import disk
from skimage.transform import resize
from skimage.util import img_as_ubyte
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Preprocessing applied to every extracted patch (a PIL image): resize to
# 224x224, then convert to a tensor with T.ToTensor().
transform = T.Compose(
    [
        T.Resize(size=(224, 224)),
        T.ToTensor(),
    ]
)

def extract_polygon_patch(polygon, image_data, transform):
    """Cut the bounding-box patch of a polygon out of a raster, masking the outside.

    Pixels of the patch that fall outside the polygon are set to zero.

    Args:
        polygon: Geometry exposing ``.bounds`` and accepted by
            ``shapely.geometry.mapping``; assumed to be in the same CRS as
            the raster (not checked here).
        image_data: Array indexed as ``(band, row, col)``; only the first
            three bands are used.
        transform: Affine transform of the raster (pixel -> world
            coordinates), e.g. ``src.transform`` of the rasterio dataset.

    Returns:
        PIL.Image.Image: The masked patch built from the first three bands.

    Raises:
        ValueError: If the polygon's bounding box does not overlap the raster.
    """
    n_rows, n_cols = image_data.shape[1], image_data.shape[2]

    # Pixel-space bounding box of the polygon. Offsets and sizes are floats, so
    # floor the start and ceil the stop: truncating offset and size separately
    # can drop the last row/column that the polygon still covers.
    window = rasterio.windows.from_bounds(*polygon.bounds, transform=transform)
    row_start = max(int(np.floor(window.row_off)), 0)
    col_start = max(int(np.floor(window.col_off)), 0)
    row_stop = min(int(np.ceil(window.row_off + window.height)), n_rows)
    col_stop = min(int(np.ceil(window.col_off + window.width)), n_cols)

    # Clamping above prevents negative offsets from being interpreted as
    # "count from the end" slice indices; an empty result means no overlap.
    if row_stop <= row_start or col_stop <= col_start:
        raise ValueError("Polygon bounding box does not overlap the raster")

    height = row_stop - row_start
    width = col_stop - col_start

    # Rasterize the polygon only over the patch instead of the whole image,
    # using the affine transform of the cropped window.
    patch_window = rasterio.windows.Window(col_start, row_start, width, height)
    patch_transform = rasterio.windows.transform(patch_window, transform)
    mask = geometry_mask(
        [mapping(polygon)],
        transform=patch_transform,
        invert=True,  # True inside the polygon
        out_shape=(height, width),
    )

    # Zero out everything outside the polygon (mask broadcasts over bands).
    patch = image_data[:3, row_start:row_stop, col_start:col_stop] * mask

    # (band, row, col) -> (row, col, band) for PIL.
    patch = np.moveaxis(patch, 0, -1)
    # NOTE(review): values are cast to uint8 without rescaling; this assumes
    # 8-bit imagery -- confirm for the rasters used.
    return Image.fromarray(patch.astype(np.uint8))

def process_shapefile(shapefile_path, tif_file_path, transform, label_column='sp'):
    """Extract one transformed image patch per polygon of a shapefile.

    Args:
        shapefile_path: Path of the vector file read with ``gpd.read_file``.
        tif_file_path: Path of the raster opened with ``rasterio.open``.
        transform: Callable applied to each extracted PIL patch (for example
            the module-level torchvision pipeline).
        label_column: Name of the attribute column holding each polygon's
            label. Defaults to ``'sp'``.

    Returns:
        list: ``(transformed_patch, label)`` tuples, in the same order as the
        rows of the shapefile.
    """
    polygons = gpd.read_file(shapefile_path)

    with rasterio.open(tif_file_path) as src:
        image_data = src.read()
        # Capture the georeferencing while the dataset is still open rather
        # than touching ``src`` after the ``with`` block has closed it.
        raster_transform = src.transform

    patches_and_labels = []
    for polygon, label in zip(polygons.geometry, polygons[label_column]):
        patch = extract_polygon_patch(polygon, image_data, raster_transform)
        patches_and_labels.append((transform(patch), label))

    return patches_and_labels

def extract_custom_features(img):
    """Compute a 9-element colour/texture descriptor of an RGB image.

    The returned vector is, in order: mean of the LAB ``a`` channel, mean of
    the LAB ``b`` channel, mean and standard deviation of the local entropy,
    mean Roberts edge response, mean Sobel edge response, and the mean
    responses of three Gabor filters.

    Args:
        img: RGB image array of shape ``(rows, cols, 3)``.

    Returns:
        numpy.ndarray: 1-D array with the nine features listed above.
    """
    # Colour features: mean of the a and b channels in CIELAB space.
    lab = rgb2lab(img)
    mean_a = lab[:, :, 1].mean()
    mean_b = lab[:, :, 2].mean()

    # Texture features are computed on a 256x256 8-bit grayscale version.
    gray = img_as_ubyte(resize(rgb2gray(img), (256, 256)))

    # Local entropy in a disk-shaped neighbourhood of radius 3.
    local_entropy = entropy(gray, disk(3))

    # Gabor kernels, all 9x9 with psi = 0: (sigma, theta, lambd, gamma).
    gabor_settings = (
        (3, np.pi/4, np.pi, 0.5),
        (3, np.pi/2, np.pi/4, 0.9),
        (5, np.pi/2, np.pi/2, 0.1),
    )
    gabor_means = []
    for sigma, theta, lambd, gamma in gabor_settings:
        kernel = cv2.getGaborKernel((9, 9), sigma, theta, lambd, gamma, 0, ktype=cv2.CV_32F)
        response = cv2.filter2D(gray, cv2.CV_8UC3, kernel)
        gabor_means.append(response.mean())

    return np.array([
        mean_a,
        mean_b,
        local_entropy.mean(),
        local_entropy.std(),
        roberts(gray).mean(),
        sobel(gray).mean(),
        *gabor_means,
    ])

def extract_features(patches_and_labels):
    """Compute the custom feature vector of every patch.

    Args:
        patches_and_labels: Iterable of ``(patch, label)`` pairs, where
            ``patch`` is a tensor laid out as (channel, row, col).

    Returns:
        tuple: ``(features, labels)`` -- two lists of equal length, kept in
        input order.
    """
    features, labels = [], []
    for tensor_patch, patch_label in patches_and_labels:
        # (channel, row, col) -> (row, col, channel), scaled by 255 and
        # cast to 8-bit.
        hwc = tensor_patch.permute(1, 2, 0).numpy()
        image_u8 = (hwc * 255).astype(np.uint8)
        features.append(extract_custom_features(image_u8))
        labels.append(patch_label)
    return features, labels

def perform_kmeans_clustering(features_array, n_clusters=6, n_init=10, random_state=0):
    """Cluster feature vectors with K-means and return each sample's cluster.

    Args:
        features_array: 2-D array-like of shape ``(n_samples, n_features)``.
        n_clusters: Number of clusters to form. Defaults to 6.
        n_init: Number of K-means initialisations to run. Passed explicitly
            so the result does not depend on the scikit-learn version's
            default and no warning about the changing default is emitted.
        random_state: Seed that makes the clustering reproducible.

    Returns:
        numpy.ndarray: Cluster index of every row of ``features_array``.
    """
    kmeans = KMeans(n_clusters=n_clusters, n_init=n_init, random_state=random_state)
    return kmeans.fit_predict(features_array)

def process_shapefile_for_clustering(shapefile_path, tif_file_path, output_shapefile_path, transform, n_clusters=6):
    """Cluster the polygons of a shapefile by image features and save the result.

    A patch is extracted for every polygon, described with the custom
    colour/texture features and clustered with K-means. The cluster index is
    stored in a new ``'cluster'`` column and the layer is written to
    ``output_shapefile_path``.

    Args:
        shapefile_path: Path of the input polygon shapefile.
        tif_file_path: Path of the raster the patches are cut from.
        output_shapefile_path: Path the clustered layer is written to.
        transform: Callable applied to each extracted patch.
        n_clusters: Number of K-means clusters. Defaults to 6.

    Returns:
        geopandas.GeoDataFrame: The input polygons with the added
        ``'cluster'`` column.

    Raises:
        ValueError: If the shapefile yields no patches to cluster.
    """
    patches_and_labels = process_shapefile(shapefile_path, tif_file_path, transform)
    # The labels stored in the shapefile are not needed for clustering.
    features, _ = extract_features(patches_and_labels)
    if not features:
        raise ValueError(f"No polygons to cluster in {shapefile_path!r}")

    features_array = np.array(features)
    cluster_labels = perform_kmeans_clustering(features_array, n_clusters=n_clusters)

    # Features are produced in row order of the shapefile, so the cluster
    # labels line up positionally with a fresh read of the same file.
    polygons = gpd.read_file(shapefile_path)
    polygons['cluster'] = cluster_labels
    polygons.to_file(output_shapefile_path)
    return polygons

# Example run: cluster the labelled segments of the DNDF 101 clip and write
# the result next to the inputs.
shapefile_path = 'h:\\Yehmh\\DNDF\\101_1_focus\\202404_101_seg_shp_labeled\\DNDF101_clip_seg_labeled.shp'
tif_file_path = 'h:\\Yehmh\\DNDF\\101_1_focus\\DNDF101_clip.tif'
output_shapefile_path = 'h:\\Yehmh\\DNDF\\101_1_focus\\DNDF_202404_101_1_kmeans_custom.shp'

clustered_polygons = process_shapefile_for_clustering(
    shapefile_path=shapefile_path,
    tif_file_path=tif_file_path,
    output_shapefile_path=output_shapefile_path,
    transform=transform,
)


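# Notebook output (stderr) captured with the cell above. Only the echoed
# source line of a warning survived the export:
#   super()._check_params_vs_input(X, default_n_init=10)
# Presumably this is scikit-learn's warning about the changing default of
# KMeans' `n_init`; the warning message itself is not preserved here.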
