In [3]:
import geopandas as gpd
from shapely.geometry import mapping
import rasterio
from rasterio.features import geometry_mask
import numpy as np
from sklearn.cluster import DBSCAN
import torch
import torchvision.models as models
import torchvision.transforms as T
from PIL import Image

# Check if a GPU is available and use it if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load a pre-trained model (e.g., ResNet-50) and move it to the GPU
model = models.resnet50(pretrained=True).to(device)
model.eval()  # Set to evaluation mode

# Define a transformation to apply to image patches
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def extract_polygon_patch(polygon, image_data, transform):
    """Extract image patch within a polygon"""
    mask = geometry_mask([mapping(polygon)], transform=transform, invert=True, out_shape=image_data.shape[1:])
    masked_data = np.zeros_like(image_data[:3])
    for i in range(3):
        masked_data[i] = image_data[i] * mask
    bounds = polygon.bounds
    window = rasterio.windows.from_bounds(*bounds, transform=transform)
    patch = masked_data[:, window.toslices()[0], window.toslices()[1]]
    patch = np.moveaxis(patch, 0, -1)
    patch = Image.fromarray(patch.astype(np.uint8))
    return patch

def extract_features(patch, model, transform, device):
    """Extract features from an image patch"""
    image = transform(patch).unsqueeze(0).to(device)  # Move the image tensor to the GPU
    with torch.no_grad():
        features = model(image)
    return features.squeeze().cpu().numpy()  # Move the features back to the CPU and convert to numpy array

def perform_clustering(features_array, eps=0.5, min_samples=5):
    """Perform DBSCAN clustering on extracted features"""
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    return dbscan.fit_predict(features_array)

def process_shapefile(shapefile_path, tif_file_path, output_shapefile_path, model, transform, device):
    """Process shapefile to extract patches, features, and perform clustering"""
    polygons = gpd.read_file(shapefile_path)
    with rasterio.open(tif_file_path) as src:
        image_data = src.read()

    polygon_features = []
    for polygon in polygons.geometry:
        patch = extract_polygon_patch(polygon, image_data, src.transform)
        if patch.getbbox() is not None:
            features = extract_features(patch, model, transform, device)
            polygon_features.append(features)
        else:
            polygon_features.append(np.zeros(2048))

    features_array = np.array(polygon_features)
    cluster_labels = perform_clustering(features_array)
    polygons['cluster'] = cluster_labels
    polygons.to_file(output_shapefile_path)
    return polygons

# Example usage
shapefile_path = '\\Yehmh\\DNDF\\202404_DNDF\\seg_merged\\merged_seg_masks.shp'
tif_file_path = '\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif'
output_shapefile_path = '\\Yehmh\\DNDF\\202404_DNDF\\transects_DBSCAN_ResNet50\\transects_DBSCAN_ResNet50.shp'

clustered_polygons = process_shapefile(shapefile_path, tif_file_path, output_shapefile_path, model, transform, device)




ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3340,) + inhomogeneous part.

In [4]:
import geopandas as gpd
from shapely.geometry import mapping
import rasterio
from rasterio.features import geometry_mask
import numpy as np
from sklearn.cluster import DBSCAN
import torch
import torchvision.models as models
import torchvision.transforms as T
from PIL import Image

# Check if a GPU is available and use it if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load a pre-trained model (e.g., ResNet-50) and move it to the GPU
model = models.resnet50(pretrained=True).to(device)
model.eval()  # Set to evaluation mode

# Define a transformation to apply to image patches
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def extract_polygon_patch(polygon, image_data, transform):
    """Extract image patch within a polygon"""
    mask = geometry_mask([mapping(polygon)], transform=transform, invert=True, out_shape=image_data.shape[1:])
    masked_data = np.zeros_like(image_data[:3])
    for i in range(3):
        masked_data[i] = image_data[i] * mask
    bounds = polygon.bounds
    window = rasterio.windows.from_bounds(*bounds, transform=transform)
    patch = masked_data[:, window.toslices()[0], window.toslices()[1]]
    patch = np.moveaxis(patch, 0, -1)
    patch = Image.fromarray(patch.astype(np.uint8))
    return patch

def extract_features(patch, model, transform, device):
    """Extract features from an image patch"""
    image = transform(patch).unsqueeze(0).to(device)  # Move the image tensor to the GPU
    with torch.no_grad():
        features = model(image)
    return features.squeeze().cpu().numpy()  # Move the features back to the CPU and convert to numpy array

def perform_clustering(features_array, eps=0.5, min_samples=5):
    """Perform DBSCAN clustering on extracted features"""
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    return dbscan.fit_predict(features_array)

def process_shapefile(shapefile_path, tif_file_path, output_shapefile_path, model, transform, device):
    """Process shapefile to extract patches, features, and perform clustering"""
    polygons = gpd.read_file(shapefile_path)
    with rasterio.open(tif_file_path) as src:
        image_data = src.read()

    polygon_features = []
    for polygon in polygons.geometry:
        patch = extract_polygon_patch(polygon, image_data, src.transform)
        if patch.getbbox() is not None:
            features = extract_features(patch, model, transform, device)
            if features.shape == (2048,):  # Ensure that the features have the expected shape
                polygon_features.append(features)
            else:
                print(f"Unexpected feature shape: {features.shape}")
                polygon_features.append(np.zeros(2048))  # Placeholder for unexpected shapes
        else:
            polygon_features.append(np.zeros(2048))  # Placeholder for empty patches

    features_array = np.array(polygon_features)
    cluster_labels = perform_clustering(features_array)
    polygons['cluster'] = cluster_labels
    polygons.to_file(output_shapefile_path)
    return polygons

# Example usage
shapefile_path = '\\Yehmh\\DNDF\\202404_DNDF\\seg_merged\\merged_seg_masks.shp'
tif_file_path = '\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif'
output_shapefile_path = '\\Yehmh\\DNDF\\202404_DNDF\\transects_DBSCAN_ResNet50\\transects_DBSCAN_ResNet50.shp'

clustered_polygons = process_shapefile(shapefile_path, tif_file_path, output_shapefile_path, model, transform, device)




Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected feature shape: (1000,)
Unexpected fea