In [None]:
tiff_file = "h:\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif"
results.to_csv("h:\\Yehmh\\DNDF\\202404_DNDF\\202404_DNDF_cluster_resnet50_5_5.csv", index=False)

In [None]:
import rasterio

# Load the TIFF file
tiff_file = "h:\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif"

# Open the file with rasterio
with rasterio.open(tiff_file) as src:
    # Print general metadata
    print(f"Width: {src.width}")
    print(f"Height: {src.height}")
    print(f"Number of bands: {src.count}")
    print(f"Data type: {src.dtypes}")
    print(f"CRS: {src.crs}")
    print(f"Transform: {src.transform}")


In [2]:
import rasterio
import numpy as np
import torch
import torchvision.models as models
from torchvision import transforms
from sklearn.cluster import KMeans
import pandas as pd

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the TIFF file
# tiff_file = "h:\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif"
tiff_file = "h:\\Yehmh\\DNDF\\202406_DNDF\\202406DNDFmerge.tif"
with rasterio.open(tiff_file) as src:
    # Metadata
    transform = src.transform
    width = src.width
    height = src.height

    # Calculate pixel size (in meters) based on the resolution
    pixel_width = abs(transform[0])  # Pixel width in meters
    pixel_height = abs(transform[4])  # Pixel height in meters

    # Calculate the number of pixels that correspond to 5 meters
    crop_width_px = int(5 / pixel_width)
    crop_height_px = int(5 / pixel_height)

    bands = src.read([1, 2, 3])  # Read only the RGB bands (1, 2, 3)

# Pretrained ResNet50 model
model = models.resnet50(pretrained=True)
model = torch.nn.Sequential(*list(model.children())[:-1])  # Remove the final classification layer
model = model.to(device)  # Move model to GPU if available
model.eval()

# Transform for ResNet-50 input
preprocess = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

features = []
centers = []

# Iterate over the TIFF image to crop based on the calculated pixel dimensions
for i in range(0, width, crop_width_px):
    for j in range(0, height, crop_height_px):
        # Crop the image
        crop = bands[:, j:j+crop_height_px, i:i+crop_width_px]

        # Check for zero-value RGB pixels
        if np.all(crop == 0):
            continue  # Skip crops with all 0-value pixels

        # Drop the alpha band (no alpha band in this case, but we keep RGB)
        crop = crop[:3]  # Ensure only RGB channels

        # Convert the crop to uint8
        crop = np.transpose(crop, (1, 2, 0)).astype(np.uint8)  # Change dtype to uint8

        # Transform and extract features
        input_tensor = preprocess(crop).unsqueeze(0).to(device)  # Move tensor to GPU if available
        with torch.no_grad():
            feature = model(input_tensor).squeeze().cpu().numpy()  # Move output back to CPU

        # Store the features and the center coordinates
        features.append(feature)
        center_coord = (i + crop_width_px // 2, j + crop_height_px // 2)
        centers.append(src.transform * center_coord)  # Convert to TM2 coordinates

# Cluster the images using k-means
kmeans = KMeans(n_clusters=10, random_state=0).fit(features)
labels = kmeans.labels_

# Save results to CSV
results = pd.DataFrame({
    "x_coord": [coord[0] for coord in centers],
    "y_coord": [coord[1] for coord in centers],
    "cluster": labels
})
# results.to_csv("h:\\Yehmh\\DNDF\\202404_DNDF\\202404_DNDF_cluster_resnet50_5_5.csv", index=False)
results.to_csv("h:\\Yehmh\\DNDF\\202406_DNDF\\202406DNDFmerge_cluster_resnet50_5_5_10.csv", index=False)


  super()._check_params_vs_input(X, default_n_init=10)


: 

In [5]:
import rasterio
import numpy as np
import torch
import torchvision.models as models
from torchvision import transforms
from sklearn.cluster import KMeans
import pandas as pd
import geopandas as gpd
from shapely.geometry import box
from rasterio.transform import from_origin

# Initialize the model
def initialize_model(device):
    model = models.resnet50(pretrained=True)
    model = torch.nn.Sequential(*list(model.children())[:-1])  # Remove the final classification layer
    model = model.to(device)  # Move model to GPU if available
    model.eval()
    return model

# Extract features from the image
def extract_features_from_image(tiff_file, crop_size_meters, model, device):
    with rasterio.open(tiff_file) as src:
        transform = src.transform
        width = src.width
        height = src.height
        crs = src.crs

        pixel_width = abs(transform[0])  # Pixel width in meters
        pixel_height = abs(transform[4])  # Pixel height in meters

        crop_width_px = int(crop_size_meters / pixel_width)
        crop_height_px = int(crop_size_meters / pixel_height)

        bands = src.read([1, 2, 3])  # Read only the RGB bands

    preprocess = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    features = []
    centers = []
    geometries = []

    for i in range(0, width, crop_width_px):
        for j in range(0, height, crop_height_px):
            crop = bands[:, j:j+crop_height_px, i:i+crop_width_px]

            if np.all(crop == 0):
                continue  # Skip crops with all 0-value pixels

            crop = np.transpose(crop[:3], (1, 2, 0)).astype(np.uint8)  # Ensure only RGB channels and convert dtype to uint8

            input_tensor = preprocess(crop).unsqueeze(0).to(device)  # Move tensor to GPU if available
            with torch.no_grad():
                feature = model(input_tensor).squeeze().cpu().numpy()  # Move output back to CPU

            features.append(feature)
            center_coord = (i + crop_width_px // 2, j + crop_height_px // 2)
            centers.append(src.transform * center_coord)  # Convert to TM2 coordinates

            minx, miny = src.transform * (i, j)
            maxx, maxy = src.transform * (i + crop_width_px, j + crop_height_px)
            geometries.append(box(minx, miny, maxx, maxy))

    return features, centers, geometries, crs, transform, height, width

# Perform clustering on the features
def perform_clustering(features, n_clusters=5):
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(features)
    return kmeans.labels_

# Save the results as a shapefile
def save_shapefile(geometries, labels, crs, shapefile_path):
    gdf = gpd.GeoDataFrame({
        'geometry': geometries,
        'cluster': labels
    }, crs=crs)
    gdf.to_file(shapefile_path)

# Save the clustered labels as a TIFF file
def save_tiff(labels, transform, height, width, crs, tiff_file_path):
    labels_image = np.zeros((height, width), dtype=np.int32)
    labels_image[:len(labels)] = labels.reshape(-1, 1)  # Assuming labels length fits into image dimensions

    with rasterio.open(
        tiff_file_path,
        'w',
        driver='GTiff',
        height=height,
        width=width,
        count=1,
        dtype=rasterio.int32,
        crs=crs,
        transform=transform,
    ) as dst:
        dst.write(labels_image, 1)

# Save the clustering results as a CSV file
def save_csv(centers, labels, csv_file_path):
    results = pd.DataFrame({
        "x_coord": [coord[0] for coord in centers],
        "y_coord": [coord[1] for coord in centers],
        "cluster": labels
    })
    results.to_csv(csv_file_path, index=False)

# Main function to process the image
def process_image(tiff_file, crop_size_meters, shapefile_path, tiff_file_path, csv_file_path, n_clusters=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = initialize_model(device)
    
    features, centers, geometries, crs, transform, height, width = extract_features_from_image(
        tiff_file, crop_size_meters, model, device
    )
    labels = perform_clustering(features, n_clusters)

    save_shapefile(geometries, labels, crs, shapefile_path)
    save_tiff(labels, transform, height, width, crs, tiff_file_path)
    save_csv(centers, labels, csv_file_path)


In [10]:
print(labels)

[7 7 9 ... 9 0 9]


In [11]:
import os

shapefile_path = "h:\\Yehmh\\DNDF\\202405_DNDF\\5_5_cluster\\DNDF_merge_grid_5_5.shp"
tiff_file_path = "h:\\Yehmh\\DNDF\\202405_DNDF\\5_5_cluster\\DNDF_merge_clustered.tif"
csv_file_path = "h:\\Yehmh\\DNDF\\202405_DNDF\\5_5_cluster\\DNDF_mergee_cluster_resnet50_5_5.csv"

crop_size_meters = 5

features, centers, geometries, crs, transform, height, width = extract_features_from_image(
        tiff_file, crop_size_meters, model, device
    )

os.makedirs(os.path.dirname(shapefile_path), exist_ok=True)

save_shapefile(geometries, labels, crs, shapefile_path)
save_tiff(labels, transform, height, width, crs, tiff_file_path)
save_csv(centers, labels, csv_file_path)

ValueError: could not broadcast input array from shape (175255,1) into shape (49597,44910)

In [7]:
# Example usage:
tiff_file = "h:\\Yehmh\\DNDF\\202404_DNDF\\DNDF_merge.tif"
shapefile_path = "h:\\Yehmh\\DNDF\\202405_DNDF\\5_5_cluster\\DNDF_merge_grid_5_5.shp"
tiff_file_path = "h:\\Yehmh\\DNDF\\202405_DNDF\\5_5_cluster\\DNDF_merge_clustered.tif"
csv_file_path = "h:\\Yehmh\\DNDF\\202405_DNDF\\5_5_cluster\\DNDF_mergee_cluster_resnet50_5_5.csv"

process_image(tiff_file, crop_size_meters=5, shapefile_path=shapefile_path, tiff_file_path=tiff_file_path, csv_file_path=csv_file_path)

  super()._check_params_vs_input(X, default_n_init=10)


DriverIOError: Failed to create file h:\Yehmh\DNDF\202405_DNDF\5_5_cluster\DNDF_merge_grid_5_5.shp: No such file or directory

In [3]:
import pickle
import h5py

# Save features, labels, and centers to a pickle file
with open("h:\\Yehmh\\DNDF\\202404_DNDF\\intermediate_clustering_results.pkl", "wb") as f:
    pickle.dump({"features": features, "labels": labels, "centers": centers}, f)

with h5py.File("h:\\Yehmh\\DNDF\\202404_DNDF\\intermediate_clustering_results.h5", "w") as f:
    f.create_dataset('features', data=features)
    f.create_dataset('labels', data=labels)
    f.create_dataset('centers', data=centers)

In [4]:
import h5py

with h5py.File("h:\\Yehmh\\DNDF\\202404_DNDF\\intermediate_clustering_results.h5", "r") as f:
    features = f['features'][:]
    labels = f['labels'][:]
    centers = f['centers'][:]

target_cluster = 3  # The cluster you want to re-cluster
filtered_features = [feat for feat, label in zip(features, labels) if label == target_cluster]
filtered_centers = [center for center, label in zip(centers, labels) if label == target_cluster]

# Re-cluster the filtered features
new_kmeans = KMeans(n_clusters=2, random_state=42).fit(filtered_features)
new_labels = new_kmeans.labels_

# Save the new cluster labels for further analysis
results = pd.DataFrame({
    "x_coord": [coord[0] for coord in filtered_centers],
    "y_coord": [coord[1] for coord in filtered_centers],
    "new_cluster": new_labels
})
results.to_csv(f"h:\\Yehmh\\DNDF\\202404_DNDF\\202404_DNDF_reclustered_cluster_{target_cluster}_2.csv", index=False)


  super()._check_params_vs_input(X, default_n_init=10)
