In [1]:
import rasterio
import geopandas as gpd
import numpy as np
from rasterio.features import rasterize
from shapely.geometry import box
from sklearn.metrics import confusion_matrix
import os

def load_shape_files(shape_file_paths):
    """Read each path with ``gpd.read_file`` and collect the results.

    Args:
        shape_file_paths: Iterable of paths to vector files.

    Returns:
        A list holding one ``gpd.read_file`` result per path, in input order.
    """
    frames = []
    for shp_path in shape_file_paths:
        frames.append(gpd.read_file(shp_path))
    return frames

def rasterize_shapes(shape_files, out_shape, transform, classes, bounds):
    """Burn every layer's geometries into one class-labelled raster.

    Layer ``shape_files[i]`` is painted with the value ``classes[i]``. Only
    geometries intersecting the rectangle built from ``bounds`` are used.
    Layers are processed in order, so where two layers cover the same pixel
    the later one overwrites the earlier one. Pixels covered by no layer
    stay 0.

    Args:
        shape_files: Sequence of layers exposing ``.geometry`` (as returned
            by ``load_shape_files``).
        out_shape: ``(height, width)`` of the output array.
        transform: Affine transform forwarded to ``rasterize``.
        classes: Label value for each layer, indexed like ``shape_files``.
        bounds: Four values unpacked into ``shapely.geometry.box``.

    Returns:
        An ``int32`` array of shape ``out_shape``.
    """
    labels = np.zeros(out_shape, dtype=np.int32)
    extent = box(*bounds)  # Rectangle used to discard geometries off the image

    for index, layer in enumerate(shape_files):
        inside = layer[layer.geometry.intersects(extent)]
        if inside.empty:
            # Nothing from this layer touches the image; leave its class out.
            continue

        # Lazy (geometry, value) pairs, consumed by rasterize inside the try.
        pairs = ((geometry, classes[index]) for geometry in inside.geometry)
        try:
            burned = rasterize(pairs, out_shape=out_shape, transform=transform, fill=0)
            labels[burned > 0] = classes[index]
        except Exception as e:
            # Best effort: report the failure and carry on with the next layer.
            print(f"Error rasterizing shapes for class {classes[index]}: {e}")

    return labels

# Example usage: rasterize per-class shapefiles into a ground-truth raster and
# score a classified image against it.
shape_file_dir = 'data_directory'  # Subdirectory for shape files
shape_file_paths = [os.path.join(shape_file_dir, f'class{i}.shp') for i in range(1, 7)]
shape_files = load_shape_files(shape_file_paths)

# Load the classified image
classified_image_path = 'Ensembled_output.tif'  # Adjust with your classified image path
with rasterio.open(classified_image_path) as src:
    classified_image = src.read(1)  # Assuming single-band TIF
    transform = src.transform
    out_shape = (src.height, src.width)
    bounds = src.bounds

# Define the classes corresponding to each shape file
classes = [1, 2, 3, 4, 5, 6]

# Rasterize the shape files to create the ground truth raster.
# NOTE(review): nothing here checks that the shapefiles use the same CRS as
# the raster; confirm they match, or reproject before rasterizing.
ground_truth_raster = rasterize_shapes(shape_files, out_shape, transform, classes, bounds)

# Flatten both the ground truth raster and the classified image to match for comparison
ground_truth_labels = ground_truth_raster.flatten()
predicted_labels = classified_image.flatten()

# Calculate the confusion matrix. Restricting to `labels=classes` means only
# pixels whose true AND predicted value are both in `classes` are counted, so
# unlabelled ground-truth pixels (value 0) do not contribute.
conf_matrix = confusion_matrix(ground_truth_labels, predicted_labels, labels=classes)

# Guard the accuracy division: with no counted pixels it would be 0/0, which
# yields a RuntimeWarning and a misleading "nan%" line.
total_samples = conf_matrix.sum()
if total_samples > 0:
    accuracy = np.diag(conf_matrix).sum() / total_samples
    print(f"Accuracy: {accuracy * 100:.2f}%")
else:
    accuracy = float('nan')  # keep the name defined for any later use
    print("Accuracy: n/a (no pixel has both a ground-truth and a predicted label in `classes`)")

# Output the results
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 82.22%
Confusion Matrix:
[[ 0  0  0  0  0  0]
 [ 0 13  0  0  0  0]
 [ 1  4  5  0  0  0]
 [ 1  0  0  1  0  0]
 [ 1  0  0  0  9  0]
 [ 0  0  0  0  1  9]]
