In [None]:
%pip install tensorflow keras numpy pandas matplotlib scikit-learn keras-tuner building_footprint_segmentation

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting building_footprint_segmentation
  Downloading building_footprint_segmentation-0.2.4-py3-none-any.whl.metadata (5.0 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading building_footprint_segmentation-0.2.4-py3-none-any.whl (35 kB)
Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, building_footprint_segmentation, keras-tuner
Successfully installed building_footprint_segmentation-0.2.4 keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
# Mount Google Drive at /content/drive; the validation images and the classifier
# weights used later in this notebook are read from paths under that mount point.
# NOTE(review): force_remount=True presumably re-mounts even when Drive is already
# mounted in this session — confirm against the Colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


# Section 1: Imports and Constants

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import random
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from building_footprint_segmentation.seg.binary.models import ReFineNet
from building_footprint_segmentation.helpers.normalizer import min_max_image_net
from building_footprint_segmentation.utils.py_network import (
    to_input_image_tensor, add_extra_dimension, adjust_model
)

# Constants
# Side length (in pixels) of the square that resize_image() scales every input to.
MAX_SIZE = 512
# Location of the ReFineNet weights archive downloaded by set_model_weights().
MODEL_URL = "https://github.com/fuzailpalnak/building-footprint-segmentation/releases/download/alpha/refine.zip"
# Cut-off applied in extract() to the averaged sigmoid output to obtain a binary mask.
THRESHOLD = 0.4
# Inclusive area bounds for keeping a detected region: compared against the
# connected-component pixel count in postprocess_mask() and against the contour
# area in process_single_image().
MIN_BUILDING_SIZE = 500
MAX_BUILDING_SIZE = 100000

# Section 2: Building Detection Model Functions

In [None]:
def get_trained_model():
    """Create the ReFineNet segmentation network, ready for inference.

    Returns:
        tuple: ``(model, device)`` where ``model`` has had its weights loaded by
        ``set_model_weights``, has been moved to ``device`` and put in eval
        mode, and ``device`` is CUDA when available and CPU otherwise.
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    network = ReFineNet()
    set_model_weights(network)
    network.to(device)
    network.eval()
    return network, device

def set_model_weights(model):
    """Load the pretrained weights into ``model``, caching them on disk.

    The checkpoint is read from ``./model_weights/refine.pth`` when that file
    exists; otherwise it is downloaded from ``MODEL_URL`` and saved there.

    Args:
        model: Module whose ``load_state_dict`` receives the adjusted weights.
    """
    weights_dir = "./model_weights"
    os.makedirs(weights_dir, exist_ok=True)
    local_copy = os.path.join(weights_dir, "refine.pth")

    if os.path.exists(local_copy):
        checkpoint = torch.load(local_copy, map_location="cpu")
    else:
        checkpoint = torch.hub.load_state_dict_from_url(MODEL_URL, progress=True, map_location="cpu")
        torch.save(checkpoint, local_copy)

    # The checkpoint may wrap the actual weights under a "model" key.
    weights = checkpoint["model"] if "model" in checkpoint else checkpoint
    model.load_state_dict(adjust_model(weights))

# Section 3: Image Processing Functions

In [None]:
def resize_image(image):
    """Return ``image`` as a ``MAX_SIZE`` x ``MAX_SIZE`` array.

    An image that already has that height and width is returned unchanged (the
    same object); anything else is resized with ``cv2.INTER_AREA`` interpolation
    straight to the square target size.
    """
    height, width = image.shape[:2]
    if height == MAX_SIZE and width == MAX_SIZE:
        return image
    return cv2.resize(image, (MAX_SIZE, MAX_SIZE), interpolation=cv2.INTER_AREA)

# Implement Test-Time Augmentation (TTA)
def predict_with_tta(model, device, image_tensor):
    """Average sigmoid predictions over the input and two flipped copies of it.

    Args:
        model: Callable applied to a tensor; ``.sigmoid()`` is taken of its output.
        device: Not used inside this function.
        image_tensor: Input tensor; it is flipped along dims 3 and 2.
            # assumes (batch, channel, height, width) layout — TODO confirm

    Returns:
        Tensor holding the element-wise mean of the three predictions.
    """
    # The un-augmented pass goes first in the stack.
    outputs = [model(image_tensor).sigmoid()]

    # dim 3 is the horizontal flip, dim 2 the vertical flip; each prediction is
    # flipped back along the same axis so it lines up with the original input.
    for axis in (3, 2):
        mirrored = torch.flip(image_tensor, dims=[axis])
        restored = torch.flip(model(mirrored).sigmoid(), dims=[axis])
        outputs.append(restored)

    return torch.mean(torch.stack(outputs), dim=0)

# Extract building mask and generate binary output
def extract(model, device, original_image):
    """Segment buildings in ``original_image`` and return a cleaned binary mask.

    Args:
        model: Segmentation network handed to ``predict_with_tta``.
        device: Device the input tensor is moved to before inference.
        original_image: Image array; it is resized by ``resize_image`` first.

    Returns:
        tuple: ``(cleaned_mask, resized_image)`` — the output of
        ``postprocess_mask`` on the thresholded prediction, and the resized
        image the mask corresponds to.
    """
    resized_image = resize_image(original_image)
    model_input = add_extra_dimension(
        to_input_image_tensor(min_max_image_net(img=resized_image))
    ).to(device)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        probabilities = predict_with_tta(model, device, model_input)

    # Take element [0, 0] of the prediction and binarise it at THRESHOLD.
    probability_map = probabilities.cpu().numpy()[0, 0]
    binary_mask = (probability_map > THRESHOLD).astype(np.uint8)

    # Remove noise and split attached buildings before handing the mask back.
    return postprocess_mask(binary_mask), resized_image

# Section 4: Mask Post-processing Functions

In [None]:
# Post-process the mask to remove small regions, noise, and separate attached buildings
def postprocess_mask(mask):
    """Clean a binary building mask.

    Steps: two passes of 3x3 morphological opening, watershed separation via
    ``separate_attached_buildings``, then removal of every 8-connected
    component whose pixel area lies outside
    ``[MIN_BUILDING_SIZE, MAX_BUILDING_SIZE]``.

    Args:
        mask: Binary mask passed to ``cv2.morphologyEx``.

    Returns:
        ``uint8`` array with 1 on the kept components and 0 elsewhere.
    """
    opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    opened = cv2.morphologyEx(mask, cv2.MORPH_OPEN, opening_kernel, iterations=2)

    separated = separate_attached_buildings(opened)

    _, label_map, stats, _ = cv2.connectedComponentsWithStats(separated, connectivity=8)

    # One keep/drop flag per label, then look the flag up for every pixel.
    areas = stats[:, cv2.CC_STAT_AREA]
    keep = (areas >= MIN_BUILDING_SIZE) & (areas <= MAX_BUILDING_SIZE)
    keep[0] = False  # label 0 is the background and is never kept
    return keep[label_map].astype(np.uint8)

# Function to separate attached buildings using watershed algorithm
def separate_attached_buildings(mask):
    """Split touching regions of a binary mask with a marker-based watershed.

    Args:
        mask: Single-channel array passed to ``cv2.distanceTransform``.
            NOTE(review): the caller in this notebook passes a 0/1 ``uint8``
            mask — confirm before reusing with other inputs.

    Returns:
        Array with the shape and dtype of ``mask`` holding 1 wherever the
        watershed assigned a marker label greater than 1, and 0 elsewhere.
    """
    # Compute the distance transform
    distance = cv2.distanceTransform(mask, cv2.DIST_L2, 5)
    # Rescale the distances in place to [0, 1]; the threshold below is applied
    # to these normalised values
    cv2.normalize(distance, distance, 0, 1.0, cv2.NORM_MINMAX)
    # Pixels whose normalised distance exceeds 0.1 become the seed
    # ("sure foreground") pixels
    ret, sure_fg = cv2.threshold(distance, 0.1, 1.0, cv2.THRESH_BINARY)
    # Grow the seeds with two passes of a 3x3 dilation
    kernel = np.ones((3, 3), np.uint8)
    sure_fg = cv2.dilate(sure_fg, kernel, iterations=2)
    # Finding unknown region: the mask dilated three times minus the seeds
    sure_bg = cv2.dilate(mask, kernel, iterations=3)
    unknown = cv2.subtract(sure_bg, np.uint8(sure_fg))
    # Marker labeling
    ret, markers = cv2.connectedComponents(np.uint8(sure_fg))
    # Add one to all labels so that sure background is not 0, but 1
    markers = markers + 1
    # Mark the unknown regions with zero
    markers[unknown == 1] = 0
    # Apply watershed on the mask scaled to 0/255 and converted to a 3-channel image
    markers = cv2.watershed(cv2.cvtColor(mask * 255, cv2.COLOR_GRAY2BGR), markers)
    # Generate new mask with separated buildings: labels above 1 are the seeded
    # regions (1 is the background label after the +1 shift above)
    separated_mask = np.zeros_like(mask)
    separated_mask[markers > 1] = 1

    # Removed visualization of markers for watershed

    return separated_mask

# Section 5: Building Classification Functions

In [None]:
# Function to classify multiple building regions in a batch with extended bounding boxes
def classify_building_regions(image, contours, model, class_labels, padding=10):
    """Classify the padded bounding-box crop of every contour in one batch.

    Args:
        image: Image array the crops are cut from.
        contours: Iterable of contours accepted by ``cv2.boundingRect``.
        model: Classifier exposing ``predict``; it receives the 224x224 crops
            after ``preprocess_input``.
        class_labels: Sequence indexed by the arg-max of each prediction row.
        padding: Pixels added on every side of each bounding box, clamped to
            the image borders.

    Returns:
        list: One ``(label, probability, (x, y, w, h))`` tuple per contour, in
        contour order; an empty list when ``contours`` is empty.
    """
    img_h, img_w = image.shape[:2]
    crops = []
    boxes = []

    for contour in contours:
        bx, by, bw, bh = cv2.boundingRect(contour)

        # Grow the box by `padding` on each side without leaving the image.
        left = max(0, bx - padding)
        top = max(0, by - padding)
        right = min(img_w, bx + bw + padding)
        bottom = min(img_h, by + bh + padding)

        # Crop and bring the region to the classifier's 224x224 input size.
        crop = image[top:bottom, left:right]
        crops.append(cv2.resize(crop, (224, 224)))
        boxes.append((left, top, right - left, bottom - top))

    if len(crops) == 0:
        return []

    batch = preprocess_input(np.array([img_to_array(crop) for crop in crops]))

    scores = model.predict(batch)
    best_indices = np.argmax(scores, axis=1)
    best_scores = np.max(scores, axis=1)
    best_labels = [class_labels[index] for index in best_indices]

    return list(zip(best_labels, best_scores, boxes))

# Section 6: Single Image Processing Function

In [None]:
# Process each image: segment it, classify detected buildings, and store results
def _show_image(image, title):
    """Display ``image`` in a 10x10 inch matplotlib figure with axes hidden."""
    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.title(title)
    plt.axis('off')
    plt.show()


def _draw_building_annotation(vis_image, bbox, label_lines):
    """Draw a bounding box and a multi-line label (black on white) in place."""
    x, y, w, h = bbox

    # Bounding box: green in the RGB image, 2 px thick.
    cv2.rectangle(vis_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1
    line_gap = 5

    text_sizes = [cv2.getTextSize(line, font, font_scale, font_thickness)[0] for line in label_lines]
    text_width = max(width for width, _ in text_sizes)
    text_height = sum(height + line_gap for _, height in text_sizes)

    # Anchor the label above the box when there is room, otherwise below it.
    y_offset = y - 10 if y - 10 > 10 else y + h + 20

    rect_x1 = x
    rect_y1 = y_offset - text_height if y_offset - text_height > 0 else y_offset
    rect_x2 = x + text_width + 10
    # Size the background from its own top edge so it covers every text line,
    # including when rect_y1 falls back to y_offset (the text then extends
    # below y_offset, which a bottom edge fixed at y_offset + 5 did not cover).
    rect_y2 = rect_y1 + text_height + 5

    cv2.rectangle(vis_image, (rect_x1, rect_y1), (rect_x2, rect_y2), (255, 255, 255), -1)

    text_y = rect_y1 + text_sizes[0][1] + line_gap
    for (_, height), line in zip(text_sizes, label_lines):
        cv2.putText(vis_image, line, (x + 5, text_y), font, font_scale, (0, 0, 0), font_thickness)
        text_y += height + line_gap


def process_single_image(image_path, actual_class, model, device, classification_model, class_labels, visualize=False):
    """Segment one image, classify each detected building and score the results.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        actual_class: Ground-truth label compared against every prediction.
        model: Segmentation network passed to ``extract``.
        device: Device passed to ``extract``.
        classification_model: Classifier passed to ``classify_building_regions``.
        class_labels: Label names passed to ``classify_building_regions``.
        visualize: When True, show the annotated image (or the original image
            when no building was detected) with matplotlib.

    Returns:
        tuple: ``(correct_predictions, total_predictions, per_building_results)``
        where ``per_building_results`` is a list of dicts with the keys
        ``'actual_class'``, ``'predicted_class'`` and ``'correct'``. Any
        exception raised while processing is printed and reported as
        ``(0, 0, [])`` instead of propagating.
    """
    try:
        image_name = os.path.basename(image_path)

        # Load the original image
        original_image = cv2.imread(image_path)
        if original_image is None:
            raise ValueError(f"Error reading image {image_path}")

        # OpenCV loads BGR; everything downstream works on RGB.
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

        # Run the building segmentation model
        pred_binary, resized_image = extract(model, device, original_image)

        # Keep only contours whose area is within the configured building size range.
        contours, _ = cv2.findContours(pred_binary.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        building_contours = [
            contour for contour in contours
            if MIN_BUILDING_SIZE <= cv2.contourArea(contour) <= MAX_BUILDING_SIZE
        ]

        num_buildings = len(building_contours)
        print(f"{image_name}: Detected {num_buildings} building(s)")

        correct_predictions = 0
        total_predictions = 0
        per_building_results = []  # One dict per classified building

        if num_buildings > 0:
            vis_image = resized_image.copy()

            # Classify building regions in batch with extended bounding boxes
            classification_results = classify_building_regions(
                resized_image, building_contours, classification_model, class_labels, padding=20)

            for i, (top_class, probability, bbox) in enumerate(classification_results):
                label_lines = [
                    f"Predicted: {top_class} ({probability:.2f})",
                    f"Actual: {actual_class}",
                ]
                _draw_building_annotation(vis_image, bbox, label_lines)

                # Print the classification result
                print(f"Building {i + 1} in {image_name}: Predicted: {top_class} ({probability:.4f}), Actual: {actual_class}")

                # Update accuracy counts
                is_correct = top_class == actual_class
                total_predictions += 1
                if is_correct:
                    correct_predictions += 1

                # Store per-building result
                per_building_results.append({
                    'actual_class': actual_class,
                    'predicted_class': top_class,
                    'correct': is_correct
                })

            if visualize:
                _show_image(vis_image, f"Detected Buildings and Classifications in {image_name}")
        else:
            print(f"No buildings detected in {image_name}.")

            if visualize:
                _show_image(original_image, f"No buildings detected in {image_name}")

        return correct_predictions, total_predictions, per_building_results
    except Exception as e:
        # Best-effort by design: one bad image must not abort the whole run.
        print(f"Failed to process {os.path.basename(image_path)}: {e}")
        return 0, 0, []


# Section 7: Main Processing Function

In [None]:
# Main function to process all images in the class folders
def process_images_in_folders(data_folder, model, device, classification_model, class_labels, visualize=False):
    """Run detection and classification over every image and print the metrics.

    ``data_folder`` is expected to contain one sub-folder per class; the
    sub-folder name is used as the ground-truth label of the images in it.
    Images are processed in random order.

    Args:
        data_folder: Directory holding the class sub-folders.
        model: Segmentation network forwarded to ``process_single_image``.
        device: Device forwarded to ``process_single_image``.
        classification_model: Classifier forwarded to ``process_single_image``.
        class_labels: Class names used for the per-class counters, the
            confusion matrix and the classification report.
        visualize: Forwarded to ``process_single_image`` to control whether
            each image is displayed.

    Returns:
        None. Overall accuracy, per-class accuracy, the classification report
        and the misclassified pairs are printed; the confusion matrix is
        plotted.
    """
    total_correct = 0
    total_predictions = 0

    # Initialize per-class accuracy counts
    correct_predictions_per_class = {class_name: 0 for class_name in class_labels}
    total_predictions_per_class = {class_name: 0 for class_name in class_labels}

    # Lists to collect actual and predicted labels for confusion matrix
    y_true = []
    y_pred = []

    # Collect (image path, class name) pairs from every class sub-folder
    image_extensions = ('.tif', '.jpg', '.png', '.jpeg')
    image_paths = []
    for entry in os.listdir(data_folder):
        class_folder = os.path.join(data_folder, entry)
        if not os.path.isdir(class_folder):
            continue
        for file_name in os.listdir(class_folder):
            if file_name.lower().endswith(image_extensions):
                image_paths.append((os.path.join(class_folder, file_name), entry))

    # Shuffle image paths
    random.shuffle(image_paths)

    # Process images sequentially
    for image_path, actual_class in tqdm(image_paths):
        try:
            # Honour the caller's `visualize` flag instead of always displaying.
            correct, total, per_building_results = process_single_image(
                image_path, actual_class, model, device, classification_model, class_labels,
                visualize=visualize)
            total_correct += correct
            total_predictions += total

            # Update per-class accuracy counts and collect labels
            for result in per_building_results:
                actual = result['actual_class']
                predicted = result['predicted_class']
                correct_predictions_per_class[actual] += int(result['correct'])
                total_predictions_per_class[actual] += 1

                # Collect labels for confusion matrix
                y_true.append(actual)
                y_pred.append(predicted)
        except Exception as e:
            print(f"Failed to process {image_path}: {e}")

    # Calculate overall accuracy (0.0 when nothing was classified)
    if total_predictions > 0:
        accuracy = (total_correct / total_predictions) * 100
    else:
        accuracy = 0.0

    print(f"\nTotal correct predictions: {total_correct}")
    print(f"Total predictions: {total_predictions}")
    print(f"Overall Accuracy: {accuracy:.2f}%")

    # Calculate and print per-class accuracy
    print("\nAccuracy per class:")
    for class_name in class_labels:
        correct = correct_predictions_per_class[class_name]
        total = total_predictions_per_class[class_name]
        if total > 0:
            class_accuracy = (correct / total) * 100
            print(f"{class_name}: {class_accuracy:.2f}% ({correct}/{total})")
        else:
            print(f"{class_name}: No predictions.")

    # Generate confusion matrix and classification report
    if y_true and y_pred:
        # Generate confusion matrix
        cm = confusion_matrix(y_true, y_pred, labels=class_labels)
        plt.figure(figsize=(10, 7))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_labels, yticklabels=class_labels)
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix')
        plt.show()

        # Print classification report
        print("\nClassification Report:")
        print(classification_report(y_true, y_pred, labels=class_labels))

        # Analyze misclassifications
        print("\nMisclassified Samples:")
        misclassified = [(true, pred) for true, pred in zip(y_true, y_pred) if true != pred]
        for actual, predicted in misclassified:
            print(f'Actual: {actual}, Predicted: {predicted}')
    else:
        print("No predictions to generate confusion matrix and classification report.")

# Section 8: Entry Point (Run the Full Pipeline)

In [None]:
if __name__ == "__main__":
    # Data directory containing class folders
    data_folder = r'/content/drive/MyDrive/Madhu RA/val'  # Update the path to your data folder

    # Define class labels (ensure these are in the correct order corresponding to your model's output)
    class_labels = ['Commercial', 'High', 'Hospital', 'Industrial', 'Multi', 'Schools', 'Single']  # Replace with your actual class names

    # Load building detection model and device
    model, device = get_trained_model()

    # Path of the pre-trained classification model
    classification_model_path = r"/content/drive/MyDrive/Madhu RA/Densenet201_best.h5"  # Update the path to your model

    # Load the classification model
    try:
        classification_model = load_model(classification_model_path)
    except Exception as e:
        print(f"Error loading classification model: {e}")
        # Re-raise rather than calling exit(): exit() is an interactive helper and,
        # inside a notebook kernel, is not a reliable way to stop the current cell,
        # so the lines below could still run with classification_model undefined.
        raise
    print("Classification model loaded successfully.")

    # Set to True to visualize all images
    visualize = True  # Visualization is enabled to inspect building detections

    # Process all images in the data folder
    process_images_in_folders(data_folder, model, device, classification_model, class_labels, visualize)

Output hidden; open in https://colab.research.google.com to view.