# In [None]:  (notebook cell marker left over from the Jupyter export)
## Building image classification using vision transformer
## Article: Developing Building Exposure Models Using Computer Vision and Deep Learning
#
# Authors: Sukh Sagar Shukla, Amit Bhatiya, Dhanya J, Saman Ghaffarian, Roberto Gentile
#
# Description:
# This script provides a unified pipeline for the image classification task using Google Street View Panorama images.
# This script processes a folder of images to:
# 1. Detect and crop buildings using TensorFlow Hub's Faster R-CNN model
# 2. Remove duplicate cropped images using feature similarity
# 3. Classify unique buildings using a trained BEiT model
# Please refer to the article for further details.
# The present model can classify images into 24 building classes ['AD_H1', 'AD_H2', 'MR_H1 flat roof', 'MR_H1 gable roof', 
                   # 'MR_H2 flat roof', 'MR_H2 gable roof', 'MR_H3', 'Metal_H1', 
                   # 'Non_Building', 'RCC_H1 flat roof', 'RCC_H1 gable roof', 
                   # 'RCC_H2 flat roof', 'RCC_H2 gable roof', 'RCC_H3 flat roof', 
                   # 'RCC_H3 gable roof', 'RCC_H4 flat roof', 'RCC_H4 gaqble roof', 
                   # 'RCC_H5', 'RCC_H6', 'RCC_OS_H1', 'RCC_OS_H2', 'RCC_OS_H3', 
                   # 'RCC_OS_H4', 'Timber']
# Output folder structure produced by the pipeline:
# output_folder/
# ├── cropped/              # Detected buildings (intermediate)
# ├── original/             # Unique building images
# ├── duplicate/            # Duplicate building images
# └── classification_results.xlsx  # Classification results

# Necessary libraries 
import os
import shutil
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from PIL import Image
import cv2
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.applications.efficientnet import preprocess_input
from sklearn.cluster import DBSCAN
import pandas as pd
import torch
from torchvision import transforms
from transformers import BeitForImageClassification, BeitConfig

# ============================================================================
# PART 1: BUILDING DETECTION AND CROPPING
# ============================================================================
def crop_and_save(original_image, box, save_path, expand_factor=0.1, min_dim=200,
                  jpeg_quality=95):
    """Crop a bounding box (plus a margin) out of an image and save it as JPEG.

    Args:
        original_image: Image array of shape (height, width) or
            (height, width, channels). Values are multiplied by 255 before
            saving, so they are expected to lie in [0, 1].
        box: Normalised bounding box ``(ymin, xmin, ymax, xmax)``; coordinates
            are clamped to [0, 1] after the margin is applied.
        save_path: Destination path of the JPEG file.
        expand_factor: Fraction of the box width/height added as a margin on
            each side of the box.
        min_dim: Minimum crop width and height in pixels; smaller crops are
            skipped without writing anything.
        jpeg_quality: JPEG quality handed to Pillow. Pillow documents the
            useful range as 0-95, hence the default of 95.

    Returns:
        True if the crop was written to ``save_path``; False if the crop was
        too small or saving failed (the error is printed, not raised).
    """
    # Only the first two axes matter here, so grayscale (2-D) arrays work too.
    img_height, img_width = original_image.shape[:2]
    ymin, xmin, ymax, xmax = box

    # Margin is proportional to the size of the detected box.
    expand_w = (xmax - xmin) * expand_factor
    expand_h = (ymax - ymin) * expand_factor

    # Clamp the expanded box to the image and convert to pixel indices.
    xmin_pixel = int(max(0, xmin - expand_w) * img_width)
    ymin_pixel = int(max(0, ymin - expand_h) * img_height)
    xmax_pixel = int(min(1, xmax + expand_w) * img_width)
    ymax_pixel = int(min(1, ymax + expand_h) * img_height)
    cropped_image = original_image[ymin_pixel:ymax_pixel, xmin_pixel:xmax_pixel]

    cropped_height, cropped_width = cropped_image.shape[:2]
    if cropped_width < min_dim or cropped_height < min_dim:
        return False

    # Round instead of truncating: a value such as 254.99999 (from a
    # /255 -> *255 round-trip) must map back to 255, not 254. Clipping keeps
    # out-of-range inputs from wrapping around in uint8.
    cropped_image = np.clip(np.rint(cropped_image * 255), 0, 255).astype(np.uint8)
    try:
        cropped_image_pil = Image.fromarray(cropped_image)
        # JPEG cannot store an alpha channel; fall back to plain RGB.
        if cropped_image_pil.mode not in ("RGB", "L"):
            cropped_image_pil = cropped_image_pil.convert("RGB")
        cropped_image_pil.save(save_path, format="JPEG", quality=jpeg_quality)
    except Exception as e:
        # Best effort: one failed crop must not abort the whole batch.
        print(f"Error saving image: {str(e)}")
        return False
    return True

def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two bounding boxes.

    Both boxes are given as ``(y1, x1, y2, x2)``. The result is 0 when the
    union area is not positive.
    """
    top_a, left_a, bottom_a, right_a = box1
    top_b, left_b, bottom_b, right_b = box2

    # Extent of the overlap along each axis; clamped to zero when the boxes
    # do not overlap on that axis.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    overlap_area = overlap_w * overlap_h

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)

    # The overlap is counted in both areas, so subtract it once.
    combined_area = area_a + area_b - overlap_area

    if combined_area > 0:
        return overlap_area / combined_area
    return 0

def detect_and_crop_buildings(image_path, output_dir, detector=None,
                              score_threshold=0.25, iou_threshold=0.5):
    """Detect buildings in one image and save a crop for each detection.

    Args:
        image_path: Path of the image to process.
        output_dir: Directory that receives the crops (created if missing).
            Crops are named ``<image stem>_<n>.jpg``.
        detector: Loaded TensorFlow Hub detection signature. When None, the
            Faster R-CNN Open Images model is loaded here; pass a loaded
            detector to avoid reloading it for every image.
        score_threshold: Minimum detection score for a box to be kept.
        iou_threshold: A box is dropped when its IoU with an already kept
            box exceeds this value.

    Returns:
        The detector that was used, so callers can reuse it.
    """
    print(f"Processing: {os.path.basename(image_path)}")
    os.makedirs(output_dir, exist_ok=True)

    # Load detector if not provided (for reuse across images)
    if detector is None:
        module_handle = "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"
        detector = hub.load(module_handle).signatures['default']

    target_classes = {'House', 'Building', 'Skyscraper', 'Tower'}

    original_image_name = os.path.splitext(os.path.basename(image_path))[0]

    # Force 3-channel RGB so grayscale, palette and RGBA files (possible for
    # PNG/BMP/TIFF inputs) produce the array layout used by the rest of the
    # function, and close the file handle deterministically.
    with Image.open(image_path) as pil_image:
        image = np.array(pil_image.convert("RGB")) / 255.0

    converted_img = tf.image.convert_image_dtype(image, tf.float32)[tf.newaxis, ...]
    result = detector(converted_img)

    # Convert each output tensor to NumPy once instead of once per element.
    boxes = result['detection_boxes'].numpy()
    scores = result['detection_scores'].numpy()
    class_names = [
        entity.decode('utf-8')
        for entity in result['detection_class_entities'].numpy()
    ]

    final_detections = []

    # Visit detections from highest to lowest score so that, among overlapping
    # boxes, the most confident one is the one that is kept. The stable sort
    # leaves the order untouched if the detector already returns sorted output.
    for i in np.argsort(-scores, kind="stable"):
        current_score = scores[i]
        if current_score < score_threshold:
            break  # everything after this has an even lower score

        class_name = class_names[i]
        if class_name not in target_classes:
            continue

        current_box = boxes[i]
        is_duplicate = any(
            calculate_iou(current_box, kept['box']) > iou_threshold
            for kept in final_detections
        )
        if not is_duplicate:
            final_detections.append({
                'class': class_name,
                'box': current_box,
                'score': current_score,
            })

    for i, detection in enumerate(final_detections):
        save_path = os.path.join(output_dir, f'{original_image_name}_{i+1}.jpg')
        crop_and_save(image, detection['box'], save_path)

    # Note: this counts detections; crop_and_save may still skip small crops.
    print(f"  - Found {len(final_detections)} buildings")
    return detector

def process_folder_for_buildings(input_folder, output_dir):
    """Run building detection and cropping on every image in a folder.

    Files are selected by extension (case-insensitive). The detection model
    is loaded once and shared across all images. A failure on one image is
    printed and does not stop the remaining images from being processed.

    Args:
        input_folder: Directory containing the input images.
        output_dir: Directory that receives the cropped building images.
    """
    banner = "=" * 60
    print(banner)
    print("STEP 1: Detecting and cropping buildings from all images")
    print(banner)

    # Collect the file names that carry a supported image extension.
    supported_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')
    image_files = []
    for entry in os.listdir(input_folder):
        if entry.lower().endswith(supported_suffixes):
            image_files.append(entry)

    total = len(image_files)
    if total == 0:
        print(f"No images found in {input_folder}")
        return

    print(f"Found {total} images to process\n")

    # One shared detector instance for the whole folder.
    print("Loading detection model...")
    detector = hub.load(
        "https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1"
    ).signatures['default']
    print("Model loaded successfully\n")

    position = 0
    for img_file in image_files:
        position += 1
        img_path = os.path.join(input_folder, img_file)
        # Progress prefix; the per-image message continues on the same line.
        print(f"[{position}/{total}] ", end="")
        try:
            detect_and_crop_buildings(img_path, output_dir, detector)
        except Exception as e:
            print(f"  - Error processing {img_file}: {str(e)}")

    print(f"\nBuilding detection complete. Cropped images saved to: {output_dir}\n")

# ============================================================================
# PART 2: DUPLICATE IMAGE DETECTION AND REMOVAL
# ============================================================================
def load_cnn_model():
    """Build the EfficientNetB7 network used for feature extraction.

    The network is created with ``weights="imagenet"``, without its top
    layers (``include_top=False``) and with ``pooling="avg"``.
    """
    backbone = EfficientNetB7(include_top=False, weights="imagenet", pooling="avg")
    return backbone

def extract_features(img_path, model):
    """Extract a flat feature vector from an image using the CNN model.

    Args:
        img_path: Path of the image file to read.
        model: Model whose ``predict`` output is used as the feature vector
            (the EfficientNetB7 network from ``load_cnn_model`` in this file).

    Returns:
        1-D array holding the flattened model output for the image.

    Raises:
        ValueError: If the image file cannot be read.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None rather than raising; fail
    # here with a clear message instead of an opaque error from cv2.resize.
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")

    img = cv2.resize(img, (600, 600))
    # OpenCV loads images as BGR; convert to RGB before preprocessing.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    img = preprocess_input(img)
    # verbose=0 avoids printing one progress bar per image.
    features = model.predict(img, verbose=0)
    return features.flatten()

def find_and_separate_duplicates(input_dir, original_dir, duplicate_dir,
                                 eps=0.26, min_samples=2):
    """Cluster near-identical images and move them into separate folders.

    Features are extracted from every ``.png``/``.jpg``/``.jpeg`` file in
    ``input_dir`` and clustered with DBSCAN on cosine distance. Images that
    belong to no cluster, plus the first image of each cluster, are moved to
    ``original_dir``; the remaining cluster members are moved to
    ``duplicate_dir``.

    Args:
        input_dir: Directory containing the images to de-duplicate.
        original_dir: Destination for unique images (created if missing).
        duplicate_dir: Destination for duplicates (created if missing).
        eps: DBSCAN ``eps`` - maximum cosine distance between two samples for
            them to be considered neighbours.
        min_samples: DBSCAN ``min_samples`` - minimum cluster size (2 means
            at least one duplicate).
    """
    print("=" * 60)
    print("STEP 2: Finding and separating duplicate images")
    print("=" * 60)

    os.makedirs(original_dir, exist_ok=True)
    os.makedirs(duplicate_dir, exist_ok=True)

    # input_dir may not exist when the previous step produced no crops.
    # Sorting makes the processing order, and therefore the choice of which
    # cluster member is kept as the "original", reproducible across runs.
    candidate_names = []
    if os.path.isdir(input_dir):
        candidate_names = sorted(
            name for name in os.listdir(input_dir)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        )

    if not candidate_names:
        # Nothing to do: skip loading the (large) feature extraction model.
        print("No valid images found for duplicate detection")
        return

    print("Loading feature extraction model...")
    model = load_cnn_model()

    features = []
    image_paths = []

    print("Extracting features from images...")
    for img_name in candidate_names:
        img_path = os.path.join(input_dir, img_name)
        try:
            feat = extract_features(img_path, model)
        except Exception as e:
            # Best effort: skip images that cannot be processed.
            print(f"Error processing {img_name}: {e}")
        else:
            features.append(feat)
            image_paths.append(img_path)

    if not features:
        print("No valid images found for duplicate detection")
        return

    features = np.array(features)
    # Cluster images based on feature similarity.
    # The default parameters (eps=0.26, min_samples=2) were optimised for a
    # sample set of 200 images.
    print(f"Clustering {len(features)} images...")
    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric="cosine")
    cluster_labels = dbscan.fit_predict(features)

    # Group image paths by cluster label, preserving processing order.
    cluster_dict = {}
    for img_path, cluster_id in zip(image_paths, cluster_labels):
        cluster_dict.setdefault(cluster_id, []).append(img_path)

    duplicate_count = 0
    for cluster_id, img_list in cluster_dict.items():
        if cluster_id == -1:
            # Label -1 is DBSCAN noise: images with no near-duplicate.
            for img in img_list:
                shutil.move(img, os.path.join(original_dir, os.path.basename(img)))
        else:
            # Keep the first image of the cluster, treat the rest as duplicates.
            shutil.move(img_list[0], os.path.join(original_dir, os.path.basename(img_list[0])))
            for img in img_list[1:]:
                shutil.move(img, os.path.join(duplicate_dir, os.path.basename(img)))
                duplicate_count += 1

    # Counts every entry in original_dir, including any left by earlier runs.
    original_count = len(os.listdir(original_dir))
    print(f"\nDuplicate separation complete:")
    print(f"  - Original images: {original_count}")
    print(f"  - Duplicate images: {duplicate_count}\n")

# ============================================================================
# PART 3: IMAGE CLASSIFICATION
# ============================================================================
def load_classification_model(checkpoint_path, num_classes):
    """Build the BEiT classifier and restore trained weights from a checkpoint.

    Args:
        checkpoint_path: Path of the checkpoint file; it must contain a
            ``'model_state_dict'`` entry.
        num_classes: Number of output classes of the classifier.

    Returns:
        Tuple ``(model, device)``: the model in evaluation mode on the
        selected device, and that device (CUDA when available, otherwise CPU).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    base_checkpoint = "microsoft/beit-base-patch16-224-pt22k-ft22k"

    # Generic CLASS_<i> labels; readable class names are applied by callers.
    id2label = {}
    label2id = {}
    for index in range(num_classes):
        label = f"CLASS_{index}"
        id2label[str(index)] = label
        label2id[label] = index

    config = BeitConfig.from_pretrained(
        base_checkpoint,
        num_labels=num_classes,
        id2label=id2label,
        label2id=label2id,
    )
    # ignore_mismatched_sizes is passed because num_labels is overridden above.
    model = BeitForImageClassification.from_pretrained(
        base_checkpoint,
        config=config,
        ignore_mismatched_sizes=True,
    )

    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    return model, device

def classify_image(model, device, image_path, class_names):
    """Predict the class name of a single image.

    Args:
        model: Classification model whose output exposes ``.logits``.
        device: Torch device the input tensor is moved to.
        image_path: Path of the image to classify.
        class_names: Sequence mapping a predicted class index to its name.

    Returns:
        The entry of ``class_names`` at the index of the highest logit.
    """
    # Resize to 224x224, convert to a tensor, then normalise each channel
    # with mean 0.5 and std 0.5.
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])

    rgb_image = Image.open(image_path).convert("RGB")
    batch = preprocess(rgb_image).unsqueeze(0)  # add the batch dimension
    batch = batch.to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        logits = model(batch).logits
        best_index = torch.max(logits, 1)[1]

    return class_names[best_index.item()]

def classify_and_save_results(image_dir, checkpoint_path, num_classes, class_names, output_excel_path):
    """Classify every image in a directory and write the results to Excel.

    Args:
        image_dir: Directory with the images to classify; only ``.png``,
            ``.jpg`` and ``.jpeg`` files are considered.
        checkpoint_path: Path of the trained classification checkpoint.
        num_classes: Number of classes the model was trained on.
        class_names: Class names indexed by the model's output label.
        output_excel_path: Path of the Excel file to write. It has the
            columns ``Image`` and ``Classification``, one row per image that
            was classified successfully.
    """
    print("=" * 60)
    print("STEP 3: Classifying images and saving results")
    print("=" * 60)

    print("Loading classification model...")
    model, device = load_classification_model(checkpoint_path, num_classes)
    print(f"Using device: {device}\n")

    results = []
    # Sorted so the row order in the Excel file is reproducible; os.listdir
    # returns entries in arbitrary order.
    image_files = sorted(
        f for f in os.listdir(image_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    print(f"Classifying {len(image_files)} images...")
    for idx, img_name in enumerate(image_files, 1):
        img_path = os.path.join(image_dir, img_name)
        try:
            predicted_class = classify_image(model, device, img_path, class_names)
            results.append({"Image": img_name, "Classification": predicted_class})
            if idx % 10 == 0:
                print(f"  Processed {idx}/{len(image_files)} images")
        except Exception as e:
            # Best effort: report the failure and continue with the next image.
            print(f"  Error classifying {img_name}: {e}")

    # Explicit columns keep the header row even when no image was classified.
    df = pd.DataFrame(results, columns=["Image", "Classification"])
    df.to_excel(output_excel_path, index=False)
    print(f"\nClassification complete. Results saved to: {output_excel_path}\n")

# ============================================================================
# MAIN EXECUTION
# ============================================================================

def main():
    """
    Main execution function that runs the complete pipeline:
    1. Detect and crop buildings from all images in input folder
    2. Remove duplicate cropped images
    3. Classify unique buildings and save results

    Configuration:
        - Update INPUT_FOLDER with your input image directory
        - Update BASE_OUTPUT_DIR with your desired output location
        - Update CLASSIFICATION_MODEL_CHECKPOINT with your model path
        - Update NUM_CLASSES and CLASS_NAMES to match your model

    Raises:
        FileNotFoundError: If INPUT_FOLDER is not an existing directory.
        ValueError: If NUM_CLASSES does not match the length of CLASS_NAMES.
    """
    # ========================================================================
    # CONFIGURATION - UPDATE THESE PATHS ACCORDING TO YOUR DATA
    # ========================================================================

    INPUT_FOLDER = "path to your panorama directory"
    BASE_OUTPUT_DIR = "path to your base output directory"
    CROPPED_DIR = os.path.join(BASE_OUTPUT_DIR, "cropped")
    ORIGINAL_DIR = os.path.join(BASE_OUTPUT_DIR, "original")
    DUPLICATE_DIR = os.path.join(BASE_OUTPUT_DIR, "duplicate")
    CLASSIFICATION_RESULTS_EXCEL = os.path.join(BASE_OUTPUT_DIR, "classification_results.xlsx")

    # ========================================================================
    # CLASSIFICATION MODEL CONFIGURATION
    # ========================================================================

    # Path to your trained classification model checkpoint
    CLASSIFICATION_MODEL_CHECKPOINT = "path of the directory where best model is saved/best_model.pth"

    # Number of classes the BEiT model of this study is trained on
    NUM_CLASSES = 24

    # Class names corresponding to the present model's output labels
    # (list position = predicted class index)
    CLASS_NAMES = [
        'AD_H1',                    # Adobe House with single storey
        'AD_H2',                    # Adobe House having 2 storeys
        'MR_H1 flat roof',          # Masonry House 1 storey - Flat Roof
        'MR_H1 gable roof',         # Masonry House 1 storey - Gable Roof
        'MR_H2 flat roof',          # Masonry House 2-storey - Flat Roof
        'MR_H2 gable roof',         # Masonry House 2-storey - Gable Roof
        'MR_H3',                    # Masonry House 3 storey
        'Metal_H1',                 # Metal Structure with single storey
        'Non_Building',             # Non-Building images
        'RCC_H1 flat roof',         # RCC House 1 storey - Flat Roof
        'RCC_H1 gable roof',        # RCC House 1 storey - Gable Roof
        'RCC_H2 flat roof',         # RCC House 2-storey - Flat Roof
        'RCC_H2 gable roof',        # RCC House 2-storey - Gable Roof
        'RCC_H3 flat roof',         # RCC House 3 storey - Flat Roof
        'RCC_H3 gable roof',        # RCC House 3 storey - Gable Roof
        'RCC_H4 flat roof',         # RCC House 4 storey - Flat Roof
        # NOTE(review): 'gaqble' looks like a typo for 'gable'. Kept as-is
        # because this string is written to the results file - confirm
        # before renaming.
        'RCC_H4 gaqble roof',       # RCC House 4 storey - Gable Roof
        'RCC_H5',                   # RCC House 5 storey
        'RCC_H6',                   # RCC House 6 storey
        'RCC_OS_H1',                # RCC House 1 storey - presence of open storey
        'RCC_OS_H2',                # RCC House 2 storey - presence of open storey
        'RCC_OS_H3',                # RCC House 3 storey - presence of open storey
        'RCC_OS_H4',                # RCC House 4 storey - presence of open storey
        'Timber'                    # Timber Structure
    ]

    # ========================================================================
    # CONFIGURATION CHECKS
    # ========================================================================

    # A predicted index is looked up in CLASS_NAMES, so the two settings
    # must agree.
    if len(CLASS_NAMES) != NUM_CLASSES:
        raise ValueError(
            f"NUM_CLASSES is {NUM_CLASSES} but CLASS_NAMES has "
            f"{len(CLASS_NAMES)} entries"
        )

    # Fail before any output directory is created if the input is missing.
    if not os.path.isdir(INPUT_FOLDER):
        raise FileNotFoundError(f"Input folder not found: {INPUT_FOLDER}")

    # ========================================================================
    # PIPELINE EXECUTION
    # ========================================================================

    # Create the base output directory and every step's working directory up
    # front, so a step that produces nothing (e.g. no input images found)
    # does not leave the next step without the directory it reads from.
    os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)
    for step_dir in (CROPPED_DIR, ORIGINAL_DIR, DUPLICATE_DIR):
        os.makedirs(step_dir, exist_ok=True)

    # Print pipeline header
    print("\n" + "=" * 60)
    print("BATCH IMAGE PROCESSING PIPELINE")
    print("=" * 60)
    print(f"Input folder: {INPUT_FOLDER}")
    print(f"Output folder: {BASE_OUTPUT_DIR}")
    print("=" * 60 + "\n")

    # Step 1: Detect and crop buildings from all images in folder
    process_folder_for_buildings(INPUT_FOLDER, CROPPED_DIR)

    # Step 2: Find and separate duplicate cropped images
    find_and_separate_duplicates(CROPPED_DIR, ORIGINAL_DIR, DUPLICATE_DIR)

    # Step 3: Classify unique buildings and save results to Excel
    classify_and_save_results(
        ORIGINAL_DIR,
        CLASSIFICATION_MODEL_CHECKPOINT,
        NUM_CLASSES,
        CLASS_NAMES,
        CLASSIFICATION_RESULTS_EXCEL
    )

    # Print completion message
    print("=" * 60)
    print("PIPELINE COMPLETE!")
    print("=" * 60)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()