In [27]:
import os
import json
import shutil
from tqdm import tqdm
import numpy as np

# Set the path to your xBD directory
xDB_dir = "C:/Users/isxzl/OneDrive/Code/AutoSSL/Datasets/xBD"

# Damage subtype -> ordinal level used when averaging the buildings of an image.
damage_level = {"no-damage": 1, "minor-damage": 2, "major-damage": 3, "destroyed": 4}

# Rounded average level -> name of the subfolder the image is copied into.
# Level 0 is used for images that contain no building with a known subtype.
damage_subfolders = {0: "no_building", 1: "no-damage", 2: "minor-damage", 3: "major-damage", 4: "destroyed"}


def average_damage_level(subtypes):
    """Return the rounded mean damage level of the given building subtypes.

    Args:
        subtypes: Iterable of subtype values taken from the label features.
            Values that are not keys of ``damage_level`` (including ``None``
            for features without a subtype) are ignored.

    Returns:
        An ``int`` key of ``damage_subfolders``: 0 when no subtype is
        recognised, otherwise the mean of the mapped levels rounded to the
        nearest integer.
    """
    levels = [damage_level[subtype] for subtype in subtypes if subtype in damage_level]
    if not levels:
        return 0  # no_building

    # Sum the integer levels and divide once, instead of adding up per-class
    # float fractions, so the mean is computed with a single rounding step.
    # NOTE(review): round() sends exact halves to the nearest even integer
    # (1.5 -> 2, 2.5 -> 2, 3.5 -> 4), which is also what np.round did here
    # before. Confirm that this tie-breaking is the intended labelling rule.
    return round(sum(levels) / len(levels))


def sort_dataset_images(dataset_dir, dataset):
    """Copy each post-disaster image of one split into its damage subfolder.

    Reads every ``*_post_disaster.json`` file in ``<dataset_dir>/labels``,
    averages the damage levels of its ``lng_lat`` features and copies the
    image named in the label metadata from ``<dataset_dir>/images`` into the
    matching subfolder of ``dataset_dir``.

    Args:
        dataset_dir: Directory of the split; must contain ``images`` and
            ``labels`` subdirectories.
        dataset: Name of the split, only used in the progress-bar text.

    Raises:
        FileNotFoundError: If the labels directory or a referenced image
            does not exist.
        KeyError: If a label file lacks the expected ``features`` /
            ``metadata`` entries.
    """
    images_dir = os.path.join(dataset_dir, 'images')
    labels_dir = os.path.join(dataset_dir, 'labels')

    # Create every damage subfolder up front, so all of them exist even when
    # no image ends up in one of them.
    for subfolder in damage_subfolders.values():
        os.makedirs(os.path.join(dataset_dir, subfolder), exist_ok=True)

    for file in tqdm(os.listdir(labels_dir), desc=f"Processing {dataset}"):
        if not file.endswith("_post_disaster.json"):
            continue

        # Read with an explicit encoding so the result does not depend on
        # the platform's default locale encoding.
        with open(os.path.join(labels_dir, file), encoding="utf-8") as f:
            data = json.load(f)

        # .get() lets a feature without a subtype be skipped instead of
        # aborting the whole run with a KeyError.
        subtypes = [
            feature['properties'].get('subtype')
            for feature in data['features']['lng_lat']
        ]
        target_subfolder = damage_subfolders[average_damage_level(subtypes)]

        # Copy the image named in the label metadata to the target subfolder.
        image_file_path = os.path.join(images_dir, data['metadata']['img_name'])
        target_dir = os.path.join(dataset_dir, target_subfolder)
        shutil.copy2(image_file_path, target_dir)


# __name__ is "__main__" both when this is run as a script and when it is
# executed in a notebook cell, so the copy still happens there; importing the
# file only defines the helpers above.
if __name__ == "__main__":
    # Go through train, test, hold directories
    for dataset in ['train', 'test', 'hold']:
        sort_dataset_images(os.path.join(xDB_dir, dataset), dataset)



Processing train: 100%|███████████████████████████████████████████████████████████| 5598/5598 [00:08<00:00, 659.85it/s]
Processing test: 100%|████████████████████████████████████████████████████████████| 1866/1866 [00:02<00:00, 654.53it/s]
Processing hold: 100%|████████████████████████████████████████████████████████████| 1866/1866 [00:02<00:00, 636.36it/s]
