In [1]:
# Mount Google Drive so the human.zip and AIDER.zip image archives stored there can be read
from google.colab import drive
# Mount point used by every Drive path later in the notebook
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!unzip /content/drive/MyDrive/human.zip -d /content/human

Archive:  /content/drive/MyDrive/human.zip
   creating: /content/human/human/
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A000R0_08241716_170_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A001R0_08241716_216_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A002R0_08241716_103_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A003R0_08241716_133_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A004R0_08241716_039_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A005R0_08241716_011_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A006R0_08241716_181_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A007R0_08241716_111_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000LC021000A008R0_08241716_135_bbx.jpg  
  inflating: /content/human/human/P000S00G10B10H10UC022000

In [3]:
!unzip /content/drive/MyDrive/AIDER.zip -d /content/AIDER

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0404.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0405.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0406.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0407.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0408.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0409.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0410.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0411.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0412.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0413.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0414.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0415.jpg  
  inflating: /content/AIDER/AIDER/flooded_areas/flood_image0416.jpg  
  inflating: /content/AID

In [6]:
import cv2
import numpy as np
from PIL import Image, ImageOps
import os
import random

# Paths to disaster and human images
disaster_image_dir = '/content/AIDER/AIDER'
human_image_dir = '/content/human/human'
output_dir = '/content/generated-dataset/'
# Labels live in a sub-folder of the output directory; derive it so the two cannot drift apart
label_dir = os.path.join(output_dir, 'labels/')

# Seed the RNG so the random rotations, flips, scales and positions are repeatable
# when the notebook is re-run (same value as the random_state used for the split).
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Create output directories if they don't exist
os.makedirs(output_dir, exist_ok=True)
os.makedirs(label_dir, exist_ok=True)

# Function to get the first `n` images (by sorted path) from each folder under a directory
def get_first_n_images_from_subfolders(directory, n=100):
    """Collect up to ``n`` image paths from every folder below ``directory``.

    The tree is walked top-down; sub-folders are visited in sorted order and the
    images inside each folder are sorted by path, so the result is deterministic
    regardless of the order in which the file system lists entries.

    Args:
        directory: Root folder to scan (the root itself is scanned too).
        n: Maximum number of images taken from each individual folder.

    Returns:
        A list of full paths of files ending in .png, .jpg or .jpeg
        (matched case-insensitively).
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    image_paths = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend into sub-folders in a fixed order
        dirs.sort()
        images_in_folder = sorted(
            os.path.join(root, file)
            for file in files
            # lower() so that files such as IMG_001.JPG are not silently skipped
            if file.lower().endswith(image_extensions)
        )
        image_paths.extend(images_in_folder[:n])  # Take only the first `n` images
    return image_paths

# Get the first 100 disaster images from each subfolder
disaster_images = get_first_n_images_from_subfolders(disaster_image_dir, n=100)
# Full paths of all human images. os.listdir returns entries in arbitrary order, so the
# list is sorted to keep the paste order (and therefore the output) stable between runs;
# extensions are matched case-insensitively so .JPG/.PNG files are not dropped.
human_images = sorted(
    os.path.join(human_image_dir, img)
    for img in os.listdir(human_image_dir)
    if img.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Random augmentation for a human image: quarter-turn rotation plus optional mirror
def preprocess_human_image(image):
    """Return ``image`` rotated by a random multiple of 90 degrees and maybe mirrored.

    The rotation angle is drawn first, then the flip decision; the canvas is
    expanded so the rotated picture is not cropped.
    """
    quarter_turns = (0, 90, 180, 270)
    rotated = image.rotate(random.choice(quarter_turns), expand=True)

    # Coin toss for the horizontal flip
    mirror_it = random.choice((True, False))
    return ImageOps.mirror(rotated) if mirror_it else rotated

# Function to overlay all human images on a single disaster image and create a label file
def overlay_all_humans_on_disaster(disaster_path, human_image_list, output_image_path, label_path, min_scale=0.05, max_scale=0.1):
    """Paste every human image onto one disaster image and write matching YOLO labels.

    Each human image is randomly rotated/flipped, shrunk by a factor drawn
    uniformly from [min_scale, max_scale] and pasted at a random position that
    keeps it fully inside the background.

    Args:
        disaster_path: Path of the background (disaster) image.
        human_image_list: Paths of the human images to paste, in paste order.
        output_image_path: Destination of the composite (saved after conversion to RGB).
        label_path: Text file receiving one line per pasted human in the form
            ``0 x_center y_center width height``, all normalised by the
            background width/height.
        min_scale: Lower bound of the random resize factor.
        max_scale: Upper bound of the random resize factor.
    """
    # Open inside a context manager so the file handle is released right away;
    # convert() returns an independent in-memory copy that serves as the canvas.
    with Image.open(disaster_path) as background:
        combined_img = background.convert('RGBA')
    bg_width, bg_height = combined_img.size

    with open(label_path, 'w') as label_file:
        for human_path in human_image_list:
            with Image.open(human_path) as raw_human:
                human_img = raw_human.convert('RGBA')

            # Apply preprocessing (rotation and flipping)
            human_img = preprocess_human_image(human_img)

            # Resize the human image to appear tiny (between 5% and 10% of its original size)
            scale_factor = random.uniform(min_scale, max_scale)
            # Shrink further if needed so the overlay always fits on the background;
            # otherwise the random position range below would be empty and randint would raise.
            fit_factor = min(scale_factor, bg_width / human_img.width, bg_height / human_img.height)
            # Clamp to at least 1 px (int() can truncate small images to 0) and at most the background size
            new_width = min(bg_width, max(1, int(human_img.width * fit_factor)))
            new_height = min(bg_height, max(1, int(human_img.height * fit_factor)))
            human_img = human_img.resize((new_width, new_height))

            # Get random position for pasting (top-left corner, overlay stays inside the image)
            max_x = bg_width - new_width
            max_y = bg_height - new_height
            random_position = (random.randint(0, max_x), random.randint(0, max_y))

            # Calculate normalized coordinates for YOLO format
            x_center = (random_position[0] + new_width / 2) / bg_width
            y_center = (random_position[1] + new_height / 2) / bg_height
            width = new_width / bg_width
            height = new_height / bg_height

            # Write label line for each human image (class id 0)
            label_file.write(f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

            # Paste the human image onto the disaster image, using its own alpha channel as mask
            combined_img.paste(human_img, random_position, human_img)

    # Convert to RGB and save the final image
    combined_img = combined_img.convert('RGB')
    combined_img.save(output_image_path)

# Combine all human images with each disaster image.
# human_images already contains full paths, so it is passed through unchanged instead of
# being re-joined with human_image_dir on every iteration.
for image_index, disaster in enumerate(disaster_images):
    output_image_path = os.path.join(output_dir, f'combined_image_{image_index}.jpg')
    label_path = os.path.join(label_dir, f'combined_image_{image_index}.txt')

    overlay_all_humans_on_disaster(disaster, human_images, output_image_path, label_path)

# One composite is produced per disaster image
image_count = len(disaster_images)

print(f"Generated {image_count} composite images with labels in {output_dir} and {label_dir}")


Generated 500 composite images with labels in /content/generated-dataset/ and /content/generated-dataset/labels/


In [7]:
from sklearn.model_selection import train_test_split
import os
import shutil
from google.colab import drive

# Where the composite images and their label files were written
image_dir = '/content/generated-dataset/'
label_dir = '/content/generated-dataset/labels/'

# Root folder that receives the train/val/test splits
output_base = '/content/split-dataset/'
os.makedirs(output_base, exist_ok=True)

# Every split gets its own images/ and labels/ sub-folder
for split in ('train', 'val', 'test'):
    for kind in ('images', 'labels'):
        os.makedirs(f'{output_base}{split}/{kind}', exist_ok=True)

# Mount Google Drive and make sure the destination folder exists there
drive.mount('/content/drive')
drive_output_dir = '/content/drive/My Drive/YOLOv_Dataset/'
os.makedirs(drive_output_dir, exist_ok=True)

# Get all images. os.listdir returns names in arbitrary order, so sort them first;
# otherwise the seeded split below is not actually reproducible between runs/machines.
all_images = sorted(img for img in os.listdir(image_dir) if img.endswith('.jpg'))

# Split dataset (80% train, 10% val, 10% test)
train_images, temp_images = train_test_split(all_images, test_size=0.2, random_state=42)
val_images, test_images = train_test_split(temp_images, test_size=0.5, random_state=42)

# Sanity check: every image ends up in exactly one split
assert len(train_images) + len(val_images) + len(test_images) == len(all_images)

# Helper function to copy images and their label files into a split folder
def move_files(image_list, split, src_image_dir=None, src_label_dir=None, dest_base=None):
    """Copy each image and its label file into ``<dest_base>/<split>/{images,labels}``.

    Despite the name, files are copied, not moved; the sources stay in place.

    Args:
        image_list: Image file names (not paths) located in the image directory.
        split: Name of the split sub-folder, e.g. 'train', 'val' or 'test'.
        src_image_dir: Folder holding the images; defaults to the global ``image_dir``.
        src_label_dir: Folder holding the labels; defaults to the global ``label_dir``.
        dest_base: Root of the split dataset; defaults to the global ``output_base``.
    """
    src_image_dir = image_dir if src_image_dir is None else src_image_dir
    src_label_dir = label_dir if src_label_dir is None else src_label_dir
    dest_base = output_base if dest_base is None else dest_base

    images_out = os.path.join(dest_base, split, 'images')
    labels_out = os.path.join(dest_base, split, 'labels')
    # Harmless when the folders already exist; lets the helper be used on its own
    os.makedirs(images_out, exist_ok=True)
    os.makedirs(labels_out, exist_ok=True)

    for img in image_list:
        # Swap only the final extension; str.replace would also rewrite any
        # other '.jpg' occurring earlier in the file name.
        label = os.path.splitext(img)[0] + '.txt'
        # Copy image
        shutil.copy(os.path.join(src_image_dir, img), os.path.join(images_out, img))
        # Copy label
        shutil.copy(os.path.join(src_label_dir, label), os.path.join(labels_out, label))

# Populate each split folder with its images and labels
for split_name, split_images in (
    ('train', train_images),
    ('val', val_images),
    ('test', test_images),
):
    move_files(split_images, split_name)

# Mirror the finished split dataset onto Google Drive (merging into the existing folder)
shutil.copytree(output_base, drive_output_dir, dirs_exist_ok=True)

print(f"Dataset successfully split and saved to Google Drive at {drive_output_dir}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset successfully split and saved to Google Drive at /content/drive/My Drive/YOLOv_Dataset/
