# How we did our image preprocessing

In [None]:
import csv
import os
import shutil

from image_processing import load_annotations, crop_and_size_with_bbox, chop_cropped_images,adjust_bbox_for_patch

In [None]:
# Input locations, resolved against the current working directory.
project_root = os.getcwd()
annotation_dir = f"{project_root}/annotations/bnd_box"  # folder holding the XML annotation files
images_folder = f"{project_root}/annotations/original_imgs"  # folder holding the original images

# Load the annotations from the XML files in annotation_dir.
annotations = load_annotations(annotation_dir)

In [None]:
# Normalization step: crop/resize the original images so they all share the
# same size, writing the results to 'cropped-and-resized'. The call returns
# the annotations that the patching step consumes afterwards.

target_size = (1024, 1024)
source_dir = f"{os.getcwd()}/annotations/original_imgs"
cropped_dir = f"{os.getcwd()}/cropped-and-resized"

new_annots = crop_and_size_with_bbox(source_dir, cropped_dir, target_size, annotations)

In [None]:
# Chop the cropped images into square patches of 256, 128 and 64 pixels.
# Each call writes its patches to 'chopped-<size>' and returns the patch
# annotations (with bounding boxes adjusted to the patch) for that size.

patch_annots256, patch_annots128, patch_annots64 = [
    chop_cropped_images(size, 'cropped-and-resized', f'chopped-{size}', new_annots)
    for size in (256, 128, 64)
]

In [None]:
# Split every patch folder into 'waldo' and 'notwaldo' sub-folders: a patch
# goes to 'waldo' when its annotation carries a bounding box, otherwise to
# 'notwaldo'.

# Pair each patch folder (relative to the working directory) with the
# annotations produced for that patch size.
annots_per_folder = (
    ('/chopped-256', patch_annots256),
    ('/chopped-128', patch_annots128),
    ('/chopped-64', patch_annots64),
)

for folder_suffix, new_img_res in annots_per_folder:
    chopped_folder = os.getcwd() + folder_suffix

    # Destination folders; created up front, existing ones are left alone.
    waldo_folder = os.path.join(chopped_folder, 'waldo')
    notwaldo_folder = os.path.join(chopped_folder, 'notwaldo')
    for target_folder in (waldo_folder, notwaldo_folder):
        os.makedirs(target_folder, exist_ok=True)

    for filename, data in new_img_res.items():
        src_path = os.path.join(chopped_folder, filename)

        if os.path.exists(src_path):
            # A non-None 'bbox' entry marks a patch that contains Waldo.
            has_waldo = data.get('bbox') is not None
            dest_folder = waldo_folder if has_waldo else notwaldo_folder
            shutil.move(src_path, os.path.join(dest_folder, filename))
        else:
            # Report the missing patch and carry on with the next one.
            print(f"File {filename} not found in {chopped_folder}.")

In [None]:
# Write one CSV file per patch size to annotations/imgs/<size>/. Every patch
# gets one row: its filename, its label ('waldo' or 'notwaldo') and, for
# Waldo patches, the bounding box plus the box width and height.
patches = [256, 128, 64]

# Explicit size -> annotations lookup, so an unexpected patch size raises a
# KeyError instead of silently falling back to the 64-px annotations.
annots_by_patch = {
    256: patch_annots256,
    128: patch_annots128,
    64: patch_annots64,
}

# Column order of the generated CSV files (identical for every patch size).
fieldnames = ['filename', 'width', 'height', 'label', 'startX', 'startY', 'endX', 'endY']

for patch in patches:
    patch_annot = annots_by_patch[patch]

    # Output folder for this patch size.
    output_folder = os.path.join(os.getcwd(), 'annotations', 'imgs', str(patch))
    os.makedirs(output_folder, exist_ok=True)

    # A single CSV file holds all patches of this size.
    csv_path = os.path.join(output_folder, f"patch_annotations_{patch}.csv")

    # newline='' is what the csv module requires for files it writes to; the
    # explicit encoding keeps the output independent of the platform default.
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        # restval='' leaves the size/bbox columns empty for rows that omit them.
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval='')
        writer.writeheader()

        for filename, values in patch_annot.items():
            bbox = values.get('bbox')
            if bbox is None:
                # No Waldo in this patch: only filename and label are filled in.
                row = {'filename': filename, 'label': 'notwaldo'}
            else:
                # Bounding box coordinates (assumed order: [startX, startY, endX, endY]).
                startX, startY, endX, endY = bbox
                row = {
                    'filename': filename,
                    'width': endX - startX,
                    'height': endY - startY,
                    'label': 'waldo',
                    'startX': startX,
                    'startY': startY,
                    'endX': endX,
                    'endY': endY,
                }
            writer.writerow(row)
    print(f"Created CSV: {csv_path}")
