## Import required packages

In [None]:
from PIL import Image
import PIL.Image
import os
import matplotlib.pyplot as plt
import shutil
import zipfile
from glob import glob
from typing import List, Tuple

import click
import h5py
import numpy as np
from scipy.io import loadmat
from scipy.ndimage import gaussian_filter
from matplotlib import pyplot as plt

from utils import avg_box, gaussian_filter_density
import math
import torch
import scipy.io as sio

# Increase the maximum number of pixels so that PIL is able to open very large images
PIL.Image.MAX_IMAGE_PIXELS = 1262080000 

# Cropping Yosemite dataset

**Create annotation for Yosemite dataset**

In [None]:
file_path = "datasets/yosemite/labels.txt"
im = Image.open('datasets/yosemite/z20_label.png')
pix = im.load()
img_width, img_height = im.size

# Scan the label mask column by column and write one "x y" line for every
# non-zero pixel, so the output is ordered by x first and by y second.
with open(file_path, "w") as labels_out:
    for col in range(img_width):
        for row in range(img_height):
            if pix[col, row] > 0:
                labels_out.write(f"{col} {row}\n")

## Crop function from full image

In [None]:
def smallCrop(image, cw, ch, labels, left, top):
    """
    Crop a rectangular region from an image and re-express the labels that
    fall inside that region in the crop's own coordinate system.

    The crop box is half-open, exactly like the box handed to ``image.crop``:
    it covers ``left <= x < left + cw`` and ``top <= y < top + ch``. A label
    sitting on the right or bottom edge therefore belongs to the neighbouring
    tile, so tiling an image never counts the same label twice and never
    produces a coordinate equal to ``cw`` or ``ch`` (which would lie outside
    the cropped image).

    Args:
        image: The image to crop; must expose ``size`` as (width, height) and
            a ``crop((left, top, right, bottom))`` method (e.g. a PIL Image).
        cw: The width of the crop area.
        ch: The height of the crop area.
        labels: An iterable of (x, y) label coordinates in image space.
        left: The x-coordinate of the top-left corner of the crop area.
        top: The y-coordinate of the top-left corner of the crop area.

    Returns:
        A tuple consisting of:
            cropped_image: The cropped portion of the image.
            updated_labels: A list of (x, y) label coordinates relative to the
                top-left corner of the cropped area.

    Raises:
        ValueError: If the crop is wider or taller than the image.
    """
    width, height = image.size
    # Validate each dimension against its own axis; a crop exactly as large
    # as the image is still valid.
    if cw > width or ch > height:
        raise ValueError("Crop size exceeds image dimensions")

    # Define the zone to crop
    right = left + cw
    bottom = top + ch
    cropped_image = image.crop((left, top, right, bottom))

    # Keep the labels inside the half-open box and shift them to crop space.
    updated_labels = [
        (x - left, y - top)
        for x, y in labels
        if left <= x < right and top <= y < bottom
    ]

    return cropped_image, updated_labels

In [None]:
def main_row(input_image_path, input_label_path, output_folder, cw, ch):
    """
    Tile the Yosemite image into crops of ``cw`` x ``ch`` pixels and save each
    crop together with the labels that fall inside it.

    The processed area starts at pixel (4000, 4000) and is split into four
    side-by-side vertical strips (zone_A .. zone_D), each 19200 / 4 pixels wide
    and 38400 pixels tall. Every strip is walked row by row; each crop is
    written to ``<output_folder>/<zone>/images/IMG_<n>.jpg`` and its labels to
    ``<output_folder>/<zone>/labels/IMG_<n>.txt``. The index ``n`` keeps
    increasing across zones, so file names are unique over the whole run.

    Args:
        input_image_path: Path to the input image file.
        input_label_path: Path to the label file with one "x y" pair per line.
            Blank lines are ignored.
        output_folder: The parent directory where the zone folders are created.
        cw: The width of the crop area (in pixels).
        ch: The height of the crop area (in pixels).

    Returns:
        None. The crops and label files are written to disk and progress is
        printed to stdout.
    """
    # Create output folders if they don't exist
    os.makedirs(output_folder, exist_ok=True)

    # Yosemite has 4 zones
    folders = ['zone_A', 'zone_B', 'zone_C', 'zone_D']

    # Create subfolders for images and labels
    for folder in folders:
        zone = os.path.join(output_folder, folder)
        os.makedirs(os.path.join(zone, 'images'), exist_ok=True)
        os.makedirs(os.path.join(zone, 'labels'), exist_ok=True)

    # Load labels ("x y" per line); skip blank lines such as a trailing newline
    with open(input_label_path, 'r') as label_file:
        labels = [tuple(map(float, line.split())) for line in label_file if line.strip()]

    # Define limits for the zones to crop
    zone_width = 19200 // 4
    x_min = 4000
    x_max = x_min + zone_width - cw
    y_min = 4000
    y_max = y_min + 38400
    count = 0  # running crop index, shared by all zones

    # Keep the source image open only for the duration of the cropping
    with Image.open(input_image_path) as image:
        for folder in folders:
            print(f"Width range {x_min} - {x_max}")
            print(f"Height range {y_min} - {y_max}")
            image_dir = os.path.join(output_folder, folder, 'images')
            label_dir = os.path.join(output_folder, folder, 'labels')
            zone_count = 0

            # Loop through the crops, row by row
            for top in range(y_min, y_max, ch):
                for left in range(x_min, x_max, cw):
                    print(count, end = " ")
                    print("Left:", left, "Top:", top)
                    # Crop the image based on the coordinates
                    cropped_image, updated_labels = smallCrop(image, cw, ch, labels, left, top)

                    # Save the cropped image
                    cropped_image.save(os.path.join(image_dir, f'IMG_{count}.jpg'))

                    # Save the updated labels to a new txt file
                    with open(os.path.join(label_dir, f'IMG_{count}.txt'), 'w') as updated_label_file:
                        updated_label_file.writelines(f"{x} {y}\n" for x, y in updated_labels)

                    zone_count += 1
                    count += 1

            print(f"The number of images and labels in {folder}: {zone_count}\n")
            # Shift the horizontal window to the next zone
            x_min += zone_width
            x_max += zone_width

In [None]:
# Row-by-row cropping of the full Yosemite image into square tiles.
# main_row() writes the tiles to <output_folder>/zone_{A,B,C,D}/{images,labels}.
input_image_path = 'datasets/yosemite/z20_data.png'  # Specify the path to your input image
input_label_path = 'datasets/yosemite/labels.txt'  # Specify the path to your input labels file
crop_size =  1536 # Specify the size of the square crop (both width and height)
output_folder = f'datasets/yosemite_1536_no_crop'  # Specify the output folder where cropped images and labels will be saved

main_row(input_image_path, input_label_path, output_folder, crop_size, crop_size)

In [None]:
def plot_cropped_images_with_labels(folder):
    """
    Display every cropped image of a folder with its labels drawn on top.

    Images are shown in sorted file-name order. An image is skipped when the
    'labels' subfolder has no ``.txt`` file with the same base name.

    Args:
        folder: The path to the folder containing the subfolders 'images' and
                'labels'. Label files hold one "x y" pair per line; blank
                lines are ignored.

    Returns:
        None: One `matplotlib` figure is shown per image, with the labels
              overlaid as red points.
    """
    image_folder = os.path.join(folder, 'images')
    label_folder = os.path.join(folder, 'labels')

    # A set gives constant-time lookups when matching images to label files
    label_files = set(os.listdir(label_folder))

    # Sort so the display order does not depend on the file system
    for image_filename in sorted(os.listdir(image_folder)):
        # Check if a corresponding label file exists
        label_filename = os.path.splitext(image_filename)[0] + '.txt'
        if label_filename not in label_files:
            continue

        image_path = os.path.join(image_folder, image_filename)
        label_path = os.path.join(label_folder, label_filename)

        # Load labels from the label file, skipping blank lines
        with open(label_path, 'r') as label_file:
            labels = [tuple(map(float, line.split())) for line in label_file if line.strip()]

        plt.figure(figsize=(6, 6))
        # Close the image file as soon as it has been handed to matplotlib
        with Image.open(image_path) as cropped_image:
            plt.imshow(cropped_image)
        plt.title(image_filename)

        # Draw all labels with a single call instead of one artist per point
        if labels:
            xs, ys = zip(*labels)
            plt.plot(xs, ys, 'ro', markersize=2)  # Plot labels as red points

        plt.axis('off')
        plt.show()

# Specify the path to the target folder containing cropped images and labels.
# The folder must contain the subfolders 'images' and 'labels',
# example: datasets/yosemite_1536/zone_A
folder = 'Path/to/your/target/folder'
plot_cropped_images_with_labels(folder)

# Crop function from image of size 1536x1536

In [None]:
import os
from PIL import Image

def split_9_images_labels(source_img, source_label, output, zone, num_width, num_height):
    """
    Split images and their corresponding labels into a grid of smaller tiles.

    Every image of `source_img` that has a label file of the same base name in
    `source_label` is divided into `num_width` x `num_height` tiles of
    `width // num_width` by `height // num_height` pixels. Tile `k`
    (numbered row by row, `k = row * num_width + column`) is saved as
    `<output>/<zone>/images/<name>_section_<k>.jpg` and its labels, shifted to
    tile coordinates, as `<output>/<zone>/labels/<name>_section_<k>.txt`.

    Tiles are half-open boxes (`left <= x < right`, `top <= y < bottom`), so a
    label lying on the border between two tiles is assigned to exactly one of
    them and never receives a coordinate outside the tile.

    Args:
        source_img (str): The path to the source folder containing the original images.
        source_label (str): The path to the source folder containing the corresponding label files.
        output (str): The path to the output folder where cropped images and labels will be saved.
        zone (str): The subfolder name (zone) where the cropped images and labels will be stored.
        num_width (int): The number of segments to divide the image into along the width.
        num_height (int): The number of segments to divide the image into along the height.

    Returns:
        None. Images without a label file are reported with a warning and skipped.
    """
    # Make folders if they don't exist
    image_out_dir = os.path.join(output, zone, 'images')
    label_out_dir = os.path.join(output, zone, 'labels')
    os.makedirs(image_out_dir, exist_ok=True)
    os.makedirs(label_out_dir, exist_ok=True)

    # Get list of images in the source folder
    image_files = [f for f in os.listdir(source_img) if f.endswith((".jpg", ".jpeg", ".png"))]

    for image_filename in image_files:
        # Get label file corresponding to the image
        stem = os.path.splitext(image_filename)[0]
        label_path = os.path.join(source_label, stem + '.txt')

        # Check if the corresponding label file exists
        if not os.path.exists(label_path):
            print(f"Warning: Label file not found for image '{image_filename}'")
            continue

        # Load labels from the label file, skipping blank lines
        with open(label_path, 'r') as label_file:
            labels = [tuple(map(float, line.split())) for line in label_file if line.strip()]

        # Keep the source image open only while its tiles are being written
        with Image.open(os.path.join(source_img, image_filename)) as image:
            width, height = image.size

            # Calculate the size of each smaller segment
            segment_height = height // num_height
            segment_width = width // num_width

            # Loop through the grid
            for i in range(num_height):
                top = i * segment_height
                bottom = top + segment_height
                for j in range(num_width):
                    left = j * segment_width
                    right = left + segment_width
                    section_name = f'{stem}_section_{i*num_width+j}'

                    # Crop and save the tile
                    cropped_image = image.crop((left, top, right, bottom))
                    cropped_image.save(os.path.join(image_out_dir, section_name + '.jpg'))

                    # Keep the labels inside the tile and shift them to tile coordinates
                    update_labels = [
                        (x - left, y - top)
                        for x, y in labels
                        if left <= x < right and top <= y < bottom
                    ]

                    # Save the updated labels
                    with open(os.path.join(label_out_dir, section_name + '.txt'), 'w') as update_label_file:
                        update_label_file.writelines(f"{x} {y}\n" for x, y in update_labels)

In [None]:
# Split every 1536x1536 tile of zone_A into a 3x3 grid of smaller tiles.
# Change `zone` and the two source paths to process another zone.
image_source = 'datasets/yosemite_1536_no_crop/zone_A/images'
label_source = 'datasets/yosemite_1536_no_crop/zone_A/labels'
output_folder = 'datasets/yosemite_1536'
zone = 'zone_A'
split_9_images_labels(image_source, label_source, output_folder, zone, 3, 3)

# Create HDF5 for London and Yosemite dataset

## Code to generate the HDF5 files for the Yosemite dataset

In [None]:
# For Yosemite dataset
# Zones B and D are used for training, zones A and C for testing.
# All four folders are the ones written under 'datasets/yosemite_1536_no_crop'.
TRAIN_PATH = ['datasets/yosemite_1536_no_crop/zone_B', 'datasets/yosemite_1536_no_crop/zone_D']
TEST_PATH = ['datasets/yosemite_1536_no_crop/zone_A', 'datasets/yosemite_1536_no_crop/zone_C']
DATASET_PATH = 'datasets/yosemite_1536/'

In [None]:
# Read Yosemite dataset files
# Collect the path of every .jpg found in the "images" folder of each zone
TRAIN_IMAGES = []
for path in TRAIN_PATH:
    image_dir = os.path.join(path, "images")
    TRAIN_IMAGES.extend(
        os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith(".jpg")
    )

TEST_IMAGES = []
for path in TEST_PATH:
    image_dir = os.path.join(path, "images")
    TEST_IMAGES.extend(
        os.path.join(image_dir, file) for file in os.listdir(image_dir) if file.endswith(".jpg")
    )

TRAIN_SIZE, TEST_SIZE = len(TRAIN_IMAGES), len(TEST_IMAGES)

# Take the image dimensions from one reference tile; the array shape is
# (rows, columns, channels), so X is the number of rows and Y of columns.
reference_image = plt.imread('datasets/yosemite_1536_no_crop/zone_A/images/IMG_0.jpg')
X, Y, _ = reference_image.shape
print(X, Y)

# Print the number of images in the training and testing data
print((TRAIN_SIZE, TEST_SIZE))

In [None]:
def create_hdf5(dataset_path: str, dataset: str):
    """
    Create empty training and validation HDF5 files.

    Note:
    The files are created as [dataset_path]/train_[dataset].h5 and
    [dataset_path]/valid_[dataset].h5. Existing files will be overwritten.

    Args:
        dataset_path: folder in which both files are created (it is created
            first if it does not exist)
        dataset: dataset name used in the two file names

    Returns:
        A tuple (train_h5, valid_h5) of open, writable h5py.File objects.
        The caller is responsible for closing both.
    """
    # create output folder if it does not exist
    os.makedirs(dataset_path, exist_ok=True)

    # create HDF5 files: [dataset_path]/(train | valid)_[dataset].h5
    train_h5 = h5py.File(os.path.join(dataset_path, f'train_{dataset}.h5'), 'w')
    try:
        valid_h5 = h5py.File(os.path.join(dataset_path, f'valid_{dataset}.h5'), 'w')
    except Exception:
        # do not leak the first handle when the second file cannot be created
        train_h5.close()
        raise

    return train_h5, valid_h5


def generate_label(label_info: List[Tuple[float, float]], image_shape: List[int]):
    """
    Generate a density map based on objects positions.

    Each position is marked with 1 at pixel (row=int(y), column=int(x)) of an
    empty float32 map, which is then passed to `gaussian_filter_density`.
    Positions outside the map (negative, or beyond the last row/column) are
    ignored rather than wrapped around to the opposite edge.

    Args:
        label_info: (x, y) objects positions
        image_shape: (rows, columns), i.e. (height, width), of the density map
            to be generated

    Returns:
        The density map returned by `gaussian_filter_density`.
    """
    rows, cols = image_shape[0], image_shape[1]

    # create an empty density map
    label = np.zeros(image_shape, dtype=np.float32)

    # loop over objects positions and mark them with 1 on the label;
    # the lower bound matters because a negative index would silently
    # address a pixel counted from the opposite border
    for x, y in label_info:
        if 0 <= y < rows and 0 <= x < cols:
            label[int(y), int(x)] = 1

    # turn the point annotations into a density map
    label = gaussian_filter_density(label)

    return label


def generate_our_own_data(dataset_name):
    """
    Build the training and validation HDF5 files of density maps.

    For every image path in the module-level TRAIN_IMAGES / TEST_IMAGES lists
    the matching label file is located by replacing ".jpg" with ".txt" and
    "images" with "labels" in the path. Its points are turned into a density
    map with `generate_label`, which is stored as a dataset of shape
    (1, 1, X, Y) named after the label file (without ".txt").

    Args:
        dataset_name: name used for dataloader/train_[dataset_name].h5 and
            dataloader/valid_[dataset_name].h5

    Returns:
        None. Both HDF5 files are closed before returning, also on error.
    """
    # train and valid HDF5 files are created in dataloader/
    train_h5, valid_h5 = create_hdf5("dataloader/", dataset_name)

    def fill_h5(h5, label_path):
        """
        Save the density map of one label file in the given HDF5 file.

        Args:
            h5: HDF5 file
            label_path: path to label file ("x y" per line, blank lines ignored)
        """
        labels = []
        with open(label_path, "r") as f:
            for tree in f:
                fields = tree.split()
                if not fields:
                    continue
                x, y = fields
                labels.append((float(x), float(y)))

        # X is the number of rows and Y the number of columns of the images,
        # so the map must be generated as (X, Y) to fit the dataset slot below
        label = generate_label(labels, [X, Y])

        # save data to HDF5 file
        name = os.path.basename(label_path).replace(".txt", "")
        h5.create_dataset(name, (1, 1, X, Y))
        h5[name][0, 0] = label

    # fill HDF5 files with labels from training and testing datasets
    try:
        for i, img_path in enumerate(TRAIN_IMAGES):
            print("train", i)
            fill_h5(train_h5, img_path.replace(".jpg", ".txt").replace("images", "labels"))

        for i, img_path in enumerate(TEST_IMAGES):
            print("test", i)
            fill_h5(valid_h5, img_path.replace(".jpg", ".txt").replace("images", "labels"))
    finally:
        # close HDF5 files even when a label file is missing or malformed
        train_h5.close()
        valid_h5.close()

In [None]:
# Writes dataloader/train_yosemite_1536_no_crop.h5 and dataloader/valid_yosemite_1536_no_crop.h5
generate_our_own_data("yosemite_1536_no_crop")

## Code to split the Yosemite density maps of size 1536x1536 into 512x512 sections

In [None]:
#Split code
import os
import h5py
import numpy as np

def splitDensityMap(original_file_path: str, num_splits: int = 3):
    """
    Split every density map of an HDF5 file into a grid of smaller sections
    and save them as separate datasets in a new HDF5 file.

    Each dataset is expected to have at least four dimensions, the last two
    spatial ones being (height, width). It is cut into `num_splits` x
    `num_splits` sections of `height // num_splits` by `width // num_splits`
    pixels, numbered row by row and stored as `<key>_section_<k>`.

    The new file is written next to the original with `_sections` inserted
    before the extension (e.g. `train.h5` -> `train_sections.h5`), so it can
    never be the same path as the input file. Sections are written as soon as
    they are cut, so only one density map is held in memory at a time.

    Args:
        original_file_path: The path to the original HDF5 file containing the density maps.
        num_splits: Number of sections along each spatial axis (default 3,
            i.e. nine sections per map).

    Returns:
        None
    """
    # Build the output path: same folder, "<name>_sections<ext>"
    original_dir = os.path.dirname(original_file_path)
    stem, ext = os.path.splitext(os.path.basename(original_file_path))
    combined_file_path = os.path.join(original_dir, f'{stem}_sections{ext}')

    with h5py.File(original_file_path, 'r') as f, \
            h5py.File(combined_file_path, 'w') as combined_file:
        print(len(f.keys()))
        for key in f.keys():
            data = f[key][:]

            # Size of one section along each spatial axis
            height, width = data.shape[2:4]
            section_height = height // num_splits
            section_width = width // num_splits

            for i in range(num_splits):
                rows = slice(i * section_height, (i + 1) * section_height)
                for j in range(num_splits):
                    cols = slice(j * section_width, (j + 1) * section_width)
                    # Create a dataset for each section in the new HDF5 file
                    combined_file.create_dataset(
                        f'{key}_section_{i * num_splits + j}',
                        data=data[:, :, rows, cols],
                    )

In [None]:
# NOTE(review): generate_our_own_data("yosemite_1536_no_crop") writes
# dataloader/train_yosemite_1536_no_crop.h5; the path below has no "_no_crop"
# suffix - confirm that this file exists (e.g. was renamed) before running.
original_file_path = 'dataloader/train_yosemite_1536.h5' # Specify the path to the original HDF5 file
splitDensityMap(original_file_path)

## Code to generate the HDF5 files for the London dataset

In [None]:
# For London dataset
# Define the paths to the training, validation and testing data
# Note: TRAIN_PATH and TEST_PATH were lists of zone folders in the Yosemite
# cells; from here on they are rebound to single folder paths (strings).
TRAIN_PATH = 'datasets/london/train'
VAL_PATH = 'datasets/london/val'
TEST_PATH = 'datasets/london/test'

In [None]:
# Read London dataset files
# Keep only the .jpg entries of each split's "images" folder (file names, not full paths)
TRAIN_IMAGES = [file for file in os.listdir(TRAIN_PATH + "/images") if file.endswith(".jpg")]
VAL_IMAGES = [file for file in os.listdir(VAL_PATH + "/images") if file.endswith(".jpg")]
# Training and validation images are stored together, training images first
TRAIN_VAL_IMAGES = TRAIN_IMAGES + VAL_IMAGES

TEST_IMAGES = [file for file in os.listdir(TEST_PATH + "/images") if file.endswith(".jpg")]

# Get the number of images in the training, validation and testing data
TRAIN_SIZE, VAL_SIZE, TEST_SIZE = len(TRAIN_IMAGES), len(VAL_IMAGES), len(TEST_IMAGES)
TRAIN_VAL_SIZE = TRAIN_SIZE + VAL_SIZE

# Get the dimensions of the images from the first training image
first_train_image = os.path.join(TRAIN_PATH, "images", TRAIN_IMAGES[0])
X, Y, _ = plt.imread(first_train_image).shape
print(X, Y)

print((TRAIN_VAL_SIZE, TEST_SIZE))

In [None]:
def create_hdf5(dataset_path: str, dataset: str):
    """
    Create empty training and validation HDF5 files.

    Note:
    The files are created as [dataset_path]/train_[dataset].h5 and
    [dataset_path]/valid_[dataset].h5. Existing files will be overwritten.

    Args:
        dataset_path: folder in which both files are created (it is created
            first if it does not exist)
        dataset: dataset name used in the two file names

    Returns:
        A tuple (train_h5, valid_h5) of open, writable h5py.File objects.
        The caller is responsible for closing both.
    """
    # create output folder if it does not exist
    os.makedirs(dataset_path, exist_ok=True)

    # create HDF5 files: [dataset_path]/(train | valid)_[dataset].h5
    train_h5 = h5py.File(os.path.join(dataset_path, f'train_{dataset}.h5'), 'w')
    try:
        valid_h5 = h5py.File(os.path.join(dataset_path, f'valid_{dataset}.h5'), 'w')
    except Exception:
        # do not leak the first handle when the second file cannot be created
        train_h5.close()
        raise

    return train_h5, valid_h5


def generate_label(img_path, root_path):
    """
    Load the precomputed density map that belongs to an image.

    The map is read from [root_path]/labels/[name]_densitymap.npy, where
    [name] is the image file name up to its first dot.

    Args:
        img_path: path (or file name) of the image; only its base name is used
        root_path: dataset split folder that contains the 'labels' subfolder

    Returns:
        A tuple (gauss_im, name): the density map as a float32 torch tensor
        and the image name without extension.
    """
    # Derive the label file name from the image name
    name = os.path.basename(img_path).split('.')[0]
    gauss_path = os.path.join(root_path, 'labels', '{}_densitymap.npy'.format(name))

    # Load the stored density map and convert it to a float tensor
    gauss_im = torch.from_numpy(np.load(gauss_path)).float()

    return gauss_im, name


def generate_our_own_data(dataset_name):
    """
    Build the training and validation HDF5 files of density maps.

    The density maps of the module-level TRAIN_VAL_IMAGES list (training
    images first, then validation images) go to the training file, those of
    TEST_IMAGES to the validation file. Each map is loaded with
    `generate_label` and stored as a dataset of shape (1, 1, X, Y) named after
    the image.

    Args:
        dataset_name: name used for dataloader/train_[dataset_name].h5 and
            dataloader/valid_[dataset_name].h5

    Returns:
        None. Both HDF5 files are closed before returning, also on error.
    """
    # train and valid HDF5 files are created in dataloader/
    train_h5, valid_h5 = create_hdf5("dataloader/", dataset_name)

    def fill_h5(h5, file, root):
        """
        Save the density map of one image in the given HDF5 file.

        Args:
            h5: HDF5 file
            file: path of the image
            root: dataset split folder that contains the 'labels' subfolder
        """
        # load the precomputed density map of the image
        label, name = generate_label(file, root)

        # save data to HDF5 file
        h5.create_dataset(name, (1, 1, X, Y))
        h5[name][0, 0] = label

    try:
        # the first TRAIN_SIZE entries come from the training split,
        # the remaining ones from the validation split
        for i, file_name in enumerate(TRAIN_VAL_IMAGES):
            print("train", i)
            path = TRAIN_PATH if i < TRAIN_SIZE else VAL_PATH
            fill_h5(train_h5, os.path.join(path, "images", file_name), path)

        # test images live under TEST_PATH, not under the last training folder
        for i, file_name in enumerate(TEST_IMAGES):
            print("test", i)
            fill_h5(valid_h5, os.path.join(TEST_PATH, "images", file_name), TEST_PATH)
    finally:
        # close HDF5 files even when a density map is missing
        train_h5.close()
        valid_h5.close()

In [None]:
# Writes dataloader/train_london.h5 and dataloader/valid_london.h5
generate_our_own_data("london")