## Import required packages

In [1]:
from PIL import Image
import PIL.Image
import os
import random
import matplotlib.pyplot as plt
import shutil
import zipfile
from glob import glob
from typing import List, Tuple

import click
import h5py
import numpy as np
from scipy.io import loadmat
from scipy.ndimage import gaussian_filter
from matplotlib import pyplot as plt

from utils import avg_box, gaussian_filter_density
import math
import torch
import scipy.io as sio

# Raise Pillow's pixel-count limit so the full-resolution Yosemite rasters can
# be opened: 1262080000 = 27200 x 46400, the image size noted in the crop function.
PIL.Image.MAX_IMAGE_PIXELS = 1262080000

# Cropping Yosemite dataset

**Create annotations for the Yosemite dataset**

In [4]:
# Write one "x y" line for every pixel of the label mask whose value is > 0.
# Lines are emitted column by column (x ascending, then y ascending).
file_path = "Dataset/Yosemite/labels.txt"

# Number of image columns converted to a NumPy array per step. Scanning the
# mask in vertical strips keeps the extra memory small and avoids a Python-level
# loop over every single pixel of the very large raster.
STRIP_WIDTH = 256

# Both context managers guarantee that the mask image and the output file are
# closed even if the scan fails half-way.
with Image.open('Dataset/Yosemite/z20_label.png') as im, open(file_path, "w") as f:
    if len(im.getbands()) != 1:
        # A per-pixel "value > 0" test is only meaningful for a single-band mask.
        raise ValueError(f"Expected a single-band label mask, got mode {im.mode!r}")

    x, y = im.size  # (width, height) in pixels
    for strip_left in range(0, x, STRIP_WIDTH):
        strip_right = min(strip_left + STRIP_WIDTH, x)
        # np.asarray yields (rows, cols); transposing makes np.nonzero report
        # hits ordered by column first, then by row.
        strip = np.asarray(im.crop((strip_left, 0, strip_right, y)))
        cols, rows = np.nonzero(strip.T > 0)
        f.writelines(
            f"{col + strip_left} {row}\n"
            for col, row in zip(cols.tolist(), rows.tolist())
        )

## Crop function

In [5]:
def smallCrop(image, cw, ch, labels, left, top):
    """
    Cut one cw x ch tile out of an image and re-base the point labels inside it.

    Args:
        image: object exposing ``size`` as (width, height) and ``crop(box)``
            (a PIL image in this notebook)
        cw: crop width in pixels
        ch: crop height in pixels
        labels: iterable of (x, y) point coordinates in full-image pixels
        left: x coordinate of the crop's left edge
        top: y coordinate of the crop's top edge

    Returns:
        A tuple (cropped_image, updated_labels) where updated_labels holds the
        (x - left, y - top) coordinates of every label that lies in the tile.

    Raises:
        ValueError: if the requested crop is wider or taller than the image.
    """
    width, height = image.size
    # Check each dimension on its own: a crop may be as large as the image,
    # and a too-tall crop must be rejected just like a too-wide one.
    if cw > width or ch > height:
        raise ValueError("Crop size exceeds image dimensions")

    # 27200 x 46400 pixels in image
    # zone 19200 x 38400
    right = left + cw
    bottom = top + ch
    cropped_image = image.crop((left, top, right, bottom))

    # The tile covers the half-open ranges [left, right) x [top, bottom).
    # Excluding the far edges keeps re-based coordinates strictly below
    # (cw, ch) and stops a label on a shared border from being assigned to
    # two neighbouring tiles.
    updated_labels = [
        (x - left, y - top)
        for x, y in labels
        if left <= x < right and top <= y < bottom
    ]

    return cropped_image, updated_labels

In [10]:
def main_row(input_image_path, input_label_path, output_folder, cw, ch):
    """
    Tile the labelled region of a large image into four vertical zones.

    The region starts at pixel (4000, 4000) and is 19200 px wide and 38400 px
    tall. It is split into four side-by-side zones of equal width
    (zone_A .. zone_D). Each zone is scanned row by row with a cw x ch window;
    every tile is saved as ``<output_folder>/<zone>/images/IMG_<n>.jpg`` with
    its re-based labels in ``<output_folder>/<zone>/labels/IMG_<n>.txt``.
    The counter <n> keeps running across zones, so file names are unique over
    the whole output folder.

    Args:
        input_image_path: path of the image to tile
        input_label_path: text file with one "x y" label per line
        output_folder: root folder for the zone sub-folders (created if missing)
        cw: tile width in pixels
        ch: tile height in pixels
    """
    folders = ['zone_A', 'zone_B', 'zone_C', 'zone_D']
    zone_width = int(19200/4)

    # Create output folders if they don't exist
    os.makedirs(output_folder, exist_ok=True)
    for folder in folders:
        zone = os.path.join(output_folder, folder)
        os.makedirs(os.path.join(zone, 'images'), exist_ok=True)
        os.makedirs(os.path.join(zone, 'labels'), exist_ok=True)

    # Load labels from a txt file (one label per line in the format "x y").
    # Blank lines are skipped so a trailing newline cannot break unpacking later.
    with open(input_label_path, 'r') as label_file:
        labels = [tuple(map(float, line.split())) for line in label_file if line.strip()]

    x_min = 4000
    x_max = x_min + zone_width - cw  # last left edge that keeps a tile inside the zone
    y_min = 4000
    y_max = y_min + 38400
    count = 0  # running tile index, shared by all zones

    # The context manager releases the (very large) source image when done.
    with Image.open(input_image_path) as image:
        for folder in folders:
            print(f"Width range {x_min} - {x_max}")
            print(f"Height range {y_min} - {y_max}")
            zone_count = 0

            for top in range(y_min, y_max, ch):
                for left in range(x_min, x_max, cw):
                    print(count, end = " ")
                    print("Left:", left, "Top:", top)
                    cropped_image, updated_labels = smallCrop(image, cw, ch, labels, left, top)

                    # Save the cropped image
                    output_image_path = os.path.join(output_folder, folder, 'images', f'IMG_{count}.jpg')
                    cropped_image.save(output_image_path)

                    # Save the updated labels to a new txt file
                    output_label_path = os.path.join(output_folder, folder, 'labels', f'IMG_{count}.txt')
                    with open(output_label_path, 'w') as updated_label_file:
                        for x, y in updated_labels:
                            updated_label_file.write(f"{x} {y}\n")

                    zone_count += 1
                    count += 1

            print(f"The number of images and labels in {folder}: {zone_count}\n")
            # Shift the horizontal window to the next zone.
            x_min += zone_width
            x_max += zone_width

In [11]:
#Row by Row cropping
input_image_path = 'Dataset/Yosemite/z20_data.png'  # Specify the path to your input image
input_label_path = 'Dataset/Yosemite/labels.txt'  # Specify the path to your input labels file
output_folder = 'Dataset/Yosemite/Dataset_Row_1536x2560'  # Specify the output folder where cropped images and labels will be saved
crop_width =  1536 # Width of each crop in pixels (crops are rectangular, not square)
crop_height = 2560  # Height of each crop in pixels

main_row(input_image_path, input_label_path, output_folder, crop_width, crop_height)

Width range 4000 - 7264
Height range 4000 - 42400
0 Left: 4000 Top: 4000
1 Left: 5536 Top: 4000
2 Left: 7072 Top: 4000
3 Left: 4000 Top: 6560
4 Left: 5536 Top: 6560
5 Left: 7072 Top: 6560
6 Left: 4000 Top: 9120
7 Left: 5536 Top: 9120
8 Left: 7072 Top: 9120
9 Left: 4000 Top: 11680
10 Left: 5536 Top: 11680
11 Left: 7072 Top: 11680
12 Left: 4000 Top: 14240
13 Left: 5536 Top: 14240
14 Left: 7072 Top: 14240
15 Left: 4000 Top: 16800
16 Left: 5536 Top: 16800
17 Left: 7072 Top: 16800
18 Left: 4000 Top: 19360
19 Left: 5536 Top: 19360
20 Left: 7072 Top: 19360
21 Left: 4000 Top: 21920
22 Left: 5536 Top: 21920
23 Left: 7072 Top: 21920
24 Left: 4000 Top: 24480
25 Left: 5536 Top: 24480
26 Left: 7072 Top: 24480
27 Left: 4000 Top: 27040
28 Left: 5536 Top: 27040
29 Left: 7072 Top: 27040
30 Left: 4000 Top: 29600
31 Left: 5536 Top: 29600
32 Left: 7072 Top: 29600
33 Left: 4000 Top: 32160
34 Left: 5536 Top: 32160
35 Left: 7072 Top: 32160
36 Left: 4000 Top: 34720
37 Left: 5536 Top: 34720
38 Left: 7072 Top: 

In [None]:
def plot_cropped_images_with_labels(folder):
    """
    Show every cropped image of a zone with its point labels drawn on top.

    Images are read from ``<folder>/images`` and labels from
    ``<folder>/labels``; an image is skipped when no label file with the same
    base name and a ``.txt`` extension exists. Images are shown in sorted
    file-name order, one figure per image.

    Args:
        folder: zone folder that contains the 'images' and 'labels' sub-folders
    """
    image_folder = os.path.join(folder, 'images')
    label_folder = os.path.join(folder, 'labels')

    # A set gives constant-time "does a label file exist" checks.
    label_files = set(os.listdir(label_folder))

    # Sorting makes the display order reproducible (os.listdir order is arbitrary).
    for image_filename in sorted(os.listdir(image_folder)):
        # Check if a corresponding label file exists
        label_filename = os.path.splitext(image_filename)[0] + '.txt'
        if label_filename not in label_files:
            continue

        image_path = os.path.join(image_folder, image_filename)
        label_path = os.path.join(label_folder, label_filename)

        # Load labels ("x y" per line); blank lines are ignored
        with open(label_path, 'r') as label_file:
            labels = [tuple(map(float, line.split())) for line in label_file if line.strip()]

        plt.figure(figsize=(6, 6))
        # imshow copies the pixel data, so the file can be closed right after.
        with Image.open(image_path) as cropped_image:
            plt.imshow(cropped_image)
        plt.title(image_filename)

        if labels:
            # Draw all labels as red points with a single plot call instead of
            # one call (and one artist) per point.
            xs, ys = zip(*labels)
            plt.plot(xs, ys, 'ro', markersize=2)

        plt.axis('off')
        plt.show()

# Visual check: display every zone_A crop with its labels overlaid.
folder = 'Dataset/Yosemite/Dataset_Row_1536x2560/zone_A'
plot_cropped_images_with_labels(folder)

# Create HDF5 files for the London and Yosemite datasets

## Code to generate HDF5 files for the Yosemite dataset

In [29]:
# For Yosemite dataset
# Zones B and D are used as the training split, zones A and C as the test split.
TRAIN_PATH = ['Dataset/Yosemite/Dataset_Row_1536x2560/zone_B', 'Dataset/Yosemite/Dataset_Row_1536x2560/zone_D']
TEST_PATH = ['Dataset/Yosemite/Dataset_Row_1536x2560/zone_A', 'Dataset/Yosemite/Dataset_Row_1536x2560/zone_C']
# Root folder that holds the four zone folders.
DATASET_PATH = 'Dataset/Yosemite/Dataset_Row_1536x2560'

In [None]:
# Read Yosemite dataset files: gather the .jpg crops of every zone in each split
TRAIN_IMAGES = [
    os.path.join(zone, "images", name)
    for zone in TRAIN_PATH
    for name in os.listdir(os.path.join(zone, "images"))
    if name.endswith(".jpg")
]

TEST_IMAGES = [
    os.path.join(zone, "images", name)
    for zone in TEST_PATH
    for name in os.listdir(os.path.join(zone, "images"))
    if name.endswith(".jpg")
]

TRAIN_SIZE, TEST_SIZE = len(TRAIN_IMAGES), len(TEST_IMAGES)

# The array shape is (rows, cols, channels), so X is the image height and
# Y the image width of a crop.
reference_crop = plt.imread('Dataset/Yosemite/Dataset_Row_1536x2560/zone_A/images/IMG_0.jpg')
X, Y, _ = reference_crop.shape
print(X, Y)

print((TRAIN_SIZE, TEST_SIZE))

In [34]:
def create_hdf5(dataset_path: str, dataset: str):
    """
    Open a fresh pair of HDF5 files for the training and validation labels.

    Note:
    The files are written to [dataset_path]/train_[dataset].h5 and
    [dataset_path]/valid_[dataset].h5. They are opened in 'w' mode, so
    existing files with these names are overwritten.

    Args:
        dataset_path: output folder; created when it does not exist
        dataset: name inserted into both file names

    Returns:
        A tuple (train_h5, valid_h5) of open HDF5 file handles. The caller is
        responsible for closing them.
    """
    # make sure the output folder is there before opening files in it
    os.makedirs(dataset_path, exist_ok=True)

    train_file = os.path.join(dataset_path, f'train_{dataset}.h5')
    valid_file = os.path.join(dataset_path, f'valid_{dataset}.h5')

    return h5py.File(train_file, 'w'), h5py.File(valid_file, 'w')


def generate_label(label_info: np.array, image_shape: List[int]):
    """
    Generate a density map based on objects positions.

    Args:
        label_info: iterable of object positions; the first two values of each
            entry are (x, y) = (column, row), any further values are ignored
        image_shape: (rows, cols), i.e. (height, width), of the density map

    Returns:
        The result of ``gaussian_filter_density`` applied to a float32 map of
        shape ``image_shape`` that holds 1 at every object position.
        (presumably a smoothed map of the same shape; verify against utils)
    """
    # create an empty density map
    label = np.zeros(image_shape, dtype=np.float32)
    n_rows, n_cols = image_shape[0], image_shape[1]

    # mark every object position with 1
    # note: *_ because some datasets contain more info than x, y coordinates
    for x, y, *_ in label_info:
        if y < n_rows and x < n_cols:
            row, col = int(y), int(x)
            # A negative index would silently wrap around to the opposite
            # border of the map, so such points are dropped instead.
            if row >= 0 and col >= 0:
                label[row, col] = 1

    # turn the point map into a density map
    label = gaussian_filter_density(label)

    return label


def generate_our_own_data(dataset_name):
    """
    Build the Yosemite density-map HDF5 files.

    For every image listed in TRAIN_IMAGES / TEST_IMAGES the matching label
    file (``<zone>/labels/<image name>.txt``) is read, converted to a density
    map with ``generate_label`` and stored as a dataset of shape (1, 1, X, Y)
    named after the image. Training maps go to
    ``Density_Map/train_<dataset_name>.h5`` and test maps to
    ``Density_Map/valid_<dataset_name>.h5``.

    Relies on the notebook globals TRAIN_IMAGES, TEST_IMAGES, X and Y.

    Args:
        dataset_name: name inserted into the two HDF5 file names
    """

    def label_path_for(img_path):
        """Map '<zone>/images/<name>.jpg' to '<zone>/labels/<name>.txt'."""
        image_dir, image_file = os.path.split(img_path)
        # Only the last 'images' folder is swapped; a blanket str.replace
        # would also rewrite any other "images" substring in the path.
        zone_dir = os.path.dirname(image_dir)
        stem = os.path.splitext(image_file)[0]
        return os.path.join(zone_dir, "labels", stem + ".txt")

    def fill_h5(h5, label_path):
        """
        Save the density map of one label file in the given HDF5 file.

        Args:
            h5: HDF5 file
            label_path: path to label file
        """
        labels = []
        with open(label_path, "r") as f:
            for tree in f:
                fields = tree.split()
                if not fields:
                    continue  # tolerate blank lines
                x, y = fields
                labels.append((float(x), float(y)))

        # generate_label allocates np.zeros(image_shape) and indexes it as
        # [row][col], so the shape has to be (X, Y) -- the same order as the
        # dataset created below. Passing (Y, X) only works for square images.
        label = generate_label(labels, [X, Y])

        # save data to HDF5 file
        name = os.path.splitext(os.path.basename(label_path))[0]
        dset = h5.create_dataset(name, (1, 1, *(X, Y)))
        dset[0, 0] = label

    # train and valid files are created in Density_Map/
    train_h5, valid_h5 = create_hdf5("Density_Map/", dataset_name)
    try:
        for i, img_path in enumerate(TRAIN_IMAGES):
            print("train", i)
            fill_h5(train_h5, label_path_for(img_path))

        for i, img_path in enumerate(TEST_IMAGES):
            print("test", i)
            fill_h5(valid_h5, label_path_for(img_path))
    finally:
        # close HDF5 files even when a label file is missing or malformed
        train_h5.close()
        valid_h5.close()

In [None]:
# Writes Density_Map/train_yosemite_1536x2560.h5 and Density_Map/valid_yosemite_1536x2560.h5
generate_our_own_data("yosemite_1536x2560")

## Code to generate HDF5 files for the London dataset

In [36]:
# For London dataset
# NOTE: TRAIN_PATH and TEST_PATH are lists of zone folders in the Yosemite
# section; here they are rebound to single folder names (strings). Re-run the
# Yosemite path cell before executing any Yosemite cell again.
TRAIN_PATH = 'Dataset/London/train'
VAL_PATH = 'Dataset/London/val'
TEST_PATH = 'Dataset/London/test'

In [None]:
# Read London dataset files: .jpg file names (without folder) of each split
TRAIN_IMAGES, VAL_IMAGES, TEST_IMAGES = (
    [name for name in os.listdir(split_dir + "/images") if name.endswith(".jpg")]
    for split_dir in (TRAIN_PATH, VAL_PATH, TEST_PATH)
)
# Training and validation images are handled as one combined list.
TRAIN_VAL_IMAGES = TRAIN_IMAGES + VAL_IMAGES

TRAIN_SIZE, VAL_SIZE, TEST_SIZE = map(len, (TRAIN_IMAGES, VAL_IMAGES, TEST_IMAGES))
TRAIN_VAL_SIZE = TRAIN_SIZE + VAL_SIZE

# The array shape is (rows, cols, channels), so X is the image height and
# Y the image width.
first_train_image = os.path.join(TRAIN_PATH, "images", TRAIN_IMAGES[0])
X, Y, _ = plt.imread(first_train_image).shape
print(X, Y)

print((TRAIN_VAL_SIZE, TEST_SIZE))

In [38]:
def create_hdf5(dataset_path: str, dataset: str):
    """
    Open a fresh pair of HDF5 files for the training and validation labels.

    Note:
    The files are written to [dataset_path]/train_[dataset].h5 and
    [dataset_path]/valid_[dataset].h5. They are opened in 'w' mode, so
    existing files with these names are overwritten.

    Args:
        dataset_path: output folder; created when it does not exist
        dataset: name inserted into both file names

    Returns:
        A tuple (train_h5, valid_h5) of open HDF5 file handles. The caller is
        responsible for closing them.
    """
    # NOTE(review): this cell re-declares create_hdf5 with the same behaviour
    # as the definition in the Yosemite section; consider defining it once.

    # make sure the output folder is there before opening files in it
    os.makedirs(dataset_path, exist_ok=True)

    train_file = os.path.join(dataset_path, f'train_{dataset}.h5')
    valid_file = os.path.join(dataset_path, f'valid_{dataset}.h5')

    return h5py.File(train_file, 'w'), h5py.File(valid_file, 'w')


def generate_label(img_path, root_path):
    """
    Load the precomputed density map that belongs to an image.

    The map is read from ``<root_path>/labels/<name>_densitymap.npy`` where
    <name> is the base name of ``img_path`` up to its first '.'.

    Note:
    This definition replaces the Yosemite ``generate_label(label_info,
    image_shape)`` once this cell has been run.

    Args:
        img_path: path (or bare file name) of the image; only its base name
            is used
        root_path: split folder that contains the 'labels' sub-folder

    Returns:
        A tuple (density map as a float32 torch tensor, image name).
    """
    name = os.path.basename(img_path).split('.')[0]
    gauss_path = os.path.join(root_path, 'labels', '{}_densitymap.npy'.format(name))
    gauss_im = torch.from_numpy(np.load(gauss_path)).float()

    return gauss_im, name


def generate_our_own_data(dataset_name):
    """
    Build the London density-map HDF5 files.

    The density map of every training and validation image is stored in
    ``Density_Map/train_<dataset_name>.h5``; the maps of the test images go to
    ``Density_Map/valid_<dataset_name>.h5``. Each map is saved as a dataset of
    shape (1, 1, X, Y) named after its image.

    Relies on the notebook globals TRAIN_VAL_IMAGES, TEST_IMAGES, TRAIN_SIZE,
    TRAIN_PATH, VAL_PATH, TEST_PATH, X and Y.

    Args:
        dataset_name: name inserted into the two HDF5 file names
    """

    def fill_h5(h5, file, root):
        """
        Save the density map of one image in the given HDF5 file.

        Args:
            h5: HDF5 file
            file: path of the image
            root: split folder that holds the image's 'labels' sub-folder
        """
        # load the precomputed density map of the image
        label, name = generate_label(file, root)

        # save data to HDF5 file
        h5.create_dataset(name, (1, 1, *(X, Y)))
        h5[name][0, 0] = label

    # train and valid files are created in Density_Map/
    train_h5, valid_h5 = create_hdf5("Density_Map/", dataset_name)
    try:
        for i, file_name in enumerate(TRAIN_VAL_IMAGES):
            print("train", i)
            # the first TRAIN_SIZE entries come from the train split, the rest from val
            path = TRAIN_PATH if i < TRAIN_SIZE else VAL_PATH
            fill_h5(train_h5, os.path.join(path, "images", file_name), path)

        for i, file_name in enumerate(TEST_IMAGES):
            print("test", i)
            # Build the image path from TEST_PATH rather than reusing the
            # variable left over from the training loop, which pointed at the
            # wrong split and was undefined when there were no training images.
            fill_h5(valid_h5, os.path.join(TEST_PATH, "images", file_name), TEST_PATH)
    finally:
        # close HDF5 files even when a density map is missing
        train_h5.close()
        valid_h5.close()

In [None]:
# Writes Density_Map/train_london.h5 and Density_Map/valid_london.h5
generate_our_own_data("london")