Dataset Preprocessing

In [48]:
import os
import glob
from pathlib import Path
import shutil
from tqdm import tqdm
import json
import numpy as np
import cv2

In [49]:
# Dataset locations, written relative to the notebook's working directory.
# Root of the JHU-CROWD++ data; passed to process_jhu as `dataset_path`.
jhu_path = "crowd_datasets/jhu_crowd_v2.0"
# Root of the NWPU-Crowd data; passed to process_nwpu as `dataset_path`.
nwpu_path = "crowd_datasets/NWPU-Crowd"
# Output root: both process_* functions create `<unified_path>/<split>/<PREFIX>_<image_id>/` here.
unified_path = "crowd_datasets/Unified-Crowd"

JHU-CROWD++

In [50]:
def create_annotation_file_jhu(src: str, dst: str, width: int, height: int):
    """Convert a JHU-CROWD++ ground-truth text file into a saved ``(N, 2)`` integer array.

    Every non-blank line of ``src`` is split on whitespace; its first two fields
    are read as the integer ``x y`` coordinates of one point and any remaining
    fields are ignored. The points are clipped to the image bounds and written
    with ``np.save``.

    Args:
        src: Path of the text annotation file to read.
        dst: Path handed to ``np.save`` for the resulting array.
        width: Image width in pixels; x is clipped to ``[0, width - 1]``.
        height: Image height in pixels; y is clipped to ``[0, height - 1]``.

    Raises:
        ValueError: If a non-blank line has fewer than two fields, or a
            coordinate field is not an integer literal.
    """
    rows = []
    with open(src, "r") as src_file:
        for line_no, line in enumerate(src_file, start=1):
            # split() with no argument tolerates tabs, repeated spaces and the
            # trailing newline, and yields [] for blank lines.
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    f"{src}:{line_no}: expected at least two fields, got {line!r}"
                )
            rows.append(fields[:2])

    points = np.array(rows, dtype=int)
    if points.shape[0] > 0:
        points = np.clip(points, a_min=[0, 0], a_max=[width - 1, height - 1])
    else:
        # Keep the same integer dtype and (0, 2) shape as the non-empty case so
        # consumers never have to special-case images without annotations.
        points = np.empty((0, 2), dtype=int)
    np.save(dst, points)

In [51]:
def process_jhu(dataset_path, split):
    """Convert one JHU-CROWD++ split into the unified per-sample directory layout.

    For every ``<dataset_path>/<split>/images/*.jpg`` this creates
    ``<unified_path>/<split>/JHU_<image_id>/`` containing:

    * ``img.jpg``: a symlink to the absolute path of the source image;
    * ``points.npy``: the annotation points from
      ``<dataset_path>/<split>/gt/<image_id>.txt``, clipped to the image size.

    An existing sample directory is removed and rebuilt, so the function can
    be re-run safely.

    Args:
        dataset_path: Root directory of the JHU-CROWD++ dataset.
        split: Name of the split sub-directory (e.g. ``"train"``).

    Raises:
        ValueError: If an image cannot be decoded by ``cv2.imread``.
    """
    split_dir = os.path.join(dataset_path, split)
    image_paths = glob.glob(os.path.join(split_dir, 'images', '*.jpg'))
    for image_path in tqdm(image_paths):
        image_id = Path(image_path).stem
        dir_path = os.path.join(unified_path, split, f'JHU_{image_id}')

        # Decode first: cv2.imread returns None instead of raising, and failing
        # here keeps any previously generated sample directory intact.
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not read image: {image_path}")
        height, width = img.shape[:2]

        if os.path.isdir(dir_path):
            shutil.rmtree(dir_path)
        # makedirs also creates <unified_path>/<split> on a first run.
        os.makedirs(dir_path)
        os.symlink(os.path.abspath(image_path), os.path.join(dir_path, 'img.jpg'))
        create_annotation_file_jhu(
            os.path.join(split_dir, 'gt', f'{image_id}.txt'),
            os.path.join(dir_path, 'points.npy'),
            width=width,
            height=height,
        )

In [52]:
# Convert every JHU-CROWD++ split in turn: train, then validation, then test.
for split in ("train", "val", "test"):
    process_jhu(jhu_path, split)

  0%|          | 0/2272 [00:00<?, ?it/s]

100%|██████████| 2272/2272 [01:01<00:00, 36.76it/s]
100%|██████████| 500/500 [00:11<00:00, 41.92it/s]
100%|██████████| 1599/1599 [00:48<00:00, 33.15it/s]


NWPU-CROWD

In [53]:
def create_annotation_file_nwpu(src: str, dst: str, width: int, height: int):
    """Convert an NWPU-Crowd JSON annotation into a saved ``(N, 2)`` integer array.

    The ``"points"`` entry of the JSON document in ``src`` is converted to an
    integer array (fractional coordinates are truncated by the cast), clipped
    to the image bounds and written with ``np.save``.

    Args:
        src: Path of the JSON annotation file; must contain a ``"points"`` key.
        dst: Path handed to ``np.save`` for the resulting array.
        width: Image width in pixels; x is clipped to ``[0, width - 1]``.
        height: Image height in pixels; y is clipped to ``[0, height - 1]``.

    Raises:
        KeyError: If the JSON document has no ``"points"`` key.
    """
    with open(src, "r") as src_file:
        points = json.load(src_file)['points']
    points = np.array(points, dtype=int)
    if points.shape[0] > 0:
        points = np.clip(points, a_min=[0, 0], a_max=[width - 1, height - 1])
    else:
        # Keep the same integer dtype and (0, 2) shape as the non-empty case so
        # consumers never have to special-case images without annotations.
        points = np.empty((0, 2), dtype=int)
    np.save(dst, points)

In [54]:
def process_nwpu(dataset_path, split):
    """Convert one NWPU-Crowd split into the unified per-sample directory layout.

    Image ids are read from ``<dataset_path>/<split>.txt`` (the first
    whitespace-separated field of each non-blank line). For every id this
    creates ``<unified_path>/<split>/NWPU_<image_id>/`` containing:

    * ``img.jpg``: a symlink to the absolute path of
      ``<dataset_path>/images/<image_id>.jpg``;
    * ``points.npy``: the annotation points from
      ``<dataset_path>/jsons/<image_id>.json``, clipped to the image size.

    An existing sample directory is removed and rebuilt, so the function can
    be re-run safely.

    Args:
        dataset_path: Root directory of the NWPU-Crowd dataset.
        split: Split name; selects both ``<split>.txt`` and the output folder.

    Raises:
        ValueError: If an image cannot be decoded by ``cv2.imread``.
    """
    with open(os.path.join(dataset_path, f'{split}.txt')) as f:
        # split() drops the trailing newline even when the line holds only the
        # id, and blank lines are skipped rather than turned into a bogus id.
        image_ids = [line.split()[0] for line in f if line.strip()]

    for image_id in tqdm(image_ids):
        dir_path = os.path.join(unified_path, split, f'NWPU_{image_id}')
        image_path = os.path.join(dataset_path, 'images', f'{image_id}.jpg')

        # Decode first: cv2.imread returns None instead of raising, and failing
        # here keeps any previously generated sample directory intact.
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not read image: {image_path}")
        height, width = img.shape[:2]

        if os.path.isdir(dir_path):
            shutil.rmtree(dir_path)
        # makedirs also creates <unified_path>/<split> on a first run.
        os.makedirs(dir_path)
        os.symlink(os.path.abspath(image_path), os.path.join(dir_path, 'img.jpg'))
        create_annotation_file_nwpu(
            os.path.join(dataset_path, 'jsons', f'{image_id}.json'),
            os.path.join(dir_path, 'points.npy'),
            width=width,
            height=height,
        )

In [55]:
# Convert the annotated NWPU-Crowd splits: train, then validation.
# Test samples have no annotations, so that split is not processed.
for split in ("train", "val"):
    process_nwpu(nwpu_path, split)

  0%|          | 0/3109 [00:00<?, ?it/s]

 74%|███████▍  | 2293/3109 [04:28<01:23,  9.78it/s]Invalid SOS parameters for sequential JPEG
100%|██████████| 3109/3109 [06:00<00:00,  8.63it/s]
100%|██████████| 500/500 [00:53<00:00,  9.26it/s]
