In [3]:
import os
import cv2
import numpy as np
import h5py
import pickle
from tqdm import tqdm

# Root folder holding one sub-folder per class, and the folder that receives
# the packed dataset artifacts.
dataset_dir = r"C:\Users\sagni\Downloads\archive (1)\archive\animals\animals"
save_dir = r"C:\Users\sagni\Downloads\Aminal Poching Detection"
os.makedirs(save_dir, exist_ok=True)

# Number only the sub-directories. Enumerating the raw listing would leave
# gaps in the ids whenever a stray file sits next to the class folders.
class_names = sorted(
    entry
    for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)
class_to_id = {name: idx for idx, name in enumerate(class_names)}
id_to_class = {idx: name for name, idx in class_to_id.items()}

print("Class mapping:", class_to_id)

def parse_yolo_annotation(ann_path: str, img_width: int, img_height: int):
    """Read a YOLO-style annotation file and return pixel-space boxes.

    Each usable line has five whitespace-separated tokens:
    ``class_id x_center y_center width height``, where the four geometry
    values are fractions of the image size.

    Args:
        ann_path: Path of the annotation text file.
        img_width: Width in pixels used to scale the x coordinates.
        img_height: Height in pixels used to scale the y coordinates.

    Returns:
        A ``(bboxes, labels)`` pair of parallel lists. ``bboxes`` holds
        ``[xmin, ymin, xmax, ymax]`` integer lists clamped to the image
        bounds; ``labels`` holds the integer class ids. Both lists are empty
        when the file does not exist.
    """
    bboxes = []
    labels = []

    # A missing annotation file means "no boxes"; just try to open it rather
    # than checking for existence first.
    try:
        ann_file = open(ann_path, 'r')
    except FileNotFoundError:
        return bboxes, labels

    with ann_file:
        for line in ann_file:
            parts = line.strip().split()
            if len(parts) != 5:
                continue
            try:
                cls_id = int(parts[0])
                x_center, y_center, w, h = (float(tok) for tok in parts[1:])
                xmin = int((x_center - w / 2) * img_width)
                ymin = int((y_center - h / 2) * img_height)
                xmax = int((x_center + w / 2) * img_width)
                ymax = int((y_center + h / 2) * img_height)
            except (ValueError, OverflowError):
                # Non-numeric or non-finite tokens: skip the line, the same
                # way lines with the wrong token count are skipped, instead
                # of aborting the whole run.
                continue

            # Clamp to the valid pixel index range of the image.
            xmin = max(0, xmin)
            ymin = max(0, ymin)
            xmax = min(img_width - 1, xmax)
            ymax = min(img_height - 1, ymax)

            bboxes.append([xmin, ymin, xmax, ymax])
            labels.append(cls_id)

    return bboxes, labels

print("Processing dataset with image resizing...")

target_size = (480, 360)  # width, height
# Every image is resized to target_size, so the dimensions used to scale the
# normalised boxes are the same for all samples.
w, h = target_size

# One dict per image: the resized RGB pixels plus its boxes and labels.
data = []

for class_name in tqdm(sorted(os.listdir(dataset_dir))):
    class_folder = os.path.join(dataset_dir, class_name)
    if not os.path.isdir(class_folder):
        continue

    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order (and therefore every index in the saved arrays) is reproducible.
    for file in sorted(os.listdir(class_folder)):
        if not file.endswith(".jpg"):
            continue

        img_path = os.path.join(class_folder, file)
        img = cv2.imread(img_path)
        if img is None:
            print(f"Failed to load image {img_path}")
            continue

        img = cv2.resize(img, target_size)

        # The annotation sits next to the image as "<stem>.xml.txt".
        ann_file = os.path.splitext(file)[0] + ".xml.txt"
        ann_path = os.path.join(class_folder, ann_file)

        # Boxes are scaled to the resized dimensions, not the original ones.
        bboxes, labels = parse_yolo_annotation(ann_path, w, h)

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        data.append({
            'image': img_rgb,
            # reshape keeps a well-formed (0, 4) / (0,) shape when an image
            # has no boxes.
            'bboxes': np.array(bboxes).reshape(-1, 4),
            'labels': np.array(labels).reshape(-1),
        })

print(f"Total samples collected: {len(data)}")

# Fail with a clear message instead of an IndexError on data[0] below.
if not data:
    raise RuntimeError(
        f"No images were loaded from {dataset_dir}; nothing to save."
    )

# Boxes and labels are appended in lockstep, so one count per sample covers
# both arrays.
box_counts = [len(d['bboxes']) for d in data]
max_boxes = max(box_counts)
print(f"Max boxes in one image: {max_boxes}")

num_samples = len(data)
img_h, img_w, img_c = data[0]['image'].shape

# Fixed-size arrays: samples with fewer than max_boxes boxes are zero-padded.
images_np = np.zeros((num_samples, img_h, img_w, img_c), dtype=np.uint8)
bboxes_np = np.zeros((num_samples, max_boxes, 4), dtype=np.int32)
labels_np = np.zeros((num_samples, max_boxes), dtype=np.int32)
# Zero is also a real class id, so padded label slots cannot be told apart
# from class 0 by value alone; store the valid box count per sample as well.
num_boxes_np = np.asarray(box_counts, dtype=np.int32)

for i, d in enumerate(data):
    images_np[i] = d['image']
    count = box_counts[i]
    # Only assign if there are boxes
    if count:
        bboxes_np[i, :count] = d['bboxes']
        labels_np[i, :count] = d['labels']

h5_path = os.path.join(save_dir, "animal_poaching_data.h5")
with h5py.File(h5_path, "w") as f:
    f.create_dataset("images", data=images_np, compression="gzip")
    f.create_dataset("bboxes", data=bboxes_np, compression="gzip")
    f.create_dataset("labels", data=labels_np, compression="gzip")
    # Additive dataset: readers that only use the three above are unaffected.
    f.create_dataset("num_boxes", data=num_boxes_np, compression="gzip")

print(f"HDF5 dataset saved to {h5_path}")

# Side-car metadata needed to interpret the arrays in the HDF5 file.
pkl_path = os.path.join(save_dir, "animal_poaching_meta.pkl")
with open(pkl_path, "wb") as f:
    pickle.dump({
        "class_to_id": class_to_id,
        "id_to_class": id_to_class,
        "max_boxes": max_boxes,
        "num_samples": num_samples,
        "image_shape": (img_h, img_w, img_c),
    }, f)
print(f"Metadata saved to {pkl_path}")


Class mapping: {'Gun': 0, 'Human_with_gun': 1, 'Peacock': 2, 'antelope': 3, 'badger': 4, 'bear': 5, 'bison': 6, 'boar': 7, 'chimpanzee': 8, 'coyote': 9, 'deer': 10, 'elephant': 11, 'flamingo': 12, 'fox': 13, 'goose': 14, 'gorilla': 15, 'hedgehog': 16, 'hippopotamus': 17, 'hornbill': 18, 'human': 19, 'hyena': 20, 'kangaroo': 21, 'koala': 22, 'leopard': 23, 'lion': 24, 'okapi': 25, 'orangutan': 26, 'otter': 27, 'ox': 28, 'panda': 29, 'penguin': 30, 'porcupine': 31, 'raccoon': 32, 'reindeer': 33, 'rhinoceros': 34, 'sandpiper': 35, 'seal': 36, 'swan': 37, 'tiger': 38, 'turtle': 39, 'wolf': 40, 'wombat': 41, 'zebra': 42}
Processing dataset with image resizing...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 43/43 [01:52<00:00,  2.62s/it]


Total samples collected: 2463
Max boxes in one image: 15
HDF5 dataset saved to C:\Users\sagni\Downloads\Aminal Poching Detection\animal_poaching_data.h5
Metadata saved to C:\Users\sagni\Downloads\Aminal Poching Detection\animal_poaching_meta.pkl
