Colab NB Setup

In [None]:
# Mount Google Drive (to save your work)
from google.colab import drive
drive.mount('/content/drive')

# Create working directory
!mkdir -p /content/drive/MyDrive/hackathon
%cd /content/drive/MyDrive/hackathon

# Install required libraries
!pip install ultralytics  # YOLOv8
!pip install roboflow     # Optional: if using Roboflow datasets

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/hackathon


C2A main dataset download

In [None]:
# You already uploaded kaggle.json earlier, so skip that step

# Move to Colab temp storage (faster)
%cd /content

# Setup Kaggle credentials (if not already done)
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/hackathon/kaggle.json ~/.kaggle/ 2>/dev/null || echo "kaggle.json already configured"
!chmod 600 ~/.kaggle/kaggle.json

# Download C2A to temp storage
!kaggle datasets download -d rgbnihal/c2a-dataset
!unzip -q c2a-dataset.zip -d datasets/c2a
# Create the directory first
!mkdir -p datasets/c2a

# Then unzip
!unzip -q c2a-dataset.zip -d datasets/c2a

# Check structure
!ls -la datasets/c2a


/content
Dataset URL: https://www.kaggle.com/datasets/rgbnihal/c2a-dataset
License(s): MIT
c2a-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)
checkdir:  cannot create extraction directory: datasets/c2a
           No such file or directory
total 16
drwxr-xr-x 4 root root 4096 Oct  5 01:28 .
drwxr-xr-x 3 root root 4096 Oct  5 01:26 ..
drwxr-xr-x 3 root root 4096 Oct  5 01:26 C2A_Dataset
drwxr-xr-x 2 root root 4096 Oct  5 01:28 Coco_annotation_pose


In [None]:
# Check C2A_Dataset structure
print("C2A_Dataset contents:")
!ls -la datasets/c2a/C2A_Dataset

print("\n" + "="*60)
print("Coco_annotation_pose contents:")
!ls -la datasets/c2a/Coco_annotation_pose

C2A_Dataset contents:
total 12
drwxr-xr-x 3 root root 4096 Oct  5 01:26 .
drwxr-xr-x 4 root root 4096 Oct  5 01:28 ..
drwxr-xr-x 6 root root 4096 Oct  5 01:28 new_dataset3

Coco_annotation_pose contents:
total 49208
drwxr-xr-x 2 root root     4096 Oct  5 01:28 .
drwxr-xr-x 4 root root     4096 Oct  5 01:28 ..
-rw-r--r-- 1 root root 10088565 Sep 18  2024 test_annotations_with_pose_information.json
-rw-r--r-- 1 root root 30247742 Sep 18  2024 train_annotations_with_pose_information.json
-rw-r--r-- 1 root root 10036783 Sep 18  2024 val_annotations_with_pose_information.json


In [None]:
import json
import os
from pathlib import Path
from tqdm import tqdm

# COCO to YOLO Conversion Script
def convert_coco_to_yolo(coco_json_path, images_dir, output_dir):
    """Convert COCO-format 'person' annotations into YOLO label files.

    One ``<image stem>.txt`` file is written to ``output_dir`` for every image
    that has at least one annotation in the COCO file. Each line has the form
    ``class x_center y_center width height`` with all coordinates normalized
    to 0-1. Only categories named 'person' (case-insensitive) are kept, and
    they are all written as class 0.

    Boxes are clipped to the image bounds before normalizing, so a box that
    overhangs the image edge still yields values inside 0-1; boxes that are
    empty after clipping are dropped. Images whose recorded width or height is
    not positive are skipped because their boxes cannot be normalized.

    Args:
        coco_json_path: Path to the COCO annotation JSON file.
        images_dir: Not used by the conversion; kept so existing callers
            keep working.
        output_dir: Directory that receives the label files (created if
            it does not exist).

    Returns:
        The number of images for which a label file was written.
    """

    # Load COCO JSON
    with open(coco_json_path, 'r', encoding='utf-8') as f:
        coco_data = json.load(f)

    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Map image id -> image record (width, height, file_name)
    images_dict = {img['id']: img for img in coco_data['images']}

    # Map every COCO category named 'person' to YOLO class 0
    category_map = {
        cat['id']: 0
        for cat in coco_data['categories']
        if cat['name'].lower() == 'person'
    }
    if not category_map:
        # Without this hint the function would silently emit only empty files.
        print("⚠ No 'person' category found; all label files will be empty")

    # Group annotations by the image they belong to
    annotations_by_image = {}
    for ann in coco_data['annotations']:
        annotations_by_image.setdefault(ann['image_id'], []).append(ann)

    # Convert each image's annotations
    converted_count = 0
    for img_id, anns in tqdm(annotations_by_image.items()):
        img_info = images_dict.get(img_id)
        if img_info is None:
            continue  # Annotation refers to an image missing from 'images'

        img_width = img_info['width']
        img_height = img_info['height']
        if img_width <= 0 or img_height <= 0:
            continue  # Cannot normalize against a non-positive image size

        # Label file shares the image's base name
        txt_filename = Path(img_info['file_name']).stem + '.txt'
        txt_path = os.path.join(output_dir, txt_filename)

        with open(txt_path, 'w') as f:
            for ann in anns:
                cat_id = ann['category_id']
                if cat_id not in category_map:
                    continue  # Skip non-person annotations

                # COCO bbox: [x, y, width, height] (top-left corner, pixels)
                x, y, w, h = ann['bbox']

                # Clip the box to the image so normalized values stay in 0-1
                left = max(0, x)
                top = max(0, y)
                right = min(img_width, x + w)
                bottom = min(img_height, y + h)
                box_w = right - left
                box_h = bottom - top
                if box_w <= 0 or box_h <= 0:
                    continue  # Nothing of the box lies inside the image

                # YOLO bbox: [x_center, y_center, width, height] (normalized)
                x_center = (left + box_w / 2) / img_width
                y_center = (top + box_h / 2) / img_height
                width = box_w / img_width
                height = box_h / img_height

                # Write in YOLO format: class x_center y_center width height
                f.write(f"{category_map[cat_id]} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n")

        converted_count += 1

    print(f"✓ Converted {converted_count} images")
    return converted_count

# Check images location first
print("Checking image locations...")
!ls datasets/c2a/C2A_Dataset/new_dataset3

Checking image locations...
'All labels with Pose information'   test   train   val


In [None]:
# Check the actual image folders
print("Train images:")
!ls datasets/c2a/C2A_Dataset/new_dataset3/train | head -5

print("\nVal images:")
!ls datasets/c2a/C2A_Dataset/new_dataset3/val | head -5

print("\nTest images:")
!ls datasets/c2a/C2A_Dataset/new_dataset3/test | head -5

Train images:
images
labels
train_annotations.json

Val images:
images
labels
val_annotations.json

Test images:
images
labels
test_annotations.json


In [None]:
# Check if labels already exist in YOLO format
print("Checking train labels:")
!ls datasets/c2a/C2A_Dataset/new_dataset3/train/labels | head -5

print("\nChecking if they're YOLO format (.txt files):")
!head -3 datasets/c2a/C2A_Dataset/new_dataset3/train/labels/*.txt 2>/dev/null | head -10

Checking train labels:
collapsed_building_image0001_0.txt
collapsed_building_image0001_1.txt
collapsed_building_image0001_2.txt
collapsed_building_image0001_4.txt
collapsed_building_image0002_0.txt

Checking if they're YOLO format (.txt files):
==> datasets/c2a/C2A_Dataset/new_dataset3/train/labels/collapsed_building_image0001_0.txt <==
0 0.017794 0.307292 0.014235 0.038194
0 0.854093 0.951389 0.042705 0.062500
0 0.450178 0.559028 0.039146 0.034722

==> datasets/c2a/C2A_Dataset/new_dataset3/train/labels/collapsed_building_image0001_1.txt <==
0 0.250890 0.730903 0.024911 0.052083
0 0.932384 0.904514 0.007117 0.017361
0 0.386121 0.605903 0.010676 0.010417



In [None]:
import os
import shutil

# Source: C2A as extracted from the archive. Target: the train/valid/test
# layout referenced by data.yaml below. Both are relative to the current
# working directory.
C2A_SRC_ROOT = 'datasets/c2a/C2A_Dataset/new_dataset3'
C2A_YOLO_ROOT = 'datasets/c2a_yolo'

# (source split folder, target split folder, name used in progress messages)
C2A_SPLITS = [
    ('train', 'train', 'train'),
    ('val', 'valid', 'validation'),
    ('test', 'test', 'test'),
]

# Copy images and labels of every split into the YOLO directory structure.
# copytree creates the target folders, copies directory contents without
# expanding thousands of filenames on a command line, and raises if a source
# folder is missing so the success message below is never printed in error.
for src_split, dst_split, label in C2A_SPLITS:
    print(f"Organizing {label} set...")
    for kind in ('images', 'labels'):
        shutil.copytree(
            os.path.join(C2A_SRC_ROOT, src_split, kind),
            os.path.join(C2A_YOLO_ROOT, dst_split, kind),
            dirs_exist_ok=True,  # allows re-running the cell
        )

# Create data.yaml for YOLO. 'path' is derived from where the files were
# actually written, so it stays correct if the working directory is not /content.
data_yaml = f"""
path: {os.path.abspath(C2A_YOLO_ROOT)}
train: train/images
val: valid/images
test: test/images

nc: 1
names: ['person']
"""

with open(os.path.join(C2A_YOLO_ROOT, 'data.yaml'), 'w') as f:
    f.write(data_yaml)

print("\n✓ C2A dataset ready for training!")

# Verify counts
for dst_split, heading in (('train', 'Train'), ('valid', 'Valid'), ('test', 'Test')):
    print(f"{heading} images:")
    print(len(os.listdir(os.path.join(C2A_YOLO_ROOT, dst_split, 'images'))))

Organizing train set...
Organizing validation set...
Organizing test set...

✓ C2A dataset ready for training!
Train images:
6129
Valid images:
2043
Test images:
2043


Bonus Dataset Downloads

In [None]:
import os
from getpass import getpass

from roboflow import Roboflow

# SECURITY: the API key must not be stored in the notebook. It is read from the
# ROBOFLOW_API_KEY environment variable, or prompted for without echoing.
# NOTE(review): a key was previously hard-coded in this cell; treat that key
# as leaked and revoke/rotate it in the Roboflow account settings.
roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")
rf = Roboflow(api_key=roboflow_api_key)

# Workspace that hosts both bonus datasets
ROBOFLOW_WORKSPACE = "myworkspace-x1jig"

# Download thermal dataset (project slugs use hyphens instead of spaces)
print("Downloading thermal dataset...")
thermal_project = rf.workspace(ROBOFLOW_WORKSPACE).project("thermal-person-df3lf-xu4ws")
thermal_dataset = thermal_project.version(1).download("yolov8", location="datasets/thermal")

# Download aerial dataset
print("Downloading aerial dataset...")
aerial_project = rf.workspace(ROBOFLOW_WORKSPACE).project("drone-person-detection-ald8g-ku6nj")
aerial_dataset = aerial_project.version(1).download("yolov8", location="datasets/aerial")

print("\n✓ All datasets downloaded!")

Downloading thermal dataset...
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in datasets/thermal to yolov8:: 100%|██████████| 173772/173772 [00:04<00:00, 36866.08it/s]





Extracting Dataset Version Zip to datasets/thermal in yolov8::   3%|▎         | 265/8962 [00:00<00:02, 2998.31it/s]


KeyboardInterrupt: 

Verify Data Structure

In [None]:
import os

print("="*60)
print("DATASET VERIFICATION")
print("="*60)

# File extensions counted as images (matched case-insensitively)
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# C2A is verified at its reorganized YOLO location (train/valid/test folders
# plus data.yaml), not at the raw archive extraction directory.
datasets = {
    'C2A': 'datasets/c2a_yolo',
    'Thermal': 'datasets/thermal',
    'Aerial': 'datasets/aerial'
}


def count_images(directory):
    """Return the number of image files in `directory`, or None if it is not a directory."""
    if not os.path.isdir(directory):
        return None
    return sum(1 for f in os.listdir(directory) if f.lower().endswith(IMAGE_EXTENSIONS))


def labels_look_like_yolo(labels_dir, sample_size=20):
    """Check a sample of .txt files in `labels_dir` for YOLO-style label lines.

    A line is accepted when it has at least five whitespace-separated fields,
    the first an integer class id and the rest numbers. Empty files are
    accepted. Returns False when the directory holds no .txt files.
    """
    label_files = sorted(f for f in os.listdir(labels_dir) if f.endswith('.txt'))
    if not label_files:
        return False
    for filename in label_files[:sample_size]:
        with open(os.path.join(labels_dir, filename)) as fh:
            for line in fh:
                fields = line.split()
                if not fields:
                    continue
                if len(fields) < 5:
                    return False
                try:
                    int(fields[0])
                    for value in fields[1:]:
                        float(value)
                except ValueError:
                    return False
    return True


for name, path in datasets.items():
    print(f"\n{name} Dataset:")
    if not os.path.exists(path):
        print(f"  ✗ NOT FOUND")
        continue

    # Count train images (fall back to a flat images/ folder)
    train_path = f"{path}/train/images" if os.path.isdir(f"{path}/train/images") else f"{path}/images"
    train_count = count_images(train_path)
    if train_count is None:
        print(f"  ✗ Train images: directory missing")
    else:
        print(f"  ✓ Train images: {train_count}")

    # Count validation images
    valid_count = count_images(f"{path}/valid/images")
    if valid_count is None:
        print(f"  ✗ Valid images: directory missing")
    else:
        print(f"  ✓ Valid images: {valid_count}")

    # Check for data.yaml
    if os.path.exists(f"{path}/data.yaml"):
        print(f"  ✓ data.yaml found")
    else:
        print(f"  ✗ data.yaml missing")

    # Check annotation format by reading a sample of label files
    labels_path = f"{path}/train/labels" if os.path.isdir(f"{path}/train/labels") else f"{path}/labels"
    if not os.path.isdir(labels_path):
        print(f"  ✗ Labels: directory missing")
    elif labels_look_like_yolo(labels_path):
        print(f"  ✓ Labels in YOLO format")
    else:
        print(f"  ✗ Labels found but not in YOLO format")

print("\n" + "="*60)

DATASET VERIFICATION

C2A Dataset:
  ✗ NOT FOUND

Thermal Dataset:

Aerial Dataset:
  ✓ Train images: 935
  ✓ Valid images: 274
  ✓ data.yaml found
  ✓ Labels in YOLO format

