In [3]:
import os
from pathlib import Path

# Root directory that is walked recursively to find `holding_something` sub-folders.
base_folder = "/workspace/yolo_dangerous_weapons/classification/classified"



In [4]:
# File suffixes (compared lower-cased, including the leading dot) treated as images.
image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.webp'}

# Every directory named "holding_something" found anywhere below base_folder.
holding_something_folders = [
    os.path.join(parent, 'holding_something')
    for parent, subdirs, _ in os.walk(base_folder)
    if 'holding_something' in subdirs
]

# Per-folder number of direct entries whose suffix is a known image extension.
folder_counts = {
    target: sum(
        1
        for entry in os.listdir(target)
        if Path(entry).suffix.lower() in image_extensions
    )
    for target in holding_something_folders
}
total_images = sum(folder_counts.values())

# Report the counts, ordered by folder path and shown relative to base_folder.
print("Image counts per holding_something folder:")
for target in sorted(folder_counts):
    shown_path = os.path.relpath(target, base_folder)
    print(f"  {shown_path}: {folder_counts[target]} images")

print(f"\nTotal images in all holding_something folders: {total_images}")


Image counts per holding_something folder:
  outside_front/holding_something: 155 images
  outside_left/holding_something: 86 images
  parking_lot_front/holding_something: 422 images
  parking_lot_left/holding_something: 114 images

Total images in all holding_something folders: 777


In [7]:
import shutil
import random

# Root of the YOLO dataset; images go to images/<split>, labels to labels/<split>.
yolo_dataset_path = "/workspace/yolo_dataset_4_dec"

# Target share of the new images per split. Train and valid sizes are computed
# from their ratios; the test split receives whatever remains.
train_ratio = 0.85
valid_ratio = 0.10
test_ratio = 0.05
assert abs(train_ratio + valid_ratio + test_ratio - 1.0) < 1e-9, \
    "train/valid/test ratios must sum to 1"

# Fixed seed so that re-running this cell reproduces the same split instead of
# overwriting the previously copied clicka_<idx> files with different images.
split_seed = 42


def _count_images(directory):
    """Return how many direct entries of `directory` have an image extension."""
    return sum(
        1
        for name in os.listdir(directory)
        if Path(name).suffix.lower() in image_extensions
    )


# Collect every image from the holding_something folders. os.listdir returns
# entries in arbitrary order, so the list is sorted before shuffling to make the
# seeded shuffle deterministic across runs and machines.
all_images = sorted(
    os.path.join(folder, file)
    for folder in holding_something_folders
    for file in os.listdir(folder)
    if Path(file).suffix.lower() in image_extensions
)

# A private Random instance keeps the global random state untouched.
random.Random(split_seed).shuffle(all_images)

train_count = int(len(all_images) * train_ratio)
valid_count = int(len(all_images) * valid_ratio)

train_images = all_images[:train_count]
valid_images = all_images[train_count:train_count + valid_count]
test_images = all_images[train_count + valid_count:]

print(f"Total images to add: {len(all_images)}")
for split_label, subset in (("Train", train_images), ("Valid", valid_images), ("Test", test_images)):
    # Guard the percentage against an empty image list (no division by zero).
    share = len(subset) / len(all_images) * 100 if all_images else 0.0
    print(f"  {split_label}: {len(subset)} ({share:.1f}%)")

splits = {
    'train': train_images,
    'valid': valid_images,
    'test': test_images
}

# Number of images present in each split directory before / after copying.
before_counts = {}
after_counts = {}

for split_name, images in splits.items():
    images_path = os.path.join(yolo_dataset_path, 'images', split_name)
    labels_path = os.path.join(yolo_dataset_path, 'labels', split_name)
    os.makedirs(images_path, exist_ok=True)
    os.makedirs(labels_path, exist_ok=True)

    before_counts[split_name] = _count_images(images_path)

    # Numbering restarts at 1 in every split; the original extension is kept.
    for idx, img_path in enumerate(images, 1):
        stem = f"clicka_{idx}"
        ext = Path(img_path).suffix
        shutil.copy2(img_path, os.path.join(images_path, f"{stem}{ext}"))

        # Write an empty label file next to each copied image
        # (presumably marks the image as containing no objects - confirm
        # against the training setup).
        with open(os.path.join(labels_path, f"{stem}.txt"), 'w'):
            pass

    after_counts[split_name] = _count_images(images_path)

print("\nImage counts before and after:")
for split_name in ['train', 'valid', 'test']:
    before = before_counts[split_name]
    after = after_counts[split_name]
    added = after - before
    print(f"  {split_name.capitalize()}:")
    print(f"    Before: {before}")
    print(f"    After: {after}")
    print(f"    Added: {added}")

total_before = sum(before_counts.values())
total_after = sum(after_counts.values())
print(f"\nTotal:")
print(f"  Before: {total_before}")
print(f"  After: {total_after}")
print(f"  Added: {total_after - total_before}")


Total images to add: 777
  Train: 660 (84.9%)
  Valid: 77 (9.9%)
  Test: 40 (5.1%)



Image counts before and after:
  Train:
    Before: 12381
    After: 13041
    Added: 660
  Valid:
    Before: 2666
    After: 2743
    Added: 77
  Test:
    Before: 2319
    After: 2359
    Added: 40

Total:
  Before: 17366
  After: 18143
  Added: 777
