In [1]:
# pip install datasets pillow pandas tqdm

In [2]:
## STEP 1: IMPORTS AND CONFIGURATION

import os
import json
from datasets import load_dataset
from PIL import Image
from collections import defaultdict
from tqdm import tqdm
import random

  from .autonotebook import tqdm as notebook_tqdm


### 25 Selected Classes with COCO Category IDs

**Vehicles:** car(3), truck(8), bus(6), motorcycle(4), bicycle(2), airplane(5)  
**Person:** person(1)  
**Outdoor:** traffic light(10), stop sign(13), bench(15)  
**Animals:** dog(18), cat(17), horse(19), bird(16), cow(21), elephant(22)  
**Kitchen & Food:** bottle(44), cup(47), bowl(51), pizza(59), cake(61)  
**Furniture:** chair(62), couch(63), bed(65), potted plant(64)

In [3]:
# 25 Selected Classes (COCO category IDs)

SELECTED_CLASSES = {
    'person': 1,
    'bicycle': 2,
    'car': 3,
    'motorcycle': 4,
    'airplane': 5,
    'bus': 6,
    'truck': 8,
    'traffic light': 10,
    'stop sign': 13,
    'bench': 15,
    'bird': 16,
    'cat': 17,
    'dog': 18,
    'horse': 19,
    'cow': 21,
    'elephant': 22,
    'bottle': 44,
    'cup': 47,
    'bowl': 51,
    'pizza': 59,
    'cake': 61,
    'chair': 62,
    'couch': 63,
    'bed': 65,
    'potted plant': 64
}

IMAGES_PER_CLASS = 100
BASE_DIR = "smartvision_dataset"

In [4]:
## STEP 2: LOAD COCO DATASET FROM HUGGING FACE

print("üì• Loading COCO dataset in STREAMING mode (no download)...")
dataset = load_dataset("detection-datasets/coco", split="train", streaming=True)
print("‚úÖ Dataset loaded in streaming mode!")

üì• Loading COCO dataset in STREAMING mode (no download)...
‚úÖ Dataset loaded in streaming mode!


In [None]:
## STEP 3: COLLECT IMAGES FROM STREAM

print("\nüîç Starting image collection from COCO dataset stream...")
print(f"üéØ Target: {IMAGES_PER_CLASS} images per class")
print()

# Initialize storage for collected images
class_images = {class_name: [] for class_name in SELECTED_CLASSES.keys()}
class_counts = {class_name: 0 for class_name in SELECTED_CLASSES.keys()}

# Progress tracking
total_collected = 0
images_processed = 0
max_iterations = 50000  # Safety limit

print("‚è≥ Processing images from stream...")
print("üí° Progress updates every 100 images collected")
print()

# Iterate through streaming dataset
for idx, item in enumerate(dataset):

    images_processed += 1

    # Progress update every 1000 images processed
    if images_processed % 1000 == 0:
        print(f"üìä Processed {images_processed} images | Collected {total_collected}/{len(SELECTED_CLASSES) * IMAGES_PER_CLASS}")

    # Safety check
    if images_processed >= max_iterations:
        print(f"‚ö†Ô∏è Reached safety limit of {max_iterations} iterations")
        break

    # Check if we have enough images for ALL classes
    if all(count >= IMAGES_PER_CLASS for count in class_counts.values()):
        print("üéâ Successfully collected 100 images for ALL classes!")
        break

    # Get annotations from current image
    annotations = item['objects']
    categories = annotations['category']

    # Check if any of our target classes are in this image
    for cat_id in categories:
        for class_name, class_id in SELECTED_CLASSES.items():
            if cat_id == class_id and class_counts[class_name] < IMAGES_PER_CLASS:

                # Store the ACTUAL image data (not just index!)
                class_images[class_name].append({
                    'image': item['image'],           # PIL Image object
                    'annotations': item['objects'],   # Annotations
                    'idx': images_processed           # For naming
                })

                class_counts[class_name] += 1
                total_collected += 1

                # Progress update every 100 collected
                if total_collected % 100 == 0:
                    print(f"‚úì Collected {total_collected}/{len(SELECTED_CLASSES) * IMAGES_PER_CLASS} images")

                break  # Only count once per class

print()
print("="*60)
print("üìä COLLECTION COMPLETE:")
print("="*60)
print(f"Images Processed: {images_processed}")
print(f"Images Collected: {total_collected}")
print()
for class_name, count in sorted(class_counts.items()):
    status = "‚úÖ" if count >= IMAGES_PER_CLASS else "‚ö†Ô∏è"
    print(f"{status} {class_name:20s}: {count:3d} images")
print("="*60)


üîç Starting image collection from COCO dataset stream...
üéØ Target: 100 images per class

‚è≥ Processing images from stream...
üí° Progress updates every 100 images collected



'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 2010d019-4c01-496a-a737-380a3e888946)')' thrown while requesting GET https://huggingface.co/datasets/detection-datasets/coco/resolve/cf0b22332314a937e9dc8a1957b21725430bb41d/data/train-00000-of-00040-67e35002d152155c.parquet
Retrying in 1s [Retry 1/5].
'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /datasets/detection-datasets/coco/resolve/cf0b22332314a937e9dc8a1957b21725430bb41d/data/train-00000-of-00040-67e35002d152155c.parquet (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000231F51AED50>: Failed to establish a new connection: [WinError 10065] A socket operation was attempted to an unreachable host'))"), '(Request ID: ba9d6050-def4-4e72-a5a2-32fad4276c96)')' thrown while requesting GET https://huggingface.co/datasets/detection-datasets/coco/resolve/cf0b22332314a937e9dc8a19

In [None]:
## STEP 4: CREATE FOLDER STRUCTURE

print("\nüìÅ Creating project folder structure...")
print()

# Create main directory
os.makedirs(BASE_DIR, exist_ok=True)

# Create subdirectories for Classification task
os.makedirs(f"{BASE_DIR}/classification/train", exist_ok=True)
os.makedirs(f"{BASE_DIR}/classification/val", exist_ok=True)
os.makedirs(f"{BASE_DIR}/classification/test", exist_ok=True)

# Create subdirectories for Detection task
os.makedirs(f"{BASE_DIR}/detection/images", exist_ok=True)
os.makedirs(f"{BASE_DIR}/detection/labels", exist_ok=True)

# Create class folders inside train/val/test
for class_name in SELECTED_CLASSES.keys():
    os.makedirs(f"{BASE_DIR}/classification/train/{class_name}", exist_ok=True)
    os.makedirs(f"{BASE_DIR}/classification/val/{class_name}", exist_ok=True)
    os.makedirs(f"{BASE_DIR}/classification/test/{class_name}", exist_ok=True)

print("‚úÖ Folder structure created successfully!")
print()
print("üìÇ Structure:")
print(f"""
{BASE_DIR}/
‚îú‚îÄ‚îÄ classification/
‚îÇ   ‚îú‚îÄ‚îÄ train/
‚îÇ   ‚îÇ   ‚îú‚îÄ‚îÄ person/
‚îÇ   ‚îÇ   ‚îú‚îÄ‚îÄ car/
‚îÇ   ‚îÇ   ‚îî‚îÄ‚îÄ ... (25 class folders)
‚îÇ   ‚îú‚îÄ‚îÄ val/
‚îÇ   ‚îÇ   ‚îî‚îÄ‚îÄ ... (25 class folders)
‚îÇ   ‚îî‚îÄ‚îÄ test/
‚îÇ       ‚îî‚îÄ‚îÄ ... (25 class folders)
‚îÇ
‚îî‚îÄ‚îÄ detection/
    ‚îú‚îÄ‚îÄ images/
    ‚îî‚îÄ‚îÄ labels/
""")


üìÅ Creating project folder structure...

‚úÖ Folder structure created successfully!

üìÇ Structure:

smartvision_dataset/
‚îú‚îÄ‚îÄ classification/
‚îÇ   ‚îú‚îÄ‚îÄ train/
‚îÇ   ‚îÇ   ‚îú‚îÄ‚îÄ person/
‚îÇ   ‚îÇ   ‚îú‚îÄ‚îÄ car/
‚îÇ   ‚îÇ   ‚îî‚îÄ‚îÄ ... (25 class folders)
‚îÇ   ‚îú‚îÄ‚îÄ val/
‚îÇ   ‚îÇ   ‚îî‚îÄ‚îÄ ... (25 class folders)
‚îÇ   ‚îî‚îÄ‚îÄ test/
‚îÇ       ‚îî‚îÄ‚îÄ ... (25 class folders)
‚îÇ
‚îî‚îÄ‚îÄ detection/
    ‚îú‚îÄ‚îÄ images/
    ‚îî‚îÄ‚îÄ labels/



In [None]:
## STEP 5: TRAIN/VAL/TEST SPLIT (70/15/15)

print("="*70)
print("üîÄ Preparing Train/Val/Test splits...")
print("üìä Split Ratio: 70% Train / 15% Val / 15% Test")
print("="*70)
print()

# Initialize metadata dictionary
metadata = {
    'total_images': 0,
    'classes': {},
    'splits': {'train': 0, 'val': 0, 'test': 0}
}

# Create split dictionaries for each class
train_data = {}
val_data = {}
test_data = {}

# Process each class
for class_name in SELECTED_CLASSES.keys():

    all_items = class_images.get(class_name, [])

    if not all_items:
        print(f"‚ö†Ô∏è Warning: No images found for {class_name}")
        continue

    # Calculate split indices
    n = len(all_items)
    train_split = int(0.7 * n)   # 70% for training
    val_split = int(0.85 * n)    # 15% for validation
    # Remaining 15% for test

    # Split the data
    train_data[class_name] = all_items[:train_split]
    val_data[class_name] = all_items[train_split:val_split]
    test_data[class_name] = all_items[val_split:]

    # Store split info in metadata
    metadata['classes'][class_name] = {
        'train': len(train_data[class_name]),
        'val': len(val_data[class_name]),
        'test': len(test_data[class_name]),
        'total': len(all_items)
    }

    metadata['splits']['train'] += len(train_data[class_name])
    metadata['splits']['val'] += len(val_data[class_name])
    metadata['splits']['test'] += len(test_data[class_name])
    metadata['total_images'] += len(all_items)

    print(f"{class_name:20s}: Train={len(train_data[class_name]):3d} | Val={len(val_data[class_name]):2d} | Test={len(test_data[class_name]):2d}")

üîÄ Preparing Train/Val/Test splits...
üìä Split Ratio: 70% Train / 15% Val / 15% Test

person              : Train= 70 | Val=15 | Test=15
bicycle             : Train= 70 | Val=15 | Test=15
car                 : Train= 70 | Val=15 | Test=15
motorcycle          : Train= 70 | Val=15 | Test=15
airplane            : Train= 70 | Val=15 | Test=15
bus                 : Train= 70 | Val=15 | Test=15
truck               : Train= 70 | Val=15 | Test=15
traffic light       : Train= 70 | Val=15 | Test=15
stop sign           : Train= 70 | Val=15 | Test=15
bench               : Train= 70 | Val=15 | Test=15
bird                : Train= 70 | Val=15 | Test=15
cat                 : Train= 70 | Val=15 | Test=15
dog                 : Train= 70 | Val=15 | Test=15
horse               : Train= 70 | Val=15 | Test=15
cow                 : Train= 70 | Val=15 | Test=15
elephant            : Train= 70 | Val=15 | Test=15
bottle              : Train= 70 | Val=15 | Test=15
cup                 : Train= 70 | Val=15 | 

In [None]:
import os
from PIL import Image
from tqdm import tqdm
import json

print("="*70)
print("üíæ STEP 6: SAVING IMAGES TO DISK")
print("="*70)
print()

# PART A: SAVE CLASSIFICATION IMAGES


print("üìÅ PART A: Saving Classification Images...")
print("   Format: Cropped objects, 224x224 pixels\n")

classification_stats = {'train': 0, 'val': 0, 'test': 0}

# Process each split
for split_name, split_data in [('train', train_data), ('val', val_data), ('test', test_data)]:

    print(f"üìÇ Processing {split_name.upper()} split...")

    # Process each class
    for class_name, items in tqdm(split_data.items(), desc=f"  {split_name}"):

        class_folder = f"{BASE_DIR}/classification/{split_name}/{class_name}"

        # Save each image
        for img_idx, item in enumerate(items):

            img = item['image']
            annotations = item['annotations']
            bboxes = annotations['bbox']
            categories = annotations['category']

            class_id = SELECTED_CLASSES[class_name]

            # Find bbox for this class
            for bbox, cat_id in zip(bboxes, categories):
                if cat_id == class_id:
                    x, y, w, h = bbox

                    try:
                        # Crop and resize
                        cropped_img = img.crop((x, y, x + w, y + h))
                        cropped_img = cropped_img.resize((224, 224), Image.LANCZOS)

                        # Save
                        img_filename = f"{class_name}_{split_name}_{img_idx:04d}.jpg"
                        img_path = os.path.join(class_folder, img_filename)
                        cropped_img.save(img_path, quality=95)

                        classification_stats[split_name] += 1

                    except Exception as e:
                        print(f"‚ö†Ô∏è Error: {class_name} image {img_idx}: {e}")

                    break

print()
print("="*70)
print("‚úÖ CLASSIFICATION IMAGES SAVED!")
print("="*70)
print(f"üìä Train: {classification_stats['train']} images")
print(f"üìä Val:   {classification_stats['val']} images")
print(f"üìä Test:  {classification_stats['test']} images")
print(f"üìä Total: {sum(classification_stats.values())} images")
print()

üíæ STEP 6: SAVING IMAGES TO DISK

üìÅ PART A: Saving Classification Images...
   Format: Cropped objects, 224x224 pixels

üìÇ Processing TRAIN split...


  train: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:03<00:00,  6.74it/s]


üìÇ Processing VAL split...


  val: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:00<00:00, 31.66it/s]


üìÇ Processing TEST split...


  test: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 25/25 [00:00<00:00, 31.87it/s]


‚úÖ CLASSIFICATION IMAGES SAVED!
üìä Train: 1750 images
üìä Val:   375 images
üìä Test:  375 images
üìä Total: 2500 images






In [None]:
# PART B: SAVE DETECTION IMAGES (YOLO FORMAT)

print("="*70)
print("üìÅ PART B: Saving Detection Images & Annotations...")
print("   Format: Full images with YOLO .txt labels\n")

detection_stats = {'images': 0, 'annotations': 0, 'objects': 0}

# COCO to YOLO class mapping
coco_to_yolo = {class_id: idx for idx, class_id in enumerate(SELECTED_CLASSES.values())}

# Combine train + val for detection
all_detection_data = []
for class_name in SELECTED_CLASSES.keys():
    all_detection_data.extend(train_data.get(class_name, []))
    all_detection_data.extend(val_data.get(class_name, []))

print(f"üìä Total detection images: {len(all_detection_data)}\n")

# Save images and create YOLO labels
for img_idx, item in enumerate(tqdm(all_detection_data, desc="Saving detection data")):

    img = item['image']
    img_width, img_height = img.size

    # Save full image
    img_filename = f"image_{img_idx:06d}.jpg"
    img_path = os.path.join(f"{BASE_DIR}/detection/images", img_filename)
    img.save(img_path, quality=95)
    detection_stats['images'] += 1

    # Get annotations
    annotations = item['annotations']
    bboxes = annotations['bbox']
    categories = annotations['category']

    # Create YOLO annotation
    label_filename = f"image_{img_idx:06d}.txt"
    label_path = os.path.join(f"{BASE_DIR}/detection/labels", label_filename)

    yolo_annotations = []
    objects_count = 0

    for bbox, cat_id in zip(bboxes, categories):
        if cat_id in coco_to_yolo:
            x, y, w, h = bbox

            # Convert to YOLO format (normalized)
            x_center = (x + w/2) / img_width
            y_center = (y + h/2) / img_height
            w_norm = w / img_width
            h_norm = h / img_height

            yolo_class_id = coco_to_yolo[cat_id]
            yolo_line = f"{yolo_class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}"
            yolo_annotations.append(yolo_line)
            objects_count += 1

    # Save label file
    if yolo_annotations:
        with open(label_path, 'w') as f:
            f.write('\n'.join(yolo_annotations))
        detection_stats['annotations'] += 1
        detection_stats['objects'] += objects_count

print()
print("="*70)
print("‚úÖ DETECTION DATASET CREATED!")
print("="*70)
print(f"üìä Images:     {detection_stats['images']}")
print(f"üìä Labels:     {detection_stats['annotations']}")
print(f"üìä Objects:    {detection_stats['objects']}")
print(f"üìä Avg/image:  {detection_stats['objects']/detection_stats['images']:.2f}")
print()

üìÅ PART B: Saving Detection Images & Annotations...
   Format: Full images with YOLO .txt labels

üìä Total detection images: 2125



Saving detection data: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2125/2125 [00:05<00:00, 366.90it/s]


‚úÖ DETECTION DATASET CREATED!
üìä Images:     2125
üìä Labels:     2125
üìä Objects:    10986
üìä Avg/image:  5.17






In [None]:
# PART C: CREATE YOLO CONFIG FILE

print("üìù Creating YOLO configuration file...\n")

yaml_content = f"""# SmartVision Dataset - YOLOv8 Configuration
path: {os.path.abspath(BASE_DIR)}/detection
train: images
val: images

names:
  0: person
  1: bicycle
  2: car
  3: motorcycle
  4: airplane
  5: bus
  6: truck
  7: traffic light
  8: stop sign
  9: bench
  10: bird
  11: cat
  12: dog
  13: horse
  14: cow
  15: elephant
  16: bottle
  17: cup
  18: bowl
  19: pizza
  20: cake
  21: chair
  22: couch
  23: bed
  24: potted plant

nc: 25
"""

yaml_path = f"{BASE_DIR}/detection/data.yaml"
with open(yaml_path, 'w') as f:
    f.write(yaml_content)

print(f"‚úÖ Created: {yaml_path}\n")

üìù Creating YOLO configuration file...

‚úÖ Created: smartvision_dataset/detection/data.yaml



In [None]:
# PART D: SAVE METADATA

print("üìä Saving metadata...\n")

metadata['classification'] = classification_stats
metadata['detection'] = detection_stats
metadata['dataset_path'] = os.path.abspath(BASE_DIR)

metadata_path = f"{BASE_DIR}/dataset_metadata.json"
with open(metadata_path, 'w') as f:
    json.dump(metadata, indent=2, fp=f)

print(f"‚úÖ Saved: {metadata_path}\n")

üìä Saving metadata...

‚úÖ Saved: smartvision_dataset/dataset_metadata.json



In [None]:
print("="*70)
print("üéâ DATASET SETUP COMPLETE!")
print("="*70)
print()
print(f"üìÅ Location: {os.path.abspath(BASE_DIR)}")
print()
print("üìÇ Classification Dataset:")
print(f"   ‚îú‚îÄ Train:  {classification_stats['train']} images (70%)")
print(f"   ‚îú‚îÄ Val:    {classification_stats['val']} images (15%)")
print(f"   ‚îú‚îÄ Test:   {classification_stats['test']} images (15%)")
print(f"   ‚îî‚îÄ Total:  {sum(classification_stats.values())} cropped images (224x224)")
print()
print("üìÇ Detection Dataset:")
print(f"   ‚îú‚îÄ Images: {detection_stats['images']} full images")
print(f"   ‚îú‚îÄ Labels: {detection_stats['annotations']} YOLO .txt files")
print(f"   ‚îî‚îÄ Objects: {detection_stats['objects']} annotated objects")
print()
print("="*70)
print("‚úÖ LEARNERS CAN NOW START:")
print("="*70)
print("Step 7:  Exploratory Data Analysis (EDA)")
print("Step 8:  Train Classification Models")
print("Step 9:  Train YOLO Detection Model")
print("Step 10: Build Streamlit Application")
print("Step 11: Deploy to Hugging Face Spaces")
print("="*70)

üéâ DATASET SETUP COMPLETE!

üìÅ Location: /Users/subhisapple/Desktop/SmartVision_AI/smartvision_dataset

üìÇ Classification Dataset:
   ‚îú‚îÄ Train:  1750 images (70%)
   ‚îú‚îÄ Val:    375 images (15%)
   ‚îú‚îÄ Test:   375 images (15%)
   ‚îî‚îÄ Total:  2500 cropped images (224x224)

üìÇ Detection Dataset:
   ‚îú‚îÄ Images: 2125 full images
   ‚îú‚îÄ Labels: 2125 YOLO .txt files
   ‚îî‚îÄ Objects: 10986 annotated objects

‚úÖ LEARNERS CAN NOW START:
Step 7:  Exploratory Data Analysis (EDA)
Step 8:  Train Classification Models
Step 9:  Train YOLO Detection Model
Step 10: Build Streamlit Application
Step 11: Deploy to Hugging Face Spaces
