In [None]:
# Environment setup for Colab
import sys

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Running in Colab - Installing packages")
    !pip install -q ultralytics kagglehub
    print("Packages installed")
else:
    print("Running locally")

In [None]:
# Mount Google Drive (Colab only)
import os

if not IN_COLAB:
    # Local run: keep whatever directory the kernel was started in.
    print(f"Working in: {os.getcwd()}")
else:
    from google.colab import drive

    # Attach Google Drive at /content/drive.
    drive.mount('/content/drive')

    # Switch into the project folder on Drive, creating it on first use.
    work_dir = '/content/drive/MyDrive/Aircraft_Detect'
    os.makedirs(work_dir, exist_ok=True)
    os.chdir(work_dir)
    print(f"Working in: {work_dir}")

In [1]:
# Import required libraries
import kagglehub  # downloads the dataset from Kaggle
import shutil  # moves converted labels, copies images, removes temporary folders
from ultralytics.data.converter import convert_coco  # converts COCO annotations to YOLO format

print("✓ Libraries imported")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download the dataset from Kaggle
# `path` holds the value returned by kagglehub (printed below as the download
# location); the conversion cell joins sub-folder names onto it to locate the
# COCO-format splits.
path = kagglehub.dataset_download("cybersimar08/drone-detection")
print(f"Dataset downloaded to: {path}")

In [None]:
# Set up paths
from tqdm import tqdm

# COCO-format source splits live inside the downloaded Kaggle dataset.
coco_base_path = os.path.join(path, 'coco json drone detection')
# The YOLO-format dataset is written under the current working directory.
output_dir = os.path.join(os.getcwd(), 'dataset')
os.makedirs(output_dir, exist_ok=True)

# Convert each split (train, test, valid)
for split in ['train', 'test', 'valid']:
    split_path = os.path.join(coco_base_path, split)

    if not os.path.exists(split_path):
        print(f"Warning: {split} folder not found")
        continue

    print(f"\n{'='*60}")
    print(f"Processing {split.upper()} split...")
    print('='*60)

    # Create output directories
    split_output = os.path.join(output_dir, split)
    os.makedirs(os.path.join(split_output, 'images'), exist_ok=True)
    os.makedirs(os.path.join(split_output, 'labels'), exist_ok=True)

    # Give convert_coco an explicit, per-split scratch folder instead of
    # guessing afterwards which "coco_converted*" entry in the working
    # directory it created. A leftover folder from an interrupted run is
    # removed first so the converter starts from an empty location.
    temp_dir = os.path.join(os.getcwd(), f'coco_converted_{split}')
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)

    # Convert COCO annotations to YOLO format
    print("Converting COCO to YOLO format...")
    convert_coco(labels_dir=split_path, save_dir=temp_dir,
                 use_segments=False, use_keypoints=False)

    try:
        print("Moving labels...")
        # The converted .txt files are expected in a sub-folder named after
        # the annotation file (`_annotations.coco.json` -> `_annotations.coco`).
        src_labels = os.path.join(temp_dir, 'labels', '_annotations.coco')
        label_files = os.listdir(src_labels) if os.path.isdir(src_labels) else []
        if not label_files:
            # Surface the problem instead of silently reporting zero labels.
            print(f"Warning: no converted labels found in {src_labels}")
        for lbl in label_files:
            shutil.move(os.path.join(src_labels, lbl),
                        os.path.join(split_output, 'labels', lbl))
    finally:
        # Clean up the scratch folder even if moving the labels failed.
        shutil.rmtree(temp_dir, ignore_errors=True)

    # Copy images from source with progress bar
    print("Copying images...")
    image_files = [f for f in os.listdir(split_path)
                   if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp'))]

    for img_file in tqdm(image_files, desc=f"  {split}", unit="img"):
        src = os.path.join(split_path, img_file)
        dst = os.path.join(split_output, 'images', img_file)
        shutil.copy2(src, dst)

    # Report results
    num_images = len(os.listdir(os.path.join(split_output, 'images')))
    num_labels = len(os.listdir(os.path.join(split_output, 'labels')))
    print(f"✓ Complete: {num_images} images, {num_labels} labels\n")

print(f"\n{'='*60}")
print(f"✓ Dataset ready at: {os.path.abspath(output_dir)}")
print(f"{'='*60}")

In [9]:
# Verify dataset
def _count_files(directory):
    """Return how many regular files sit directly inside `directory`."""
    return sum(
        1 for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    )


for split in ['train', 'test', 'valid']:
    split_dir = os.path.join(output_dir, split)
    if not os.path.exists(split_dir):
        # A split with no output folder is skipped without a message.
        continue

    images_dir = os.path.join(split_dir, 'images')
    labels_dir = os.path.join(split_dir, 'labels')

    num_images = _count_files(images_dir)
    num_labels = _count_files(labels_dir)

    print(f"\n{split.upper()}:")
    print(f"  Images: {num_images}")
    print(f"  Labels: {num_labels}")


TRAIN:
  Images: 10799
  Labels: 8820
  Location: c:\Users\qwsor\OneDrive\Desktop\Aircraft_Detect\dataset\train

TEST:
  Images: 596
  Labels: 473
  Location: c:\Users\qwsor\OneDrive\Desktop\Aircraft_Detect\dataset\test

VALID:
  Images: 603
  Labels: 493
  Location: c:\Users\qwsor\OneDrive\Desktop\Aircraft_Detect\dataset\valid


In [None]:
# Create data.yaml config file
# The file records the absolute dataset root, the image folder of each split
# relative to that root, and the three class names with their indices.
data_yaml = f"""path: {os.path.abspath(output_dir)}
train: train/images
val: valid/images
test: test/images

names:
  0: aircraft
  1: drone
  2: helicopter

nc: 3
"""

# Write with an explicit UTF-8 encoding: without it open() falls back to the
# platform's locale encoding, so a dataset path containing non-ASCII
# characters could be stored in a legacy code page (notably on Windows).
with open('data.yaml', 'w', encoding='utf-8') as f:
    f.write(data_yaml)

print("\ndata.yaml created")
if IN_COLAB:
    print("Dataset saved to Google Drive - ready for training!")
    print(f"  Location: {os.path.abspath('.')}")