In [None]:
import os
import json
import yaml
from pathlib import Path
from PIL import Image
from tqdm import tqdm

def _read_yolo_boxes(label_path, width, height):
    """Parse one YOLO label file into ``(class_id, [x, y, w, h])`` pixel boxes.

    Args:
        label_path: Path of the ``.txt`` label file; it may not exist.
        width: Image width in pixels, used to scale normalized x values.
        height: Image height in pixels, used to scale normalized y values.

    Returns:
        A list of ``(class_id, bbox)`` tuples where ``bbox`` is
        ``[x_min, y_min, box_width, box_height]`` in pixels. The list is
        empty when the label file does not exist.

    Raises:
        ValueError: If one of the first five fields of a line is not numeric.
    """
    if not label_path.exists():
        return []

    boxes = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.split()
            # Lines with fewer than five fields are skipped; fields beyond the
            # fifth are ignored.
            if len(parts) < 5:
                continue

            class_id = int(parts[0])
            x_center, y_center, w, h = map(float, parts[1:5])

            # YOLO stores a normalized centre and size; COCO wants the
            # top-left corner and size in pixels.
            bbox_width = w * width
            bbox_height = h * height
            x_min = (x_center - w / 2) * width
            y_min = (y_center - h / 2) * height
            boxes.append((class_id, [x_min, y_min, bbox_width, bbox_height]))
    return boxes


def yolo_to_coco(dataset_path, output_path):
    """Convert a YOLO detection dataset into one COCO JSON file per split.

    Reads class names from ``<dataset_path>/data.yaml`` and, for each of the
    ``train``, ``valid`` and ``test`` splits, pairs every image in
    ``images/<split>`` with the same-stem ``.txt`` file in ``labels/<split>``.
    The result is written to ``<output_path>/<split>/_annotations.coco.json``.
    Splits whose image directory is missing are skipped. Images are not
    copied by this function.

    An image that cannot be opened, or whose label file cannot be parsed, is
    reported and left out entirely, so image and annotation ids stay unique
    and no partially parsed annotations are written.

    Args:
        dataset_path: Root of the YOLO dataset (str or ``Path``).
        output_path: Directory that receives the per-split output folders;
            created if missing.
    """
    dataset_path = Path(dataset_path)
    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    with open(dataset_path / 'data.yaml', 'r') as f:
        data_config = yaml.safe_load(f)

    # `names` may be a list (index == class id) or an {id: name} mapping.
    names = data_config['names']
    if isinstance(names, dict):
        indexed_names = sorted(names.items())
    else:
        indexed_names = enumerate(names)

    categories = [
        {'id': int(class_id), 'name': name, 'supercategory': 'object'}
        for class_id, name in indexed_names
    ]

    splits = ['train', 'valid', 'test']
    # Matched case-insensitively so every file the image-copy step accepts
    # also receives an entry in the annotation file.
    image_suffixes = {'.jpg', '.jpeg', '.png'}

    for split in splits:
        print(f"\nProcessing {split} split...")

        images_dir = dataset_path / 'images' / split
        labels_dir = dataset_path / 'labels' / split

        if not images_dir.exists():
            print(f"Skipping {split} - directory not found")
            continue

        split_output_dir = output_path / split
        split_output_dir.mkdir(parents=True, exist_ok=True)

        coco_data = {
            'images': [],
            'annotations': [],
            'categories': categories
        }

        image_id = 0
        annotation_id = 0

        image_files = sorted(
            path for path in images_dir.iterdir()
            if path.is_file() and path.suffix.lower() in image_suffixes
        )

        for img_path in tqdm(image_files, desc=f"Converting {split}"):
            # Everything that can fail happens before coco_data is touched, so
            # a bad image or label never leaves a half-written record behind.
            try:
                with Image.open(img_path) as img:
                    width, height = img.size
                boxes = _read_yolo_boxes(
                    labels_dir / f"{img_path.stem}.txt", width, height
                )
            except Exception as e:
                # Best-effort conversion: report the file and keep going.
                print(f"Error processing {img_path}: {e}")
                continue

            coco_data['images'].append({
                'id': image_id,
                'file_name': img_path.name,
                'width': width,
                'height': height
            })

            for class_id, bbox in boxes:
                coco_data['annotations'].append({
                    'id': annotation_id,
                    'image_id': image_id,
                    'category_id': class_id,
                    'bbox': bbox,
                    'area': bbox[2] * bbox[3],
                    'iscrowd': 0
                })
                annotation_id += 1

            image_id += 1

        output_file = split_output_dir / "_annotations.coco.json"
        with open(output_file, 'w') as f:
            json.dump(coco_data, f, indent=2)

        print(f"Saved {split}: {len(coco_data['images'])} images, {len(coco_data['annotations'])} annotations")
        print(f"Output: {output_file}")

# Input YOLO dataset root and the directory that receives the COCO export.
dataset_path = '/workspace/yolo_dataset_4_dec'
output_path = '/workspace/coco_dataset'

# Write one _annotations.coco.json per split found under dataset_path.
yolo_to_coco(dataset_path=dataset_path, output_path=output_path)

In [None]:
import shutil
from pathlib import Path

# Copy each split's image files into the matching folder under dest_path.
print("Copying images to COCO dataset structure...")

source_path = Path('/workspace/yolo_dataset_4_dec')
dest_path = Path('/workspace/coco_dataset')

# File extensions (compared in lower case) that are treated as images.
image_suffixes = ('.jpg', '.jpeg', '.png')

for split in ('train', 'valid', 'test'):
    source_images = source_path / 'images' / split
    dest_images = dest_path / split

    # Splits without a source image directory are silently skipped.
    if not source_images.exists():
        continue

    dest_images.mkdir(parents=True, exist_ok=True)
    print(f"\nCopying {split} images...")

    for img_file in source_images.glob('*'):
        if img_file.suffix.lower() not in image_suffixes:
            continue
        shutil.copy2(img_file, dest_images / img_file.name)

    # The reported number is the count of image files now in the destination.
    image_count = sum(
        1 for entry in dest_images.glob('*')
        if entry.suffix.lower() in image_suffixes
    )
    print(f"Copied {image_count} {split} images")

print("\nCOCO dataset structure ready!")
print(f"Dataset location: {dest_path}")
print("\nExpected structure:")
print(
    "coco_dataset/",
    "├── train/",
    "│   ├── image1.jpg",
    "│   ├── image2.jpg",
    "│   └── _annotations.coco.json",
    "├── valid/",
    "│   ├── image1.jpg",
    "│   └── _annotations.coco.json",
    "└── test/",
    "    ├── image1.jpg",
    "    └── _annotations.coco.json",
    sep="\n",
)

## Train RF-DETR Model

Training configuration:
- **Classes**: 4 (knife, gun, rifle, baseball_bat)
- **Backbone**: DINOv2 (the pre-trained backbone RF-DETR is built on)
- **Epochs**: 50
- **Batch Size**: 16 (reduce if GPU memory is limited)
- **Learning Rate**: 1e-4

## Train RF-DETR Using Python API

The cell below uses the `RFDETRBase` class and registers an `on_fit_epoch_end` callback to record training progress after each epoch.

In [None]:
from rfdetr import RFDETRBase

# Fine-tune RF-DETR on the converted COCO dataset and keep whatever the
# library passes to the end-of-epoch hook so it can be summarized afterwards.
model = RFDETRBase()
history = []


def callback2(data):
    """Store the object handed to the ``on_fit_epoch_end`` hook in ``history``."""
    history.append(data)


model.callbacks["on_fit_epoch_end"].append(callback2)

model.train(
    dataset_dir="/workspace/coco_dataset",
    epochs=50,
    batch_size=16,
    lr=1e-4
)

print("\nTraining completed!")
print(f"Total epochs: {len(history)}")

# Summarize the last five recorded epochs.
# NOTE(review): RF-DETR's documented metrics history exposes the training
# loss as 'train_loss' (alongside 'test_loss'); 'loss' is kept as a fallback
# so the summary still works if the key name differs - confirm against the
# installed rfdetr version.
for epoch_data in history[-5:]:
    loss = epoch_data.get('train_loss', epoch_data.get('loss', 'N/A'))
    print(f"Epoch {epoch_data.get('epoch', 'N/A')}: Loss = {loss}")