In [37]:
import pandas as pd
from collections import defaultdict

# Load CSV
df = pd.read_csv('eccentricity_data.csv')

# Map object types to numeric labels
label_map = {"star": 0, "streak": 1}

# Group bounding boxes per image
image_annotations = defaultdict(lambda: {"boxes": [], "labels": []})

for _, row in df.iterrows():
    image = row['image']
    x1 = int(row['bbox_x'])
    y1 = int(row['bbox_y'])
    w = int(row['bbox_width'])
    h = int(row['bbox_height'])
    x2 = x1 + w
    y2 = y1 + h
    label = label_map[row['object_type']]

    image_annotations[image]["boxes"].append([x1, y1, x2, y2])
    image_annotations[image]["labels"].append(label)

# Convert to lists for indexing
image_paths = list(image_annotations.keys())
annotations = [image_annotations[img] for img in image_paths]
# Check if annotations are loaded correctly
for img_name, annots in image_annotations.items():
    print(f"Image: {img_name}")
    print(f"  Boxes: {annots['boxes']}")  # Should be a list of [x1,y1,x2,y2]
    print(f"  Labels: {annots['labels']}")  # Should be a list of integers

Image: Raw_Observation_001_Set1.tiff
  Boxes: [[1808, 189, 1822, 203], [2486, 505, 2497, 516], [1928, 533, 1940, 545], [1585, 690, 1603, 695], [1061, 765, 1080, 784], [3696, 888, 3709, 901], [1977, 948, 1988, 958], [3577, 958, 3589, 970], [3514, 967, 3525, 978], [2888, 972, 2900, 984], [3457, 1083, 3467, 1094], [3448, 1147, 3460, 1160], [2352, 1195, 2362, 1206], [190, 1384, 205, 1399], [1052, 1421, 1146, 1448], [3291, 1430, 3303, 1441], [1241, 1436, 1253, 1448], [2469, 1653, 2481, 1665], [2177, 1959, 2187, 1970], [3227, 2460, 3240, 2473], [678, 2496, 690, 2508], [3472, 2595, 3481, 2605], [1569, 2643, 1583, 2657], [187, 2742, 201, 2756], [2519, 2773, 2539, 2794], [1372, 2902, 1385, 2915], [1809, 3279, 1820, 3290], [1586, 3366, 1601, 3381], [3777, 3399, 3796, 3418], [1898, 3494, 1908, 3504], [924, 3507, 940, 3523], [3163, 3741, 3179, 3758], [2415, 3778, 2430, 3793], [1910, 4152, 1924, 4166]]
  Labels: [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [55]:
import torch
from torch.utils.data import Dataset
import pandas as pd
from collections import defaultdict
import cv2  # For TIFF loading
from torchvision.ops import box_convert
class TIFFDetectionDataset(Dataset):
    def __init__(self, image_paths, annotations, transform=None):
        self.image_paths = []
        self.annotations = []
        self.transform = transform
        
        # Filter out empty annotations
        for img_path, annot in zip(image_paths, annotations):
            if len(annot["boxes"]) > 0:
                self.image_paths.append(img_path)
                self.annotations.append(annot)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = torch.from_numpy(img).permute(2, 0, 1).float() / 255.0

        annot = self.annotations[idx]
        boxes = torch.tensor(annot["boxes"], dtype=torch.float32)
        labels = torch.tensor(annot["labels"], dtype=torch.int64)

        return img, {"boxes": boxes, "labels": labels}

    @staticmethod
    def collate_fn(batch):
        return tuple(zip(*batch))

In [60]:
df.shape

(1063, 8)

In [None]:
import os
from glob import glob

# Load annotations (from your original code)
df = pd.read_csv('eccentricity_data.csv')
label_map = {"star": 0, "streak": 1}
image_annotations = defaultdict(lambda: {"boxes": [], "labels": []})

for _, row in df.iterrows():
    image_name = row['image']
    x1, y1, w, h = int(row['bbox_x']), int(row['bbox_y']), int(row['bbox_width']), int(row['bbox_height'])
    image_annotations[image_name]["boxes"].append([x1, y1, x1 + w, y1 + h])
    image_annotations[image_name]["labels"].append(label_map[row['object_type']])

# Get all TIFF paths (assuming filenames in CSV match TIFFs in 'Raw_Images')
image_paths = []
for img_name in image_annotations.keys():
    tiff_path = os.path.join("Datasets/Padded/", f"{img_name}")  # Adjust extension if needed
    if os.path.exists(tiff_path):
        image_paths.append(tiff_path)

# Split into train/val
from sklearn.model_selection import train_test_split
train_paths, val_paths = train_test_split(image_paths, test_size=0.2, random_state=42)

# Create datasets   
train_dataset = TIFFDetectionDataset(train_paths, [image_annotations[os.path.splitext(os.path.basename(p))[0]] for p in train_paths])
val_dataset = TIFFDetectionDataset(val_paths, [image_annotations[os.path.splitext(os.path.basename(p))[0]] for p in val_paths])
print(f"Total images in dataset: {len(train_dataset)}")  # Should be > 0
if len(train_dataset) == 0:
    print("Error: Dataset is empty! Check paths/annotations.")

Total images in dataset: 0
Error: Dataset is empty! Check paths/annotations.


In [57]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

def get_fasterrcnn(num_classes):
    # Load pre-trained Faster R-CNN with ResNet50-FPN backbone
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    
    # Replace the classifier head
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    
    return model

model = get_fasterrcnn(num_classes=2)  # 2 classes: star (0), streak (1)



In [58]:
import torch.optim as optim
from torch.utils.data import DataLoader

# DataLoaders (handle varying image sizes)
train_loader = DataLoader(
    train_dataset, batch_size=4, shuffle=True,
    collate_fn=TIFFDetectionDataset.collate_fn
)
val_loader = DataLoader(
    val_dataset, batch_size=2, shuffle=False,
    collate_fn=TIFFDetectionDataset.collate_fn
)

# Optimizer and device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)

# Training
for epoch in range(10):
    model.train()
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{
                    "boxes": t["boxes"].to(device),
                    "labels": t["labels"].to(device)  # Required for classification
                } for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    
    print(f"Epoch {epoch}, Loss: {losses.item()}")

ValueError: num_samples should be a positive integer value, but got num_samples=0

In [61]:
print(f"Total images in dataset: {len(dataset)}")  # Should be > 0
if len(dataset) == 0:
    print("Error: Dataset is empty! Check paths/annotations.")

Total images in dataset: 32


In [26]:
# Define your transformations (resize, normalization, etc)
from torch.utils.data import DataLoader
from torchvision import transforms as T
transform = T.Compose([
    T.ToTensor(),  # Convert to PyTorch tensor
    # Optional resizing
])

# Create the dataset
dataset = TiffCv2WithEccentricityDataset(tiff_images, eccentricity_data, transform=transform)

# Create a DataLoader
loader = DataLoader(dataset, batch_size=4, shuffle=True)


In [27]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

def get_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

model = get_model(num_classes=3)  # 0: background, 1: star, 2: streak




In [32]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models, transforms
from torchvision.models.detection import FasterRCNN, FasterRCNN_ResNet50_FPN_Weights

import cv2
import numpy as np
from tqdm import tqdm  # Optional: to track progress in the loop
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [33]:
# Define the optimizer (Adam optimizer for simplicity)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
model.train()
total_loss = 0
num_epochs = 10

for epoch in range(num_epochs):
    epoch_loss = 0
    for images, boxes, labels, eccentricities, filenames in tqdm(loader):  # Loop through the DataLoader
        images = [img.to(device) for img in images]
        boxes = [box.to(device) for box in boxes]  # Bounding boxes
        labels = [label.to(device) for label in labels]  # Labels
        eccentricities = [eccentricity.to(device) for eccentricity in eccentricities]  # Eccentricities

        # Forward pass
        loss_dict = model(images, targets=[{'boxes': box, 'labels': label} for box, label in zip(boxes, labels)])
        
        # If you're including eccentricity in the loss calculation, you can add it here
         # You will need to define this custom loss

        # Calculate total loss (sum of classification and regression losses)
        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()

        # Backpropagate and update weights
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss}")

# Optionally save the model after training
torch.save(model.state_dict(), 'model.pth')


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]


AssertionError: Expected target boxes to be a tensor of shape [N, 4], got torch.Size([0]).

In [64]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the CSV file
df = pd.read_csv('eccentricity_data.csv')

# Group by image name since each image has multiple objects
grouped = df.groupby('image')

# Prepare the data structure expected by object detection frameworks
data = []
for image_name, group in grouped:
    image_path = f"Datasets/padded/{image_name}"
    boxes = group[['bbox_x', 'bbox_y', 'bbox_width', 'bbox_height']].values
    labels = group['object_type'].apply(lambda x: 0 if x == 'star' else 1).values  # Convert to numeric labels
    eccentricity = group['eccentricity'].values
    
    data.append({
        'image_path': image_path,
        'boxes': boxes,
        'labels': labels,
        'eccentricity': eccentricity
    })

# Split into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [66]:
import os
from PIL import Image

def convert_to_yolo_format(bbox, img_width, img_height):
    # Convert from (x, y, width, height) to YOLO format (center_x, center_y, width, height) normalized
    x_center = (bbox[0] + bbox[2] / 2) / img_width
    y_center = (bbox[1] + bbox[3] / 2) / img_height
    width = bbox[2] / img_width
    height = bbox[3] / img_height
    return [x_center, y_center, width, height]

# Create directories
os.makedirs('yolo_dataset/images/train', exist_ok=True)
os.makedirs('yolo_dataset/labels/train', exist_ok=True)
os.makedirs('yolo_dataset/images/val', exist_ok=True)
os.makedirs('yolo_dataset/labels/val', exist_ok=True)

# Process each image
for i, item in enumerate(train_data):
    # Copy image to train directory
    img = Image.open(item['image_path'])
    img_width, img_height = img.size
    img.save(f'yolo_dataset/images/train/{os.path.basename(item["image_path"])}')
    
    # Create label file
    label_path = f'yolo_dataset/labels/train/{os.path.basename(item["image_path"]).split(".")[0]}.txt'
    with open(label_path, 'w') as f:
        for box, label in zip(item['boxes'], item['labels']):
            yolo_box = convert_to_yolo_format(box, img_width, img_height)
            f.write(f"{label} {' '.join(map(str, yolo_box))}\n")

# Repeat for test_data into val directory

In [69]:
from ultralytics import YOLO

# Load a pretrained model
model = YOLO('yolov8n.pt')  # or yolov8s.pt, yolov8m.pt for larger models

# Train the model
results = model.train(
    data='dataset.yaml',
    epochs=100,
    imgsz=640,
    batch=8,
    device='0'  # use 'cpu' if no GPU available
)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100.0%


Ultralytics 8.3.130  Python-3.13.3 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 3050 Ti Laptop GPU, 4096MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=dataset.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, p

RuntimeError: Dataset 'dataset.yaml' error  Dataset 'dataset.yaml' images not found, missing path 'C:\projects\Crystalgrowth\datasets\yolo_dataset\images\val'
Note dataset download directory is 'C:\projects\Crystalgrowth\datasets'. You can update this in 'C:\Users\thooy\AppData\Roaming\Ultralytics\settings.json'

In [70]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import shutil

# 1. Configuration
CONFIG = {
    'dataset_path': 'Datasets/padded',
    'csv_path': 'eccentricity_data.csv',  # Update with your CSV path
    'output_dir': 'yolo_dataset',
    'tile_size': 1024,  # Optimal for 4500x4500 images
    'tile_overlap': 0.2,
    'model_type': 'yolov8l.pt',  # Larger model better for small objects
    'train_size': 0.8,
    'random_state': 42,
    'img_extensions': ['.png', '.jpg', '.jpeg', '.tif']
}

# 2. Data Loading and Preparation
def load_and_prepare_data(csv_path, dataset_path):
    """Load CSV and prepare data structure"""
    df = pd.read_csv(csv_path)
    
    # Filter only relevant image extensions
    df['image'] = df['image'].apply(lambda x: x if os.path.splitext(x)[1].lower() in CONFIG['img_extensions'] else None)
    df = df.dropna(subset=['image'])
    
    # Group by image
    grouped = df.groupby('image')
    
    data = []
    for image_name, group in grouped:
        image_path = os.path.join(dataset_path, image_name)
        if not os.path.exists(image_path):
            continue
            
        # Get image dimensions
        with Image.open(image_path) as img:
            img_width, img_height = img.size
        
        # Convert boxes to absolute coordinates if they're normalized
        boxes = group[['bbox_x', 'bbox_y', 'bbox_width', 'bbox_height']].values
        if np.all(boxes[:, 2] <= 1) and np.all(boxes[:, 3] <= 1):  # If normalized
            boxes[:, 0] *= img_width
            boxes[:, 1] *= img_height
            boxes[:, 2] *= img_width
            boxes[:, 3] *= img_height
        
        # Convert to x1,y1,x2,y2 format
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]  # x2 = x + width
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]  # y2 = y + height
        
        labels = group['object_type'].apply(lambda x: 0 if x.lower() == 'star' else 1).values
        
        data.append({
            'image_path': image_path,
            'boxes': boxes,
            'labels': labels,
            'original_size': (img_width, img_height)
        })
    
    return data

# 3. Tiling Functions
def tile_image(image_path, output_dir):
    """Split large image into smaller tiles with overlap"""
    img = Image.open(image_path)
    img_width, img_height = img.size
    tile_size = CONFIG['tile_size']
    tile_step = int(tile_size * (1 - CONFIG['tile_overlap']))
    
    os.makedirs(output_dir, exist_ok=True)
    
    tiles = []
    for y in range(0, img_height, tile_step):
        for x in range(0, img_width, tile_step):
            # Ensure we don't go past image boundaries
            x_end = min(x + tile_size, img_width)
            y_end = min(y + tile_size, img_height)
            
            # Skip if tile would be too small (adjust if needed)
            if (x_end - x) < tile_size//2 or (y_end - y) < tile_size//2:
                continue
            
            box = (x, y, x_end, y_end)
            tile = img.crop(box)
            
            # Save tile
            base_name = os.path.splitext(os.path.basename(image_path))[0]
            tile_name = f"{base_name}_{x}_{y}.png"
            tile_path = os.path.join(output_dir, tile_name)
            tile.save(tile_path)
            
            tiles.append({
                'tile_path': tile_path,
                'original_coords': (x, y, x_end - x, y_end - y),
                'original_image': image_path
            })
    return tiles

def adjust_bbox_for_tile(original_bbox, tile_coords):
    """Convert original bbox coordinates to tile coordinates"""
    tile_x, tile_y, tile_w, tile_h = tile_coords
    x1, y1, x2, y2 = original_bbox
    
    # Check if bbox intersects with this tile
    if (x2 < tile_x or x1 > tile_x + tile_w or
        y2 < tile_y or y1 > tile_y + tile_h):
        return None
    
    # Calculate intersection
    new_x1 = max(0, x1 - tile_x)
    new_y1 = max(0, y1 - tile_y)
    new_x2 = min(tile_w, x2 - tile_x)
    new_y2 = min(tile_h, y2 - tile_y)
    
    # Skip boxes that are too small after cropping
    if (new_x2 - new_x1) < 5 or (new_y2 - new_y1) < 5:
        return None
    
    return [new_x1, new_y1, new_x2, new_y2]

# 4. YOLO Format Conversion
def convert_to_yolo_format(bbox, tile_width, tile_height):
    """Convert from x1,y1,x2,y2 to YOLO format (normalized center x, center y, width, height)"""
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    x_center = (bbox[0] + bbox[2]) / 2 / tile_width
    y_center = (bbox[1] + bbox[3]) / 2 / tile_height
    width = width / tile_width
    height = height / tile_height
    return [x_center, y_center, width, height]

# 5. Main Processing Function
def process_data_to_yolo_format(data, output_base_dir):
    """Process all data into YOLO format with tiling"""
    # Create directory structure
    os.makedirs(os.path.join(output_base_dir, 'images', 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_base_dir, 'images', 'val'), exist_ok=True)
    os.makedirs(os.path.join(output_base_dir, 'labels', 'train'), exist_ok=True)
    os.makedirs(os.path.join(output_base_dir, 'labels', 'val'), exist_ok=True)
    
    # Split data into train/val
    train_data, val_data = train_test_split(
        data, 
        train_size=CONFIG['train_size'], 
        random_state=CONFIG['random_state']
    )
    
    # Process training data
    process_split(train_data, 'train', output_base_dir)
    
    # Process validation data
    process_split(val_data, 'val', output_base_dir)
    
    # Create dataset.yaml
    create_yaml_file(output_base_dir)

def process_split(data_split, split_name, output_base_dir):
    """Process a single split (train/val)"""
    for item in data_split:
        # Create tiles for the large image
        tiles = tile_image(
            item['image_path'],
            os.path.join(output_base_dir, 'images', split_name)
        )
        
        for tile in tiles:
            # Get tile dimensions
            with Image.open(tile['tile_path']) as img:
                tile_width, tile_height = img.size
            
            # Adjust bounding boxes for this tile
            adjusted_boxes = []
            adjusted_labels = []
            for box, label in zip(item['boxes'], item['labels']):
                adjusted_box = adjust_bbox_for_tile(box, tile['original_coords'])
                if adjusted_box:
                    adjusted_boxes.append(adjusted_box)
                    adjusted_labels.append(label)
            
            # Only proceed if there are objects in this tile
            if adjusted_boxes:
                # Create label file
                label_file = os.path.splitext(os.path.basename(tile['tile_path']))[0] + '.txt'
                label_path = os.path.join(output_base_dir, 'labels', split_name, label_file)
                
                with open(label_path, 'w') as f:
                    for box, label in zip(adjusted_boxes, adjusted_labels):
                        yolo_box = convert_to_yolo_format(box, tile_width, tile_height)
                        f.write(f"{label} {' '.join(map(str, yolo_box))}\n")

def create_yaml_file(output_base_dir):
    """Create the dataset YAML file"""
    content = f"""path: {os.path.abspath(output_base_dir)}
train: images/train
val: images/val

names:
  0: star
  1: streak
"""
    with open(os.path.join(output_base_dir, 'dataset.yaml'), 'w') as f:
        f.write(content)

# 6. Training Function
def train_yolo_model():
    """Train the YOLO model"""
    # Load model
    model = YOLO(CONFIG['model_type'])
    
    # Train
    results = model.train(
        data=os.path.join(CONFIG['output_dir'], 'dataset.yaml'),
        epochs=100,
        imgsz=CONFIG['tile_size'],
        batch=8,  # Adjust based on GPU memory
        device='0',  # Use GPU
        patience=10,  # Early stopping patience
        optimizer='AdamW',
        lr0=0.001,
        weight_decay=0.0005,
        amp=True  # Mixed precision
    )
    
    return results

# 7. Main Execution
if __name__ == '__main__':
    # Step 1: Load and prepare data
    print("Loading and preparing data...")
    data = load_and_prepare_data(CONFIG['csv_path'], CONFIG['dataset_path'])
    
    # Step 2: Process into YOLO format with tiling
    print("Processing data into YOLO format...")
    if os.path.exists(CONFIG['output_dir']):
        shutil.rmtree(CONFIG['output_dir'])
    process_data_to_yolo_format(data, CONFIG['output_dir'])
    
    # Step 3: Train the model
    print("Training YOLO model...")
    train_yolo_model()
    
    print("Training complete! Model saved in runs/detect/train/")

# 8. Inference Function (for later use)
def predict_large_image(model_path, large_image_path, output_dir=None, conf_thresh=0.25):
    """Run detection on large image using tiling approach"""
    # Load model
    model = YOLO(model_path)
    
    # Create tiles
    temp_tile_dir = 'temp_tiles'
    tiles = tile_image(large_image_path, temp_tile_dir)
    
    all_results = []
    for tile in tiles:
        # Run detection
        results = model(tile['tile_path'], conf=conf_thresh)
        
        # Convert detections back to original coordinates
        tile_x, tile_y, tile_w, _ = tile['original_coords']
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = box.xyxy[0].tolist()
                conf = box.conf.item()
                cls = box.cls.item()
                
                # Convert to original image coordinates
                orig_x1 = x1 + tile_x
                orig_y1 = y1 + tile_y
                orig_x2 = x2 + tile_x
                orig_y2 = y2 + tile_y
                
                all_results.append({
                    'box': [orig_x1, orig_y1, orig_x2, orig_y2],
                    'confidence': conf,
                    'class': cls,
                    'class_name': 'star' if cls == 0 else 'streak'
                })
    
    # Clean up temporary tiles
    shutil.rmtree(temp_tile_dir)
    
    # Visualize results if output directory specified
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        with Image.open(large_image_path) as img:
            for detection in all_results:
                # Draw boxes (simplified)
                # In practice, you'd use OpenCV or PIL drawing functions
                pass
            output_path = os.path.join(output_dir, os.path.basename(large_image_path))
            img.save(output_path)
    
    return all_results

Loading and preparing data...
Processing data into YOLO format...


ValueError: With n_samples=0, test_size=None and train_size=0.8, the resulting train set will be empty. Adjust any of the aforementioned parameters.