In [None]:
import pandas as pd

# Annotation spreadsheet shipped with the dataset (Kaggle input mount).
file_path = '/kaggle/input/crm-dataset/Challenge2_DataSet/output.xlsx'

# Load the whole sheet into a DataFrame and preview the first rows;
# `df` is reused by the following cell.
df = pd.read_excel(file_path)
df.head()


In [None]:
# Distinct class labels, in order of first appearance in the sheet.
df["classification"].drop_duplicates().tolist()

In [None]:
import os

def count_images_in_folder(folder_path, extensions=None):
    """
    Count the image files located directly inside a folder (non-recursive).

    Args:
        folder_path (str): Path to the folder.
        extensions (str or iterable of str, optional): File extensions to
            count. Matching is case-insensitive and a missing leading dot is
            tolerated ('png' is treated like '.png'). Defaults to common
            image formats.

    Returns:
        int: Number of regular files whose extension is in `extensions`.
            Sub-directories are never counted, even if their name ends in
            an image extension.

    Raises:
        OSError: Propagated from os.scandir, e.g. FileNotFoundError when
            `folder_path` does not exist.
    """
    if extensions is None:
        extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff'}
    elif isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = [extensions]

    # Normalise once so the per-file comparison is a plain set lookup.
    wanted = set()
    for ext in extensions:
        ext = ext.lower()
        if ext and not ext.startswith('.'):
            ext = '.' + ext
        wanted.add(ext)

    with os.scandir(folder_path) as entries:
        return sum(
            1
            for entry in entries
            if entry.is_file()
            and os.path.splitext(entry.name)[1].lower() in wanted
        )

# Count the optical detections first, then the thermal ones.
# `folder` is reused for both calls, so after this cell it points at the
# thermal directory; the optical count is kept in `num_images1` and the
# thermal count in `num_images`.
folder = '/kaggle/input/crm-dataset/Challenge2_DataSet/detections_images_optical'
num_images1 = count_images_in_folder(folder)
print(f"Number of images in folder: {num_images1}")
folder = '/kaggle/input/crm-dataset/Challenge2_DataSet/detections_images_thermal'
num_images = count_images_in_folder(folder)
print(f"Number of images in folder: {num_images}")


In [None]:
import os
import pandas as pd
from PIL import Image
import ast

def generate_yolo_labels(images_dir, excel_file, output_dir='output'):
    """
    Generate YOLO annotation files from an Excel file with bounding boxes.

    One ``<image basename>.txt`` file is written per distinct value of the
    ``DetectionImage`` column. Each line has the form
    ``class_id x_center y_center width height`` with the four box values
    normalised by the image size read from disk.

    Parameters:
        images_dir (str): Folder the ``DetectionImage`` values are relative to.
        excel_file (str): Path to the Excel file with annotations. The sheet
            must provide the columns ``DetectionImage``, ``classification``,
            ``X1``, ``Y1``, ``X2`` and ``Y2`` (corner coordinates in pixels).
        output_dir (str): Folder to save YOLO label files.

    Returns:
        dict: Mapping of class name -> integer class id. Ids are assigned in
            order of first appearance in the sheet, so the mapping can be
            used to build a matching ``names`` list for ``data.yaml``.

    Notes:
        * Images that cannot be found are reported and skipped.
        * Boxes are clipped to the image bounds *before* being converted, so
          a box that partly leaves the frame keeps its visible part.
        * Rows without a class, with missing coordinates, or whose box has
          no area after clipping are skipped and counted in a summary line.
    """
    os.makedirs(output_dir, exist_ok=True)

    df = pd.read_excel(excel_file)

    # Ids follow first appearance in the sheet; blank class cells get no id.
    class_names = df['classification'].dropna().unique().tolist()
    class_to_id = {name: idx for idx, name in enumerate(class_names)}
    df['class_id'] = df['classification'].map(class_to_id)

    skipped_boxes = 0
    box_columns = ['class_id', 'X1', 'Y1', 'X2', 'Y2']

    # groupby yields every image name exactly once, so each image is opened
    # a single time and no size cache is required.
    for img_name, group in df.groupby('DetectionImage'):
        img_path = os.path.join(images_dir, img_name)
        try:
            with Image.open(img_path) as img:
                img_width, img_height = img.size
        except FileNotFoundError:
            print(f"Warning: Image not found: {img_path}")
            continue

        yolo_lines = []
        for row in group[box_columns].itertuples(index=False):
            box = None
            if not pd.isna(row.class_id):
                box = _bbox_to_yolo(row.X1, row.Y1, row.X2, row.Y2,
                                    img_width, img_height)
            if box is None:
                skipped_boxes += 1
                continue

            x_center, y_center, width, height = box
            # int() keeps the id integral even if pandas stored it as float.
            yolo_lines.append(
                f"{int(row.class_id)} {x_center:.6f} {y_center:.6f} "
                f"{width:.6f} {height:.6f}"
            )

        # NOTE(review): labels are keyed by basename only, so two
        # DetectionImage values with the same file name in different
        # sub-folders would overwrite each other - confirm names are unique.
        label_filename = os.path.splitext(os.path.basename(img_name))[0] + '.txt'
        label_path = os.path.join(output_dir, label_filename)

        with open(label_path, 'w') as f:
            f.write('\n'.join(yolo_lines))

    if skipped_boxes:
        print(f"Warning: skipped {skipped_boxes} rows with missing or empty boxes")
    print(class_to_id)

    print(f"YOLO label files generated in folder: {output_dir}")
    return class_to_id


def _bbox_to_yolo(x1, y1, x2, y2, img_width, img_height):
    """Convert pixel corners to normalised (xc, yc, w, h); None if the box is empty."""
    # Order the corners, then clip them to the image so centre and size are
    # derived from the same (visible) rectangle.
    left = min(max(min(x1, x2), 0), img_width)
    right = min(max(max(x1, x2), 0), img_width)
    top = min(max(min(y1, y2), 0), img_height)
    bottom = min(max(max(y1, y2), 0), img_height)

    # These comparisons are also False when any coordinate is NaN.
    if not (right > left and bottom > top):
        return None

    return (
        (left + right) / 2 / img_width,
        (top + bottom) / 2 / img_height,
        (right - left) / img_width,
        (bottom - top) / img_height,
    )

In [None]:
rm -r /kaggle/working/yolo_labels

In [None]:
# Dataset root: the image paths in the sheet are joined onto this folder.
images_folder = '/kaggle/input/crm-dataset/Challenge2_DataSet'
excel_path = '/kaggle/input/crm-dataset/Challenge2_DataSet/output.xlsx'
# Absolute path, matching the /kaggle/working/yolo_labels location used by
# the other cells, so the result does not depend on the working directory.
output_folder = '/kaggle/working/yolo_labels'

generate_yolo_labels(images_folder, excel_path, output_folder)

In [None]:
# Image.open("/kaggle/input/crm-dataset/Challenge2_DataSet/detections_images_thermal/img_20250522_08202333.jpg")

In [None]:
# Stage both sensor folders into one flat directory for the split step.
# NOTE(review): files that share a name across the optical and thermal
# folders would overwrite each other here - confirm the names are unique.
%mkdir -p /kaggle/working/images

# Copy optical images
!cp /kaggle/input/crm-dataset/Challenge2_DataSet/detections_images_optical/* /kaggle/working/images/

# Copy thermal images (same destination as the optical copy above)
!cp /kaggle/input/crm-dataset/Challenge2_DataSet/detections_images_thermal/* /kaggle/working/images/


In [None]:
import os
import shutil
import random

def split_yolo_dataset(images_dir, labels_dir, output_base, train_ratio=0.8, seed=42):
    """
    Split YOLO dataset into train/val structure for YOLOv8.

    Creates ``images/train``, ``images/val``, ``labels/train`` and
    ``labels/val`` under `output_base` and copies every ``.jpg``/``.jpeg``/
    ``.png`` file from `images_dir` into one of the two subsets, together
    with its same-named ``.txt`` label when one exists in `labels_dir`.
    Images without a label are still copied and reported with a warning.
    Files already present in `output_base` are not removed.

    Args:
        images_dir (str): Path to folder with all images.
        labels_dir (str): Path to folder with YOLO .txt label files.
        output_base (str): Base output folder (e.g., "dataset").
        train_ratio (float): Fraction of images placed in the train subset,
            between 0 and 1.
        seed (int): Random seed for reproducibility.

    Raises:
        ValueError: If `train_ratio` is outside [0, 1].
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

    for sub in ('images/train', 'images/val', 'labels/train', 'labels/val'):
        os.makedirs(os.path.join(output_base, sub), exist_ok=True)

    # os.listdir returns entries in arbitrary, filesystem-dependent order;
    # sorting first makes the seeded shuffle give the same split everywhere.
    image_files = sorted(
        f for f in os.listdir(images_dir)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )
    # A private generator leaves the global `random` state untouched.
    random.Random(seed).shuffle(image_files)

    train_count = int(len(image_files) * train_ratio)
    train_files = image_files[:train_count]
    val_files = image_files[train_count:]

    def copy_subset(file_list, subset):
        """Copy each image in `file_list`, and its label if present, into `subset`."""
        for img_file in file_list:
            label_file = os.path.splitext(img_file)[0] + '.txt'

            shutil.copy(
                os.path.join(images_dir, img_file),
                os.path.join(output_base, f'images/{subset}', img_file),
            )

            label_path = os.path.join(labels_dir, label_file)
            if os.path.exists(label_path):
                shutil.copy(
                    label_path,
                    os.path.join(output_base, f'labels/{subset}', label_file),
                )
            else:
                print(f"Warning: Missing label for {img_file}")

    copy_subset(train_files, 'train')
    copy_subset(val_files, 'val')
    print(f"Dataset split complete: {len(train_files)} train / {len(val_files)} val")

# Split the staged images and their labels into the train/val layout,
# using the function's default 80/20 ratio and seed.
split_yolo_dataset(
    images_dir='/kaggle/working/images',
    labels_dir='/kaggle/working/yolo_labels',
    output_base='/kaggle/working/yolo_dataset'
)


In [None]:
%rm -r /kaggle/working/images
%rm -r /kaggle/working/yolo_labels

In [None]:
# Class name -> id mapping; the ids must match the ones written into the
# label files. Presumably copied from the mapping printed by
# generate_yolo_labels - re-check it if the spreadsheet changes.
# (Named class_to_id rather than `dict` so the builtin stays usable.)
class_to_id = {'DJI-mavic': 0, 'Helicopter': 1, 'DJI-Phantom': 2, 'Camcopter': 3, 'DJI-Matrice': 4, 'Anka': 5}
# Order the names by id so that list index == class id.
class_names = sorted(class_to_id, key=class_to_id.get)
print(class_names)


In [None]:
import pandas as pd
import yaml

def create_yolo_yaml(class_names, dataset_path, output_yaml='data.yaml'):
    """
    Write a YOLOv8 data.yaml file describing the dataset layout and classes.

    Args:
        class_names (sequence of str or dict): Class names ordered so that
            the position of a name equals its class id, or a
            ``name -> id`` mapping (names are then ordered by id).
        dataset_path (str): Base path to dataset with images and labels;
            written as ``path``. ``train`` and ``val`` are written as the
            relative folders ``images/train`` and ``images/val``.
        output_yaml (str): Output YAML file path. Its parent folder is
            created if it does not exist.
    """
    import os  # local import: this cell does not import os itself

    if isinstance(class_names, dict):
        names = sorted(class_names, key=class_names.get)
    else:
        # Plain list so tuples / dict views serialise as a YAML sequence.
        names = list(class_names)

    data = {
        'path': dataset_path,
        'train': 'images/train',
        'val': 'images/val',
        'nc': len(names),
        'names': names
    }

    parent_dir = os.path.dirname(output_yaml)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_yaml, 'w') as f:
        # sort_keys=False keeps the key order defined above.
        yaml.dump(data, f, sort_keys=False)

    print(f"data.yaml created at: {output_yaml}")

# Write data.yaml next to the split dataset, using the class list defined
# in the previous cell.
create_yolo_yaml(
    class_names=class_names,
    dataset_path='/kaggle/working/yolo_dataset',
    output_yaml='/kaggle/working/yolo_dataset/data.yaml'
)


In [None]:
!pip install -q ultralytics

In [None]:
from ultralytics import YOLOWorld

# Load a pretrained YOLOv8s-worldv2 model
model = YOLOWorld("yolov8s-worldv2.pt")

# Train on the custom dataset described by the data.yaml written earlier,
# for 100 epochs with imgsz=640.
results = model.train(data="/kaggle/working/yolo_dataset/data.yaml", epochs=100, imgsz=640)