In [3]:
import os
import shutil

import cv2
import pandas as pd
import torch
import yaml
from PIL import Image
from ultralytics import YOLO

In [4]:
class YOLOTrainerDetector:
    """Prepare a YOLO dataset from CSV annotations, train a model and run detection.

    The hyper-parameters set in ``__init__`` (``img_size``, ``conf_thresh``,
    ``epochs``, ``batch_size``) are plain instance attributes and may be
    overwritten before calling the methods below.
    """

    # Root folder (relative to the working directory) that receives the
    # images/, labels/ and data.yaml written by prepare_data().
    DATASET_DIR = 'dataset'
    # Extensions picked up when detect_obstacles() is given a directory.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self):
        self.img_size = 640
        self.conf_thresh = 0.25
        self.epochs = 50
        self.batch_size = 16
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(self.device)

    def prepare_data(self, annotations_path, data_dir, train_ratio=0.8):
        """Build a YOLO-style dataset folder from a CSV annotations file.

        The CSV is expected to have the columns:
        filename,width,height,xmin,ymin,xmax,ymax,class

        Args:
            annotations_path (str): Path to the CSV annotations file.
            data_dir (str): Directory containing the images named in the CSV.
            train_ratio (float): Fraction of images (in sorted filename order)
                assigned to the training split; the rest go to validation.

        Returns:
            str: Path to the generated ``data.yaml``.

        Raises:
            ValueError: If ``train_ratio`` is not strictly between 0 and 1.
        """
        if not 0.0 < train_ratio < 1.0:
            raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio}")

        # Create directories
        for kind in ('images', 'labels'):
            for split in ('train', 'val'):
                os.makedirs(os.path.join(self.DATASET_DIR, kind, split), exist_ok=True)

        # Read annotations
        df = pd.read_csv(annotations_path)

        # Extract unique class names and create mapping
        unique_classes = sorted(df['class'].unique())
        class_dict = {class_name: i for i, class_name in enumerate(unique_classes)}

        print(f"Found {len(unique_classes)} classes: {unique_classes}")
        print(f"Class mapping: {class_dict}")

        # Create YAML config file for YOLOv8. Names are written as plain
        # strings so numeric class labels (numpy scalars) serialize cleanly.
        data_yaml = {
            'path': os.path.abspath(self.DATASET_DIR),
            'train': 'images/train',
            'val': 'images/val',
            'names': {idx: str(name) for name, idx in class_dict.items()}
        }

        yaml_path = os.path.join(self.DATASET_DIR, 'data.yaml')
        with open(yaml_path, 'w') as f:
            yaml.dump(data_yaml, f, sort_keys=False)

        # Group by image
        image_groups = df.groupby('filename')

        # Split into train and validation sets
        image_paths = list(image_groups.groups.keys())
        split_idx = int(len(image_paths) * train_ratio)
        train_images = image_paths[:split_idx]
        val_images = image_paths[split_idx:]

        # Process each image and its annotations; count only the images that
        # were actually written so the summary below is accurate.
        n_train = sum(
            bool(self.process_image(img_path, image_groups, data_dir, 'train', class_dict))
            for img_path in train_images
        )
        n_val = sum(
            bool(self.process_image(img_path, image_groups, data_dir, 'val', class_dict))
            for img_path in val_images
        )

        print(f"Dataset prepared with {n_train} training and {n_val} validation images")
        return yaml_path

    @staticmethod
    def _to_yolo_bbox(x_min, y_min, x_max, y_max, img_width, img_height):
        """Convert a pixel box to normalized YOLO ``(xc, yc, w, h)``.

        The box is clamped to the image bounds and reversed corners are
        reordered. Returns ``None`` when any value is missing, the image size
        is not positive, or the clamped box has no area.
        """
        values = (x_min, y_min, x_max, y_max, img_width, img_height)
        if any(pd.isna(v) for v in values):
            return None
        x_min, y_min, x_max, y_max, img_width, img_height = (float(v) for v in values)
        if img_width <= 0 or img_height <= 0:
            return None

        left, right = sorted((x_min, x_max))
        top, bottom = sorted((y_min, y_max))
        left = min(max(left, 0.0), img_width)
        right = min(max(right, 0.0), img_width)
        top = min(max(top, 0.0), img_height)
        bottom = min(max(bottom, 0.0), img_height)
        if right <= left or bottom <= top:
            return None

        return (
            (left + right) / 2 / img_width,
            (top + bottom) / 2 / img_height,
            (right - left) / img_width,
            (bottom - top) / img_height,
        )

    def process_image(self, img_path, image_groups, data_dir, split, class_dict):
        """Copy one image into the dataset and write its YOLO label file.

        Args:
            img_path (str): Image filename as it appears in the CSV.
            image_groups: Annotations grouped by filename (``df.groupby('filename')``).
            data_dir (str): Directory containing the source images.
            split (str): Target split folder name, e.g. 'train' or 'val'.
            class_dict (dict): Mapping from class name to integer class id.

        Returns:
            bool: True if the image was copied and labelled, False if the
            source image was missing and the entry was skipped.
        """
        img_src = os.path.join(data_dir, img_path)

        # Skip if image doesn't exist
        if not os.path.exists(img_src):
            print(f"Warning: Image {img_src} not found, skipping")
            return False

        # Get annotations for this image
        annotations = image_groups.get_group(img_path)

        # Copy the file byte-for-byte: no lossy re-encode, no open handle left behind.
        img_dst = os.path.join(self.DATASET_DIR, 'images', split, img_path)
        os.makedirs(os.path.dirname(img_dst), exist_ok=True)
        shutil.copy2(img_src, img_dst)

        # Get image dimensions from CSV
        img_width = annotations['width'].iloc[0]
        img_height = annotations['height'].iloc[0]

        # Create label file (YOLO format: class x_center y_center width height)
        label_path = os.path.join(
            self.DATASET_DIR, 'labels', split, os.path.splitext(img_path)[0] + '.txt'
        )
        os.makedirs(os.path.dirname(label_path), exist_ok=True)

        with open(label_path, 'w') as f:
            for _, row in annotations.iterrows():
                bbox = self._to_yolo_bbox(
                    row['xmin'], row['ymin'], row['xmax'], row['ymax'],
                    img_width, img_height,
                )
                if bbox is None:
                    print(f"Warning: skipping invalid bounding box in {img_path}")
                    continue

                class_id = class_dict[row['class']]
                f.write("{} {:.6f} {:.6f} {:.6f} {:.6f}\n".format(class_id, *bbox))

        return True

    def train_model(self, data_yaml, weights='yolov8n.pt'):
        """Train a YOLO model and return the path to its best checkpoint.

        Args:
            data_yaml (str): Path to the dataset YAML (see ``prepare_data``).
            weights (str): Initial weights to start training from.

        Returns:
            str or None: Path to the best weights, or None if the trainer did
            not expose one.
        """
        model = YOLO(weights).to(self.device)

        results = model.train(
            data=data_yaml,
            epochs=self.epochs,
            batch=self.batch_size,
            imgsz=self.img_size,
            patience=10,
            save=True,
            device=self.device
        )

        # model.train() returns validation metrics, which carry no weights
        # path; the checkpoint location is exposed on the trainer instead.
        # The results object is still checked last as a fallback.
        trainer = getattr(model, 'trainer', None)
        best_weights = getattr(trainer, 'best', None)
        if best_weights is None:
            best_weights = getattr(results, 'best', None)
        if best_weights is not None:
            best_weights = str(best_weights)

        print(f"Training completed. Best weights saved to: {best_weights}")
        return best_weights

    def detect_obstacles(self, model_path, image_path, output_csv=None,
                         show=True, save_annotated=False):
        """
        Detect obstacles in an image and save results to a CSV file

        Args:
            model_path (str): Path to the trained YOLO model
            image_path (str): Path to the input image or directory of images
            output_csv (str): Path to save the output CSV file, default is 'detection_results.csv'
            show (bool): Display the last processed image in an OpenCV window
                (blocks until a key is pressed). Set to False on headless machines.
            save_annotated (bool): Also write each annotated image next to its
                source as '<name>_detected.jpg'. Note that these files match the
                image extensions and will be picked up if the same directory is
                processed again.
        """
        # Set default CSV output path if not provided
        if output_csv is None:
            output_csv = 'detection_results.csv'

        model = YOLO(model_path).to(self.device)

        # Handle if image_path is a directory (sorted for a reproducible order)
        if os.path.isdir(image_path):
            image_files = sorted(
                os.path.join(image_path, f) for f in os.listdir(image_path)
                if f.lower().endswith(self.IMAGE_EXTENSIONS)
            )
        else:
            image_files = [image_path]

        # Prepare list for CSV data
        csv_data = []
        img = None

        # Process each image
        for img_file in image_files:
            results = model.predict(
                source=img_file,
                conf=self.conf_thresh,
                imgsz=self.img_size,
                save=False,
                device=self.device
            )

            # Get the filename without the path
            filename = os.path.basename(img_file)

            # Load image for visualization; cv2.imread returns None on failure,
            # in which case detections are still recorded but nothing is drawn.
            img = cv2.imread(img_file)
            if img is None:
                print(f"Warning: could not load {img_file} for visualization")

            # Process results
            for result in results:
                boxes = result.boxes
                if boxes is None:
                    continue

                # Move each tensor to the CPU once per result instead of per box.
                coords = boxes.xyxy.cpu().numpy().astype(int).tolist()
                confs = boxes.conf.cpu().numpy().tolist()
                cls_ids = boxes.cls.cpu().numpy().astype(int).tolist()

                for (x1, y1, x2, y2), conf, cls_id in zip(coords, confs, cls_ids):
                    cls_name = result.names[cls_id]

                    if img is not None:
                        # Draw bounding box
                        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

                        # Add label, kept inside the frame for boxes touching the top edge
                        label = f"{cls_name}: {conf:.2f}"
                        cv2.putText(img, label, (x1, max(y1 - 10, 10)),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                    # Add to CSV data
                    csv_data.append({
                        'filename': filename,
                        'class': cls_name,
                        'confidence': conf,
                        'xmin': x1,
                        'ymin': y1,
                        'xmax': x2,
                        'ymax': y2
                    })

            # Save the annotated image
            if save_annotated and img is not None:
                output_path = os.path.splitext(img_file)[0] + '_detected.jpg'
                cv2.imwrite(output_path, img)
                print(f"Detection results for {filename} saved to {output_path}")

        # Save results to CSV
        if csv_data:
            df = pd.DataFrame(csv_data)
            df.to_csv(output_csv, index=False)
            print(f"Detection results saved to CSV file: {output_csv}")
        else:
            print("No objects detected in the provided image(s)")

        # Display the last processed image if it could be loaded
        if show and img is not None:
            cv2.imshow('Obstacle Detection', img)
            cv2.waitKey(0)
            cv2.destroyAllWindows()

In [None]:
'''
# Example usage - modify these paths as needed
if __name__ == "__main__":
    # CONFIGURATION: Update these paths for your use case
    
    # Mode - set to either "train" or "detect"
    MODE = "train"  # Change to "detect" for inference
    
    # Training settings
    ANNOTATIONS_PATH = "data/export/annotations.csv"  # Path to your CSV file
    DATA_DIR = "data/export/"  # Directory containing your images
    
    # Detection settings
    MODEL_WEIGHTS = "yolov8n.pt"  # For training: use pre-trained weights, for detection: use your trained weights
    IMAGE_PATH = "data/export/1478898901028431352_jpg.rf.bf49a488aeeba9b30e5b0455a1c0e100.jpg"  # Image for detection
    
    # Create detector object
    yolo = YOLOTrainerDetector()
    
    # Set custom parameters if needed
    yolo.epochs = 10  # Number of training epochs
    yolo.batch_size = 8  # Batch size
    yolo.img_size = 640  # Image size
    yolo.conf_thresh = 0.25  # Confidence threshold for detection
    
    # Run in selected mode
    if MODE == "train":
        # Prepare dataset and train model
        data_yaml = yolo.prepare_data(ANNOTATIONS_PATH, DATA_DIR)
        best_weights = yolo.train_model(data_yaml, weights=MODEL_WEIGHTS)
        print(f"Use the following weights for detection: {best_weights}")
    
    elif MODE == "detect":
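        # Run detection (best_weights only exists after training in this session,
        # so point MODEL_WEIGHTS at your trained weights for this mode)
        yolo.detect_obstacles(MODEL_WEIGHTS, IMAGE_PATH)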
    
    else:
        print(f"Invalid mode: {MODE}. Use 'train' or 'detect'")
'''

In [7]:

# Run detection on every image in the testing/ directory using the weights
# from a previous training run.
yolo = YOLOTrainerDetector()
yolo.detect_obstacles(
    model_path="runs/detect/train/weights/best.pt",
    image_path="testing/",
)


cuda

image 1/1 D:\minie\code\testing\000010_10.png: 224x640 3 cars, 1 trafficLight-Red, 16.8ms
Speed: 2.2ms preprocess, 16.8ms inference, 2.8ms postprocess per image at shape (1, 3, 224, 640)
Detection results for 000010_10.png saved to testing/000010_10_detected.jpg

image 1/1 D:\minie\code\testing\000153_11.png: 224x640 15 cars, 14.7ms
Speed: 3.7ms preprocess, 14.7ms inference, 2.3ms postprocess per image at shape (1, 3, 224, 640)
Detection results for 000153_11.png saved to testing/000153_11_detected.jpg

image 1/1 D:\minie\code\testing\000169_10.png: 224x640 1 car, 1 pedestrian, 1 truck, 17.1ms
Speed: 2.5ms preprocess, 17.1ms inference, 7.0ms postprocess per image at shape (1, 3, 224, 640)
Detection results for 000169_10.png saved to testing/000169_10_detected.jpg

image 1/1 D:\minie\code\testing\000199_11.png: 224x640 6 cars, 21.7ms
Speed: 3.6ms preprocess, 21.7ms inference, 3.9ms postprocess per image at shape (1, 3, 224, 640)
Detection results for 000199_11.png saved to testing