In [3]:
import os
import cv2
import time
from ultralytics import YOLO
from tqdm import tqdm

class Config:
    """Static settings for the video-to-dataset detection pipeline."""

    # --- Detector thresholds (forwarded to the model call) ---
    CONFIDENCE_THRESH = 0.3
    IOU_THRESH = 0.5

    # --- Class names of interest, as they appear in the model's name table ---
    PERSON_CLASS = 'person'
    VEHICLE_CLASSES = ['car', 'bus', 'truck', 'motorcycle']

    # --- Pause, in seconds, inserted after each processed frame ---
    FRAME_PROCESSING_DELAY = 0.1

    # --- Filesystem locations ---
    # Weights file handed to ultralytics.YOLO.
    MODEL_PATH = "/content/yolov8n.pt"
    # Input video.
    VIDEO_PATH = r"C:\Users\rakti\Downloads\a\YOLO\Object-Detection-with-YOLO-and-Data-Filtering-main\task_video.mp4"
    # Root output folder; the generated dataset lives in a "new_dataset" subfolder.
    OUTPUT_DIR = r"C:\Users\rakti\Downloads\a\YOLO\Object-Detection-with-YOLO-and-Data-Filtering-main"
    NEW_DATASET_DIR = os.path.join(OUTPUT_DIR, "new_dataset")

def process_detections(results, vehicle_classes, conf_thresh=None, person_class=None):
    """Split YOLO results into vehicle boxes, person boxes and vehicle label rows.

    Args:
        results: Iterable of result objects. Each must expose ``boxes`` (an
            iterable of boxes carrying ``conf``, ``cls`` and ``xyxy``) and
            ``names`` (a mapping from class id to class name).
        vehicle_classes: Collection of class names to treat as vehicles.
        conf_thresh: Minimum confidence a box needs to be kept. When omitted,
            ``Config.CONFIDENCE_THRESH`` is used.
        person_class: Class name to treat as a person. When omitted,
            ``Config.PERSON_CLASS`` is used.

    Returns:
        A ``(vehicles, persons, detections)`` tuple where

        * ``vehicles`` is a list of ``(x1, y1, x2, y2)`` integer pixel boxes,
        * ``persons`` is a list of ``(x1, y1, x2, y2)`` integer pixel boxes,
        * ``detections`` is a list of ``(cls_id, x1, y1, x2, y2)`` rows for the
          vehicle boxes only; persons are not added here so the caller can
          filter them first.
    """
    # Resolve defaults lazily so callers may override them without touching Config.
    if conf_thresh is None:
        conf_thresh = Config.CONFIDENCE_THRESH
    if person_class is None:
        person_class = Config.PERSON_CLASS

    vehicles = []
    persons = []
    detections = []

    for result in results:
        names = result.names
        for box in result.boxes:
            # float()/int() accept plain numbers as well as single-element
            # tensors, giving an ordinary Python scalar to compare against.
            if float(box.conf) < conf_thresh:
                continue

            cls_id = int(box.cls)
            cls_name = names[cls_id]

            # Vehicles take precedence, matching the if/elif order callers rely on.
            is_vehicle = cls_name in vehicle_classes
            if not is_vehicle and cls_name != person_class:
                continue  # class we do not track: skip coordinate conversion

            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

            if is_vehicle:
                vehicles.append((x1, y1, x2, y2))
                detections.append((cls_id, x1, y1, x2, y2))
            else:
                persons.append((x1, y1, x2, y2))

    return vehicles, persons, detections

def filter_persons(persons, vehicles, person_cls_id):
    """Keep only the persons whose box centre is outside every vehicle box.

    Args:
        persons: Iterable of ``(x1, y1, x2, y2)`` person boxes.
        vehicles: Iterable of ``(x1, y1, x2, y2)`` vehicle boxes.
        person_cls_id: Class id prepended to every surviving person box.

    Returns:
        List of ``(person_cls_id, x1, y1, x2, y2)`` rows, in input order, for
        persons whose centre point is not inside (edges inclusive) any vehicle.
    """
    def _center_in_any_vehicle(cx, cy):
        # Edges count as "inside": a centre lying exactly on a vehicle border is dropped.
        return any(
            left <= cx <= right and top <= cy <= bottom
            for (left, top, right, bottom) in vehicles
        )

    return [
        (person_cls_id, px1, py1, px2, py2)
        for (px1, py1, px2, py2) in persons
        if not _center_in_any_vehicle((px1 + px2) / 2.0, (py1 + py2) / 2.0)
    ]

def _write_yolo_labels(label_path, detections, width, height):
    """Write ``(cls_id, x1, y1, x2, y2)`` pixel boxes as normalised YOLO label rows."""
    with open(label_path, "w") as f:
        for cls_id, x1, y1, x2, y2 in detections:
            x_center = (x1 + x2) / (2.0 * width)
            y_center = (y1 + y2) / (2.0 * height)
            w = (x2 - x1) / width
            h = (y2 - y1) / height
            f.write(f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}\n")


def run_object_detection():
    """Run YOLO over every frame of the configured video and export a dataset.

    For each frame the function detects vehicles and persons, drops persons
    whose centre falls inside a vehicle box, saves the frame as
    ``images/NNNN.jpg`` and writes the remaining boxes to ``labels/NNNN.txt``
    in YOLO format, both under ``Config.NEW_DATASET_DIR``.

    Raises:
        ValueError: If the model has no class named ``Config.PERSON_CLASS``.
        OSError: If the video cannot be opened or a frame image cannot be written.
    """
    model = YOLO(Config.MODEL_PATH)

    # Look up the numeric id of the person class in the model's name table.
    person_cls_id = next(
        (cls_id for cls_id, name in model.names.items() if name == Config.PERSON_CLASS),
        None,
    )
    if person_cls_id is None:
        raise ValueError("Person class not found in model")

    cap = cv2.VideoCapture(Config.VIDEO_PATH)
    try:
        # Fail loudly: an unopened capture would otherwise fall straight through
        # the loop and report success for an empty dataset.
        if not cap.isOpened():
            raise OSError(f"Could not open video: {Config.VIDEO_PATH}")

        # Create new dataset directory and its subdirectories
        os.makedirs(Config.NEW_DATASET_DIR, exist_ok=True)
        img_dir = os.path.join(Config.NEW_DATASET_DIR, "images")
        label_dir = os.path.join(Config.NEW_DATASET_DIR, "labels")
        os.makedirs(img_dir, exist_ok=True)
        os.makedirs(label_dir, exist_ok=True)

        # Some containers report 0 or a negative frame count; let tqdm run
        # without a total in that case instead of showing a bogus bar.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        progress_total = total_frames if total_frames > 0 else None

        with tqdm(total=progress_total, desc="Processing video") as pbar:
            frame_count = 0
            while cap.isOpened():
                success, frame = cap.read()
                if not success:
                    break

                # Run object detection on the frame
                results = model(frame, conf=Config.CONFIDENCE_THRESH,
                                iou=Config.IOU_THRESH, verbose=False)

                # Separate vehicles and persons, then drop persons inside vehicles
                vehicles, persons, detections = process_detections(results, Config.VEHICLE_CLASSES)
                filtered_persons = filter_persons(persons, vehicles, person_cls_id)
                final_detections = detections + filtered_persons

                height, width = frame.shape[:2]
                img_path = os.path.join(img_dir, f"{frame_count:04d}.jpg")
                label_path = os.path.join(label_dir, f"{frame_count:04d}.txt")

                # imwrite signals failure through its return value, not an exception.
                if not cv2.imwrite(img_path, frame):
                    raise OSError(f"Could not write image: {img_path}")

                _write_yolo_labels(label_path, final_detections, width, height)

                frame_count += 1
                pbar.update(1)

                # Optional throttle only: the results above are already complete
                # when the model call returns, so the pause does not affect
                # detection output. Set the delay to 0 to run at full speed.
                if Config.FRAME_PROCESSING_DELAY > 0:
                    time.sleep(Config.FRAME_PROCESSING_DELAY)
    finally:
        # Release the capture even when a frame fails part-way through.
        cap.release()

    print(f"Results saved in {Config.NEW_DATASET_DIR}")

# Entry point: run the detection pipeline when this code executes as the main module.
if __name__ == "__main__":
    run_object_detection()

Processing video: 100%|██████████| 1224/1224 [04:46<00:00,  4.28it/s]

Results saved in C:\Users\rakti\Downloads\a\YOLO\Object-Detection-with-YOLO-and-Data-Filtering-main\new_dataset





In [9]:
import os
import cv2
from ultralytics import YOLO
from tqdm import tqdm

# Dataset layout: annotated frames are written to a "visualizations" folder
# inside the dataset folder that also holds "images" and "labels".
OUTPUT_DIR = r"C:\Users\rakti\Downloads\a\YOLO\Object-Detection-with-YOLO-and-Data-Filtering-main"
NEW_DATASET_DIR = os.path.join(OUTPUT_DIR, "new_dataset")
VISUALIZATION_DIR = os.path.join(NEW_DATASET_DIR, "visualizations")

# Weights file; the visualizer loads it to obtain the class-id -> name mapping.
MODEL_PATH = "/content/yolov8n.pt"

# Box/text colour per class name, as (R, G, B): the visualizer draws on the
# image after converting it from BGR to RGB.
CLASS_COLORS = dict(
    person=(0, 255, 0),
    car=(255, 0, 0),
    motorcycle=(255, 255, 0),
    bus=(0, 255, 255),
    truck=(255, 0, 255),
)

def _yolo_to_pixel_box(x_center, y_center, w, h, width, height):
    """Convert a normalised YOLO box to integer pixel corners ``(x1, y1, x2, y2)``."""
    # round() rather than int(): the labels are stored with six decimals, so a
    # corner can come back as e.g. 99.9997 and truncation would shift it by a pixel.
    x1 = round((x_center - w / 2) * width)
    y1 = round((y_center - h / 2) * height)
    x2 = round((x_center + w / 2) * width)
    y2 = round((y_center + h / 2) * height)
    return x1, y1, x2, y2


def visualize_results():
    """Draw the saved YOLO labels onto each dataset image and save the result.

    Reads every file in ``<NEW_DATASET_DIR>/images``, looks for a label file
    with the same base name in ``<NEW_DATASET_DIR>/labels``, draws one
    rectangle and class name per label row, and writes the annotated image
    under the same file name into ``VISUALIZATION_DIR``. Files that OpenCV
    cannot read as images are skipped; images without a label file are
    copied without annotations.

    Raises:
        OSError: If an annotated image cannot be written.
    """
    os.makedirs(VISUALIZATION_DIR, exist_ok=True)

    # Load model for class names
    model = YOLO(MODEL_PATH)
    class_names = model.names

    img_dir = os.path.join(NEW_DATASET_DIR, "images")
    label_dir = os.path.join(NEW_DATASET_DIR, "labels")

    image_files = sorted(os.listdir(img_dir))

    for img_file in tqdm(image_files, desc="Visualizing images"):
        img_path = os.path.join(img_dir, img_file)
        # Swap only the real extension; str.replace would also rewrite ".jpg"
        # elsewhere in the name and leave other image extensions unchanged.
        stem = os.path.splitext(img_file)[0]
        label_path = os.path.join(label_dir, stem + ".txt")

        image = cv2.imread(img_path)
        if image is None:
            continue
        height, width = image.shape[:2]

        if os.path.exists(label_path):
            with open(label_path, "r") as f:
                for line in f:
                    parts = line.split()
                    if len(parts) != 5:
                        continue
                    cls_id, x_center, y_center, w, h = map(float, parts)
                    cls_name = class_names[int(cls_id)]

                    x1, y1, x2, y2 = _yolo_to_pixel_box(
                        x_center, y_center, w, h, width, height
                    )

                    # CLASS_COLORS holds RGB triples while the image stays in
                    # OpenCV's BGR order, so reverse the triple instead of
                    # converting the whole frame to RGB and back.
                    rgb = CLASS_COLORS.get(cls_name, (255, 255, 255))
                    color = tuple(rgb[::-1])

                    cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
                    # Keep the caption on-canvas when the box touches the top edge.
                    text_y = max(y1 - 10, 15)
                    cv2.putText(image, cls_name, (x1, text_y),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        # Save the visualized image; imwrite reports failure via its return value.
        vis_path = os.path.join(VISUALIZATION_DIR, img_file)
        if not cv2.imwrite(vis_path, image):
            raise OSError(f"Could not write image: {vis_path}")

    print(f"Visualizations saved in {VISUALIZATION_DIR}")

# Entry point: render the visualizations when this code executes as the main module.
if __name__ == "__main__":
    visualize_results()

Visualizing images: 100%|██████████| 1224/1224 [00:20<00:00, 60.29it/s]

Visualizations saved in C:\Users\rakti\Downloads\a\YOLO\Object-Detection-with-YOLO-and-Data-Filtering-main\new_dataset\visualizations



