In [22]:
import os
import shutil
import random
from pathlib import Path
import cv2
from ultralytics import YOLO

In [23]:
# 1. Define paths
BASE_PATH = Path('D:/IIMS/sem 3/Computer Vision And Natural Language Processing/datasets')
OUTPUT_PATH = BASE_PATH / 'combined_dataset'

# Number Plate Dataset paths
NP_IMAGES = [
    BASE_PATH / 'number plates/normal_imgs',
    BASE_PATH / 'number plates/wide_imgs',
    BASE_PATH / 'number plates/zoomed_imgs'
]
NP_LABELS = [
    BASE_PATH / 'number plates/normal_number_plate_labels',
    BASE_PATH / 'number plates/wide_number_plate_labels',
    BASE_PATH / 'number plates/zoomed_number_plate_labels'
]

# Vehicle Dataset paths
VEHICLE_IMAGES = [
    BASE_PATH / 'vehicles/wide_imgs',
    BASE_PATH / 'vehicles/all_normal_vehicle_dataset_with_labels/car/images',
    BASE_PATH / 'vehicles/all_normal_vehicle_dataset_with_labels/bikes/images',
    BASE_PATH / 'vehicles/all_normal_vehicle_dataset_with_labels/scooter/images'
]
VEHICLE_LABELS = [
    BASE_PATH / 'vehicles/wide_vehicle_detection_labels',
    BASE_PATH / 'vehicles/all_normal_vehicle_dataset_with_labels/car/labels',
    BASE_PATH / 'vehicles/all_normal_vehicle_dataset_with_labels/bikes/labels',
    BASE_PATH / 'vehicles/all_normal_vehicle_dataset_with_labels/scooter/labels'
]

In [24]:
# 2. Create combined dataset structure
def create_dataset_structure():
    for folder in ['images/train', 'images/val', 'labels/train', 'labels/val']:
        (OUTPUT_PATH / folder).mkdir(parents=True, exist_ok=True)

In [25]:
# 3. Merge datasets and split
def merge_and_split_datasets(train_split=0.8):
    all_images = []
    all_labels = []

    for img_dir, lbl_dir in zip(NP_IMAGES, NP_LABELS):
        for img in img_dir.glob('*.jpg'):
            label_file = lbl_dir / f"{img.stem}.txt"
            if label_file.exists():
                all_images.append(img)
                all_labels.append(label_file)

    for img_dir, lbl_dir in zip(VEHICLE_IMAGES, VEHICLE_LABELS):
        for img in img_dir.glob('*.jpg'):
            label_file = lbl_dir / f"{img.stem}.txt"
            if label_file.exists():
                all_images.append(img)
                all_labels.append(label_file)

    combined = list(zip(all_images, all_labels))
    random.shuffle(combined)
    all_images, all_labels = zip(*combined)
    
    split_idx = int(len(all_images) * train_split)
    train_images, val_images = all_images[:split_idx], all_images[split_idx:]
    train_labels, val_labels = all_labels[:split_idx], all_labels[split_idx:]

    return train_images, train_labels, val_images, val_labels

In [26]:
# 4. Copy files and update labels
def copy_files(images, labels, split_type):
    class_mapping = {'number_plate': 0, 'car': 1, 'bike': 2, 'scooter': 3}
    
    for img, lbl in zip(images, labels):
        shutil.copy(img, OUTPUT_PATH / f'images/{split_type}/{img.name}')
        
        with open(lbl, 'r') as f:
            lines = f.readlines()
        
        with open(OUTPUT_PATH / f'labels/{split_type}/{lbl.name}', 'w') as f:
            for line in lines:
                parts = line.strip().split()
                if parts:
                    class_name = next((k for k, v in class_mapping.items() if v == int(parts[0])), None)
                    if class_name:
                        parts[0] = str(class_mapping[class_name])
                        f.write(' '.join(parts) + '\n')

In [27]:
# 5. Create data.yaml
def create_data_yaml():
    yaml_content = f"""
path: {OUTPUT_PATH}
train: images/train
val: images/val
nc: 4
names: ['number_plate', 'car', 'bike', 'scooter']
    """
    with open(OUTPUT_PATH / 'data.yaml', 'w') as f:
        f.write(yaml_content)

In [28]:
# 6. Main dataset preparation function
def prepare_dataset():
    create_dataset_structure()
    train_img, train_lbl, val_img, val_lbl = merge_and_split_datasets()
    copy_files(train_img, train_lbl, 'train')
    copy_files(val_img, val_lbl, 'val')
    create_data_yaml()
    print("Dataset preparation completed!")

In [None]:
# 7. Training and Export
def train_and_export_model():
    model = YOLO('yolov8n.pt')  # Nano version
    
    # Train the model
    model.train(
        data=str(OUTPUT_PATH / 'data.yaml'),
        epochs=100,
        batch=16,
        imgsz=640,
        device=0,
        patience=50,
        augment=True,
        project='runs/train',
        name='nepali_detection',
        exist_ok=True
    )
    
    # Save PyTorch model
    model.save('best_nepali_detection.pt')
    
    # Export to ONNX
    model.export(format='onnx', imgsz=640, dynamic=False)
    print("Model exported to best_nepali_detection.onnx")
    
    return model

def export_model():
    
    model = 'D:/IIMS/sem 3/Computer Vision And Natural Language Processing/Python/Group_project/best_nepali_detection.pt'
    return model

In [2]:
# 8. Inference functions
def detect_from_image(model, image_path, output_dir="output"):
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    results = model(image_path)
    annotated_img = results[0].plot()
    output_file = output_path / f"detected_{Path(image_path).name}"
    cv2.imwrite(str(output_file), annotated_img)
    print(f"Detected image saved to: {output_file}")
    
    cv2.imshow('Detection Result', annotated_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
    return results

def detect_from_video(model, video_path, output_dir="output"):
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}")
        return
    
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    
    output_file = output_path / f"detected_{Path(video_path).stem}.mp4"
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(output_file), fourcc, fps, (frame_width, frame_height))
    
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        results = model(frame)
        annotated_frame = results[0].plot()
        out.write(annotated_frame)
        
        frame_count += 1
        if frame_count % 30 == 0:
            print(f"Processed {frame_count} frames")
    
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    print(f"Detected video saved to: {output_file}")

In [None]:
# Main execution
if __name__ == "__main__":
    # Prepare dataset
    prepare_dataset()
    
    # Train and export model
    trained_model = train_and_export_model()
    
    # Example inference
    test_image = "D:/IIMS/sem 3/Computer Vision And Natural Language Processing/datasets/test/test.jpeg"
    test_video = "D:/IIMS/sem 3/Computer Vision And Natural Language Processing/datasets/test/test.mp4"
    
    detect_from_image(trained_model, test_image)
    detect_from_video(trained_model, test_video)

NameError: name 'Path' is not defined

In [7]:
from pathlib import Path
from ultralytics import YOLO  # ✅ Import YOLO here!

def detect_from_image(model, image_path, output_dir="output"):
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    results = model.predict(image_path, save=True, save_txt=True, project=output_dir)
    print(f"Image prediction complete. Results saved to {output_dir}")

def detect_from_video(model, video_path, output_dir="output"):
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    results = model.predict(video_path, save=True, save_txt=True, project=output_dir)
    print(f"Video prediction complete. Results saved to {output_dir}")

# ✅ Load the trained model
trained_model = YOLO('D:/IIMS/sem 3/Computer Vision And Natural Language Processing/Python/Group_project/best_nepali_detection.pt')

# Paths
test_image = "D:/IIMS/sem 3/Computer Vision And Natural Language Processing/datasets/test/test.jpeg"
test_video = "D:/IIMS/sem 3/Computer Vision And Natural Language Processing/datasets/test/test.mp4"

# Run detection
detect_from_image(trained_model, test_image)
detect_from_video(trained_model, test_video)


image 1/1 D:\IIMS\sem 3\Computer Vision And Natural Language Processing\datasets\test\test.jpeg: 640x640 3 number_plates, 2 cars, 6.3ms
Speed: 2.8ms preprocess, 6.3ms inference, 49.8ms postprocess per image at shape (1, 3, 640, 640)
Results saved to [1moutput\predict[0m
1 label saved to output\predict\labels
Image prediction complete. Results saved to output


errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

video 1/1 (frame 1/378) D:\IIMS\sem 3\Computer Vision And Natural Language Processing\datasets\test\test.mp4: 384x640 1 car, 77.8ms
video 1/1 (frame 2/378) D:\IIMS\sem 3\Computer Vision And Natural Language P