In [None]:
!pip install ultralytics

**0) Setup and Imports**

In [None]:
import os
import shutil
from glob import glob
import xml.etree.ElementTree as ET
import pandas as pd
import yaml
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
import cv2
import matplotlib.pyplot as plt
from pathlib import Path

**1) Configuration**

In [None]:
# Root folder from which every other path in this notebook is derived.
BASE_PATH = '.'

# Original dataset (the Train and Test folders live here) and the
# YOLO-structured copy that the preparation steps create.
DATASET_SOURCE_PATH = os.path.join(BASE_PATH, 'dataset')
DATASET_DEST_PATH = os.path.join(BASE_PATH, 'yolo_dataset')

# Image and label folders for each split inside the structured dataset.
TRAIN_IMG_PATH, VAL_IMG_PATH, TRAIN_LBL_PATH, VAL_LBL_PATH = (
    os.path.join(DATASET_DEST_PATH, subfolder)
    for subfolder in ('images/train', 'images/val', 'labels/train', 'labels/val')
)

# Dataset description file, and the directory in which training results are saved.
YAML_PATH = os.path.join(BASE_PATH, 'data.yaml')
PROJECT_DIR = BASE_PATH

# Make sure the four split folders exist; folders that already exist are left as they are.
for path in (TRAIN_IMG_PATH, VAL_IMG_PATH, TRAIN_LBL_PATH, VAL_LBL_PATH):
    os.makedirs(path, exist_ok=True)

# Class name -> integer class id written to the label files.
CLASSES = dict(dog=0, cat=1)


# PART 1: Object Detection Framework Development

**Step 1: Dataset Preparation**

In [None]:
def parse_xml_to_df(xml_files):
    """Read annotation XML files into one DataFrame with a row per annotated object.

    Args:
        xml_files: Iterable of paths to XML annotation files. Each file is
            expected to contain a `filename` element, a `size` element with
            `width` and `height`, and zero or more `object` elements that
            carry a `name` and a `bndbox` (xmin, xmax, ymin, ymax).

    Returns:
        pandas.DataFrame with the columns filename, width, height, name,
        xmin, xmax, ymin, ymax. A file without `object` elements adds no rows.
    """
    columns = ['filename', 'width', 'height', 'name', 'xmin', 'xmax', 'ymin', 'ymax']
    records = []

    for xml_path in xml_files:
        annotation = ET.parse(xml_path).getroot()

        # Image-level fields are shared by every object row of this file.
        image_name = annotation.find('filename').text
        size_node = annotation.find('size')
        img_width = int(size_node.find('width').text)
        img_height = int(size_node.find('height').text)

        for obj in annotation.findall('object'):
            class_name = obj.find('name').text
            box = obj.find('bndbox')
            xmin, xmax, ymin, ymax = (
                int(box.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax')
            )
            records.append(
                (image_name, img_width, img_height, class_name, xmin, xmax, ymin, ymax)
            )

    return pd.DataFrame(records, columns=columns)


In [None]:
def convert_to_yolo_format(df):
    """Add normalized YOLO box columns and numeric class ids to an annotation frame.

    The input frame is not modified: the new columns are added to a copy, which
    is returned. This keeps the caller's data intact and avoids pandas'
    SettingWithCopyWarning when `df` is a filtered slice of a larger frame.

    Args:
        df: DataFrame with the columns width, height, name, xmin, xmax, ymin, ymax.

    Returns:
        A new DataFrame holding the original columns plus:
        center_x, center_y -- box centre divided by image width / height,
        w, h               -- box width / height divided by image width / height,
        id                 -- the value CLASSES maps `name` to (NaN for names
                              that are not keys of CLASSES).
    """
    out = df.copy()

    out['center_x'] = ((out['xmin'] + out['xmax']) / 2) / out['width']
    out['center_y'] = ((out['ymin'] + out['ymax']) / 2) / out['height']
    out['w'] = (out['xmax'] - out['xmin']) / out['width']
    out['h'] = (out['ymax'] - out['ymin']) / out['height']

    # Label encode the class names
    out['id'] = out['name'].map(CLASSES)
    return out

In [None]:
def save_yolo_labels(df, labels_path):
    """Write one space-separated label file per image into `labels_path`.

    Args:
        df: DataFrame with the columns filename, id, center_x, center_y, w, h.
        labels_path: Directory that receives the files. Each file is named after
            the image with its extension replaced by '.txt' and holds one
            `id center_x center_y w h` line per row of that image.
    """
    label_columns = ['id', 'center_x', 'center_y', 'w', 'h']

    for image_name, rows in df.groupby('filename'):
        stem = os.path.splitext(image_name)[0]
        target = os.path.join(labels_path, stem + '.txt')
        # No header and no index: the file contains the five label values only.
        rows[label_columns].to_csv(target, sep=' ', header=False, index=False)

**Main Data Preparation Pipeline**

In [None]:
# 1. Find all XML files one level below the dataset root (e.g. dataset/train/*.xml).
#    - The list is sorted so that the seeded split below does not depend on the
#      order in which the filesystem happens to list the files.
#    - pathlib is used instead of the name `glob`, which a later cell rebinds
#      with `import glob`; this cell therefore still works when it is re-run.
all_xml_files = sorted(str(p) for p in Path(DATASET_SOURCE_PATH).glob('*/*.xml'))
if not all_xml_files:
    raise FileNotFoundError(f"No XML annotation files found under {DATASET_SOURCE_PATH}")

# 2. Parse all XMLs into a single DataFrame
full_df = parse_xml_to_df(all_xml_files)
print(f"Found and parsed {len(full_df)} annotations from {full_df['filename'].nunique()} images.")

# 3. Get a list of unique image files
unique_images = full_df['filename'].unique()

# 4. Split files into training and validation sets (e.g., 80% train, 20% val)
# This is a more robust way to split than using pre-defined folders
train_files, val_files = train_test_split(unique_images, test_size=0.2, random_state=42)
print(f"Splitting into {len(train_files)} training images and {len(val_files)} validation images.")

# 5. Create train and validation DataFrames.
#    .copy() gives each split its own data, so adding columns in the next step
#    never writes into (or warns about) a slice of full_df.
train_df = full_df[full_df['filename'].isin(train_files)].copy()
val_df = full_df[full_df['filename'].isin(val_files)].copy()

# 6. Convert to YOLO format
train_df = convert_to_yolo_format(train_df)
val_df = convert_to_yolo_format(val_df)

# 7. Save YOLO label files
save_yolo_labels(train_df, TRAIN_LBL_PATH)
save_yolo_labels(val_df, VAL_LBL_PATH)
print("Saved YOLO format labels for train and validation sets.")


In [None]:
# 8. Copy image files to the new structured directories
def copy_images(filenames, source_dir, dest_dir):
    """Copy each image from `source_dir`/train or `source_dir`/test into `dest_dir`.

    Args:
        filenames: Iterable of image file names (without a directory part).
        source_dir: Dataset root that contains the 'train' and 'test' folders.
        dest_dir: Existing directory that receives the copies.

    For every name the 'train' folder is checked first, then 'test'. A name
    found in neither folder is not copied; such names are counted and reported
    in a printed warning instead of being skipped silently.
    """
    missing = []
    for filename in filenames:
        candidates = (
            os.path.join(source_dir, split, filename) for split in ('train', 'test')
        )
        source_path = next((c for c in candidates if os.path.exists(c)), None)
        if source_path is None:
            missing.append(filename)
            continue
        shutil.copy(source_path, os.path.join(dest_dir, filename))

    if missing:
        print(f"Warning: {len(missing)} image(s) not found under {source_dir}, e.g. {missing[:5]}")

# Fill the train and validation image folders from the original dataset.
for split_files, split_img_dir in ((train_files, TRAIN_IMG_PATH), (val_files, VAL_IMG_PATH)):
    copy_images(split_files, DATASET_SOURCE_PATH, split_img_dir)
print("Copied images to new train/val directories.")

In [None]:
# 9. Create the data.yaml file programmatically
# Order the class names by their numeric id, so that position i in `names` is
# always the class written as id i in the label files, whatever order the
# entries of CLASSES were declared in.
class_names = [name for name, _ in sorted(CLASSES.items(), key=lambda item: item[1])]

yaml_content = {
    'train': os.path.abspath(TRAIN_IMG_PATH),
    'val': os.path.abspath(VAL_IMG_PATH),
    'nc': len(class_names),
    'names': class_names
}

with open(YAML_PATH, 'w') as f:
    yaml.dump(yaml_content, f, default_flow_style=False)

print(f"Generated data.yaml file at: {YAML_PATH}")
print("--- Dataset Preparation Complete ---\n")

**Step 2: Model Development & Experimentation**

In [None]:
# Make PROJECT_DIR the working directory so that results are saved there.
os.chdir(PROJECT_DIR)
working_dir = os.getcwd()
print(f"Changed working directory to: {working_dir}")

In [None]:
# --- Experiment 1 (baseline): YOLOv8s with epochs=30, batch=8, imgsz=640 ---
print("\n--- Starting Architecture Experiment 1: Training YOLOv8s, with 30 epochs, 8 batches, and 640 image size ---")

# Training settings of the baseline; the later experiments each vary one of them.
baseline_train_args = dict(
    data=YAML_PATH,
    epochs=30,
    batch=8,
    imgsz=640,
    name='yolov8s_architecture_exp',
)

model_small = YOLO('yolov8s.pt')
results_small = model_small.train(**baseline_train_args)

In [None]:
# --- Experiment 2: more training epochs (50 instead of the baseline's 30) ---
# Shows how performance changes with more training time, i.e. whether the
# 30-epoch baseline is undertrained or whether longer training starts to overfit.

print("\n--- Starting Parameter Experiment: Training for 50 Epochs ---")

# Same settings as the baseline except for `epochs`.
epochs50_train_args = dict(
    data=YAML_PATH,
    epochs=50,  # increased from 30
    batch=8,
    imgsz=640,
    name='yolov8s_50_epochs_exp',
)

model_s = YOLO('yolov8s.pt')
model_s.train(**epochs50_train_args)

In [None]:
# --- Experiment 3: smaller input image size (320 instead of the baseline's 640) ---
# Shows the impact of input resolution on detection performance, especially
# for small objects.
print("\n--- Starting Parameter Experiment: Training with imgsz=320 ---")

# Same settings as the baseline except for `imgsz`.
imgsz320_train_args = dict(
    data=YAML_PATH,
    epochs=30,
    batch=8,
    imgsz=320,  # reduced from 640
    name='yolov8s_imgsz320_exp',
)

model_s = YOLO('yolov8s.pt')
model_s.train(**imgsz320_train_args)

In [None]:
# --- Experiment 4: different model size (architecture) ---
# The YOLOv8 model sizes (n, s, m, l, x) differ in the number of layers and
# channels in their backbone (the feature extractor) and neck. Here the nano
# checkpoint replaces the small one; all other settings match the baseline.

print("\n--- Starting Architecture Experiment : Training YOLOv8n ---")

nano_train_args = dict(
    data=YAML_PATH,
    epochs=30,
    batch=8,
    imgsz=640,
    name='yolov8n_architecture_exp',
)

model_nano = YOLO('yolov8n.pt')
results_nano = model_nano.train(**nano_train_args)

**Step 3: Model Evaluation**

In [None]:
# NOTE: this rebinds the name `glob` from the function imported at the top of
# the notebook (`from glob import glob`) to the module itself.
import glob

# Evaluation settings. Both names are used below but were never defined in the
# notebook, so the cell failed with a NameError on a fresh kernel.
# 640 matches the baseline training image size -- adjust if a different
# evaluation resolution is wanted.
IMGSZ_EVAL = 640
# Run names as passed to `name=` in the four training experiments above.
RUNS = [
    'yolov8s_architecture_exp',
    'yolov8s_50_epochs_exp',
    'yolov8s_imgsz320_exp',
    'yolov8n_architecture_exp',
]


def eval_run(run_name, imgsz=IMGSZ_EVAL):
    """Validate the best weights of one training run and return its key metrics.

    Args:
        run_name: Folder name of the run under PROJECT_DIR/runs/detect.
        imgsz: Image size passed to the validation call.

    Returns:
        dict with the run name, the evaluation output folder, the image size
        used, and mAP@0.5, mAP@0.5:0.95, Precision and Recall as floats
        (0.0 when a metric key is absent from the results).

    Raises:
        AssertionError: if the run's weights/best.pt file does not exist.
    """
    w = Path(PROJECT_DIR) / 'runs' / 'detect' / run_name / 'weights' / 'best.pt'
    assert w.exists(), f"Best weights not found for {run_name}"
    m = YOLO(str(w))
    eval_name = f"{run_name}_evaluation_bycode"  # Custom eval folder name
    vr = m.val(
        data=YAML_PATH,
        imgsz=imgsz,
        split='val',
        plots=True,
        save_json=True,
        name=eval_name,             # Custom folder for this evaluation
        project='runs/detect'       # Keep all evals under runs/detect
    )
    d = vr.results_dict
    # Each metric is looked up under its plain key first, then under the
    # '(B)'-suffixed key, so either naming of the results dict is accepted.
    return {
        'run': run_name,
        'eval_folder': f"runs/detect/{eval_name}",   # <- store the eval folder for reference!
        'imgsz_eval': imgsz,
        'mAP@0.5': float(d.get('metrics/mAP50', d.get('metrics/mAP50(B)', 0.0))),
        'mAP@0.5:0.95': float(d.get('metrics/mAP50-95', d.get('metrics/mAP50-95(B)', 0.0))),
        'Precision': float(d.get('metrics/precision', d.get('metrics/precision(B)', 0.0))),
        'Recall': float(d.get('metrics/recall', d.get('metrics/recall(B)', 0.0))),
    }

# Evaluate every run and rank them by mAP@0.5:0.95, best first.
summary = [eval_run(r) for r in RUNS]
eval_df = pd.DataFrame(summary).sort_values('mAP@0.5:0.95', ascending=False)
display(eval_df)  # nice table in notebook

# Save a CSV for your report appendix
eval_csv = os.path.join(PROJECT_DIR, 'evaluation_summary.csv')
eval_df.to_csv(eval_csv, index=False)
print("Saved:", eval_csv)


**Step 4: Visualization of Results**

In [None]:
# Set the parameters
BEST_RUN = 'yolov8s_50_epochs_exp'  # <-- set the run to be visualized based on the evaluation earlier
EVAL_FOLDER = f"{BEST_RUN}_evaluation_bycode"
WEIGHTS_PATH = Path(PROJECT_DIR) / 'runs' / 'detect' / BEST_RUN / 'weights' / 'best.pt'
VISUALIZE_DIR = Path(PROJECT_DIR) / 'visualisations' / BEST_RUN

os.makedirs(VISUALIZE_DIR, exist_ok=True)

# Load best model
model = YOLO(str(WEIGHTS_PATH))

# Pick sample validation images.
# pathlib is used instead of the name `glob`: that name is bound to a function
# by the notebook's top-level import and to the module by a later `import glob`,
# so relying on it would make this cell depend on which cell ran last.
val_images = sorted(
    str(p)
    for pattern in ('*.jpg', '*.jpeg', '*.png')
    for p in Path(VAL_IMG_PATH).glob(pattern)
)
SAMPLE_SIZE = 16
sample_imgs = val_images[:SAMPLE_SIZE]
if not sample_imgs:
    raise FileNotFoundError(f"No validation images found in {VAL_IMG_PATH}")

# Run prediction and save images.
# exist_ok=True keeps the output in 'viz_preds' when the cell is re-run, so the
# folder read back below is the one that was just written.
pred_results = model.predict(
    sample_imgs,
    imgsz=640,
    conf=0.25,
    save=True,
    project=str(VISUALIZE_DIR),
    name='viz_preds',
    exist_ok=True
)
print(f"Saved predictions to: {VISUALIZE_DIR/'viz_preds'}")


def _show_bgr(img_bgr):
    """Display an image array given in OpenCV's BGR channel order."""
    plt.figure(figsize=(8, 8))
    plt.imshow(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()


# Show a few good predictions
def show_image_with_boxes(img_path):
    """Read the image stored at `img_path` and display it.

    The boxes are whatever is already drawn into the stored image; nothing is
    drawn here. An unreadable path is reported and skipped.
    """
    img = cv2.imread(str(img_path))
    if img is None:  # cv2.imread returns None instead of raising
        print(f"Could not read image: {img_path}")
        return
    _show_bgr(img)

print("\n-- Example Good Predictions --")
pred_img_dir = VISUALIZE_DIR / 'viz_preds'
all_pred_imgs = sorted(str(p) for p in pred_img_dir.glob('*.jpg'))
for img_path in all_pred_imgs[:4]:  # Show first 4 (likely to be correct)
    show_image_with_boxes(img_path)

# Find and show weak/failure cases.
# Only two signals are used: no detection at all, or the lowest box confidence
# of the image. Predictions are NOT compared with the ground-truth labels here.
weak_cases = []
for res, img_path in zip(pred_results, sample_imgs):
    # 1) Missed: no boxes predicted
    if len(res.boxes) == 0:
        weak_cases.append(("NO DETECTION", img_path, 0.0, res))
    else:
        # 2) Weak: lowest confidence in boxes
        conf = float(res.boxes.conf.min().cpu())
        weak_cases.append(("LOW CONF", img_path, conf, res))

# Sort: missed first, then lowest confidence
weak_cases_sorted = sorted(weak_cases, key=lambda x: x[2])[:6]

print("\n-- Example Weak/Failure Cases --")
for tag, img_path, conf, res in weak_cases_sorted:
    print(f"{tag} | {os.path.basename(img_path)} | conf: {conf:.3f}")
    # res.plot() returns the image with the predicted boxes drawn in, so the
    # weak detections are visible (the raw input image would show no boxes).
    _show_bgr(res.plot())

# PART 2: Real Time Object Detection

**Step 1: Load the Best model**

In [None]:
# Weights file of the run chosen as best: <PROJECT_DIR>/runs/detect/<run>/weights/best.pt
BEST_RUN = 'yolov8s_50_epochs_exp'
WEIGHTS_PATH = Path(PROJECT_DIR, 'runs', 'detect', BEST_RUN, 'weights', 'best.pt')

# YOLO is given the path as a plain string.
model = YOLO(os.fspath(WEIGHTS_PATH))

**Step 2: Run Inference on the Video**

YOLOv8 makes this very simple. It will:


*   Process each frame of the video
*   Draw bounding boxes and class labels
*   Save the output as a new video file





In [None]:
# Input and output
video_input_path = 'catDog.mp4' # Example video file name
if os.path.exists(video_input_path):
    print(f"Running inference on {video_input_path}...")
    # stream=True makes predict() return a generator, so the per-frame results
    # are produced one at a time instead of all being collected in memory.
    frame_results = model.predict(
        source=video_input_path,
        conf=0.25,
        save=True,
        project='runs',
        name='video_inference',
        stream=True
    )
    # Consuming the generator is what actually runs the inference and writes
    # the output video; the individual results are not needed here.
    for _frame_result in frame_results:
        pass
    print("Video inference complete. Output saved in 'runs/video_inference'.")
else:
    print(f"Video file not found at {video_input_path}. Skipping video inference.")
