# Chapter 9: Object Detection with OpenCV (Python)

## Objective
To learn and implement classical object detection techniques in OpenCV using Haar Cascades and HOG + SVM. This lab includes face and pedestrian detection from images and video streams.


## 1. What is Object Detection?

**Description**: Object detection is the task of locating and classifying objects within an image. It outputs bounding boxes around objects of interest.


## 2. Haar Cascade Classifiers

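**Description**: Haar cascades use simple Haar-like features selected with AdaBoost and arranged in a cascade of increasingly strict classifiers, so that most non-object regions are rejected early and objects can be detected in real time.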


### 2.1 Face Detection with Haar Cascades


In [None]:
import cv2

# Load the pre-trained frontal-face Haar cascade that ships with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# A cascade that failed to load is "empty" and would only fail later inside
# detectMultiScale, so fail early with a clear message.
if face_cascade.empty():
    raise IOError("Could not load Haar cascade 'haarcascade_frontalface_default.xml'")

img = cv2.imread('images/face_sample.jpeg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("Could not read image 'images/face_sample.jpeg'")

# Resize image to fit screen better (keep aspect ratio)
height, width = img.shape[:2]
if width > 800 or height > 600:
    scale = min(800/width, 600/height)
    new_width = int(width * scale)
    new_height = int(height * scale)
    img = cv2.resize(img, (new_width, new_height))

# The cascade is run on a single-channel grayscale copy of the image.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

# Each detection is (x, y, w, h); draw it as a blue (BGR) box on the colour image.
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

# Create resizable window
cv2.namedWindow('Detected Faces', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Detected Faces', 800, 600)
cv2.imshow('Detected Faces', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()

# Output Description: Draws blue rectangles around detected faces using pre-trained Haar cascades.


## 3. HOG + SVM Detector

**Description**: HOG (Histogram of Oriented Gradients) captures edge and gradient structure. Paired with SVM, it's effective for detecting pedestrians.


### 3.1 Pedestrian Detection


In [None]:
# HOG descriptor configured with OpenCV's built-in people-detector SVM.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

img = cv2.imread('images/pedestrian.jpg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("Could not read image 'images/pedestrian.jpg'")

# Resize image to manageable size for display
height, width = img.shape[:2]
if width > 800 or height > 600:
    scale = min(800/width, 600/height)
    new_width = int(width * scale)
    new_height = int(height * scale)
    img = cv2.resize(img, (new_width, new_height))

# Returns the detected boxes and a matching array of SVM weights (unused here).
(rects, weights) = hog.detectMultiScale(img, winStride=(8,8), padding=(8,8), scale=1.05)

# Each detection is (x, y, w, h); draw it as a green box.
for (x, y, w, h) in rects:
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)

# Create resizable window
cv2.namedWindow('Pedestrian Detection', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Pedestrian Detection', 800, 600)
cv2.imshow('Pedestrian Detection', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()

# Output Description: Draws green rectangles around pedestrians in the image using HOG + SVM detector.


## 4. Real-Time Detection with Webcam


In [None]:
# Live face detection on the default webcam (device index 0); press 'q' to quit.
# Relies on `face_cascade` created in the face-detection cell above.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open webcam (device index 0)")

try:
    # Create resizable window for webcam
    cv2.namedWindow('Real-Time Face Detection', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Real-Time Face Detection', 800, 600)

    while True:
        ret, frame = cap.read()
        # A failed grab (camera unplugged, stream ended) yields ret=False and
        # frame=None; stop instead of passing None to cvtColor.
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

        cv2.imshow('Real-Time Face Detection', frame)
        # Mask to the low byte so the comparison also works on platforms where
        # waitKey returns extra modifier bits (same form as the video cell).
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close windows, even if detection raised.
    cap.release()
    cv2.destroyAllWindows()

# Output Description: Performs live face detection from the webcam and displays it in real-time.


## 5. Summary

- **Haar Cascades** are fast but sensitive to lighting and scale.
- **HOG + SVM** is more robust and well-suited for pedestrian detection.
- Both methods work without deep learning and run in real time on CPUs.


---

# Suggested Exercises Implementation


## Exercise 1: Detect eyes or smiles using haarcascade_eye.xml or haarcascade_smile.xml


In [24]:
# Exercise 1: detect eyes and smiles inside each detected face.
# Relies on `face_cascade` created in the face-detection cell above.
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')
# A cascade that failed to load is "empty"; fail early with a clear message.
if eye_cascade.empty():
    raise IOError("Could not load Haar cascade 'haarcascade_eye.xml'")
if smile_cascade.empty():
    raise IOError("Could not load Haar cascade 'haarcascade_smile.xml'")

img = cv2.imread('images/face_smile.jpg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("Could not read image 'images/face_smile.jpg'")

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect faces first
faces = face_cascade.detectMultiScale(gray, 1.1, 5)

for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

    # Region of interest within face. roi_color is a view into img, so
    # rectangles drawn on it (in face-local coordinates) appear on img.
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]

    # Detect eyes
    eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 3)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)

    # Detect smiles (coarser scale step and a higher neighbour threshold
    # than the eye detector)
    smiles = smile_cascade.detectMultiScale(roi_gray, 1.8, 20)
    for (sx, sy, sw, sh) in smiles:
        cv2.rectangle(roi_color, (sx, sy), (sx+sw, sy+sh), (0, 0, 255), 2)

cv2.namedWindow('Face Features Detection', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Face Features Detection', 600, 900)
cv2.imshow('Face Features Detection', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()

print("Exercise 1: Eye and smile detection completed!")
print("Blue = Face, Green = Eyes, Red = Smile")


Exercise 1: Eye and smile detection completed!
Blue = Face, Green = Eyes, Red = Smile


## Exercise 2: Replace webcam input with video file and detect people frame-by-frame


In [26]:
# Exercise 2: run the HOG people detector on every frame of a video file,
# save the annotated result and print simple statistics.
# Relies on `hog` created in the pedestrian-detection cell above.
cap = cv2.VideoCapture('videos/people.mp4')
if not cap.isOpened():
    raise FileNotFoundError("Could not open video 'videos/people.mp4'")

# Get video properties. Keep fps as a float: truncating e.g. 29.97 to 29
# would make the saved video play at the wrong speed.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Set up video writer to save the output
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('videos/people_detected.mp4', fourcc, fps, (width, height))
if not out.isOpened():
    cap.release()
    raise IOError("Could not open 'videos/people_detected.mp4' for writing")

frame_count = 0
detection_count = 0

print("Processing video and saving detection results...")
print("Output will be saved as 'videos/people_detected.mp4'")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of file (or a read error): stop processing.
            break

        frame_count += 1

        # Detect pedestrians in current frame
        (rects, weights) = hog.detectMultiScale(frame, winStride=(8,8), padding=(8,8), scale=1.05)

        current_detections = len(rects)
        detection_count += current_detections

        # Draw rectangles around detected pedestrians
        for (x, y, w, h) in rects:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 3)
            cv2.putText(frame, 'Person', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        # Add frame info
        cv2.putText(frame, f'Frame: {frame_count}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, f'People: {current_detections}', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

        # Write the frame to output video
        out.write(frame)

        # Optional: Display while processing (comment out for faster processing)
        cv2.imshow('Processing Video...', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release everything, even if detection or writing raised, so the
    # output file is finalised and the input handle is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Guard against a video that yielded no frames (avoids ZeroDivisionError).
average_detections = detection_count / frame_count if frame_count else 0.0

print("✅ Video processing complete!")
print("📹 Output saved: videos/people_detected.mp4")
print("📊 Statistics:")
print(f"   • Total frames: {frame_count}")
print(f"   • Total detections: {detection_count}")
print(f"   • Average detections per frame: {average_detections:.2f}")


Processing video and saving detection results...
Output will be saved as 'videos/people_detected.mp4'
✅ Video processing complete!
📹 Output saved: videos/people_detected.mp4
📊 Statistics:
   • Total frames: 596
   • Total detections: 139
   • Average detections per frame: 0.23


## Exercise 3: Tune the HOG detector parameters (e.g., scale, winStride) to trade off speed and accuracy

In [29]:
# Exercise 3: run the HOG people detector with several parameter sets on the
# same image and compare how many detections each one produces.
# Relies on `hog` created in the pedestrian-detection cell above.
img = cv2.imread('images/pedestrian.jpg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("Could not read image 'images/pedestrian.jpg'")

# Resize for display
height, width = img.shape[:2]
if width > 800 or height > 600:
    scale = min(800/width, 600/height)
    new_width = int(width * scale)
    new_height = int(height * scale)
    img = cv2.resize(img, (new_width, new_height))

# Test different parameter combinations
param_configs = [
    {'winStride': (4, 4), 'padding': (8, 8), 'scale': 1.02, 'name': 'High Precision'},
    {'winStride': (8, 8), 'padding': (8, 8), 'scale': 1.05, 'name': 'Balanced'},
    {'winStride': (16, 16), 'padding': (16, 16), 'scale': 1.1, 'name': 'Fast Detection'},
]

for config in param_configs:
    # Work on a copy so boxes drawn for one configuration never become
    # input for the next one.
    test_img = img.copy()

    # Detect with current parameters
    (rects, weights) = hog.detectMultiScale(
        test_img,
        winStride=config['winStride'],
        padding=config['padding'],
        scale=config['scale'],
    )

    # Draw rectangles
    for (x, y, w, h) in rects:
        cv2.rectangle(test_img, (x, y), (x+w, y+h), (0, 255, 0), 2)

    # Add parameter info
    cv2.putText(test_img, f"{config['name']}: {len(rects)} detections",
                (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    # One window per configuration; build the title once and reuse it.
    window_title = f'HOG Tuning - {config["name"]}'
    cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(window_title, 800, 600)
    cv2.imshow(window_title, test_img)
    cv2.waitKey(0)  # block until any key is pressed
    cv2.destroyAllWindows()

    print(f"{config['name']}: {len(rects)} pedestrians detected")

print("Exercise 3: Parameter tuning completed!")


High Precision: 8 pedestrians detected
Balanced: 4 pedestrians detected
Fast Detection: 1 pedestrians detected
Exercise 3: Parameter tuning completed!


## Exercise 4: Combine face and body detection into a single pipeline


In [30]:
# Exercise 4: run face detection (Haar cascade) and pedestrian detection
# (HOG + SVM) on the same image and show both results together.
# Relies on `face_cascade` and `hog` created in earlier cells.
img = cv2.imread('images/pedestrian.jpg')
# cv2.imread does not raise on a missing/unreadable file; it returns None.
if img is None:
    raise FileNotFoundError("Could not read image 'images/pedestrian.jpg'")

# Resize for display
height, width = img.shape[:2]
if width > 800 or height > 600:
    scale = min(800/width, 600/height)
    new_width = int(width * scale)
    new_height = int(height * scale)
    img = cv2.resize(img, (new_width, new_height))

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Run BOTH detectors before drawing anything: annotating img first would feed
# the face boxes and labels into the HOG detector as if they were image content.
# Face detection with Haar cascades
faces = face_cascade.detectMultiScale(gray, 1.1, 5)
# Pedestrian detection with HOG+SVM
(rects, weights) = hog.detectMultiScale(img, winStride=(8,8), padding=(8,8), scale=1.05)

# Annotate faces in blue (BGR).
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    cv2.putText(img, 'Face', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)

# Annotate pedestrians in green.
for (x, y, w, h) in rects:
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.putText(img, 'Person', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

cv2.namedWindow('Combined Detection', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Combined Detection', 800, 600)
cv2.imshow('Combined Detection', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()

print("Exercise 4: Combined detection completed!")
print(f"Detected {len(faces)} faces and {len(rects)} pedestrians")
print("Blue boxes = Faces, Green boxes = Pedestrians")


Exercise 4: Combined detection completed!
Detected 0 faces and 4 pedestrians
Blue boxes = Faces, Green boxes = Pedestrians
