In [6]:
from ultralytics import YOLO

# Load the pre-trained YOLOv8 nano weights.
model = YOLO("yolov8n.pt")

# Run inference on a single image; only the first entry of the returned
# results is used, since one image was passed in.
results = model("C:/Users/ss/OneDrive/Pictures/d1.png")
first_result = results[0]
first_result.show()  # display the result for that image


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:08<00:00, 760kB/s]



image 1/1 C:\Users\ss\OneDrive\Pictures\d1.png: 416x640 12 persons, 3 cars, 1 motorcycle, 1 truck, 202.1ms
Speed: 11.8ms preprocess, 202.1ms inference, 2.9ms postprocess per image at shape (1, 3, 416, 640)


In [7]:
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort


In [8]:
# Load the pre-trained YOLOv8 model. Checkpoints come in several sizes
# (nano/small/medium/large/extra large); "yolov8n.pt" is the one used here.
model = YOLO("yolov8n.pt")  # or yolov8s.pt, etc.


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:08<00:00, 759kB/s]


In [9]:
# Create the Deep SORT tracker that the tracking loop feeds detections into.
# NOTE(review): max_age is presumably how many missed frames a track survives
# before it is dropped — confirm against the deep_sort_realtime documentation.
tracker = DeepSort(max_age=25)  # You can tweak parameters


In [None]:
# Name of the single class to track. The tracking loop compares it against the
# label strings in model.names, so it must match one of those labels exactly.
TARGET_CLASS = "person"  # Change to "car" or any class of interest


In [None]:
# Track TARGET_CLASS objects from a live video source and draw their IDs.
#
# Opens the default webcam (device index 0). To process a file instead, pass
# its path, e.g. cv2.VideoCapture(r'C:\Users\ss\Downloads\honda.mp4').
# Relies on `model`, `tracker` and `TARGET_CLASS` defined in earlier cells.
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        print("Error: Could not open video source")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame available: camera disconnected or stream ended.
                break

            # Run YOLOv8 detection; one frame in, so take the first result.
            results = model(frame)[0]

            detections = []
            for box in results.boxes:
                cls_id = int(box.cls[0])
                class_name = model.names[cls_id]

                # Only track the specified class
                if class_name != TARGET_CLASS:
                    continue

                conf = float(box.conf[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                # The tracker is fed (left, top, width, height) boxes, so the
                # corner coordinates are converted here.
                detections.append(([x1, y1, x2 - x1, y2 - y1], conf, class_name))

            # Update Deep SORT tracker
            tracks = tracker.update_tracks(detections, frame=frame)

            for track in tracks:
                # Skip tracks the tracker has not confirmed yet.
                if not track.is_confirmed():
                    continue
                track_id = track.track_id
                x1, y1, x2, y2 = map(int, track.to_ltrb())

                # Draw bounding box & ID
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{TARGET_CLASS} ID:{track_id}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            cv2.imshow("Tracking", frame)

            # Press 'q' in the video window to stop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even when the loop raises
    # or the kernel is interrupted; otherwise the device stays locked.
    cap.release()
    cv2.destroyAllWindows()


In [5]:
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort


# Self-contained tracking cell: detect TARGET_CLASS objects in a video file
# with YOLOv8 and follow them across frames with Deep SORT.
model = YOLO("yolov8n.pt")
tracker = DeepSort(max_age=25)
TARGET_CLASS = "person"
cap = cv2.VideoCapture(r'C:\Users\ss\Downloads\welcome.mp4')

try:
    if not cap.isOpened():
        print("Error: Could not open video file")
    else:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of the video (or a read failure).
                break

            # One frame in, so take the first result.
            results = model(frame)[0]

            detections = []
            for box in results.boxes:
                cls_id = int(box.cls[0])
                class_name = model.names[cls_id]
                if class_name == TARGET_CLASS:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    conf = float(box.conf[0])
                    # Pass the detected class name rather than a hard-coded
                    # "person", so changing TARGET_CLASS keeps labels correct.
                    # Boxes are given as (left, top, width, height).
                    detections.append(([x1, y1, x2 - x1, y2 - y1], conf, class_name))

            tracks = tracker.update_tracks(detections, frame=frame)
            for track in tracks:
                # Skip tracks the tracker has not confirmed yet.
                if not track.is_confirmed():
                    continue
                x1, y1, x2, y2 = map(int, track.to_ltrb())
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f'ID {track.track_id}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            cv2.imshow("Tracking", frame)

            # Press 'q' in the video window to stop early.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always release the file handle and close the window, even when the loop
    # raises or the kernel is interrupted.
    cap.release()
    cv2.destroyAllWindows()


0: 640x384 (no detections), 425.6ms
Speed: 23.5ms preprocess, 425.6ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 persons, 1 surfboard, 279.0ms
Speed: 28.8ms preprocess, 279.0ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 persons, 1 surfboard, 154.1ms
Speed: 4.0ms preprocess, 154.1ms inference, 2.4ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 9 persons, 1 skateboard, 1 surfboard, 156.8ms
Speed: 6.9ms preprocess, 156.8ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 5 persons, 1 surfboard, 197.5ms
Speed: 5.1ms preprocess, 197.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 8 persons, 1 surfboard, 187.5ms
Speed: 5.9ms preprocess, 187.5ms inference, 3.5ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 7 persons, 1 surfboard, 221.4ms
Speed: 6.7ms preprocess, 221.4ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 38

# Beginner-Friendly YOLO Object Detection

This section contains simplified code that's easier to understand for beginners. We'll break down the process into smaller, manageable steps.

In [None]:
# Step 1: Simple Image Detection (Beginner Version)
from ultralytics import YOLO
import cv2

# Load the YOLO model
print("Loading YOLO model...")
model = YOLO("yolov8n.pt")

# Picture to analyse
image_path = "C:/Users/ss/OneDrive/Pictures/d1.png"  # Change this to your image path
print(f"Loading image: {image_path}")

try:
    image = cv2.imread(image_path)

    # A None result is treated as a failed load (see the else branch).
    if image is not None:
        print("Image loaded successfully!")

        # Detect objects; a single image was passed, so use the first result.
        print("Running object detection...")
        results = model(image)
        detection = results[0]

        # Render the bounding boxes and show them until a key is pressed.
        annotated_image = detection.plot()
        cv2.imshow("YOLO Detection Results", annotated_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        # List every detection with a 1-based index.
        print("\nDetected objects:")
        for rank, box in enumerate(detection.boxes, start=1):
            class_name = detection.names[int(box.cls[0])]
            confidence = float(box.conf[0])
            print(f"{rank}. {class_name} (confidence: {confidence:.2f})")
    else:
        print("Error: Could not load image. Please check the file path.")

except Exception as e:
    # Report the problem instead of showing a traceback.
    print(f"An error occurred: {e}")

In [34]:
# Step 2: Simple Webcam Detection (Beginner Version)
from ultralytics import YOLO
import cv2

# Load the YOLO model
model = YOLO("yolov8n.pt")

print("Starting webcam detection...")
print("Press 'q' to quit")

# Open webcam (0 is usually the default camera)
cap = cv2.VideoCapture(0)

# Check if webcam opened successfully
if not cap.isOpened():
    print("Error: Could not open webcam")
else:
    try:
        while True:
            # Read frame from webcam
            ret, frame = cap.read()

            if not ret:
                print("Error: Could not read frame")
                break

            # Run YOLO detection on the frame
            results = model(frame)

            # Draw the detection results on the frame
            annotated_frame = results[0].plot()

            # Display the frame
            cv2.imshow("Webcam YOLO Detection", annotated_frame)

            # Break the loop if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Clean up even if the loop raises or the kernel is interrupted, so the
        # camera is not left locked and the window is not left hanging.
        cap.release()
        cv2.destroyAllWindows()

    print("Webcam detection ended.")

Starting webcam detection...
Press 'q' to quit


0: 640x384 1 person, 324.2ms
Speed: 5.5ms preprocess, 324.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)
0: 640x384 1 person, 324.2ms
Speed: 5.5ms preprocess, 324.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)


0: 640x384 1 person, 196.2ms
Speed: 20.2ms preprocess, 196.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 196.2ms
Speed: 20.2ms preprocess, 196.2ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 147.5ms
Speed: 2.6ms preprocess, 147.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 147.5ms
Speed: 2.6ms preprocess, 147.5ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 142.8ms
Speed: 2.5ms preprocess, 142.8ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 384)
0: 640x384 1 person, 142.8ms
Speed: 2.5ms preprocess, 

In [33]:
# Step 3: Count Specific Objects (Beginner Version)
from ultralytics import YOLO
import cv2

# Load the YOLO model
model = YOLO("yolov8n.pt")

# What object do you want to count?
target_object = "person"  # Change this to "car", "dog", "cat", etc.

# Detections at or below this confidence are ignored when counting.
CONFIDENCE_THRESHOLD = 0.5

print(f"Starting webcam detection to count: {target_object}")
print("Press 'q' to quit")

# Open webcam
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open webcam")
else:
    # The window title does not change between frames, so build it once.
    window_title = f"Counting {target_object}"

    try:
        while True:
            ret, frame = cap.read()

            if not ret:
                break

            # Run detection
            results = model(frame)

            # Count the target objects in this frame
            count = 0

            # Loop through all detected objects
            for box in results[0].boxes:
                class_id = int(box.cls[0])
                class_name = results[0].names[class_id]
                confidence = float(box.conf[0])

                # If it's our target object and confidence is high enough
                if class_name == target_object and confidence > CONFIDENCE_THRESHOLD:
                    count += 1

                    # Draw bounding box for target objects only
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{class_name} {confidence:.2f}",
                                (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            # Display the count on screen
            cv2.putText(frame, f"{target_object} count: {count}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            # Show the frame
            cv2.imshow(window_title, frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Clean up even if the loop raises or the kernel is interrupted, so the
        # camera is not left locked and the window is not left hanging.
        cap.release()
        cv2.destroyAllWindows()

    print("Object counting ended.")

Starting webcam detection to count: person
Press 'q' to quit


0: 480x640 (no detections), 522.2ms
Speed: 47.5ms preprocess, 522.2ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)
0: 480x640 (no detections), 522.2ms
Speed: 47.5ms preprocess, 522.2ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)


0: 480x640 1 person, 222.4ms
Speed: 7.7ms preprocess, 222.4ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 222.4ms
Speed: 7.7ms preprocess, 222.4ms inference, 1.9ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 2 persons, 170.6ms
Speed: 6.5ms preprocess, 170.6ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)
0: 480x640 2 persons, 170.6ms
Speed: 6.5ms preprocess, 170.6ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)


0: 480x640 1 person, 180.7ms
Speed: 3.2ms preprocess, 180.7ms inference, 1.8ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 1 person, 18

In [None]:
# Step 4: Process Video File (Beginner Version)
from ultralytics import YOLO
import cv2

# Load the YOLO model
model = YOLO("yolov8n.pt")

# Video file path (change this to your video file)
video_path = "C:/Users/ss/Downloads/honda.mp4"  # Update with your video path

# Only every FRAME_STRIDE-th frame is run through the detector (speeds things up).
FRAME_STRIDE = 5
# Frames wider than this many pixels are scaled down before being displayed.
MAX_DISPLAY_WIDTH = 1200

print(f"Processing video: {video_path}")
print("Press 'q' to quit early")

# Open video file
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open video file")
    print("Make sure the file path is correct and the video format is supported")
else:
    try:
        # Get video properties
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        print(f"Video info: {total_frames} frames, {fps:.2f} FPS")

        frame_count = 0

        while True:
            ret, frame = cap.read()

            if not ret:
                print("End of video reached")
                break

            frame_count += 1

            # Skipped frames are neither analysed nor displayed.
            if frame_count % FRAME_STRIDE != 0:
                continue

            # Run detection
            results = model(frame)

            # Draw detection results
            annotated_frame = results[0].plot()

            # Add frame counter
            cv2.putText(annotated_frame, f"Frame: {frame_count}/{total_frames}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

            # Resize frame if it's too big, keeping the aspect ratio
            height, width = annotated_frame.shape[:2]
            if width > MAX_DISPLAY_WIDTH:
                scale = MAX_DISPLAY_WIDTH / width
                new_width = int(width * scale)
                new_height = int(height * scale)
                annotated_frame = cv2.resize(annotated_frame, (new_width, new_height))

            # Display the frame
            cv2.imshow("Video Processing", annotated_frame)

            # Exit if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Clean up even if the loop raises or the kernel is interrupted, so the
        # file handle is released and the window is not left hanging.
        cap.release()
        cv2.destroyAllWindows()

    print("Video processing completed!")

## Key Differences from Advanced Version

### What makes these examples beginner-friendly:

1. **Better Error Handling**: Added `try`/`except` blocks and checks that the camera or file opened successfully
2. **Clear Comments**: Every step is explained with comments
3. **Step-by-Step Approach**: Broken down into separate, focused examples
4. **Print Statements**: Added informative messages to track progress
5. **Simpler Logic**: No complex tracking algorithms, just basic detection
6. **Easy Customization**: Clear variables at the top to change settings

### What the advanced version adds:
- **Object Tracking**: Uses DeepSORT to track objects across frames with unique IDs
- **Performance Optimization**: More complex but efficient processing
- **Advanced Features**: Persistent tracking even when objects are temporarily hidden

### For Beginners: Start with These Simple Examples
1. Run Step 1 first (image detection)
2. Then try Step 2 (webcam detection)
3. Move on to Step 3 (object counting)
4. Finally, try Step 4 (video processing)

Each example builds on the previous one but remains simple and easy to understand!