In [2]:
import cv2
import numpy as np
import time

# Load the YOLO network from its weights/config pair and remember which
# output layers have to be evaluated on every forward pass.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
layer_names = net.getUnconnectedOutLayersNames()

# Class labels: one label per line of coco.names, surrounding whitespace removed.
with open('coco.names', 'r') as names_file:
    classes = [label.strip() for label in names_file]

# One random triple of values in [0, 255) per class label.
# NOTE: `colors` is not read anywhere else in this cell.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Load video
video_path = '../Dataset/pedestrian.mp4'
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise when the file is missing or cannot be decoded;
# without this check the read loop would simply end on its first iteration
# and the statistics below would be computed over zero frames.
if not cap.isOpened():
    raise IOError("Could not open video file: {}".format(video_path))

start_time = time.time()  # Record the start time (the timed span begins here)
skipframes = 4       # number of frames discarded between two processed frames
tempcount = 0        # frames discarded since the last processed frame
framecount = 0       # not referenced anywhere else in this cell
AllConfidence = []   # confidence of every detection that survives NMS, across all frames
framesProcessed = 0  # frames that were actually run through the network

# Detection parameters used by the loop below.
CONF_THRESHOLD = 0.5     # minimum class score for a detection to be kept (also the NMS score threshold)
NMS_THRESHOLD = 0.4      # overlap threshold handed to Non-Maximum Suppression; tune as needed
INPUT_SIZE = (416, 416)  # size the frame is resized to when building the network input blob
PERSON_CLASS_ID = 0      # class index kept as "pedestrian" (the original note maps COCO class 0 to pedestrians)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames (or a read failure): stop processing.
            break

        # Frame skipping: discard `skipframes` frames, then process the next one.
        if tempcount != skipframes:
            tempcount += 1
            continue
        tempcount = 0

        framesProcessed += 1
        height, width = frame.shape[:2]

        # Prepare the image for YOLO
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, INPUT_SIZE, swapRB=True, crop=False)
        net.setInput(blob)
        outs = net.forward(layer_names)

        class_ids = []
        confidences = []
        boxes = []

        # The first four values of a detection are multiplied by the frame size
        # to obtain pixel coordinates; the factor is the same for every
        # detection of this frame, so build it once.
        scale = np.array([width, height, width, height])

        # Process YOLO output
        for out in outs:
            for detection in out:
                # Entries from index 5 onwards are the per-class scores.
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > CONF_THRESHOLD and class_id == PERSON_CLASS_ID:
                    center_x, center_y, w, h = (detection[0:4] * scale).astype('int')
                    # Convert centre/size to the top-left corner used for drawing and NMS.
                    x, y = int(center_x - w / 2), int(center_y - h / 2)

                    class_ids.append(class_id)
                    confidences.append(float(confidence))

                    # Store plain Python ints so the box can be passed to the
                    # OpenCV drawing calls without numpy scalar types.
                    boxes.append([x, y, int(w), int(h)])

        # Apply Non-Maximum Suppression
        indices = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)

        # Draw bounding boxes after NMS.
        # Depending on the OpenCV version NMSBoxes returns an (N, 1) array, a
        # flat array, or an empty tuple; flattening handles all three.
        for i in np.asarray(indices, dtype=int).reshape(-1):
            i = int(i)
            x, y, w, h = boxes[i]
            AllConfidence.append(confidences[i])
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, 'Pedestrian', (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,255,255), 2)

        # Display the resulting frame
        cv2.imshow('Pedestrian Tracking', frame)

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
except BaseException:
    # An error or a kernel interrupt inside the loop would otherwise leave the
    # capture handle open and the display window hanging; clean up, then
    # re-raise so the failure stays visible. The normal exit path is untouched.
    cap.release()
    cv2.destroyAllWindows()
    raise

end_time = time.time()  # Record the end time
total_time = end_time - start_time

# Throughput counts only the frames that were run through the network; the
# elapsed time spans everything since start_time, including display and skipped frames.
fps = framesProcessed / total_time if total_time > 0 else 0.0
# ".2f" is precision two; the previous "2f" was a minimum *width* of two and
# therefore printed the default six decimals.
print("FPS: {:.2f}".format(fps))

# np.mean([]) evaluates to nan but also emits a RuntimeWarning; produce the
# same nan directly when nothing was detected.
confidenceAvg = np.mean(AllConfidence) if AllConfidence else float("nan")

print("Total Detections: ", len(AllConfidence), " Average Confidence score: ", confidenceAvg)
# Release the video capture object and close all windows
cap.release()
cv2.destroyAllWindows()


FPS: 2.186052
Total Detections:  348  Average Confidence score:  0.938521607511345
