In [2]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import numpy as np
import time

# Load the object detector through tensorflow_hub.
# NOTE: the URL below points at a RetinaNet (ResNet-101 FPN, 1024x1024) model,
# not Faster R-CNN. The model is fetched once at import time and reused for
# every frame by detect_pedestrians().
model_url = "https://www.kaggle.com/models/tensorflow/retinanet-resnet/frameworks/TensorFlow2/variations/101-v1-fpn-1024x1024/versions/1"
detector = hub.load(model_url)

# Function to perform pedestrian detection on a video
def detect_pedestrians(input_video_path, output_video_path, *,
                       skip_frames=4, score_threshold=0.5):
    """Run the module-level ``detector`` over a video and save annotated frames.

    Every ``skip_frames + 1``-th frame is passed to the detector; detections
    whose class id is 1 and whose score exceeds ``score_threshold`` are drawn
    as labelled boxes. Each processed frame is written to the output video and
    shown in a preview window. Pressing ``q`` in the preview stops early.
    Throughput and confidence statistics are printed at the end.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the XVID-encoded video to write.
        skip_frames: Number of frames to drop between processed frames.
        score_threshold: Minimum detection score for a box to be kept.

    Raises:
        ValueError: If ``skip_frames`` is negative.
        OSError: If the input video cannot be opened.
    """
    if skip_frames < 0:
        raise ValueError("skip_frames must be >= 0")
    stride = skip_frames + 1

    cap = cv2.VideoCapture(input_video_path)
    out = None
    confidence = []
    frames_processed = 0
    frame_count = 0

    # try/finally guarantees the capture, writer and preview window are
    # released even if inference or drawing raises midway through the video.
    try:
        if not cap.isOpened():
            raise OSError("Could not open input video: {}".format(input_video_path))

        source_fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Only one frame in `stride` is written, so the writer's frame rate is
        # scaled down accordingly; otherwise the output plays `stride`x fast.
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(output_video_path, fourcc, source_fps / stride,
                              (width, height))

        start_time = time.perf_counter()
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            if frame_count % stride != 0:
                continue

            frames_processed += 1
            print("Processing Frame: ", frame_count)

            # OpenCV decodes to BGR; the frame is converted to RGB and wrapped
            # in a batch of one uint8 image before being given to the detector.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image_tensor = tf.convert_to_tensor([rgb_frame], dtype=tf.uint8)
            detector_output = detector(image_tensor)

            # Take the first (only) batch entry and keep the valid detections.
            num_detections = int(detector_output['num_detections'][0])
            boxes = detector_output['detection_boxes'][0].numpy()[:num_detections]
            classes = detector_output['detection_classes'][0].numpy()[:num_detections]
            scores = detector_output['detection_scores'][0].numpy()[:num_detections]

            # Class id 1 is treated as "pedestrian" (presumably the COCO
            # "person" label -- confirm against the model's label map).
            keep = (scores > score_threshold) & (classes == 1)
            confidence.extend(scores[keep])

            # Boxes are scaled as normalised [ymin, xmin, ymax, xmax] values.
            for ymin, xmin, ymax, xmax in boxes[keep]:
                top, left = int(ymin * height), int(xmin * width)
                bottom, right = int(ymax * height), int(xmax * width)
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
                cv2.putText(frame, 'Pedestrian', (left, top - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

            # Write the frame with bounding boxes to the output video
            out.write(frame)

            # Display the frame with bounding boxes; 'q' aborts the run
            cv2.imshow('Pedestrian Detection', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        total_time = time.perf_counter() - start_time
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

    # Processing throughput (processed frames per wall-clock second).
    processing_fps = frames_processed / total_time if total_time > 0 else 0.0
    print("FPS: {:.2f}".format(processing_fps))

    # np.mean([]) emits a RuntimeWarning; report nan directly when nothing
    # was detected.
    confidence_avg = np.mean(confidence) if confidence else float("nan")
    print("Total Detections: ", len(confidence), " Average Confidence score: ", confidence_avg)

# Input video to analyse and destination for the annotated result.
# NOTE(review): the output filename says "FasterRCNN", but model_url above
# loads a RetinaNet model -- consider renaming the file to avoid confusion.
input_video_path = 'Dataset/pedestrian.mp4'
output_video_path = 'FasterRCNN_output_video.avi'

# Run detection over the input video: annotated frames are written to
# output_video_path and shown in a preview window while processing.
detect_pedestrians(input_video_path, output_video_path)


Processing Frame:  5
Processing Frame:  10
Processing Frame:  15
Processing Frame:  20
Processing Frame:  25
Processing Frame:  30
Processing Frame:  35
Processing Frame:  40
Processing Frame:  45
Processing Frame:  50
Processing Frame:  55
Processing Frame:  60
Processing Frame:  65
Processing Frame:  70
Processing Frame:  75
Processing Frame:  80
Processing Frame:  85
Processing Frame:  90
Processing Frame:  95
Processing Frame:  100
Processing Frame:  105
Processing Frame:  110
Processing Frame:  115
Processing Frame:  120
Processing Frame:  125
Processing Frame:  130
Processing Frame:  135
Processing Frame:  140
Processing Frame:  145
Processing Frame:  150
Processing Frame:  155
Processing Frame:  160
Processing Frame:  165
Processing Frame:  170
Processing Frame:  175
Processing Frame:  180
Processing Frame:  185
Processing Frame:  190
Processing Frame:  195
Processing Frame:  200
Processing Frame:  205
Processing Frame:  210
Processing Frame:  215
Processing Frame:  220
Processi