In [9]:
# Load variables from a local .env file into the process environment;
# ROBOFLOW_API_KEY is read from the environment with os.getenv further down.
from dotenv import load_dotenv
load_dotenv() 

True

I am using a Roboflow object detection model that has been trained on three classes: golf club, golf club head, and golf ball. Let's first run it over the video and collect the predictions in a list.

In [6]:
# import the InferencePipeline interface
from inference import InferencePipeline
# import a built-in sink called render_boxes (sinks are the logic that happens after inference)
from inference.core.interfaces.stream.sinks import render_boxes
from inference.core.interfaces.camera.entities import VideoFrame
import os
import json
from pprint import pprint

# Directory name intended for prediction output files.
TARGET_DIR = "predictions"
# Roboflow credential taken from the environment (None when the variable is unset).
api_key = os.environ.get("ROBOFLOW_API_KEY")

# Accumulates the predictions appended by save_prediction.
arr = list()

def save_prediction(prediction: dict, video_frame: VideoFrame) -> None:
    """Pipeline sink that stores each prediction in the module-level ``arr`` list.

    Args:
        prediction: Inference result handed to the callback by the pipeline.
        video_frame: Frame object passed alongside the prediction; not used here.
    """
    arr.append(prediction)


# Build the inference pipeline: the model is run over the video source and
# every set of results is handed to save_prediction.
pipeline = InferencePipeline.init(
    # Roboflow model to load.
    # NOTE(review): the original comment described this as a yolov8x model with
    # input size 1280 — confirm that this holds for this model id.
    model_id="golf-49wbh/1",
    # Video source: a link/path to a video file, an RTSP stream url, or an
    # integer device id (usually 0 for built-in webcams).
    video_reference="input_videos/ex.mp4",
    # Callback invoked with each set of inference results.
    on_prediction=save_prediction,
    # Roboflow API key used for loading models from the Roboflow API.
    api_key=api_key,
)

# Run the pipeline and block until it has finished.
pipeline.start()
pipeline.join()

In [10]:
# Number of predictions collected by the pipeline callback
len(arr)

794

As a quick sanity check, I want to confirm that the number of collected predictions equals the number of frames OpenCV reports for the same video.

In [5]:
import cv2

def get_frame_count(video_path):
    """Return the frame count OpenCV reports for a video, or None if it cannot be opened.

    Args:
        video_path: Source handed to ``cv2.VideoCapture`` (here, a file path).

    Returns:
        The ``CAP_PROP_FRAME_COUNT`` property converted to ``int``, or ``None``
        when the capture could not be opened (an error message is printed in
        that case).
    """
    video = cv2.VideoCapture(video_path)
    try:
        # Bail out early if the source could not be opened
        if not video.isOpened():
            print("Error: Could not open video.")
            return None

        # NOTE(review): presumably this value comes from container metadata
        # rather than from decoding every frame, so it may be approximate for
        # some files — confirm against the OpenCV documentation.
        return int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture on every path, including the failure branch and
        # any exception raised while querying the property.
        video.release()

# Count the frames of the clip that was also fed to the inference pipeline
video_path = "input_videos/ex.mp4"
frame_count = get_frame_count(video_path)
print("Total number of frames: {}".format(frame_count))


Total number of frames: 794


Now let's look at what each prediction contains.

In [11]:
# Show the first collected prediction to see its structure
arr[0]

{'image': {'width': 720, 'height': 1280},
 'predictions': [{'x': 473.5,
   'y': 1153.0,
   'width': 299.0,
   'height': 118.0,
   'confidence': 0.6775482892990112,
   'class': 'club',
   'class_id': 0,
   'detection_id': '0e6d3a2c-116e-4186-b98f-a68a81b38e49'},
  {'x': 603.0,
   'y': 1161.5,
   'width': 38.0,
   'height': 117.0,
   'confidence': 0.6426902413368225,
   'class': 'club_head',
   'class_id': 1,
   'detection_id': '00f6f024-194f-4dd6-8d9a-9bdfb18d9a7e'},
  {'x': 101.5,
   'y': 512.5,
   'width': 43.0,
   'height': 289.0,
   'confidence': 0.5648428797721863,
   'class': 'club',
   'class_id': 0,
   'detection_id': '43cef3bd-eb2b-476e-9b73-275369a97b5b'}]}

In [13]:
# Number of detections in each collected prediction
num_preds = [len(entry['predictions']) for entry in arr]
# Distinct detection counts that occur across all collected predictions
print({*num_preds})

{0, 1, 2, 3, 4}
