In [1]:
! pip3 install opencv-python numpy ultralytics


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [23]:
import cv2
import numpy as np
from ultralytics import YOLO

## Function to extract object names

In [55]:
def get_list_of_objects(result):
    """Return the class names of the top-5 predictions of the first result.

    Args:
        result: Indexable collection of prediction results. Only ``result[0]``
            is inspected; it must expose ``probs.top5`` (an iterable of class
            indices) and ``names`` (a lookup from class index to label).

    Returns:
        A list of labels in the same order as ``probs.top5``.
    """
    first = result[0]
    return [first.names[class_index] for class_index in first.probs.top5]

## Function to get objects from a single image

In [46]:
def get_objects_from_frame(model, frame):
    """Run ``model`` on a single frame and return the predicted object names.

    Args:
        model: Object whose ``predict(source=...)`` result is accepted by
            ``get_list_of_objects``.
        frame: The image passed to ``model.predict`` as ``source``.

    Returns:
        Whatever ``get_list_of_objects`` returns for the prediction.
    """
    return get_list_of_objects(model.predict(source=frame))

## Use a pretrained YOLO model to predict objects in an image

In [11]:
# Load the pretrained checkpoint "yolov8x-cls.pt"; its predictions are class labels with probabilities.
model = YOLO("yolov8x-cls.pt")

In [37]:
# Classify a sample image given by URL; the returned value is indexed as result[0] by the helper functions.
result = model.predict(source="https://ultralytics.com/images/bus.jpg")


Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 /home/paul/Uni/SEP/ShortSearch/bus.jpg: 224x224 minibus 0.95, amphibian 0.02, trolleybus 0.01, recreational_vehicle 0.01, passenger_car 0.00, 96.5ms
Speed: 1.0ms preprocess, 96.5ms inference, 0.1ms postprocess per image at shape (1, 3, 224, 224)


In [42]:
# Show the labels extracted from the sample prediction.
get_list_of_objects(result)

['minibus', 'amphibian', 'trolleybus', 'recreational_vehicle', 'passenger_car']

In [34]:
# predict() returns a list of results, so index the first one before reading its attributes.
# NOTE(review): with a classification checkpoint the predictions are in `.probs`;
# `.boxes` is presumably None here — confirm against the Ultralytics Results docs.
result[0].boxes

## Sample every fifth frame from a video and find objects

In [48]:
def detect_objects_in_video(video_path, model, frame_interval=5):
    """Collect the object names predicted on sampled frames of a video.

    Every ``frame_interval``-th frame (starting at frame 0) is decoded and
    passed to ``get_objects_from_frame``; the labels returned for all sampled
    frames are merged into a single set, which is printed and returned.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        model: Model forwarded unchanged to ``get_objects_from_frame``.
        frame_interval: Step between sampled frame indices. Defaults to 5.

    Returns:
        A set of detected object names, or ``None`` if the video could not
        be opened.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    # Validate before touching the video so a bad step never opens (and leaks) a capture.
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print("Error opening video file")
        return None

    detected_objects = set()
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        for frame_number in range(0, frame_count, frame_interval):
            # Seek explicitly so exactly the requested frame index is decoded.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()

            if not ret:
                print(f"Error reading frame {frame_number}")
                break

            detected_objects.update(get_objects_from_frame(model, frame))
    finally:
        # Release the capture even if prediction raises part-way through.
        cap.release()

    print(detected_objects)
    return detected_objects

## Run video classification

In [56]:
# Classify sampled frames of the test clip with the model loaded above; the returned set is the cell output.
detect_objects_in_video("./test.mp4", model)


0: 224x224 desk 0.42, folding_chair 0.33, dining_table 0.07, studio_couch 0.02, sewing_machine 0.01, 96.7ms
Speed: 1.0ms preprocess, 96.7ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 desk 0.52, folding_chair 0.30, dining_table 0.09, sewing_machine 0.01, studio_couch 0.01, 84.1ms
Speed: 4.0ms preprocess, 84.1ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 desk 0.24, folding_chair 0.18, dining_table 0.14, table_lamp 0.05, studio_couch 0.04, 89.7ms
Speed: 2.3ms preprocess, 89.7ms inference, 0.1ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 folding_chair 0.34, dining_table 0.10, desk 0.08, stove 0.03, paper_towel 0.03, 86.4ms
Speed: 2.8ms preprocess, 86.4ms inference, 0.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 dining_table 0.51, desk 0.13, folding_chair 0.06, grand_piano 0.03, upright 0.02, 109.1ms
Speed: 30.6ms preprocess, 109.1ms inference, 0.0ms postprocess per image at shape (1, 3,

{'espresso_maker', 'stove', 'radio', 'refrigerator', 'upright', 'modem', 'coffee_mug', "potter's_wheel", 'entertainment_center', 'measuring_cup', 'folding_chair', 'sewing_machine', 'joystick', 'mortar', 'shoe_shop', 'candle', 'table_lamp', 'paper_towel', 'wine_bottle', 'nipple', 'studio_couch', 'water_jug', "carpenter's_kit", 'violin', 'grand_piano', 'beer_bottle', 'eggnog', 'cup', 'medicine_chest', 'soap_dispenser', 'desk', 'pop_bottle', 'tray', 'pencil_sharpener', 'goblet', 'beaker', 'water_bottle', 'dining_table'}


{'beaker',
 'beer_bottle',
 'candle',
 "carpenter's_kit",
 'coffee_mug',
 'cup',
 'desk',
 'dining_table',
 'eggnog',
 'entertainment_center',
 'espresso_maker',
 'folding_chair',
 'goblet',
 'grand_piano',
 'joystick',
 'measuring_cup',
 'medicine_chest',
 'modem',
 'mortar',
 'nipple',
 'paper_towel',
 'pencil_sharpener',
 'pop_bottle',
 "potter's_wheel",
 'radio',
 'refrigerator',
 'sewing_machine',
 'shoe_shop',
 'soap_dispenser',
 'stove',
 'studio_couch',
 'table_lamp',
 'tray',
 'upright',
 'violin',
 'water_bottle',
 'water_jug',
 'wine_bottle'}