In [None]:
!pip install ultralytics --user
# !apt-get update && apt-get install ffmpeg libsm6 libxext6 git -y

In [None]:
from ultralytics import YOLOWorld
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

model = YOLOWorld('yolov8x-worldv2.pt') 

object_classes = ["red ball"]
model.set_classes(object_classes)

results = model.predict('frame_0.jpg', conf=0.04)
plt.imshow(mpimg.imread('frame_0.jpg'))
for box, cls_id in zip(results[0].boxes.xyxy, results[0].boxes.cls):
    x_min, y_min, x_max, y_max = box[:4]
    center_x = (x_min + x_max) / 2
    center_y = (y_min + y_max) / 2
    object_name = object_classes[int(cls_id)]
    print(f"Center of {object_name}: ({center_x}, {center_y})")
    plt.scatter(center_x.cpu(), center_y.cpu(), c='blue', marker='o', label='Origin')
    
results[0].show()

In [1]:
import os
import pandas as pd
from ultralytics import YOLOWorld

In [2]:
def detect_objects(image_path, model, config, confidence = 0.23, main = False):
    detected_objects = {}

    primary_terms = [alternatives[0] for alternatives in config.values()]
    model.set_classes(primary_terms)
    results = model.predict(image_path, conf=confidence)
    
    print(len(results))

    if len(results[0].boxes) <= len(primary_terms):
        for box, cls_id in zip(results[0].boxes.xyxy, results[0].boxes.cls):
            object_name = list(config.keys())[int(cls_id)]
            x_min, y_min, x_max, y_max = box[:4]
            center_x = (x_min + x_max) / 2
            center_y = (y_min + y_max) / 2
            detected_objects[object_name] = (center_x.item(), center_y.item())

    if not main:
        return detected_objects
    
    for object_name, prompts in config.items():
        while object_name not in detected_objects:
            if confidence >= 0.12:
                confidence -= 0.1
                detected_objects = detect_objects(image_path, model, { object_name: prompts }, confidence) | detected_objects
            elif len(prompts) > 1:
                confidence = 0.23
                prompts = prompts[1:]
                while object_name not in detected_objects:
                    if confidence >= 0.01:
                        detected_objects = detect_objects(image_path, model, { object_name: prompts }, confidence) | detected_objects
                        confidence -= 0.1
                    else:
                        break
            else:
                break

    return detected_objects

def process_folder(folder_path, model):
    csv_file_path = os.path.join(folder_path, 'processed_step_1.csv')
    output_folder = os.path.join(folder_path, 'processed_video')
    output_csv_file_path = os.path.join(folder_path, 'processed_step_2.csv')
    
    config = {
        'red ball': ['red ball', 'apple', 'red apple'],
        'green box': ['green box']
    }

    df = pd.read_csv(csv_file_path)

    df['origin_x'] = None
    df['origin_y'] = None
    df['goal_x'] = None
    df['goal_y'] = None

    for index, row in df.iterrows():
        frame_number = row['FrameNumber']
        image_path = os.path.join(output_folder, f'frame_{frame_number}.jpg')

        detected_objects = detect_objects(image_path, model, config, 0.23, True)

        if "red ball" in detected_objects:
            df.at[index, 'origin_x'], df.at[index, 'origin_y'] = detected_objects["red ball"]

        if "green box" in detected_objects:
            df.at[index, 'goal_x'], df.at[index, 'goal_y'] = detected_objects["green box"]

    df.to_csv(output_csv_file_path, index=False)

In [3]:
base_folder = r'.\NaoPickAndPlaceData'

# https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s-worldv2.pt
# https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8x-worldv2.pt
model = YOLOWorld('yolov8x-worldv2.pt')

for folder in os.listdir(base_folder):
    folder_path = os.path.join(base_folder, folder)
    if os.path.isdir(folder_path) and 'processed_step_1.csv' in os.listdir(folder_path) and 'processed_step_2.csv' not in os.listdir(folder_path):
        process_folder(folder_path, model)

  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)



image 1/1 C:\Users\ARB\PycharmProjects\Nao\NaoPickAndPlaceData\2024-08-09_16-54-15\processed_video\frame_1.jpg: 480x640 1 red ball, 1 green box, 77.4ms
Speed: 2.5ms preprocess, 77.4ms inference, 47.2ms postprocess per image at shape (1, 3, 480, 640)
1

image 1/1 C:\Users\ARB\PycharmProjects\Nao\NaoPickAndPlaceData\2024-08-09_16-54-15\processed_video\frame_2.jpg: 480x640 1 red ball, 1 green box, 35.0ms
Speed: 0.0ms preprocess, 35.0ms inference, 1.3ms postprocess per image at shape (1, 3, 480, 640)
1

image 1/1 C:\Users\ARB\PycharmProjects\Nao\NaoPickAndPlaceData\2024-08-09_16-54-15\processed_video\frame_3.jpg: 480x640 1 red ball, 1 green box, 35.7ms
Speed: 0.0ms preprocess, 35.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)
1

image 1/1 C:\Users\ARB\PycharmProjects\Nao\NaoPickAndPlaceData\2024-08-09_16-54-15\processed_video\frame_4.jpg: 480x640 1 red ball, 1 green box, 34.7ms
Speed: 1.0ms preprocess, 34.7ms inference, 1.0ms postprocess per image at shape (1, 3, 480