In [1]:
import cv2
import os
from ultralytics import YOLO
from playsound import playsound  # Bibliothek für Sound-Wiedergabe

# Klassen aus der Datei classes.txt laden
with open("classes.txt", "r") as f:
    class_names = [line.strip() for line in f.readlines()]

# Zuordnung von Klassen zu ihren Sounds
sounds = {
    "Nasebohren": "nasebohren.mp3",
    "Nägelkauen": "naegelkauen.mp3",
}

# Sicherstellen, dass alle Dateien existieren
for class_name, sound_file in sounds.items():
    if not os.path.exists(sound_file):
        raise FileNotFoundError(f"Sound file for {class_name} not found: {sound_file}")

# YOLOv8-Modell laden
model = YOLO("best.pt")

# Kamera öffnen
cap = None
for camera_index in range(5):  # Versuche Kamera-Indices von 0 bis 4
    cap = cv2.VideoCapture(camera_index)
    if cap.isOpened():
        print(f"Kamera erfolgreich geöffnet mit Index {camera_index}.")
        break
    else:
        cap.release()
        cap = None

if cap is None or not cap.isOpened():
    raise RuntimeError("Keine Kamera konnte geöffnet werden. Stellen Sie sicher, dass eine Kamera verfügbar ist und nicht von einer anderen Anwendung verwendet wird.")

print("Drücken Sie 'q', um das Programm zu beenden.")

while True:
    ret, frame = cap.read()
    if not ret:
        print("Fehler beim Lesen des Kamera-Feeds.")
        break

    # YOLO-Vorhersagen
    results = model.predict(source=frame, show=False, conf=0.5)

    # Ergebnisse durchlaufen
    for result in results:
        boxes = result.boxes
        for box in boxes:
            cls = int(box.cls[0])  # Klassenindex
            class_name = class_names[cls] if cls < len(class_names) else "Unbekannt"

            # Sound abspielen, wenn Klasse erkannt wird
            if class_name in sounds:
                print(f"Erkannt: {class_name}")
                playsound(sounds[class_name])

            # Bounding Box zeichnen
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            confidence = box.conf[0]
            label = f"{class_name} {confidence:.2f}"
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Frame anzeigen
    cv2.imshow("YOLOv8 Detection", frame)

    # Beenden, wenn 'q' gedrückt wird
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


playsound is relying on another python subprocess. Please use `pip install pygobject` if you want playsound to run more efficiently.
[ WARN:0@1.494] global cap_v4l.cpp:999 open VIDEOIO(V4L2:/dev/video0): can't open camera by index
[ERROR:0@1.600] global obsensor_uvc_stream_channel.cpp:158 getStreamChannelGroup Camera index out of range


Kamera erfolgreich geöffnet mit Index 1.
Drücken Sie 'q', um das Programm zu beenden.

0: 480x640 (no detections), 36.3ms
Speed: 2.4ms preprocess, 36.3ms inference, 22.7ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 3.6ms
Speed: 1.1ms preprocess, 3.6ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.2ms
Speed: 0.9ms preprocess, 6.2ms inference, 0.5ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 5.9ms
Speed: 0.7ms preprocess, 5.9ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 5.0ms
Speed: 0.9ms preprocess, 5.0ms inference, 0.3ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.2ms
Speed: 1.1ms preprocess, 6.2ms inference, 0.4ms postprocess per image at shape (1, 3, 480, 640)

0: 480x640 (no detections), 6.4ms
Speed: 1.0ms preprocess, 6.4ms inference, 0.4ms postprocess per image at shape (1, 3, 480,