In [97]:
import cv2
import numpy as np
from ultralytics import YOLO

# Load the model
model = YOLO('MouseNet.pt')

# Open the video file
video_path = "trial_vids/08_31_2024_Trial_2_20fps.mp4"


In [98]:
cap = cv2.VideoCapture(video_path)

# Get the frame width and height
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
output_path = "output_video_3.mp4"


fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, 20.0, (frame_width, frame_height))


# Define the class index you want to predict 
## names: {0: 'Tail', 1: 'body', 2: 'feces', 3: 'head', 4: 'mouse'}

print(model.names)

class_index_to_predict = 1  # Replace 'cat' with the class you want to predict



{0: 'Tail', 1: 'body', 2: 'feces', 3: 'head', 4: 'mouse'}


In [99]:
# Loop through the video frames 

xywh_cors = []

while cap.isOpened():
    # Read a frame from the video
    success, frame = cap.read()

    if success:
        # Run yolov8 inference on the frame
        results = model.predict(source=frame, classes=1, max_det=1, iou=0.0001)

        # Visualize the results on the frame
        annotated_frame = results[0].plot()

        # Write the annotated frame to the output video
        out.write(annotated_frame)

        # Display the annotated frame 
        # cv2.imshow("Yolov8 inference", annotated_frame)

        for r in results:
            arr = r.boxes.xywh.tolist()
            if len(arr) > 0: 
                xywh_cors.append(arr[0])
            else:
                xywh_cors.append([-1,-1,-1,-1])
        
        # Wait for a key press (0 means wait indefinitely)
        key = cv2.waitKey(1)

        # Break the loop if 'q' is pressed
        if key & 0xFF == ord("q"):
            break
    
        
    
    else:
        break

cors = np.array(xywh_cors)

npy_file_path = "bbox_data/bbox_cors.npy"

# Save the NumPy array to a .npy file
np.save(npy_file_path, cors)

# Release the video capture object, the video writer, and close the display window
cap.release()
out.release()
cv2.destroyAllWindows()


0: 640x640 1 body, 45.6ms
Speed: 1.4ms preprocess, 45.6ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 45.2ms
Speed: 1.3ms preprocess, 45.2ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 45.2ms
Speed: 1.4ms preprocess, 45.2ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 44.9ms
Speed: 2.0ms preprocess, 44.9ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 40.7ms
Speed: 1.4ms preprocess, 40.7ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 42.4ms
Speed: 1.3ms preprocess, 42.4ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 42.1ms
Speed: 1.3ms preprocess, 42.1ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 body, 43.1ms
Speed: 1.4ms preprocess, 43.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x