In [2]:
import cv2
from ultralytics import YOLO
import time
import sp_modules as sp
import numpy as np

# Load the YOLOv8 model (trained detection weights)
model = YOLO('./weights/20240106_best_weight_yolov8_640.pt')

# Input video and the derived output ("_face") video
file_path = 'E:/Raw data/datasets/AVATAR/BYUN_data/'
file_name = '2023-11-29_11-52-14-041_1200f'
# file_path = 'E:/Raw data/datasets/AVATAR/age_mbest1_cerebellum/original_datasets/collection/'
# file_name = 'Hab_02_1200f'
input_video_name = file_path + file_name + '.mp4'
output_video_name = file_path + file_name + '_face.mp4'

cap = cv2.VideoCapture(input_video_name)

# Fail fast on a missing/unreadable input. Without this check the processing
# loop (guarded by cap.isOpened()) is silently skipped, and because an
# unopened capture typically reports a frame count of 0 the final
# `current_frame == frame_total` test would still announce completion.
if not cap.isOpened():
    raise OSError(f"Could not open input video: {input_video_name}")

# Define the codec and create VideoWriter object to save the video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for .mp4 files
fps = cap.get(cv2.CAP_PROP_FPS)  # Use the same FPS as the source video
frame_total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Total frames, used for the time estimate
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))  # Width of the frames in the video
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))  # Height of the frames in the video
frame_width_resize = 800
frame_height_resize = 600

# out = cv2.VideoWriter(output_video_name, fourcc, fps, (frame_width, frame_height))
# The writer size is (width, height) = (1500, 300); it has to match the
# 300-row x 1500-column head montage that the processing loop writes.
out = cv2.VideoWriter(output_video_name, fourcc, fps, (1500, 300))
current_frame = 0

# Colour table for centre-point drawing. Tuples are in BGR order
# (note "red" is (0, 0, 255) and "blue" is (255, 0, 0)).
color_code = dict(
    red=(0, 0, 255),
    orange=(0, 165, 255),
    yellow=(0, 255, 255),
    green=(0, 128, 0),
    blue=(255, 0, 0),
    skyblue=(235, 206, 135),
    purple=(128, 0, 128),
    black=(0, 0, 0),
    pink=(255, 192, 203),
)
# Colour names in table order (dicts preserve insertion order)
color_order = list(color_code)
# Accumulator for per-frame results
output = list()

# Index reordering to match the darknet (YOLOv4) layout;
# there, #4, 5 are the forelimbs and #6, 7 the hindlimbs.
rearrange = [4, 6, 0, 1, 3, 8, 2]

# Five regions of the captured frame, each given as [x1, y1, x2, y2].
# NOTE(review): the third region ends at y2 = 1200 while its neighbours in
# the same row end at 1000 -- confirm this is intentional.
area = [
    list(region)
    for region in (
        (1, 1, 1200, 1000),
        (1201, 1, 2400, 1000),
        (2401, 1, 3600, 1200),
        (1, 1001, 1200, 2016),
        (1201, 1001, 2400, 2016),
    )
]

# Flat buffer of 135 zero-initialised coordinate slots
raw_coordinates = [0 for _ in range(135)]
# Head montage layout: up to `max_heads` crops side by side, one per
# `tile_size`-pixel slot. 5 * 300 = 1500, so the canvas keeps the same
# 300 x 1500 size the VideoWriter `out` was opened with.
tile_size = 300
max_heads = 5
head_class_id = 3

try:
    while cap.isOpened():
        start_time = time.time()  # Time measurement
        # Fresh black canvas for this frame's head crops
        head_image = np.zeros((tile_size, tile_size * max_heads, 3), dtype="uint8")
        success, frame = cap.read()
        if not success:
            # End of stream or read failure
            break

        # Run YOLOv8 inference on the frame
        results = model(frame, verbose=False)

        # Visualize the results on the frame
        # annotated_frame = results[0].plot(line_width = 1, labels = False)
        annotated_frame = frame
        cnt = 0  # Number of montage slots filled so far
        for det in results[0].boxes:
            if cnt >= max_heads:
                # Every slot is taken; remaining detections cannot be shown
                break
            # cls definition
            # 0: fore / 1: hind / 2: nose / 3: head / 4: ass / 5: tail / 6: torso
            cls = int(det.cls.item())
            if cls != head_class_id:
                continue

            x1, y1, x2, y2 = (int(v) for v in det.xyxy.tolist()[0])
            # Clip the crop to one slot. An oversized detection would otherwise
            # raise a shape-mismatch ValueError on assignment (the canvas slice
            # is truncated at its edge) or paint over the neighbouring slot.
            cropped_frame = annotated_frame[y1:y2, x1:x2][:tile_size, :tile_size]
            # cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), color=(255, 0, 0), thickness=2)
            crop_h, crop_w = cropped_frame.shape[:2]
            left = cnt * tile_size
            head_image[0:crop_h, left:left + crop_w] = cropped_frame
            cnt += 1

        # Show first, then poll the keyboard: waitKey also services the GUI
        # event loop, so this order makes the frame just drawn appear at once.
        cv2.imshow("YOLOv8 Inference", head_image)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

        # Append the montage to the output video
        out.write(head_image)

        # Result accumulation
        # output.append(raw_coordinates)

        # Print total estimated time (helper from sp_modules; its exact
        # contract is not visible in this file)
        sp.print_remaining_time(start_time, time.time()-start_time, current_frame, frame_total)

        current_frame += 1
except BaseException:
    # Includes KeyboardInterrupt (e.g. a notebook kernel interrupt): release
    # the handles so the partially written video is finalised, then re-raise.
    cap.release()
    out.release()
    cv2.destroyAllWindows()
    raise

# Announce completion only when every frame reported by the container
# was processed (an early stop leaves current_frame below frame_total).
all_frames_done = current_frame == frame_total
if all_frames_done:
    print('\r', flush=True)  # Emit a blank line first
    print("Pose estimation complete!", end='\r', flush=True)

# Save to txt file (currently disabled)
# with open(file_path+file_name+'.txt', 'w') as file:
#     for row in output:
#         file.write('\t'.join(map(str, row)) + '\n')

# Release the capture and the writer, then close any preview windows
for video_handle in (cap, out):
    video_handle.release()
cv2.destroyAllWindows()

1196 of 1197 frames processed: 0.00 mins left
Pose estimation complete!