In [1]:
import os
# Move the working directory one level up from wherever the kernel started
# (presumably to the repository root, so that `src.utils.config` imported in
# the next cell resolves -- confirm against the project layout).
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel moves up one more directory each time.
os.chdir("../")

In [2]:
import matplotlib.pyplot as plt
import torch
import cv2
import numpy as np

from torchvision import transforms

from src.utils.config import load_config

In [3]:
# Load the project configuration. `local_config['PATH']` is read further down
# for the input ('raw_videos') and output ('output_videos') video locations;
# `global_config` is not used in the cells visible here.
global_config, local_config = load_config()

In [4]:
# Switch into the sibling YOLOv7 checkout before importing: the `utils.*`
# modules below and the relative checkpoint path are both resolved from the
# current working directory (presumably `utils` is not installed as a package
# -- confirm).
os.chdir("../yolov7/")
from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Device used to run: {device}")

# `map_location` lets a checkpoint that was saved from a CUDA device be
# deserialized on a CPU-only machine instead of failing inside `torch.load`.
# SECURITY: `torch.load` unpickles arbitrary Python objects -- only load
# checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to `weights_only=True`, which
# rejects a fully pickled model like this one; pass `weights_only=False` there.
# The variable name keeps its original spelling in case other cells use it.
weigths = torch.load(
    "yolov7-w6-pose.pt",
    map_location=device,
)

# Half precision + inference mode. `eval()` returns the module itself, so the
# chained assignment also keeps the cell from echoing the model repr.
model = weigths['model'].half().to(device).eval()

Device used to run: cuda:0


In [11]:
# Go back to the project directory so the paths stored in `local_config`
# are resolved relative to it.
os.chdir("../smear-beta/")
video_path = local_config['PATH']['raw_videos']+"pose_estimation_1.mp4"

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Fail fast: continuing would only crash later inside `letterbox` with an
    # unrelated-looking error on a `None` frame.
    raise IOError(f"Video could not be read: {video_path}")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Resize by the larger side of the source frame.
scale_by = max(frame_width, frame_height)
scale_factor = 1

# Pass the first frame through `letterbox` to get the resized image, whose
# shape defines the `VideoWriter` dimensions.
ok, first_frame = cap.read()
if not ok:
    cap.release()
    raise IOError(f"Video contains no readable frames: {video_path}")
vid_write_image = letterbox(first_frame, int(scale_by/scale_factor), stride=64, auto=True)[0]
resize_height, resize_width = vid_write_image.shape[:2]

# Reading the sizing frame advanced the capture; rewind so the processing loop
# starts at frame 0 instead of silently dropping it. Reopen the file if the
# backend refuses to seek.
if not cap.set(cv2.CAP_PROP_POS_FRAMES, 0):
    cap.release()
    cap = cv2.VideoCapture(video_path)

print(resize_height)
print(resize_width)

save_name = local_config['PATH']['output_videos']+"yolov7_vidal_test.mp4"

# Keep the source frame rate so playback speed matches the input; fall back to
# 30 when the container does not report one (the property reads as 0).
source_fps = cap.get(cv2.CAP_PROP_FPS) or 30

# Define codec and create VideoWriter object.
out = cv2.VideoWriter(
    save_name,
    cv2.VideoWriter_fourcc(*'mp4v'),
    source_fps,
    (resize_width, resize_height)
)
if not out.isOpened():
    # VideoWriter does not raise on failure; without this check every
    # `out.write` call would be silently discarded.
    cap.release()
    raise IOError(f"Could not open video writer for: {save_name}")

1920
1088


In [10]:
# Run pose estimation on every frame of `cap` and write the annotated,
# letterboxed frames to `out`.

# Set to True to also draw a bounding box around each detected person.
draw_boxes = False

# Same target size that was used to dimension the VideoWriter.
target_size = int(scale_by/scale_factor)

try:
    # `isOpened` must be *called*; the bare method object is always truthy.
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No more frames to read
            break

        # Letterbox an RGB copy of the frame for the model.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = letterbox(image, target_size, stride=64, auto=True)[0]

        # Draw on the letterboxed uint8 frame itself (converted back to BGR for
        # the writer). Rebuilding it from the fp16 tensor costs a GPU->CPU copy
        # per frame and can truncate pixel values by one.
        nimg = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # HWC uint8 -> 1xCxHxW half-precision tensor on the inference device.
        image = transforms.ToTensor()(image).unsqueeze(0).to(device).half()

        with torch.no_grad():
            output, _ = model(image)

        output = non_max_suppression_kpt(
            output,
            0.25,
            0.65,
            nc=model.yaml['nc'],
            nkpt=model.yaml['nkpt'],
            kpt_label=True
        )
        output = output_to_keypoint(output)

        for idx in range(output.shape[0]):
            # Columns 7 onward of each row are passed on as the keypoints.
            plot_skeleton_kpts(nimg, output[idx, 7:].T, 3)

            if draw_boxes:
                # Columns 2-5 are used as box centre x, centre y, width, height.
                xmin, ymin = (output[idx, 2]-output[idx, 4]/2), (output[idx, 3]-output[idx, 5]/2)
                xmax, ymax = (output[idx, 2]+output[idx, 4]/2), (output[idx, 3]+output[idx, 5]/2)
                cv2.rectangle(
                    nimg,
                    (int(xmin), int(ymin)),
                    (int(xmax), int(ymax)),
                    color=(255, 0, 0),
                    thickness=1,
                    lineType=cv2.LINE_AA
                )

        out.write(nimg)
        # No `cv2.waitKey` here: nothing is shown with `cv2.imshow`, and
        # waitKey only receives key presses while a HighGUI window exists.
finally:
    # Always release both handles -- also on an exception or a kernel
    # interrupt -- so the output file is finalized and the input is closed.
    cap.release()
    out.release()