# Fencing Computer Vision Model Test #1

## MoveNet - Multi Person Detection



# Import Dependencies

In [1]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np

In [2]:
import movenet_multipose_lightning_1

# Load MoveNet

In [3]:
# Load the MoveNet model and keep a handle to its inference signature.
# The two commented-out lines are alternative TF Hub sources (multipose
# lightning v1 and singlepose lightning v4); the active line loads the model
# from a local directory instead of downloading it.
# NOTE(review): the local path is absolute and machine-specific — adjust it
# for the environment this runs in.
# NOTE(review): the singlepose variant presumably has a different output
# layout than the (6, 17, 3) reshape used by the detection cells — confirm
# before switching to it.
#model = hub.load("https://tfhub.dev/google/movenet/multipose/lightning/1")
#model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
model = hub.load("/Users/juan/Desktop/Python/fencingai/movenet_multipose_lightning_1/")
# 'serving_default' is the callable used as `movenet(input_img)` below.
movenet = model.signatures['serving_default']

2022-05-06 10:57:54.819004: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Draw Points and Lines

In [4]:
def draw_points(frame, person, confidence_threshold):
    """Draw a filled green dot on ``frame`` for each confident keypoint.

    Args:
        frame: Image array whose shape unpacks as (height, width, channels).
            It is drawn on in place.
        person: Keypoint rows of ``(y, x, confidence)``. The y and x values
            are multiplied by the frame height and width respectively
            (presumably they are normalized coordinates — confirm against
            the model output).
        confidence_threshold: A keypoint is drawn only when its confidence
            is strictly greater than this value.
    """
    height, width, _ = frame.shape
    # Scale y by the frame height and x by the frame width; leave the
    # confidence column untouched. squeeze() drops any singleton axes.
    keypoints = np.squeeze(np.multiply(person, [height, width, 1]))

    for row, col, score in keypoints:
        if not (score > confidence_threshold):
            continue
        # OpenCV points are (x, y), i.e. (column, row).
        cv2.circle(frame, (int(col), int(row)), 4, (0, 255, 0), -1)

In [5]:
# Skeleton definition: each key is a pair of keypoint indices (0-16) to be
# joined by a line; draw_connections() uses the pair to index the keypoint
# rows of one person.
# Each value is a one-letter code ('m', 'c', 'y') — presumably
# matplotlib-style colour names (magenta / cyan / yellow); confirm.
# NOTE(review): draw_connections() reads this value but draws every line
# with the fixed colour (0,0,255), so the codes currently have no visible
# effect.
# NOTE(review): indices presumably follow the 17-keypoint COCO ordering used
# by MoveNet (0 = nose ... 16 = right ankle) — confirm against the model docs.
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}

def draw_connections(frame, person, edges, confidence_threshold):
    """Draw a line on ``frame`` between each confidently detected keypoint pair.

    Args:
        frame: Image array whose shape unpacks as (height, width, channels).
            It is drawn on in place.
        person: Keypoint rows of ``(y, x, confidence)``. The y and x values
            are multiplied by the frame height and width respectively
            (presumably they are normalized coordinates — confirm against
            the model output).
        edges: Mapping whose keys are ``(index_a, index_b)`` keypoint pairs.
            The mapped value is read but not used; every line is drawn in
            the same fixed colour.
        confidence_threshold: A line is drawn only when both endpoints have
            a confidence strictly greater than this value.
    """
    height, width, _ = frame.shape
    # Scale y by the frame height and x by the frame width; leave the
    # confidence column untouched. squeeze() drops any singleton axes.
    keypoints = np.squeeze(np.multiply(person, [height, width, 1]))

    for (start_idx, end_idx), _unused_color in edges.items():
        start_y, start_x, start_conf = keypoints[start_idx]
        end_y, end_x, end_conf = keypoints[end_idx]

        both_confident = (
            start_conf > confidence_threshold
            and end_conf > confidence_threshold
        )
        if not both_confident:
            continue

        # OpenCV points are (x, y), i.e. (column, row).
        start_pt = (int(start_x), int(start_y))
        end_pt = (int(end_x), int(end_y))
        cv2.line(frame, start_pt, end_pt, (0, 0, 255), 2)

## Multi Person Loop

In [6]:
def loop_through_people(frame, points, edges, confidence_threshold):
    """Overlay the skeleton of every entry in ``points`` onto ``frame``.

    Args:
        frame: Image array that is drawn on in place.
        points: Iterable with one keypoint array per person.
        edges: Keypoint-pair mapping forwarded to ``draw_connections``.
        confidence_threshold: Minimum confidence forwarded to both drawing
            helpers.
    """
    for keypoints in points:
        # Lines are drawn before the joints, so the joint dots end up on top.
        draw_connections(frame, keypoints, edges, confidence_threshold)
        draw_points(frame, keypoints, confidence_threshold)

# Pose Detection (webcam)

In [None]:
# Run multi-person pose detection on the default webcam (device 0) and show
# the annotated frames in a window until 'q' is pressed.
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # A failed read yields ret == False and no frame; stop cleanly
        # instead of crashing on a None frame.
        if not ret:
            break

        # Model input: batch of 1, letterboxed to 160 x 256 (both multiples
        # of 32), cast to int32.
        # NOTE(review): resize_with_pad keeps the aspect ratio and pads; if
        # the frame's aspect ratio differs from 160:256 the keypoints drawn
        # on the unpadded frame may be offset — confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(frame, axis=0), 160, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detection: keep the first 51 values of each of the 6 rows and
        # view them as 17 (y, x, confidence) triplets per person.
        results = movenet(input_img)
        points = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Draw Pose
        loop_through_people(frame, points, EDGES, 0.3)

        cv2.imshow('MoveNet', frame)

        # Quit on 'q'; the mask keeps only the low byte of the key code.
        if (cv2.waitKey(10) & 0xFF) == ord('q'):
            break
finally:
    # Always release the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

### Example Output

In [11]:
# Show the raw 56-value output vector of the first detection slot of the
# first batch item. The detection loops use only the first 51 values
# (17 keypoints x 3); the remaining 5 are presumably bounding-box/score
# values — confirm against the model documentation.
results['output_0'].numpy()[0, 0, :56]

array([4.4123420e-01, 2.7708939e-01, 6.9145709e-01, 4.0667498e-01,
       3.2739532e-01, 4.9196729e-01, 4.1070819e-01, 2.5615749e-01,
       8.0806792e-01, 4.4805995e-01, 4.1284192e-01, 6.7486602e-01,
       4.4569373e-01, 2.4323143e-01, 7.4638480e-01, 6.9111764e-01,
       4.9377954e-01, 8.1511635e-01, 6.6011971e-01, 1.5455548e-01,
       6.8172604e-01, 7.8068036e-01, 5.0935858e-01, 8.8405922e-02,
       7.7705544e-01, 6.6064045e-02, 1.0612572e-01, 7.5486088e-01,
       4.1013539e-01, 3.4292463e-02, 7.7671880e-01, 1.4037894e-01,
       2.1936417e-01, 7.8182852e-01, 4.0292582e-01, 3.7871185e-05,
       7.8166556e-01, 1.7132211e-01, 3.2979000e-04, 7.5815898e-01,
       3.5965768e-01, 2.7594257e-03, 7.6537937e-01, 7.0583954e-02,
       3.3933967e-02, 4.6543241e-01, 4.1075364e-01, 3.3177828e-04,
       4.3542874e-01, 1.3696021e-01, 6.9100811e-04, 2.7762896e-01,
       1.6489744e-02, 7.7694255e-01, 5.6312305e-01, 6.2338620e-01],
      dtype=float32)

# Pose Detection (video)

In [13]:
# Run multi-person pose detection on 'fenc_video_1.mov' and show the
# annotated frames until the video ends or 'q' is pressed.
cap = cv2.VideoCapture('fenc_video_1.mov')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # At the end of the video (or on a failed read) ret is False and
        # there is no frame; stop cleanly instead of crashing on None.
        if not ret:
            break

        # Model input: batch of 1, letterboxed to 160 x 256 (both multiples
        # of 32), cast to int32.
        # NOTE(review): resize_with_pad keeps the aspect ratio and pads; if
        # the frame's aspect ratio differs from 160:256 the keypoints drawn
        # on the unpadded frame may be offset — confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(frame, axis=0), 160, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detection: keep the first 51 values of each of the 6 rows and
        # view them as 17 (y, x, confidence) triplets per person.
        results = movenet(input_img)
        points = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Draw Pose
        loop_through_people(frame, points, EDGES, 0.3)

        cv2.imshow('MoveNet', frame)

        # Quit on 'q'; the mask keeps only the low byte of the key code.
        if (cv2.waitKey(10) & 0xFF) == ord('q'):
            break
finally:
    # Always release the video and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

# Pose Detection (Forward)

In [16]:
# Run multi-person pose detection on 'fenc_video_2.mp4' and show the
# annotated frames until the video ends or 'q' is pressed.
cap = cv2.VideoCapture('fenc_video_2.mp4')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # At the end of the video (or on a failed read) ret is False and
        # there is no frame; stop cleanly instead of crashing on None.
        if not ret:
            break

        # Model input: batch of 1, letterboxed to 256 x 256 (multiple of 32),
        # cast to int32.
        # NOTE(review): resize_with_pad keeps the aspect ratio and pads; if
        # the frame is not square the keypoints drawn on the unpadded frame
        # may be offset — confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(frame, axis=0), 256, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detection: keep the first 51 values of each of the 6 rows and
        # view them as 17 (y, x, confidence) triplets per person.
        results = movenet(input_img)
        points = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Draw Pose
        loop_through_people(frame, points, EDGES, 0.3)

        cv2.imshow('MoveNet', frame)

        # Quit on 'q'; the mask keeps only the low byte of the key code.
        if (cv2.waitKey(10) & 0xFF) == ord('q'):
            break
finally:
    # Always release the video and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

# Pose Detection (Close)

In [21]:
# Run multi-person pose detection on 'fenc_video_3.mp4' and show the
# annotated frames until the video ends or 'q' is pressed.
cap = cv2.VideoCapture('fenc_video_3.mp4')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # At the end of the video (or on a failed read) ret is False and
        # there is no frame; stop cleanly instead of crashing on None.
        if not ret:
            break

        # Model input: batch of 1, letterboxed to 128 x 256 (both multiples
        # of 32), cast to int32.
        # NOTE(review): resize_with_pad keeps the aspect ratio and pads; if
        # the frame's aspect ratio differs from 128:256 the keypoints drawn
        # on the unpadded frame may be offset — confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(frame, axis=0), 128, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detection: keep the first 51 values of each of the 6 rows and
        # view them as 17 (y, x, confidence) triplets per person.
        results = movenet(input_img)
        points = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Draw Pose
        loop_through_people(frame, points, EDGES, 0.3)

        cv2.imshow('MoveNet', frame)

        # Quit on 'q'; the mask keeps only the low byte of the key code.
        if (cv2.waitKey(10) & 0xFF) == ord('q'):
            break
finally:
    # Always release the video and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

In [23]:
# Run multi-person pose detection on 'fenc_video_4.mp4' and show the
# annotated frames until the video ends or 'q' is pressed.
cap = cv2.VideoCapture('fenc_video_4.mp4')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # At the end of the video (or on a failed read) ret is False and
        # there is no frame; stop cleanly instead of crashing on None.
        if not ret:
            break

        # Model input: batch of 1, letterboxed to 256 x 256 (multiple of 32),
        # cast to int32.
        # NOTE(review): resize_with_pad keeps the aspect ratio and pads; if
        # the frame is not square the keypoints drawn on the unpadded frame
        # may be offset — confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(frame, axis=0), 256, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detection: keep the first 51 values of each of the 6 rows and
        # view them as 17 (y, x, confidence) triplets per person.
        results = movenet(input_img)
        points = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Draw Pose
        loop_through_people(frame, points, EDGES, 0.3)

        cv2.imshow('MoveNet', frame)

        # Quit on 'q'; the mask keeps only the low byte of the key code.
        if (cv2.waitKey(10) & 0xFF) == ord('q'):
            break
finally:
    # Always release the video and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

In [25]:
# Run multi-person pose detection on 'fenc_video_5.mp4' and show the
# annotated frames until the video ends or 'q' is pressed.
cap = cv2.VideoCapture('fenc_video_5.mp4')

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # At the end of the video (or on a failed read) ret is False and
        # there is no frame; stop cleanly instead of crashing on None.
        if not ret:
            break

        # Model input: batch of 1, letterboxed to 192 x 256 (both multiples
        # of 32), cast to int32.
        # NOTE(review): resize_with_pad keeps the aspect ratio and pads; if
        # the frame's aspect ratio differs from 192:256 the keypoints drawn
        # on the unpadded frame may be offset — confirm.
        img = tf.image.resize_with_pad(tf.expand_dims(frame, axis=0), 192, 256)
        input_img = tf.cast(img, dtype=tf.int32)

        # Make Detection: keep the first 51 values of each of the 6 rows and
        # view them as 17 (y, x, confidence) triplets per person.
        results = movenet(input_img)
        points = results['output_0'].numpy()[:, :, :51].reshape((6, 17, 3))

        # Draw Pose
        loop_through_people(frame, points, EDGES, 0.3)

        cv2.imshow('MoveNet', frame)

        # Quit on 'q'; the mask keeps only the low byte of the key code.
        if (cv2.waitKey(10) & 0xFF) == ord('q'):
            break
finally:
    # Always release the video and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()

# Conclusion

영상을 256px 내외로 축소해서 detection을 하고 reshape를 하는 방식이어서 피사체가 작으면 정확도가 매우 떨어졌다. 카메라를 고정하고 펜싱 영상을 찍으면 피스트 전체를 담아야 해서 사람이 작게 나온다. 영상을 직접 찍어서 분석할 경우 MoveNet은 적합하지 않다고 생각된다.

카메라가 선수를 줌인해서 움직이는 대회 영상을 분석할 경우 피사체가 크게 나와 사용이 가능할 것 같다. 다만 손목이 인식되지 않아 칼의 움직임을 추적할 수는 없다. 처리 시간이 빠르다는 장점이 있다.