In [4]:
from __future__ import division
import argparse, time, logging, os, math, tqdm, cv2

import numpy as np
import mxnet as mx
from mxnet import gluon, nd, image
from mxnet.gluon.data.vision import transforms

import matplotlib.pyplot as plt

import gluoncv as gcv
from gluoncv import data
from gluoncv.data import mscoco
from gluoncv.model_zoo import get_model
from gluoncv.data.transforms.pose import detector_to_alpha_pose, detector_to_simple_pose, heatmap_to_coord, heatmap_to_coord_alpha_pose
from gluoncv.utils.viz import cv_plot_image, cv_plot_keypoints

# SimplePose를 이용하여 실시간으로 오른쪽 손목 Velocity(영상 표시), FPS 계산해보기

In [5]:
# Person detection + SimplePose on a video stream. For every frame in which a
# person is found, the speed of the right wrist is estimated and drawn on the
# displayed frame.

# --- Models -----------------------------------------------------------------
ctx = mx.cpu()
detector_name = "ssd_512_mobilenet1.0_coco"
# detector_name = "yolo3_mobilenet1.0_coco"
detector = get_model(detector_name, pretrained=True, ctx=ctx)

# Keep only the 'person' class of the detector, reusing its pretrained weights.
detector.reset_class(classes=['person'], reuse_weights={'person': 'person'})
detector.hybridize()

estimators = get_model('simple_pose_resnet18_v1b', pretrained='ccd24037', ctx=ctx)
# estimators = get_model('alpha_pose_resnet101_v1b_coco', pretrained=True, ctx=ctx)
estimators.hybridize()

# --- Configuration ----------------------------------------------------------
VIDEO_SOURCE = "videos/golf_swing1.mp4"   # use 0 for a live webcam
RIGHT_WRIST = 10                          # keypoint index tracked for velocity
FRAME_DELAY_MS = 33                       # GUI wait between displayed frames
WINDOW_NAME = "VideoFrame"

# --- Video source -----------------------------------------------------------
capture = cv2.VideoCapture(VIDEO_SOURCE)
if not capture.isOpened():
    raise IOError("Could not open video source: {!r}".format(VIDEO_SOURCE))
# NOTE(review): these size requests are presumably only honoured by camera
# devices, not by video files -- confirm before relying on them.
capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Time and right-wrist position of the previous / current frame with a
# detection. None means "no detection seen yet", so no velocity is reported
# until two detections are available.
prev_frame_time = None
new_frame_time = None
prev_frame_coor = None
new_frame_coor = None

# Font used for the velocity overlay.
font = cv2.FONT_HERSHEY_SIMPLEX

try:
    while True:
        ret, bgr_frame = capture.read()
        if not ret or bgr_frame is None:
            # End of the video (or a failed read): stop cleanly instead of
            # passing None to cvtColor.
            break

        frame = mx.nd.array(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        # x, frame = data.transforms.presets.yolo.load_test(frame, short=512)
        x, frame = gcv.data.transforms.presets.ssd.transform_test(frame, short=512, max_size=350)
        x = x.as_in_context(ctx)
        class_IDs, scores, bounding_boxs = detector(x)

        pose_input, upscale_bbox = detector_to_simple_pose(frame, class_IDs, scores, bounding_boxs,
                                                           output_shape=(128, 96), ctx=ctx)

        # Image that will be shown; stays the plain frame when nobody is detected.
        img = frame
        if len(upscale_bbox) > 0:
            predicted_heatmap = estimators(pose_input)
            pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)

            img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                                    box_thresh=0.5, keypoint_thresh=0.2)

            # Time at which processing of this frame finished, and the right
            # wrist of the first detected person as a plain NumPy vector.
            new_frame_time = time.time()
            new_frame_coor = pred_coords[0][RIGHT_WRIST].asnumpy()

            if prev_frame_time is not None:
                frame_dt = new_frame_time - prev_frame_time   # seconds per frame
                if frame_dt > 0:
                    # Speed = displacement / elapsed wall-clock time.
                    # NOTE(review): coordinates appear to be in pixels of the
                    # resized frame, and the time base is processing time rather
                    # than the video's own timestamps -- confirm this is intended
                    # when reading from a file.
                    ve = float(np.linalg.norm(new_frame_coor - prev_frame_coor)) / frame_dt
                    cv2.putText(img, "{:.1f}".format(ve), (7, 70), font, 3,
                                (100, 255, 0), 3, cv2.LINE_AA)

            prev_frame_time = new_frame_time
            prev_frame_coor = new_frame_coor

        # The frame was converted to RGB above; imshow expects BGR.
        cv2.imshow(WINDOW_NAME, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

        # Press 'q' to exit.
        if cv2.waitKey(FRAME_DELAY_MS) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture device and close the window, even on errors.
    capture.release()
    cv2.destroyAllWindows()

In [8]:
# Ad-hoc inspection: re-run the pose pre-processing on whatever values the
# video loop left in `frame`, `class_IDs`, `scores` and `bounding_boxs`, and
# show the returned tuple as this cell's output.
detector_to_simple_pose(
    frame, class_IDs, scores, bounding_boxs,
    output_shape=(128, 96),
    ctx=ctx,
)

(
 [[[[-0.7134568  -0.5295201  -0.13406867 ... -0.7400802  -1.1837753
     -1.4187317 ]
    [-0.8639222  -0.5647867   0.02782574 ... -0.43338758 -0.47045687
     -0.49008694]
    [-0.8048207  -0.5828195  -0.16456152 ... -0.53705895 -0.7115804
     -0.8039969 ]
    ...
    [-0.36645743 -0.26980913 -0.13423122 ... -1.1565235  -1.1591748
     -1.1611127 ]
    [-0.4053993  -0.17937548  0.20529792 ... -1.1539224  -1.2185396
     -1.2526724 ]
    [ 0.08918573  0.3696089   0.8904507  ... -1.1799676  -1.2568297
     -1.2970049 ]]
 
   [[-0.28384462 -0.09580223  0.30847624 ...  1.3944284   0.02331912
     -0.70274407]
    [-0.46902096 -0.16320802  0.4426322  ...  1.7024477   0.739723
      0.22991733]
    [-0.42852226 -0.20156547  0.22602892 ...  1.5899407   0.4780488
     -0.11074676]
    ...
    [ 0.6386479   0.6745371   0.7059094  ... -0.90293956 -0.9228701
     -0.9339604 ]
    [ 0.5121456   0.67339396  0.9434131  ... -0.92560184 -1.0061231
     -1.049236  ]
    [ 0.8770247   1.0942706   1.