In [None]:
!pip install tf-pose

In [None]:
import os
import sys
import re
import time
import logging
import pandas as pd
import numpy as np
import seaborn as sns
import cv2

from tf_pose import common
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh

import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.animation as animation


import warnings
warnings.filterwarnings("ignore")

In [None]:
# Optional: DEBUG-level console logging for the video pose-estimation run.
logger = logging.getLogger('TfPoseEstimator-Video')
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
ch.setFormatter(formatter)

# logging.getLogger returns the same logger object for the same name, so
# re-running this cell would otherwise attach one more handler each time and
# every message would be printed repeatedly. Attach only when none is present.
if not logger.handlers:
    logger.addHandler(ch)

In [None]:
# Estimator configuration: the model name handed to get_graph_path and the
# resolution string that model_wh turns into the (w, h) pair used as target_size.
model = 'mobilenet_thin'
resolution = '864x736'

# Debug aids kept from the original cell (enabling show_process slows inference):
#   show_process = True
#   logger.debug('initialization %s : %s' % (model, get_graph_path(model)))

w, h = model_wh(resolution)
graph_path = get_graph_path(model)
e = TfPoseEstimator(graph_path, target_size=(w, h))

In [None]:
def get_x_features_array_and_df(keypoints):
    """Turn split keypoint text fragments into a flat feature list and a one-row frame.

    Parameters
    ----------
    keypoints : sequence of str
        Text fragments, one per detected body part plus one trailing fragment
        that is ignored. In each used fragment the last integer found before
        the first "(" is taken as the body-part index, and the text between
        that "(" and the next ")" is split on "," to obtain x and y.

    Returns
    -------
    tuple
        ``(values, df)`` where ``values`` is the list of feature values in key
        order (``bp_0_x, bp_0_y, ..., bp_17_x, bp_17_y``; ``None`` for parts
        that were not present) and ``df`` is a DataFrame with a single row and
        one column per feature key.
    """
    # Pre-seed every slot for the 18 body parts so absent parts remain None.
    features = {
        "bp_{}_{}".format(part, axis): None
        for part in range(18)
        for axis in ("x", "y")
    }

    # The final fragment is deliberately left out, as in the index-based loop
    # this replaces (it iterated up to len(keypoints) - 1).
    for fragment in keypoints[:-1]:
        pieces = fragment.split("(")
        part_idx = int(re.findall(r'\d+', pieces[0])[-1])
        xy_text = pieces[1].split(")")[0].split(",")

        features["bp_{}_x".format(part_idx)] = float(xy_text[0])
        features["bp_{}_y".format(part_idx)] = float(xy_text[1])

    # (key, value) rows -> index by key -> transpose into a single wide row.
    key_value_rows = pd.DataFrame(features.items())
    df = key_value_rows.set_index(0).T
    return list(features.values()), df

In [None]:
# Input clip plus the state shared with the frame-processing loop.
video_path = "../input/human-keypoints-tracking-dataset/squat_test1.avi"
fps_time = 0    # time of the previously processed frame; 0 before the first one
showBG = True   # when False the loop draws on a zero-filled canvas instead of the frame

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    print("Error opening video stream or file")

In [None]:
# Run pose estimation frame by frame and collect the annotated RGB frames in
# `frames`. `fcount` counts the frames that were successfully read.
fcount = 0
frames = []
try:
    while True:
        ret_val, image = cap.read()
        # Check the read result BEFORE touching the frame: at the end of the
        # stream (or when the capture failed to open) there is no frame to
        # convert, and calling cvtColor on it raises instead of ending the loop.
        if not ret_val:
            break
        fcount += 1
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        humans = e.inference(image,
                             resize_to_default=(w > 0 and h > 0),
                             upsample_size=4.0)

        if len(humans) > 0:
            # Only the first detected human is used. Its string form is cut into
            # fragments that get_x_features_array_and_df knows how to parse.
            keypoints = str(str(str(humans[0]).split('BodyPart:')[1:]).split('-')).split(' score=')
            arr, fdf = get_x_features_array_and_df(keypoints)

        if not showBG:
            # np.zeros defaults to float64; keep the frame's own dtype so the
            # blank canvas is the same kind of image as the frames with background.
            image = np.zeros(image.shape, dtype=image.dtype)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)

        cv2.putText(image, "FPS: %f" % (1.0 / (time.time() - fps_time)), (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        frames.append(image)
        fps_time = time.time()

        # Stop once 66 frames have been collected.
        if len(frames) > 65:
            break
finally:
    # Always free the capture, even if inference or drawing raised.
    cap.release()

print("{} frames processed".format(len(frames)))

In [None]:
# Show the processed frames as a rows x cols grid of thumbnails.
plt.rcParams["figure.figsize"] = (20,50)
rows = 16
cols = 4
# Slice by grid capacity instead of dropping the last two frames: the result is
# the same when 66 frames were collected (66 - 2 == 16 * 4), but shorter runs no
# longer lose their final frames and longer ones cannot overflow the grid.
for num, img in enumerate(frames[:rows * cols]):
    plt.subplot(rows, cols, num+1)
    plt.axis('off')
    plt.imshow(img)