In [41]:
import cv2 
import os.path as osp
import os
import glob
from mmdet.apis import inference_detector, init_detector
from mmpose.apis import inference_top_down_pose_model, init_pose_model,process_mmdet_results,vis_pose_result
import mmcv
import numpy as np
import shutil
def extract_frame(video_path):
    dname = 'temp'
    os.makedirs(dname, exist_ok=True)
    frame_tmpl = osp.join(dname, 'img_{:05d}.jpg')
    cap = cv2.VideoCapture(video_path)
    frame_paths = []
    cnt = 0
    while(cap.isOpened()):
        flag, frame = cap.read()
        if flag:
            frame_path = frame_tmpl.format(cnt + 1)
            frame_paths.append(frame_path)

            cv2.imwrite(frame_path, frame)
            cnt += 1
        break
    return frame_paths


In [42]:
pose_config = 'mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w48_coco_256x192.py'
pose_checkpoint = 'hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'
###########
# det_config = 'Pose/yolox_s_8x8_300e_coco.py'
# det_checkpoint = 'Pose/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth'
##############
det_config = 'mmdetection/configs/yolox/yolox_s_8x8_300e_coco.py'
det_checkpoint = 'yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth'

# initialize pose model
pose_model = init_pose_model(pose_config, pose_checkpoint)
# initialize detector
det_model = init_detector(det_config, det_checkpoint)

load checkpoint from local path: hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth
load checkpoint from local path: yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth


In [43]:
def detection_inference(det_config, det_checkpoint ,frame_paths, det_score_thr=0.5,device='cuda' ):
    model = init_detector(det_config, det_checkpoint, device)
    assert model.CLASSES[0] == 'person', ('We require you to use a detector '
                                          'trained on COCO')
    results = []
    print('Performing Human Detection for each frame')
    prog_bar = mmcv.ProgressBar(len(frame_paths))
    for frame_path in frame_paths:
        result = inference_detector(model, frame_path)
        # We only keep human detections with score larger than det_score_thr
        result = result[0][result[0][:, 4] >= det_score_thr]
        results.append(result)
        prog_bar.update()
    return results

def pose_inference(pose_config,pose_checkpoint, frame_paths, det_results, device='cuda'):
    model = init_pose_model(pose_config, pose_checkpoint, device)
    print('Performing Human Pose Estimation for each frame')
    prog_bar = mmcv.ProgressBar(len(frame_paths))

    num_frame = len(det_results)
    num_person = max([len(x) for x in det_results])
    kp = np.zeros((num_person, num_frame, 17, 3), dtype=np.float32)

    for i, (f, d) in enumerate(zip(frame_paths, det_results)):
        # Align input format
        d = [dict(bbox=x) for x in list(d) if x[-1] > 0.5]
        pose = inference_top_down_pose_model(model, f, d, format='xyxy')[0]
        for j, item in enumerate(pose):
            kp[j, i] = item['keypoints']
        prog_bar.update()
    return kp

In [44]:
def pose_extraction(vid,det_config, det_checkpoint,pose_config,pose_checkpoint,label, det_score_thr=0.5,device='cuda'):
    frame_paths = extract_frame(vid)
    det_results = detection_inference(det_config, det_checkpoint ,frame_paths, det_score_thr,device)
    image = cv2.imread(frame_paths[0])
    image_shape = (image.shape[0], image.shape[1])
    pose_results = pose_inference(pose_config,pose_checkpoint, frame_paths, det_results, device)
    anno = dict()
    anno['keypoint'] = pose_results[..., :2]
    anno['keypoint_score'] = pose_results[..., 2]
    anno['frame_dir'] = osp.splitext(osp.basename(vid))[0]
    anno['img_shape'] = image_shape
    anno['original_shape'] = image_shape
    anno['total_frames'] = pose_results.shape[1]
    anno['label'] = label
    shutil.rmtree(osp.dirname(frame_paths[0]))
    return anno

In [45]:
files=glob.glob('VIDEO_TEST/*.mp4')
# file='VIDEO_TEST/VIDEO11.mp4'

In [47]:
anno_train = []
for file in files:
    print('Processing ' + file)
    anno = pose_extraction(file, det_config, det_checkpoint,pose_config,pose_checkpoint,label=0) #LABEL NOT_FALL
    anno_train.append(anno)

Processing VIDEO_TEST\VIDEO1.mp4
load checkpoint from local path: yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth
Performing Human Detection for each frame
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 1/1, 4.7 task/s, elapsed: 0s, ETA:     0sload checkpoint from local path: hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth
Performing Human Pose Estimation for each frame
[                                                  ] 0/1, elapsed: 0s, ETA:

In [35]:
mmcv.dump(anno_train, 'train.pkl')

In [36]:
import pandas as pds
data=pds.read_pickle('train.pkl')

[{'keypoint': array([], shape=(0, 1, 17, 2), dtype=float32),
  'keypoint_score': array([], shape=(0, 1, 17), dtype=float32),
  'frame_dir': 'VIDEO1',
  'img_shape': (480, 852),
  'original_shape': (480, 852),
  'total_frames': 1,
  'label': 0},
 {'keypoint': array([[[[465.74457 ,  68.67043 ],
           [468.80707 ,  65.60795 ],
           [462.68207 ,  64.0767  ],
           [473.40076 ,  67.13918 ],
           [456.55713 ,  64.0767  ],
           [474.932   ,  82.45161 ],
           [442.77594 ,  80.92037 ],
           [477.9945  , 105.420265],
           [422.86975 , 102.357765],
           [485.65076 , 126.85764 ],
           [415.21356 , 125.32641 ],
           [456.55713 , 123.79517 ],
           [439.71344 , 120.73267 ],
           [447.36963 , 159.01376 ],
           [439.71344 , 157.48251 ],
           [435.1197  , 186.57611 ],
           [427.4635  , 191.16983 ]]]], dtype=float32),
  'keypoint_score': array([[[0.8117888 , 0.8298861 , 0.85488516, 0.87764573, 0.8960279 ,
      