In [1]:
import sys
sys.path.append("../")

In [2]:
from common.camera import *
from common.visualization import *
from common.utils import *
from common.generators import ChunkedGenerator, UnchunkedGenerator
from common.h36m_dataset import Human36mDataset, preprocess_Human36m
from common.visualization import *
from common.model import *
from common.xianhui_dataset import *
import matplotlib
import glob
import plotly
import json
%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

Load the wild data (two cameras)

In [3]:
# --- Camera 0 -------------------------------------------------------------
# Inputs handed to xianhui_data_processing: one JSON file, 49 numbered frame
# images (1.png .. 49.png) and every PNG found in the Camera_0 folder.
cam_0_data_path = "../../wild_data/2019.05.30_14.03/1.json"
cam_0_img_paths = [
    "../../wild_data/2019.05.30_14.03/BonesInfo_Camera_1/{}.png".format(frame_no)
    for frame_no in range(1, 50)
]
cam_0_img_cal_paths = glob.glob('../../wild_data/2019.05.30_14.03/Camera_0/*.png')
pts_3d, cam_0_pts_2d, cam_0_params, cam_0_imgs = xianhui_data_processing(
    cam_0_data_path,
    cam_0_img_paths,
    cam_0_img_cal_paths,
    cam_id="cam_0",
    chess_board_shape=(7, 5),
)

# --- Camera 1 -------------------------------------------------------------
# Same call with the second camera's files. NOTE: `pts_3d` is rebound here,
# so only the value returned by this second call survives the cell.
cam_1_data_path = "../../wild_data/2019.05.30_14.03/2.json"
cam_1_img_paths = [
    "../../wild_data/2019.05.30_14.03/BonesInfo_Camera_2/{}.png".format(frame_no)
    for frame_no in range(1, 50)
]
cam_1_img_cal_paths = glob.glob('../../wild_data/2019.05.30_14.03/Camera_1/*.png')
pts_3d, cam_1_pts_2d, cam_1_params, cam_1_imgs = xianhui_data_processing(
    cam_1_data_path,
    cam_1_img_paths,
    cam_1_img_cal_paths,
    cam_id="cam_1",
    chess_board_shape=(7, 5),
)

calibrate camera
load 3d and 2d skeletons
estimate extrinsics
calibrate camera
load 3d and 2d skeletons
estimate extrinsics


In [4]:
# Single subject "S1" observed by both cameras.
cameras = {"S1": [cam_0_params, cam_1_params]}

# One action ("somba") for that subject; it shares the subject's camera list.
data = {
    "S1": {
        "somba": {
            "positions": pts_3d,
            "cameras": cameras["S1"],
        },
    },
}

dataset = XianhuiDataset(cameras, data)

### preprocessing 3d data

In [5]:
# For every (subject, action) build one camera-space copy of the 3D poses per
# camera and store the list under anim['positions_3d'].
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]
        nframe = anim['positions'].shape[0]
        positions_3d = []
        for cam in anim['cameras']:
            # Homogeneous coordinates, flattened to shape (4, nframe * 17).
            pts_3d_homo = np.ones((nframe, 17, 4))
            pts_3d_homo[..., :3] = anim['positions']
            pts_3d_homo = pts_3d_homo.reshape(-1, 4).T

            # Use the extrinsics of the camera being iterated. The previous
            # code always passed cam_0_params here, so every entry of
            # positions_3d ended up expressed in camera 0's frame.
            pts_3d_cam = world2camera_cv(pts_3d_homo, cam['rvec'], cam['tvec'])[:3]
            pos_camera = pts_3d_cam.T.reshape(nframe, 17, 3)

            # Keep joint 0 absolute and make joints 1.. relative to joint 0.
            pos_camera[:, 1:] -= pos_camera[:, :1]

            positions_3d.append(pos_camera)

        anim['positions_3d'] = positions_3d

### preprocessing 2d data

In [6]:
# 2D detections per subject / action, one array per camera.
keypoints = {"S1": {"somba": [cam_0_pts_2d, cam_1_pts_2d]}}

for subject in keypoints.keys():
    for action in keypoints[subject]:
        for cam_idx, kps in enumerate(keypoints[subject][action]):
            # Normalize camera frame
            cam = dataset.cameras()[subject][cam_idx]
            # Work on a copy: writing into the original array would also
            # change cam_0_pts_2d / cam_1_pts_2d, so re-running this cell
            # would normalize already-normalized coordinates a second time.
            kps = kps.copy()
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
            keypoints[subject][action][cam_idx] = kps

In [7]:
# Subjects passed to fetch() for evaluation.
subjects_test = ["S1"]

In [8]:
# Gather the per-sequence cameras, 3D poses and 2D keypoints for the test
# subjects, with stride 1.
cameras_valid, poses_valid, poses_valid_2d = fetch(
    dataset=dataset,
    keypoints=keypoints,
    subjects=subjects_test,
    stride=1,
)

## generate model

In [9]:
# The first three constructor arguments are read off the first fetched
# sequences: the last two axes of the 2D input and the second-to-last axis of
# the 3D target.
sample_2d = poses_valid_2d[0]
sample_3d = poses_valid[0]

model_pos = TemporalModel(
    sample_2d.shape[-2],
    sample_2d.shape[-1],
    sample_3d.shape[-2],
    filter_widths=[3, 3, 3, 3, 3],
    causal=False,
    dropout=0.25,
    channels=1024,
    dense=False,
)

In [10]:
# Number of input frames the model reports as its receptive field.
receptive_field = model_pos.receptive_field()
print('INFO: Receptive field: {} frames'.format(receptive_field))
# Half of the remaining frames once the centre frame is excluded; passed as
# `pad` to the test generator further down.
pad = (receptive_field - 1) // 2 # Padding on each side

INFO: Receptive field: 243 frames


load model on gpu

In [11]:
# Move the model to the GPU when one is available; otherwise leave it as is.
model_pos = model_pos.cuda() if torch.cuda.is_available() else model_pos

Load weights from the pre-trained model checkpoint

In [12]:
checkpoint_path = "../../checkpoint/pretrained_h36m_cpn.bin"

# The map_location callable hands back each storage unchanged, so loading does
# not require the device the checkpoint was saved from.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
checkpoint = torch.load(
    checkpoint_path,
    map_location=lambda storage, loc: storage,
)
print('This model was trained for {} epochs'.format(checkpoint['epoch']))

# Left as the last expression so the notebook shows the key-matching report.
model_pos.load_state_dict(checkpoint['model_pos'])

This model was trained for 80 epochs


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [13]:
# Evaluate a single camera view. The 2D input and the 3D ground truth must
# come from the SAME camera: the ground-truth root trajectory is later added
# to the prediction, and both are mapped back to world space with camera 1.
# Previously the keypoints used index 1 while the ground truth used index 0.
eval_cam_idx = 1
input_keypoints = keypoints["S1"]["somba"][eval_cam_idx]
# Copy so the later in-place post-processing does not alter the dataset.
gt = dataset["S1"]["somba"]["positions_3d"][eval_cam_idx].copy()

In [14]:
# Generator over the single 2D keypoint sequence. The first two positional
# arguments are None (presumably cameras and 3D poses - TODO confirm against
# UnchunkedGenerator's signature).
test_generator = UnchunkedGenerator(
    None,
    None,
    [input_keypoints],
    pad=pad,
    causal_shift=0,
    augment=False,
)

In [15]:
# Inference only: evaluation mode and no gradient tracking.
model_pos.eval()
N = 0  # NOTE(review): not read anywhere in the visible notebook
use_gpu = torch.cuda.is_available()

with torch.no_grad():
    for _, batch, batch_2d in test_generator.next_epoch():
        # The generator yields NumPy arrays; feed the model float32 tensors.
        inputs_2d = torch.from_numpy(batch_2d.astype('float32'))
        inputs_2d = inputs_2d.cuda() if use_gpu else inputs_2d

        # Positional model
        predicted_3d_pos = model_pos(inputs_2d)

        # Drop the leading batch axis and bring the result back as NumPy.
        # `prediction` is rebound on every iteration, so it holds the output
        # of the last batch once the loop finishes.
        prediction = predicted_3d_pos.squeeze(0).cpu().numpy()

Ground truth: joint names (17 keypoints) used to label the skeleton plots

In [16]:
# Joint labels, listed in joint-index order (17 entries).
human36m_kpts_name = [
    'Pelvis',
    'RHip', 'RKnee', 'RAnkle',
    'LHip', 'LKnee', 'LAnkle',
    'Spine1', 'Neck', 'Head', 'Site',
    'LShoulder', 'LElbow', 'LWrist',
    'RShoulder', 'RElbow', 'RWrist',
]

Post-process output: add the root-joint trajectory back to the ground truth and the prediction

In [17]:
# Per-frame position of joint 0, kept with a length-1 joint axis so it
# broadcasts over the joint dimension. This is a view into `gt`, not a copy.
trajectory = gt[:, :1]
# Preprocessing stored joints 1.. relative to joint 0; add joint 0 back so
# every ground-truth joint is absolute again (joint 0 itself is untouched).
gt[:, 1:] += trajectory
# Shift every predicted joint by the same ground-truth root position
# (presumably the model output is root-relative - TODO confirm).
# NOTE: both additions are in place, so re-running this cell without
# re-running the cells that create `gt` and `prediction` adds the offset again.
prediction += trajectory

get camera

In [18]:
# Parameters of the second camera (index 1) of subject "S1"; its rvec/tvec
# are used below to map camera-space poses back to world space.
subject_cameras = dataset.cameras()["S1"]
cam = subject_cameras[1]

Reproject from camera coordinates back to world coordinates

In [19]:
# Map the camera-space ground truth back to world space with `cam`'s rvec/tvec.
# The frame count is read from the data instead of the former hard-coded 269,
# so the cell keeps working for sequences of any length.
nframe = gt.shape[0]
# Homogeneous coordinates, flattened to shape (4, nframe * 17).
gt_homo = np.ones((nframe, 17, 4))
gt_homo[..., :3] = gt
gt_homo = gt_homo.reshape(-1, 4).T
# Keep the first three rows of the result and restore the (frames, 17, 3) layout.
gt_reproj = camera2world_cv(gt_homo, cam["rvec"], cam["tvec"])[:3].T.reshape(-1, 17, 3)

In [20]:
# Map the camera-space prediction back to world space with `cam`'s rvec/tvec.
# The frame count is read from the data instead of the former hard-coded 269,
# so the cell keeps working for sequences of any length.
nframe = prediction.shape[0]
# Homogeneous coordinates, flattened to shape (4, nframe * 17).
pred_homo = np.ones((nframe, 17, 4))
pred_homo[..., :3] = prediction
pred_homo = pred_homo.reshape(-1, 4).T
# Keep the first three rows of the result and restore the (frames, 17, 3) layout.
pred_reproj = camera2world_cv(pred_homo, cam["rvec"], cam["tvec"])[:3].T.reshape(-1, 17, 3)

In [21]:
# Axis limits: per-coordinate min / max over every joint of every frame.
obj_pts = gt.reshape(-1, 3)
xmin, ymin, zmin = obj_pts.min(axis=0)
xmax, ymax, zmax = obj_pts.max(axis=0)

# Animation frames use every 10th pose; the initial figure data is frame 0.
frames, layout = generate_frames_layout(
    gt[::10],
    dataset.skeleton(),
    human36m_kpts_name,
    x_range=[xmin, xmax],
    y_range=[ymin, ymax],
    z_range=[zmin, zmax],
)
fig = {
    "data": get_line_3d_segs(gt, 0, dataset.skeleton(), human36m_kpts_name),
    "layout": layout,
    "frames": frames,
}
plotly.offline.iplot(fig)

In [22]:
# Axis limits from the reprojected ground truth. These names are reused by the
# prediction plot in the next cell, so both figures share the same axes.
obj_pts = gt_reproj.reshape(-1, 3)
xmin, ymin, zmin = obj_pts.min(axis=0)
xmax, ymax, zmax = obj_pts.max(axis=0)

# Animation frames use every 10th pose; the initial figure data is frame 0.
frames, layout = generate_frames_layout(
    gt_reproj[::10],
    dataset.skeleton(),
    human36m_kpts_name,
    x_range=[xmin, xmax],
    y_range=[ymin, ymax],
    z_range=[zmin, zmax],
)
fig = {
    "data": get_line_3d_segs(gt_reproj, 0, dataset.skeleton(), human36m_kpts_name),
    "layout": layout,
    "frames": frames,
}
plotly.offline.iplot(fig)

In [23]:
# Prediction in world space. xmin..zmax are NOT recomputed here: they are the
# limits left in the namespace by the preceding ground-truth plot cell, which
# therefore has to be run first.
frames, layout = generate_frames_layout(
    pred_reproj[::10],
    dataset.skeleton(),
    human36m_kpts_name,
    x_range=[xmin, xmax],
    y_range=[ymin, ymax],
    z_range=[zmin, zmax],
)
fig = {
    "data": get_line_3d_segs(pred_reproj, 0, dataset.skeleton(), human36m_kpts_name),
    "layout": layout,
    "frames": frames,
}
plotly.offline.iplot(fig)