In [1]:
import sys
# Extend the module search path with the parent directory. Presumably this is
# what lets the `common.*` imports in the next cell resolve — confirm against
# the repository layout.
sys.path.append("../")

In [37]:
import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import sys
import errno

from common.camera import *
from common.model import *
from common.loss import *
from common.utils import *
from common.visualization import *
from common.generators import ChunkedGenerator, UnchunkedGenerator
from time import time
from common.utils import deterministic_random

## Load and Preprocess Data

In [3]:
# Relative path to the 3D dataset archive; passed to Human36mDataset below.
dataset_path = "../../data/data_3d_h36m.npz"
# Relative path to the 2D keypoint archive; read with np.load below.
dataset_path_2d = '../../data/data_2d_h36m_cpn_ft_h36m_dbb.npz'

### Load and preprocess 3D data

In [4]:
from common.h36m_dataset import Human36mDataset

dataset = Human36mDataset(dataset_path)


def _camera_space_positions(world_positions, camera):
    """Transform `world_positions` with `camera`'s orientation/translation.

    After the transform, entry 0 along the second axis is subtracted from
    every later entry along that axis, so entry 0 keeps its value while the
    rest become relative to it.
    """
    in_camera = world_to_camera(world_positions,
                                R=camera['orientation'],
                                t=camera['translation'])
    # Remove global offset, but keep trajectory in first position
    in_camera[:, 1:] -= in_camera[:, :1]
    return in_camera


# Attach one converted array per camera to every (subject, action) entry.
for subject in dataset.subjects():
    for action in dataset[subject].keys():
        anim = dataset[subject][action]
        anim['positions_3d'] = [
            _camera_space_positions(anim['positions'], cam)
            for cam in anim['cameras']
        ]

### Load and preprocess 2D data

In [5]:
# NOTE(security): allow_pickle=True unpickles Python objects stored in the
# archive, which can execute arbitrary code. Only load archives you trust.
# The context manager closes the .npz file handle once both entries have been
# read into memory; the archive object gets its own name so `keypoints` only
# ever refers to the per-subject dictionary.
with np.load(dataset_path_2d, allow_pickle=True) as keypoints_archive:
    keypoints_symmetry = keypoints_archive['metadata'].item()['keypoints_symmetry']
    keypoints = keypoints_archive['positions_2d'].item()

# Left/right index lists for the 2D keypoints and for the 3D skeleton.
kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
joints_left, joints_right = list(dataset.skeleton().joints_left()), list(dataset.skeleton().joints_right())

# Validate the 2D detections against the 3D data and trim extra frames.
for subject in dataset.subjects():
    assert subject in keypoints, 'Subject {} is missing from the 2D detections dataset'.format(subject)
    for action in dataset[subject].keys():
        assert action in keypoints[subject], 'Action {} of subject {} is missing from the 2D detections dataset'.format(action, subject)

        # Check the per-camera counts before indexing by camera, so a mismatch
        # is reported here rather than as an IndexError inside the loop.
        assert len(keypoints[subject][action]) == len(dataset[subject][action]['positions_3d']), \
            'Camera count mismatch between 2D and 3D data for action {} of subject {}'.format(action, subject)

        for cam_idx in range(len(keypoints[subject][action])):

            # We check for >= instead of == because some videos in H3.6M contain extra frames
            mocap_length = dataset[subject][action]['positions_3d'][cam_idx].shape[0]
            assert keypoints[subject][action][cam_idx].shape[0] >= mocap_length

            if keypoints[subject][action][cam_idx].shape[0] > mocap_length:
                # Shorten sequence
                keypoints[subject][action][cam_idx] = keypoints[subject][action][cam_idx][:mocap_length]

for subject in keypoints.keys():
    for action in keypoints[subject]:
        for cam_idx, kps in enumerate(keypoints[subject][action]):
            # Normalize camera frame: the first two components of the last
            # axis are rescaled using that camera's resolution.
            cam = dataset.cameras()[subject][cam_idx]
            kps[..., :2] = normalize_screen_coordinates(kps[..., :2], w=cam['res_w'], h=cam['res_h'])
            keypoints[subject][action][cam_idx] = kps

### generate subsets

In [6]:
# Subject splits. Only `subjects_test` is used in the visible cells (it is
# passed to `fetch` below); `subjects_train` and `subjects_semi` are defined
# but not referenced in this part of the notebook.
subjects_train = ['S1', 'S5', 'S6', 'S7', 'S8']
subjects_semi = ['S7', 'S8']
subjects_test = ['S9', 'S11']

In [7]:
# Collect the test-subject data at full frame rate (stride 1) and unpack the
# three values returned by `fetch`.
validation_split = fetch(dataset=dataset, keypoints=keypoints, subjects=subjects_test, stride=1)
cameras_valid, poses_valid, poses_valid_2d = validation_split

In [8]:
# The first three constructor arguments are read off the fetched arrays.
# Presumably each array is laid out as (frames, joints, coordinates) — TODO
# confirm against `fetch`.
first_pose_2d = poses_valid_2d[0]
first_pose_3d = poses_valid[0]
dim_2d_penultimate = first_pose_2d.shape[-2]
dim_2d_last = first_pose_2d.shape[-1]
dim_3d_penultimate = first_pose_3d.shape[-2]

model_pos = TemporalModel(
    dim_2d_penultimate,
    dim_2d_last,
    dim_3d_penultimate,
    filter_widths=[3, 3, 3, 3, 3],
    causal=False,
    dropout=0.25,
    channels=1024,
    dense=False,
)

In [9]:
# Number of input frames the model looks at, as reported by the model itself.
receptive_field = model_pos.receptive_field()
# Padding on each side: half of the receptive field, excluding the centre frame.
pad = (receptive_field - 1) // 2
print('INFO: Receptive field: {} frames'.format(receptive_field))

INFO: Receptive field: 243 frames


Load the model onto the GPU (if one is available)

In [10]:
# Move the model to the GPU when CUDA is usable; otherwise leave it as is.
model_pos = model_pos.cuda() if torch.cuda.is_available() else model_pos

Restore weights from the pre-trained checkpoint

In [11]:
# The map_location callback returns each storage unchanged, so loading does
# not move tensors to the device they were saved from.
checkpoint_path = "../../checkpoint/pretrained_h36m_cpn.bin"
checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

trained_epochs = checkpoint['epoch']
print('This model was trained for {} epochs'.format(trained_epochs))

# Kept as the cell's last expression so the load report is displayed.
model_pos.load_state_dict(checkpoint['model_pos'])

This model was trained for 80 epochs


IncompatibleKeys(missing_keys=[], unexpected_keys=[])

In [48]:
# Sequence to run the model on: one subject, one action, one camera index.
viz_subject, viz_action, viz_camera = "S9", "Walking", 0

# 2D keypoints for that sequence (a reference into `keypoints`, not a copy).
input_keypoints = keypoints[viz_subject][viz_action][viz_camera]
# Matching 3D positions, copied so later in-place edits leave `dataset` intact.
gt = dataset[viz_subject][viz_action]["positions_3d"][viz_camera].copy()

In [49]:
# Generator over the single selected 2D sequence. The first two positional
# arguments are left as None; only the 2D keypoint list is supplied.
generator_options = dict(pad=pad, causal_shift=0, augment=False)
test_generator = UnchunkedGenerator(None, None, [input_keypoints], **generator_options)

In [50]:
# Run the positional model over every batch produced by `test_generator`.
# Inputs are placed on the device the model's parameters live on, rather than
# re-checking CUDA availability here, so the two can never disagree (e.g. when
# CUDA is available but the model was left on the CPU).
model_pos.eval()
model_device = next(model_pos.parameters()).device

with torch.no_grad():
    # Only the third item of each yielded triple (the 2D batch) is used.
    for _, _, batch_2d in test_generator.next_epoch():
        inputs_2d = torch.from_numpy(batch_2d.astype('float32')).to(model_device)

        # Positional model
        predicted_3d_pos = model_pos(inputs_2d)

        # squeeze(0) drops the leading dimension when it has size 1; the result
        # is then copied to host memory as a NumPy array. `prediction` is
        # overwritten on every iteration, so it holds the last batch's output.
        prediction = predicted_3d_pos.squeeze(0).cpu().numpy()

Post-process the output

In [51]:
# Entry 0 along the second axis of `gt`. Assuming `gt` is a NumPy array, this
# basic slice is a view and not a copy; it stays valid because the next line
# only writes to entries 1 onward.
trajectory = gt[:, :1]
# Add that entry back onto all later entries of `gt`, in place.
gt[:, 1:] += trajectory
# Shift the whole prediction by the same values, in place.
prediction += trajectory
# NOTE(review): both updates are in place, so running this cell twice adds the
# trajectory twice. Re-create `gt` and `prediction` before re-running it.

get camera

In [58]:
# Camera entry 0 of subject S9 — the same subject and index that were used to
# select `input_keypoints` and `gt`.
cam = dataset.cameras()["S9"][0]

In [53]:
# Apply the same camera orientation/translation to both sequences so that
# prediction and ground truth end up in the same frame.
cam_orientation, cam_translation = cam['orientation'], cam['translation']
prediction = camera_to_world(prediction, R=cam_orientation, t=cam_translation)
gt = camera_to_world(gt, R=cam_orientation, t=cam_translation)

In [54]:
import plotly
# Initialise plotly's offline notebook mode before the iplot calls below.
# NOTE(review): connected=True presumably loads plotly.js from the network
# instead of embedding it in the notebook — confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected=True)

Visualization

In [55]:
# Display names for the 17 joints, in index order; passed to the plotting
# helpers in the visualisation cells.
human36m_kpts_name = [
    'Pelvis',
    'RHip', 'RKnee', 'RAnkle',
    'LHip', 'LKnee', 'LAnkle',
    'Spine1', 'Neck', 'Head', 'Site',
    'LShoulder', 'LElbow', 'LWrist',
    'RShoulder', 'RElbow', 'RWrist',
]

## visualize ground truth

In [56]:
# Animation frames are built from every 100th ground-truth frame; the figure's
# initial traces come from frame 0 of the full sequence.
gt_keyframes = gt[::100]
frames, layout = generate_frames_layout(gt_keyframes, dataset.skeleton(), human36m_kpts_name)
gt_initial_traces = get_line_3d_segs(gt, 0, dataset.skeleton(), human36m_kpts_name)
fig = dict(
    data=gt_initial_traces,
    layout=layout,
    frames=frames,
)
plotly.offline.iplot(fig)

## visualize predictions

In [57]:
# Animation frames are built from every 100th predicted frame; the figure's
# initial traces come from frame 0 of the full prediction.
prediction_keyframes = prediction[::100]
frames, layout = generate_frames_layout(prediction_keyframes, dataset.skeleton(), human36m_kpts_name)
prediction_initial_traces = get_line_3d_segs(prediction, 0, dataset.skeleton(), human36m_kpts_name)
fig = dict(
    data=prediction_initial_traces,
    layout=layout,
    frames=frames,
)
plotly.offline.iplot(fig)