# H36m dataset

## Overview
加载数据集，data_3d_h36m是原始数据的positions（即3d数据，单位是米(m)），data_2d_h36m_gt是从data_3d_h36m根据相机参数投影得到的2d坐标。

## 3D
数据按subject存放，一共有7个：'S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11'。每个subject包含多个action，每个action对应一段序列，shape为(frames, 32, 3)，其中32是关节数。**这里的数据是世界坐标系下的位置，不是相机坐标系下的位置；要得到相机坐标系下的3d坐标，还需用相机外参做变换（见下文）**。

In [31]:
import numpy as np
dir_path = '../data/'
data_path = dir_path + 'data_3d_h36m.npz'

# NOTE(review): allow_pickle=True unpickles arbitrary objects stored in the
# archive, so only load files from a trusted source.
# The context manager closes the underlying .npz file handle once the
# payload has been read into memory.
with np.load(data_path, allow_pickle=True) as archive:
    print(list(archive.keys()))
    # 'positions_3d' holds a single Python object; .item() unwraps it into
    # the nested dict {subject: {action: array}}.
    data = archive['positions_3d'].item()

print(data.keys())
print(len(data['S1'].keys()))
print(data['S1'].keys())
# Each action is one array; index as [frame][joint] to get an (x, y, z) triple.
print(data['S1']['Posing'].shape)
print(data['S1']['Posing'][50][9])
print(data['S1']['Posing'][100][9])

['positions_3d']
dict_keys(['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11'])
30
dict_keys(['Directions 1', 'Directions', 'Discussion 1', 'Discussion', 'Eating 2', 'Eating', 'Greeting 1', 'Greeting', 'Phoning 1', 'Phoning', 'Photo 1', 'Photo', 'Posing 1', 'Posing', 'Purchases 1', 'Purchases', 'Sitting 1', 'Sitting 2', 'SittingDown 2', 'SittingDown', 'Smoking 1', 'Smoking', 'Waiting 1', 'Waiting', 'WalkDog 1', 'WalkDog', 'Walking 1', 'Walking', 'WalkTogether 1', 'WalkTogether'])
(992, 32, 3)
[-0.17224269  0.07311359  0.02302729]
[-0.26585695  0.09934875  0.02429163]


## 如何得到相机坐标系下的3d坐标

理论部分：\
https://zhuanlan.zhihu.com/p/54139614 \
https://zhuanlan.zhihu.com/p/389653208

In [32]:
# Take a copy of one action sequence so that anything done to X below
# leaves the array stored in `data` untouched.
X = data['S1']['Posing'].copy()
print(X.shape)  # (frames, joints, 3)
print(X)

(992, 32, 3)
[[[-0.287247    0.0645002   0.933714  ]
  [-0.4172063   0.03707894  0.93954366]
  [-0.38434964  0.08887868  0.5009175 ]
  ...
  [-0.47607398 -0.18019038  0.9261923 ]
  [-0.55295044 -0.19875656  0.77568805]
  [-0.55295044 -0.19875656  0.77568805]]

 [[-0.287696    0.0644208   0.93369204]
  [-0.41760775  0.03676208  0.93945915]
  [-0.38473797  0.08888602  0.5008725 ]
  ...
  [-0.4741127  -0.17884623  0.92517275]
  [-0.5508415  -0.1976939   0.7746281 ]
  [-0.5508415  -0.1976939   0.7746281 ]]

 [[-0.28812602  0.0643741   0.933667  ]
  [-0.41800445  0.03655129  0.939395  ]
  [-0.3850915   0.08892319  0.5008411 ]
  ...
  [-0.47295648 -0.17646953  0.9231978 ]
  [-0.5493806  -0.19463897  0.7724151 ]
  [-0.5493806  -0.19463897  0.7724151 ]]

 ...

 [[-0.146762    0.394196    0.921581  ]
  [-0.27964193  0.3982155   0.92302877]
  [-0.2488135   0.31266007  0.48957112]
  ...
  [-0.34589848  0.514822    1.0374599 ]
  [-0.30724785  0.35412127  0.9976149 ]
  [-0.30724785  0.35412127  0.9

In [33]:
# 相机外参
import torch


def wrap(func, *args, unsqueeze=False):
    """
    Call a torch function with NumPy arrays and get NumPy arrays back.

    Every positional argument that is an ``np.ndarray`` (subclasses included)
    is converted with ``torch.from_numpy``; any other argument is forwarded
    unchanged. Tensors in the result are converted back with ``.numpy()``.

    Args:
        func: Callable invoked as ``func(*converted_args)``.
        *args: Positional arguments for ``func``.
        unsqueeze: If true, a leading dimension of size 1 is added to every
            converted input, and dimension 0 is squeezed from every tensor
            in the output before it is converted back.

    Returns:
        If ``func`` returns a tuple, a plain tuple with each tensor element
        replaced by a NumPy array; if it returns a tensor, a NumPy array;
        otherwise the result unchanged.
    """

    def _to_tensor(arg):
        # isinstance (rather than an exact type comparison) so that ndarray
        # subclasses are converted as well instead of reaching func as-is.
        if not isinstance(arg, np.ndarray):
            return arg
        tensor = torch.from_numpy(arg)
        return tensor.unsqueeze(0) if unsqueeze else tensor

    def _to_numpy(res):
        if not isinstance(res, torch.Tensor):
            return res
        if unsqueeze:
            res = res.squeeze(0)
        return res.numpy()

    result = func(*(_to_tensor(arg) for arg in args))

    if isinstance(result, tuple):
        return tuple(_to_numpy(res) for res in result)
    return _to_numpy(result)


def qrot(q, v):
    """
    Apply the rotation given by quaternion(s) q to vector(s) v.

    q has shape (*, 4) with the scalar component first, v has shape (*, 3),
    and both must share the same leading dimensions (*).
    The result has shape (*, 3).
    """
    assert q.shape[-1] == 4
    assert v.shape[-1] == 3
    assert q.shape[:-1] == v.shape[:-1]

    last_dim = q.dim() - 1
    scalar, axis = q[..., :1], q[..., 1:]
    # Two successive cross products with the quaternion's vector part.
    first_cross = torch.cross(axis, v, dim=last_dim)
    second_cross = torch.cross(axis, first_cross, dim=last_dim)
    return v + 2 * (scalar * first_cross + second_cross)


def qinverse(q, inplace=False):
    """
    Return the conjugate of quaternion(s) q of shape (*, 4), scalar first.

    The conjugate equals the inverse only for normalized quaternions, which
    is what this function assumes. With inplace=True the vector part of q is
    negated in place and q itself is returned; otherwise q is left untouched
    and a new tensor is returned.
    """
    if not inplace:
        scalar_part = q[..., :1]
        negated_vector = -q[..., 1:]
        return torch.cat((scalar_part, negated_vector), dim=q.dim() - 1)

    q[..., 1:] *= -1
    return q


def world_to_camera(X, R, t):
    """
    Map points X from the world frame into a camera frame.

    X is an array whose last axis holds (x, y, z); R is the camera
    orientation quaternion (scalar first) and t the camera translation,
    in the same units as X. Computes qrot(inverse(R), X - t).
    """
    inverse_rotation = wrap(qinverse, R)
    # qrot needs one quaternion per point, so repeat the inverted rotation
    # across all leading dimensions of X.
    per_point_rotation = np.tile(inverse_rotation, (*X.shape[:-1], 1))
    shifted = X - t
    return wrap(qrot, per_point_rotation, shifted)


# Extrinsic parameters of one camera: orientation quaternion and translation.
cam = {
    'orientation': np.array([0.1407056450843811, -0.1500701755285263, -0.755240797996521, 0.6223280429840088]),
    # Given in millimetres; divide to match the metre-based 3D positions.
    'translation': np.array([1841.1070556640625, 4955.28466796875, 1563.4454345703125]) / 1000,  # mm -> m
}

# Reproduce the intermediate steps of world_to_camera to inspect their shapes.
Rt = wrap(qinverse, cam['orientation'])
tmp = np.tile(Rt, (*X.shape[:-1], 1))
print(Rt.shape, Rt, X.shape, tmp.shape, sep='\n')

pos_3d = world_to_camera(X, cam['orientation'], cam['translation'])
print(pos_3d.shape, pos_3d, sep='\n')


(4,)
[ 0.14070565  0.15007018  0.7552408  -0.62232804]
(992, 32, 3)
(992, 32, 4)
(992, 32, 3)
[[[-0.03315838 -0.37332899  5.35777481]
  [ 0.07493338 -0.39070041  5.43320512]
  [ 0.0543777   0.05117561  5.45508514]
  ...
  [ 0.04117422 -0.41981021  5.65425389]
  [ 0.10020883 -0.27928964  5.72958715]
  [ 0.10020883 -0.27928964  5.72958715]]

 [[-0.03277985 -0.37334491  5.35802947]
  [ 0.07517138 -0.39069524  5.4336656 ]
  [ 0.05473495  0.05120112  5.45524199]
  ...
  [ 0.03989277 -0.41846522  5.6524534 ]
  [ 0.09867809 -0.27794812  5.72798793]
  [ 0.09867809 -0.27794812  5.72798793]]

 [[-0.03240564 -0.37335092  5.35824776]
  [ 0.07544815 -0.39069071  5.43402518]
  [ 0.05507269  0.05122045  5.45535563]
  ...
  [ 0.03973854 -0.41603705  5.65022493]
  [ 0.09851135 -0.27514807  5.72507311]
  [ 0.09851135 -0.27514807  5.72507311]]

 ...

 [[-0.02959083 -0.29470127  5.00793553]
  [ 0.09369471 -0.30224806  5.05711935]
  [ 0.01993819  0.1096714   5.20216409]
  ...
  [ 0.20414321 -0.39703027  4.

## 从3d投影到2d
这里有两种实现：project_to_2d_linear 和 project_to_2d。前者只用线性参数，后者还用了非线性参数（各种畸变）。