/
msra_heatmap.py
153 lines (123 loc) · 5.62 KB
/
msra_heatmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Tuple
import numpy as np
from mmpose.registry import KEYPOINT_CODECS
from .base import BaseKeypointCodec
from .utils.gaussian_heatmap import (generate_gaussian_heatmaps,
generate_unbiased_gaussian_heatmaps)
from .utils.post_processing import get_heatmap_maximum
from .utils.refinement import refine_keypoints, refine_keypoints_dark
@KEYPOINT_CODECS.register_module()
class MSRAHeatmap(BaseKeypointCodec):
    """Gaussian-heatmap keypoint codec following the "MSRA" approach.

    The scheme is described in `Simple Baselines for Human Pose Estimation
    and Tracking`_ by Xiao et al (2018).

    Note:

        - instance number: N
        - keypoint number: K
        - keypoint dimension: D
        - image size: [w, h]
        - heatmap size: [W, H]

    Encoded:

        - heatmaps (np.ndarray): The generated heatmap in shape (K, H, W)
            where [W, H] is the `heatmap_size`
        - keypoint_weights (np.ndarray): The target weights in shape (N, K)

    Args:
        input_size (tuple): Image size in [w, h]
        heatmap_size (tuple): Heatmap size in [W, H]
        sigma (float): The sigma value of the Gaussian heatmap
        unbiased (bool): Whether to use the unbiased method (DarkPose) in
            ``'msra'`` encoding. See `Dark Pose`_ for details. Defaults to
            ``False``
        blur_kernel_size (int): The Gaussian blur kernel size of the heatmap
            modulation in DarkPose. The kernel size and sigma should follow
            the empirical formula :math:`sigma = 0.3*((ks-1)*0.5-1)+0.8`.
            Defaults to 11

    .. _`Simple Baselines for Human Pose Estimation and Tracking`:
        https://arxiv.org/abs/1804.06208
    .. _`Dark Pose`: https://arxiv.org/abs/1910.06278
    """

    label_mapping_table = {'keypoint_weights': 'keypoint_weights'}
    field_mapping_table = {'heatmaps': 'heatmaps'}

    def __init__(self,
                 input_size: Tuple[int, int],
                 heatmap_size: Tuple[int, int],
                 sigma: float,
                 unbiased: bool = False,
                 blur_kernel_size: int = 11) -> None:
        """Store the codec configuration and derive the scale factor.

        Args:
            input_size (tuple): Image size in [w, h]
            heatmap_size (tuple): Heatmap size in [W, H]
            sigma (float): The sigma value of the Gaussian heatmap
            unbiased (bool): Whether to use the unbiased (DarkPose) variant.
                Defaults to ``False``
            blur_kernel_size (int): Gaussian blur kernel size used by the
                DarkPose refinement. Defaults to 11
        """
        super().__init__()

        self.input_size = input_size
        self.heatmap_size = heatmap_size
        self.sigma = sigma
        self.unbiased = unbiased

        # Kernel size of the Gaussian blur applied for heatmap modulation in
        # DarkPose. It is tied to sigma through the empirical formula
        # :math:`sigma = 0.3*((ks-1)*0.5-1)+0.8`, i.e.:
        #   ks=17 -> sigma~=3
        #   ks=11 -> sigma=2
        #   ks=7  -> sigma~=1.5
        #   ks=3  -> sigma~=1
        self.blur_kernel_size = blur_kernel_size

        # Per-axis ratio between input-image size and heatmap size; used to
        # move coordinates between the two spaces.
        input_wh = np.array(input_size)
        self.scale_factor = (input_wh / heatmap_size).astype(np.float32)

    def encode(self,
               keypoints: np.ndarray,
               keypoints_visible: Optional[np.ndarray] = None) -> dict:
        """Turn keypoints given in input-image space into heatmaps.

        Args:
            keypoints (np.ndarray): Keypoint coordinates in shape (N, K, D)
            keypoints_visible (np.ndarray): Keypoint visibilities in shape
                (N, K). When ``None``, every keypoint is treated as visible

        Returns:
            dict:
            - heatmaps (np.ndarray): The generated heatmap in shape
                (K, H, W) where [W, H] is the `heatmap_size`
            - keypoint_weights (np.ndarray): The target weights in shape
                (N, K)
        """
        assert keypoints.shape[0] == 1, (
            f'{self.__class__.__name__} only support single-instance '
            'keypoint encoding')

        if keypoints_visible is None:
            keypoints_visible = np.ones(keypoints.shape[:2], dtype=np.float32)

        # Both generators are called with the same keyword arguments, so
        # only the callable depends on the `unbiased` flag.
        if self.unbiased:
            make_heatmaps = generate_unbiased_gaussian_heatmaps
        else:
            make_heatmaps = generate_gaussian_heatmaps

        heatmaps, keypoint_weights = make_heatmaps(
            heatmap_size=self.heatmap_size,
            # Rescale from input-image space to heatmap space
            keypoints=keypoints / self.scale_factor,
            keypoints_visible=keypoints_visible,
            sigma=self.sigma)

        return dict(heatmaps=heatmaps, keypoint_weights=keypoint_weights)

    def decode(self, encoded: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Recover keypoint coordinates (input-image space) from heatmaps.

        Args:
            encoded (np.ndarray): Heatmaps in shape (K, H, W)

        Returns:
            tuple:
            - keypoints (np.ndarray): Decoded keypoint coordinates in shape
                (N, K, D)
            - scores (np.ndarray): The keypoint scores in shape (N, K). It
                usually represents the confidence of the keypoint prediction
        """
        # Operate on a copy so nothing below can touch the caller's array
        heatmaps = encoded.copy()

        # The values are not used afterwards; the 3-way unpack rejects any
        # input that is not 3-D (K, H, W) before further processing.
        _num_keypoints, _height, _width = heatmaps.shape

        peak_coords, peak_scores = get_heatmap_maximum(heatmaps)

        # Prepend the instance dimension for single-instance results
        keypoints = peak_coords[None]
        scores = peak_scores[None]

        if not self.unbiased:
            keypoints = refine_keypoints(keypoints, heatmaps)
        else:
            # DarkPose refinement to alleviate biased coordinates
            keypoints = refine_keypoints_dark(
                keypoints, heatmaps, blur_kernel_size=self.blur_kernel_size)

        # Map heatmap-space coordinates back to the input-image scale
        return keypoints * self.scale_factor, scores