open-mmlab · wusize · Jun 29, 2022 · Jun 30, 2022 · Jul 12, 2022 · Jul 12, 2022
diff --git a/...dy/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn32x32x32_cpn80x80x20_campus_cam3.py b/...dy/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn32x32x32_cpn80x80x20_campus_cam3.py
@@ -94,6 +94,7 @@
     type='DetectAndRegress',
     backbone=None,
     pretrained=None,
+    keypoint_head=None,
     human_detector=dict(
         type='VoxelCenterDetector',
         image_size=image_size,

diff --git a/...dy/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn64x64x64_cpn80x80x20_campus_cam3.py b/...dy/3d_kpt_mview_rgb_img/voxelpose/campus/voxelpose_prn64x64x64_cpn80x80x20_campus_cam3.py
@@ -94,6 +94,7 @@
 model = dict(
     type='DetectAndRegress',
     backbone=None,
+    keypoint_head=None,
     pretrained=None,
     human_detector=dict(
         type='VoxelCenterDetector',

diff --git a/...w_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md b/...w_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.md
@@ -34,4 +34,4 @@ Results on CMU Panoptic dataset.
 
 | Arch                                                       |  mAP  |  mAR  | MPJPE | Recall@500mm |                            ckpt                            |                            log                            |
 | :--------------------------------------------------------- | :---: | :---: | :---: | :----------: | :--------------------------------------------------------: | :-------------------------------------------------------: |
-| [prn64_cpn80_res50](/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py) | 97.31 | 97.99 | 17.57 |    99.85     | [ckpt](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-545c150e_20211103.pth) | [log](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5_20211103.log.json) |
+| [prn64_cpn80_res50](/configs/body/3d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py) | 97.15 | 97.70 | 17.09 |    99.25     | [ckpt](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-358648cb_20230118.pth) | [log](https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5_20230118.log.json) |
diff --git a/...d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py b/...d_kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.py
@@ -65,44 +65,30 @@
         subset='validation'))
 
 # model settings
-backbone = dict(
-    type='AssociativeEmbedding',
-    pretrained=None,
-    backbone=dict(type='ResNet', depth=50),
-    keypoint_head=dict(
-        type='DeconvHead',
-        in_channels=2048,
-        out_channels=num_joints,
-        num_deconv_layers=3,
-        num_deconv_filters=(256, 256, 256),
-        num_deconv_kernels=(4, 4, 4),
-        loss_keypoint=dict(
-            type='MultiLossFactory',
-            num_joints=15,
-            num_stages=1,
-            ae_loss_type='exp',
-            with_ae_loss=[False],
-            push_loss_factor=[0.001],
-            pull_loss_factor=[0.001],
-            with_heatmaps_loss=[True],
-            heatmaps_loss_factor=[1.0],
-        )),
-    train_cfg=dict(),
-    test_cfg=dict(
-        num_joints=num_joints,
-        nms_kernel=None,
-        nms_padding=None,
-        tag_per_joint=None,
-        max_num_people=None,
-        detection_threshold=None,
-        tag_threshold=None,
-        use_detection_val=None,
-        ignore_too_much=None,
+backbone = dict(type='ResNet', depth=50)
+keypoint_head = dict(
+    type='DeconvHead',
+    in_channels=2048,
+    out_channels=num_joints,
+    num_deconv_layers=3,
+    num_deconv_filters=(256, 256, 256),
+    num_deconv_kernels=(4, 4, 4),
+    loss_keypoint=dict(
+        type='MultiLossFactory',
+        num_joints=15,
+        num_stages=1,
+        ae_loss_type='exp',
+        with_ae_loss=[False],
+        push_loss_factor=[0.001],
+        pull_loss_factor=[0.001],
+        with_heatmaps_loss=[True],
+        heatmaps_loss_factor=[1.0],
     ))
 
 model = dict(
     type='DetectAndRegress',
     backbone=backbone,
+    keypoint_head=keypoint_head,
     pretrained='checkpoints/resnet_50_deconv.pth.tar',
     human_detector=dict(
         type='VoxelCenterDetector',

diff --git a/..._kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml b/..._kpt_mview_rgb_img/voxelpose/panoptic/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5.yml
@@ -15,8 +15,8 @@ Models:
   Results:
   - Dataset: CMU Panoptic
     Metrics:
-      MPJPE: 17.57
-      mAP: 97.31
-      mAR: 97.99
+      MPJPE: 17.09
+      mAP: 97.15
+      mAR: 97.7
     Task: Body 3D Keypoint
-  Weights: https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-545c150e_20211103.pth
+  Weights: https://download.openmmlab.com/mmpose/body3d/voxelpose/voxelpose_prn64x64x64_cpn80x80x20_panoptic_cam5-358648cb_20230118.pth
diff --git a/...body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn32x32x32_cpn48x48x12_shelf_cam5.py b/...body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn32x32x32_cpn48x48x12_shelf_cam5.py
@@ -91,6 +91,7 @@
 model = dict(
     type='DetectAndRegress',
     backbone=None,
+    keypoint_head=None,
     pretrained=None,
     human_detector=dict(
         type='VoxelCenterDetector',

diff --git a/...body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn64x64x64_cpn80x80x20_shelf_cam5.py b/...body/3d_kpt_mview_rgb_img/voxelpose/shelf/voxelpose_prn64x64x64_cpn80x80x20_shelf_cam5.py
@@ -92,6 +92,7 @@
     type='DetectAndRegress',
     backbone=None,
     pretrained=None,
+    keypoint_head=None,
     human_detector=dict(
         type='VoxelCenterDetector',
         image_size=image_size,

diff --git a/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py b/mmpose/datasets/datasets/base/kpt_3d_mview_rgb_img_direct_dataset.py
@@ -6,7 +6,6 @@
 
 import json_tricks as json
 import numpy as np
-from scipy.io import loadmat
 from torch.utils.data import Dataset
 
 from mmpose.datasets import DatasetInfo
@@ -249,8 +248,5 @@ def _load_files(self):
 
         assert osp.exists(self.gt_pose_db_file), f'gt_pose_db_file ' \
             f"{self.gt_pose_db_file} doesn't exist, please check again"
-        gt = loadmat(self.gt_pose_db_file)
-        self.gt_pose_db = np.array(np.array(
-            gt['actor3D'].tolist()).tolist()).squeeze()
-
+        self.gt_pose_db = np.load(self.gt_pose_db_file)
         self.num_persons = len(self.gt_pose_db)
diff --git a/mmpose/models/detectors/multiview_pose.py b/mmpose/models/detectors/multiview_pose.py
@@ -14,7 +14,7 @@
 from mmpose.core.post_processing.post_transforms import (
     affine_transform_torch, get_affine_transform)
 from .. import builder
-from ..builder import POSENETS
+from ..builder import BACKBONES, HEADS, POSENETS
 from ..utils.misc import torch_meshgrid_ij
 from .base import BasePose
 
@@ -138,7 +138,9 @@ class DetectAndRegress(BasePose):
     """DetectAndRegress approach for multiview human pose detection.
 
     Args:
-        backbone (ConfigDict): Dictionary to construct the 2D pose detector
+        backbone (ConfigDict): Dictionary to construct the backbone.
+        keypoint_head (ConfigDict): Dictionary to construct the 2D
+            keypoint head.
         human_detector (ConfigDict): dictionary to construct human detector
         pose_regressor (ConfigDict): dictionary to construct pose regressor
         train_cfg (ConfigDict): Config for training. Default: None.
@@ -150,6 +152,7 @@ class DetectAndRegress(BasePose):
 
     def __init__(self,
                  backbone,
+                 keypoint_head,
                  human_detector,
                  pose_regressor,
                  train_cfg=None,
@@ -158,11 +161,16 @@ def __init__(self,
                  freeze_2d=True):
         super(DetectAndRegress, self).__init__()
         if backbone is not None:
-            self.backbone = builder.build_posenet(backbone)
-            if self.training and pretrained is not None:
-                load_checkpoint(self.backbone, pretrained)
+            self.backbone = BACKBONES.build(backbone)
         else:
             self.backbone = None
+        if keypoint_head is not None:
+            self.keypoint_head = HEADS.build(keypoint_head)
+        else:
+            self.keypoint_head = None
+
+        if self.training and pretrained is not None:
+            load_checkpoint(self, pretrained)
 
         self.freeze_2d = freeze_2d
         self.human_detector = builder.MODELS.build(human_detector)
@@ -188,8 +196,11 @@ def train(self, mode=True):
             Module: self
         """
         super().train(mode)
-        if mode and self.freeze_2d and self.backbone is not None:
-            self._freeze(self.backbone)
+        if mode and self.freeze_2d:
+            if self.backbone is not None:
+                self._freeze(self.backbone)
+            if self.keypoint_head is not None:
+                self._freeze(self.keypoint_head)
 
         return self
 
@@ -283,6 +294,12 @@ def train_step(self, data_batch, optimizer, **kwargs):
 
         return outputs
 
+    def predict_heatmap(self, img):
+        output = self.backbone(img)
+        output = self.keypoint_head(output)
+
+        return output
+
     def forward_train(self,
                       img,
                       img_metas,
@@ -331,7 +348,7 @@ def forward_train(self,
             feature_maps = []
             assert isinstance(img, list)
             for img_ in img:
-                feature_maps.append(self.backbone.forward_dummy(img_)[0])
+                feature_maps.append(self.predict_heatmap(img_)[0])
 
         losses = dict()
         human_candidates, human_loss = self.human_detector.forward_train(
@@ -351,8 +368,9 @@ def forward_train(self,
             heatmaps_tensor = torch.cat(feature_maps, dim=0)
             targets_tensor = torch.cat(targets, dim=0)
             masks_tensor = torch.cat(masks, dim=0)
-            losses_2d_ = self.backbone.get_loss(heatmaps_tensor,
-                                                targets_tensor, masks_tensor)
+            losses_2d_ = self.keypoint_head.get_loss(heatmaps_tensor,
+                                                     targets_tensor,
+                                                     masks_tensor)
             for k, v in losses_2d_.items():
                 losses_2d[k + '_2d'] = v
             losses.update(losses_2d)
@@ -400,7 +418,7 @@ def forward_test(
             feature_maps = []
             assert isinstance(img, list)
             for img_ in img:
-                feature_maps.append(self.backbone.forward_dummy(img_)[0])
+                feature_maps.append(self.predict_heatmap(img_)[0])
 
         human_candidates = self.human_detector.forward_test(
             None, img_metas, feature_maps)
@@ -506,7 +524,7 @@ def forward_dummy(self, img, input_heatmaps=None, num_candidates=5):
             feature_maps = []
             assert isinstance(img, list)
             for img_ in img:
-                feature_maps.append(self.backbone.forward_dummy(img_)[0])
+                feature_maps.append(self.predict_heatmap(img_)[0])
 
         _ = self.human_detector.forward_dummy(feature_maps)
 

diff --git a/model-index.yml b/model-index.yml
@@ -109,8 +109,8 @@ Import:
 - configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
 - configs/face/2d_kpt_sview_rgb_img/topdown_heatmap/wflw/hrnetv2_wflw.yml
 - configs/fashion/2d_kpt_sview_rgb_img/deeppose/deepfashion/resnet_deepfashion.yml
-- configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml
 - configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion2/resnet_deepfashion2.yml
+- configs/fashion/2d_kpt_sview_rgb_img/topdown_heatmap/deepfashion/resnet_deepfashion.yml
 - configs/hand/2d_kpt_sview_rgb_img/deeppose/onehand10k/resnet_onehand10k.yml
 - configs/hand/2d_kpt_sview_rgb_img/deeppose/panoptic2d/resnet_panoptic2d.yml
 - configs/hand/2d_kpt_sview_rgb_img/deeppose/rhd2d/resnet_rhd2d.yml

diff --git a/tests/data/campus/actorsGT.mat b/tests/data/campus/actorsGT.mat
diff --git a/tests/data/campus/actorsGT.npy b/tests/data/campus/actorsGT.npy
diff --git a/tests/data/shelf/actorsGT.mat b/tests/data/shelf/actorsGT.mat
diff --git a/tests/data/shelf/actorsGT.npy b/tests/data/shelf/actorsGT.npy
diff --git a/tests/test_datasets/test_body3d_dataset.py b/tests/test_datasets/test_body3d_dataset.py
@@ -379,7 +379,7 @@ def test_body3dmview_direct_campus_dataset():
         cam_file=f'{data_root}/calibration_campus.json',
         train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl',
         test_pose_db_file=f'{data_root}/pred_campus_maskrcnn_hrnet_coco.pkl',
-        gt_pose_db_file=f'{data_root}/actorsGT.mat',
+        gt_pose_db_file=f'{data_root}/actorsGT.npy',
     )
 
     test_data_cfg = dict(
@@ -398,7 +398,7 @@ def test_body3dmview_direct_campus_dataset():
         cam_file=f'{data_root}/calibration_campus.json',
         train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl',
         test_pose_db_file=f'{data_root}/pred_campus_maskrcnn_hrnet_coco.pkl',
-        gt_pose_db_file=f'{data_root}/actorsGT.mat',
+        gt_pose_db_file=f'{data_root}/actorsGT.npy',
     )
 
     # test when dataset_info is None
@@ -507,7 +507,7 @@ def test_body3dmview_direct_shelf_dataset():
         cam_file=f'{data_root}/calibration_shelf.json',
         train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl',
         test_pose_db_file=f'{data_root}/pred_shelf_maskrcnn_hrnet_coco.pkl',
-        gt_pose_db_file=f'{data_root}/actorsGT.mat',
+        gt_pose_db_file=f'{data_root}/actorsGT.npy',
     )
 
     test_data_cfg = dict(
@@ -526,7 +526,7 @@ def test_body3dmview_direct_shelf_dataset():
         cam_file=f'{data_root}/calibration_shelf.json',
         train_pose_db_file=f'{data_root}/panoptic_training_pose.pkl',
         test_pose_db_file=f'{data_root}/pred_shelf_maskrcnn_hrnet_coco.pkl',
-        gt_pose_db_file=f'{data_root}/actorsGT.mat',
+        gt_pose_db_file=f'{data_root}/actorsGT.npy',
     )
 
     # test when dataset_info is None

diff --git a/tests/test_models/test_multiview_pose.py b/tests/test_models/test_multiview_pose.py
@@ -63,6 +63,7 @@ def test_voxelpose_forward():
     model_cfg = dict(
         type='DetectAndRegress',
         backbone=None,
+        keypoint_head=None,
         human_detector=dict(
             type='VoxelCenterDetector',
             image_size=[960, 512],