[Fix] Bug fixes in data converters and transforms #69

Merged
merged 15 commits into from Jan 20, 2022
Changes from 11 commits
7 changes: 4 additions & 3 deletions mmhuman3d/data/data_converters/agora.py
@@ -105,7 +105,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
for idx in tqdm(range(len(df))):
imgname = df.iloc[idx]['imgPath']
if self.res == (1280, 720):
-imgname.replace('.png', '_1280x720.png')
+imgname = imgname.replace('.png', '_1280x720.png')
img_path = os.path.join('images', mode, imgname)
valid_pers_idx = np.where(df.iloc[idx].at['isValid'])[0]
for pidx in valid_pers_idx:
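The removed line was a silent no-op: Python strings are immutable, so `str.replace` returns a new string rather than modifying the receiver, and the 720p filename suffix was never applied. A standalone illustration:

```python
imgname = 'image_0001.png'

# Bug: str.replace returns a new string; the result is discarded here.
imgname.replace('.png', '_1280x720.png')
print(imgname)  # image_0001.png

# Fix: rebind the name to the returned string.
imgname = imgname.replace('.png', '_1280x720.png')
print(imgname)  # image_0001_1280x720.png
```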
@@ -118,7 +118,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,

# obtain keypoints
keypoints2d = df.iloc[idx]['gt_joints_2d'][pidx]
-if self.res == '1280x720':
+if self.res == (1280, 720):
keypoints2d *= (720 / 2160)
keypoints3d = df.iloc[idx]['gt_joints_3d'][pidx]

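The second agora fix is a type mismatch: `self.res` is a tuple (see the check in the previous hunk), so comparing it against the string `'1280x720'` was always False and the 2160 → 720 keypoint rescaling branch was dead code. For example:

```python
res = (1280, 720)
print(res == '1280x720')   # False: a tuple never equals a string
print(res == (1280, 720))  # True: compare tuple against tuple
```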
@@ -170,7 +170,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
max(keypoints2d[:, 0]),
max(keypoints2d[:, 1])
]
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

keypoints2d_.append(keypoints2d)
keypoints3d_.append(keypoints3d)
16 changes: 14 additions & 2 deletions mmhuman3d/data/data_converters/base_converter.py
@@ -26,20 +26,32 @@ def convert(self):
@staticmethod
def _bbox_expand(bbox_xyxy: List[float],
scale_factor: float) -> List[float]:
"""Obtain bbox in xywh format given bbox in xyxy format
"""Expand bbox in xyxy format by scale factor
Args:
bbox_xyxy (List[float]): Bounding box in xyxy format
scale_factor (float): Scale factor to expand bbox

Returns:
-bbox_xywh (List[float]): Bounding box in xywh format
+bbox_xyxy (List[float]): Expanded bounding box in xyxy format
"""
center = [(bbox_xyxy[0] + bbox_xyxy[2]) / 2,
(bbox_xyxy[1] + bbox_xyxy[3]) / 2]
x1 = scale_factor * (bbox_xyxy[0] - center[0]) + center[0]
y1 = scale_factor * (bbox_xyxy[1] - center[1]) + center[1]
x2 = scale_factor * (bbox_xyxy[2] - center[0]) + center[0]
y2 = scale_factor * (bbox_xyxy[3] - center[1]) + center[1]
return [x1, y1, x2, y2]

+@staticmethod
+def _xyxy2xywh(bbox_xyxy: List[float]) -> List[float]:
+"""Obtain bbox in xywh format given bbox in xyxy format
+Args:
+bbox_xyxy (List[float]): Bounding box in xyxy format
+
+Returns:
+bbox_xywh (List[float]): Bounding box in xywh format
+"""
+x1, y1, x2, y2 = bbox_xyxy
+return [x1, y1, x2 - x1, y2 - y1]


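This is the crux of the PR: the old `_bbox_expand` docstring promised xywh, but the unchanged `return [x1, y1, x2, y2]` above shows it always produced an expanded xyxy box, so every caller that stored the result as `bbox_xywh` was saving corner coordinates in the width/height slots. The fix documents `_bbox_expand` honestly and adds `_xyxy2xywh` for the conversion. A self-contained sketch of the corrected two-step pipeline (module-level stand-ins for the static methods above):

```python
from typing import List


def bbox_expand(bbox_xyxy: List[float], scale_factor: float) -> List[float]:
    """Scale an [x1, y1, x2, y2] box about its center, staying in xyxy."""
    cx = (bbox_xyxy[0] + bbox_xyxy[2]) / 2
    cy = (bbox_xyxy[1] + bbox_xyxy[3]) / 2
    return [
        scale_factor * (bbox_xyxy[0] - cx) + cx,
        scale_factor * (bbox_xyxy[1] - cy) + cy,
        scale_factor * (bbox_xyxy[2] - cx) + cx,
        scale_factor * (bbox_xyxy[3] - cy) + cy,
    ]


def xyxy2xywh(bbox_xyxy: List[float]) -> List[float]:
    """Convert [x1, y1, x2, y2] to [x, y, w, h]."""
    x1, y1, x2, y2 = bbox_xyxy
    return [x1, y1, x2 - x1, y2 - y1]


# A 100x100 box expanded by 1.2 becomes 120x120 about the same center.
expanded = bbox_expand([0.0, 0.0, 100.0, 100.0], scale_factor=1.2)
print(expanded)             # [-10.0, -10.0, 110.0, 110.0]
print(xyxy2xywh(expanded))  # [-10.0, -10.0, 120.0, 120.0]
```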
6 changes: 4 additions & 2 deletions mmhuman3d/data/data_converters/h36m.py
@@ -327,8 +327,9 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
np.max(xs) + 1,
np.max(ys) + 1
])
-bbox_xywh = self._bbox_expand(
+bbox_xyxy = self._bbox_expand(
bbox_xyxy, scale_factor=0.9)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

# read GT 2D pose
keypoints2dall = np.reshape(poses_2d[frame_i, :],
@@ -372,7 +373,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,

metadata_path = os.path.join(dataset_path, 'metadata.xml')
if isinstance(metadata_path, str):
-cam_param = H36mCamera(metadata_path)
+camera = H36mCamera(metadata_path)
+cam_param = camera.generate_cameras_dict()
bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
keypoints2d_ = np.array(keypoints2d_).reshape((-1, 17, 3))
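Besides the bbox split, the camera fix above stores the parameter dict returned by `generate_cameras_dict()` instead of the `H36mCamera` wrapper object itself, so `cam_param` holds plain camera parameters rather than a Python object.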
4 changes: 3 additions & 1 deletion mmhuman3d/data/data_converters/h36m_spin.py
@@ -191,7 +191,9 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
np.max(xs) + 1,
np.max(ys) + 1
])
-bbox = self._bbox_expand(bbox_xyxy, scale_factor=0.9)
+bbox_xyxy = self._bbox_expand(
+bbox_xyxy, scale_factor=1.2)
+bbox = self._xyxy2xywh(bbox_xyxy)

# read GT 2D pose
keypoints2dall = np.reshape(poses_2d[frame_i, :],
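Note that this converter's expansion factor also changes from 0.9 to 1.2 in the hunk above, bringing h36m_spin in line with the 1.2 used by most other converters in this PR (the plain h36m converter keeps 0.9).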
3 changes: 2 additions & 1 deletion mmhuman3d/data/data_converters/humman.py
@@ -206,7 +206,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,

for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs):
bbox_xyxy = [xmin, ymin, xmax, ymax]
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)
bbox_xywh_.append(bbox_xywh)

# get keypoints3d (all frames)
3 changes: 2 additions & 1 deletion mmhuman3d/data/data_converters/insta_vibe.py
@@ -63,7 +63,8 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
max(keypoints2d_vis[:, 0]),
max(keypoints2d_vis[:, 1])
]
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

vid_path_.append(vid_id)
image_path_.append(image_path)
9 changes: 5 additions & 4 deletions mmhuman3d/data/data_converters/lsp.py
@@ -72,16 +72,17 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
keypoints2d14 = np.hstack([keypoints2d14, np.ones([14, 1])])

# bbox
-bbox_xywh = [
+bbox_xyxy = [
min(keypoints2d14[:, 0]),
min(keypoints2d14[:, 1]),
max(keypoints2d14[:, 0]),
max(keypoints2d14[:, 1])
]

-if 0 <= bbox_xywh[0] <= w and 0 <= bbox_xywh[2] <= w and \
-0 <= bbox_xywh[1] <= h and 0 <= bbox_xywh[3] <= h:
-bbox_xywh = self._bbox_expand(bbox_xywh, scale_factor=1.2)
+if 0 <= bbox_xyxy[0] <= w and 0 <= bbox_xyxy[2] <= w and \
+0 <= bbox_xyxy[1] <= h and 0 <= bbox_xyxy[3] <= h:
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)
else:
print('Bbox out of image bounds. Skipping image {}'.format(
imgname))
9 changes: 5 additions & 4 deletions mmhuman3d/data/data_converters/lsp_extended.py
@@ -60,16 +60,17 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
keypoints2d14 = np.hstack([keypoints2d14, np.ones([14, 1])])

# bbox
-bbox_xywh = [
+bbox_xyxy = [
min(keypoints2d14[:, 0]),
min(keypoints2d14[:, 1]),
max(keypoints2d14[:, 0]),
max(keypoints2d14[:, 1])
]

-if 0 <= bbox_xywh[0] <= w and 0 <= bbox_xywh[2] <= w and \
-0 <= bbox_xywh[1] <= h and 0 <= bbox_xywh[3] <= h:
-bbox_xywh = self._bbox_expand(bbox_xywh, scale_factor=1.2)
+if 0 <= bbox_xyxy[0] <= w and 0 <= bbox_xyxy[2] <= w and \
+0 <= bbox_xyxy[1] <= h and 0 <= bbox_xyxy[3] <= h:
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)
else:
print('Bbox out of image bounds. Skipping image {}'.format(
imgname))
3 changes: 2 additions & 1 deletion mmhuman3d/data/data_converters/mpi_inf_3dhp.py
@@ -45,7 +45,8 @@ def extract_keypoints(
max(keypoints2d[:, 0]),
max(keypoints2d[:, 1])
]
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

# check that all joints are visible
h, w = 2048, 2048
3 changes: 2 additions & 1 deletion mmhuman3d/data/data_converters/penn_action.py
@@ -75,7 +75,8 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
max(kp[:, 0]),
max(kp[:, 1])
]
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)
# store relative instead of absolute image path
image_path_.append(img_path.replace(dataset_path + '/', ''))
bbox_xywh_.append(bbox_xywh)
15 changes: 8 additions & 7 deletions mmhuman3d/data/data_converters/posetrack.py
@@ -49,23 +49,24 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
for ann_file in tqdm(ann_files):
json_data = mmcv.load(ann_file)

-counter = 0
-for im, ann in zip(json_data['images'], json_data['annotations']):
-# sample every 10 image and check image is labelled
-if counter % 10 != 0 and not im['is_labeled']:
-continue
+imgs = {}
+for img in json_data['images']:
+imgs[img['id']] = img
+
+for ann in json_data['annotations']:
+image_id = ann['image_id']
+image_path = str(imgs[image_id]['file_name'])
+
keypoints2d = np.array(ann['keypoints']).reshape(17, 3)
keypoints2d[keypoints2d[:, 2] > 0, 2] = 1
# check if all major body joints are annotated
if sum(keypoints2d[5:, 2] > 0) < 12:
continue

-image_path = im['file_name']
image_abs_path = os.path.join(dataset_path, image_path)
if not os.path.exists(image_abs_path):
print('{} does not exist!'.format(image_abs_path))
continue
-counter += 1
bbox_xywh = np.array(ann['bbox'])

# store data
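The rewrite fixes a pairing bug: `images` and `annotations` are COCO-style lists of different lengths and orderings (a frame can carry zero or several person annotations), so `zip`-ping them matched keypoints to the wrong frames; the old sampling guard with `and` also skipped only frames that were both off-sample and unlabeled, contrary to its comment. The new code indexes images by id first. A sketch of the lookup pattern with made-up data:

```python
json_data = {
    'images': [
        {'id': 10, 'file_name': 'seq0/000001.jpg'},
        {'id': 11, 'file_name': 'seq0/000002.jpg'},
    ],
    'annotations': [
        # Two people on frame 11, none on frame 10: zip() over the two
        # lists would pair each annotation with the wrong image record.
        {'image_id': 11, 'bbox': [5, 5, 50, 120]},
        {'image_id': 11, 'bbox': [60, 8, 45, 115]},
    ],
}

# Index images by id once, then resolve every annotation through it.
imgs = {img['id']: img for img in json_data['images']}
for ann in json_data['annotations']:
    image_path = str(imgs[ann['image_id']]['file_name'])
    print(image_path, ann['bbox'])
```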
7 changes: 4 additions & 3 deletions mmhuman3d/data/data_converters/pw3d.py
@@ -97,15 +97,16 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
for valid_i in range(valid_pose.shape[0]):
keypoints2d = valid_keypoints_2d[valid_i, :, :].T
keypoints2d = keypoints2d[keypoints2d[:, 2] > 0, :]
-bbox_xywh = [
+bbox_xyxy = [
min(keypoints2d[:, 0]),
min(keypoints2d[:, 1]),
max(keypoints2d[:, 0]),
max(keypoints2d[:, 1])
]

-bbox_xywh = self._bbox_expand(
-bbox_xywh, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(
+bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

image_path = valid_img_names[valid_i]
image_abs_path = os.path.join(root_path, image_path)
5 changes: 3 additions & 2 deletions mmhuman3d/data/data_converters/surreal.py
@@ -184,10 +184,10 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
success, image = vidcap.read()
if not success:
break
+frame += 1
# image name
imgname = os.path.join(img_dir,
'frame_%06d.jpg' % frame)
-frame += 1
# save image
cv2.imwrite(imgname, image)

@@ -202,7 +202,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
max(keypoints2d[:, 0]),
max(keypoints2d[:, 1])
]
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

# add confidence column
keypoints2d = np.hstack([keypoints2d, np.ones((24, 1))])
3 changes: 2 additions & 1 deletion mmhuman3d/data/data_converters/up3d.py
@@ -83,7 +83,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
np.min(ys),
np.max(xs) + 1,
np.max(ys) + 1])
-bbox_xywh = self._bbox_expand(bbox_xyxy, scale_factor=0.9)
+bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=0.9)
+bbox_xywh = self._xyxy2xywh(bbox_xyxy)

# pose and shape
pkl_file = os.path.join(dataset_path, '%s_body.pkl' % img_base)
4 changes: 2 additions & 2 deletions mmhuman3d/data/datasets/pipelines/transforms.py
@@ -327,7 +327,7 @@ def __call__(self, results):
if 'keypoints2d' in results:
assert self.flip_pairs is not None
width = results['img'][:, ::-1, :].shape[1]
-keypoints2d = results['keypoints2d']
+keypoints2d = results['keypoints2d'].copy()
keypoints2d = _flip_keypoints(keypoints2d, self.flip_pairs, width)
results['keypoints2d'] = keypoints2d

@@ -687,7 +687,7 @@ def __call__(self, results):
results['img'] = img

if 'keypoints2d' in results:
-keypoints2d = results['keypoints2d']
+keypoints2d = results['keypoints2d'].copy()
num_keypoints = len(keypoints2d)
for i in range(num_keypoints):
if keypoints2d[i][2] > 0.0:
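Both `.copy()` additions guard against the same numpy pitfall: `results['keypoints2d']` is a reference to the dataset's stored array, so the in-place edits performed while flipping or transforming would leak back into the cached ground truth and corrupt later epochs. A minimal standalone reproduction of the aliasing:

```python
import numpy as np

results = {'keypoints2d': np.array([[10.0, 20.0, 1.0]])}

kps = results['keypoints2d']         # alias: same underlying buffer
kps[0, 0] = 99.0
print(results['keypoints2d'][0, 0])  # 99.0 -- the stored array changed too

results = {'keypoints2d': np.array([[10.0, 20.0, 1.0]])}
kps = results['keypoints2d'].copy()  # independent buffer, as in the fix
kps[0, 0] = 99.0
print(results['keypoints2d'][0, 0])  # 10.0 -- original left intact
```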