From fba748c7da2934ef00098351923db9e4ef87c5f1 Mon Sep 17 00:00:00 2001
From: davidliyutong
Date: Fri, 21 Apr 2023 21:21:03 +0800
Subject: [PATCH] v1.8.1

feat: add timestamp to selected_frames.sequence_view
refactor: move marker detection to another repo
build: update numpy version
---
 realsense_recorder/filters/fiducial.py | 368 -------------------------
 realsense_recorder/io/Sync.py          |  27 +-
 requirements.txt                       |   3 +-
 tests/test_reprojection.py             |  20 --
 4 files changed, 22 insertions(+), 396 deletions(-)
 delete mode 100644 realsense_recorder/filters/fiducial.py
 delete mode 100644 tests/test_reprojection.py

diff --git a/realsense_recorder/filters/fiducial.py b/realsense_recorder/filters/fiducial.py
deleted file mode 100644
index 91d7c9b..0000000
--- a/realsense_recorder/filters/fiducial.py
+++ /dev/null
@@ -1,368 +0,0 @@
-import copy
-import dataclasses
-import logging
-import os
-import os.path as osp
-import pickle
-from typing import Union, List, Optional, Dict, Tuple
-
-import cv2
-import numpy as np
-import open3d as o3d
-import pupil_apriltags
-import json
-from scipy.spatial.transform.rotation import Rotation as R
-import tqdm
-
-logger = logging.getLogger("realsense_recorder.filters.fiducial")
-
-from realsense_recorder.common.utils import rvec_tvec_to_matrix, R_T_to_matrix
-
-
-@dataclasses.dataclass()
-class AprilTagFiducialDetectHelper:
-    marker_length_m: float  # in meter
-    april_tag_family: str = "tag25h9"
-    camera_matrix: Union[List[List[float]], np.ndarray] = None  # [[ fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]]
-    camera_distort: Optional[Union[List[float], np.ndarray]] = None  # [ 0.1927826544288516, -0.34972530095573834, 0.011612480526787846, -0.00393533140166019, -2.9216752723525734 ]
-    dof_cache: Optional[Dict[str, Tuple[np.ndarray, np.ndarray]]] = None
-    font: int = cv2.FONT_HERSHEY_SIMPLEX
-
-    def __post_init__(self):
-        self.marker_points = np.array([[-self.marker_length_m / 2, self.marker_length_m / 2, 0],
-                                       [self.marker_length_m / 2, self.marker_length_m / 2, 0],
-                                       [self.marker_length_m / 2, -self.marker_length_m / 2, 0],
-                                       [-self.marker_length_m / 2, -self.marker_length_m / 2, 0]])
-
-        self.detector = pupil_apriltags.Detector(
-            families=self.april_tag_family,
-            nthreads=1,
-            quad_decimate=1.0,
-            quad_sigma=0.0,
-            refine_edges=1,
-            decode_sharpening=0.25,
-            debug=0
-        )
-
-        if self.camera_distort is not None:
-            if isinstance(self.camera_distort, list):
-                self.camera_distort = np.array(self.camera_distort)
-            self.camera_distort = self.camera_distort.squeeze()
-            assert self.camera_distort.shape[0] in [4, 5, 8, 12, 14]
-
-        if self.camera_matrix is not None:
-            if isinstance(self.camera_matrix, list):
-                self.camera_matrix = np.array(self.camera_matrix)
-            assert self.camera_matrix.shape == (3, 3)
-
-        self.dof_cache = {}
-        self.dof_freq = {}
-
-    @staticmethod
-    def depth2xyz(u, v, depth, K):
-        x = (u - K[0, 2]) * depth / K[0, 0]
-        y = (v - K[1, 2]) * depth / K[1, 1]
-        return np.array([x, y, depth]).reshape(3, 1)
-
-    @staticmethod
-    def apply_polygon_mask_color(img: np.ndarray, polygons: List[np.ndarray]):
-        mask = np.zeros(img.shape, dtype=img.dtype)
-        for polygon in polygons:
-            polygon = polygon.reshape(-1, 2).astype(np.int32)
-            cv2.fillConvexPoly(mask, polygon, (255, 255, 255))
-        res = cv2.bitwise_and(img, mask)
-        return res
-
-    @staticmethod
-    def apply_polygon_mask_depth(img: np.ndarray, polygons: List[np.ndarray]):
-        mask = np.zeros(img.shape, dtype=img.dtype)
-        for polygon in polygons:
-            polygon = polygon.reshape(-1, 2).astype(np.int32)
-            cv2.fillConvexPoly(mask, polygon, (np.iinfo(img.dtype).max))
-        res = cv2.bitwise_and(img, mask)
-        return res
-
-    @classmethod
-    def create_roi_masked_point_cloud(cls,
-                                      color_frame: np.ndarray,
-                                      depth_frame: np.ndarray,
-                                      corners: List[np.ndarray],
-                                      camera_matrix: np.ndarray) -> Tuple[Optional[o3d.geometry.PointCloud], Optional[Exception]]:
-        color_frame_masked = cls.apply_polygon_mask_color(color_frame, [corner.reshape(-1, 2).astype(np.int32) for corner in corners])
-        depth_frame_masked = cls.apply_polygon_mask_depth(depth_frame, [corner.reshape(-1, 2).astype(np.int32) for corner in corners])
-
-        if depth_frame_masked.sum() > 0:
-            color_frame_o3d = o3d.geometry.Image(cv2.cvtColor(color_frame_masked.astype(np.uint8), cv2.COLOR_BGRA2RGB))
-            depth_frame_o3d = o3d.geometry.Image(depth_frame_masked.astype(np.uint16))
-            rgbd_image = o3d.geometry.RGBDImage().create_from_color_and_depth(
-                color_frame_o3d,
-                depth_frame_o3d,
-                4000.,  # MAGIC NUMBER
-                2.,  # MAGIC NUMBER
-                convert_rgb_to_intensity=False
-            )
-
-            pcd = o3d.geometry.PointCloud().create_from_rgbd_image(
-                image=rgbd_image,
-                intrinsic=o3d.camera.PinholeCameraIntrinsic(
-                    depth_frame_masked.shape[1],
-                    depth_frame_masked.shape[0],
-                    camera_matrix[0][0],
-                    camera_matrix[1][1],
-                    camera_matrix[0][2],
-                    camera_matrix[1][2],
-                ),
-                extrinsic=np.eye(4),
-            )
-            if len(pcd.points) < 4:
-                return None, Exception("no enough depth")
-            else:
-                return pcd, None
-        else:
-            return None, Exception("no depth")
-
-    def reject_false_rotation(self,
-                              color_frame,
-                              depth_frame,
-                              rvec: np.ndarray,
-                              tvec: np.ndarray,
-                              corners: np.ndarray,
-                              debug=False) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], Optional[Exception]]:
-        pcd, err = self.create_roi_masked_point_cloud(color_frame, depth_frame, [corners], self.camera_matrix)
-        if err is None:
-            pcd.estimate_normals(
-                o3d.geometry.KDTreeSearchParamHybrid(radius=self.marker_length_m, max_nn=30)
-            )
-            position = np.asarray(pcd.points).mean(axis=0)
-            normals = np.asarray(pcd.normals)
-            normal_vec = normals.mean(axis=0)
-            normal_vec /= np.linalg.norm(normal_vec)
-
-            z_axis = np.array([[0], [0], [1]])
-            candidates = [cv2.Rodrigues(x)[0] @ z_axis for x in rvec]
-            score = np.array([float(abs(np.dot(x.T, normal_vec))) for x in candidates])
-
-            if debug:
-                arrow = o3d.geometry.TriangleMesh().create_arrow(
-                    cylinder_radius=self.marker_length_m / 20,
-                    cone_radius=self.marker_length_m / 10,
-                    cylinder_height=self.marker_length_m / 2,
-                    cone_height=self.marker_length_m / 2,
-                )
-                R, _ = cv2.Rodrigues(np.cross(normal_vec, [0, 0, 1]))
-                # arrow.scale(3, center=arrow.get_center())
-                arrow.translate(position)
-                arrow.rotate(R, center=arrow.get_center())
-
-                o3d.visualization.draw_geometries([o3d.geometry.TriangleMesh().create_coordinate_frame(size=0.3, origin=[0, 0, 0]), pcd, arrow])
-                # o3d.visualization.draw_geometries([o3d.geometry.TriangleMesh().create_coordinate_frame(size=0.3, origin=[0, 0, 0]), arrow])
-
-            return rvec[score.argmax()], tvec[score.argmax()], None
-        else:
-            return None, None, Exception("no depth")
-
-    def preview_one_frame(self, color_frame) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
-        output_color_frame = copy.deepcopy(color_frame)
-        gray = cv2.cvtColor(color_frame, cv2.COLOR_BGR2GRAY)
-        corners, ids, rejectedImgPoints = self.detector.detectMarkers(gray)
-        if ids is not None:
-            cv2.aruco.drawDetectedMarkers(output_color_frame, corners, ids)
-            logger.info(f"detected {len(corners)} markers, with ids {ids}")
-        return output_color_frame, corners, ids
-
-    # num = 0
-    def process_one_frame(self,
-                          color_frame,
-                          depth_frame,
-                          depth_scale: float = 1000.,
-                          undistort=True,
-                          debug=False):
-        undistort = False if self.camera_distort is None else undistort
-        if self.camera_matrix is None:
-            return None, None, None, Exception("no camera matrix")
-
-        if undistort:
-            color_frame_undistort = cv2.undistort(color_frame, self.camera_matrix, self.camera_distort)
-            depth_frame_undistort = cv2.undistort(depth_frame, self.camera_matrix, self.camera_distort)
-        else:
-            color_frame_undistort = color_frame
-            depth_frame_undistort = depth_frame
-        output_color_frame = copy.deepcopy(color_frame_undistort)
-        depth_frame_undistort = cv2.medianBlur(depth_frame_undistort, 5)
-
-        gray = cv2.cvtColor(color_frame_undistort, cv2.COLOR_BGR2GRAY)
-
-        detection_results = self.detector.detect(
-            gray,
-            estimate_tag_pose=True,
-            camera_params=(self.camera_matrix[0, 0], self.camera_matrix[1, 1], self.camera_matrix[0, 2], self.camera_matrix[1, 2]),
-            tag_size=self.marker_length_m,
-        )
-
-        if len(detection_results):
-            res = {}
-            for detection_result in detection_results:
-                pose_rot = detection_result.pose_R
-                pose_t = detection_result.pose_t
-                assert len(pose_rot.shape) == len(pose_t.shape) == 2
-
-                rvec = R.from_matrix(pose_rot).as_rotvec()
-                tvec = pose_t.reshape((3,))
-
-                res[detection_result.tag_id] = (rvec, tvec, True)
-
-            return res, output_color_frame, depth_frame_undistort, None
-        else:
-            return None, output_color_frame, depth_frame_undistort, Exception("no marker found")
-
-    def vis_3d(self, detection: Dict[str, Tuple[np.ndarray, np.ndarray]], color_frame=None, depth_frame=None):
-        marker_meshes = [o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3, origin=[0, 0, 0])]
-        for marker_id, (rvec, tvec, is_unique) in detection.items():
-            if is_unique:
-                new_mesh = o3d.geometry.TriangleMesh().create_coordinate_frame(size=0.1, origin=tvec)
-                R, _ = cv2.Rodrigues(rvec)
-                new_mesh.rotate(R, center=tvec)
-                marker_meshes.append(new_mesh)
-            else:
-                for i in range(len(rvec)):
-                    new_mesh = o3d.geometry.TriangleMesh().create_coordinate_frame(size=0.1, origin=tvec[i])
-                    R, _ = cv2.Rodrigues(rvec[i])
-                    new_mesh.rotate(R, center=tvec[i])
-                    marker_meshes.append(new_mesh)
-        if color_frame is not None:
-            color_frame_o3d = o3d.geometry.Image(cv2.cvtColor(color_frame.astype(np.uint8), cv2.COLOR_BGRA2RGB))
-            depth_frame_o3d = o3d.geometry.Image(depth_frame.astype(np.uint16))
-
-            rgbd_image = o3d.geometry.RGBDImage().create_from_color_and_depth(
-                color_frame_o3d,
-                depth_frame_o3d,
-                4000.,  # MAGIC NUMBER
-                2.,  # MAGIC NUMBER
-                convert_rgb_to_intensity=False
-            )
-
-            pcd = o3d.geometry.PointCloud().create_from_rgbd_image(
-                image=rgbd_image,
-                intrinsic=o3d.camera.PinholeCameraIntrinsic(
-                    depth_frame.shape[1],
-                    depth_frame.shape[0],
-                    self.camera_matrix[0, 0],
-                    self.camera_matrix[1, 1],
-                    self.camera_matrix[0, 2],
-                    self.camera_matrix[1, 2],
-                ),
-                extrinsic=np.eye(4),
-            )
-            marker_meshes.append(pcd)
-        o3d.visualization.draw_geometries(marker_meshes)
-        return marker_meshes
-
-    def vis_2d(self, detection: Dict[str, Tuple[np.ndarray, np.ndarray]], color_frame):
-        for marker_id, (rvec, tvec, is_unique) in detection.items():
-            if is_unique:
-                cv2.drawFrameAxes(color_frame, self.camera_matrix, self.camera_distort[:5], rvec, tvec, 0.03, 2)
-            else:
-                cv2.drawFrameAxes(color_frame, self.camera_matrix, self.camera_distort[:5], rvec[0], tvec[1], 0.03, 2)
-        cv2.putText(color_frame, "Id: " + str(list(detection.keys())), (0, 64), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
-        scale = int(color_frame.shape[0] / 480)
-        cv2.imshow("frame", color_frame[::scale, ::scale, :])
-        key = cv2.waitKey(0)
-        return color_frame
-
-
-def detect_fiducial_marker(base_dir: str,
-                           enabled_cameras: List[str] = None,
-                           marker_length_m: float = 0.032,
-                           april_tag_family: str = "tag25h9",
-                           sequence_length: int = None,
-                           debug: bool = False,
-                           ):
-    selected_frames_info = json.load(open(os.path.join(base_dir, "selected_frames.json")))
-    sequence_length = selected_frames_info['meta']['sequence_length'] if sequence_length is None else sequence_length
-    calibration = json.load(open(os.path.join(base_dir, 'calibration.json')))
-
-    cam_ids = list(calibration['cameras'].keys())
-    master_cam_id = selected_frames_info['meta']['master_camera']
-    enabled_camera_ids = cam_ids if enabled_cameras is None else enabled_cameras
-
-    transformation_matrix = {
-        cam_id: np.eye(4) if cam_id == master_cam_id else R_T_to_matrix(**calibration['camera_poses'][cam_id + '_to_' + master_cam_id]) for cam_id in enabled_camera_ids
-    }
-
-    ctxs = [
-        AprilTagFiducialDetectHelper(
-            marker_length_m=marker_length_m,
-            april_tag_family=april_tag_family,
-            camera_matrix=np.array(calibration['cameras'][cam]['K']),
-            camera_distort=np.array(calibration['cameras'][cam]['dist']),
-        ) for cam in enabled_camera_ids
-    ]
-
-    # frames_list_dict = {
-    #     [
-    #         {cam_id: {
-    #             'color': selected_frames_info['filenames'][cam_id][idx]['color'],
-    #             'depth': selected_frames_info['filenames'][cam_id][idx]['depth'],
-    #         } for cam_id in cam_ids}
-    #     ]
-    #     for idx in range(sequence_length)
-    # }
-
-    marker_detection_res = []
-    with tqdm.tqdm(total=sequence_length) as pbar:
-        for frame_idx in range(0, sequence_length):
-            color_frames = [
-                cv2.imread(
-                    os.path.join(
-                        base_dir,
-                        cam_id,
-                        'color',
-                        selected_frames_info['sequence_view'][frame_idx][cam_id]['color']
-                    )
-                ) for cam_id in enabled_camera_ids
-            ]
-            depth_frames = [
-                np.load(
-                    os.path.join(
-                        base_dir,
-                        cam_id,
-                        'depth',
-                        selected_frames_info['sequence_view'][frame_idx][cam_id]['depth']
-                    )
-                )['arr_0'] for cam_id in enabled_camera_ids
-            ]
-
-            # color_frames = [ctx.preview_one_frame(x)[0] for x in color_frames]
-            marker_curr_res = {}
-            for cam_id, color_frame, depth_frame, ctx in zip(enabled_camera_ids, color_frames, depth_frames, ctxs):
-                res, processed_color_frame, processed_depth_frame, err = ctx.process_one_frame(
-                    color_frame, depth_frame, undistort=True, depth_scale=4000)  # MAGIC NUMBER
-
-                if res is None:
-                    continue
-
-                for marker_id, result in res.items():
-                    rvec, tvec, _ = result
-                    marker_trans_mat = np.matmul(np.linalg.inv(transformation_matrix[cam_id]), rvec_tvec_to_matrix(rvec, tvec))
-                    if marker_id not in marker_curr_res.keys():
-                        marker_curr_res[marker_id] = [(cam_id, marker_trans_mat[:3,3])]  # rvec, xyz, err
-                    else:
-                        marker_curr_res[marker_id].append((cam_id, marker_trans_mat[:3,3]))
-
-                if debug:
-                    ctx.vis_2d(res, processed_color_frame)
-                    ctx.vis_3d(res, processed_color_frame, processed_depth_frame)
-                    # cv2.imshow(cam, frame)
-
-
-            marker_detection_res.append({
-                k: list(map(lambda x: (x[0], x[1].tolist()), v)) for k, v in marker_curr_res.items()
-            })
-            pbar.update()
-            # print(marker_curr_res)
-            # for marker_id, result in marker_curr_res.items():
-            #     if len(result) < 2:
-            #         continue
-            #     print(frame_idx, marker_id, np.linalg.norm((result[0] - result[1])))
-            #     print(i, marker_id, result[0].tolist(), result[1].tolist())
-    with open(osp.join(base_dir, "marker_detection_result.json"), "w") as f:
-        json.dump(marker_detection_res, f, indent=4)
diff --git a/realsense_recorder/io/Sync.py b/realsense_recorder/io/Sync.py
index c43afa2..af78cf2 100644
--- a/realsense_recorder/io/Sync.py
+++ b/realsense_recorder/io/Sync.py
@@ -93,6 +93,19 @@ def sync_cameras(base_dir: str,
     synced_filenames = {
         cam_id: filename_map[cam_id][closest_index_map[cam_id][head:tail]] for cam_id in cam_ids
     }
+    synced_timestamps = np.mean(
+        np.concatenate(
+            [
+                np.expand_dims(
+                    timestamp_map[cam_id][closest_index_map[cam_id][head:tail]],
+                    axis=1
+                ) for cam_id in cam_ids
+            ],
+            axis=1
+        ),
+        axis=1
+    )
+
     num_selected_frames = sum([len(v) for k, v in synced_filenames.items()])
     num_dropped_frames = sum([len(v) for k, v in timestamp_map.items()]) - num_selected_frames
     sequence_length = tail - head
@@ -112,14 +125,16 @@ def sync_cameras(base_dir: str,
         },
         'sequence_view': [
             {
-                cam_id: {
-                    'color': synced_filenames[cam_id][i] + '.jpeg',
-                    'depth': synced_filenames[cam_id][i] + '.npz'
-                } for cam_id in cam_ids
+                'path': {
+                    cam_id: {
+                        'color': synced_filenames[cam_id][i] + '.jpeg',
+                        'depth': synced_filenames[cam_id][i] + '.npz'
+                    } for cam_id in cam_ids
+                },
+                'timestamp': synced_timestamps[i]
             } for i in range(sequence_length)
-        ]
-
+        ],
     }
     with open(osp.join(base_dir, 'selected_frames.json'), 'w') as f:
         json.dump(selected_frames_dict, f, indent=4)
diff --git a/requirements.txt b/requirements.txt
index f2c17ec..f68257b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-numpy<=1.22.0
+numpy<1.24.0
 tqdm>=4.60
 rich>=11.2.0
 pyrealsense2>=2.50.0.3812
@@ -10,5 +10,4 @@ pyyaml>=6.0
 flask>=2.0.2
 gevent>=21.12.0
 rich
-pupil_apriltags
 open3d
\ No newline at end of file
diff --git a/tests/test_reprojection.py b/tests/test_reprojection.py
deleted file mode 100644
index 5c938a2..0000000
--- a/tests/test_reprojection.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import sys
-
-# NOTES to YUTONG:
-# New dependencies
-from realsense_recorder.filters.fiducial import detect_fiducial_marker
-
-# pip install pupil_apriltags -- works on Ubuntu but not on Windows
-# Search "MAGIC NUMBER" for magic numbers
-
-
-if __name__ == '__main__':
-    argv = sys.argv[1:]
-    detect_fiducial_marker(
-        argv[0],
-        marker_length_m=0.018,
-        enabled_cameras=['r08', 'r85'],
-        april_tag_family="tag25h9",
-        sequence_length=10,
-        debug=True
-    )
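
Notes:

The Sync.py hunk computes synced_timestamps by averaging, for each selected frame, the per-camera timestamps picked out by closest_index_map over the head:tail window. The expand_dims/concatenate/mean chain is equivalent to stacking the per-camera vectors along a new camera axis and taking the row mean. A minimal sketch of that equivalence with toy data (the cam ids 'r08'/'r85' are borrowed from the deleted test, not required by the code):

    import numpy as np

    # Toy per-camera timestamps after index alignment (the head:tail window).
    aligned = {
        "r08": np.array([10.0, 20.0, 30.0]),
        "r85": np.array([12.0, 19.0, 31.0]),
    }
    cam_ids = list(aligned)

    # Equivalent to the patch's expand_dims + concatenate + np.mean(axis=1):
    stacked = np.stack([aligned[c] for c in cam_ids], axis=1)  # (n_frames, n_cams)
    synced_timestamps = stacked.mean(axis=1)                   # one mean timestamp per frame
    print(synced_timestamps)                                   # [11.  19.5 30.5]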
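The sequence_view schema change is breaking for downstream readers: entries move from {cam_id: {'color', 'depth'}} to {'path': {cam_id: {'color', 'depth'}}, 'timestamp': ...}. Code like the removed detect_fiducial_marker, which indexed sequence_view[frame_idx][cam_id]['color'], must now go through the 'path' key; since marker detection moves to another repo, that repo needs the same update. A sketch of a reader for the new layout (load_sequence_view is a hypothetical helper, not part of this repo; the base_dir/cam_id/color directory layout matches the paths used by the deleted filter):

    import json
    import os.path as osp

    def load_sequence_view(base_dir: str):
        # Hypothetical helper: iterate frames in the v1.8.1 selected_frames.json layout.
        with open(osp.join(base_dir, "selected_frames.json")) as f:
            info = json.load(f)
        for entry in info["sequence_view"]:
            ts = entry["timestamp"]  # mean of the per-camera timestamps for this frame
            for cam_id, paths in entry["path"].items():
                yield (ts, cam_id,
                       osp.join(base_dir, cam_id, "color", paths["color"]),
                       osp.join(base_dir, cam_id, "depth", paths["depth"]))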
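On requirements.txt: the pin widens from numpy<=1.22.0 to numpy<1.24.0, admitting the 1.22/1.23 series while still excluding 1.24. The patch does not state why 1.24 is excluded; one plausible reason (an assumption, not confirmed by the source) is that numpy 1.24 removed long-deprecated scalar aliases such as np.float and np.int, which older dependencies may still reference:

    import numpy as np

    # Under numpy < 1.24 the alias still resolves (with a DeprecationWarning):
    #     x = np.float(1.0)
    # Under numpy >= 1.24 the same line raises AttributeError.
    # The spelling that works on both sides of the pin:
    x = np.float64(1.0)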