In [1]:
import os
import cv2
import open3d as o3d
import torch
import json
import numpy as np
import rosbag
import matplotlib.pyplot as plt
import data_conversion
import depth_anything_interface
import pcd_utils

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.


INFO - 2024-10-14 05:28:45,404 - topics - topicmanager initialized


In [2]:
#If directory exists, erase it

if os.path.exists('../pcd'):
    os.system('rm -r ../pcd')

#Create directory
    
os.system('mkdir ../pcd')

0

In [3]:
FRAME_INDEX = 0
GAP_INDEX = 10
MODEL_PATH = "/scratchdata/depth_anything_v2_metric_hypersim_vitl.pth"
model = depth_anything_interface.get_model("cuda", MODEL_PATH, model_type = "metric", encoder='vitl')

RATIO_THRESHOLD = 0.25

# Open bag file
bag_file_path = "/scratchdata/indoor_short.bag"
bag = rosbag.Bag(bag_file_path)

INFO - 2024-10-14 05:28:46,047 - dinov2 - using MLP layer as FFN
  model.load_state_dict(torch.load(MODEL_PATH))


In [4]:
wait = 0
for topic, msg, t in bag.read_messages(topics=["/camera/color/camera_info"]):
    D = msg.D
    K = msg.K
    R = msg.R
    P = msg.P
    break

fx = P[0]
fy = P[5]
cx = P[2]
cy = P[6]

print(fx, fy, cx, cy)

306.4570007324219 306.4668884277344 319.01312255859375 197.51637268066406


In [5]:
prev_img = None
prev_est_depth = None
for topic, msg, t in bag.read_messages(topics=["/camera/color/image_raw"]):
    prev_img = data_conversion.topic_to_image(msg)
    prev_est_depth = model.infer_image(prev_img)

    coord = data_conversion.depth_to_pcd(prev_est_depth,P)
    coord = np.concatenate([coord, np.ones((coord.shape[0], 1))], axis=1)
    pcd_utils.ply_from_1x4_coord(coord, "../pcd/0.ply", color = prev_img.reshape(-1,3)/255)

    break

In [6]:
cnt = 0
nxt_frame_index = GAP_INDEX 
tf = np.eye(4)

for topic, msg, t in bag.read_messages(topics=["/camera/color/image_raw"]):
    if nxt_frame_index == cnt:
        new_img = data_conversion.topic_to_image(msg)
        new_est_depth = model.infer_image(new_img)

        gray_prev = cv2.cvtColor(prev_img, cv2.COLOR_BGR2GRAY)
        gray_new = cv2.cvtColor(new_img, cv2.COLOR_BGR2GRAY)

        flow = cv2.calcOpticalFlowFarneback(gray_prev, gray_new, None, 0.5, 3, 15, 3, 5, 1.2, 0)

        prev_point = np.indices((prev_img.shape[0], prev_img.shape[1]))
        prev_point = np.moveaxis(prev_point, 0, -1)

        new_point = prev_point + flow

        mask = np.linalg.norm(flow, axis=2) > 4 # Match based on nearer objects, these should be more accurate?
        prev_point = prev_point[mask==1]
        new_point = new_point[mask==1]

        mask = new_point[:, 1] < new_img.shape[0] - 1 
        prev_point = prev_point[mask]
        new_point = new_point[mask]

        mask = prev_point[:, 1] < new_img.shape[0] - 1
        prev_point = prev_point[mask]
        new_point = new_point[mask]

        mask = new_point[:, 0] < new_img.shape[1] - 1 
        prev_point = prev_point[mask]
        new_point = new_point[mask]

        mask = prev_point[:, 0] < new_img.shape[1] - 1
        prev_point = prev_point[mask]
        new_point = new_point[mask]

        matching_depth1 = data_conversion.interpolate_depth(prev_est_depth, prev_point)
        matching_depth2 = data_conversion.interpolate_depth(new_est_depth, new_point)

        coord1 = np.zeros((len(matching_depth1), 3), dtype=np.float32)

        coord1[:, 0] = (prev_point[:,0] - cx) * matching_depth1/ fx
        coord1[:, 1] = (prev_point[:,1] - cy) * matching_depth1/ fy
        coord1[:, 2] = matching_depth1

        coord2 = np.zeros((len(matching_depth2), 3), dtype=np.float32)

        coord2[:, 0] = (new_point[:,0] - cx) * matching_depth2/ fx
        coord2[:, 1] = (new_point[:,1] - cy) * matching_depth2/ fy
        coord2[:, 2] = matching_depth2
        
        EPSILON = 0.02
        RANSAC_TRIALS = 10000
        new_tf = pcd_utils.pcd_matching_tf(coord1, coord2, EPSILON, RANSAC_TRIALS, verbose=True)

        tf = new_tf @ tf

        print(tf)

        coord = data_conversion.depth_to_pcd(new_est_depth,P)
        coord = np.concatenate([coord, np.ones((coord.shape[0], 1))], axis=1)
        coord = coord @ tf.T

        pcd_utils.ply_from_1x4_coord(coord, f"../pcd/{nxt_frame_index}.ply", color = prev_img.reshape(-1,3)/255)

        nxt_frame_index += GAP_INDEX
        prev_img = new_img
        prev_est_depth = new_est_depth
        break
    cnt += 1

Inliers Ratio: 0.2022564638286759
[[ 0.9963754  -0.05478258 -0.06507613  0.00129181]
 [ 0.05124861  0.99718199 -0.05478707  0.08841872]
 [ 0.06789414  0.05125341  0.99637515 -0.01112904]
 [ 0.          0.          0.          1.01017592]]
