In [1]:
import numpy as np

# Function to compute the rotation matrix and translation vector
def compute_extrinsics(theta_deg, r):
    theta_rad = np.deg2rad(theta_deg)
    R = np.array([
        [np.cos(theta_rad), 0, np.sin(theta_rad)],
        [0, 1, 0],
        [-np.sin(theta_rad), 0, np.cos(theta_rad)]
    ])
    t = np.array([
        [r * np.cos(theta_rad)],
        [0],
        [r * np.sin(theta_rad)]
    ])
    return R, t


# Function to combine rotation matrix and translation vector into a homogeneous extrinsic matrix
def extrinsic_to_homogeneous(R, t):
    homogeneous_matrix = np.eye(4)
    homogeneous_matrix[:3, :3] = R
    homogeneous_matrix[:3, 3] = t.flatten()
    return homogeneous_matrix




In [2]:
# Parameters
num_cameras = 10
angle_step = 360 / num_cameras  # Angle separation between cameras
radius = 0.3  # Distance from the origin

# Compute extrinsics for all cameras
extrinsics = []
for i in range(num_cameras):
    theta = i * angle_step
    R, t = compute_extrinsics(theta, radius)
    extrinsics.append((R, t))
    
# Display the extrinsics
#extrinsics

# Compute homogeneous extrinsic matrices for all cameras
homogeneous_extrinsics = [extrinsic_to_homogeneous(R, t) for R, t in extrinsics]

# Display the homogeneous extrinsic matrices
homogeneous_extrinsics



[array([[ 1. ,  0. ,  0. ,  0.3],
        [ 0. ,  1. ,  0. ,  0. ],
        [-0. ,  0. ,  1. ,  0. ],
        [ 0. ,  0. ,  0. ,  1. ]]),
 array([[ 0.80901699,  0.        ,  0.58778525,  0.2427051 ],
        [ 0.        ,  1.        ,  0.        ,  0.        ],
        [-0.58778525,  0.        ,  0.80901699,  0.17633558],
        [ 0.        ,  0.        ,  0.        ,  1.        ]]),
 array([[ 0.30901699,  0.        ,  0.95105652,  0.0927051 ],
        [ 0.        ,  1.        ,  0.        ,  0.        ],
        [-0.95105652,  0.        ,  0.30901699,  0.28531695],
        [ 0.        ,  0.        ,  0.        ,  1.        ]]),
 array([[-0.30901699,  0.        ,  0.95105652, -0.0927051 ],
        [ 0.        ,  1.        ,  0.        ,  0.        ],
        [-0.95105652,  0.        , -0.30901699,  0.28531695],
        [ 0.        ,  0.        ,  0.        ,  1.        ]]),
 array([[-0.80901699,  0.        ,  0.58778525, -0.2427051 ],
        [ 0.        ,  1.        ,  0.        ,  0

In [24]:
#covert world coordinate to camera coordinate
def world_to_camera_frame(P_batch, extrinsics):
    # For each point in the batch, apply the transformation
    transformed_batch = []
    for P in P_batch:
        transformed_points = []
        for extrinsic in extrinsics:
            # Convert P to homogeneous coordinates (x, y, z, 1)
            P_homogeneous = np.hstack([P, 1])
            # Apply transformation and extract the first 3 components
            transformed_points.append(np.dot(extrinsic, P_homogeneous)[:3])
        transformed_batch.append(transformed_points)
    
    return transformed_batch

#convert camera coordinate to world coordinate
def camera_to_world_frame(P, extrinsics):
    return [np.dot(np.linalg.inv(extrinsic), P) for extrinsic in extrinsics]

In [26]:
bbox_cam3D = [[6.6796875, -71.9921875, -62.34375, 38.7, 7.421875, 1.8, 0]]
#convert to camera coordinate
bbox_cam3D[0][:3] = world_to_camera_frame(np.array([bbox_cam3D[0][:3]]), homogeneous_extrinsics[:1])[0]
print(bbox_cam3D)

[[array([  6.9796875, -71.9921875, -62.34375  ]), 38.7, 7.421875, 1.8, 0]]


In [21]:
print(homogeneous_extrinsics[:1])

[array([[ 1. ,  0. ,  0. ,  0.3],
       [ 0. ,  1. ,  0. ,  0. ],
       [-0. ,  0. ,  1. ,  0. ],
       [ 0. ,  0. ,  0. ,  1. ]])]


In [31]:
gt_bboxes_3d = [[  7.0546875 ,  25.8671875 , 109.34765625, -48.  ,         4.11523438,
    3.    ,       0.        ],
 [  4.11523438 , -7.0546875 , 106.99609375 , 78.     ,      4.11523438,
    3.       ,    0.        ],
 [  5.87890625, -41.74023438, -54.0859375,   -6.   ,        7.0546875,
    3.    ,       0.        ],
 [  5.87890625, -41.74023438, -54.0859375 ,  -6.     ,      7.0546875,
    3.    ,       0.        ],
 [  7.0546875 ,  25.8671875 , 109.34765625, -48.    ,       4.703125,
    3.     ,      0.        ],
 [  5.29101562,  -7.64257812, 106.99609375,  78.    ,       4.11523438,
    3.     ,      0.        ],
 [  4.11523438 , -7.0546875 , 107.58398438 , 75.    ,       2.93945312,
    3.   ,        0.        ],
 [  5.29101562 ,-41.15234375, -54.0859375,   -6.      ,     6.46679688,
    3.  ,         0.        ],
 [  7.64257812 , 25.27929688, 108.75976562, -48.   ,        5.29101562,
    3.   ,        0.        ],
 [  4.703125 ,   -7.64257812, 106.40820312 , 78.    ,       5.29101562,
    3.    ,       0.        ]]

In [32]:
# Extract the first three coordinates of each bounding box
gt_bboxes_3d_coords = np.array([bbox[:3] for bbox in gt_bboxes_3d])

# Convert to camera coordinates
gt_bboxes_3d_coords_cam = world_to_camera_frame(gt_bboxes_3d_coords, homogeneous_extrinsics[:1])

# Update the original bounding boxes with the transformed coordinates
for i in range(len(gt_bboxes_3d)):
	gt_bboxes_3d[i][:3] = gt_bboxes_3d_coords_cam[i][0]

In [33]:
print(gt_bboxes_3d)

[[7.3546875, 25.8671875, 109.34765625, -48.0, 4.11523438, 3.0, 0.0], [4.41523438, -7.0546875, 106.99609375, 78.0, 4.11523438, 3.0, 0.0], [6.17890625, -41.74023438, -54.0859375, -6.0, 7.0546875, 3.0, 0.0], [6.17890625, -41.74023438, -54.0859375, -6.0, 7.0546875, 3.0, 0.0], [7.3546875, 25.8671875, 109.34765625, -48.0, 4.703125, 3.0, 0.0], [5.591015619999999, -7.64257812, 106.99609375, 78.0, 4.11523438, 3.0, 0.0], [4.41523438, -7.0546875, 107.58398438, 75.0, 2.93945312, 3.0, 0.0], [5.591015619999999, -41.15234375, -54.0859375, -6.0, 6.46679688, 3.0, 0.0], [7.942578119999999, 25.27929688, 108.75976562, -48.0, 5.29101562, 3.0, 0.0], [5.003125, -7.64257812, 106.40820312, 78.0, 5.29101562, 3.0, 0.0]]
