In [1]:
import torch
from data.dataloader import load_cameras

# Read the two camera sets from their JSON files: the reference set and the
# noisy set. load_cameras returns a two-element result; only the first
# element (the camera list) is kept, the second is discarded.
cameras, _ = load_cameras('data/cameras.json')
noisy_cameras, _ = load_cameras('data/cameras_noisy.json')

# Show the first camera of each set, one after the other, for comparison.
print(cameras[0], noisy_cameras[0], sep="\n")


Camera ID: camera_0
  fx=800.0000, fy=800.0000, cx=320.00, cy=240.00
  Rotation angles (rad): [0.0, 0.0, 0.0]
  Translation (t): [[0], [0], [0]]
Camera ID: camera_0
  fx=807.0000, fy=797.0000, cx=324.00, cy=247.00
  Rotation angles (rad): [-0.005019308067858219, 0.010206499136984348, 0.005013196263462305]
  Translation (t): [[-0.1913280189037323], [-0.17249178886413574], [-0.05622875317931175]]


In [2]:
# World-frame 3D points shared by every projection below.
# Five points, one (x, y, z) row per point, in meters.
X_world = torch.tensor(
    [
        (1, 2, 10),
        (0, 0, 5),
        (2, -1, 8),
        (-3, 4, 12),
        (5, 5, 15),
    ],
    dtype=torch.float64,
)

# Heading, then the tensor, then one blank separator line.
print("3D World Points:", X_world, "", sep="\n")

3D World Points:
tensor([[ 1.,  2., 10.],
        [ 0.,  0.,  5.],
        [ 2., -1.,  8.],
        [-3.,  4., 12.],
        [ 5.,  5., 15.]], dtype=torch.float64)



In [3]:
# Project the 3D world points through every camera and compare the pixel
# coordinates obtained from the reference parameters with those obtained
# from the noisy parameters of the camera at the same index.
for cam_id, camera in enumerate(cameras):
    print(f"Camera ID: {cam_id}")
    print("-" * 60)

    # Project the same points with the reference and the noisy camera
    uv = camera.project_to_image(X_world)
    uv_noisy = noisy_cameras[cam_id].project_to_image(X_world)

    # One row per point: reference (u, v) followed by noisy (u, v)
    print("Projected pixel coordinates (u, v):")
    print(torch.cat([uv, uv_noisy], dim=1))

    # Per-point Euclidean distance between the two projections, in pixels.
    # torch.linalg.vector_norm replaces the deprecated torch.norm; with the
    # default order (2) along dim=1 it yields the same values.
    error = torch.linalg.vector_norm(uv - uv_noisy, dim=1)
    print(f"Mean Error: {error.mean().item():3.3f} px")

    print()

Camera ID: 0
------------------------------------------------------------
Projected pixel coordinates (u, v):
tensor([[400.0000, 400.0000, 397.2321, 398.2323],
        [320.0000, 240.0000, 301.0868, 223.2569],
        [520.0000, 140.0000, 516.9121, 134.1895],
        [120.0000, 506.6667, 115.4621, 505.2274],
        [586.6667, 506.6667, 591.9404, 511.1877]], dtype=torch.float64)
Mean Error: 9.366 px

Camera ID: 1
------------------------------------------------------------
Projected pixel coordinates (u, v):
tensor([[375.0000, 350.0000, 369.2070, 336.2177],
        [300.0000, 200.0000, 294.1527, 184.2755],
        [487.5000, 106.2500, 474.8299,  94.0903],
        [112.5000, 450.0000, 117.6846, 437.4331],
        [550.0000, 450.0000, 540.4824, 437.1824]], dtype=torch.float64)
Mean Error: 15.769 px

Camera ID: 2
------------------------------------------------------------
Projected pixel coordinates (u, v):
tensor([[485.0000, 470.0000, 477.0167, 455.6600],
        [400.0000, 300.0000, 38

In [None]:
'''
Assume you have:
- A list of noisy cameras
- A list of 3D world points
- A list of projected pixels (actual pixel coordinates)

Q: How would you design a PyTorch model to optimize the camera parameters?

Q: Write pseudocode for the main training loop.
Q: What are the learnable parameters?
Q: How would you set up the optimizer?
Q: How would you set up the loss function?



''' 
 