# Reconstruction of the camera matrix P using DLT

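We define a house in 3D, project it to a 2D image, and then try to reconstruct the camera matrix $\mathbf{P}$ with the Direct Linear Transformation (DLT). In homogeneous coordinates, $\mathbf{P}$ satisfies:

$\mathbf{X}_\text{img} = \mathbf{P} \cdot \mathbf{X}_\text{wrld}$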

In [1]:
# Just a bunch of imports...
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from scipy.linalg import null_space

# Define our 3D house: one column per vertex, rows 0-2 are the spatial coordinates
house_xyz = np.array([
    [ -1.0,  1.0,   1.0,  -1.0,  -1.0,   1.0,   1.0,  -1.0,   0.0,   3.0,   3.0],
    [ -2.5, -2.5,  -0.5,  -0.5,  -2.5,  -2.5,  -0.5,  -0.5,  -1.5,  -0.5,  -2.5],
    [  0.0,  0.0,   0.0,   0.0,   1.0,   1.0,   1.0,   1.0,   2.0,   0.0,   0.0],
])

# Append a row of ones so every vertex becomes a homogeneous 4-vector
X_wrld = np.vstack((house_xyz, np.ones((1, house_xyz.shape[1]))))

# number of vertices (columns)
no_pts = X_wrld.shape[1]

## Definition of Intrinsic Parameters

In [2]:
# image size in pixels
width, height = 640, 480

# Physical focal length and pixel pitch; both must use the same length unit,
# because only their ratio (focal length in pixels) enters K.
# NOTE(review): the previous inline notes read "7mm" and "5um", which do not
# match these literals in any single unit (700.0e-3 / 250.0e-5 = 280 px).
# Confirm the intended physical values.
f_phys = 700.0e-3
p_x = 250.0e-5
p_y = 250.0e-5

# principal point at the centre of the pixel grid (pixel indices run 0..size-1)
c_x = (width - 1) / 2
c_y = (height - 1) / 2

# express focal length(s) in pixel units
f_x = f_phys / p_x
f_y = f_phys / p_y

# build up K (skew neglected): start from the identity, then fill in the
# focal lengths on the diagonal and the principal point in the last column
K = np.eye(3)
K[0, 0], K[1, 1] = f_x, f_y
K[0, 2], K[1, 2] = c_x, c_y

## Definition of Extrinsic Parameters

In [3]:
# Rotation part of the extrinsics; the values were precalculated elsewhere
# and are simply typed in here.
R = np.array([
    [-0.7770,  0.6286,  0.0339],
    [ 0.3719,  0.5018, -0.7809],
    [-0.5079, -0.5942, -0.6237],
])

# Camera center expressed in the world coordinate system (WCS), as a 3x1 column
C = np.transpose(np.array([[2.5, 0.1, 2.5]]))

# A separate translation vector T is not necessary in this example.

## Image processing

In [4]:
# Composition of the camera matrix: P = [M | -M C] with M = K R
M = K.dot(R)
P = np.hstack((M, -M.dot(C)))

# Shoot a nice picture: project the homogeneous world points into the image
X_img = P.dot(X_wrld)

# Introduce some noise in inhomogeneous space:
# divide by the third (homogeneous) row to obtain 2D pixel coordinates
X_img_inhom = X_img[0:2, :] / X_img[2:3, :]
stddev = 0.1
# Seeded generator so that Restart & Run All reproduces the same noisy image
rng = np.random.default_rng(42)
X_img_noise = rng.normal(scale=stddev, size=X_img_inhom.shape)
X_img_inhom_noisy = X_img_inhom + X_img_noise
# Back to homogeneous coordinates by appending a row of ones.
# The builtin `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
X_img_noisy = np.vstack((X_img_inhom_noisy, np.ones((1, no_pts), dtype=float)))

## Reconstruction of P

In [5]:
# Build up the DLT system matrix A so that A @ p = 0, where p holds the
# 12 entries of P in row-major order. Every point contributes two rows:
#   [ 0^T      -w * X^T    y * X^T ]
#   [ w * X^T   0^T       -x * X^T ]
# with (x, y, w) the homogeneous image point and X the homogeneous world point.
# The builtin `float` replaces the `np.float` alias, which was removed in NumPy 1.24.
top_rows_cols_0to3  =  np.zeros((no_pts, 4), dtype=float)
top_rows_cols_4to7  = -X_img_noisy[2:3, :].T * X_wrld.T
top_rows_cols_8to11 =  X_img_noisy[1:2, :].T * X_wrld.T

bottom_rows_cols_0to3  =  X_img_noisy[2:3, :].T * X_wrld.T
bottom_rows_cols_4to7  =  np.zeros((no_pts, 4), dtype=float)
bottom_rows_cols_8to11 = -X_img_noisy[0:1, :].T * X_wrld.T

A_upper_part = np.hstack((top_rows_cols_0to3, top_rows_cols_4to7, top_rows_cols_8to11))
A_lower_part = np.hstack((bottom_rows_cols_0to3, bottom_rows_cols_4to7, bottom_rows_cols_8to11))
A            = np.vstack((A_upper_part, A_lower_part))

# We have an overdetermined noisy system -> SVD!
# The least-squares solution of A p = 0 subject to ||p|| = 1 is the right
# singular vector belonging to the smallest singular value. NumPy returns the
# singular values in descending order, so that vector is the last row of VT.
# Indexing by matrix_rank(A) - 1 instead would pick the wrong row whenever A
# is numerically rank-deficient (e.g. noise-free data, where rank(A) = 11).
# The full SVD (the default) keeps VT at 12x12 regardless of the point count.
U, S, VT = np.linalg.svd(A)
p_estim = VT[-1, :]
P_estim = p_estim.reshape((3, 4))




## Comparison

In [6]:

# Scale both matrices so that their bottom-right entry (p34) becomes 1,
# which makes the two matrices directly comparable entry by entry.
P = np.divide(P, P[-1, -1])
P_estim = np.divide(P_estim, P_estim[-1, -1])

# show the ground truth first, then the DLT estimate
for caption, matrix in (("P original:", P), ("P estimated:", P_estim)):
    print(caption)
    print(matrix)

P original:
[[-1.31502361e+02 -4.79116610e+00 -6.57037931e+01  4.93494502e+02]
 [-6.06215509e+00 -6.25566919e-01 -1.27415040e+02  3.33755545e+02]
 [-1.75840079e-01 -2.05718005e-01 -2.15931201e-01  1.00000000e+00]]
P estimated:
[[-1.31509524e+02 -4.91121437e+00 -6.59471419e+01  4.93464004e+02]
 [-6.03375547e+00 -7.10339803e-01 -1.27457327e+02  3.33734421e+02]
 [-1.75810713e-01 -2.06013694e-01 -2.16467349e-01  1.00000000e+00]]


In [7]:
# Mean absolute reconstruction error:
# average of the element-wise absolute differences between P and P_estim
P_error = np.abs(P - P_estim).mean()
print("MAE:", P_error, sep="\n")

MAE:
0.048208553053136864
