### Solve PnP Example

In this example, we perform stereo calibration on a pair of images showing a chessboard at two different angles. Later, we use solvePnP to estimate the pose of the right camera (the left camera defines the zero-pose). We shows, that this estimated pose is close to the pose returned by stereoCalibrate. We operate on undistorted images.

In [147]:
%matplotlib ipympl
import glob
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pytransform3d.rotations import *

In [148]:
# load images and determine chessboard corners
left_img = cv2.imread("left.jpg", cv2.IMREAD_COLOR)
right_img = cv2.imread("right.jpg", cv2.IMREAD_COLOR)
left_img = cv2.resize(left_img, None, fx=0.25, fy=0.25)
right_img = cv2.resize(right_img, None, fx=0.25, fy=0.25)

In [149]:
patternsize = (9, 6)

def find_corners(img):
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(img_gray, patternsize, flags=cv2.CALIB_CB_ADAPTIVE_THRESH+cv2.CALIB_CB_NORMALIZE_IMAGE+cv2.CALIB_CB_FAST_CHECK)
    #print(ret)
    if ret:
        corners = cv2.cornerSubPix(img_gray, corners, (11, 11), (-1, -1), criteria=(cv2.TERM_CRITERIA_EPS+cv2.TERM_CRITERIA_MAX_ITER, 30, 0.1))
    img = cv2.drawChessboardCorners(img, patternsize, corners, ret)
    return img, corners

In [150]:
# prepare images for intrinsic calibration
images = glob.glob('intrinsic/*.jpg')
objpoints = [] # 3d point in real world space
imgpoints = [] # 2d points in image plane
for image in images:
    img = cv2.imread(image, cv2.IMREAD_COLOR)
    img = cv2.resize(img, None, fx=0.25, fy=0.25)
    img, corners = find_corners(img)
    imgpoints.append(corners)
    
    # prepare object points
    objp = np.zeros((patternsize[1]*patternsize[0],3), np.float32)
    objp[:,:2] = np.mgrid[0:patternsize[0],0:patternsize[1]].T.reshape(-1,2)
    objpoints.append(objp)

In [151]:
# calibrate camera intrinsics
retval, K, d, _, _ = cv2.calibrateCamera(objpoints, imgpoints, left_img.shape[:2][::-1], None, None)

In [152]:
# undistort images
mapx, mapy = cv2.initUndistortRectifyMap(K, d, None, K, left_img.shape[:2][::-1], cv2.CV_32FC1)
left_img = cv2.remap(left_img, mapx, mapy, cv2.INTER_CUBIC)
right_img = cv2.remap(right_img, mapx, mapy, cv2.INTER_CUBIC)

left_img, corners_left = find_corners(left_img)
right_img, corners_right = find_corners(right_img)

In [153]:
# calibrate as stereo camera
retval, K1, d1, K2, d2, R, t, E, F = cv2.stereoCalibrate([objpoints[0]], [corners_left], [corners_right], K, None, K, None, left_img.shape[:2][::-1])

In [154]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plot_basis(ax, np.eye(3), np.zeros(3,))
plot_basis(ax, R, t.reshape(3,))
plot_basis(ax, R.T, -np.matmul(R.T, t.reshape(3,)))
ax.set_xlim([-10,10])
ax.set_ylim([-10,10])
ax.set_zlim([-10,10])
plt.show()

FigureCanvasNbAgg()

In [155]:
pts1 = corners_left[:, 0, :]
pts2 = corners_right[:, 0, :]

In [156]:
# relative camera pose
R1 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]).astype(np.float64)
t1 = np.array([[0], [0], [0]]).astype(np.float64)
R2 = R.T
t2 = -np.matmul(R.T, t.reshape(3,)).reshape(3,1)

In [157]:
## create projection matrices
proj_matrix1 = np.hstack([R1.T, -R1.T.dot(t1)])
proj_matrix2 = np.hstack([R2.T, -R2.T.dot(t2)])
proj_matrix1 = K.dot(proj_matrix1)
proj_matrix2 = K.dot(proj_matrix2)

In [220]:
# recover pose of second (right camera) with respect to first camera by decomposing homography between points
# we could also use the essential matrix, however points are planar which is a degenrate case for the essential matrix
homography, mask = cv2.findHomography(pts1.reshape(-1, 1, 2), pts2.reshape(-1, 1, 2), cv2.LMEDS)

In [221]:
retval, rotations, translations, normals = cv2.decomposeHomographyMat(homography, K)

In [None]:
# test solution
cv2.projectPoints

In [222]:
rotations

[array([[ 0.91228335, -0.08453122,  0.40074126],
        [ 0.08167776,  0.99636418,  0.02423165],
        [-0.40133257,  0.01062551,  0.91587077]]),
 array([[ 0.91228335, -0.08453122,  0.40074126],
        [ 0.08167776,  0.99636418,  0.02423165],
        [-0.40133257,  0.01062551,  0.91587077]]),
 array([[ 0.98302143, -0.02341461, -0.18199074],
        [ 0.023565  ,  0.99972141, -0.00133629],
        [ 0.18197133, -0.00297501,  0.98329934]]),
 array([[ 0.98302143, -0.02341461, -0.18199074],
        [ 0.023565  ,  0.99972141, -0.00133629],
        [ 0.18197133, -0.00297501,  0.98329934]])]

In [223]:
translations

[array([[ 0.59841823],
        [ 0.03006539],
        [-0.10604911]]), array([[-0.59841823],
        [-0.03006539],
        [ 0.10604911]]), array([[ 0.03918393],
        [ 0.06374753],
        [-0.6038672 ]]), array([[-0.03918393],
        [-0.06374753],
        [ 0.6038672 ]])]

In [224]:
tnew23 = translations[0] * np.median(t/translations[0])
print(tnew23)
np.linalg.norm(tnew23-t)

[[-5.44058356]
 [-0.27334275]
 [ 0.96415688]]


0.06168208529446642

In [189]:
R,t

(array([[ 0.91861422, -0.07930684,  0.38711541],
        [ 0.07853035,  0.9967519 ,  0.01785035],
        [-0.38727368,  0.01400272,  0.92185846]]), array([[-5.44058356],
        [-0.21562355],
        [ 0.98590944]]))

In [128]:
print(R)
print(t)
print(R.T)
print(-np.matmul(R.T, t))

[[ 0.91861422 -0.07930684  0.38711541]
 [ 0.07853035  0.9967519   0.01785035]
 [-0.38727368  0.01400272  0.92185846]]
[[-5.44058356]
 [-0.21562355]
 [ 0.98590944]]
[[ 0.91861422  0.07853035 -0.38727368]
 [-0.07930684  0.9967519   0.01400272]
 [ 0.38711541  0.01785035  0.92185846]]
[[ 5.39654719]
 [-0.23035772]
 [ 1.20111374]]


In [158]:
essential_mat, mask = cv2.findEssentialMat(pts1.reshape(-1, 1, 2), pts2.reshape(-1, 1, 2), K, method=cv2.LMEDS)

In [159]:
essential_mat

array([[-0.00405732,  0.12562587,  0.03520796],
       [ 0.16865019,  0.00322373, -0.68579261],
       [ 0.02472118,  0.69539752,  0.00290596]])

In [160]:
E

array([[ 6.08151143e-03, -9.85726426e-01, -2.16373228e-01],
       [-1.20132437e+00, -2.00638602e-03,  5.39710873e+00],
       [-2.29176077e-01, -5.44001242e+00, -1.36451346e-02]])

In [182]:
retval, R_re, t_re, mask = cv2.recoverPose(essential_mat, pts1, pts2, K)  #pts2[mask.astype(np.bool).reshape(-1,), :]
t_re = t_re*np.mean(t/t_re)

In [183]:
R, t

(array([[ 0.91861422, -0.07930684,  0.38711541],
        [ 0.07853035,  0.9967519 ,  0.01785035],
        [-0.38727368,  0.01400272,  0.92185846]]), array([[-5.44058356],
        [-0.21562355],
        [ 0.98590944]]))

In [218]:
print(R_re, t_re)

[[ 0.90967639 -0.0845783   0.40661454]
 [ 0.08157705  0.99635981  0.02474505]
 [-0.40722728  0.01066042  0.91326464]] [[-5.05154052]
 [-0.25546949]
 [ 0.91376184]]


In [112]:
pts1[:10]

array([[210.98499, 122.6756 ],
       [312.7335 , 125.63492],
       [408.71353, 128.25554],
       [500.65393, 130.66794],
       [587.76917, 132.58319],
       [671.16693, 134.18114],
       [751.778  , 135.47215],
       [829.141  , 136.5749 ],
       [902.1669 , 138.61337],
       [216.55406, 223.5536 ]], dtype=float32)

In [50]:
# triangulate 3D points from observations in both cameras
pts = cv2.triangulatePoints(proj_matrix1, proj_matrix2, pts1.T, pts2.T).T

In [51]:
pts = cv2.convertPointsFromHomogeneous(pts).reshape(-1, 3)

In [52]:
pts

array([[-2.358715  , -2.9733794 ,  8.759449  ],
       [-1.35326   , -3.0070436 ,  8.961353  ],
       [-0.36058858, -3.0409563 ,  9.143972  ],
       [ 0.6284069 , -3.075516  ,  9.315775  ],
       [ 1.602491  , -3.1156964 ,  9.49733   ],
       [ 2.5711951 , -3.1578894 ,  9.677696  ],
       [ 3.5473492 , -3.2072814 ,  9.8702965 ],
       [ 4.525114  , -3.2622724 , 10.070582  ],
       [ 5.5149913 , -3.3284519 , 10.322598  ],
       [-2.336112  , -1.9793094 ,  8.8883505 ],
       [-1.328134  , -2.0191875 ,  9.077623  ],
       [-0.3273917 , -2.0532718 ,  9.238865  ],
       [ 0.6556845 , -2.0946293 ,  9.4281435 ],
       [ 1.6274455 , -2.137     ,  9.620763  ],
       [ 2.5959144 , -2.1826067 ,  9.800739  ],
       [ 3.5724542 , -2.2322624 ,  9.983123  ],
       [ 4.550291  , -2.2870748 , 10.181037  ],
       [ 5.542968  , -2.34434   , 10.3998165 ],
       [-2.317693  , -0.9839    ,  9.012222  ],
       [-1.2956791 , -1.0293686 ,  9.17385   ],
       [-0.29390737, -1.0715414 ,  9.335

In [45]:
# given:
#   - pts: 3D world points
#   - pts2_undistored: undistorted view of the world points in camera C2
# results:
#   - R, t to map 3D points to camera C2, we need to compute R.T, -R.T*t to retrieve the pose of C2 in world coordinates

retval, rvec, tvec, inliers = cv2.solvePnPRansac(pts.reshape(-1, 1, 3), pts2.reshape(-1, 1, 2), K, d, reprojectionError=8, iterationsCount=100)#, flags=cv2.SOLVEPNP_EPNP)
#retval, rvec, tvec = cv2.solveP3P(pts.reshape(-1, 1, 3)[:3, :, :], pts2_undistorted.reshape(-1, 1, 2)[:3, :, :], K, None, flags=cv2.SOLVEPNP_P3P)
#rvec = rvec[0]
#tvec = tvec[0]
print(retval)

# convert t vector from camera coords to world coords
R_recovered = cv2.Rodrigues(rvec)[0].T
t_recovered = -np.matmul(cv2.Rodrigues(rvec)[0].T, tvec)

print(R_recovered, t_recovered)

True
[[ 0.91433936  0.08023536 -0.39692041]
 [-0.08074068  0.99661512  0.01546756]
 [ 0.39681793  0.01790502  0.91772269]] [[ 5.45686485]
 [-0.2535795 ]
 [ 1.31101294]]


In [46]:
R2-R_recovered

array([[ 4.27485457e-03, -1.70501244e-03,  9.64673488e-03],
       [ 1.43384342e-03,  1.36779803e-04, -1.46484025e-03],
       [-9.70251742e-03, -5.46722848e-05,  4.13576847e-03]])

In [47]:
t2-t_recovered

array([[-0.06031766],
       [ 0.02322178],
       [-0.10989919]])

In [48]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2])
plot_basis(ax, R1, t1.reshape(3,))
plot_basis(ax, R2, t2.reshape(3,))
plot_basis(ax, R_recovered, t_recovered.reshape(3,))
ax.set_xlim([-10,10])
ax.set_ylim([-10,10])
ax.set_zlim([-10,10])
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_zlabel("z")
plt.show()

FigureCanvasNbAgg()

In [49]:
# map a world point to a camera point for testing
for pt in pts:
    test_pts = cv2.convertPointsToHomogeneous(pt.reshape(1, 3))
    R_recovered = cv2.Rodrigues(rvec)[0]
    t_recovered = tvec
    proj_matric_recovered = np.zeros((3, 4))
    proj_matric_recovered[:, :3] = R_recovered
    proj_matric_recovered[:, -1] = t_recovered.reshape(3,)
    proj_matric_recovered = np.matmul(K, proj_matric_recovered)
    img_pts = np.matmul(proj_matric_recovered, test_pts[0, 0, :]) 
    img_pts = cv2.convertPointsFromHomogeneous(img_pts.reshape(1, 3))
    print("world", pt, "reprojected image", img_pts)

world [-2.358715  -2.9733794  8.759449 ] reprojected image [[[ 97.46873556 135.36980783]]]
world [-1.35326   -3.0070436  8.961353 ] reprojected image [[[178.8738617  133.70110758]]]
world [-0.36058858 -3.0409563   9.143972  ] reprojected image [[[261.96802237 131.42959393]]]
world [ 0.6284069 -3.075516   9.315775 ] reprojected image [[[348.35420431 128.67626831]]]
world [ 1.602491  -3.1156964  9.49733  ] reprojected image [[[438.38348596 125.61762914]]]
world [ 2.5711951 -3.1578894  9.677696 ] reprojected image [[[532.41600371 122.19661367]]]
world [ 3.5473492 -3.2072814  9.8702965] reprojected image [[[632.324351   118.24046105]]]
world [ 4.525114  -3.2622724 10.070582 ] reprojected image [[[737.57838852 113.76218022]]]
world [ 5.5149913 -3.3284519 10.322598 ] reprojected image [[[849.43533926 109.6709347 ]]]
world [-2.336112  -1.9793094  8.8883505] reprojected image [[[101.03944574 224.81750706]]]
world [-1.328134  -2.0191875  9.077623 ] reprojected image [[[180.95544246 224.32836276

In [547]:
rvec2, _ = cv2.Rodrigues(R2.T)
p2, _ = cv2.projectPoints(pts, rvec2, -t2, K, distCoeffs=d)
mse2 = np.linalg.norm(p2-pts2.reshape(-1, 1, 2))
print(p2[:10])
print(mse2)

[[[  11.308057  109.48553 ]]

 [[ 111.901344  106.63015 ]]

 [[ 214.93959   101.320244]]

 [[ 325.12274    95.81196 ]]

 [[ 442.25693    90.87912 ]]

 [[ 565.8122     86.42345 ]]

 [[ 696.351      82.752174]]

 [[ 833.57587    77.79324 ]]

 [[1006.23987    55.34331 ]]

 [[  17.505978  222.61194 ]]]
686.7485


In [548]:
rvec1, _ = cv2.Rodrigues(R1.T)
p1, _ = cv2.projectPoints(pts, rvec1, -t1, K, distCoeffs=d)
mse1 = np.linalg.norm(p1-pts1.reshape(-1, 1, 2))
print(p1[:10])
print(mse1)

[[[211.6582   122.06578 ]]

 [[312.46432  124.27023 ]]

 [[408.50275  126.585396]]

 [[500.57413  128.84879 ]]

 [[587.4546   131.23679 ]]

 [[669.55634  133.79903 ]]

 [[747.62836  136.49435 ]]

 [[821.47375  138.73965 ]]

 [[892.5044   139.88567 ]]

 [[216.15575  222.0639  ]]]
6.672421
