In [43]:
import numpy as np
import cv2

In [44]:
def pixel2cam(p, K):
    return np.array([(p[0] - K[0, 2]) / K[0, 0],
                     (p[1] - K[1, 2]) / K[1, 1]])

In [46]:
def triangulation(R, t, pts_1, pts_2):
    T1 = np.eye(3, 4)
    T2 = np.concatenate((R, t), axis=1)
    pts_4d = cv2.triangulatePoints(T1, T2, pts_1, pts_2)
    points = []
    for i in range(pts_4d.shape[1]):
        x = pts_4d[:, i]
        p = np.array([[x[0]/x[3]], [x[1]/x[3]], [x[2]/x[3]]])
        points.append(p)
    return points

In [48]:
def findFeatureMatches(img_1, img_2):

    # Keypoints and their descriptors
    keypoints_1, keypoints_2 = [], []
    descriptors_1, descriptors_2 = [], []

    orb = cv2.ORB_create()

    keypoints_1, descriptors_1 = orb.detectAndCompute(img_1, None)
    keypoints_2, descriptors_2 = orb.detectAndCompute(img_2, None)

    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)

    matches = matcher.match(descriptors_1, descriptors_2)

    # Sort and remove outliers
    matches = sorted(matches, key = lambda x: x.distance)

    min_dist = matches[0].distance
    max_dist = matches[len(matches) - 1].distance

    print(f"Min dist: {min_dist}, Max dist: {max_dist}")

    good_matches = []

    for i in range(descriptors_1.shape[0]):
        if matches[i].distance <= max(2 * min_dist, 30.0):
            good_matches.append(matches[i])

    return keypoints_1, keypoints_2, good_matches

In [49]:
def poseEstimation2d2d(key_points_1, key_points_2, matches):

    # Convert the matching point to the form of Vector<Point2f>
    points1, points2 = [], []

    for i in range(len(matches)):
        points1.append(key_points_1[matches[i].queryIdx].pt)
        points2.append(key_points_2[matches[i].trainIdx].pt)

    points1 = np.asarray(points1)
    points2 = np.asarray(points2)

    # Camera intrinsics
    K = np.array([520.9, 0, 325.1, 0, 521.0, 249.7, 0, 0, 1]).reshape((3,3))

    # calculate fundamental and essential matrix
    fundamental_matrix, mask = cv2.findFundamentalMat(points1, points2, cv2.FM_8POINT)
    print(f"Fundamental matrix is: {fundamental_matrix}")

    essential_matrix, mask = cv2.findEssentialMat(points1, points2, K)
    print(f"Essential matrix is: {essential_matrix}")

    # Calculate homography matrix
    homography_matrix = cv2.findHomography(points1, points2, cv2.RANSAC, 3)

    pp = (325.1, 249.7)
    focal = 521

    # Recover rotation and translation from the essential matrix
    return cv2.recoverPose(essential_matrix, points1, points2, K, focal)

In [50]:
img_1 = cv2.imread("1.png")
img_2 = cv2.imread("2.png")

# Get matches and key points for images
key_points_1, key_points_2, matches = findFeatureMatches(img_1, img_2)
print(f"In total we have {len(matches)} feature points")

_, R, t, mask = poseEstimation2d2d(key_points_1, key_points_2, matches)
print(f"R: {R}")
print(f"t: {t}")

Min dist: 4.0, Max dist: 94.0
In total we have 79 feature points
Fundamental matrix is: [[ 4.54443750e-06  1.33385558e-04 -1.79849925e-02]
 [-1.27565701e-04  2.26679480e-05 -1.41667843e-02]
 [ 1.81499464e-02  4.14605587e-03  1.00000000e+00]]
Essential matrix is: [[-0.00216463  0.10709656  0.09820592]
 [-0.05307528  0.03076833 -0.69812107]
 [-0.05866729  0.69562566  0.02018955]]
R: [[ 0.99530207 -0.05373494  0.08053783]
 [ 0.05062839  0.99791092  0.04013191]
 [-0.08252607 -0.03586588  0.99594332]]
t: [[-0.9786534 ]
 [-0.13314121]
 [ 0.1565597 ]]
t^R= 
[[ 0.          0.00841273 -0.0107229 ]
 [ 0.00792637 -0.          0.03927523]
 [-0.01098762  0.03510026  0.        ]]


In [51]:
# Check E = t^R*scale
t_x = np.cross(np.eye(3), t.transpose())
print(f"t^R= \n{t_x*R}")  # I guess this is scale simular to E

# Check epipolar constraints (commented out)
# Convert pixel coordinates to camera coordinates
K = np.array([[520.9, 0, 325.1], [0, 521.0, 249.7], [0, 0, 1]])
# These have to be 2xN numpy arrays
pts_1 = np.empty((2, 0))
pts_2 = np.empty((2,0))
for m in matches:
    pt1 = pixel2cam(key_points_1[m.queryIdx].pt, K)
    # y1 = np.array([[pt1[0]], [pt1[1]], [1]])
    pt2 = pixel2cam(key_points_2[m.trainIdx].pt, K)
    # y2 = np.array([[pt2[0]], [pt2[1]], [1]])
    # d = np.dot(np.dot(y2.transpose(), t_x), np.dot(R, y1))
    # print(f"epipolar constraint: {d}")
    pts_1 = np.hstack([pts_1, np.array(pt1).reshape(-1, 1)])
    pts_2 = np.hstack([pts_2, np.array(pt2).reshape(-1, 1)])

# Why does this work?
points3D = triangulation(R, t, pts_1, pts_2)

t^R= 
[[ 0.          0.00841273 -0.0107229 ]
 [ 0.00792637 -0.          0.03927523]
 [-0.01098762  0.03510026  0.        ]]


In [53]:
def get_color(depth):
    up_th = 11.7
    low_th = 6.4
    th_range = up_th - low_th
    if depth > up_th:
        depth = up_th
    if depth < low_th:
        depth = low_th
    scaled_range = depth-low_th
    color = tuple(int(x) for x in (255 * scaled_range/th_range, 0, 255 * (1 - scaled_range/th_range)))
    return color


# plot the points with color depth
img1_plot = img_1.copy()
img2_plot = img_2.copy()
for i in range(len(matches)):
    depth1 = points3D[i][2]
    pix1 = tuple(int(x) for x in key_points_1[matches[i].queryIdx].pt)
    pt1_cam = pixel2cam(key_points_1[matches[i].queryIdx].pt, K)
    cv2.circle(img1_plot, pix1, 2, get_color(depth1), 2)
    pt2_trans = R.dot(points3D[i]) + t
    depth2 = pt2_trans[2]
    pix2 = tuple(int(x) for x in key_points_2[matches[i].trainIdx].pt)
    cv2.circle(img2_plot, pix2, 2, get_color(depth2), 2)
cv2.imshow("img_1", img1_plot)
cv2.imshow("img_2", img2_plot)
cv2.waitKey()

-1