## Create dataset

Do all imports.

In [None]:
# For input/output
from pathlib import Path

# For numerical methods
import numpy as np

# For image processing and visualization of results
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch

# For optimization with symforce
import symforce
symforce.set_epsilon_to_symbol()
import symforce.symbolic as sf
from symforce.values import Values
from symforce.opt.factor import Factor
from symforce.opt.optimizer import Optimizer
import sym

Function to print things nicely.

In [None]:
def myprint(M):
    """Print a scalar or an array with fixed-width, four-decimal formatting.

    Args:
        M: A NumPy array, a NumPy scalar, or anything ``np.asarray`` can
            convert (plain Python numbers, nested lists, ...).

    Input with at least one dimension is printed as an array, using a line
    width of 150 characters and a ``10.4f`` field for every float entry.
    Zero-dimensional input is printed as a single ``10.4f`` field.
    """
    # Coerce first so that plain Python numbers and lists, which have no
    # ``.shape`` attribute, are accepted as well.
    value = np.asarray(M)
    if value.ndim > 0:
        with np.printoptions(linewidth=150, formatter={'float': lambda x: f'{x:10.4f}'}):
            print(value)
    else:
        print(f'{value.item():10.4f}')

Functions to transform points from one frame to another, project them into an image, and compute the resulting projection error.

In [None]:
def apply_transform(R_inB_ofA, p_inB_ofA, p_inA):
    """Express points given in frame A in frame B.

    Computes ``R_inB_ofA @ p + p_inB_ofA`` for every point ``p``.

    Args:
        R_inB_ofA: 3x3 matrix applied to each point.
        p_inB_ofA: Length-3 offset added after the matrix product.
        p_inA: Points in frame A, one per row (n x 3). An empty input is
            accepted and yields an empty (0 x 3) result.

    Returns:
        Array of shape (n, 3) holding the transformed points.
    """
    # reshape(-1, 3) keeps the column count even when there are no points,
    # so downstream slicing such as ``[:, 2]`` still works.
    points = np.asarray(p_inA, dtype=float).reshape(-1, 3)
    offset = np.asarray(p_inB_ofA, dtype=float).reshape(-1)
    # Row-wise form of R @ p + t.
    return points @ np.asarray(R_inB_ofA, dtype=float).T + offset


def project(K, R_inB_ofA, p_inB_ofA, p_inA):
    """Project points given in frame A into the image of a camera at frame B.

    Args:
        K: 3x3 camera matrix.
        R_inB_ofA: 3x3 matrix taking frame-A coordinates to frame B.
        p_inB_ofA: Length-3 offset taking frame-A coordinates to frame B.
        p_inA: Points in frame A, one per row (n x 3).

    Returns:
        Array of shape (n, 2) with the image coordinates of each point.
        A warning is printed (no exception is raised) if any point has
        non-positive depth in frame B.
    """
    p_inB = apply_transform(R_inB_ofA, p_inB_ofA, p_inA)
    if not np.all(p_inB[:, 2] > 0):
        print('WARNING: non-positive depths')
    # Row-wise form of (K @ p) / p[2].
    q = (p_inB @ np.asarray(K, dtype=float).T) / p_inB[:, 2:3]
    return q[:, 0:2]


def projection_error(K, R_inB_ofA, p_inB_ofA, p_inA, b):
    """Distance between projected points and measured image points.

    Args:
        K: 3x3 camera matrix.
        R_inB_ofA: 3x3 matrix taking frame-A coordinates to frame B.
        p_inB_ofA: Length-3 offset taking frame-A coordinates to frame B.
        p_inA: Points in frame A, one per row (n x 3).
        b: Measured image points, one per row (n x 2).

    Returns:
        Array of shape (n,) with the Euclidean norm of each difference.
    """
    b_pred = project(K, R_inB_ofA, p_inB_ofA, p_inA)
    # Normalise the shape so an empty set of measurements broadcasts cleanly.
    b_meas = np.asarray(b, dtype=float).reshape(-1, 2)
    return np.linalg.norm(b_pred - b_meas, axis=1)

Load two images from video.

In [None]:
# Specify filename
video_filename = Path('../20240305_realdata_whatbreaks/video.MOV')

# Say what frames we want to read
# - index of first frame
i_frame_1 = 0
# - index of second frame
i_frame_2 = 30

# Create a video reader
video_src = cv2.VideoCapture(str(video_filename))
try:
    # Fail early with a readable message if the file could not be opened
    assert video_src.isOpened(), f'could not open video file: {video_filename}'

    # Read first frame and convert it to grayscale
    video_src.set(cv2.CAP_PROP_POS_FRAMES, i_frame_1)
    success, frame = video_src.read()
    assert success, f'could not read frame {i_frame_1}'
    img1 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Read second frame and convert it to grayscale
    video_src.set(cv2.CAP_PROP_POS_FRAMES, i_frame_2)
    success, frame = video_src.read()
    assert success, f'could not read frame {i_frame_2}'
    img2 = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
finally:
    # Release the reader whether or not the reads succeeded, so re-running
    # this cell does not accumulate open video handles
    video_src.release()

Detection.

In [None]:
# SIFT detector shared by both images
sift = cv2.SIFT_create()

# Run the detector on each image in turn; every call yields the keypoints
# (pts) and their descriptors (desc) for that image
(pts1, desc1), (pts2, desc2) = (
    sift.detectAndCompute(image=img, mask=None) for img in (img1, img2)
)

Matching.

In [None]:
# Brute-force descriptor matcher using the L2 norm.
# IMPORTANT: crossCheck must stay False, otherwise kNN matching cannot be used.
bf = cv2.BFMatcher(normType=cv2.NORM_L2, crossCheck=False)

# Function to get good matches with ratio test
def get_good_matches(descA, descB, threshold=0.5):
    """Match descriptors with the module-level matcher ``bf`` and a ratio test.

    Args:
        descA: Query descriptors, passed to ``bf.knnMatch``.
        descB: Train descriptors, passed to ``bf.knnMatch``.
        threshold: A best match is kept only when its distance is less than
            ``threshold`` times the distance of the second-best match.

    Returns:
        List of the best matches that pass the ratio test, sorted by
        distance (smallest first).
    """
    # Find the two best matches for each query descriptor
    candidates = bf.knnMatch(descA, descB, k=2)

    good_matches = []
    for pair in candidates:
        # Without a second neighbour the ratio test is undefined; skip
        # instead of failing on tuple unpacking.
        if len(pair) < 2:
            continue
        best, second = pair[0], pair[1]
        # Multiplied form of best / second < threshold: avoids a
        # ZeroDivisionError when the second-best distance is exactly zero.
        if best.distance < threshold * second.distance:
            good_matches.append(best)

    # Return good matches, sorted by distance (smallest first)
    return sorted(good_matches, key=lambda match: match.distance)

# Keep only the matches between image 1 and image 2 that pass the ratio test
matches = get_good_matches(descA=desc1, descB=desc2)
print(f'found {len(matches)} good matches')

Visualize all good matches.

In [None]:
# One axis per image, side by side
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 10))

# Grayscale images as background
ax1.imshow(img1, cmap='gray')
ax2.imshow(img2, cmap='gray')

# Overlay every good match
for m in matches:
    # Pixel location of the matched keypoint in each image
    p1, p2 = pts1[m.queryIdx].pt, pts2[m.trainIdx].pt
    # Thin red line from the keypoint in the first image to its partner in
    # the second image (both endpoints given in data coordinates)
    fig.add_artist(ConnectionPatch(
        xyA=p1, xyB=p2,
        coordsA='data', coordsB='data',
        axesA=ax1, axesB=ax2,
        color='red',
        connectionstyle='arc3, rad=0.',
        linewidth=0.1,
    ))
    # Small red dot on each keypoint
    ax1.plot(*p1, 'r.', markersize=2)
    ax2.plot(*p2, 'r.', markersize=2)

plt.show()

Define camera matrix.

In [None]:
# Camera matrix, written one row per line
K = np.array([
    [1565.7702703272157, 0.0, 964.2389356041999],
    [0.0, 1562.3561924508267, 537.4247202074102],
    [0.0, 0.0, 1.0],
])

Get image coordinates of matches.

In [None]:
# Image coordinates of every match: row i of `a` is the keypoint in the first
# image and row i of `b` is the corresponding keypoint in the second image
a = np.array([pts1[m.queryIdx].pt for m in matches])
b = np.array([pts2[m.trainIdx].pt for m in matches])

## Two-view reconstruction with OpenCV

Estimate $R^B_A$ and $p^B_A$.

In [None]:
# Estimate the relative pose with OpenCV. Five outputs are unpacked: an inlier
# count, E_cv (presumably the essential matrix — confirm against the OpenCV
# docs), the rotation and position stored as R_inB_ofA_cv / p_inB_ofA_cv, and
# a per-match mask.
# K is passed once per image, each time followed by np.zeros(4) (presumably
# zero distortion coefficients — confirm against the OpenCV docs).
num_inliers_cv, E_cv, R_inB_ofA_cv, p_inB_ofA_cv, mask_cv = cv2.recoverPose(
    a.copy(),
    b.copy(),
    K, np.zeros(4),
    K, np.zeros(4),
)

# Flatten the position (returned as a 2d array by default)
p_inB_ofA_cv = p_inB_ofA_cv.flatten()
# Flatten the mask as well so it can be iterated alongside the matches
mask_cv = mask_cv.flatten()

Estimate $p^A_1, \dotsc, p^A_n$.

In [None]:
# Triangulate every match. The first 3x4 matrix is K [I | 0] and the second
# is K [R | p], built from the pose estimated by OpenCV.
points = cv2.triangulatePoints(
    K @ np.hstack([np.eye(3), np.zeros((3, 1))]),
    K @ np.hstack([R_inB_ofA_cv, p_inB_ofA_cv.reshape(3, 1)]),
    a.copy().T,
    b.copy().T,
)

# Scale each column so that its last (homogeneous) entry becomes one
points = points / points[-1]

# Drop the homogeneous row and store one point per row
p_inA_cv = points[:3, :].T

Report how many of the matches OpenCV classified as inliers.

In [None]:
# Compare the inlier count returned by recoverPose with the total number of
# entries in the (flattened) mask
print(f'OpenCV found {num_inliers_cv} inliers out of {len(mask_cv)}')

Get subsets of points that were declared inliers and outliers by OpenCV.

In [None]:
# Select rows with boolean masks instead of filtering in Python loops.
# Unlike np.array([...]) over a filtered list, mask indexing keeps the column
# count when a subset is empty (shape (0, 2) or (0, 3) rather than (0,)), and
# it raises if the mask length does not match the data instead of silently
# truncating the way zip() does.

# Matches and triangulated points that OpenCV declared inliers (nonzero mask)
a_inliers_cv = a[mask_cv != 0]
b_inliers_cv = b[mask_cv != 0]
assert len(a_inliers_cv) == np.count_nonzero(mask_cv)
assert len(b_inliers_cv) == np.count_nonzero(mask_cv)

p_inA_inliers_cv = p_inA_cv[mask_cv != 0]
assert len(p_inA_inliers_cv) == np.count_nonzero(mask_cv)

# Matches and triangulated points that OpenCV declared outliers (zero mask)
a_outliers_cv = a[mask_cv == 0]
b_outliers_cv = b[mask_cv == 0]
p_inA_outliers_cv = p_inA_cv[mask_cv == 0]

Find and visualize reprojection error.

In [None]:
# Reprojection error of the OpenCV solution. Image A uses the identity pose,
# image B uses the pose estimated by OpenCV.
print('getting error for inliers')
e_a_inliers_cv = projection_error(
    K=K, R_inB_ofA=np.eye(3), p_inB_ofA=np.zeros(3),
    p_inA=p_inA_inliers_cv, b=a_inliers_cv,
)
e_b_inliers_cv = projection_error(
    K=K, R_inB_ofA=R_inB_ofA_cv, p_inB_ofA=p_inB_ofA_cv,
    p_inA=p_inA_inliers_cv, b=b_inliers_cv,
)
print('getting error for outliers')
e_a_outliers_cv = projection_error(
    K=K, R_inB_ofA=np.eye(3), p_inB_ofA=np.zeros(3),
    p_inA=p_inA_outliers_cv, b=a_outliers_cv,
)
e_b_outliers_cv = projection_error(
    K=K, R_inB_ofA=R_inB_ofA_cv, p_inB_ofA=p_inB_ofA_cv,
    p_inA=p_inA_outliers_cv, b=b_outliers_cv,
)

# Histograms of the errors (left: image A, right: image B), with the same bin
# edges from 0 to 2 pixels in both panels
fig, (ax_a, ax_b) = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))

ax_a.hist(e_a_inliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='inliers')
ax_a.hist(e_a_outliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='outliers')
ax_a.legend()
ax_a.set(xlabel='error (pixels)', ylabel='count')

ax_b.hist(e_b_inliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='inliers')
ax_b.hist(e_b_outliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='outliers')
ax_b.legend()
ax_b.set(xlabel='error (pixels)', ylabel='count')

plt.show()

## Optimized two-view reconstruction with SymForce

Write a symbolic function that projects a point into the image.

In [None]:
def sf_projection(
    T_inC_ofW: sf.Pose3,
    p_inW: sf.V3,
    fx: sf.Scalar,
    fy: sf.Scalar,
    cx: sf.Scalar,
    cy: sf.Scalar,
    epsilon: sf.Scalar,
) -> sf.V2:
    """Symbolic projection of a point into the image.

    The point ``p_inW`` is first transformed by ``T_inC_ofW``; its first two
    coordinates are then divided by the third, scaled by ``fx`` / ``fy`` and
    shifted by ``cx`` / ``cy``.

    ``epsilon`` is part of the signature but is not used in the body.
    """
    p_inC = T_inC_ofW * p_inW
    # Normalized coordinates (division by the third coordinate)
    x_norm = p_inC[0] / p_inC[2]
    y_norm = p_inC[1] / p_inC[2]
    return sf.V2(fx * x_norm + cx, fy * y_norm + cy)

Write a symbolic function that computes the difference between a projected point and an image point.

In [None]:
def sf_projection_residual(
    T_inC_ofW: sf.Pose3,
    p_inW: sf.V3,
    q: sf.V2,
    fx: sf.Scalar,
    fy: sf.Scalar,
    cx: sf.Scalar,
    cy: sf.Scalar,
    epsilon: sf.Scalar,
) -> sf.V2:
    """Symbolic difference between the projection of ``p_inW`` and ``q``.

    The projection is computed by ``sf_projection`` with the same pose and
    intrinsics; the result is that projection minus the image point ``q``.
    """
    difference = sf_projection(
        T_inC_ofW=T_inC_ofW,
        p_inW=p_inW,
        fx=fx,
        fy=fy,
        cx=cx,
        cy=cy,
        epsilon=epsilon,
    ) - q
    return sf.V2(difference)

Lambdify these two functions so they can be evaluated numerically.

In [None]:
# Numerical counterparts of the two symbolic functions above, produced by
# symforce.util.lambdify
sf_projection_num = symforce.util.lambdify(sf_projection)
sf_projection_residual_num = symforce.util.lambdify(sf_projection_residual)

Create one more residual to fix the scale so that the distance between frame $A$ and frame $B$ is close to one.

In [None]:
def sf_scale_residual(
    T_inC_ofW: sf.Pose3,
    epsilon: sf.Scalar,
) -> sf.V1:
    """Symbolic residual that is zero when the pose's translation has norm one.

    ``epsilon`` is part of the signature but is not used in the body.
    """
    distance = T_inC_ofW.t.norm()
    return sf.V1(distance - 1)

### Apply only to inliers (as determined by OpenCV)

Create initial values for optimization.

In [None]:
# Initial guess: identity pose for frame A, the OpenCV estimate for frame B,
# and one entry per inlier match holding both image points and the
# triangulated point. The camera intrinsics come from K.
initial_values = Values(
    T_inA_ofA=sym.Pose3(R=sym.Rot3.from_rotation_matrix(np.eye(3)), t=np.zeros(3)),
    T_inB_ofA=sym.Pose3(R=sym.Rot3.from_rotation_matrix(R_inB_ofA_cv), t=p_inB_ofA_cv),
    matches=[
        Values(a=a_i, b=b_i, p_inA=p_inA_i)
        for a_i, b_i, p_inA_i in zip(a_inliers_cv, b_inliers_cv, p_inA_inliers_cv)
    ],
    fx=K[0, 0],
    fy=K[1, 1],
    cx=K[0, 2],
    cy=K[1, 2],
    epsilon=sym.epsilon,
)

Create factors for optimization.

In [None]:
# Start with the single factor that fixes the scale
factors = [
    Factor(residual=sf_scale_residual, keys=['T_inB_ofA', 'epsilon']),
]

# Then add two projection factors per match, in this order: the point seen in
# image A (fixed pose T_inA_ofA), then the same point seen in image B
for i_match in range(len(initial_values['matches'])):
    point_key = f'matches[{i_match}].p_inA'
    for pose_key, pixel_key in (
        ('T_inA_ofA', f'matches[{i_match}].a'),
        ('T_inB_ofA', f'matches[{i_match}].b'),
    ):
        factors.append(Factor(
            residual=sf_projection_residual,
            keys=[pose_key, point_key, pixel_key, 'fx', 'fy', 'cx', 'cy', 'epsilon'],
        ))

Create optimizer.

In [None]:
# Optimize the pose of frame B and every triangulated point; all other
# values stay fixed at their initial values
optimized_keys = [
    'T_inB_ofA',
    *(f'matches[{i_match}].p_inA' for i_match in range(len(initial_values['matches']))),
]
optimizer = Optimizer(
    factors=factors,
    optimized_keys=optimized_keys,
    debug_stats=True,
    params=Optimizer.Params(iterations=100),
)

Run optimizer.

In [None]:
# Run the optimization from the initial values and stop the notebook here if
# the optimizer does not report success
result = optimizer.optimize(initial_values)
assert(result.status == Optimizer.Status.SUCCESS)

Get results.

In [None]:
# Optimized pose of frame B as a homogeneous matrix, split into its
# rotation block and its translation column
T_inB_ofA_inliers_sf = result.optimized_values['T_inB_ofA'].to_homogenous_matrix()
R_inB_ofA_inliers_sf = T_inB_ofA_inliers_sf[:3, :3]
p_inB_ofA_inliers_sf = T_inB_ofA_inliers_sf[:3, 3]

# Optimized points, one per row
p_inA_inliers_sf = np.array(
    [entry['p_inA'] for entry in result.optimized_values['matches']]
)

Compute reprojection error from results.

In [None]:
# Reprojection error of the optimized inlier solution in image A (identity
# pose) and in image B (optimized pose)
e_a_inliers_sf = projection_error(
    K=K, R_inB_ofA=np.eye(3), p_inB_ofA=np.zeros(3),
    p_inA=p_inA_inliers_sf, b=a_inliers_cv,
)
e_b_inliers_sf = projection_error(
    K=K, R_inB_ofA=R_inB_ofA_inliers_sf, p_inB_ofA=p_inB_ofA_inliers_sf,
    p_inA=p_inA_inliers_sf, b=b_inliers_cv,
)

### Apply to both inliers and outliers

Create initial values for optimization.

In [None]:
# Initial guess: identity pose for frame A, the OpenCV estimate for frame B,
# and one entry per match (inliers and outliers alike) holding both image
# points and the triangulated point. The camera intrinsics come from K.
initial_values = Values(
    T_inA_ofA=sym.Pose3(R=sym.Rot3.from_rotation_matrix(np.eye(3)), t=np.zeros(3)),
    T_inB_ofA=sym.Pose3(R=sym.Rot3.from_rotation_matrix(R_inB_ofA_cv), t=p_inB_ofA_cv),
    matches=[
        Values(a=a_i, b=b_i, p_inA=p_inA_i)
        for a_i, b_i, p_inA_i in zip(a, b, p_inA_cv)
    ],
    fx=K[0, 0],
    fy=K[1, 1],
    cx=K[0, 2],
    cy=K[1, 2],
    epsilon=sym.epsilon,
)

Create factors for optimization.

In [None]:
# Start with the single factor that fixes the scale
factors = [
    Factor(residual=sf_scale_residual, keys=['T_inB_ofA', 'epsilon']),
]

# Then add two projection factors per match, in this order: the point seen in
# image A (fixed pose T_inA_ofA), then the same point seen in image B
for i_match in range(len(initial_values['matches'])):
    point_key = f'matches[{i_match}].p_inA'
    for pose_key, pixel_key in (
        ('T_inA_ofA', f'matches[{i_match}].a'),
        ('T_inB_ofA', f'matches[{i_match}].b'),
    ):
        factors.append(Factor(
            residual=sf_projection_residual,
            keys=[pose_key, point_key, pixel_key, 'fx', 'fy', 'cx', 'cy', 'epsilon'],
        ))

Create optimizer.

In [None]:
# Optimize the pose of frame B and every triangulated point; all other
# values stay fixed at their initial values
optimized_keys = [
    'T_inB_ofA',
    *(f'matches[{i_match}].p_inA' for i_match in range(len(initial_values['matches']))),
]
optimizer = Optimizer(
    factors=factors,
    optimized_keys=optimized_keys,
    debug_stats=True,
    params=Optimizer.Params(iterations=100),
)

Run optimizer.

In [None]:
# Run the optimization from the initial values and stop the notebook here if
# the optimizer does not report success
result = optimizer.optimize(initial_values)
assert(result.status == Optimizer.Status.SUCCESS)

Get results.

In [None]:
# Optimized pose of frame B as a homogeneous matrix, split into its
# rotation block and its translation column
T_inB_ofA_all_sf = result.optimized_values['T_inB_ofA'].to_homogenous_matrix()
R_inB_ofA_all_sf = T_inB_ofA_all_sf[:3, :3]
p_inB_ofA_all_sf = T_inB_ofA_all_sf[:3, 3]

# Optimized points, one per row
p_inA_all_sf = np.array(
    [entry['p_inA'] for entry in result.optimized_values['matches']]
)

Compute reprojection error from results.

In [None]:
# Reprojection error of the solution optimized over all matches, in image A
# (identity pose) and in image B (optimized pose)
e_a_all_sf = projection_error(
    K=K, R_inB_ofA=np.eye(3), p_inB_ofA=np.zeros(3),
    p_inA=p_inA_all_sf, b=a,
)
e_b_all_sf = projection_error(
    K=K, R_inB_ofA=R_inB_ofA_all_sf, p_inB_ofA=p_inB_ofA_all_sf,
    p_inA=p_inA_all_sf, b=b,
)

### Visualize results

In [None]:
# Mean reprojection error of each solution, per image
print('REPROJECTION ERRORS')
print(f'a : mean (sf-all, sf-inliers, cv) : {np.mean(e_a_all_sf):4.2f}, {np.mean(e_a_inliers_sf):4.2f}, {np.mean(e_a_inliers_cv):4.2f}')
print(f'b : mean (sf-all, sf-inliers, cv) : {np.mean(e_b_all_sf):4.2f}, {np.mean(e_b_inliers_sf):4.2f}, {np.mean(e_b_inliers_cv):4.2f}')

# Overlaid error histograms (left: image A, right: image B), with the same
# bin edges from 0 to 2 pixels for every data set
fig, (ax_a, ax_b) = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))

ax_a.hist(e_a_inliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='inliers (cv)')
ax_a.hist(e_a_inliers_sf, bins=np.linspace(0, 2, 20), alpha=0.5, label='inliers (sf)')
ax_a.hist(e_a_all_sf, bins=np.linspace(0, 2, 20), alpha=0.5, label='all (sf)')
ax_a.hist(e_a_outliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='outliers')
ax_a.legend()
ax_a.set(xlabel='error (pixels)', ylabel='count')

ax_b.hist(e_b_inliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='inliers (cv)')
ax_b.hist(e_b_inliers_sf, bins=np.linspace(0, 2, 20), alpha=0.5, label='inliers (sf)')
ax_b.hist(e_b_all_sf, bins=np.linspace(0, 2, 20), alpha=0.5, label='all (sf)')
ax_b.hist(e_b_outliers_cv, bins=np.linspace(0, 2, 20), alpha=0.5, label='outliers')
ax_b.legend()
ax_b.set(xlabel='error (pixels)', ylabel='count')

plt.show()

## Questions

* Why are the results what they are?
* Why, if at all, is it important to distinguish between "inliers" and "outliers" before performing optimization?
* What happens if you initialize the optimizer with estimates from your own code for two-view reconstruction?
* How does OpenCV distinguish between inliers and outliers? (Modify your own code to do the same.)
* Is there a way to change the optimization so that it performs well even with outliers? (Try it.)