# Two-view sparse reconstruction and dense reconstruction

Import everything *except* our sfm library.

In [None]:
# Import symforce for the sole purpose of setting the value
# of epsilon, which must be done first and exactly once.
import symforce
symforce.set_epsilon_to_symbol()

import importlib
import secrets
import subprocess
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import sym
from matplotlib.patches import ConnectionPatch

Import our sfm library. (Re-evaluate this cell if you make changes to the library.)

In [None]:
import sfm
# Reload explicitly so that re-evaluating this cell picks up edits made to the
# library after it was first imported, without restarting the kernel.
importlib.reload(sfm)

Choose parameter values.

In [None]:
# When reading video frames (used as the step between frame indices, so one
# frame out of every `frames_to_skip` is kept)
frames_to_skip = 30

# When matching (max threshold for ratio test)
matching_threshold = 0.5

# When deciding if triangulated points are invalid (upper bound on the value
# returned by sfm.projection_error — presumably in pixels; TODO confirm)
max_reprojection_err = 0.75

# Camera matrix. Entries [0, 0], [1, 1], [0, 2], [1, 2] are the four values
# exported to the C++ optimizer as the camera calibration.
K = np.array([
    [1565.7702703272157, 0.0, 964.2389356041999],
    [0.0, 1562.3561924508267, 537.4247202074102],
    [0.0, 0.0, 1.0],
])

Create random number generator.

In [None]:
# Draw a fresh 32-bit seed on every run and print it, so that a particular run
# can be reproduced later by replacing the next line with the printed value.
seed = secrets.randbits(32)
print(f'seeding RNG with {seed}')
rng = np.random.default_rng(seed)

Load images from video.

In [None]:
# Specify filename
video_filename = Path('../../tutorials/20240305_realdata_whatbreaks/video.MOV')

# Create a video reader and fail fast if the file could not be opened
# (otherwise the frame count is 0 and the notebook only breaks much later)
video_src = cv2.VideoCapture(str(video_filename))
if not video_src.isOpened():
    raise RuntimeError(f'could not open video file {video_filename}')

try:
    # Get frame count and frames per second
    frame_count = int(video_src.get(cv2.CAP_PROP_FRAME_COUNT))
    frames_per_second = video_src.get(cv2.CAP_PROP_FPS)

    # Read one frame out of every `frames_to_skip`, converted to grayscale.
    # Pose entries stay None until a reconstruction step fills them in.
    views = []
    for i_frame in range(0, frame_count, frames_to_skip):
        video_src.set(cv2.CAP_PROP_POS_FRAMES, i_frame)
        success, frame = video_src.read()
        if not success:
            raise RuntimeError(
                f'failed to read frame {i_frame} of {frame_count} from {video_filename}'
            )
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        views.append({
            'frame_id': i_frame,
            'img': img,
            'R_inB_ofA': None,
            'p_inB_ofA': None,
        })
finally:
    # Always give the file handle / decoder back, even if reading failed
    video_src.release()

print(f'read {len(views)} images from video')

Detection.

In [None]:
# Create a SIFT feature detector
sift = cv2.SIFT_create()

# For every image: detect keypoints (pts) and compute their descriptors (desc).
# Each keypoint is stored as its image coordinates plus a track reference that
# is filled in once the keypoint has been matched.
for view in views:
    pts, desc = sift.detectAndCompute(view['img'], None)
    view['pts'] = [dict(pt2d=np.array(pt.pt), track=None) for pt in pts]
    view['desc'] = desc

## Do sparse reconstruction (a.k.a. two-view reconstruction)

### Get initial guess

Apply matcher.

In [None]:
# Match descriptors of the first two views. Each returned match carries
# `queryIdx` (index into views[0]['pts']) and `trainIdx` (index into
# views[1]['pts']), which the following cells use to build tracks.
matches = sfm.get_good_matches(views[0]['desc'], views[1]['desc'], threshold=matching_threshold)
print(f'found {len(matches)} good matches')

Store results.

In [None]:
tracks = []
for match in matches:
    # One track per match: a 3D point (not yet triangulated) observed once in
    # view 0 and once in view 1
    track = dict(
        p_inA=None,
        valid=True,
        matches=[
            dict(view_id=0, feature_id=match.queryIdx),
            dict(view_id=1, feature_id=match.trainIdx),
        ],
    )

    # Point both observing features back at the (shared) track object
    views[0]['pts'][match.queryIdx]['track'] = track
    views[1]['pts'][match.trainIdx]['track'] = track

    tracks.append(track)

Get image coordinates of matches.

In [None]:
# Create a, b: row i of `a` (image 0) and row i of `b` (image 1) are the image
# coordinates of the two keypoints in the i-th match
a = np.array([views[0]['pts'][m.queryIdx]['pt2d'] for m in matches])
b = np.array([views[1]['pts'][m.trainIdx]['pt2d'] for m in matches])

Visualize all good matches.

In [None]:
# Create figure
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 10))

# Show images
ax1.imshow(views[0]['img'], cmap='gray')
ax2.imshow(views[1]['img'], cmap='gray')

# Show matches
for a_i, b_i in zip(a, b):
    # - Draw a red line across the two axes joining the matched keypoints
    #   (ConnectionPatch is imported from matplotlib.patches at the top)
    fig.add_artist(
        ConnectionPatch(
            a_i, b_i,
            'data', 'data',
            axesA=ax1, axesB=ax2,
            color='red',
            connectionstyle='arc3, rad=0.',
            linewidth=0.5,
        )
    )
    # - Draw red dot at each keypoint
    ax1.plot(a_i[0], a_i[1], 'r.', markersize=2)
    ax2.plot(b_i[0], b_i[1], 'r.', markersize=2)

plt.show()

Do reconstruction.

In [None]:
# Estimate essential matrix
# NOTE(review): `threshold` and `num_iters` are hard-coded here rather than in
# the parameter cell. `mask` is not used anywhere below — presumably it flags
# the inlier matches; confirm whether outlier tracks should be marked invalid.
E, num_inliers, mask = sfm.getE(a, b, K, rng, threshold=2e-3, num_iters=1000)
print(f'found {num_inliers} inliers')

# Decompose essential matrix to estimate pose and to triangulate points
# (p_inA must have one entry per match; the next cell asserts this)
R_inB_ofA, p_inB_ofA, p_inA = sfm.decomposeE(a, b, K, E)

Store results.

In [None]:
# Store pose estimates: view 0 gets the identity pose, view 1 gets the pose
# recovered from the essential matrix
views[0].update(R_inB_ofA=np.eye(3), p_inB_ofA=np.zeros(3))
views[1].update(R_inB_ofA=R_inB_ofA, p_inB_ofA=p_inB_ofA)

# zip() silently truncates to the shorter input, so check lengths first
assert len(tracks) == len(p_inA)

# Store the position of the point corresponding to each track
for track, p_inA_i in zip(tracks, p_inA):
    track['p_inA'] = p_inA_i

Show results.

In [None]:
# Display the current views and tracks (at this point: the initial guess)
sfm.show_results(views, tracks, K)

Copy results. If you want to start again from here, do the following:

```python
views, tracks = sfm.copy_results(views_1_ini, tracks_1_ini)
```

In [None]:
# Snapshot of the initial guess, kept so the notebook can be restarted from
# here (presumably an independent copy of views and tracks — TODO confirm)
views_1_ini, tracks_1_ini = sfm.copy_results(views, tracks)

### Optimize (C++)

Create function that writes data to text file.

In [None]:
def to_str(a):
    """Return the elements of iterable `a`, each converted with str(), joined by single spaces."""
    return ' '.join(map(str, a))

def optimizer_cpp_to(views, tracks, K, filename='optimizer_to.txt'):
    """
    Write the current reconstruction to a text file for the C++ optimizer.

    File layout (one record per line):

        K[0,0] K[1,1] K[0,2] K[1,2]
        <number of views that have a pose estimate>
         <i_view> <pose storage>              (one line per such view)
        <number of valid tracks>
         <i_track> <number of matches> <p_inA>   (one line per valid track)
          <view_id> <pt2d>                       (one line per match of that track)

    Views whose rotation or translation is None, and tracks whose 'valid' flag
    is false, are omitted.
    """
    with open(filename, 'w') as f:
        # Camera calibration
        f.write(f'{K[0, 0]} {K[1, 1]} {K[0, 2]} {K[1, 2]}\n')

        # Every view with a complete pose estimate contributes one line
        view_lines = []
        for i_view, view in enumerate(views):
            if (view['R_inB_ofA'] is not None) and (view['p_inB_ofA'] is not None):
                pose = sym.Pose3(
                    R=sym.Rot3.from_rotation_matrix(view['R_inB_ofA']),
                    t=view['p_inB_ofA'],
                )
                # i_view qx qy qz qw x y z
                view_lines.append(f' {i_view} {to_str(pose.to_storage())}\n')
        f.write(f'{len(view_lines)}\n')
        f.write(''.join(view_lines))

        # Every valid track contributes a header line with its 3d point,
        # followed by one line per observation with the observed 2d point
        track_lines = []
        num_tracks = 0
        for i_track, track in enumerate(tracks):
            if not track['valid']:
                continue

            num_tracks += 1
            observations = track['matches']
            track_lines.append(
                f' {i_track} {len(observations)} {to_str(track["p_inA"])}\n'
            )
            for obs in observations:
                pt2d = views[obs['view_id']]['pts'][obs['feature_id']]['pt2d']
                track_lines.append(f'  {obs["view_id"]} {to_str(pt2d)}\n')
        f.write(f'{num_tracks}\n')
        f.write(''.join(track_lines))

Create function that reads data from text file.

In [None]:
def optimizer_cpp_from(views, tracks, K, filename='optimizer_from.txt', max_reprojection_err=1.):
    """
    Read optimized poses and points from a text file and update `views` and
    `tracks` in place.

    Expected file layout:

        <number of views>
        <i_view> <pose storage>      (one line per view)
        <number of tracks>
        <i_track> <p_inA>            (one line per track)

    For each listed view, 'R_inB_ofA' and 'p_inB_ofA' are overwritten. For each
    listed track, 'p_inA' is overwritten and 'valid' is re-evaluated: a track
    stays valid only if it was valid before and, in every view that observes
    it, the point has positive depth and a projection error (as returned by
    sfm.projection_error) below `max_reprojection_err`.

    Prints the number of updated views, the number of tracks that are valid
    after the update, and the number of tracks that were valid before the
    update but are not any more.
    """
    with open(filename, 'r') as f:
        # Updated pose of each view
        num_views = int(f.readline().split()[0])
        for _ in range(num_views):
            line = f.readline().split()
            i_view = int(line[0])
            storage = [float(n) for n in line[1:]]
            T_inB_ofA = sym.Pose3.from_storage(storage).to_homogenous_matrix()
            views[i_view]['R_inB_ofA'] = T_inB_ofA[0:3, 0:3]
            views[i_view]['p_inB_ofA'] = T_inB_ofA[0:3, 3]

        # Updated position of each track, followed by a validity check
        num_tracks = int(f.readline().split()[0])
        num_invalid_new = 0
        num_valid = 0
        for _ in range(num_tracks):
            line = f.readline().split()
            track = tracks[int(line[0])]
            p_inA = np.array([float(n) for n in line[1:]])
            track['p_inA'] = p_inA

            # Remember the previous state so that only tracks that flip from
            # valid to invalid are reported as "newly invalid"
            was_valid = track['valid']
            valid = was_valid
            for match in track['matches']:
                view = views[match['view_id']]
                R_inB_ofA = view['R_inB_ofA']
                p_inB_ofA = view['p_inB_ofA']
                p_inB = R_inB_ofA @ p_inA + p_inB_ofA
                b = view['pts'][match['feature_id']]['pt2d']
                e = sfm.projection_error(K, R_inB_ofA, p_inB_ofA, p_inA, b)

                # Remain valid if depth is positive
                valid = valid and p_inB[2] > 0.

                # Remain valid if reprojection error is below threshold
                valid = valid and e < max_reprojection_err

            # Store a plain bool (the comparisons above yield NumPy booleans)
            track['valid'] = bool(valid)
            if valid:
                num_valid += 1
            elif was_valid:
                num_invalid_new += 1

    # Show diagnostics
    print(f'{num_views:6d} views with updated pose estimate')
    print(f'{num_valid:6d} valid tracks with updated position estimate')
    print(f'{num_invalid_new:6d} newly invalid tracks')

Write data.

In [None]:
# Writes to the default file name 'optimizer_to.txt' in the working directory
optimizer_cpp_to(views, tracks, K)

Run optimizer.

In [None]:
# Run the C++ optimizer on the file written above; it is given the absolute
# paths of its input and output files. The executable path is relative to the
# notebook's working directory.
completed_process = subprocess.run(
    [
        'optimization-cpp/build/BretlOptimize',
        str(Path('optimizer_to.txt').absolute()),
        str(Path('optimizer_from.txt').absolute()),
    ],
    capture_output=True,
    text=True,
)
print(completed_process.stdout)

# stderr is captured too, so show it on failure instead of stopping silently
if completed_process.returncode != 0:
    print(completed_process.stderr)
    raise RuntimeError(
        f'optimizer exited with return code {completed_process.returncode}'
    )

Read data.

In [None]:
# Reads the default file name 'optimizer_from.txt'; updates views and tracks
# in place and re-evaluates which tracks are valid
optimizer_cpp_from(views, tracks, K, max_reprojection_err=max_reprojection_err)

Show results.

In [None]:
# Display the current views and tracks (at this point: after optimization)
sfm.show_results(views, tracks, K)

Add visualization of results to the 3D viewer.

In [None]:
# sfm.visualize_results(views, tracks, K, frames_per_second)

Copy results. If you want to start again from here, do the following:

```python
views, tracks = sfm.copy_results(views_1_opt, tracks_1_opt)
```

In [None]:
# Snapshot of the optimized result, kept so the notebook can be restarted from
# here (presumably an independent copy of views and tracks — TODO confirm)
views_1_opt, tracks_1_opt = sfm.copy_results(views, tracks)

## Do dense reconstruction (a.k.a. two-view stereo)

Get data.

In [None]:
# Get images and their (shared) height and width
img_A, img_B = views[0]['img'], views[1]['img']
img_height, img_width = img_A.shape

# Get relative pose (the pose stored with the second view)
R_inB_ofA, p_inB_ofA = views[1]['R_inB_ofA'], views[1]['p_inB_ofA']

# Get the essential matrix from the relative pose
E = sfm.skew(p_inB_ofA) @ R_inB_ofA

# Get the fundamental matrix, F = K^{-T} E K^{-1}
K_inv = np.linalg.inv(K)
F = K_inv.T @ E @ K_inv

### Example (point in sparse reconstruction)

Choose a point in image $A$ to find the depth of.

In [None]:
# Choose a valid track to use as an example: the first one that is still
# valid (tracks[0] itself may have been invalidated when the optimized
# results were read back)
track = next((t for t in tracks if t['valid']), None)
assert track is not None, 'no valid track available to use as an example'

# Get image coordinates of this track's observation in each of the two views
match_A = sfm.get_match_with_view_id(track['matches'], 0)
match_B = sfm.get_match_with_view_id(track['matches'], 1)
a = sfm.get_pt2d_from_match(views, match_A)
b_from_match = sfm.get_pt2d_from_match(views, match_B)

Find the epiline in image $B$ that corresponds to this point.

In [None]:
def get_b_y(b_x, L):
    """
    Return the y-coordinate of the point with x-coordinate `b_x` on the line
    L[0] * x + L[1] * y + L[2] = 0.
    """
    numerator = L[2] + b_x * L[0]
    return -numerator / L[1]

# Get epipolar line in image B for point a, as the coefficients of
#   L[0] * x + L[1] * y + L[2] = 0
L = F @ np.concatenate([a, [1.]])

# Get endpoints of epipolar line segment (left and right image borders)
b_x_0, b_x_1 = 0, img_width - 1
b_y_0, b_y_1 = get_b_y(b_x_0, L), get_b_y(b_x_1, L)

# Get length of epipolar line segment (pixels)
d = np.sqrt((b_x_1 - b_x_0)**2 + (b_y_1 - b_y_0)**2)

# Sub-sample epipolar line segment at a fixed resolution: one (x, y) row per
# sample, with x evenly spaced between the two endpoints
step_px = 1.
num_samples = int(1 + np.ceil(d / step_px))
b_x_samples = np.linspace(b_x_0, b_x_1, num_samples)
b_epipolar_line = np.column_stack([b_x_samples, get_b_y(b_x_samples, L)])

Plot the epiline in image $B$ that corresponds to this point.

In [None]:
# Create figure with both axes fixed to the image extent (y pointing down)
fig, (ax_A, ax_B) = plt.subplots(1, 2, figsize=(15, 10))
for ax in (ax_A, ax_B):
    ax.set_xlim([0, img_width - 1])
    ax.set_ylim([img_height - 1, 0])

# Show images
for ax, image in ((ax_A, img_A), (ax_B, img_B)):
    ax.imshow(image, cmap='gray')

# Show epipolar line
ax_B.plot(*b_epipolar_line.T, 'r-')

# Show match
ax_A.plot(*a, 'r.', markersize=15)
ax_B.plot(*b_from_match, 'r.', markersize=15)

plt.show()

Get the sum of squared differences (SSD) in intensity values along the epiline.

In [None]:
# Choose a window size (pixels)
win_size = 16

# Window in image A. The images are 8-bit, so convert to float before doing
# arithmetic: in uint8 the difference and its square would wrap around.
win_A = cv2.getRectSubPix(img_A, [win_size, win_size], a).astype(np.float64)

# Find SSD with windows along epipolar line
ssd = []
for b_i in b_epipolar_line:
    win_B = cv2.getRectSubPix(img_B, [win_size, win_size], b_i).astype(np.float64)
    ssd.append(np.sum((win_B - win_A)**2))
ssd = np.array(ssd)

# Find match (the point along the epipolar line in image B with minimum SSD)
i = np.argmin(ssd)
b = b_epipolar_line[i]

Show results.

In [None]:
# Create figure: images on the top row, SSD curve below image B (the x-axis is
# shared, so the curve lines up with the image columns); bottom-left is unused
fig, ((ax_A, ax_B), (ax_blank, ax_ssd)) = plt.subplots(2, 2, figsize=(15, 10), sharex=True)
ax_blank.set_visible(False)
for ax in (ax_A, ax_B):
    ax.set_xlim([0, img_width - 1])
    ax.set_ylim([img_height - 1, 0])

# Show images
for ax, image in ((ax_A, img_A), (ax_B, img_B)):
    ax.imshow(image, cmap='gray')

# Show epipolar line
ax_B.plot(*b_epipolar_line.T, 'r-')

# Show match: red is the feature match, blue is the minimum-SSD match
ax_A.plot(*a, 'r.', markersize=15)
ax_B.plot(*b_from_match, 'r.', markersize=15)
ax_B.plot(*b, 'b.', markersize=10)

# Show ssd, with its minimum marked in blue
ax_ssd.plot(b_epipolar_line[:, 0], ssd, 'r')
ax_ssd.plot(b_epipolar_line[i, 0], ssd[i], 'b.', markersize=10)

plt.show()

Get depth.

In [None]:
# Get depth: with alpha and beta the normalized homogeneous coordinates of a
# and b, solve depth * u = v for the scalar depth in the least-squares sense
alpha, beta = (np.linalg.inv(K) @ np.append(pt, 1.) for pt in (a, b))
skew_beta = sfm.skew(beta)
u = skew_beta @ R_inB_ofA @ alpha
v = (-skew_beta) @ p_inB_ofA
depth = (u @ v) / (u @ u)

# Show depth next to the z-coordinate of the track's point in frame A
print(f'depth = {depth:4.2f} (compare to {track["p_inA"][2]:4.2f})')

### Example (point not in sparse reconstruction)

Choose a point in image $A$ to find the depth of.

In [None]:
# Hand-picked image coordinates (x, y) in image A
a = np.array([1000., 400.])

Find the epiline in image $B$ that corresponds to this point.

In [None]:
# Get epipolar line in image B for point a, as the coefficients of
#   L[0] * x + L[1] * y + L[2] = 0
L = F @ np.concatenate([a, [1.]])

# Get endpoints of epipolar line segment (left and right image borders)
b_x_0, b_x_1 = 0, img_width - 1
b_y_0, b_y_1 = get_b_y(b_x_0, L), get_b_y(b_x_1, L)

# Get length of epipolar line segment (pixels)
d = np.sqrt((b_x_1 - b_x_0)**2 + (b_y_1 - b_y_0)**2)

# Sub-sample epipolar line segment at a fixed resolution: one (x, y) row per
# sample, with x evenly spaced between the two endpoints
step_px = 1.
num_samples = int(1 + np.ceil(d / step_px))
b_x_samples = np.linspace(b_x_0, b_x_1, num_samples)
b_epipolar_line = np.column_stack([b_x_samples, get_b_y(b_x_samples, L)])

Plot the epiline in image $B$ that corresponds to this point.

In [None]:
# Create figure with both axes fixed to the image extent (y pointing down)
fig, (ax_A, ax_B) = plt.subplots(1, 2, figsize=(15, 10))
for ax in (ax_A, ax_B):
    ax.set_xlim([0, img_width - 1])
    ax.set_ylim([img_height - 1, 0])

# Show images
for ax, image in ((ax_A, img_A), (ax_B, img_B)):
    ax.imshow(image, cmap='gray')

# Show epipolar line
ax_B.plot(*b_epipolar_line.T, 'r-')

# Show the chosen point in image A
ax_A.plot(*a, 'r.', markersize=15)

plt.show()

Get the sum of squared differences (SSD) in intensity values along the epiline.

In [None]:
# Choose a window size (pixels)
win_size = 16

# Window in image A. The images are 8-bit, so convert to float before doing
# arithmetic: in uint8 the difference and its square would wrap around.
win_A = cv2.getRectSubPix(img_A, [win_size, win_size], a).astype(np.float64)

# Find SSD with windows along epipolar line
ssd = []
for b_i in b_epipolar_line:
    win_B = cv2.getRectSubPix(img_B, [win_size, win_size], b_i).astype(np.float64)
    ssd.append(np.sum((win_B - win_A)**2))
ssd = np.array(ssd)

# Find match (the point along the epipolar line in image B with minimum SSD)
i = np.argmin(ssd)
b = b_epipolar_line[i]

Show results.

In [None]:
# Create figure: images on the top row, SSD curve below image B (the x-axis is
# shared, so the curve lines up with the image columns); bottom-left is unused
fig, ((ax_A, ax_B), (ax_blank, ax_ssd)) = plt.subplots(2, 2, figsize=(15, 10), sharex=True)
ax_blank.set_visible(False)
for ax in (ax_A, ax_B):
    ax.set_xlim([0, img_width - 1])
    ax.set_ylim([img_height - 1, 0])

# Show images
for ax, image in ((ax_A, img_A), (ax_B, img_B)):
    ax.imshow(image, cmap='gray')

# Show epipolar line
ax_B.plot(*b_epipolar_line.T, 'r-')

# Show match: red is the chosen point, blue is the minimum-SSD match
ax_A.plot(*a, 'r.', markersize=15)
ax_B.plot(*b, 'b.', markersize=10)

# Show ssd, with its minimum marked in blue
ax_ssd.plot(b_epipolar_line[:, 0], ssd, 'r')
ax_ssd.plot(b_epipolar_line[i, 0], ssd[i], 'b.', markersize=10)

plt.show()

Get depth.

In [None]:
# Get depth: with alpha and beta the normalized homogeneous coordinates of a
# and b, solve depth * u = v for the scalar depth in the least-squares sense
alpha, beta = (np.linalg.inv(K) @ np.append(pt, 1.) for pt in (a, b))
skew_beta = sfm.skew(beta)
u = skew_beta @ R_inB_ofA @ alpha
v = (-skew_beta) @ p_inB_ofA
depth = (u @ v) / (u @ u)

# Show depth
print(f'depth = {depth:4.2f}')

### Whole image with OpenCV

Rectify images to make all epipolar lines horizontal and parallel.

In [None]:
# Both cameras use the same matrix K with all-zero distortion coefficients;
# the relative pose (R_inB_ofA, p_inB_ofA) is passed as the rotation and
# translation between the two cameras.
R1, R2, P1, P2, Q, ROI1, ROI2 = cv2.stereoRectify(K, np.zeros(5), K, np.zeros(5), [img_width, img_height], R_inB_ofA, p_inB_ofA)
# Build the pixel lookup maps for image A from (R1, P1) and resample it
mapX, mapY = cv2.initUndistortRectifyMap(K, np.zeros(5), R1, P1, [img_width, img_height], cv2.CV_32FC1)
img_A_rectified = cv2.remap(img_A, mapX, mapY, cv2.INTER_CUBIC)
# Same for image B from (R2, P2); mapX and mapY are overwritten
mapX, mapY = cv2.initUndistortRectifyMap(K, np.zeros(5), R2, P2, [img_width, img_height], cv2.CV_32FC1)
img_B_rectified = cv2.remap(img_B, mapX, mapY, cv2.INTER_CUBIC)

Show rectified images.

In [None]:
# Create figure
fig, (ax_A, ax_B) = plt.subplots(1, 2, figsize=(15, 10), sharex=True)

# Show images
ax_A.imshow(img_A_rectified, cmap='gray')
ax_B.imshow(img_B_rectified, cmap='gray')

plt.show()

Create, apply, and show the results of a stereo matcher, following [this example](https://github.com/opencv/opencv/blob/master/samples/python/stereo_match.py):

In [None]:
# Create stereo matcher
win_size = 11
min_disp = 0
num_disp = 256 - min_disp
stereo = cv2.StereoSGBM_create(
    minDisparity=min_disp,
    numDisparities=num_disp,
    blockSize=16,
    P1 = (8 * 3 * win_size**2),
    P2 = (32 * 3 * win_size**2),
    disp12MaxDiff = 1,
    uniquenessRatio = 10,
    speckleWindowSize = 100,
    speckleRange = 32
)

# Apply stereo matcher
disparity = stereo.compute(img_A_rectified, img_B_rectified)

# Show results of stereo matcher
plt.imshow(disparity)
plt.show()

Some questions to ask yourself:
* Is this the depth of points in image $A$ or image $B$?
* Why do we get depth only of a subset of this image?
* How would we assign a color to each point?