# Structure from motion

Import everything *except* our sfm library.

In [None]:
# Import symforce for the sole purpose of setting the value
# of epsilon, which must be done first and exactly once.
import symforce
symforce.set_epsilon_to_symbol()

import importlib
from pathlib import Path
import numpy as np
import secrets
import cv2
import matplotlib.pyplot as plt
from matplotlib.patches import ConnectionPatch
import rerun as rr
from uuid import uuid4

Import our sfm library. (Re-evaluate this cell if you make changes to the library.)

In [None]:
import sfm_solution as sfm
importlib.reload(sfm)

Choose parameter values.

In [None]:
# Video reading: step (in frames) between the frames that are kept
frames_to_skip = 30

# Matching: max threshold for the ratio test
matching_threshold = 0.9

# Triangulation: reprojection error at or above which a point is declared invalid
max_reprojection_err = 0.75

# Camera matrix
K = np.array(
    [
        [1565.7702703272157, 0.0, 964.2389356041999],
        [0.0, 1562.3561924508267, 537.4247202074102],
        [0.0, 0.0, 1.0],
    ]
)

Create random number generator.

In [None]:
# Draw a fresh 32-bit seed and print it, so the value used by this run is on record
seed = secrets.randbits(32)
print(f'seeding RNG with {seed}')

# Generator that is handed to the randomized steps later in the notebook
rng = np.random.default_rng(seed=seed)

Initialize 3D visualizer (should open the rerun application if that isn't already open).

In [None]:
# Create recording with unique ID (a new UUID is drawn every time this cell is
# evaluated); spawn=True is what should open the rerun application
rr.init('my_sfm', recording_id=uuid4(), spawn=True)

# Initialize a time sequence (timeline named 'stable_time', set to 0 seconds)
rr.set_time_seconds('stable_time', 0)

# Make view coordinates consistent with camera frames (z forward, x right, y down).
# RDF spells out the axis directions in x, y, z order: Right, Down, Forward.
rr.log('/results', rr.ViewCoordinates.RDF, timeless=True)

Load images from video.

In [None]:
# Specify filename
video_filename = Path('../../tutorials/20240305_realdata_whatbreaks/video.MOV')

# Create a video reader
video_src = cv2.VideoCapture(str(video_filename))

# Fail right away if the video could not be opened. Without this check the
# loop below would typically read nothing (the reported frame count would be
# zero) and the first visible symptom would be an error in a later cell.
if not video_src.isOpened():
    raise RuntimeError(f'could not open video: {video_filename}')

try:
    # Get frame count and frames per second
    frame_count = int(video_src.get(cv2.CAP_PROP_FRAME_COUNT))
    frames_per_second = video_src.get(cv2.CAP_PROP_FPS)

    # Read every frames_to_skip-th frame, convert it to grayscale, and store it
    # as a view whose pose is not yet known
    views = []
    for i_frame in range(0, frame_count, frames_to_skip):
        video_src.set(cv2.CAP_PROP_POS_FRAMES, i_frame)
        success, frame = video_src.read()
        assert success, f'failed to read frame {i_frame} (of {frame_count})'
        img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        views.append({
            'frame_id': i_frame,
            'img': img,
            'R_inB_ofA': None,
            'p_inB_ofA': None,
        })
finally:
    # Always give the video file back, even if reading failed part way through
    video_src.release()
print(f'read {len(views)} images from video')

Detection.

In [None]:
# Build one SIFT feature detector and reuse it for all images
sift = cv2.SIFT_create()

# For each image, find keypoints (pts) and descriptors (desc). Every keypoint
# is stored with its image coordinates and with a track that starts out empty.
for view in views:
    pts, desc = sift.detectAndCompute(view['img'], None)
    view['pts'] = [dict(pt2d=np.array(pt.pt), track=None) for pt in pts]
    view['desc'] = desc

## Initialize with two-view reconstruction

### Get initial guess

Apply matcher.

In [None]:
# Match descriptors of the first view against descriptors of the second view
matches = sfm.get_good_matches(
    views[0]['desc'],
    views[1]['desc'],
    threshold=matching_threshold,
)
print(f'found {len(matches)} good matches')

Store results.

In [None]:
# Create one track per match. The very same dict is referenced from the list
# of tracks and from both of the features that the track connects.
tracks = []
for match in matches:
    track = dict(
        p_inA=None,
        valid=True,
        matches=[
            dict(view_id=0, feature_id=match.queryIdx),
            dict(view_id=1, feature_id=match.trainIdx),
        ],
    )
    views[0]['pts'][match.queryIdx]['track'] = track
    views[1]['pts'][match.trainIdx]['track'] = track
    tracks.append(track)

Get image coordinates of matches.

In [None]:
# Image coordinates of every match: a in the first view, b in the second view
a = np.array([views[0]['pts'][m.queryIdx]['pt2d'] for m in matches])
b = np.array([views[1]['pts'][m.trainIdx]['pt2d'] for m in matches])

Visualize all good matches.

In [None]:
# Create figure with two axes, one per image
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 10))

# Show images
ax1.imshow(views[0]['img'], cmap='gray')
ax2.imshow(views[1]['img'], cmap='gray')

# Show matches
for a_i, b_i in zip(a, b):
    # - Draw red line that connects the two keypoints across the axes
    fig.add_artist(ConnectionPatch(
        xyA=a_i, xyB=b_i,
        coordsA='data', coordsB='data',
        axesA=ax1, axesB=ax2,
        color='red',
        connectionstyle='arc3, rad=0.',
        linewidth=0.5,
    ))
    # - Draw red dot at each keypoint
    ax1.plot(a_i[0], a_i[1], 'r.', markersize=2)
    ax2.plot(b_i[0], b_i[1], 'r.', markersize=2)

plt.show()

Do reconstruction.

In [None]:
# Estimate essential matrix
E, num_inliers, mask = sfm.getE(
    a, b, K, rng,
    threshold=2e-3,
    num_iters=1000,
)
print(f'found {num_inliers} inliers')

# Decompose essential matrix to estimate pose and to triangulate points
R_inB_ofA, p_inB_ofA, p_inA = sfm.decomposeE(a, b, K, E)

Store results.

In [None]:
# Store pose estimates (identity for the first view, the estimate from the
# essential matrix for the second view)
views[0].update(R_inB_ofA=np.eye(3), p_inB_ofA=np.zeros(3))
views[1].update(R_inB_ofA=R_inB_ofA, p_inB_ofA=p_inB_ofA)

# zip() would silently stop at the shorter of its inputs, so check lengths first
assert len(tracks) == len(p_inA)

# Store the position of the point corresponding to each track
for track, p_inA_i in zip(tracks, p_inA):
    track.update(p_inA=p_inA_i)

Show results.

In [None]:
sfm.show_results(views, tracks, K)

Copy results. If you want to start again from here, do the following:

```python
views, tracks = sfm.copy_results(views_1_ini, tracks_1_ini)
```

In [None]:
views_1_ini, tracks_1_ini = sfm.copy_results(views, tracks)

### Optimize (C++)

Import required modules.

In [None]:
from symforce import codegen
import sym
import subprocess

Generate a C++ function that implements the projection residual (with optimization over pose).

In [None]:
# Generate C++ code for the projection residual defined in the sfm library.
codegen.Codegen.function(
    func=sfm.sf_projection_residual,
    config=codegen.CppConfig(),
    name='bretl_projection',
).with_linearization(
    # Arguments with respect to which the residual is linearized: here both
    # the pose and the point, i.e., both are meant to be optimized
    which_args=[
        'T_inC_ofW',
        'p_inW',
    ],
).generate_function(
    # The generated code is written under this directory; the same name is
    # used for the function and for its namespace
    output_dir='optimization-cpp',
    skip_directory_nesting=True,
    namespace='bretl_projection',
)

Generate a C++ function that implements the projection residual (without optimization over pose).

In [None]:
# Generate C++ code for the same projection residual (sfm.sf_projection_residual),
# this time under the name 'bretl_projection_nopose'.
codegen.Codegen.function(
    func=sfm.sf_projection_residual,
    config=codegen.CppConfig(),
    name='bretl_projection_nopose',
).with_linearization(
    # Linearize with respect to the point only; the pose 'T_inC_ofW' is left
    # out of this list, so it is not an optimized argument of this variant
    which_args=[
        'p_inW',
    ],
).generate_function(
    # The generated code is written under this directory; the same name is
    # used for the function and for its namespace
    output_dir='optimization-cpp',
    skip_directory_nesting=True,
    namespace='bretl_projection_nopose',
)

Generate a C++ function that implements the scale residual.

In [None]:
# Generate C++ code for the scale residual defined in the sfm library.
codegen.Codegen.function(
    func=sfm.sf_scale_residual,
    config=codegen.CppConfig(),
    name='bretl_scale',
).with_linearization(
    # Linearize with respect to the pose only
    which_args=[
        'T_inC_ofW',
    ],
).generate_function(
    # The generated code is written under this directory; the same name is
    # used for the function and for its namespace
    output_dir='optimization-cpp',
    skip_directory_nesting=True,
    namespace='bretl_scale',
)

Create function that writes data to text file.

In [None]:
def to_str(a):
    """Return the elements of `a`, each converted with str(), separated by single spaces."""
    return ' '.join(map(str, a))

def optimizer_cpp_to(views, tracks, K, filename='optimizer_to.txt'):
    """Write the current reconstruction to a text file for the C++ optimizer.

    The file has the following layout (fields separated by single spaces):

        K[0, 0] K[1, 1] K[0, 2] K[1, 2]
        <number of views that have a pose estimate>
         i_view qx qy qz qw x y z            one line per such view
        <number of valid tracks>
         i_track num_matches p_inA           one line per valid track, followed by
          view_id pt2d                       one line per match of that track

    Views without a pose estimate and tracks that are not valid are left out.
    The indices that are written (i_view, i_track, view_id) are positions in
    the `views` and `tracks` lists that were passed in.

    Args:
        views: list of view dicts (keys used: 'R_inB_ofA', 'p_inB_ofA', 'pts').
        tracks: list of track dicts (keys used: 'valid', 'p_inA', 'matches').
        K: camera matrix, indexed as K[row, col].
        filename: path of the file to write (overwritten if it exists).
    """
    # All lines are collected in lists and written at the end. This avoids
    # growing one string with += inside the loops, and it means that the file
    # is only touched once every line has been formatted successfully.

    # For each view that has a pose estimate, write this pose estimate.
    view_lines = []
    for i_view, view in enumerate(views):
        if (view['R_inB_ofA'] is None) or (view['p_inB_ofA'] is None):
            continue

        T = sym.Pose3(
            R=sym.Rot3.from_rotation_matrix(view['R_inB_ofA']),
            t=view['p_inB_ofA'],
        )
        view_lines.append(f' {i_view} {to_str(T.to_storage())}\n') # i_view qx qy qz qw x y z

    # For each valid track, write its 3d point and then, for each match in
    # this track, the view and the 2d point at which it was observed.
    track_lines = []
    num_tracks = 0
    for i_track, track in enumerate(tracks):
        if not track['valid']:
            continue

        num_tracks += 1
        track_lines.append(
            f' {i_track} {len(track["matches"])} {to_str(track["p_inA"])}\n'
        )
        for match in track['matches']:
            view_id = match['view_id']
            feature_id = match['feature_id']
            b = views[view_id]['pts'][feature_id]['pt2d']
            track_lines.append(f'  {view_id} {to_str(b)}\n')

    with open(filename, 'w') as f:
        f.write(f'{K[0, 0]} {K[1, 1]} {K[0, 2]} {K[1, 2]}\n')
        f.write(f'{len(view_lines)}\n')
        f.writelines(view_lines)
        f.write(f'{num_tracks}\n')
        f.writelines(track_lines)

Create function that reads data from text file.

In [None]:
def _match_is_consistent(match, views, K, p_inA, max_reprojection_err):
    """Return True if the point p_inA has positive depth and small reprojection error in one match."""
    view = views[match['view_id']]
    R_inB_ofA = view['R_inB_ofA']
    p_inB_ofA = view['p_inB_ofA']

    # Depth must be positive (written as "not >" so that NaN counts as a failure)
    p_inB = R_inB_ofA @ p_inA + p_inB_ofA
    if not p_inB[2] > 0.:
        return False

    # Reprojection error must be below threshold
    b = view['pts'][match['feature_id']]['pt2d']
    e = sfm.projection_error(K, R_inB_ofA, p_inB_ofA, p_inA, b)
    return bool(e < max_reprojection_err)


def optimizer_cpp_from(views, tracks, K, filename='optimizer_from.txt', max_reprojection_err=1.):
    """Read optimized poses and points from a text file and update views and tracks in place.

    The file is expected to have the following layout (fields separated by
    whitespace):

        <number of views>
        i_view <pose storage>        one line per view (passed to sym.Pose3.from_storage)
        <number of tracks>
        i_track <p_inA>              one line per track

    Each listed view gets new 'R_inB_ofA' and 'p_inB_ofA'. Each listed track
    gets a new 'p_inA' and is then re-validated: it stays valid only if it was
    valid before and if, in every one of its matches, the point has positive
    depth and a reprojection error below `max_reprojection_err`. A track that
    was already invalid stays invalid.

    Args:
        views: list of view dicts, updated in place.
        tracks: list of track dicts, updated in place.
        K: camera matrix, passed on to sfm.projection_error.
        filename: path of the file to read.
        max_reprojection_err: threshold on the reprojection error.

    Prints the number of views read, the number of tracks that are valid, and
    the number of tracks that went from valid to invalid.
    """
    with open(filename, 'r') as f:
        # Poses come first, so that they are all up to date before any track
        # is re-validated against them
        num_views = int(f.readline().split()[0])
        for _ in range(num_views):
            line = f.readline().split()
            i_view = int(line[0])
            storage = [float(n) for n in line[1:]]
            T_inB_ofA = sym.Pose3.from_storage(storage).to_homogenous_matrix()
            views[i_view]['R_inB_ofA'] = T_inB_ofA[0:3, 0:3]
            views[i_view]['p_inB_ofA'] = T_inB_ofA[0:3, 3]

        num_tracks = int(f.readline().split()[0])
        num_valid = 0
        num_invalid_new = 0
        for _ in range(num_tracks):
            line = f.readline().split()
            track = tracks[int(line[0])]
            p_inA = np.array([float(n) for n in line[1:]])
            track['p_inA'] = p_inA

            # all() stops at the first failing match; nothing is checked for
            # a track that was already invalid
            was_valid = bool(track['valid'])
            valid = was_valid and all(
                _match_is_consistent(match, views, K, p_inA, max_reprojection_err)
                for match in track['matches']
            )

            # Store a plain bool (comparisons on numpy values give numpy bools)
            track['valid'] = valid
            if valid:
                num_valid += 1
            elif was_valid:
                # Only a track that was valid before can be *newly* invalid
                num_invalid_new += 1

    # Show diagnostics
    print(f'{num_views:6d} views with updated pose estimate')
    print(f'{num_valid:6d} valid tracks with updated position estimate')
    print(f'{num_invalid_new:6d} newly invalid tracks')

Write data.

In [None]:
optimizer_cpp_to(views, tracks, K)

Run optimizer.

In [None]:
# Run the compiled optimizer and wait for it to finish, capturing what it
# prints (presumably it reads the file written by optimizer_cpp_to and writes
# the file read by optimizer_cpp_from - confirm against the C++ source)
completed_process = subprocess.run(
    ['optimization-cpp/build/BretlOptimize'],
    capture_output=True,
    text=True,
)
print(completed_process.stdout)

# stderr is captured as well, so it has to be shown explicitly - otherwise a
# failure of the optimizer would leave nothing to diagnose it with
if completed_process.returncode != 0:
    print(completed_process.stderr)
assert completed_process.returncode == 0, (
    f'optimizer exited with code {completed_process.returncode}'
)

Read data.

In [None]:
optimizer_cpp_from(views, tracks, K, max_reprojection_err=max_reprojection_err)

Show results.

In [None]:
sfm.show_results(views, tracks, K)

Add visualization of results to the 3D viewer.

In [None]:
sfm.visualize_results(views, tracks, K, frames_per_second)

Copy results. If you want to start again from here, do the following:

```python
views, tracks = sfm.copy_results(views_1_opt, tracks_1_opt)
```

In [None]:
views_1_opt, tracks_1_opt = sfm.copy_results(views, tracks)

## Add a third image

### Get initial guess

Match the new image with old images, updating views and tracks.

In [None]:
iC = sfm.add_next_view(views, tracks, K, matching_threshold=matching_threshold)

Get data for resectioning. Look for tracks that...
* are valid
* have a match with `view_id` that is the same as the new view
* have `p_inA` that is not `None`

Also get data for triangulation. Look for tracks that...
* are valid
* have a match with `view_id` that is the same as the new view
* have `p_inA` that is `None`

In [None]:
# Sort the valid tracks that are seen in the new view into two groups: those
# that already have a 3d point (to resection) and those that do not yet have
# a 3d point (to triangulate)
tracks_to_resection = []
tracks_to_triangulate = []
for track in tracks:
    if track['valid']:
        match = sfm.get_match_with_view_id(track['matches'], iC)
        if match is not None:
            if track['p_inA'] is not None:
                tracks_to_resection.append(track)
            else:
                tracks_to_triangulate.append(track)

print(f'{len(tracks_to_resection)} tracks to resection')
print(f'{len(tracks_to_triangulate)} tracks to triangulate')

#### Resection

The output of this process is an estimate of the pose $R^C_A, p^C_A$.

In [None]:
# Collect, for each track to resection, its 3d point (p_inA) and the image
# coordinates of its match in the new view (c)
p_inA, c = [], []
for track in tracks_to_resection:
    assert track['p_inA'] is not None
    match = sfm.get_match_with_view_id(track['matches'], iC)
    p_inA.append(track['p_inA'])
    c.append(sfm.get_pt2d_from_match(views, match))
p_inA, c = np.array(p_inA), np.array(c)

print(f'len(p_inA) = {len(p_inA)}, len(c) = {len(c)}')

Visualize the image coordinates of points on which PnP will be based.

In [None]:
# Create figure with a single axes
fig, ax = plt.subplots(figsize=(10, 5))

# Show the image from the new view
ax.imshow(views[iC]['img'], cmap='gray')

# Mark (as red dots) the matches whose points have already been triangulated
ax.plot(c[:, 0], c[:, 1], 'r.', markersize=4)

# Show plot
plt.show()

Solve a PnP problem to estimate relative pose.

In [None]:
# Estimate the pose of the new view from 3d points (p_inA) and their image
# coordinates (c); also get the number of inliers and the inlier mask
R_inC_ofA, p_inC_ofA, num_inliers, mask = sfm.resection(
    p_inA, c, K, rng,
    threshold=2.,
    num_iters=1000,
)
print(f'found {num_inliers} inliers out of {len(mask)}')

Store results.

In [None]:
views[iC]['R_inB_ofA'] = R_inC_ofA
views[iC]['p_inB_ofA'] = p_inC_ofA

#### Triangulate

Apply function to triangulate all new tracks. Store results.

In [None]:
for track in tracks_to_triangulate:
    p_inA = sfm.triangulate(track, views, K)
    track['p_inA'] = p_inA

#### Show results

In [None]:
sfm.show_results(views, tracks, K)

Copy results. If you want to start again from here, do the following:

```python
views, tracks = sfm.copy_results(views_2_ini, tracks_2_ini)
```

In [None]:
views_2_ini, tracks_2_ini = sfm.copy_results(views, tracks)

### Optimize

Write data.

In [None]:
optimizer_cpp_to(views, tracks, K)

Run optimizer.

In [None]:
# Run the compiled optimizer and wait for it to finish, capturing what it
# prints (presumably it reads the file written by optimizer_cpp_to and writes
# the file read by optimizer_cpp_from - confirm against the C++ source)
completed_process = subprocess.run(
    ['optimization-cpp/build/BretlOptimize'],
    capture_output=True,
    text=True,
)
print(completed_process.stdout)

# stderr is captured as well, so it has to be shown explicitly - otherwise a
# failure of the optimizer would leave nothing to diagnose it with
if completed_process.returncode != 0:
    print(completed_process.stderr)
assert completed_process.returncode == 0, (
    f'optimizer exited with code {completed_process.returncode}'
)

Read data.

In [None]:
optimizer_cpp_from(views, tracks, K, max_reprojection_err=max_reprojection_err)

Show results.

In [None]:
sfm.show_results(views, tracks, K)

Add visualization of results to the 3D viewer.

In [None]:
sfm.visualize_results(views, tracks, K, frames_per_second)

Copy results. If you want to start again from here, do the following:

```python
views, tracks = sfm.copy_results(views_2_opt, tracks_2_opt)
```

In [None]:
views_2_opt, tracks_2_opt = sfm.copy_results(views, tracks)