# Set up notebook

Do all imports.

In [None]:
# For input/output
from pathlib import Path
import json

# For numerical methods
import numpy as np

# For image processing and visualization of results
import cv2
from pupil_apriltags import Detector
import matplotlib.pyplot as plt

# For optimization with symforce
import symforce
symforce.set_epsilon_to_symbol()
import symforce.symbolic as sf
from symforce.values import Values
from symforce.opt.factor import Factor
from symforce.opt.optimizer import Optimizer
import sym

Say where things are.

In [None]:
# Root folder that holds every input and output of this notebook
data_dir = Path('data')

# Source folder: the calibration images to be parsed
img_src_dir = data_dir / 'calibration_images'

# Destination folder: annotated images produced at the end of the notebook
img_dst_dir = data_dir / 'calibration_results'

# JSON description of the calibration template (tag family, tag ids, tag corners)
template_filename = data_dir / 'tag36_11_grid_5x8-template.json'

# Parse images

Define a function to get a tag with a particular ID from the template.

In [None]:
def get_tag_with_id(tag_id, template):
    """Return the first entry of ``template['tags']`` whose ``'tag_id'`` equals ``tag_id``.

    Args:
        tag_id: Identifier to look for.
        template: Mapping with a ``'tags'`` list; each entry is a mapping
            that has a ``'tag_id'`` key.

    Returns:
        The matching tag entry itself (not a copy).

    Raises:
        Exception: If no entry in the template has the requested id.
    """
    found = next(
        (candidate for candidate in template['tags'] if candidate['tag_id'] == tag_id),
        None,
    )
    if found is None:
        raise Exception(f'tag_id {tag_id} not found in template')
    return found

Load the template.

In [None]:
# Read the whole template file as text and parse it as JSON
template = json.loads(template_filename.read_text())

Create a tag detector.

In [None]:
# Build the tag detector that is applied to every calibration image below.
# The tag family is taken from the template so that detector and template agree.
# NOTE(review): the remaining arguments are passed straight to
# pupil_apriltags.Detector; see that library's documentation for their exact
# meaning before tuning them.
tag_detector = Detector(
    families=template['tag_family'],
    nthreads=1,
    quad_decimate=1.0,
    quad_sigma=0.0,
    refine_edges=1,
    decode_sharpening=0.,
    debug=0,
)

Detect tags in all images.

In [None]:
# Tag corners must be no less than this number of pixels from the image border
buffer_px = 10

# We are going to create a list of views, one per image
views = []

# Iterate over all images in the source directory. The entries are sorted
# because iterdir() yields them in arbitrary order, and the index of a view
# in `views` should be the same every time the notebook is run.
for image_path in sorted(img_src_dir.iterdir()):
    # Skip anything that isn't a PNG file
    if (not image_path.is_file()) or (image_path.suffix.lower() != '.png'):
        continue

    # Read image as grayscale (cv2.imread returns None instead of raising
    # when the file cannot be decoded, so check explicitly)
    img = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise RuntimeError(f'could not read image {image_path}')

    # Detect tags
    tag_detections = tag_detector.detect(
        img,
        estimate_tag_pose=False,
        camera_params=None,
        tag_size=None,
    )

    # Largest x and y pixel coordinates a corner may have and still be accepted
    x_max = (img.shape[1] - 1) - buffer_px
    y_max = (img.shape[0] - 1) - buffer_px

    # Get n point correspondences:
    #
    #  p (n x 3) is coordinates of each point in the tag frame
    #  q (n x 2) is coordinates of each point in the image frame
    #
    rejected_tags = []
    tags = []
    p = []
    q = []
    for d in tag_detections:
        corners_x = d.corners[:, 0]
        corners_y = d.corners[:, 1]

        # Reject tags with corners too close to the image boundary, and
        # remember which ones were rejected so the count reported below
        # is meaningful
        if ((corners_x < buffer_px).any() or
            (corners_x > x_max).any() or
            (corners_y < buffer_px).any() or
            (corners_y > y_max).any()):
            rejected_tags.append(d.tag_id)
            continue

        # Add tag to list of detected tags
        tags.append({
            'tag_id': d.tag_id,
            'corners': d.corners.tolist(),
        })

        # Add corners of tag to point correspondences
        p.extend(get_tag_with_id(d.tag_id, template)['corners'])
        q.extend(d.corners.tolist())

    # Make sure the lengths of p and q are consistent
    assert len(p) == len(q)

    # Count the number of tags and correspondences that were found
    num_tags = len(tags)
    num_points = len(p)

    # Add to the list of views
    views.append({
        'image_name': str(image_path.name),
        'num_tags': num_tags,
        'tags': tags,
        'num_points': num_points,
        'p': p,
        'q': q,
    })

    # Show results
    print(f' {len(views) - 1:3d} ' +
          f': {str(image_path):30s} ' +
          f': {num_tags:3d} tags ({len(rejected_tags):3d} rejected) ' +
          f': {num_points:3d} points ')

# Estimate intrinsic and extrinsic parameters by inspection

Estimate the intrinsic parameters (i.e., $K$).

In [None]:
# FIXME (1)
# Placeholder guess at the intrinsic parameters: unit focal lengths and a
# principal point at the origin, i.e., K is the 3x3 identity matrix.
fx_guess, fy_guess = 1., 1.
cx_guess, cy_guess = 0., 0.
K = np.array([
    [fx_guess, 0., cx_guess],
    [0., fy_guess, cy_guess],
    [0., 0., 1.],
])

Estimate the extrinsic parameters (i.e., the pose $T_{camera}^{world}$ for each view).

In [None]:
# FIXME (2)
# Placeholder guess at the extrinsic parameters: one 4x4 identity
# transformation per view, stacked along the first axis.
poses = np.array([np.eye(4) for _ in views])

# Estimate intrinsic and extrinsic parameters by analysis

Define a function that implements the wedge operator.

In [None]:
def skew(v):
    """Return the 3x3 skew-symmetric ("wedge") matrix of a 3-vector.

    The result ``S`` satisfies ``S @ w == np.cross(v, w)`` for any 3-vector ``w``.

    Args:
        v: NumPy array of shape ``(3,)``. Exactly ``np.ndarray`` is required;
            lists, tuples and ndarray subclasses are rejected.

    Returns:
        A ``(3, 3)`` NumPy array.

    Raises:
        AssertionError: If ``v`` is not an ``np.ndarray`` of shape ``(3,)``.
    """
    assert type(v) is np.ndarray
    assert v.shape == (3,)
    x, y, z = v
    return np.array([
        [0., -z, y],
        [z, 0., -x],
        [-y, x, 0.],
    ])

Define a function to estimate the planar homography (i.e., $H$) between two sets of points. The "source points" (`pts_src`) are on the tag grid and are expressed in the coordinates of the world frame. The "destination points" (`pts_dst`) are in the image and are expressed in image coordinates.

In [None]:
# FIXME (5)
def get_homography(pts_src, pts_dst):
    """Estimate the planar homography from source points to destination points.

    Placeholder: the estimate is not implemented yet. Both arguments are
    ignored and the 3x3 identity matrix is returned.

    Args:
        pts_src: Source points (on the tag grid).
        pts_dst: Destination points (in the image).

    Returns:
        A ``(3, 3)`` NumPy array (currently always the identity).
    """
    return np.identity(3)

Define a function to get the homography for each view.

In [None]:
def get_homographies(views):
    """Compute one homography per view by calling ``get_homography``.

    Args:
        views: Iterable of view mappings. Each has a ``'p'`` entry (points on
            the tag grid, whose last coordinate is dropped) and a ``'q'``
            entry (the corresponding image points).

    Returns:
        NumPy array built from the list of per-view homographies, in the
        same order as ``views``.
    """
    def homography_for(view):
        # Source points (tag): every coordinate of p except the last one
        src = np.array([point[:-1] for point in view['p']])

        # Destination points (img): taken as they are
        dst = np.array(list(view['q']))

        return get_homography(src, dst)

    return np.array([homography_for(view) for view in views])

Define a function to get the intrinsic parameters (i.e., the intrinsic camera matrix $K$), given homographies.

In [None]:
# FIXME (6)
def get_intrinsic_parameters(homographies):
    """Estimate the intrinsic camera matrix K from per-view homographies.

    Placeholder: the estimate is not implemented yet. The argument is
    ignored and the 3x3 identity matrix is returned.

    Args:
        homographies: Homographies, one per view.

    Returns:
        A ``(3, 3)`` NumPy array (currently always the identity).
    """
    return np.identity(3)

Define a function to get the extrinsic parameters (i.e., the camera pose for each view), given homographies and intrinsic parameters.

In [None]:
# FIXME (7)
def get_extrinsic_parameters(homographies, K):
    """Estimate the camera pose for each view from its homography and K.

    Placeholder: the estimate is not implemented yet, so every pose is the
    4x4 identity transformation and ``K`` is not used.

    Args:
        homographies: Sequence of homographies, one per view.
        K: Intrinsic camera matrix.

    Returns:
        NumPy array holding one 4x4 pose per homography, in the same order
        as ``homographies``.
    """
    # Produce exactly one pose per homography that was passed in. The count
    # must come from the argument, not from the notebook-level `views` list,
    # so the function does not depend on hidden global state.
    poses = [np.eye(4) for _ in homographies]

    # Return all poses
    return np.array(poses)

Apply code to perform intrinsic and extrinsic calibration.

In [None]:
# Calibration pipeline: one homography per view, then the intrinsic matrix
# from all homographies, then one camera pose per view.
homographies = get_homographies(views)
# NOTE: the next two lines reassign `K` and `poses`, replacing the
# by-inspection estimates defined earlier in the notebook.
K = get_intrinsic_parameters(homographies)
poses = get_extrinsic_parameters(homographies, K)

Show results.

In [None]:
# Print K and then every camera pose, all with the same fixed-width float format
with np.printoptions(linewidth=150, formatter={'float': lambda x: f'{x:10.4f}'}):
    print('K')
    print(K)
    print('')

    for view, pose in zip(views, poses):
        print(f'Camera pose for image {view["image_name"]}')
        print(pose)
        print('')

# Estimate intrinsic and extrinsic parameters by optimization

Write a symbolic function that projects a point into the image.

In [None]:
# FIXME (3)
def projection(
    T: sf.Pose3,
    p: sf.V3,
    fx: sf.Scalar,
    fy: sf.Scalar,
    cx: sf.Scalar,
    cy: sf.Scalar,
    epsilon: sf.Scalar,
) -> sf.V2:
    """Project a point into the image (placeholder, not implemented yet).

    The current body ignores every argument and returns the zero 2-vector.

    Args:
        T: Camera pose for the view.
        p: Point to project (the ``p`` of a point correspondence).
        fx: Intrinsic parameter, initialised elsewhere from ``K[0, 0]``.
        fy: Intrinsic parameter, initialised elsewhere from ``K[1, 1]``.
        cx: Intrinsic parameter, initialised elsewhere from ``K[0, 2]``.
        cy: Intrinsic parameter, initialised elsewhere from ``K[1, 2]``.
        epsilon: Constant parameter required by symforce.

    Returns:
        A 2-vector (currently always zero).
    """
    return sf.V2(0, 0)

Write a symbolic function that computes the difference between a projected point and an image point.

In [None]:
# FIXME (4)
def projection_residual(
    T: sf.Pose3,
    p: sf.V3,
    q: sf.V2,
    fx: sf.Scalar,
    fy: sf.Scalar,
    cx: sf.Scalar,
    cy: sf.Scalar,
    epsilon: sf.Scalar,  
) -> sf.V2:
    """Difference between a projected point and an image point (placeholder).

    Not implemented yet: the current body ignores every argument and returns
    the zero 2-vector, so every residual evaluates to zero.

    Args:
        T: Camera pose for the view.
        p: Point to project (the ``p`` of a point correspondence).
        q: Image point that was given (the ``q`` of a point correspondence).
        fx: Intrinsic parameter, initialised elsewhere from ``K[0, 0]``.
        fy: Intrinsic parameter, initialised elsewhere from ``K[1, 1]``.
        cx: Intrinsic parameter, initialised elsewhere from ``K[0, 2]``.
        cy: Intrinsic parameter, initialised elsewhere from ``K[1, 2]``.
        epsilon: Constant parameter required by symforce.

    Returns:
        A 2-vector residual (currently always zero).
    """
    return sf.V2(0, 0)

Lambdify these two functions so they can be evaluated numerically.

In [None]:
# Numerical counterparts of the two symbolic functions; they are used further
# down to evaluate errors and to annotate images.
# NOTE(review): `symforce.util` is reached as an attribute of the `symforce`
# package, which presumably works because one of the symforce imports at the
# top loads that submodule - confirm on a fresh kernel.
projection_num = symforce.util.lambdify(projection)
projection_residual_num = symforce.util.lambdify(projection_residual)

Create initial values for optimization.

In [None]:
# Top-level values: the intrinsic parameters come from the current estimate of
# K, and the list of views is filled in by the loop below
initial_values = Values(
    views=[],               # <-- one Values entry per view, appended below
    fx=K[0, 0],             # <-- initial guess at fx
    fy=K[1, 1],             # <-- initial guess at fy
    cx=K[0, 2],             # <-- initial guess at cx
    cy=K[1, 2],             # <-- initial guess at cy
    epsilon=sym.epsilon,    # <-- constant parameter required by symforce
)

# Each view contributes its camera pose estimate and all of its matches
for view, pose in zip(views, poses):
    # Initial guess at camera pose, split into rotation and translation
    camera_pose = sym.Pose3(
        R=sym.Rot3.from_rotation_matrix(pose[0:3, 0:3]),
        t=pose[0:3, 3],
    )

    # One entry per match (i.e., per point correspondence)
    matches = [
        Values(p=np.array(p), q=np.array(q))
        for p, q in zip(view['p'], view['q'])
    ]

    # Append the initial values for this view to the list of views
    initial_values['views'].append(Values(T=camera_pose, matches=matches))

Create factors for optimization.

In [None]:
# One factor per match in every view: its residual is projection_residual,
# evaluated on that view's pose, that match's (p, q) and the shared intrinsics
factors = []
for i_view, view in enumerate(initial_values['views']):
    view_key = f'views[{i_view}]'
    for i_match in range(len(view['matches'])):
        match_key = f'{view_key}.matches[{i_match}]'
        factor_keys = [
            f'{view_key}.T',
            f'{match_key}.p',
            f'{match_key}.q',
            'fx',
            'fy',
            'cx',
            'cy',
            'epsilon',
        ]
        factors.append(Factor(residual=projection_residual, keys=factor_keys))

Create optimizer.

In [None]:
# Optimize the four intrinsic parameters and the camera pose of every view;
# all other values (the matches and epsilon) are held fixed
optimized_keys = ['fx', 'fy', 'cx', 'cy'] + [
    f'views[{i_view}].T' for i_view in range(len(initial_values['views']))
]

optimizer_params = Optimizer.Params(iterations=100)
optimizer = Optimizer(
    factors=factors,
    optimized_keys=optimized_keys,
    debug_stats=True,
    params=optimizer_params,
)

Run optimizer.

In [None]:
# Run the optimization from the initial values built above. The result is
# used below through its initial_values, optimized_values and error().
result = optimizer.optimize(initial_values)
# Stop here if the optimizer did not report success
assert(result.status == Optimizer.Status.SUCCESS)

# Visualize results

Show sum-squared error.

In [None]:
def _reprojection_errors(values):
    """Return the norm of the projection residual of every match in ``values``.

    Args:
        values: Values with the same layout as the optimizer's
            ``initial_values`` / ``optimized_values``: a ``'views'`` list
            (each view with ``'T'`` and ``'matches'``) plus ``'fx'``,
            ``'fy'``, ``'cx'``, ``'cy'`` and ``'epsilon'``.

    Returns:
        1-D NumPy array with one error per match, ordered view by view.
    """
    return np.array([
        np.linalg.norm(projection_residual_num(
            view['T'],
            match['p'],
            match['q'],
            values['fx'],
            values['fy'],
            values['cx'],
            values['cy'],
            values['epsilon'],
        ))
        for view in values['views']
        for match in view['matches']
    ])


# Compute all errors before and after optimization with the same helper
initial_errors = _reprojection_errors(result.initial_values)
final_errors = _reprojection_errors(result.optimized_values)

# Compute sum-squared errors (halved; the assertion below checks that the
# final value agrees with the error reported by the optimizer)
initial_sse = 0.5 * np.sum(initial_errors**2)
final_sse = 0.5 * np.sum(final_errors**2)

# A sum of squared pixel errors has units of pixels^2
print(f'Sum-squared error (halved), before optimization: {initial_sse:.1f} pixels^2')
print(f'Sum-squared error (halved), after optimization: {final_sse:.1f} pixels^2')
assert np.isclose(final_sse, result.error())

Show error histogram.

In [None]:
# Both histograms share the same 20 bin edges between 0 and 50 pixels
error_bins = np.linspace(0, 50, 20)

fig, ax = plt.subplots(1, 1)
for errors, label in (
    (initial_errors, 'before optimization'),
    (final_errors, 'after optimization'),
):
    ax.hist(errors, error_bins, alpha=0.5, label=label)
ax.legend()
ax.set_xlabel('error (pixels)')
ax.set_ylabel('count')
plt.show()

Save annotated images to show match (hopefully) between given and projected image points.

In [None]:
# Choose parameters for annotation
text_offset = 10
mark_radius = 10
text_scale = 1
text_thickness = 3
text_font = cv2.FONT_HERSHEY_SIMPLEX
text_linetype = cv2.LINE_AA


def _project_match(view_values, match, values):
    """Project the point ``match['p']`` with the pose of ``view_values`` and the intrinsics in ``values``."""
    return projection_num(
        view_values['T'],
        match['p'],
        values['fx'],
        values['fy'],
        values['cx'],
        values['cy'],
        values['epsilon'],
    )


def _annotate_point(img, point, label, radius, color, label_offset):
    """Draw a filled circle at ``point`` and write ``label`` next to it, modifying ``img`` in place.

    Args:
        img: Image to draw on.
        point: Image coordinates (x, y); each is truncated to an integer pixel.
        label: Text written next to the mark.
        radius: Circle radius in pixels.
        color: Color tuple in the image's channel order (BGR here).
        label_offset: (dx, dy) offset of the text origin from the mark, in pixels.
    """
    x, y = int(point[0]), int(point[1])
    cv2.circle(img, (x, y), radius, color, -1)
    cv2.putText(
        img,
        label,
        (x + label_offset[0], y + label_offset[1]),
        text_font,
        text_scale,
        color,
        text_thickness,
        text_linetype,
    )


# Make sure the destination directory exists: cv2.imwrite does not create
# it and reports failure only through its return value
img_dst_dir.mkdir(parents=True, exist_ok=True)

# Iterate over all views
for view_data, view_initial, view_final in zip(views, result.initial_values['views'], result.optimized_values['views']):
    # Image paths
    img_src_path = Path(img_src_dir, view_data['image_name'])
    img_dst_path = Path(img_dst_dir, view_data['image_name'])
    print(f'{img_src_path} -> {img_dst_path}')

    # Read image as BGR (cv2.imread returns None instead of raising on failure)
    img = cv2.imread(str(img_src_path))
    if img is None:
        raise RuntimeError(f'could not read image {img_src_path}')

    # Add annotations to image
    for i_match, match in enumerate(view_final['matches']):
        label = f'{i_match}'

        # Get the image point that was given
        q = match['q']

        # Get the image point that was computed by projection (before optimization)
        q_initial = _project_match(view_initial, match, result.initial_values)

        # Get the image point that was computed by projection (after optimization)
        q_final = _project_match(view_final, match, result.optimized_values)

        # Mark and number the image point that was given (largest mark, red)
        _annotate_point(
            img, q, label,
            2 * mark_radius,
            (0, 0, 255),
            (text_offset, 4 * text_offset),
        )

        # Mark and number the point projected before optimization (medium mark, green)
        _annotate_point(
            img, q_initial, label,
            int(1.5 * mark_radius),
            (0, 255, 0),
            (-4 * text_offset, -2 * text_offset),
        )

        # Mark and number the point projected after optimization (smallest mark, blue)
        _annotate_point(
            img, q_final, label,
            mark_radius,
            (255, 0, 0),
            (text_offset, -2 * text_offset),
        )

    # Fail loudly if the annotated image could not be written
    if not cv2.imwrite(str(img_dst_path), img):
        raise RuntimeError(f'could not write image {img_dst_path}')