# Nonlinear least-squares

Import everything we need.

In [None]:
import symforce
symforce.set_epsilon_to_symbol() # <-- must be done first and exactly once
import symforce.symbolic as sf
from symforce.values import Values
from symforce.opt.factor import Factor
from symforce.opt.optimizer import Optimizer
from symforce.opt.noise_models import PseudoHuberNoiseModel
from symforce.opt.noise_models import BarronNoiseModel
import sym

import numpy as np
import secrets
import json
import matplotlib.pyplot as plt
from scipy.stats import chi2
from scipy.linalg import sqrtm

Create random number generator.

In [None]:
# Draw a fresh 32-bit seed and echo it, so that a particular run can be
# reproduced later by substituting the printed value for `seed`.
seed = secrets.randbits(32)
print(f'seeding RNG with {seed}')

# Generator used for every random draw in the rest of the notebook
rng = np.random.default_rng(seed=seed)

Define a function to print things nicely.

In [None]:
def myprint(M):
    """
    Prints either a scalar or a numpy array with four digits after the
    decimal and with consistent spacing so it is easier to read.

    Any instance of np.ndarray - including instances of its subclasses - is
    printed as an array. Everything else is formatted as a single number
    with the format spec '10.4f'.
    """
    # isinstance (rather than an exact type comparison) so that ndarray
    # subclasses take the array branch; they do not accept the scalar
    # format spec used in the other branch.
    if isinstance(M, np.ndarray):
        with np.printoptions(linewidth=150, formatter={'float': lambda x: f'{x:10.4f}'}):
            print(M)
    else:
        print(f'{M:10.4f}')

## Triangulation

The problem we want to solve is

$$ \underset{p^A \in \mathbb{R}^3}{\text{minimize}} \qquad \dfrac{1}{2}\left( \| b_1 - \eta\left( K (R^{B_1}_A p^A + p^{B_1}_A) \right) \|^2 + \| b_2 - \eta\left( K (R^{B_2}_A p^A + p^{B_2}_A) \right) \|^2 \right)$$

where the function

$$ \eta\colon\mathbb{R}^3 \rightarrow \mathbb{R}^2 $$

is defined by

$$ \eta(v) = \begin{bmatrix} v_1 / v_3 \\ v_2 / v_3 \end{bmatrix}. $$

**PROBLEM 1.** Answer the following questions:
* What is being minimized? (I.e., what do the two parts of the cost represent?)
* What is the decision variable and what does it represent?
* What are the constant terms in the cost (i.e., the "givens") and what do they represent?
* What is $\eta$ and what does it represent?

Load data.

In [None]:
# Load data from JSON file (the encoding is given explicitly so that reading
# the file does not depend on the platform's default text encoding)
with open('data_triangulation.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert all lists to numpy arrays
data = {k: np.array(v) for k, v in data.items()}

# Rename the variables we need, for convenience (the keys in the file are
# numbered from 0, whereas the names used in this notebook are numbered from 1)
b_1 = data['b_0']
b_2 = data['b_1']
R_inB1_ofA = data['R_inB0_ofA']
R_inB2_ofA = data['R_inB1_ofA']
p_inB1_ofA = data['p_inB0_ofA']
p_inB2_ofA = data['p_inB1_ofA']
p_inA_true = data['p_inA']
K = data['K']  # already a numpy array after the conversion above

Choose an initial guess.

In [None]:
# Start from the true point perturbed by zero-mean Gaussian noise, with one
# independent draw (scaled by 0.5) per component of the point
num_components = len(data['p_inA'])
p_inA_initial_value = p_inA_true + 0.5 * rng.standard_normal(size=num_components)

### Optimize (SymForce)

Define residual function.

In [None]:
def sf_projection(
    T_inC_ofW: sf.Pose3,
    p_inW: sf.V3,
    fx: sf.Scalar,
    fy: sf.Scalar,
    cx: sf.Scalar,
    cy: sf.Scalar,
    epsilon: sf.Scalar,
) -> sf.V2:
    """
    Symbolic pinhole projection of a point into an image.

    The point p_inW is first transformed by T_inC_ofW. Its first two
    coordinates are then divided by its third coordinate (the depth), scaled
    by (fx, fy), and offset by (cx, cy).

    The depth is bounded below by epsilon before the division. So, if the
    depth is non-positive, the projection is pushed far away from the image
    center instead of dividing by zero or by a negative number.
    """
    point_in_camera = T_inC_ofW * p_inW

    # Depth used for the division, never smaller than epsilon
    depth = sf.Max(point_in_camera[2], epsilon)

    u = fx * (point_in_camera[0] / depth) + cx
    v = fy * (point_in_camera[1] / depth) + cy
    return sf.V2(u, v)

def sf_projection_residual(
    T_inC_ofW: sf.Pose3,
    p_inW: sf.V3,
    q: sf.V2,
    fx: sf.Scalar,
    fy: sf.Scalar,
    cx: sf.Scalar,
    cy: sf.Scalar,
    epsilon: sf.Scalar,
) -> sf.V2:
    """
    Symbolic reprojection error: the projection of p_inW into the image
    (see sf_projection) minus the image point q.
    """
    return sf.V2(
        sf_projection(T_inC_ofW, p_inW, fx, fy, cx, cy, epsilon) - q
    )

Create initial values, factors, and optimizer.

In [None]:
# Everything the residuals read, by name: camera intrinsics taken from K,
# the two poses, the point to be estimated (starting at the initial guess),
# the two image points, and the epsilon used to bound the depth from below.
initial_values = Values(
    fx=K[0, 0],
    fy=K[1, 1],
    cx=K[0, 2],
    cy=K[1, 2],
    T_inB1_ofA=sym.Pose3(
        R=sym.Rot3.from_rotation_matrix(R_inB1_ofA),
        t=p_inB1_ofA,
    ),
    T_inB2_ofA=sym.Pose3(
        R=sym.Rot3.from_rotation_matrix(R_inB2_ofA),
        t=p_inB2_ofA,
    ),
    p_inA=p_inA_initial_value,
    b_1=b_1,
    b_2=b_2,
    epsilon=sym.epsilon,
)

# Only the point is optimized; every other value is held fixed
optimized_keys = ['p_inA']

# One reprojection factor per view. The keys are listed in the same order
# as the arguments of sf_projection_residual.
factors = [
    Factor(
        residual=sf_projection_residual,
        keys=[
            f'T_inB{i}_ofA',
            'p_inA',
            f'b_{i}',
            'fx',
            'fy',
            'cx',
            'cy',
            'epsilon',
        ],
    )
    for i in (1, 2)
]

# Optimizer settings
optimizer_params = Optimizer.Params(
    iterations=100,
    use_diagonal_damping=True,
    lambda_down_factor=0.1,
    lambda_up_factor=5.,
    early_exit_min_reduction=1e-10,
)

# Create optimizer
optimizer = Optimizer(
    factors=factors,
    optimized_keys=optimized_keys,
    debug_stats=True,
    params=optimizer_params,
)

Do optimization.

In [None]:
# Run the optimizer starting from the initial values
result = optimizer.optimize(initial_values)

# Stop here unless the optimizer reports success
assert result.status == Optimizer.Status.SUCCESS

### Optimize (from scratch)

**PROBLEM 2.** Define $m$, $n$, $x$, $y_1, \dotsc, y_n$, and $f_1, \dotsc, f_n$ so that the cost function can be written in standard form:
$$ \underset{x \in \mathbb{R}^m}{\text{minimize}} \qquad \dfrac{1}{2} \sum_{i=1}^{n} \| y_i - f_i(x) \|^2.$$

**PROBLEM 3.** Define $z$ and derive expressions for $v_1, \dotsc, v_n$ and $H_1, \dotsc, H_n$ so that a first-order (linear) approximation to the cost function near $x_0\in\mathbb{R}^m$ can be written in standard form:

$$ \underset{z \in \mathbb{R}^m}{\text{minimize}} \qquad \dfrac{1}{2} \sum_{i=1}^{n} \| v_i(x_0) - H_i(x_0) z \|^2.$$

**PROBLEM 4.** Implement the Levenberg-Marquardt algorithm (i.e., damped Gauss-Newton) for iterative nonlinear least-squares minimization. Please describe the variant that you are using (e.g., what is the stopping criterion, are you doing line search or not, is the damping parameter $\mu$ constant and — if not — what is your logic for updating $\mu$ from one iteration to the next, how do you decide whether or not to take a step, etc.). You may want to use the following template code, but feel free to disregard this template and start from scratch.

In [None]:
# Template for the Levenberg-Marquardt iteration of PROBLEM 4.
#
# NOTE(review): this cell is a scaffold and does not run as written. The names
# e_prev, e_new, and rel_reduction are only assigned inside commented-out
# placeholders, so the print statement in the loop raises a NameError until
# those placeholders are filled in.

# Choose a damping parameter (FIXME)
mu = 0.

# Choose an initial guess (a copy, so that the initial value used by the
# SymForce optimizer is not modified by in-place updates to p_inA)
p_inA = p_inA_initial_value.copy()

# Compute the initial error
# e_prev = ...

# Iterate
# NOTE(review): the name `iter` shadows the Python builtin of the same name
# for the rest of the notebook.
iter = 0
max_iters = 100
while iter < max_iters:

    # Get v_i and H_i
    # ...

    # Get z
    # ...

    # Get new error (that would result from taking the step z) and relative reduction
    # e_new = ...
    # rel_reduction = ...

    # Show current status (iteration : previous error, new error : relative reduction)
    print(f'{iter:5d} : {e_prev:11.4e}, {e_new:11.4e} : {rel_reduction:11.4e}')

    # Decide whether or not to take a step - if so, update p_inA and e_prev
    # if (...):
    #     p_inA = ...
    #     e_prev = e_new

    # Update iteration
    iter += 1

    # Stop if rel_reduction is small enough (but still positive) - a
    # non-positive rel_reduction does not stop the loop, so iteration
    # continues until max_iters is reached
    if (rel_reduction > 0) and (rel_reduction < 1e-10):
        break

Check that the result produced by your implementation is the same as the result produced by the implementation with SymForce.

In [None]:
# Estimate found by SymForce
p_inA_sf = result.optimized_values['p_inA']

# Show the two estimates one after the other, separated by a blank line
print('p_inA (symforce) =')
myprint(p_inA_sf)
print()
print('p_inA (from scratch):')
myprint(p_inA)

# The two estimates must agree to within the default tolerances of np.allclose
assert np.allclose(p_inA_sf, p_inA)

### Analysis of uncertainty

Get the covariance associated with the estimate of $p^A$ found with SymForce.

In [None]:
# Covariance of all optimized variables, evaluated at the optimized values
cov = optimizer.compute_full_covariance(result.optimized_values)

# Pick out the diagonal block that belongs to p_inA. (Since p_inA is the only
# optimized variable, this block is the whole matrix and the extraction is
# not strictly necessary - it is done anyway as an example.)
idx = optimizer.linearization_index()
p_inA_entry = idx['p_inA']
p_inA_block = slice(p_inA_entry.offset, p_inA_entry.offset + p_inA_entry.tangent_dim)
cov_p_inA_sf = cov[p_inA_block, p_inA_block]

**PROBLEM 5.** Write an expression for the covariance associated with the estimate of $p^A$ found "from scratch."

**PROBLEM 6.** Evaluate the expression you just wrote to get the covariance associated with the estimate of $p^A$ found "from scratch."

In [None]:
# FIXME
# cov_p_inA = ...

Check that the two results are the same.

In [None]:
# Show the two covariance matrices one after the other, separated by a blank line
print('cov_p_inA (symforce) =')
myprint(cov_p_inA_sf)
print()
print('cov_p_inA (from scratch):')
myprint(cov_p_inA)

# The two matrices must agree to within the default tolerances of np.allclose
assert np.allclose(cov_p_inA_sf, cov_p_inA)

**PROBLEM 7.** Which components of your estimate of $p^A$ are the most certain and which are the most uncertain?

**PROBLEM 8.** Plot the 90%-confidence ellipse that is associated with the first (i.e., "$x$") component and third (i.e., "$z$") component of the estimate. On the same axis, visualize the frames $B_1$ and $B_2$ (e.g., plot their origins and the directions of their $z$ axes). Explain what you are seeing.

**PROBLEM 9.** Our expression for the triangulation problem was based on the assumption that errors in the image coordinates $b_1$ and $b_2$ were normally distributed with zero mean (pixels) and unit variance (squared pixels). What if the variance was $\sigma^2$ instead, for some $\sigma > 0$? What — if anything — would change about the cost, the resulting estimate of $p^A$, and the covariance associated with this estimate?