In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
sys.path.append(os.path.realpath('..'))

In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import plotly.express as ex
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from copy import deepcopy

from IPython.display import display
import ipywidgets.widgets as widgets

from tracking_v2.target import ConstantVelocityTarget
from tracking_v2.kalman import LinearKalmanFilter
from tracking_v2.motion import ConstantVelocityModel, ConstantAccelerationModel, SingerAccelerationModel
from tracking_v2.sensor import GeometricSensor
from tracking_v2.evaluation import Runner, run_many, evaluate_many, plot_nees, evaluate_nees, plot_error

from tracking.util import to_df

In [None]:
def two_columns(fig1, fig2):
    """Display two figures next to each other.

    Each figure is rendered into its own Output widget (50% width) and the
    two widgets are then shown in a single HBox, fig1 first.
    """
    panes = []
    for figure in (fig1, fig2):
        pane = widgets.Output(layout={'width': '50%'})
        # Anything displayed inside the context is captured by the widget.
        with pane:
            display(figure)
        panes.append(pane)

    display(widgets.HBox(panes))

# Target and sensor

In this document we will consider a simple target moving with constant velocity along the X axis. The sensor produces 3D measurements in Cartesian space with a unit covariance matrix.

In [None]:
# Shared simulation inputs, both constructed with their default arguments.
# `target` is used by every experiment below; `sensor` is used by all of them
# except `_drift`, which builds its own seeded sensor.
target = ConstantVelocityTarget()
sensor = GeometricSensor()

# Kalman Filter drift

In some situations, the Kalman Filter's error has a tendency to drift, i.e. to accumulate over time into large values of NEES (normalized estimation error squared).

Our target is moving with a perfectly constant velocity, so the appropriate motion model would have no process noise, $Q = 0$. However, this leads to the state covariance $\hat{P}$ converging to zero, which places the majority of the "trust" in the current state estimate and increasingly less in each new measurement. With incorrect velocity estimates, this can push the state estimate quite far off while the expected variance is minuscule.

Let's compare two situations: a KF with zero process noise and another with CV motion model with noise set to $1$.

In [None]:
def _drift(q):
    """Simulate one run of a constant-velocity Kalman filter and return its Runner.

    `q` is forwarded to ConstantVelocityModel (the process-noise setting the
    surrounding text calls Q). The runner is executed with run_many(1, 500),
    i.e. a single run of 500 iterations.
    """
    # 3x6 matrix selecting the first three of the six state components
    # (presumably the measurement matrix of the filter).
    select_first_three = [[1, 0, 0, 0, 0, 0],
                          [0, 1, 0, 0, 0, 0],
                          [0, 0, 1, 0, 0, 0]]
    kf = LinearKalmanFilter(ConstantVelocityModel(q), select_first_three)

    # A dedicated sensor with a fixed seed is used here instead of the
    # notebook-level `sensor`, so both values of q see the same seed.
    runner = Runner(target, GeometricSensor(seed=8), kf)
    runner.run_many(1, 500)
    return runner

In [None]:
# Single-run experiments: r0 without process noise (Q = 0), r1 with Q = 1.
r0, r1 = _drift(0), _drift(1)

In [None]:
def _plot_error(runner):
    """Plot the absolute error of the first three state components for one run.

    The first 100 iterations are left out. Returns a plotly express line
    figure with one facet row per component (labelled x, y and z).
    """
    skip = 100
    steps = np.arange(runner.n - skip)

    # `truth` is indexed one sample ahead of the estimates (skip + 1 vs. skip).
    abs_err = np.abs(runner.one_x_hat[skip:, :3, 0] - runner.truth[skip + 1:, :3])

    wide = to_df(np.column_stack((steps, abs_err)), columns=['time', 'x', 'y', 'z'])
    tidy = wide.melt(id_vars=['time'], value_vars=['x', 'y', 'z'],
                     var_name='dim', value_name='error')
    return ex.line(tidy, x='time', y='error', facet_row='dim')

two_columns(_plot_error(r0), _plot_error(r1))

In the left-hand-side plot above, the absolute error accumulates up to about $0.5$ while in the right-hand-side plot the error does not show the pattern of accumulation, even though it consistently reaches much higher values, up to about $4$. However, the apparent advantage of the model with $Q=0$ disappears once we look at normalized errors (NEES).

In [None]:
def _plot_nees(runner):
    """Plot the NEES of a single run over time, with a 95% chi-square band.

    NEES is evaluated on the first three state components; the first 100
    scores are dropped before plotting. The dashed red lines are the 2.5%
    and 97.5% quantiles of a chi-square distribution with `nees.dim`
    degrees of freedom.
    """
    nees = evaluate_nees(runner.one_x_hat[:, :3, :],
                         runner.one_P_hat[:, :3, :3],
                         runner.truth[1:, :3])

    steps = np.arange(runner.n - 100)
    frame = to_df(np.column_stack((steps, nees.scores[100:])), columns=['time', 'nees'])
    fig = ex.line(frame, x='time', y='nees')

    for bound in sp.stats.chi2.ppf([0.025, 0.975], nees.dim):
        fig.add_hline(y=bound, line_width=.5, line_dash="dash", line_color="red")

    return fig

two_columns(_plot_nees(r0), _plot_nees(r1))

Using non-zero process noise (right-hand side above) leads to NEES scores which do not show the tendency to accumulate over time. They also seem to remain within the 95% confidence interval. This comes at the cost of statistical consistency of the filter: the mean NEES of multiple independent runs of the filter (with different random seeds governing the measurement noise) falls within the predicted 95% confidence interval for $Q=0$ but falls well below it for $Q=1$.

Using zero process noise (left-hand side above) leads to the covariance estimate converging to zero, which exacerbates the non-scaled error and takes it from $0.4$ (meters) to almost 20 (standard deviations).

In [None]:
def _drift_many(Q):
    """Run the constant-velocity Kalman filter many times and return the Runner.

    `Q` is forwarded to ConstantVelocityModel. The notebook-level `target`
    and `sensor` are used, and the runner is executed with
    run_many(100, 500): 100 independent runs of 500 iterations each.
    """
    # 3x6 matrix selecting the first three of the six state components
    # (presumably the measurement matrix of the filter).
    select_first_three = [[1, 0, 0, 0, 0, 0],
                          [0, 1, 0, 0, 0, 0],
                          [0, 0, 1, 0, 0, 0]]
    kf = LinearKalmanFilter(ConstantVelocityModel(Q), select_first_three)

    runner = Runner(target, sensor, kf)
    runner.run_many(100, 500)
    return runner

# Multi-run counterparts of the single-run experiments (these rebind r0 and r1).
r0, r1 = _drift_many(0), _drift_many(1)

In [None]:
def _plot_nees(runner):
    """Plot the NEES averaged across runs over time, with a 95% band.

    NEES is evaluated on the first three state components of every run,
    the first 100 iterations are dropped and the remaining scores are
    averaged over axis 0. The dashed red lines are the 2.5% and 97.5%
    chi-square quantiles with `runner.m * nees.dim` degrees of freedom,
    divided by `runner.m` (presumably the number of runs).
    """
    nees = evaluate_nees(runner.many_x_hat[:, :, :3, :],
                         runner.many_P_hat[:, :, :3, :3],
                         runner.truth[1:, :3])

    mean_nees = nees.scores[:, 100:].mean(axis=0)
    steps = np.arange(runner.n - 100)
    frame = to_df(np.column_stack((steps, mean_nees)), columns=['time', 'nees'])
    fig = ex.line(frame, x='time', y='nees')

    dof = runner.m * nees.dim
    for bound in sp.stats.chi2.ppf([0.025, 0.975], dof) / runner.m:
        fig.add_hline(y=bound, line_width=.5, line_dash="dash", line_color="red")

    return fig

two_columns(_plot_nees(r0), _plot_nees(r1))

Let's compare that with non-scaled error values. Above we have the mean NEES over time, computed from 100 independent runs. With $Q=0$ (on the left) we observe statistical consistency - the mean NEES stays within the 95% confidence interval about 95% of the time. In comparison, with $Q=1$ (on the right) the mean NEES stays below the 95% CI 100% of the time. Thus, the filter is no longer statistically consistent but the covariance estimate better represents the actual error of the position estimate.

In [None]:
def _plot_error(runner):
    """Plot summary curves of the Euclidean error across runs, over time.

    For every run the norm of the error in the first three state components
    is computed (first 100 iterations dropped). Three curves are drawn,
    coloured by type: the mean across runs ('avg') and the 0.025 and 0.975
    quantiles across runs.
    """
    steps = np.arange(runner.n - 100)
    # `truth` is indexed one sample ahead of the estimates (101 vs. 100).
    dist = np.linalg.norm(runner.many_x_hat[:, 100:, :3, 0] - runner.truth[101:, :3], axis=2)

    curves = (
        ('avg', dist.mean(axis=0)),
        ('.025', np.quantile(dist, .025, axis=0)),
        ('.975', np.quantile(dist, .975, axis=0)),
    )

    frames = []
    for label, values in curves:
        frame = to_df(np.array((steps, values)).T, columns=['time', 'err'])
        frame['type'] = label
        frames.append(frame)

    tidy = pd.concat(frames, axis=0).melt(id_vars=['time', 'type'], value_vars=['err'],
                                          var_name='dim', value_name='error')
    return ex.line(tidy, x='time', y='error', color='type', facet_row='dim')

two_columns(_plot_error(r0), _plot_error(r1))

As we see above, for $Q=0$:
* `+` the average NEES across multiple runs is consistent with the predicted 95% confidence interval
* `+` the non-scaled error is low
* `-` NEES on individual runs can accumulate to very large values - above 15 - which means that a tracker might report fairly correct position but with extremely incorrect covariance

Conversely, for $Q=1$:
* `-` the average NEES across multiple runs is inconsistent with the predicted 95% CI
* `-` the non-scaled error is high
* `+` the error reported for individual runs matches the actual error much better

As the last thing, let's take a look at the scaled (NEES) and non-scaled (linear) error as a function of the process noise intensity $Q$. For each value of $Q$ we perform 100 independent runs of the Kalman Filter, each taking 500 iterations. We then calculate the mean and the 0.975 quantile of NEES and non-scaled errors for each iteration, across all 100 independent runs. Below, you can see boxplots of those four metrics: each boxplot aggregates 400 data points (the first 100 iterations of each run are discarded).

In [None]:
# Sweep the process noise Q. For every value, summarise NEES and Euclidean
# error per iteration (mean and 0.975 quantile across runs), skipping the
# first 100 iterations. `run` and `nees` from the last iteration are read
# again by the plotting cell that follows.
parts = []
for Q in [0, .001, .002, .005, .01, .02, .05, .1, .2, .5, 1, 1.5, 2, 2.5, 5, 10, 20]:
    run  = _drift_many(Q)
    nees = evaluate_nees(run.many_x_hat[:, 100:, :3, :],
                         run.many_P_hat[:, 100:, :3, :3],
                         run.truth[101:, :3])
    err  = np.linalg.norm(run.many_x_hat[:, 100:, :3, 0] - run.truth[101:, :3], axis=2)

    # One column per metric, one row per iteration.
    metrics = {
        'nees_mean': nees.scores.mean(axis=0),
        'nees_q975': np.quantile(nees.scores, .975, axis=0),
        'err_mean':  err.mean(axis=0),
        'err_q975':  np.quantile(err, .975, axis=0),
    }
    part = to_df(np.column_stack(list(metrics.values())), columns=list(metrics))

    # Q is stored as a string so the box plot treats it as a category.
    parts.append(part.assign(Q=str(Q)))

data = pd.concat(parts)

In [None]:
# Box plots of the four per-iteration metrics, grouped by Q.
metrics_long = data.melt(id_vars=['Q'],
                         value_vars=['nees_mean', 'nees_q975', 'err_mean', 'err_q975'],
                         var_name='metric', value_name='value')
fig = ex.box(metrics_long, x='Q', y='value', color='metric')

# 95% interval for the NEES averaged over run.m runs; `run` and `nees` are the
# values left over from the last iteration of the sweep in the previous cell.
ci = sp.stats.chi2.ppf([0.025, 0.975], run.m * nees.dim) / run.m
for bound in ci:
    fig.add_hline(y=bound, line_width=.5, line_dash="dash", line_color="red")

fig.show()

As expected, only for $Q=0$ does the mean NEES fall within the 95% confidence interval. However, it is also the value of $Q$ for which the 0.975 quantile of NEES is the highest, which is due to the accumulation of error within each independent run of the filter. This is also where the non-scaled error is the lowest, which means that in any given iteration of any individual run, we expect a very precise position estimate and a very imprecise covariance estimate.

The 0.975 quantile of NEES falls within the 95% confidence interval somewhere around $Q=2$. This is where, most of the time within a single run, we can trust that the actual error of the position estimate matches the reported estimate of the covariance matrix.

Let's now take one last look at boxplots of all of the NEES and error data: each individual boxplot will aggregate $100 \times 400$ data points.

In [None]:
# Sweep the process noise Q again, this time keeping every individual NEES
# and Euclidean-error value (all runs, all iterations after the first 100).
# `run` and `nees` from the last iteration are read again by the plotting
# cell that follows.
parts = []
for Q in [0, .001, .002, .005, .01, .02, .05, .1, .2, .5, 1, 1.5, 2, 2.5, 5, 10, 20]:
    run  = _drift_many(Q)
    nees = evaluate_nees(run.many_x_hat[:, 100:, :3, :],
                         run.many_P_hat[:, 100:, :3, :3],
                         run.truth[101:, :3])
    err  = np.linalg.norm(run.many_x_hat[:, 100:, :3, 0] - run.truth[101:, :3], axis=2)

    # Flatten runs x iterations into one long column per metric.
    flat = np.column_stack((nees.scores.ravel(), err.ravel()))
    part = to_df(flat, columns=['nees', 'err'])

    # Q is stored as a string so the box plot treats it as a category.
    parts.append(part.assign(Q=str(Q)))

data = pd.concat(parts)

In [None]:
# Box plots of all individual NEES and error values, grouped by Q.
values_long = data.melt(id_vars=['Q'], value_vars=['nees', 'err'],
                        var_name='metric', value_name='value')
fig = ex.box(values_long, x='Q', y='value', color='metric')
# Draw the boxes only, without individual points.
fig.update_traces(boxpoints=False)

# 95% interval for the NEES averaged over run.m runs; `run` and `nees` are the
# values left over from the last iteration of the sweep in the previous cell.
ci = sp.stats.chi2.ppf([0.025, 0.975], run.m * nees.dim) / run.m
for bound in ci:
    fig.add_hline(y=bound, line_width=.5, line_dash="dash", line_color="red")

fig.show()

# Multiple independent runs

## NEES statistics

In [None]:
# Constant-velocity filter with zero process noise (Q = 0).
motion = ConstantVelocityModel(0)
# The 3x6 matrix selects the first three of the six state components
# (presumably the measurement matrix of the filter).
kf = LinearKalmanFilter(motion, [[1, 0, 0, 0, 0, 0],
                                 [0, 1, 0, 0, 0, 0],
                                 [0, 0, 1, 0, 0, 0]])

# Presumably 100 independent runs of 400 iterations each (same argument order
# as Runner.run_many used earlier - TODO confirm). The result is unpacked
# straight into evaluate_many.
cv0 = run_many(100, 400, target, sensor, kf)
cv0_eval = evaluate_many(*cv0)

In [None]:
# Position NEES of the Q = 0 filter.
plot_nees(cv0_eval.position_nees)

In [None]:
# Constant-velocity filter with process noise Q = 1.
motion = ConstantVelocityModel(1)
# The 3x6 matrix selects the first three of the six state components
# (presumably the measurement matrix of the filter).
kf = LinearKalmanFilter(motion, [[1, 0, 0, 0, 0, 0],
                                 [0, 1, 0, 0, 0, 0],
                                 [0, 0, 1, 0, 0, 0]])

# Presumably 100 independent runs of 400 iterations each (same argument order
# as Runner.run_many used earlier - TODO confirm). The result is unpacked
# straight into evaluate_many.
cv1 = run_many(100, 400, target, sensor, kf)
cv1_eval = evaluate_many(*cv1)

In [None]:
# Position NEES of the Q = 1 filter.
plot_nees(cv1_eval.position_nees)

In [None]:
# Constant-velocity filter with process noise Q = 3.
motion = ConstantVelocityModel(3)
# The 3x6 matrix selects the first three of the six state components
# (presumably the measurement matrix of the filter).
kf = LinearKalmanFilter(motion, [[1, 0, 0, 0, 0, 0],
                                 [0, 1, 0, 0, 0, 0],
                                 [0, 0, 1, 0, 0, 0]])

# Presumably 100 independent runs of 400 iterations each (same argument order
# as Runner.run_many used earlier - TODO confirm). The result is unpacked
# straight into evaluate_many.
cv3 = run_many(100, 400, target, sensor, kf)
cv3_eval = evaluate_many(*cv3)

In [None]:
# Position NEES of the Q = 3 filter.
plot_nees(cv3_eval.position_nees)

## All individual runs

In [None]:
# One line per independent run of the Q = 0 filter: position NEES with the
# first 25 iterations dropped.
nees_tail = cv0_eval.position_nees.scores[:, 25:]
# The time axis is derived from the data rather than hard-coded as 375
# (= 400 - 25), so x and y stay the same length if the run length changes.
steps = np.arange(nees_tail.shape[1])

fig = go.Figure()
for i, run_nees in enumerate(nees_tail):
    fig.add_trace(go.Scatter(x=steps, y=run_nees, mode='lines', legendgroup=i))
# Label the figure so it can be told apart from the other Q values.
fig.update_layout(title_text='Position NEES of individual runs, Q = 0',
                  xaxis_title='iterations after the first 25',
                  yaxis_title='nees')
fig.show()

In [None]:
# One line per independent run of the Q = 1 filter: position NEES with the
# first 25 iterations dropped.
nees_tail = cv1_eval.position_nees.scores[:, 25:]
# The time axis is derived from the data rather than hard-coded as 375
# (= 400 - 25), so x and y stay the same length if the run length changes.
steps = np.arange(nees_tail.shape[1])

fig = go.Figure()
for i, run_nees in enumerate(nees_tail):
    fig.add_trace(go.Scatter(x=steps, y=run_nees, mode='lines', legendgroup=i))
# Label the figure so it can be told apart from the other Q values.
fig.update_layout(title_text='Position NEES of individual runs, Q = 1',
                  xaxis_title='iterations after the first 25',
                  yaxis_title='nees')
fig.show()

In [None]:
# One line per independent run of the Q = 3 filter: position NEES with the
# first 25 iterations dropped.
nees_tail = cv3_eval.position_nees.scores[:, 25:]
# The time axis is derived from the data rather than hard-coded as 375
# (= 400 - 25), so x and y stay the same length if the run length changes.
steps = np.arange(nees_tail.shape[1])

fig = go.Figure()
for i, run_nees in enumerate(nees_tail):
    fig.add_trace(go.Scatter(x=steps, y=run_nees, mode='lines', legendgroup=i))
# Label the figure so it can be told apart from the other Q values.
fig.update_layout(title_text='Position NEES of individual runs, Q = 3',
                  xaxis_title='iterations after the first 25',
                  yaxis_title='nees')
fig.show()