# Normal distributions: simulation and confidence ellipse

Import everything we need.

In [None]:
import numpy as np
import secrets
from scipy.stats import chi2
from scipy.linalg import sqrtm
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

Create random number generator.

In [None]:
# Pick a fresh 32-bit seed and report it, so that any particular run can be
# reproduced later by hard-coding the printed value
seed = secrets.randbits(32)
print(f'seeding RNG with {seed}')

# The cells below draw their random samples from this generator
rng = np.random.default_rng(seed=seed)

Define a function to print things nicely.

In [None]:
def myprint(M):
    """
    Prints either a scalar or a numpy array with four digits after the
    decimal and with consistent spacing so it is easier to read.

    Arrays (including instances of ndarray subclasses) are printed with every
    float entry formatted as '10.4f' and with a line width of 150 characters.
    Anything else is formatted directly as '10.4f', so it must be a value
    that supports that format specification (for example, a real scalar).
    """
    # Use isinstance (rather than an exact type comparison) so that subclasses
    # of np.ndarray also take the array branch instead of failing in the
    # scalar format below
    if isinstance(M, np.ndarray):
        with np.printoptions(linewidth=150, formatter={'float': lambda x: f'{x:10.4f}'}):
            print(M)
    else:
        print(f'{M:10.4f}')

## How to simulate a normally distributed random vector

If $x \sim \mathcal{N}(0, I)$, then we say that the random vector $x \in \mathbb{R}^2$ has a standard normal distribution, and simulate it as follows:

In [None]:
# Choose number of points
n = 100

# Sample all of the points with a single call: x has shape (n, 2), and each
# row is one independent draw from the standard normal distribution in R^2
x = rng.standard_normal(size=(n, 2))

# Plot the points (first coordinate against second coordinate)
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.plot(x[:, 0], x[:, 1], '.')
ax.set_xlim([-5., 5.])
ax.set_ylim([-5., 5.])
ax.set_aspect('equal')
ax.grid()

If $y \sim \mathcal{N}(\mu_y, \Sigma_y)$, then we say that the random vector $y \in \mathbb{R}^2$ has mean $\mu_y \in \mathbb{R}^2$ and covariance $\Sigma_y \in \mathbb{R}^{2\times 2}$. One way to simulate $y$ is as
$$ y = \Sigma_y^{1/2} x + \mu_y $$
where $x \sim \mathcal{N}(0, I)$ and where $\Sigma_y^{1/2}$ is the matrix square root of $\Sigma_y$:

In [None]:
# Choose mean and covariance
mu_y = np.array([-1., 2.])
cov_y = np.array([[2., 1.],
                  [1., 1.]])

# Find matrix square root of covariance
A = sqrtm(cov_y)

# Choose number of points
n = 100

# Sample x from a standard normal distribution (one sample per row, so x has
# shape (n, 2))
x = rng.standard_normal(size=(n, 2))

# Compute y as a linear transformation of x. The samples are stored as rows,
# so applying y_i = A x_i + mu_y to every row is x @ A.T + mu_y, with mu_y
# broadcast across the rows.
y = x @ A.T + mu_y

# Plot the points
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.plot(y[:, 0], y[:, 1], '.')
ax.set_xlim([-5., 5.])
ax.set_ylim([-5., 5.])
ax.set_aspect('equal')
ax.grid()

Note that the covariance matrix must be symmetric ($\Sigma_y = \Sigma_y^\top$) and positive definite ($\Sigma_y > 0$).

## How to plot a confidence ellipse

The random vector $y \in \mathbb{R}^2$ with distribution $y \sim \mathcal{N}(\mu_y, \Sigma_y)$ has the probability density function
$$ p(y) = \dfrac{1}{2\pi\sqrt{\det(\Sigma_y)}} \exp\left( -\dfrac{1}{2} (y - \mu_y)^\top \Sigma_y^{-1} (y - \mu_y) \right). $$
Any point sampled from this distribution will satisfy
$$ (y - \mu_y)^\top \Sigma_y^{-1} (y - \mu_y) \leq F^{-1}_{\chi_2^2}(c) $$
with probability $c \in [0, 1]$, where $F^{-1}_{\chi_2^2}$ is the inverse cumulative distribution function that is associated with the $\chi^2_2$ ("chi-squared") distribution with two degrees of freedom. The set of all points satisfying this inequality is called a "confidence ellipse." In particular, the set of all points satisfying this inequality for $c = 0.9$ (as an example) is called the "90% confidence ellipse," because 90% of points sampled from the normal distribution with mean $\mu_y$ and covariance $\Sigma_y$ will be in this ellipse (at least in the limit as the number of sampled points gets large).

### Ellipse as image of unit ball

Remember that we could simulate $y\sim\mathcal{N}(\mu_y, \Sigma_y)$ as
$$ y = Ax + \mu_y $$
where $ A = \Sigma_y^{1/2} $ and $ x \sim \mathcal{N}(0, I) $. In the same way, if $x$ satisfies
$$ x^\top x \leq F^{-1}_{\chi_2^2}(c) $$
then
$$ y = Ax + \mu_y $$
will satisfy
$$ (y - \mu_y)^\top \Sigma_y^{-1} (y - \mu_y) \leq F^{-1}_{\chi_2^2}(c). $$
So, one way to plot a point $y$ on the boundary of the confidence ellipse is to generate a point $x$ that satisfies
$$ \|x\|=1 $$
and then to apply the transformation
$$ A\left( \sqrt{F^{-1}_{\chi_2^2}(c)} \;\right) x + \mu_y. $$

In [None]:
# Choose confidence
c = 0.9

# Get inverse cdf of the chi-squared distribution with two degrees of freedom,
# evaluated at the chosen confidence
alpha = chi2.ppf(c, df=2)

# Generate points on the unit circle (the boundary of the unit ball), one
# point per row. The angles include both 0 and 2 * pi, so the first and last
# points coincide and the plotted curve is closed.
n_ellipse = 100
theta = np.linspace(0, 2 * np.pi, n_ellipse)
x_ellipse = np.column_stack((np.cos(theta), np.sin(theta)))

# Map every point to the boundary of the confidence ellipse. Row by row this
# is y_i = A (sqrt(alpha) x_i) + mu_y, with mu_y broadcast across the rows.
y_ellipse = (np.sqrt(alpha) * x_ellipse) @ A.T + mu_y

# Create figure
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.set_xlim([-5., 5.])
ax.set_ylim([-5., 5.])
ax.set_aspect('equal')
ax.grid()

# Plot points
ax.plot(y[:, 0], y[:, 1], '.')

# Plot ellipse
ax.plot(y_ellipse[:, 0], y_ellipse[:, 1], '-', linewidth=2)

plt.show()

### Ellipse from eigenvectors and eigenvalues

Suppose that the eigenvectors of $\Sigma_y^{-1}$ are $v_1, v_2 \in \mathbb{R}^2$ and that the corresponding eigenvalues are $\lambda_1, \lambda_2 \in \mathbb{R}$. Then, the $c$-confidence ellipse associated with the distribution $\mathcal{N}(\mu_y, \Sigma_y)$ has center $\mu_y$, has semiaxes along $v_1$ and $v_2$, and has semiaxis lengths of
$$\sqrt{\dfrac{F^{-1}_{\chi_2^2}(c)}{\lambda_1}} \qquad\text{and}\qquad \sqrt{\dfrac{F^{-1}_{\chi_2^2}(c)}{\lambda_2}}$$
respectively.

Equivalently, we can work with the covariance matrix itself, which has the same eigenvectors as its inverse but reciprocal eigenvalues. Suppose (reusing the same symbols) that the eigenvectors of $\Sigma_y$ are $v_1, v_2 \in \mathbb{R}^2$ and that the corresponding eigenvalues are $\lambda_1, \lambda_2 \in \mathbb{R}$. Then, the $c$-confidence ellipse associated with the distribution $\mathcal{N}(\mu_y, \Sigma_y)$ has center $\mu_y$, has semiaxes along $v_1$ and $v_2$, and has semiaxis lengths of
$$\sqrt{F^{-1}_{\chi_2^2}(c) \; \lambda_1} \qquad\text{and}\qquad \sqrt{F^{-1}_{\chi_2^2}(c) \; \lambda_2}$$
respectively.

In [None]:
# Find eigenvalues and eigenvectors of the covariance matrix (not of its
# inverse: the semiaxis lengths below are sqrt(alpha * lambda)). The
# covariance matrix is symmetric, so use eigh, which returns real eigenvalues
# in ascending order and orthonormal eigenvectors as the columns of eigvecs.
eigvals, eigvecs = np.linalg.eigh(cov_y)
v_1 = eigvecs[:, 0]
v_2 = eigvecs[:, 1]
lambda_1 = eigvals[0]
lambda_2 = eigvals[1]

# Semiaxis lengths of the confidence ellipse along v_1 and v_2
semiaxis_1 = np.sqrt(alpha * lambda_1)
semiaxis_2 = np.sqrt(alpha * lambda_2)

# Create figure
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.set_xlim([-5., 5.])
ax.set_ylim([-5., 5.])
ax.set_aspect('equal')
ax.grid()

# Plot points
ax.plot(y[:, 0], y[:, 1], '.')

# Plot ellipse. Ellipse takes full axis lengths (twice the semiaxis lengths)
# and a rotation angle in degrees, here the direction of v_1 so that "width"
# is measured along v_1 and "height" along v_2.
ax.add_patch(Ellipse(
    xy=mu_y,
    width=(2 * semiaxis_1),
    height=(2 * semiaxis_2),
    angle=np.rad2deg(np.arctan2(v_1[1], v_1[0])),
    fill=False,
    edgecolor='C1',
    linewidth=2,
))

# Plot axes of ellipse (one semiaxis from the center along each eigenvector)
ax.plot(
    [mu_y[0], mu_y[0] + semiaxis_1 * v_1[0]],
    [mu_y[1], mu_y[1] + semiaxis_1 * v_1[1]],
    linewidth=1,
    color='C1'
)
ax.plot(
    [mu_y[0], mu_y[0] + semiaxis_2 * v_2[0]],
    [mu_y[1], mu_y[1] + semiaxis_2 * v_2[1]],
    linewidth=1,
    color='C1'
)

plt.show()

### Is the confidence ellipse correct?

Sample a much larger number of points.

In [None]:
# Choose number of points
n = 1000

# Sample x from a standard normal distribution (one sample per row, so x has
# shape (n, 2))
x = rng.standard_normal(size=(n, 2))

# Compute y as a linear transformation of x. The samples are stored as rows,
# so applying y_i = A x_i + mu_y to every row is x @ A.T + mu_y, with mu_y
# broadcast across the rows.
y = x @ A.T + mu_y

Check how many of these points are inside the confidence ellipse.

In [None]:
# Total number of points (cumulative): 1, 2, ..., len(y)
num_points = np.arange(1, len(y) + 1)

# Squared Mahalanobis distance of every point from the mean, that is,
# (y_i - mu_y)^T cov_y^{-1} (y_i - mu_y) for each row y_i. A single linear
# solve handles all of the points at once, instead of inverting cov_y again
# for every point.
dy = y - mu_y
dist_sq = np.sum(dy * np.linalg.solve(cov_y, dy.T).T, axis=1)

# Fraction of points inside confidence ellipse (cumulative)
is_inside = dist_sq <= alpha
fraction_in_ellipse = np.cumsum(is_inside) / num_points

# Points inside and outside confidence ellipse. Boolean masks keep the
# two-column shape even when no point is selected, so indexing columns of
# these arrays still works if every point falls on the same side.
y_inside = y[is_inside]
y_outside = y[~is_inside]

Plot everything.

In [None]:
# Create figure with two axes: the sampled points go on the left and the
# cumulative fraction of points inside the ellipse goes on the right
fig = plt.figure(figsize=(12, 4))
ax_points = fig.add_axes(rect=[0.05, 0.05, 0.3, 0.9])
ax_probs = fig.add_axes(rect=[0.45, 0.05, 0.5, 0.9])

# Configure axes for points
ax_points.set_xlim(-5., 5.)
ax_points.set_ylim(-5., 5.)
ax_points.set_aspect('equal')
ax_points.grid()

# Configure axes for probabilities
ax_probs.set_xlim(0, len(y))
ax_probs.set_ylim(0, 1)
ax_probs.grid()

# Plot points (inside in the default color, outside in 'C3')
for points, style in ((y_inside, {}), (y_outside, {'color': 'C3'})):
    ax_points.plot(points[:, 0], points[:, 1], '.', markersize=3, **style)

# Plot ellipse, rotated so that its "width" axis lies along v_1
ellipse_angle = np.rad2deg(np.arctan2(v_1[1], v_1[0]))
confidence_ellipse = Ellipse(
    xy=mu_y,
    width=(2 * np.sqrt(alpha * lambda_1)),
    height=(2 * np.sqrt(alpha * lambda_2)),
    angle=ellipse_angle,
    fill=False,
    edgecolor='C1',
    linewidth=2,
)
ax_points.add_patch(confidence_ellipse)

# Plot probabilities: the running fraction of points inside the ellipse,
# together with a dashed reference line at the chosen confidence c
ax_probs.plot(num_points, fraction_in_ellipse, linewidth=2)
ax_probs.plot(num_points, np.full(num_points.shape, c), '--')

plt.show()