In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Distribution of the empirical variance

Assume that the filling volume $X$ is normally distributed with expected value $\mu$ and standard deviation $\sigma$. Both $\mu$ and $\sigma$ are unknown. We estimate the unknown variance $\sigma^2$ with

$$
s_x^2 = \frac{1}{n - 1} \sum_{i = 1}^n (X_i - \bar{X})^2
$$

and then the quantity

$$
Q = \frac{(n - 1)\, s_x^2}{\sigma^2} = \frac{\sum_{i=1}^n (X_i - \bar{X})^2}{\sigma^2}
$$

has a known distribution. It is called the chi-squared ($\chi^2$) distribution with $n - 1$ degrees of freedom.

# Confidence interval for standard deviation

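Let $X_1, \dots, X_n$ be independent measurements with $X_i \sim N(\mu, \sigma^2)$, where $\theta = \sigma^2$ should be estimated from the data (a confidence interval for the standard deviation $\sigma$ itself is obtained by taking the square root of both bounds). Then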

$$
\begin{align}
\hat{\theta}_L &= \frac{(n - 1)s_x^2}{q_{1 - \frac{\alpha}{2}}} \\
\hat{\theta}_U &= \frac{(n - 1)s_x^2}{q_{\frac{\alpha}{2}}}
\end{align}
$$

define a two-sided confidence interval with level $1 - \alpha$. Here $q_{\beta}$ is the $\beta$-quantile of the $\chi^2$-distribution with $n - 1$ degrees of freedom.

In [2]:
# Simulate 100 measurements from N(mu=1, sigma=std); the true variance is
# therefore known and can be compared with the estimated interval.
std = 0.1337
# Seeded generator so that Restart Kernel -> Run All reproduces the same sample
# (and hence the same confidence interval) on every run.
rng = np.random.default_rng(42)
x = stats.norm(1, std).rvs(size=100, random_state=rng)
x[:10]

array([0.90770125, 1.23767567, 1.04638838, 0.8994124 , 1.02956507,
       0.56225592, 1.08670374, 1.05022968, 1.1747637 , 1.00523278])

In [3]:
# Two-sided (1 - alpha) confidence interval for the variance, based on the
# chi-squared distribution with n - 1 degrees of freedom.
alpha = 0.05
n = x.shape[0]

# Lower- and upper-tail chi-squared quantiles
q1 = stats.chi2.ppf(alpha / 2, df=n - 1)
q2 = stats.chi2.ppf(1 - alpha / 2, df=n - 1)

# Unbiased sample variance (divides by n - 1)
variance = x.var(ddof=1)

# The larger quantile gives the lower bound, the smaller one the upper bound
lo = ((n - 1) * variance) / q2
up = ((n - 1) * variance) / q1

print(f"Confidence interval: [{lo}, {up}]")

Confidence interval: [0.013944984549628035, 0.024411345127434987]


In [4]:
# Variance of the distribution the sample was drawn from, for comparison
# with the confidence interval printed above.
true_variance = std ** 2
print(f"Correct variance: {true_variance}")

Correct variance: 0.017875690000000003
