In [10]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.stats import f

**Setup**

**Setup**

*The F-Test is a statistical test for the hypothesis that two distributions have the same variance.*

Let $X_1,...,X_n$ iid $\sim \mathcal{N}(\mu_x, \sigma_x^2)$ and  $Y_1,...,Y_n$ iid $\sim \mathcal{N}(\mu_y, \sigma_y^2)$, with X and Y independent.

The null hypothesis is $H_0 : \sigma_x^2 / sigma_y^2 = 1$, ie. both distributions have the same variance.

The pivot is,

$
\begin{align*}
Z(X,Y) := \frac{S_x^2}{S_y^2},
\end{align*}
$

where $S^2$ is the pooled unbiased sample variance, ie.

$
\begin{align*}
S_x^2 = \frac{1}{n-1}\sum_{i=1}^n(X_i - \bar{X})^2,\ \ \ S_y^2 = \frac{1}{m-1}\sum_{i=1}^m(Y_i - \bar{Y})^2,
\end{align*}
$

where $n,m$ are the number of samples of $X$ and $Y$ respectively.

*If $X,Y$ are normally distributed*, then $Z$ follows an F-distribution with $n-1$ over $m-1$ degrees of freedom. If we want to test deviations in both directions (ie. $\frac{S_x^2}{S_y^2} < 1$ and $\frac{S_x^2}{S_y^2} > 1$) the test is two tailed, and the p-value is (for a given pivot $\bar{z}$),

$
\mathbb{P}\left(|Z|>\bar{z}\right) = 2*\mathbb{P}\left(Z>|\bar{z}|\right) = 2 * (1 - \mathbb{P}\left(Z \leq|\bar{z}|\right))
$

To implement, I calculate the pivot and use cdf in `scipy.stats.f` to compute the p-value as described above.

In [94]:
N = np.array([100])#(10**(np.arange(0.5,5,0.5))).round().astype(int)
trials = 100
mu = 0
sigma_x = 0.5
sigma_y = 1
alpha = 0.05

print("SETUP\nno. of samples: %s" % N)
print("no. of trials for each sample size: %d" % trials)
print("X ~ N(%0.2f, %0.2f)" % (mu, sigma_x))
print("Y ~ N(%0.2f, %0.2f)" % (mu, sigma_y))
print("In this case, the null hypothesis is %s" % (sigma_x == sigma_y))

SETUP
no. of samples: [100]
no. of trials for each sample size: 100
X ~ N(0.00, 0.50)
Y ~ N(0.00, 1.00)
In this case, the null hypothesis is False


In [None]:
def ftest(X,Y,axis):
    """Return the pivot and p-value of the F-test for the given samples"""
    S_x = np.var(X, axis, ddof=1)
    S_y = np.var(Y, axis, ddof=1)
    d1, d2 = len(X) - 1, len(Y) - 1
    p_value = f.cdf(Z, d1, d2)
    return Z, p_value

In [96]:
# Run tests and plot results
plt.figure(figsize=(15,7))
rejected = np.zeros(len(N))
for i,n in enumerate(N):
    np.random.seed(40)
    X = np.random.normal(mu, sigma_x,size=(trials,n))
    Y = np.random.normal(mu, sigma_y, size=(trials,n))
    (_, p_value) = ftest(X, Y, axis=0)
    plt.plot(p_value, "-s", label="%d samples" % n)
    rejected[i] = np.sum((p_value <= alpha/2).astype(int)) / trials
plt.plot([0,trials], [alpha, alpha], '-r')
plt.xlabel("trial")
plt.ylabel("p-value")
plt.legend()

fig = plt.figure(figsize=(15,7))
plt.plot(rejected, '-o')
plot_meaning = "Statistical power" if sigma_x != sigma_y else "Type 1 error rate"
plt.title("Percentage of rejections for %d trials at level %0.4f (%s)" % (trials, alpha, plot_meaning))
ax = plt.gca()
ax.set_xticklabels(N)
plt.axis([0, len(N), 0, 1])
plt.xlabel("no. samples")
plt.ylabel("rejection %")

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

<Figure size 1080x504 with 0 Axes>

In [88]:
    axis=1
    S_x = np.var(X, axis, ddof=1)
    S_y = np.var(Y, axis, ddof=1)
    Z = S_x / S_y
    d1, d2 = len(X) - 1, len(Y) - 1
    p_value = 2*(1 - f.cdf(abs(Z), d1, d2))

In [93]:
f.cdf(1-abs(Z)

SyntaxError: unexpected EOF while parsing (<ipython-input-93-8e20f186c693>, line 1)

In [91]:
p_value

array([1.99999975, 2.        , 2.        , 2.        , 2.        ,
       2.        , 1.99999923, 2.        , 2.        , 2.        ,
       2.        , 1.99999998, 2.        , 2.        , 2.        ,
       2.        , 2.        , 2.        , 2.        , 2.        ,
       2.        , 2.        , 2.        , 2.        , 2.        ,
       1.99999999, 1.99999999, 2.        , 1.99999999, 2.        ,
       2.        , 2.        , 2.        , 2.        , 2.        ,
       2.        , 2.        , 2.        , 1.99999997, 2.        ,
       2.        , 2.        , 2.        , 1.99999999, 2.        ,
       2.        , 2.        , 2.        , 1.99999999, 2.        ,
       2.        , 2.        , 2.        , 1.99999994, 2.        ,
       2.        , 2.        , 2.        , 2.        , 1.99999999,
       2.        , 1.99999999, 2.        , 2.        , 2.        ,
       2.        , 2.        , 2.        , 2.        , 2.        ,
       1.99999999, 2.        , 2.        , 2.        , 2.     