In [106]:
import numpy as np
import scipy
import scipy.linalg


def _statistic(x, y):
    r"""
    Calulates the *k*-sample test statistic.

    Parameters
    ----------
    *args : ndarrays
        Variable length input data matrices. All inputs must have the same
        number of samples. That is, the shapes must be `(n, p)` and
        `(m, p)` where `n` and `m` are the number of samples and `p` are
        the number of dimensions. Alternatively, inputs can be distance
        matrices, where the shapes must all be `(n, n)`.
    """
    # ported from Hotteling packge in R
    nx = x.shape[0]
    ny = y.shape[0]

    meanx = np.mean(x, axis=0).reshape(-1, 1)
    meany = np.mean(y, axis=0).reshape(-1, 1)

    covx = np.cov(x, rowvar=False)
    covy = np.cov(y, rowvar=False)

    covs = ((nx - 1) * covx + (ny - 1) * covy) / (nx + ny - 2)
    inv_covs = scipy.linalg.inv(covs)

    stat = np.sum((meanx - meany).T @ inv_covs @ (meanx - meany) * nx * ny / (nx + ny))

    return stat

In [107]:
from hyppo.sims import rot_2samp, linear

# Simulation settings, collected in one place so they are easy to tune.
SEED = 0
# NOTE(review): presumably the sample-count and dimensionality arguments of
# rot_2samp -- confirm against the hyppo documentation.
N_SAMPLES = 1000
N_DIMS = 1

# Seed NumPy's global RNG so the simulated samples are reproducible.
np.random.seed(SEED)
x, y = rot_2samp(linear, N_SAMPLES, N_DIMS)
stat = _statistic(x, y)
print(stat)

0.6373338643604227
