In [286]:
import numpy as np
from scipy.spatial.distance import mahalanobis
from scipy.stats import multivariate_normal, invwishart
def random_data_gen(n_samples=1000, n_feats=10, maha=1.0, psi_diag=1.0, psi_offdiag=0., ddof=4, class_ratio=0.5, seed=None):
    """Generate a two-class Gaussian data set with a prescribed Mahalanobis separation.

    Both classes share one covariance matrix, drawn from an inverse-Wishart
    distribution with ``n_feats + ddof`` degrees of freedom and a scale matrix
    that has ``psi_diag`` on the diagonal and ``psi_offdiag`` everywhere else.
    Class "b" is centred at the origin; class "a" is centred on a random
    direction rescaled so that the Mahalanobis distance between the two means
    (with respect to the shared covariance) equals ``maha``.

    Parameters
    ----------
    n_samples : int
        Total number of rows returned.
    n_feats : int
        Number of features (columns).
    maha : float
        Target Mahalanobis distance between the two class means.
    psi_diag, psi_offdiag : float
        Diagonal / off-diagonal entries of the inverse-Wishart scale matrix.
    ddof : int
        Added to ``n_feats`` to obtain the inverse-Wishart degrees of freedom.
    class_ratio : float
        Fraction of rows belonging to class "a"; ``int(n_samples * class_ratio)``
        rows are drawn for it and the remainder for class "b".
    seed : int or None
        When not None, the global NumPy random state is re-seeded with this
        value before any draw (note: this is a side effect on ``np.random``).

    Returns
    -------
    data : ndarray
        Class "a" rows stacked on top of class "b" rows.
    labels : ndarray of bool
        True for class "a" rows, False for class "b" rows.
    """
    # Compare against None so that seed=0 is honoured as well.
    if seed is not None:
        np.random.seed(seed)
    ## class "a" gets a random mean direction, class "b" sits at the origin
    norm_means_a = np.random.randn(n_feats)
    norm_means_b = np.zeros_like(norm_means_a)
    ## shared covariance drawn from an inverse wishart distribution (conjugate prior for MVN)
    identity = np.eye(n_feats)
    psi = psi_diag * identity + psi_offdiag * (1.0 - identity)
    nu = n_feats + ddof
    wishart_cov = np.atleast_2d(invwishart(nu, psi).rvs())
    ## specify the mahalanobis distance between the two distributions;
    ## scipy's mahalanobis() expects the INVERSE covariance as its third argument
    cov_inv = np.linalg.inv(wishart_cov)
    dist = mahalanobis(norm_means_a, norm_means_b, cov_inv)
    norm_means_a = norm_means_a * (maha / dist)
    assert np.isclose(mahalanobis(norm_means_a, norm_means_b, cov_inv), maha)
    ## multivariate normal distributions with different means and equal variances
    mvn_a = multivariate_normal(mean=norm_means_a, cov=wishart_cov)
    mvn_b = multivariate_normal(mean=norm_means_b, cov=wishart_cov)
    ## generate data samples from a multivariate normal
    n_a = int(n_samples * class_ratio)
    n_b = n_samples - n_a
    # reshape so that single-sample / single-feature draws still stack as rows
    data = np.vstack([
        np.reshape(mvn_a.rvs(n_a), (-1, n_feats)),
        np.reshape(mvn_b.rvs(n_b), (-1, n_feats)),
    ])
    labels = np.arange(len(data)) < n_a
    return data, labels

In [287]:
# Draw one example data set: 1000 samples, 10 features, class means at Mahalanobis distance 1.
# No seed is passed, so the values differ from run to run.
data, labels = random_data_gen(n_samples=1000, n_feats=10, maha=1., psi_diag=1.)
# Echo both arrays as the cell output.
data, labels

(array([[ 0.90587371,  0.85330968,  0.78026648, ...,  0.88040803,
          1.22170708, -0.22122927],
        [ 1.38490785,  0.15912619, -0.88120683, ..., -1.12625305,
          1.08151514, -1.26325278],
        [ 1.1851155 , -0.92126382, -0.70761187, ..., -1.38830525,
         -0.00730406, -1.56451073],
        ...,
        [ 0.03138741, -0.97906704, -0.09509917, ..., -0.04007228,
         -0.31558083,  0.35233976],
        [-0.29551414,  0.30137921,  0.6470991 , ...,  0.73653539,
         -0.25380056,  0.54616535],
        [-0.71440345, -0.31405635,  0.08474968, ...,  0.94248388,
         -0.80891815, -0.44761239]]),
 array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  Tr

In [80]:
# Persist the generated features and labels to the current working directory.
np.save("random_data_X.npy", data)
# np.save appends ".npy" when the name lacks it, so this is written as random_data_y.npy.
np.save("random_data_y", labels)

In [244]:
def E_invwish(psi, dof):
    """Mean of an inverse-Wishart distribution.

    Returns ``psi / (dof - p - 1)`` where ``p = len(psi)`` is the dimension of
    the scale matrix ``psi`` and ``dof`` is the degrees of freedom.
    """
    dim = len(psi)
    denominator = dof - dim - 1
    return psi / denominator

def Var_invwish(psi, dof):
    """Element-wise variance of an inverse-Wishart distributed matrix.

    For scale matrix ``psi`` (p x p) and ``dof`` degrees of freedom, entry
    (i, j) of the result is

        ((dof-p+1) * psi[i,j]**2 + (dof-p-1) * psi[i,i] * psi[j,j])
        / ((dof-p) * (dof-p-1)**2 * (dof-p-3))

    Parameters
    ----------
    psi : array-like, shape (p, p)
        Scale matrix.
    dof : number
        Degrees of freedom. The denominator vanishes at dof = p, p+1 and p+3.

    Returns
    -------
    ndarray, shape (p, p)
        Variance of each matrix entry.
    """
    psi = np.asarray(psi, dtype=float)
    p = len(psi)
    psi_diagonal = np.diag(psi)
    # outer(k * d, d)[i, j] == k * psi[i, i] * psi[j, j], computed for all (i, j) at once
    numerator = (dof - p + 1) * psi**2 + np.outer((dof - p - 1) * psi_diagonal, psi_diagonal)
    denominator = (dof - p) * (dof - p - 1)**2 * (dof - p - 3)
    return numerator / denominator

In [None]:
def covariance_invwishart(diag=1., offdiag=0., ddof=4, p=5):
    """Analytic mean and element-wise variance of an inverse-Wishart covariance.

    The scale matrix is ``diag * I_p + offdiag * ones((p, p))``, so its diagonal
    entries equal ``diag + offdiag`` and its off-diagonal entries equal
    ``offdiag``. The degrees of freedom are ``p + ddof``.

    Parameters
    ----------
    diag : float
        Multiplier of the identity part of the scale matrix.
    offdiag : float
        Constant added to every entry of the scale matrix.
    ddof : int
        Added to ``p`` to obtain the degrees of freedom.
    p : int
        Dimension of the matrix.

    Returns
    -------
    expected, variance : tuple of (p, p) ndarrays
        Results of ``E_invwish`` and ``Var_invwish`` for that scale matrix.
    """
    # The identity must follow `p`; a fixed 5x5 identity cannot broadcast against ones((p, p)).
    psi = diag * np.eye(p) + offdiag * np.ones((p, p))
    dof = p + ddof
    expected = E_invwish(psi, dof)
    variance = Var_invwish(psi, dof)
    return expected, variance

# Analytic moments for the scale matrix 2*I + ones (p=5, ddof=4 by default).
expected, variance = covariance_invwishart(2, 1)
print(f"Expected Cov:\n{expected}")

# Convert the expected covariance to a correlation matrix: D @ Cov @ D with D = diag(1/std).
D = np.diag(1 / np.sqrt(np.diag(expected)))
corr = D @ expected @ D
print(f"Expected Corr:\n{corr}")

print(f"Variance of Cov:\n{variance}")

In [276]:
# Monte-Carlo check: element-wise variance across 100000 inverse-Wishart draws.
# NOTE(review): `psi` is never assigned at top level in the visible cells (it only exists as a
# local inside functions), so on a fresh kernel this raises NameError unless it is defined
# elsewhere. The 5x5 output and df=9 suggest the scale matrix of covariance_invwishart(2, 1)
# (p=5, ddof=4) — TODO confirm and define `psi` explicitly in this cell.
iw_dist = invwishart(df=9, scale=psi)
iw_dist.rvs(100000).var(0)

array([[1.5516999 , 0.65217429, 0.65532596, 0.62977027, 0.60524033],
       [0.65217429, 1.61624763, 0.64239274, 0.65072092, 0.60476947],
       [0.65532596, 0.64239274, 1.76157798, 0.91743822, 0.63559231],
       [0.62977027, 0.65072092, 0.91743822, 2.49424013, 0.7386889 ],
       [0.60524033, 0.60476947, 0.63559231, 0.7386889 , 1.68301676]])

In [277]:
# random_data_gen has no `scale_diag` parameter; the diagonal of the inverse-Wishart
# scale matrix is passed as `psi_diag`.
data, labels = random_data_gen(n_samples=1000, n_feats=10, maha=1., psi_diag=1.)

In [288]:
## Sample covariance of the first 100 rows of 1000 independently generated data sets
## (n_samples=200, n_feats=5), stacked along a new leading axis.
data_covar = np.stack([
    np.cov(
        random_data_gen(n_samples=200, n_feats=5, maha=1., psi_diag=3.)[0][:100],
        rowvar=False,  # rows are observations, columns are features
    )
    for _ in range(1000)
])

In [281]:
# Average the stacked sample covariances over the replicate axis, rounded to 2 decimals.
data_covar.mean(axis=0).round(2)

array([[ 0.95,  0.03, -0.01,  0.  , -0.07],
       [ 0.03,  0.99, -0.02, -0.01, -0.01],
       [-0.01, -0.02,  0.97,  0.03,  0.01],
       [ 0.  , -0.01,  0.03,  0.95, -0.  ],
       [-0.07, -0.01,  0.01, -0.  ,  0.99]])

### Kronecker product version

In [169]:
import numpy as np

def comm_mat(m, n):
    """Commutation matrix K of size (m*n, m*n).

    K is the identity matrix with its rows permuted so that, for a matrix A of
    shape (m, n), ``K @ vec(A) == vec(A.T)`` with column-stacking ``vec``.
    """
    size = m * n
    # Position i*n + j of the permutation holds j*m + i, i.e. the column-major
    # index of A[i, j] is looked up at the column-major index of A.T[j, i].
    perm = np.arange(size).reshape(n, m).T.ravel()
    # Selecting identity rows in that order yields the permutation matrix.
    return np.eye(size)[perm]

def vec(X):
    """Vectorise ``X`` by stacking its columns (column-major / Fortran order) into a 1-D array."""
    stacked_columns = np.ravel(X, order="F")
    return stacked_columns

def kron_Var_invwish(psi, dof):
    """Kronecker-form covariance of an inverse-Wishart matrix X with scale ``psi``.

    Returns the (p*p, p*p) matrix

        (c1 - c3) * kron(psi, psi) + c2 * vec(psi) vec(psi)^T + c2 * K_pp @ kron(psi, psi)

    with ``c2 = 1 / ((dof-p)(dof-p-1)(dof-p-3))``, ``c1 = (dof-p-2) * c2`` and
    ``c3 = 1 / (dof-p-1)**2``. Entry ``[i*p + k, j*p + l]`` is
    ``Cov(X[i, j], X[k, l])``; in particular entry ``[i*p + i, j*p + j]`` is
    ``Var(X[i, j])`` and agrees with ``Var_invwish(psi, dof)[i, j]``.

    Parameters
    ----------
    psi : array-like, shape (p, p)
        Symmetric scale matrix.
    dof : number
        Degrees of freedom.

    Returns
    -------
    ndarray, shape (p*p, p*p)
    """
    psi = np.asarray(psi, dtype=float)
    p = len(psi)
    c2 = 1.0 / ((dof - p) * (dof - p - 1) * (dof - p - 3))
    c1 = (dof - p - 2) * c2
    c3 = 1.0 / (dof - p - 1) ** 2
    K_pp = comm_mat(p, p)
    psi_kron = np.kron(psi, psi)
    vec_psi = vec(psi)
    # vec(psi) is 1-D, so `@` would collapse it to a scalar; the formula needs the outer product.
    vec_outer = np.outer(vec_psi, vec_psi)
    # Subtracting c3 * kron(psi, psi) removes E[X] (x) E[X]; without it this is the second moment.
    return (c1 - c3) * psi_kron + c2 * vec_outer + c2 * (K_pp @ psi_kron)
    