# NERCOME estimator to obtain covariance matrix

In [1]:
import numpy as np
import itertools as it
import matplotlib.pyplot as plt
%matplotlib inline

Generate data

In [2]:
# Problem dimensions used by every cell below
Nd = 4  # length of each data vector (number of rows of the data matrix)
Ns = 6  # number of independent realizations (number of columns of the data matrix)

In [3]:
# Draw the Nd x Ns data matrix X = (x_1, x_2, ..., x_Ns): each of the Ns columns
# is one realization of a zero-mean Gaussian vector of length Nd.
# np.random.normal takes the standard deviation as `scale`, so the first
# Nd//2 rows have variance 1 and the remaining rows have variance 5.
A = np.random.normal(loc=0, scale=1, size=(Nd // 2, Ns))
B = np.random.normal(loc=0, scale=np.sqrt(5), size=(Nd - Nd // 2, Ns))
# Stack the unit-variance rows on top of the variance-5 rows.
X = np.concatenate((A, B), axis=0)

The standard sample covariance estimator is given by
$ \hat{S} = \frac{1}{N_s-1} X X^T$

In [4]:
# Sample covariance of the Ns columns of X: outer-product sum X X^T scaled by 1/(Ns-1).
# No sample mean is subtracted here.
S = (1 / (Ns - 1)) * (X @ X.T)
print(S)

[[ 0.25980571 -0.09519878  0.41277364 -0.70431221]
 [-0.09519878  0.605975    1.06583042 -0.03776825]
 [ 0.41277364  1.06583042 14.3942068  -0.39570605]
 [-0.70431221 -0.03776825 -0.39570605  7.14553307]]


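The covariance matrix is then the expectation of the sample estimator, $ \Sigma = \mathbb{E}(\hat{S}) $, which is approximated below by averaging $\hat{S}$ over several independent realizations of the data.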

In [5]:
# Monte Carlo approximation of E(S): redraw the data n times and average the
# resulting sample covariance matrices.
# NOTE(review): the draws are zero-mean and no sample mean is subtracted, so with
# the 1/(Ns-1) factor this average tends to Ns/(Ns-1) times the true covariance
# -- confirm that this normalisation is intended.
n = 10  # number of independent redraws
S_sum = np.zeros((Nd, Nd))  # running sum of the per-draw sample covariances
for _ in range(n):
    # Same distribution as the original data: variance 1 in the first Nd//2 rows,
    # variance 5 in the rest. A, B and X are overwritten on every pass, so after
    # this cell X holds the last redraw.
    A = np.random.normal(loc=0, scale=1, size=(Nd // 2, Ns))
    B = np.random.normal(loc=0, scale=np.sqrt(5), size=(Nd - Nd // 2, Ns))
    X = np.concatenate((A, B), axis=0)
    S_sum += (1 / (Ns - 1)) * (X @ X.T)
CovM = S_sum / n
print(CovM)

[[ 1.50992001 -0.31631377  0.44213484 -0.44173215]
 [-0.31631377  1.43719925  0.35827417  0.11784883]
 [ 0.44213484  0.35827417  7.09269187  1.03404255]
 [-0.44173215  0.11784883  1.03404255  4.84082999]]


The NERCOME procedure divides the dataset into two subsamples, $X = (X_1, X_2)$, where $X_1$ is an $N_d \times s$ matrix and $X_2$ is an $N_d \times (N_s - s)$ matrix.

In [17]:
# Draw a fresh Nd x Ns data matrix: the first Nd//2 rows have variance 1, the
# remaining rows have variance 5 (np.random.normal takes the standard deviation).
A = np.random.normal(0, 1, size=(Nd//2, Ns))
B = np.random.normal(0, np.sqrt(5), size=(Nd-Nd//2, Ns))
X = np.vstack((A, B))

s = Ns // 2 # Trial value for s
# Every way of choosing s of the Ns column indices; each choice defines one split (X1, X2)
col_combos = list(it.combinations(range(Ns), s))

for col_combo in col_combos:
    # X1 holds the s selected columns, X2 the remaining Ns - s columns
    X1 = X[:, list(col_combo)]
    X2 = X[:, np.delete(np.arange(Ns), col_combo)]

    # Sample covariance of each subsample (no mean subtraction, as for the full sample)
    S1 = 1/(s-1)*np.matmul(X1, X1.T)
    S2 = 1/(Ns-s-1)*np.matmul(X2, X2.T)

    # Diagonalize S_i = U_i * D_i * U_i^T.
    # S_i is symmetric, so use eigh: it returns real eigenvalues and orthonormal
    # eigenvectors, which the U D U^T reconstruction below relies on. The general
    # solver np.linalg.eig guarantees neither.
    evals1, U1 = np.linalg.eigh(S1)
    D1 = np.diag(evals1)
    evals2, U2 = np.linalg.eigh(S2)
    D2 = np.diag(evals2)

    # Sanity check: rebuilding S_i from its eigendecomposition must reproduce it
    S1_trial = np.matmul(np.matmul(U1, D1), U1.T)
    S2_trial = np.matmul(np.matmul(U2, D2), U2.T)
    # One line of output per split; True only if both reconstructions agree
    # to an absolute tolerance of 1e-10
    print(np.allclose(S1, S1_trial, rtol=0, atol=1e-10)
          and np.allclose(S2, S2_trial, rtol=0, atol=1e-10))
    


True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
