In [2]:
from models.ICA_EM import *
from models.dgp import *
from models.metrics import *

In [3]:
# Problem dimensions shared by every cell below.
J = 6        # forwarded to generate_data and to each fit call as J
I = J - 1    # forwarded to generate_data; always one less than J
n = 10_000   # first argument of generate_data

In [51]:
# Seed reused for data generation (random_state) and for the VarEM
# initialisation (random_seed) in the single-run cells.
seed = 20

In [52]:
# Build the data generator (noise scale 0, prior scale 1/sqrt(2)) and draw
# one data set with the shared seed.
data = dgp(
    noise_dict={"loc": 0, "scale": 0},
    prior={"loc": 0, "scale": 1 / np.sqrt(2)},
)
data.generate_data(n, I, J, random_state=seed)

In [53]:
def white_date(data):
    """Whiten ``data`` using the Cholesky factor of its sample covariance.

    The sample covariance of the columns of ``data`` is factorised as
    ``cov = L @ L.T`` with ``L`` lower triangular, and every observation
    ``x`` is mapped to ``L^{-1} x``.  The sample covariance of the result is
    therefore the identity.  The data are not centred.

    Parameters
    ----------
    data : numpy.ndarray, shape (n_samples, n_features)
        One observation per row.  Its sample covariance must be positive
        definite.

    Returns
    -------
    dict
        ``"white_data"`` : ndarray, shape (n_samples, n_features)
            The whitened observations.
        ``"sqrt_cov"`` : ndarray, shape (n_features, n_features)
            The lower-triangular factor ``L``; ``white_data @ L.T``
            recovers ``data``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the sample covariance is not positive definite.
    """
    # np.cov collapses to a 0-d array for a single feature column; promote it
    # back to a 1x1 matrix so the factorisation below still applies.
    cov = np.atleast_2d(np.cov(data.T))
    sqrt_cov = np.linalg.cholesky(cov)
    # Solve L @ W = X.T instead of forming inv(L) explicitly: same result,
    # but cheaper and numerically better behaved.
    white_data = np.linalg.solve(sqrt_cov, data.T)
    return {"white_data": white_data.T, "sqrt_cov": sqrt_cov}


In [54]:
# Whiten the observations, then print their sample covariance as a sanity
# check: it should be the identity matrix.
data_out = white_date(data.data_observed)
print(np.round(np.cov(data_out["white_data"].T), decimals=2))

[[ 1. -0.  0. -0.  0.]
 [-0.  1. -0.  0. -0.]
 [ 0. -0.  1.  0. -0.]
 [-0.  0.  0.  1.  0.]
 [ 0. -0. -0.  0.  1.]]


In [55]:
# Fit VarEM on the whitened observations.
data_white = data_out["white_data"]
est = VarEM(true_A=None, max_iter=10, random_seed=seed)
est.fit(data_white, J=J)

Initializing A randomly


Diff: 0.0534: 100%|██████████| 10/10 [00:02<00:00,  4.34it/s]


Estimating the signals


100%|██████████| 10000/10000 [00:00<00:00, 73724.49it/s]


In [56]:
# est.A was fitted on whitened data; left-multiplying by the Cholesky factor
# of the covariance maps it back to the space of the original observations.
print(np.round(data_out["sqrt_cov"] @ est.A, decimals=2))

[[-0.61 -0.42  0.02  0.08  0.03 -0.01]
 [-1.04 -1.17  0.12  0.2   0.37 -0.38]
 [ 0.9   0.72 -0.61  0.36 -0.09 -0.14]
 [-0.71 -0.98 -0.46  0.21  0.72  0.57]
 [-2.68 -1.82  0.99 -0.32  1.27  2.28]]


In [57]:
# Mixing matrix stored on the data generator, for comparison with the estimate.
print(np.round(data.mixing_matrix_observed, decimals=2))

[[-0.    1.    0.    0.    0.   -0.  ]
 [ 0.    2.1   1.    0.    0.    0.  ]
 [ 0.   -1.47 -0.    1.   -0.    0.  ]
 [ 1.62  1.66  0.79 -0.    1.    0.  ]
 [ 3.72  4.5  -0.1  -2.6   1.63  1.  ]]


In [58]:
# Score the back-transformed estimate from the whitened fit against the
# generator's mixing matrix.
f_score(
    data.mixing_matrix_observed,
    data_out["sqrt_cov"] @ est.A,
)

((5, 2, 3, 0, 4, 1), np.float64(0.8800412517278833))

In [59]:
# Same estimator settings, now fitted on the raw (non-whitened) observations.
# This rebinds `est`, replacing the fit obtained on the whitened data.
est = VarEM(true_A=None, max_iter=10, random_seed=seed)
est.fit(data.data_observed, J=J)

Initializing A randomly


Diff: 0.0858: 100%|██████████| 10/10 [00:02<00:00,  4.38it/s]


Estimating the signals


100%|██████████| 10000/10000 [00:00<00:00, 89996.87it/s]


In [60]:
# Score the estimate from the raw-data fit; it is already in the original
# space, so no back-transform is applied.
f_score(data.mixing_matrix_observed, est.A)

((1, 4, 0, 5, 3, 2), np.float64(0.3668554990048595))

In [63]:
# Mixing matrix estimated from the raw (non-whitened) observations.
print(np.round(est.A, decimals=2))

[[-0.02 -0.21  0.42  0.21  0.3  -0.22]
 [ 0.42 -0.67  0.66  0.54  0.79 -0.65]
 [ 0.5   0.15 -1.25 -0.29 -0.37  0.3 ]
 [ 0.34  0.76  0.24  1.5   0.63 -0.42]
 [-1.46  2.41  3.11  3.21  1.12 -0.68]]


In [75]:
# Compare f_score with and without whitening over 100 simulated data sets.
# NOTE(review): the prior scale here is 2, whereas the single-run cells use
# 1/np.sqrt(2) -- confirm that the difference is intended.
f_score_white, f_score_no_white = [], []
data = dgp(
    noise_dict={"loc": 0, "scale": 0},
    prior={"loc": 0, "scale": 2},
)

for i in tqdm.tqdm(range(100)):
    # New data set per replication; i seeds both the data and the estimators.
    data.generate_data(n, I, J, random_state=i)
    data_out = white_date(data.data_observed)
    data_white = data_out["white_data"]

    # Fit on the whitened data and map the estimate back before scoring.
    est = VarEM(true_A=None, max_iter=10, random_seed=i)
    est.fit(data_white, J=J, progress_bar=False)
    f_score_white.append(
        f_score(data.mixing_matrix_observed, data_out["sqrt_cov"] @ est.A)
    )

    # Fit on the raw data and score the estimate directly.
    est = VarEM(true_A=None, max_iter=10, random_seed=i)
    est.fit(data.data_observed, J=J, progress_bar=False)
    f_score_no_white.append(f_score(data.mixing_matrix_observed, est.A))


100%|██████████| 100/100 [07:52<00:00,  4.72s/it]


In [76]:
# Each stored f_score result is a tuple whose second element is the score.
f_score_white_values = [result[1] for result in f_score_white]
f_score_no_white_values = [result[1] for result in f_score_no_white]

# Mean score over all replications: whitened first, then non-whitened.
print(np.mean(f_score_white_values))
print(np.mean(f_score_no_white_values))

0.7600515074426041
0.829456924350306
