In [55]:
import numpy as np
import math
from tqdm import tqdm
import pandas as pd
import pickle

from sklearn.mixture import GaussianMixture

from scipy.stats import t

from graspy.embed import JointRDPG
from graspy.cluster import GaussianCluster, KMeansCluster
from graspy.utils import symmetrize

from mgcpy.independence_tests.mgc import MGC
from mgcpy.independence_tests.dcorr import DCorr

from simulations import rho_ER_marg, rho_sbm_marg, rho_sbm_diff_block
from utils import estimate_block_assignment, block_permute, sort_graph, identity, sbm_params, to_distance_mtx, triu_no_diag

In [59]:
def dcorr_ttest_power(sim_func, mc=500, alpha=0.05, given_blocks=False, blocks=None, **kwargs):
    # power for any test that builds on distance matrices
    # can use dcorr / mgc
    pval_array = np.zeros(mc)
    for i in range(mc):
        A, B = sim_func(**kwargs) 
        dcorr = DCorr()
        pval, _ = dcorr.p_value(
            matrix_X=triu_no_diag(A), matrix_Y=triu_no_diag(B))
        pval_array[i] = pval
    power = np.where(pval_array < alpha)[0].shape[0] / mc
    return power

In [61]:
import warnings
warnings.filterwarnings('ignore')

In [62]:
%%time
n_arr = np.linspace(10, 100, 10, dtype=int)
rho_arr = np.array([0, 0.1, -0.1])
k = 2
P1 = sbm_params(a=0.7, b=0.3)
P2 = sbm_params(a=0.2, b=0.5)
nmc = 500
power_sbm_marg = np.zeros((rho_arr.shape[0], n_arr.shape[0]))

for i, rho in enumerate(rho_arr):
    for j, n in enumerate(n_arr):
        blocks = np.repeat(np.arange(k), n//k)
        test_power = dcorr_ttest_power(rho_sbm_marg, given_blocks=True, blocks=blocks,
                                 rho=rho, AL=P1, BL=P2, k=k, n=n, mc=nmc)
        power_sbm_marg[i, j] = test_power
        print('finish for rho={}, n={}'.format(rho, n))

finish for rho=0.0, n=10
finish for rho=0.0, n=20
finish for rho=0.0, n=30
finish for rho=0.0, n=40
finish for rho=0.0, n=50
finish for rho=0.0, n=60
finish for rho=0.0, n=70


KeyboardInterrupt: 

In [63]:
power_sbm_marg

array([[0.   , 0.464, 0.576, 0.944, 1.   , 1.   , 1.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   ]])

In [35]:
%%time
n_arr = np.linspace(10, 100, 10, dtype=int)
rho_arr = np.array([0, 0.1, -0.1])
k = 2
P1 = sbm_params(a=0.7, b=0.3)
P2 = sbm_params(a=0.7, b=0.3)
nmc = 500
power_sbm = np.zeros((rho_arr.shape[0], n_arr.shape[0]))

for i, rho in enumerate(rho_arr):
    for j, n in enumerate(n_arr):
        blocks = np.repeat(np.arange(k), n//k)
        test_power = dcorr_ttest_power(rho_sbm_marg, given_blocks=True, blocks=blocks,
                                 rho=rho, AL=P1, BL=P2, k=k, n=n, mc=nmc)
        power_sbm[i, j] = test_power
        print('finish for rho={}, n={}'.format(rho, n))

finish for rho=0.0, n=10
finish for rho=0.0, n=20
finish for rho=0.0, n=30
finish for rho=0.0, n=40
finish for rho=0.0, n=50
finish for rho=0.0, n=60
finish for rho=0.0, n=70
finish for rho=0.0, n=80
finish for rho=0.0, n=90
finish for rho=0.0, n=100
finish for rho=0.1, n=10
finish for rho=0.1, n=20
finish for rho=0.1, n=30
finish for rho=0.1, n=40
finish for rho=0.1, n=50
finish for rho=0.1, n=60
finish for rho=0.1, n=70
finish for rho=0.1, n=80
finish for rho=0.1, n=90
finish for rho=0.1, n=100
finish for rho=-0.1, n=10
finish for rho=-0.1, n=20
finish for rho=-0.1, n=30
finish for rho=-0.1, n=40
finish for rho=-0.1, n=50
finish for rho=-0.1, n=60
finish for rho=-0.1, n=70
finish for rho=-0.1, n=80
finish for rho=-0.1, n=90
finish for rho=-0.1, n=100
CPU times: user 2min 37s, sys: 1.09 s, total: 2min 39s
Wall time: 2min 39s


In [36]:
power_sbm

array([[0.2  , 0.624, 0.934, 0.998, 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   ],
       [0.378, 0.936, 1.   , 1.   , 1.   , 1.   , 1.   , 1.   , 1.   ,
        1.   ],
       [0.102, 0.236, 0.476, 0.632, 0.826, 0.938, 0.98 , 0.994, 1.   ,
        0.998]])

In [37]:
%%time
n_arr = np.linspace(10, 100, 10, dtype=int)
rho_arr = np.array([0, 0.1, -0.1])
p = 0.5
q = 0.5
nmc = 500
k = 1
power_er = np.zeros((rho_arr.shape[0], n_arr.shape[0]))

for i, rho in enumerate(rho_arr):
    for j, n in enumerate(n_arr):
        blocks = np.repeat(np.arange(k), n//k)
        test_power = dcorr_ttest_power(rho_ER_marg, given_blocks=True, blocks=blocks,
                                           rho=rho, p=p, q=q, n=n, mc=nmc)
        power_er[i, j] = test_power
        print('finish for rho={}, n={}'.format(rho, n))

finish for rho=0.0, n=10
finish for rho=0.0, n=20
finish for rho=0.0, n=30
finish for rho=0.0, n=40
finish for rho=0.0, n=50
finish for rho=0.0, n=60
finish for rho=0.0, n=70
finish for rho=0.0, n=80
finish for rho=0.0, n=90
finish for rho=0.0, n=100
finish for rho=0.1, n=10
finish for rho=0.1, n=20
finish for rho=0.1, n=30
finish for rho=0.1, n=40
finish for rho=0.1, n=50
finish for rho=0.1, n=60
finish for rho=0.1, n=70
finish for rho=0.1, n=80
finish for rho=0.1, n=90
finish for rho=0.1, n=100
finish for rho=-0.1, n=10
finish for rho=-0.1, n=20
finish for rho=-0.1, n=30
finish for rho=-0.1, n=40
finish for rho=-0.1, n=50
finish for rho=-0.1, n=60
finish for rho=-0.1, n=70
finish for rho=-0.1, n=80
finish for rho=-0.1, n=90
finish for rho=-0.1, n=100
CPU times: user 2min 26s, sys: 1.01 s, total: 2min 27s
Wall time: 2min 27s


In [39]:
power_er

array([[0.05 , 0.058, 0.042, 0.042, 0.056, 0.056, 0.054, 0.054, 0.04 ,
        0.06 ],
       [0.098, 0.232, 0.566, 0.78 , 0.932, 0.988, 0.998, 0.998, 1.   ,
        1.   ],
       [0.106, 0.268, 0.534, 0.76 , 0.92 , 0.988, 0.998, 1.   , 1.   ,
        1.   ]])

In [40]:
%%time
n_arr = np.linspace(10, 100, 10, dtype=int)
rho_arr = np.array([0, 0.1, -0.1])
p = 0.7
q = 0.2
nmc = 500
k = 1
power_er_marg = np.zeros((rho_arr.shape[0], n_arr.shape[0]))

for i, rho in enumerate(rho_arr):
    for j, n in enumerate(n_arr):
        blocks = np.repeat(np.arange(k), n//k)
        test_power = dcorr_ttest_power(rho_ER_marg, given_blocks=True, blocks=blocks,
                                           rho=rho, p=p, q=q, n=n, mc=nmc)
        power_er_marg[i, j] = test_power
        print('finish for rho={}, n={}'.format(rho, n))

finish for rho=0.0, n=10
finish for rho=0.0, n=20
finish for rho=0.0, n=30
finish for rho=0.0, n=40
finish for rho=0.0, n=50
finish for rho=0.0, n=60
finish for rho=0.0, n=70
finish for rho=0.0, n=80
finish for rho=0.0, n=90
finish for rho=0.0, n=100
finish for rho=0.1, n=10
finish for rho=0.1, n=20
finish for rho=0.1, n=30
finish for rho=0.1, n=40
finish for rho=0.1, n=50
finish for rho=0.1, n=60
finish for rho=0.1, n=70
finish for rho=0.1, n=80
finish for rho=0.1, n=90
finish for rho=0.1, n=100
finish for rho=-0.1, n=10
finish for rho=-0.1, n=20
finish for rho=-0.1, n=30
finish for rho=-0.1, n=40
finish for rho=-0.1, n=50
finish for rho=-0.1, n=60
finish for rho=-0.1, n=70
finish for rho=-0.1, n=80
finish for rho=-0.1, n=90
finish for rho=-0.1, n=100
CPU times: user 2min 28s, sys: 1.09 s, total: 2min 29s
Wall time: 2min 30s


In [41]:
power_er_marg

array([[0.064, 0.056, 0.038, 0.05 , 0.072, 0.054, 0.04 , 0.062, 0.048,
        0.044],
       [0.106, 0.236, 0.464, 0.808, 0.958, 0.99 , 0.998, 1.   , 1.   ,
        1.   ],
       [0.13 , 0.252, 0.518, 0.79 , 0.904, 0.98 , 0.998, 1.   , 1.   ,
        1.   ]])

In [44]:
with open('../../results/rho_er_power_ttest.pkl', 'wb') as f:
    pickle.dump(power_er, f)
with open('../../results/rho_er_marg_power_ttest.pkl', 'wb') as f:
    pickle.dump(power_er_marg, f)
with open('../../results/rho_sbm_power_ttest.pkl', 'wb') as f:
    pickle.dump(power_sbm, f)
with open('../../results/rho_sbm_marg_power_ttest.pkl', 'wb') as f:
    pickle.dump(power_sbm_marg, f)    