In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [1]:
import warnings
from functools import partial
from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
from hyppo.ksample import Hotelling, KSample
from joblib import Parallel, delayed

from src import generate_binary_sbms



In [2]:
def run_experiment(
    m, block_1, block_2, p, delta, reps, tests, alpha=0.05, nodes=(0, 19),
):
    """Estimate per-node rejection rates of each test on simulated sample pairs.

    For each of ``reps`` repetitions a pair ``X``, ``Y`` is drawn with
    ``generate_binary_sbms(m, block_1, block_2, p, delta)``. For every node
    index ``j`` in ``nodes`` the slice ``[:, j, :]`` is taken from both
    arrays, column ``j`` is removed from it, and each test in ``tests`` is run
    on the resulting pair.

    Parameters
    ----------
    m, block_1, block_2, p, delta
        Forwarded unchanged to ``generate_binary_sbms``. ``block_1 + block_2``
        is additionally used as the divisor of ``alpha`` (presumably the
        number of nodes, i.e. a Bonferroni-style correction -- confirm).
    reps : int
        Number of independent repetitions.
    tests : sequence
        Objects exposing ``test(x, y, reps=500)`` whose return value carries
        the p-value at index 1.
    alpha : float, default 0.05
        Significance level before division by ``block_1 + block_2``.
    nodes : sequence of int, default (0, 19)
        Node indices to test. The default reproduces the two nodes that were
        previously hard-coded.

    Returns
    -------
    list
        ``[m, p, delta, *rates]`` where ``rates`` is the fraction of
        repetitions with p-value <= ``alpha / (block_1 + block_2)``, ordered
        node-major and test-minor.

    Notes
    -----
    A test that raises, or that returns a NaN p-value, is recorded with
    p-value 1 (a non-rejection). Failures emit a ``RuntimeWarning`` so they
    are not silently folded into the rejection rate.
    """
    total_n = block_1 + block_2
    threshold = alpha / total_n

    pvals = np.zeros((reps, len(nodes), len(tests)))

    for i in range(reps):
        X, Y, _ = generate_binary_sbms(m, block_1, block_2, p, delta)
        for idx, j in enumerate(nodes):
            # The inputs depend only on the node, not on the test, so build
            # them once per node. Column j is dropped from node j's own slice.
            X_nodes = np.delete(X[:, j, :], j, axis=1)
            Y_nodes = np.delete(Y[:, j, :], j, axis=1)
            for k, test in enumerate(tests):
                try:
                    # NOTE(review): a test whose ``test`` method does not accept
                    # ``reps`` raises TypeError here and is recorded as p = 1 --
                    # confirm every entry of ``tests`` supports the keyword.
                    pval = test.test(X_nodes, Y_nodes, reps=500)[1]
                    if np.isnan(pval):
                        pval = 1
                    pvals[i, idx, k] = pval
                except Exception as exc:
                    # Only ordinary errors are absorbed; KeyboardInterrupt and
                    # SystemExit propagate so the run can still be stopped.
                    warnings.warn(
                        f"{type(test).__name__} failed on node {j} "
                        f"(repetition {i}): {exc!r}; recording p-value 1",
                        RuntimeWarning,
                    )
                    pvals[i, idx, k] = 1

    powers = np.mean(pvals <= threshold, axis=0)
    to_append = [m, p, delta, *powers.reshape(-1)]

    return to_append

In [3]:
# Experiment parameters
# Held fixed for every run of the sweep
block_1 = 5
block_2 = 15
p = 0.5
reps = 50
tests = [KSample("MGC"), Hotelling()]

# Swept over a grid
spacing = 50
deltas = np.linspace(0, 1 - p, spacing + 1)
ms = np.linspace(0, 500, spacing + 1)[1:]  # the leading value (0) is dropped

# One keyword dict per (m, delta) grid point, with m varying slowest.
args = [{"m": m, "delta": delta} for m, delta in product(ms, deltas)]

# Alternate between the end and the start of the grid and keep the first
# len(args) entries, so every grid point still appears exactly once
# (presumably to mix small and large settings across workers -- confirm).
args = tuple(kwargs for pair in zip(args[::-1], args) for kwargs in pair)[
    : len(args)
]

partial_func = partial(
    run_experiment,
    block_1=block_1,
    block_2=block_2,
    p=p,
    reps=reps,
    tests=tests,
)

# Results come back in the same order as `args`.
res = Parallel(n_jobs=-2, verbose=7)(
    delayed(partial_func)(**kwargs) for kwargs in args
)

[Parallel(n_jobs=-2)]: Using backend LokyBackend with 95 concurrent workers.
[Parallel(n_jobs=-2)]: Done  10 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-2)]: Done  98 tasks      | elapsed: 232.6min
[Parallel(n_jobs=-2)]: Done 202 tasks      | elapsed: 273.5min
[Parallel(n_jobs=-2)]: Done 322 tasks      | elapsed: 446.1min
[Parallel(n_jobs=-2)]: Done 541 out of 638 | elapsed: 662.6min remaining: 118.8min
[Parallel(n_jobs=-2)]: Done 633 out of 638 | elapsed: 720.4min remaining:  5.7min
[Parallel(n_jobs=-2)]: Done 638 out of 638 | elapsed: 726.3min finished


In [None]:
# Column order follows the list returned by run_experiment:
# [m, p, delta, *rates], with rates ordered node-major and test-minor.
cols = [
    "m",
    "p",
    "delta",
    "mgc_node_1",
    "hotelling_node_1",
    "mgc_node_20",
    "hotelling_node_20",
]
df = pd.DataFrame(res, columns=cols)

# Create the output directory first so a missing folder cannot make the
# export fail after the long-running sweep has finished.
out_path = Path("./results/20200305_adj_row_wise.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(out_path, index=False)

In [11]:
# `res_df` is not defined in any visible cell of this notebook, so the
# original line raises NameError on a fresh kernel. Write the `df` built from
# `res` instead, without the index column so the file matches the export in
# the previous cell.
df.to_csv('./results/20200305_adj_row_wise.csv', index=False)