In [14]:
import pickle
from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from scipy.io import savemat

from hyppo.time_series import CCAX, MGCX, DcorrX, HsicX

In [15]:
def generate_phi(phi=0.5, d=1):
    """Build the diagonal coefficient matrix used by the VAR simulation.

    Parameters
    ----------
    phi : float, default 0.5
        Scale applied to every diagonal entry.
    d : int, default 1
        Size of the square matrix.

    Returns
    -------
    numpy.ndarray
        A ``(d, d)`` matrix whose ``j``-th diagonal entry (0-based) is
        ``phi / (j + 1)``; all off-diagonal entries are zero.
    """
    # Harmonic decay along the diagonal: 1, 1/2, ..., 1/d.
    decay = 1 / np.arange(1, d + 1)
    return np.diag(decay) * phi


def generate_var(n, d, sigma=1, lag=1, phi=0.5):
    """Simulate a pair of cross-dependent series of length ``n`` in ``d`` dimensions.

    Both series start as Gaussian noise with mean 0 and standard deviation
    ``sigma``. From index ``lag`` onward each row additionally receives the
    *other* series' row from ``lag`` steps earlier, multiplied by the
    coefficient matrix from ``generate_phi(phi, d)``.

    Parameters
    ----------
    n : int
        Number of time steps (rows).
    d : int
        Dimension of each observation (columns).
    sigma : float, default 1
        Standard deviation of the Gaussian noise.
    lag : int, default 1
        Lag at which each series depends on the other.
    phi : float, default 0.5
        Scale passed to ``generate_phi``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)``, each of shape ``(n, d)``.

    Notes
    -----
    Draws from NumPy's global random state (``np.random.normal``); seed it
    beforehand for reproducible output.
    """
    # The noise arrays double as the output series: rows before ``lag`` stay
    # pure noise, later rows get the cross-lagged term added in place.
    x = np.random.normal(0, sigma, (n, d))
    y = np.random.normal(0, sigma, (n, d))

    coef = generate_phi(phi, d)

    for t in range(lag, n):
        past = t - lag
        # Order matters: y[t] reads x[past], which is already final here.
        x[t] += coef @ y[past]
        y[t] += coef @ x[past]

    return x, y


def generate_data(n, d, lag, phi, fname, output_dir="./data/", reps=300):
    """Simulate ``reps`` pairs of series and save them in pickle and MATLAB formats.

    Each replicate is produced by ``generate_var(n, d, lag=lag, phi=phi)``.
    Two files are written into ``output_dir`` (created if missing):

    * ``{fname}_data.pkl`` -- pickled dict with keys ``"X"`` and ``"Y"``.
    * ``{fname}_data.mat`` -- MATLAB file with variables ``X_full`` and ``Y_full``.

    Parameters
    ----------
    n : int
        Number of time steps per replicate.
    d : int
        Dimension of each observation.
    lag : int
        Lag forwarded to ``generate_var``.
    phi : float
        Coefficient scale forwarded to ``generate_var``.
    fname : str
        Prefix of the output file names.
    output_dir : str or path-like, default "./data/"
        Directory that receives the output files.
    reps : int, default 300
        Number of replicates to simulate.

    Returns
    -------
    None

    Notes
    -----
    No seed is set here; the replicates depend on the current state of
    NumPy's global random generator.
    """
    X_full = np.zeros((reps, n, d))
    Y_full = np.zeros((reps, n, d))
    for s in range(reps):
        X_full[s], Y_full[s] = generate_var(n, d, lag=lag, phi=phi)

    # exist_ok avoids the check-then-create race of is_dir() followed by mkdir().
    p = Path(output_dir)
    p.mkdir(parents=True, exist_ok=True)

    # Save simulated output. The context manager closes the handle even if
    # pickling fails part-way through.
    output = {"X": X_full, "Y": Y_full}
    with open(p / f"{fname}_data.pkl", "wb") as file:
        pickle.dump(output, file)

    # Save to MATLAB format as well.
    savemat(p / f"{fname}_data.mat", {"X_full": X_full, "Y_full": Y_full})

def run_experiment(test, n, d, phi=0.5, reps=300):
    """Estimate how often ``test`` rejects at the 0.05 level on simulated data.

    For each seed in ``range(reps)`` the global NumPy generator is reseeded,
    a pair of series is drawn with ``generate_var(n, d, phi=phi)`` (default
    lag), and ``test.test`` is run with 1000 replications on a single worker.

    Parameters
    ----------
    test : object
        Must expose ``test(x, y, reps=..., workers=...)`` returning a
        sequence whose element at index 1 is used as the p-value.
    n : int
        Number of time steps per simulated series.
    d : int
        Dimension of each observation.
    phi : float, default 0.5
        Coefficient scale forwarded to ``generate_var``.
    reps : int, default 300
        Number of simulated data sets (one per seed).

    Returns
    -------
    tuple
        ``(mean, std)`` of the boolean rejection indicator ``p <= 0.05``.
    """

    def _pvalue(seed):
        # Reseed per replicate so each data set is reproducible on its own.
        np.random.seed(seed)
        x, y = generate_var(n, d, phi=phi)
        outcome = test.test(x, y, reps=1000, workers=1)
        return outcome[1]

    rejected = np.array([_pvalue(seed) for seed in range(reps)]) <= 0.05

    return np.mean(rejected), np.std(rejected)

In [16]:
# Write one data set with phi = 0.0 (n = 100, d = 100, lag = 1) under the
# "indep_var1" prefix, using the default output directory and replicate count.
generate_data(n=100, d=100, lag=1, phi=0.0, fname="indep_var1")

In [3]:
# Tests to evaluate, as [label, test instance] pairs.
# NOTE(review): both active entries carry the label "cca", so the max_lag=1 and
# max_lag=2 rows cannot be told apart by the "test" column of the results.
tests = [
    # ["dcorr", DcorrX(max_lag=2)],
    ["cca", CCAX(max_lag=1)],
    ["cca", CCAX(max_lag=2)],
    # ["hsic", HsicX(max_lag=2)],
]

# Simulation settings.
sigma, lag, phi = 1, 1, 0.0
n, reps = 50, 300

# Dimensions to sweep: 1, 10, 20, 30, 40, 50 (the leading 0 is bumped up to 1).
ds = np.maximum(np.arange(0, 51, 10), 1)

# Every (test entry, dimension) combination, grouped by test entry.
args = [(entry, dim) for entry in tests for dim in ds]

In [4]:
# Run every (test, dimension) configuration on all available cores.
res = Parallel(n_jobs=-1, verbose=10)(
    delayed(run_experiment)(test, n, d, phi=phi, reps=reps) for (_, test), d in args
)

# One row per configuration: [label, dimension, rejection rate, std].
df = [[label, dim, *stats] for ((label, _), dim), stats in zip(args, res)]

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 96 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done   3 out of  12 | elapsed:  6.4min remaining: 19.3min
[Parallel(n_jobs=-1)]: Done   5 out of  12 | elapsed: 10.3min remaining: 14.4min
[Parallel(n_jobs=-1)]: Done   7 out of  12 | elapsed: 14.6min remaining: 10.4min
[Parallel(n_jobs=-1)]: Done   9 out of  12 | elapsed: 20.4min remaining:  6.8min
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed: 35.5min finished


In [5]:
# Tabulate the collected result rows under named columns and display the frame.
# NOTE(review): this rebinds `df`; the cell that builds it leaves `df` as a
# plain list of rows.
df = pd.DataFrame(df, columns=["test", "d", "power", "std"])
df

Unnamed: 0,test,d,power,std
0,cca,1,0.053333,0.224697
1,cca,10,0.073333,0.260683
2,cca,20,0.32,0.466476
3,cca,30,0.71,0.453762
4,cca,40,0.966667,0.179505
5,cca,50,0.996667,0.057639
6,cca,1,0.06,0.237487
7,cca,10,0.093333,0.290899
8,cca,20,0.366667,0.481894
9,cca,30,0.763333,0.425036


In [13]:
# Tabulate the collected result rows under named columns and display the frame.
# NOTE(review): the saved output below lists "dcorr" rows, which the visible
# `tests` list has commented out — presumably left over from an earlier
# configuration; re-run to confirm.
df = pd.DataFrame(df, columns=["test", "d", "power", "std"])
df

Unnamed: 0,test,d,power,std
0,dcorr,1,0.05,0.217945
1,dcorr,10,0.023333,0.15096
2,dcorr,20,0.013333,0.114698
3,dcorr,30,0.016667,0.128019
4,dcorr,40,0.026667,0.161107
5,dcorr,50,0.013333,0.114698
6,cca,1,0.04,0.195959
7,cca,10,0.13,0.336303
8,cca,20,0.526667,0.499288
9,cca,30,0.906667,0.290899


In [9]:
# Tabulate the collected result rows under named columns and display the frame.
# NOTE(review): the saved output below lists "dcorr" rows, which the visible
# `tests` list has commented out — presumably left over from an earlier
# configuration; re-run to confirm.
df = pd.DataFrame(df, columns=["test", "d", "power", "std"])
df

Unnamed: 0,test,d,power,std
0,dcorr,1,0.823333,0.381386
1,dcorr,10,0.186667,0.389644
2,dcorr,20,0.056667,0.231205
3,dcorr,30,0.043333,0.203606
4,dcorr,40,0.026667,0.161107
5,dcorr,50,0.026667,0.161107
6,cca,1,0.866667,0.339935
7,cca,10,0.406667,0.491212
8,cca,20,0.72,0.448999
9,cca,30,0.953333,0.210924


In [6]:
# Tabulate the collected result rows under named columns and display the frame.
# NOTE(review): the saved output below lists "dcorr" rows, which the visible
# `tests` list has commented out — presumably left over from an earlier
# configuration; re-run to confirm.
df = pd.DataFrame(df, columns=["test", "d", "power", "std"])
df

Unnamed: 0,test,d,power,std
0,dcorr,1,0.87,0.336303
1,dcorr,10,0.296667,0.456788
2,dcorr,20,0.13,0.336303
3,dcorr,30,0.076667,0.266062
4,dcorr,40,0.05,0.217945
5,dcorr,50,0.066667,0.249444
6,cca,1,0.923333,0.266062
7,cca,10,0.506667,0.499956
8,cca,20,0.66,0.473709
9,cca,30,0.893333,0.308689


In [64]:
# Inspect the argument half of the first (args, result) pair: ([label, test], d).
list(zip(args, res))[0][0]

(['dcorr', <hyppo.time_series.dcorrx.DcorrX at 0x7f629a2724f0>], 1)

In [47]:
# Inspect the result half of the first (args, result) pair, i.e. the tuple
# returned by run_experiment for that configuration.
list(zip(args, res))[0][1]

(0.87, 0.33630343441600474)

In [19]:
# Assemble a 2x2 block matrix with `zeros` on the diagonal and `phi` off it.
# NOTE(review): `zeros` is not defined in any visible cell, and `phi` is the
# scalar 0.0 in the configuration cell — presumably both were matrices set in
# a cell that is no longer shown; confirm before running on a fresh kernel.
block = np.block([[zeros, phi], [phi, zeros]])

In [20]:
# Determinant of the `block` matrix.
np.linalg.det(block)

8.528068668441306e-160