In [2]:
%matplotlib inline
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from joblib import Parallel, delayed
from scipy.io import loadmat, savemat

from hyppo.time_series import CCAX, MGCX, DcorrX, HsicX, LjungBox

# from mgcpy.benchmarks.ts_benchmarks import NonlinearDependence
# from mgcpy.independence_tests.dcorrx import DCorrX
# from mgcpy.independence_tests.mgcx import MGCX
# from mgcpy.independence_tests.xcorr import BoxPierceX, LjungBoxX

In [3]:
def _compute_power(test, X_full, Y_full, num_sims, alpha, n, replication_factor=100):
    """
    Helper method estimate power of a test on a given simulation.

    :param test: Test to profile, either DCorrX or MGCX.
    :type test: TimeSeriesIndependenceTest

    :param X_full: An ``[n*num_sims]`` data matrix where ``n`` is the highest sample size.
    :type X_full: 2D ``numpy.array``

    :param Y_full: An ``[n*num_sims]`` data matrix where ``n`` is the highest sample size.
    :type Y_full: 2D ``numpy.array``

    :param num_sims: number of simulation at each sample size.
    :type num_sims: integer

    :param alpha: significance level.
    :type alpha: float

    :param n: sample size.
    :type n: integer

    :return: returns the estimated power.
    :rtype: float
    """
    num_rejects = 0.0

    def worker(s):
        X = X_full[range(n), s]
        Y = Y_full[range(n), s]
        # if test["name"] in ["DCorr-X", "MGC-X"]:
        #     p_value, _ = test["object"].p_value(
        #         X, Y, replication_factor=replication_factor, is_fast=test["is_fast"]
        #     )
        # else:
        #     p_value = test["object"].p_value(
        #         X, Y, replication_factor=replication_factor
        #     )
        res = test["object"].test(X, Y, reps=replication_factor)
        p_value = res[1]

        if p_value <= alpha:
            return 1.0
        return 0.0

    rejects = Parallel(n_jobs=-2, verbose=0)(
        delayed(worker)(s) for s in range(num_sims)
    )
    power = np.mean(rejects)
    std = np.std(rejects)

    return power, std

In [4]:
n = 1200
alpha = 0.05
num_sims = 300

tests = [
    # {
    #     "name": "DCorr-X",
    #     "filename": "dcorrx",
    #     "is_fast": False,
    #     "subsample_size": -1,
    #     "object": DCorrX(max_lag=1),
    # },
    # {
    #     "name": "LjungX",
    #     "filename": "ljungx",
    #     "object": LjungBoxX(max_lag=1),
    #     "color": "k",
    # },
    # {
    #     "name": "BoxPierceX",
    #     "filename": "boxpiercex",
    #     "object": LjungBoxX(max_lag=1),
    #     "color": "k",
    # },
    # {
    #     "name": "MGC-X",
    #     "filename": "mgcx",
    #     "is_fast": False,
    #     "object": MGCX(max_lag=1),
    # },
    # {
    #     "name": "HSIC-X",
    #     "filename": "hsicx",
    #     "is_fast": False,
    #     "object": HsicX(max_lag=1),
    # },
    {
        "name": "CCA-X",
        "filename": "ccax",
        # "is_fast": False,
        "object": CCAX(max_lag=1),
    },
]

In [5]:
processes = list(Path("../data/ars").glob("*mat"))

In [6]:
processes

[PosixPath('../data/ars/indep_ar1_phi_0.100_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.150_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.200_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.250_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.300_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.350_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.400_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.450_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.500_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.550_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.600_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.650_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.700_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.750_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.800_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.850_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.900_data.mat'),
 PosixPath('../data/ars/indep_ar1_phi_0.950_data

In [7]:
rates = [float(process.name.split("_")[-2]) for process in processes]
df = pd.DataFrame(rates, columns=["phi"])

In [8]:
rates = [float(process.name.split("_")[-2]) for process in processes]
df = pd.DataFrame(rates, columns=["extinction_rate"])

for test in tests:
    print(f"Running test: {test['name']}")
    powers = np.zeros(len(processes))
    stds = np.zeros(len(processes))

    for i, process in enumerate(processes):
        print(f"Phi: {process.name.split('_')[-2]}")
        data = loadmat(process)
        X_full = data["X_full"]
        Y_full = data["Y_full"]

        powers[i], stds[i] = _compute_power(test, X_full, Y_full, num_sims, alpha, n)

    test_name = test["name"]
    tmp_df = pd.DataFrame(
        np.array([powers, stds]).T, columns=[f"{test_name}_powers", f"{test_name}_stds"]
    )

    df = pd.concat([df, tmp_df], axis=1)

df.to_csv("vary_phi_cca.csv", index=False)

Running test: CCA-X
Phi: 0.100
Phi: 0.150
Phi: 0.200
Phi: 0.250
Phi: 0.300
Phi: 0.350
Phi: 0.400
Phi: 0.450
Phi: 0.500
Phi: 0.550
Phi: 0.600
Phi: 0.650
Phi: 0.700
Phi: 0.750
Phi: 0.800
Phi: 0.850
Phi: 0.900
Phi: 0.950


In [21]:
df1 = pd.read_csv("./indepent_ar_results.csv")
df2 = pd.read_csv("./vary_phi_cca.csv")

In [24]:
df2

Unnamed: 0,extinction_rate,CCA-X_powers,CCA-X_stds
0,0.1,0.043333,0.203606
1,0.15,0.06,0.237487
2,0.2,0.046667,0.210924
3,0.25,0.05,0.217945
4,0.3,0.06,0.237487
5,0.35,0.063333,0.243562
6,0.4,0.06,0.237487
7,0.45,0.053333,0.224697
8,0.5,0.04,0.195959
9,0.55,0.05,0.217945


In [25]:
pd.concat([df1, df2.iloc[:, 1:]], axis=1).to_csv(
    "./indepent_ar_results.csv", index=False
)