In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import random

from mgcpy.independence_tests.dcorrx import DCorrX
from mgcpy.independence_tests.mgcx import MGCX

Simple test that everything works.

In [2]:
# Smoke test: run both tests once on two independently drawn standard-normal
# samples, each stored as an (n, 1) column.
n = 40
X = np.random.normal(0.0, 1.0, size=(n, 1))
Y = np.random.normal(0.0, 1.0, size=(n, 1))

# The same max_lag value is handed to both test constructors below.
max_lag = 0

# DCorrX: build the test object, then request a p-value via the 'subsample' path.
dcorrx = DCorrX(max_lag=max_lag)
p_value, metadata = dcorrx.p_value(X, Y, is_fast='subsample')
print("The p_value for DCorrX is: %f" % p_value)

# MGCX: identical call pattern; p_value and metadata are overwritten.
mgcx = MGCX(max_lag=max_lag)
p_value, metadata = mgcx.p_value(X, Y, is_fast='subsample')
print("The p_value for MGCX is: %f" % p_value)

The p_value for DCorrX is: 0.475000
The p_value for MGCX is: 0.400000


Functions to simulate time series processes.

In [3]:
def indep_ar1(n, phi = 0.5, sigma2 = 1.0):
    """Simulate two independent univariate AR(1) series of length n.

    Both series share the coefficient phi:
        X_t = phi * X_{t-1} + epsilon_t,    X_0 = epsilon_0
        Y_t = phi * Y_{t-1} + eta_t,        Y_0 = eta_0
    The innovations epsilon_t and eta_t are drawn independently from
    N(0, sigma2), where sigma2 is the variance.

    Parameters
    ----------
    n : int
        Number of time steps; must be at least 1.
    phi : float
        AR(1) coefficient applied to both series.
    sigma2 : float
        Variance of the Gaussian innovations; must be non-negative.

    Returns
    -------
    X, Y : numpy.ndarray
        Two 1-D arrays of length n.

    Raises
    ------
    ValueError
        If sigma2 is negative.
    """
    if sigma2 < 0:
        raise ValueError("sigma2 must be non-negative")

    # np.random.normal expects the standard deviation, so convert the variance.
    scale = np.sqrt(sigma2)

    # Innovations (epsilons first, then etas, to keep the draw order stable).
    epsilons = np.random.normal(0.0, scale, n)
    etas = np.random.normal(0.0, scale, n)

    X = np.zeros(n)
    Y = np.zeros(n)
    X[0] = epsilons[0]
    Y[0] = etas[0]

    # AR(1) recursion: each series depends only on its own previous value.
    for t in range(1, n):
        X[t] = phi*X[t-1] + epsilons[t]
        Y[t] = phi*Y[t-1] + etas[t]

    return X, Y

In [4]:
def corr_ar1(n, phi = 0.5, sigma2 = 1.0):
    """Simulate a bivariate AR(1) process whose components drive each other.

    The coefficient matrix is Phi = [0 phi; phi 0], i.e.
        X_t = phi * Y_{t-1} + epsilon_t,    X_0 = epsilon_0
        Y_t = phi * X_{t-1} + eta_t,        Y_0 = eta_0
    The innovations epsilon_t and eta_t are drawn independently from
    N(0, sigma2), where sigma2 is the variance.

    Parameters
    ----------
    n : int
        Number of time steps; must be at least 1.
    phi : float
        Off-diagonal coefficient linking each series to the other's lag.
    sigma2 : float
        Variance of the Gaussian innovations; must be non-negative.

    Returns
    -------
    X, Y : numpy.ndarray
        Two 1-D arrays of length n.

    Raises
    ------
    ValueError
        If sigma2 is negative.
    """
    if sigma2 < 0:
        raise ValueError("sigma2 must be non-negative")

    # np.random.normal expects the standard deviation, so convert the variance.
    scale = np.sqrt(sigma2)

    # Innovations (epsilons first, then etas, to keep the draw order stable).
    epsilons = np.random.normal(0.0, scale, n)
    etas = np.random.normal(0.0, scale, n)

    X = np.zeros(n)
    Y = np.zeros(n)
    X[0] = epsilons[0]
    Y[0] = etas[0]

    # Cross-lagged recursion: each series depends on the other's previous value.
    for t in range(1, n):
        X[t] = phi*Y[t-1] + epsilons[t]
        Y[t] = phi*X[t-1] + etas[t]

    return X, Y

In [5]:
def nonlin_lag1(n, phi = 1, sigma2 = 0.5):
    """Simulate a bivariate nonlinear process with dependence at lag 1.

    The series are defined by
        X_t = phi * epsilon_t * Y_{t-1},    X_0 = epsilon_0
        Y_t = eta_t
    The innovations epsilon_t and eta_t are drawn independently from
    N(0, sigma2), where sigma2 is the variance.

    Parameters
    ----------
    n : int
        Number of time steps.
    phi : float
        Multiplier applied to the product epsilon_t * Y_{t-1}.
    sigma2 : float
        Variance of the Gaussian innovations; must be non-negative.

    Returns
    -------
    X, Y : numpy.ndarray
        Two 1-D arrays of length n.

    Raises
    ------
    ValueError
        If sigma2 is negative.
    """
    if sigma2 < 0:
        raise ValueError("sigma2 must be non-negative")

    # np.random.normal expects the standard deviation, so convert the variance.
    scale = np.sqrt(sigma2)

    # Innovations (epsilons first, then etas, to keep the draw order stable).
    epsilons = np.random.normal(0.0, scale, n)
    etas = np.random.normal(0.0, scale, n)

    # Y is exactly the eta innovations at every time step, including t = 0.
    Y = etas

    # X_0 is the raw innovation; for t >= 1 it is scaled by the previous Y.
    # Since Y never depends on X, the recursion reduces to a shifted product.
    X = epsilons.copy()
    X[1:] = phi*epsilons[1:]*etas[:-1]

    return X, Y

Functions to compute and display power.

In [6]:
def compute_power(tests, simulate, num_sims, alpha, sample_sizes, i):
    """Estimate each test's rejection rate at sample size sample_sizes[i].

    Runs num_sims simulations; in each one a pair (X, Y) is generated with
    simulate(n) and every test in `tests` is applied to that same pair.  A
    test counts as rejecting when its p-value is <= alpha.  The fraction of
    rejections is written to test['powers'][i].

    The stored value is overwritten rather than accumulated, so calling this
    function again for the same index (re-running a cell, or evaluating a
    second process with the same `tests` dict) gives a fresh estimate.

    Parameters
    ----------
    tests : dict
        Maps a key to a dict with at least 'object' (exposes
        p_value(X, Y, is_fast=...) returning a (p_value, metadata) pair),
        'is_fast' (forwarded to p_value) and 'powers' (indexable, mutable).
    simulate : callable
        simulate(n) -> (X, Y).
    num_sims : int
        Number of simulated data sets; must be positive.
    alpha : float
        Significance level.
    sample_sizes : sequence
        Candidate sample sizes.
    i : int
        Index into sample_sizes and into each 'powers' container.

    Raises
    ------
    ValueError
        If num_sims is not positive.
    """
    if num_sims <= 0:
        raise ValueError("num_sims must be a positive integer")

    n = sample_sizes[i]

    # Count rejections locally so stale values in 'powers' cannot leak in.
    rejections = {key: 0 for key in tests}
    for _sim in range(num_sims):
        X, Y = simulate(n)
        for key, test in tests.items():
            p_value, _metadata = test['object'].p_value(X, Y, is_fast = test['is_fast'])
            if p_value <= alpha:
                rejections[key] += 1

    for key, test in tests.items():
        test['powers'][i] = rejections[key] / num_sims

In [7]:
def plot_power(tests, sample_sizes, alpha, title):
    """Draw one power curve per test against sample size and show the figure.

    Each entry of `tests` supplies 'powers' (y-values), 'color' (line color)
    and 'name' (legend label).  A dashed black horizontal line marks alpha.
    The global matplotlib font size is set to 15 as a side effect.
    """
    plt.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots()

    # Axis decoration; the y-range is fixed slightly beyond [0, 1].
    ax.set_title(title)
    ax.set_xlabel("n")
    ax.set_ylabel("Rejection Probability")
    ax.set_ylim((-0.05, 1.05))

    # One solid line per test; legend labels are collected in plotting order.
    legend_labels = []
    for spec in tests.values():
        ax.plot(sample_sizes, spec['powers'], linestyle='-', color=spec['color'])
        legend_labels.append(spec['name'])
    ax.legend(legend_labels, loc='upper left', prop={'size': 12})

    # Reference line at the significance level.
    ax.axhline(y=alpha, color='black', linestyle='--')
    plt.show()

In [8]:
def power_curve(tests, process, num_sims, alpha, sample_sizes):
    """Compute and plot power across all sample sizes for one process.

    `process` is a dict with a 'function' entry (the simulator passed on to
    compute_power) and a 'name' entry (used as the plot title).  For every
    index of sample_sizes, compute_power updates each test's 'powers' entry
    in place; plot_power then displays the resulting curves.
    """
    for idx, _size in enumerate(sample_sizes):
        compute_power(tests, process['function'], num_sims, alpha, sample_sizes, idx)

    # All sample sizes are done: render the curves.
    plot_power(tests, sample_sizes, alpha, process['name'])

Experiments.

In [9]:
# Seed the global NumPy and stdlib generators so the simulated series are the
# same on every Restart & Run All (the simulators above draw from np.random).
# NOTE(review): whether the mgcpy 'subsample' p-value path also draws from
# these global generators is not visible in this notebook -- confirm.
SEED = 12345
np.random.seed(SEED)
random.seed(SEED)

# Experiment settings.
max_lag = 1                           # passed to both test constructors
num_sims = 40                         # simulated data sets per sample size
alpha = 0.05                          # significance level
sample_sizes = range(100, 600, 100)   # n = 100, 200, 300, 400, 500

dcorrx = DCorrX(max_lag = max_lag)
mgcx = MGCX(max_lag = max_lag)

# One entry per test variant.  'powers' holds one estimate per sample size and
# is filled in place by compute_power; 'name' and 'color' are used by plot_power.
tests = {
    'fast_dcorrx_sub' : {
        'name' : 'Fast DCorr-X Sub',
        'filename' : 'fast_dcorrx_sub',
        'is_fast' : 'subsample',
        'object' : dcorrx,
        'powers' : np.zeros(len(sample_sizes)),
        'color' : 'red',
    },
    'fast_mgcx_sub' : {
        'name' : 'Fast MGC-X Sub',
        'filename' : 'fast_mgcx_sub',
        'is_fast' : 'subsample',
        'object' : mgcx,
        'powers' : np.zeros(len(sample_sizes)),
        'color' : 'blue',
    }
}

# Simulated processes: 'function' generates (X, Y) for a given n and 'name'
# is the plot title.
processes = { 
    'indep' : {'function' : indep_ar1,
               'name' : 'Independent AR(1)',
               'filename' : 'indep_ar1'},
    'corr' : {'function' : corr_ar1,
              'name' : 'Correlated AR(1)',
              'filename' : 'corr_ar1'},
    'nonlin' : {'function' : nonlin_lag1,
                'name' : 'Nonlinear Lag 1',
                'filename' : 'nonlin_lag1'}
}

In [10]:
# Run the power experiment for each selected process key; only the
# correlated AR(1) case ('corr') is selected here.
for process_key in ('corr',):
    power_curve(tests, processes[process_key], num_sims, alpha, sample_sizes)

KeyboardInterrupt: 