In [None]:
from hyppo.sims import linear, multimodal_independence
from hyppo.random_forest import MGCRF

import numpy as np
import matplotlib.pyplot as plt

In [None]:
def plot_meanstd(reps=100, ns=None):
    """Plot the mean and spread of the MGCRF statistic against sample size.

    One figure row is drawn per simulation (linear, multimodal independence).
    The left panel shows the mean of ``MGCRF()._statistic`` over ``reps``
    freshly simulated datasets at each sample size, with the standard
    deviation as error bars.  The right panel shows a single 100-sample draw
    from the same simulation.

    Parameters
    ----------
    reps : int, default 100
        Number of independent simulate-and-score repetitions per sample size.
    ns : array-like of int, optional
        Sample sizes to evaluate.  Defaults to ``np.arange(50, 1001, 50)``.

    Notes
    -----
    Assumes ``MGCRF()._statistic(x, y)`` returns a scalar -- TODO confirm.
    The call count is ``len(sims) * reps * len(ns)``, so the defaults are slow.
    """
    sims = [linear, multimodal_independence]
    # Titles were previously read from an undefined ``sim_title`` name.
    sim_title = ["Linear", "Independence"]

    if ns is None:
        ns = np.arange(50, 1001, 50)
    ns = np.asarray(ns)

    fig, ax = plt.subplots(nrows=len(sims), ncols=2, figsize=(14, 12),
                           squeeze=False)

    for idx, (row, sim, title) in enumerate(zip(ax, sims, sim_title)):
        curve_ax, sample_ax = row

        # Shape (reps, len(ns)): reducing over axis 0 then yields one
        # mean/std per sample size, matching the length of ``ns``.
        stats = np.array([
            [MGCRF()._statistic(*sim(int(n), 1)) for n in ns]
            for _ in range(reps)
        ])
        means = np.mean(stats, axis=0)
        stds = np.std(stats, axis=0)

        # Distinct colour per simulation so the shared legend is readable.
        curve_ax.errorbar(ns, means, yerr=stds, label=title,
                          color="C{}".format(idx))
        curve_ax.set_xticks([])
        curve_ax.set_yticks([])
        curve_ax.set_title(title)

        # Unordered samples: draw points, not a connecting line.
        x, y = sim(100, 1)
        sample_ax.scatter(x, y)
        sample_ax.set_title("{} (sample)".format(title))

    # The x axis of the curves is the sample size, the y axis the statistic.
    fig.text(0.5, 0.08, 'Sample Size', ha='center')
    fig.text(0.08, 0.5, 'Mean Test Statistic', va='center', rotation='vertical')

    # Build the legend from the labelled error-bar curves only.
    handles, labels = [], []
    for curve_ax in ax[:, 0]:
        ax_handles, ax_labels = curve_ax.get_legend_handles_labels()
        handles.extend(ax_handles)
        labels.extend(ax_labels)
    leg = fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.08),
                     bbox_transform=fig.transFigure, ncol=5, loc='upper center')
    leg.get_frame().set_linewidth(0.0)
    # ``legendHandles`` was renamed ``legend_handles`` in newer Matplotlib.
    legend_lines = getattr(leg, "legend_handles", None)
    if legend_lines is None:
        legend_lines = leg.legendHandles
    for legobj in legend_lines:
        legobj.set_linewidth(5.0)
    plt.subplots_adjust(hspace=.50)

In [None]:
# Render the mean/std figure produced by plot_meanstd().
plot_meanstd()

In [None]:
# Sample sizes 50, 100, ..., 1000; the bare name displays the array.
ns = np.arange(50, 1001, 50)
ns

In [None]:
# Debug check: print the type of every element of ns (other cells cast
# these with int(n) before passing them to the simulations).
for n in ns:
    print(type(n))

In [None]:
# Inline version of the mean/std figure: one row per simulation, with the
# statistic-vs-sample-size curve on the left and a sample draw on the right.
sims = [linear, multimodal_independence]
# Titles were previously read from an undefined ``sim_title`` name.
sim_title = ["Linear", "Independence"]

ns = np.arange(50, 1001, 50)
n_reps = 100  # simulate-and-score repetitions per sample size (slow)

fig, ax = plt.subplots(nrows=len(sims), ncols=2, figsize=(14, 12),
                       squeeze=False)

for idx, (row, sim, title) in enumerate(zip(ax, sims, sim_title)):
    curve_ax, sample_ax = row

    # Shape (n_reps, len(ns)) so that reducing over axis 0 gives one
    # mean/std per sample size; a flat list would collapse to a scalar.
    # Assumes MGCRF()._statistic returns a scalar -- TODO confirm.
    stats = np.array([
        [MGCRF()._statistic(*sim(int(n), 1)) for n in ns]
        for _ in range(n_reps)
    ])
    means = np.mean(stats, axis=0)
    stds = np.std(stats, axis=0)

    # Distinct colour per simulation so the shared legend is readable.
    curve_ax.errorbar(ns, means, yerr=stds, label=title,
                      color="C{}".format(idx))
    curve_ax.set_xticks([])
    curve_ax.set_yticks([])
    curve_ax.set_title(title)

    # Unordered samples: draw points, not a connecting line.
    x, y = sim(100, 1)
    sample_ax.scatter(x, y)
    sample_ax.set_title("{} (sample)".format(title))

# The x axis of the curves is the sample size, the y axis the statistic.
fig.text(0.5, 0.08, 'Sample Size', ha='center')
fig.text(0.08, 0.5, 'Mean Test Statistic', va='center', rotation='vertical')

# Build the legend from the labelled error-bar curves only.
handles, labels = [], []
for curve_ax in ax[:, 0]:
    ax_handles, ax_labels = curve_ax.get_legend_handles_labels()
    handles.extend(ax_handles)
    labels.extend(ax_labels)
leg = fig.legend(handles, labels, bbox_to_anchor=(0.5, 0.08),
                 bbox_transform=fig.transFigure, ncol=5, loc='upper center')
leg.get_frame().set_linewidth(0.0)
# ``legendHandles`` was renamed ``legend_handles`` in newer Matplotlib.
legend_lines = getattr(leg, "legend_handles", None)
if legend_lines is None:
    legend_lines = leg.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(5.0)
plt.subplots_adjust(hspace=.50)

In [None]:
# Number of sample sizes in ns.
print(len(ns))

In [None]:
from hyppo.independence import Dcorr

# Sample sizes at which the MGCRF statistic is summarised.
ns = np.arange(50, 1001, 50)
n_fits = 50  # MGCRF evaluations per sample size and simulation

# One slot per sample size; sized from ns so the two cannot drift apart.
means_lin, stds_lin = [0.0] * len(ns), [0.0] * len(ns)
means_multiind, stds_multiind = [0.0] * len(ns), [0.0] * len(ns)

# NOTE(review): each dataset is simulated once and scored n_fits times, so
# the spread presumably reflects MGCRF's own randomness rather than sampling
# variability -- confirm this is the intended quantity.
for i, n in enumerate(ns):
    print("Linear {}".format(n))
    x, y = linear(int(n), 1, noise=True)
    # Named *_scores (not ``stats``) to avoid shadowing the scipy.stats alias
    # that another cell imports under that name.
    lin_scores = [MGCRF()._statistic(x, y) for _ in range(n_fits)]
    means_lin[i] = np.mean(lin_scores)
    stds_lin[i] = np.std(lin_scores)

    print("Independence {}".format(n))
    x, y = multimodal_independence(int(n), 1)
    ind_scores = [MGCRF()._statistic(x, y) for _ in range(n_fits)]
    means_multiind[i] = np.mean(ind_scores)
    stds_multiind[i] = np.std(ind_scores)

In [None]:
import seaborn as sns
sns.set(color_codes=True, style='white', context='talk', font_scale=1.5)
PALETTE = sns.color_palette("Set1")
sns.set_palette(PALETTE[1:], n_colors=9)

fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(14,12))

# One column per simulation: (title, sampler, per-n means, per-n stds,
# y ticks for the bottom panel).
panels = [
    ("Linear", lambda: linear(500, 1, noise=True),
     means_lin, stds_lin, [-0.1, 0, 0.1]),
    ("Independence", lambda: multimodal_independence(500, 1),
     means_multiind, stds_multiind, []),
]

for j, (title, draw_sample, means, stds, yticks) in enumerate(panels):
    scatter_ax, curve_ax = ax[0, j], ax[1, j]

    # Top row: a 500-sample scatter of the simulation.
    x, y = draw_sample()
    scatter_ax.scatter(x, y, color="#969696", marker="x")
    scatter_ax.set_xticks([])
    scatter_ax.set_yticks([])
    scatter_ax.set_title(title, fontsize=30)

    # Bottom row: mean statistic per sample size.  The error bars are the
    # measured standard deviations, not a hand-written linspace, so they
    # reflect the data and always match len(ns).
    curve_ax.errorbar(ns, means, yerr=stds, lw=3)
    curve_ax.set_xticks([50, 500, 1000])
    curve_ax.set_yticks(yticks)
    curve_ax.set_ylim(-0.1, 0.1)

fig.text(0.04, 0.3, 'Mean Correlation', va='center', rotation='vertical', fontsize=30)
fig.text(0.04, 0.7, 'Scatter Plots', va='center', rotation='vertical', fontsize=30)
plt.savefig('fig1.pdf', transparent=True, bbox_inches='tight')

In [None]:
# Inspect the per-sample-size standard deviations for the linear simulation.
stds_lin

In [None]:
import matplotlib.pyplot as plt
import numpy as np


labels = ['False Pos', 'True Pos']
kmerf = [4, 5]
hsic = [1, 5]
mgc = [2, 5]
hhg = [2, 1]

x = np.arange(len(labels))  # the label locations
width = 0.35  # spacing unit; each bar is drawn half this wide
bar_width = width / 2

# Methods in legend order, paired with their [false pos, true pos] counts.
series = [('KMERF', kmerf), ('MGC', mgc), ('Hsic', hsic), ('HHG', hhg)]

# Symmetric offsets so every group of bars is centred on its tick label.
offsets = (np.arange(len(series)) - (len(series) - 1) / 2) * bar_width

fig, ax = plt.subplots()
for (method, counts), offset in zip(series, offsets):
    ax.bar(x + offset, counts, bar_width, label=method)

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('#True/False Positives')
ax.set_title('Biomarker kNN Classification')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

fig.tight_layout()

plt.show()

In [None]:
import numpy as np
import scipy.stats as stats
from numpy.testing import assert_approx_equal
from scipy.spatial.distance import cdist
import warnings

def _simulations(samps=100, dims=1, sim_type=""):
    # linear simulation
    if sim_type == "linear":
        x = np.random.uniform(-1, 1, size=(samps, 1))
        y = x + 0.3 * np.random.random_sample(size=(x.size, 1))

    # spiral simulation
    elif sim_type == "nonlinear":
        unif = np.array(np.random.uniform(0, 5, size=(samps, 1)))
        x = unif * np.cos(np.pi * unif)
        y = unif * np.sin(np.pi * unif) + (0.4
            * np.random.random_sample(size=(x.size, 1)))

    # independence (tests type I simulation)
    elif sim_type == "independence":
        u = np.random.normal(0, 1, size=(samps, 1))
        v = np.random.normal(0, 1, size=(samps, 1))
        u_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
        v_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
        x = u/3 + 2*u_2 - 1
        y = v/3 + 2*v_2 - 1

    # raises error if not approved sim_type
    else:
        raise ValueError("sim_type must be linear, nonlinear, or "
                         "independence")

    # add dimensions of noise for higher dimensions
    if dims > 1:
        dims_noise = np.random.normal(0, 1, size=(samps, dims-1))
        x = np.concatenate((x, dims_noise), axis=1)

    return x, y

# Draw one spiral sample and build the pairwise Euclidean distance matrices.
x, y = _simulations(sim_type="nonlinear", samps=100, dims=1)
distx = cdist(x, x, metric="euclidean")
disty = cdist(y, y, metric="euclidean")

# Run MGC twice with the same seed: once on the precomputed distance
# matrices (compute_distance=None) and once on the raw samples.
stat_dist, pvalue_dist, _ = stats.multiscale_graphcorr(
    distx, disty, compute_distance=None, reps=1000, random_state=1
)
stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)

# Both routes should agree to one significant digit.
for from_dist, from_raw in ((stat_dist, stat), (pvalue_dist, pvalue)):
    assert_approx_equal(from_dist, from_raw, significant=1)

In [None]:
from hyppo.independence import Dcorr
import numpy as np

# Two 1-D integer ramps; y is x shifted up by 5.
x = np.arange(20)
y = x + 5

# Dcorr independence test with 1000 replications; the result is displayed.
Dcorr().test(x, y, reps=1000, workers=-1, auto=False)

In [None]:
from hyppo.independence import Dcorr
from hyppo.sims import linear

# Draw a sample from the linear simulation (arguments 50 and 1), run the
# Dcorr test on it with default settings, and display the
# (statistic, p-value) pair.
x, y = linear(50, 1)
stat, pvalue = Dcorr().test(x, y)
stat, pvalue

In [1]:
import numpy as np
from hyppo.ksample import KSample

# Two integer ramps of different lengths (20 and 30 elements).
x, y = np.arange(20), np.arange(30)

# K-sample test configured with "hsic"; display the (statistic, p-value) pair.
hsic_ksample = KSample("hsic")
stat, pvalue = hsic_ksample.test(x, y)
stat, pvalue

(0.06493187639773342, 0.039329137391840685)