In [1]:
import gudhi as gd
from gudhi import representations
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
import pingouin as pg
import scipy.stats as st
from collections import defaultdict
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import paired_distances
from tqdm.notebook import tqdm
import scipy.spatial.distance as spdists
#from curlyBrace import curlyBrace
%matplotlib widget

  **kwargs
  **kwargs


In [2]:
#scipy.stats uses numpy.random seed
np.random.seed(seed=0)

In [3]:
def sampled_ecc_alpha(pointcloud, n_samples, max_filt_val):
    ac = gd.AlphaComplex(points = pointcloud)
    st = ac.create_simplex_tree()
    filt_values = np.linspace(0,max_filt_val,n_samples)
    ecc_values = np.zeros_like(filt_values)
    simplices = list(st.get_filtration())
    cur_val = 0
    i=0
    for (j,t) in enumerate(filt_values):
        while (i<len(simplices)) and (simplices[i][1]<=t):
            cur_val=cur_val +(-1)**(len(simplices[i][0])-1)
            i = i+1
        #print(t, cur_val)
        ecc_values[j] = cur_val
        
    return ecc_values

In [4]:
def get_ecc(data, n_samples, max_filt_val):
    eccs = []
    for dat in tqdm(data):
        ec = sampled_ecc_alpha(dat, n_samples, max_filt_val)
        eccs.append(ec)
    return eccs

In [5]:

rv_uniform = st.uniform()
rv_beta = st.beta(3,3)


In [6]:
#First look at true H_0: X and Y sampled from the same distribution
n=100
n_loops = 1000
d = 2
steps = 1001
maxfilt = 3
radii = np.linspace(0,maxfilt,steps)
X = np.array([rv_uniform.rvs(n), rv_uniform.rvs(n)]).T
Y = np.array([rv_uniform.rvs(n), rv_uniform.rvs(n)]).T

ecX = sampled_ecc_alpha(n**(1/2)*X, steps,maxfilt)
ecY = sampled_ecc_alpha(n**(1/2)*Y, steps,maxfilt)


f,ax1= plt.subplots(1,1, figsize = (8,8))
ax1.scatter(X[:,0], X[:,1], label ="$X$", color="tab:blue")
ax1.scatter(Y[:,0], Y[:,1], label ="$Y$", color="tab:orange")
ax1.legend()
plt.savefig("2sample-H0-unif-vs-unif-square-n100.pdf")


D = np.max(np.abs(ecY - ecX))

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [7]:
def two_sample_ecc_test(X,Y):
    pool = np.concatenate((X,Y), axis = 0)
    pv = 0

    for p in tqdm(range(0,n_loops)):
        permuted_pool = np.random.permutation(pool)
        Xp = permuted_pool[:n]
        Yp = permuted_pool[n:]

        ecXp = sampled_ecc_alpha(n**(1/2)*Xp, steps,maxfilt)
        ecYp = sampled_ecc_alpha(n**(1/2)*Yp, steps,maxfilt)
        dp = np.max(np.abs(ecXp - ecYp))
        if (dp>D):
            pv = pv + 1.0/n_loops

    return (pv)


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


0.016000000000000007


In [8]:
supdist = np.max(np.abs(ecX - ecY))
suparg = np.argmax(np.abs(ecX - ecY))

f,ax = plt.subplots(figsize = (8,8))
ax.plot(radii, ecX, label = "$\\chi(X)$", color="tab:blue")
ax.plot(radii, ecY, label = "$\\chi(Y)$", color="tab:orange")
ax.annotate("$\|\chi(X)-\chi(Y)\|_{\infty}$ ="+str(supdist), fontsize = "11",xy=(radii[suparg],0.5*(ecX[suparg]+ecY[suparg])),xycoords="data",xytext = (radii[suparg]+0.1,0.5*(ecX[suparg]+ecY[suparg])),textcoords="data",arrowprops={"arrowstyle":"-[, widthB=1.5", "connectionstyle":"arc3","shrinkA":1,"shrinkB":2, "color":"black"})
ax.legend()

plt.savefig("2sample-H0-ECC-unif-vs-unif-n100-supdist.pdf")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
#First look at true H_0: X and Y sampled from the same distribution
n=100
n_loops = 1000
d = 2
steps = 1001
maxfilt = 3
radii = np.linspace(0,maxfilt,steps)
X = np.array([rv_uniform.rvs(n), rv_uniform.rvs(n)]).T
Y = np.array([rv_beta.rvs(n), rv_beta.rvs(n)]).T

ecX = sampled_ecc_alpha(n**(1/2)*X, steps,maxfilt)
ecY = sampled_ecc_alpha(n**(1/2)*Y, steps,maxfilt)


f,ax1= plt.subplots(1,1, figsize = (8,8))
ax1.scatter(X[:,0], X[:,1], label ="$X$", color="tab:blue")
ax1.scatter(Y[:,0], Y[:,1], label ="$Y$", color="tab:orange")
ax1.legend()
plt.savefig("2sample-H1-unif-vs-beta-square-n100.pdf")


D = np.max(np.abs(ecY - ecX))

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
pool = np.concatenate((X,Y), axis = 0)
pv = 0

for p in tqdm(range(0,n_loops)):
    permuted_pool = np.random.permutation(pool)
    Xp = permuted_pool[:n]
    Yp = permuted_pool[n:]

    ecXp = sampled_ecc_alpha(n**(1/2)*Xp, steps,maxfilt)
    ecYp = sampled_ecc_alpha(n**(1/2)*Yp, steps,maxfilt)
    dp = np.max(np.abs(ecXp - ecYp))
    if (dp>D):
        pv = pv + 1.0/n_loops

print(pv)
