In [1]:
import gudhi as gd
from gudhi import representations
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
import pingouin as pg
import scipy.stats as st
from collections import defaultdict
from sklearn.metrics import pairwise_distances
from sklearn.metrics.pairwise import paired_distances
from tqdm.notebook import tqdm
import scipy.spatial.distance as spdists
from scipy.stats import ks_2samp

#from curlyBrace import curlyBrace
%matplotlib inline

In [8]:
def sampled_ecc_witness(landmarks, witnesses, n_samples, max_filt_val):
    """Sample the Euler characteristic curve (ECC) of a strong witness complex.

    A ``gd.EuclideanStrongWitnessComplex`` is built from ``landmarks`` and
    ``witnesses``; the running Euler characteristic of its filtration is then
    read off on ``n_samples`` evenly spaced thresholds in ``[0, max_filt_val]``.

    Parameters
    ----------
    landmarks, witnesses
        Forwarded unchanged to the gudhi constructor.
    n_samples : int
        Number of thresholds on the evaluation grid.
    max_filt_val : float
        Last threshold of the grid (the first one is 0).

    Returns
    -------
    numpy.ndarray
        Float array of length ``n_samples``. Entry ``j`` is the sum of
        ``(-1) ** (len(simplex) - 1)`` over all simplices consumed up to the
        ``j``-th threshold.
    """
    witness_complex = gd.EuclideanStrongWitnessComplex(landmarks=landmarks, witnesses=witnesses)
    # 999999 is handed to create_simplex_tree as its first positional argument
    # (presumably gudhi's max_alpha_square, i.e. effectively no cut-off).
    tree = witness_complex.create_simplex_tree(999999)
    thresholds = np.linspace(0, max_filt_val, n_samples)

    # Relies on get_filtration() yielding simplices by non-decreasing filtration value.
    filtration = list(tree.get_filtration())
    births = np.array([value for _, value in filtration], dtype=float)
    signs = np.array([(-1) ** (len(simplex) - 1) for simplex, _ in filtration], dtype=float)

    # partial_sums[m] = Euler characteristic after the first m simplices were added.
    partial_sums = np.concatenate(([0.0], np.cumsum(signs)))

    # Number of simplices born at or before each threshold; the running maximum
    # mirrors a cursor that only ever moves forward through the filtration.
    consumed = np.maximum.accumulate(np.searchsorted(births, thresholds, side="right"))
    return partial_sums[consumed]

def get_ecc(data, n_samples, max_filt_val):
    """Compute one sampled ECC per element of ``data``.

    Every element is passed to ``sampled_ecc_alpha`` together with
    ``n_samples`` and ``max_filt_val``. Iteration goes through ``tqdm`` so a
    progress bar is displayed. The results are returned as a list in input
    order.

    NOTE(review): ``sampled_ecc_alpha`` is not defined in the cells visible
    here -- presumably it lives in another cell; confirm it exists after a
    kernel restart.
    """
    return [sampled_ecc_alpha(item, n_samples, max_filt_val) for item in tqdm(data)]

def ecc_distance(samples1, samples2, n_samples, max_filt_val):
    """Paired distances between the ECCs of two equally long sample lists.

    The ECC of ``samples1[k]`` is compared with the ECC of ``samples2[k]``.
    For each pair the distance is

        max_t (ECC1 - ECC2)(t) + max_t (ECC2 - ECC1)(t)

    evaluated on the sampling grid, i.e. the spread (max minus min) of the
    difference curve.

    NOTE(review): this formula is reconstructed from the expression
    ``np.maximum(ECCs1-ECCs2)+np.maximum(ECCs2-ECCs1)``, which could not run
    (list subtraction, and ``np.maximum`` needs two operands). Confirm that a
    per-pair ``max`` is what was intended.

    Parameters
    ----------
    samples1, samples2
        Sequences forwarded to ``get_ecc``; they must produce the same number
        of curves.
    n_samples : int
        Number of grid points per curve (forwarded to ``get_ecc``).
    max_filt_val : float
        Upper end of the grid (forwarded to ``get_ecc``).

    Returns
    -------
    numpy.ndarray
        1-D float array with one distance per pair; empty if there are no pairs.

    Raises
    ------
    ValueError
        If the two ECC collections do not have the same shape.
    """
    # get_ecc returns plain lists; arithmetic needs arrays.
    ECCs1 = np.asarray(get_ecc(samples1, n_samples=n_samples, max_filt_val=max_filt_val), dtype=float)
    ECCs2 = np.asarray(get_ecc(samples2, n_samples=n_samples, max_filt_val=max_filt_val), dtype=float)

    if ECCs1.shape != ECCs2.shape:
        raise ValueError(
            f"ECC collections must have the same shape, got {ECCs1.shape} and {ECCs2.shape}"
        )
    if len(ECCs1) == 0:
        # np.max cannot reduce over nothing; no pairs means no distances.
        return np.zeros(0)

    diff = ECCs1 - ECCs2
    # Reduce along the grid axis so that one value per pair remains.
    return np.max(diff, axis=-1) + np.max(-diff, axis=-1)

def gen_rv(rv, N, n_samples):
    """Draw ``n_samples`` samples of ``N`` points with independent coordinates.

    Parameters
    ----------
    rv : sequence
        One object per coordinate, each exposing ``rvs(N)`` (e.g. a frozen
        ``scipy.stats`` distribution). Any number of coordinates is accepted.
    N : int
        Number of points per sample.
    n_samples : int
        Number of samples to draw.

    Returns
    -------
    list of numpy.ndarray
        ``n_samples`` arrays of shape ``(N, len(rv))``; column ``c`` holds the
        draws from ``rv[c]``. Within a sample, coordinates are drawn in the
        order they appear in ``rv``.
    """
    return [
        np.column_stack([component.rvs(N) for component in rv])
        for _ in range(n_samples)
    ]

In [9]:
# Frozen scipy.stats distributions, grouped by family.
# Positional arguments are handed to scipy exactly as written.

# Normal
rv_normal = st.norm()
rv_normal2_2 = st.norm(2, 2)

# Student t
rv_t3 = st.t(df=3)
rv_t5 = st.t(df=5)
rv_t7 = st.t(df=7)
rv_t10 = st.t(df=10)

# Other symmetric families
rv_laplace = st.laplace()
rv_lorentz = st.cauchy()
rv_logistic = st.logistic()
rv_cosine = st.cosine()

# Uniform / beta / arcsine / ARGUS
rv_uniform = st.uniform()
rv_beta2_2 = st.beta(2, 2)
rv_beta5_5 = st.beta(5, 5)
rv_beta10_10 = st.beta(10, 10)
rv_arcsine = st.arcsine()
rv_argus = st.argus(chi=1)

# Gamma
# NOTE(review): scipy's gamma takes (a, loc=0, scale=1), so the second
# positional argument below is `loc` (a shift), not a scale or rate --
# confirm that this is what the variable names are meant to express.
rv_gamma4_5 = st.gamma(4, 5)
rv_gamma5_1 = st.gamma(5, 1)
rv_gamma10_1 = st.gamma(10, 1)
rv_gamma20_1 = st.gamma(20, 1)

# Chi-squared
rv_chisq4 = st.chi2(df=4)
rv_chisq10 = st.chi2(df=10)

In [18]:
# --- Experiment configuration ---
n = 20          # points per sample (passed to gen_rv as N)
n_loops = 100   # samples drawn per distribution (passed to gen_rv as n_samples)
d = 3           # number of coordinates; every row of `rvs` must have this length
steps = 1001    # grid points per sampled ECC
maxfilt = 3     # largest filtration threshold on the ECC grid

# One row per product distribution: the marginal of each coordinate.
rvs = [[rv_uniform, rv_uniform, rv_uniform],
       [rv_beta2_2, rv_beta2_2, rv_beta2_2],
       [rv_beta5_5, rv_beta5_5, rv_beta5_5],
       [rv_beta10_10, rv_beta10_10, rv_beta10_10],
       [rv_argus, rv_argus, rv_argus],
       [rv_arcsine, rv_arcsine, rv_arcsine],
       [rv_uniform, rv_beta10_10, rv_beta10_10],
       [rv_uniform, rv_uniform, rv_beta10_10]]

# Display labels, index-aligned with `rvs`.
rv_names = ["UxUxU",
            "B22xB22xB22",
            "B55xB55xB55",
            "B1010xB1010xB1010",
            "ArgusxArgusxArgus",
            "asinxasinxasin",
            "UxB1010xB1010",
            "UxUxB1010"]

# Catch a label list or a row that drifted out of sync with the configuration.
assert len(rvs) == len(rv_names), "rvs and rv_names must be index-aligned"
assert all(len(row) == d for row in rvs), "every row of rvs needs d marginals"

In [15]:
# Four batches of draws. Each batch holds, for every row of `rvs`, the array
# built from gen_rv(rv, n, n_loops), scaled by n ** (1/3).
# NOTE(review): no random seed is set in the visible cells, so these draws
# will differ after every kernel restart.
samples = [
    [(n ** (1 / 3)) * np.array(gen_rv(rv, n, n_loops)) for rv in tqdm(rvs)]
    for batch in range(4)
]


  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

In [20]:
# eccs[i, j, k] is the sampled ECC of the witness complex whose landmarks are
# sample k of distribution i (batch 0) and whose witnesses are sample k of
# distribution j (batch 1).
eccs = np.zeros((len(rvs), len(rvs), n_loops, steps))

# np.ndindex walks (i, j, k) in the same order as three nested range loops,
# with the last index varying fastest.
for i, j, k in np.ndindex(len(rvs), len(rvs), n_loops):
    landmarks_ijk = samples[0][i][k]
    witnesses_ijk = samples[1][j][k]
    eccs[i, j, k] = sampled_ecc_witness(landmarks_ijk, witnesses_ijk, steps, maxfilt)

KeyboardInterrupt: 