In [9]:
import pandas as pd
import numpy as np
import gudhi as gd
from gudhi.point_cloud.timedelay import TimeDelayEmbedding
from gudhi.hera import wasserstein_distance
from gudhi.representations import PersistenceImage
from gudhi.representations.metrics import WassersteinDistance
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.manifold import MDS
import os
from tqdm.notebook import tqdm

%matplotlib widget

In [2]:
datapath =".\\data\\timeseries\\good_new"
filelist = os.listdir(datapath)
print(sorted(filelist))
good_time_series = [np.array(pd.read_csv(os.path.join(datapath,f), header=None)[0]) for f in sorted(filelist)]
datapath =".\\data\\timeseries\\bad_new"
filelist = os.listdir(datapath)
print(sorted(filelist))
bad_time_series = [np.array(pd.read_csv(os.path.join(datapath,f), header=None)[0]) for f in sorted(filelist)]

print(good_time_series[0])

['sig1_good.txt', 'sig2_good.txt', 'sig3_good.txt', 'sig4_good.txt', 'sig5_good.txt']
['sig1_bad.txt', 'sig2_bad.txt', 'sig3_bad.txt', 'sig4_bad.txt', 'sig5_bad.txt']
[-0.11156466  0.01043311  0.30969176 ...  0.44117724  0.98839737
 -2.1306849 ]


In [3]:
dim = 2
delay =100000//7
skip = 10

tde = TimeDelayEmbedding(dim = dim, delay=delay, skip=skip)
good_point_clouds = tde.transform(good_time_series)
bad_point_clouds = tde.transform(bad_time_series)

In [4]:
def sampled_ecc_alpha(pointcloud, n_samples, max_filt_val):
    ac = gd.AlphaComplex(points = pointcloud)
    st = ac.create_simplex_tree()
    filt_values = np.linspace(0,max_filt_val,n_samples)
    ecc_values = np.zeros_like(filt_values)
    simplices = list(st.get_filtration())
    cur_val = 0
    i=0
    for (j,t) in enumerate(filt_values):
        while (i<len(simplices)) and (simplices[i][1]<=t):
            cur_val=cur_val +(-1)**(len(simplices[i][0])-1)
            i = i+1
        #print(t, cur_val)
        ecc_values[j] = cur_val
        
    return ecc_values

In [5]:
def get_ecc(data, n_samples, max_filt_val):
    eccs = []
    for dat in tqdm(data):
        ec = sampled_ecc_alpha(dat, n_samples, max_filt_val)
        eccs.append(ec)
    return eccs

In [6]:
def two_sample_ecc_test(X,Y, n_loops, steps, maxfilt):
    ecX = sampled_ecc_alpha(len(X)**(1/2)*X, steps,maxfilt)/len(X)
    ecY = sampled_ecc_alpha(len(Y)**(1/2)*Y, steps,maxfilt)/len(Y)
    D = np.max(np.abs(ecY - ecX))
    pool = np.concatenate((X,Y), axis = 0)
    pv = 0

    for p in tqdm(range(0,n_loops)):
        permuted_pool = np.random.permutation(pool)
        Xp = permuted_pool[:len(X)]
        Yp = permuted_pool[len(X):]

        ecXp = sampled_ecc_alpha(len(X)**(1/2)*Xp, steps,maxfilt)/len(X)
        ecYp = sampled_ecc_alpha(len(Y)**(1/2)*Yp, steps,maxfilt)/len(Y)
        dp = np.max(np.abs(ecXp - ecYp))
        if (dp>D):
            pv = pv + 1.0/n_loops

    return (pv)


In [7]:
n=100
m = 150
n_loops = 1000
d = 2
steps = 1001
maxfilt = 8

In [10]:
for i in range(0,5):
    p = two_sample_ecc_test(good_point_clouds[i], bad_point_clouds[i],n_loops=n_loops, steps=steps, maxfilt=maxfilt)
    print(i,p)

HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


0 1.0000000000000007


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


1 0.7810000000000006


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


2 0.18700000000000014


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


3 0.5340000000000004


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))


4 0.06700000000000005
