In [16]:
import numpy as np
from numpy import linalg as la
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale

from data_cube import DataCube

In [17]:
from tslearn.metrics import dtw, dtw_path

In [54]:
dc = DataCube(
    subjects="all",
    gestures=["3", "4", "5", "6"],
    channels=["2", "4", "6", "8"],
    data_grp="parsed"
)
dc.load_data()
dc.rms_smooth(300, 50)
dc.normalize_modalities(smooth=True)

---

In [22]:
def cumulated_euc_ts(i, j):
    """
    cumulated version of the time series w/ euclidean distance
    in which we take the sum values over time as time increases
    and then apply the chosen metric.
    i, j - arrays of data points
    """
    # abs equivalent to ((i-j)**2)**0.5 in scalar case
    return abs(i.sum() - j.sum())

In [55]:
subj_lab = []
gest_lab = []
arrays = []

for chan in [1,2,3,4]:
    # get arrays of only one channel
    for s, gdict in dc.data_set_smooth.items():
        for g, a in gdict.items():
            subj_lab.append(s)
            gest_lab.append(int(g[0]))
            arrays.append(a[:, chan])

    # calculate dtw between all arrays and make predictions
    predicts = []
    for n, g1 in enumerate(arrays):
        g1_dtws = []
        for m, g2 in enumerate(arrays):
            g1_dtws.append(cumulated_euc_ts(g1, g2))
        g1_dtws = np.array(g1_dtws)
        pred_idx = np.argsort(g1_dtws)[1] # index of 2nd closest array by dtw; 1st closest is self
        predicts.append(gest_lab[pred_idx])

    acc = (sum(np.array(gest_lab) == np.array(predicts)) / len(gest_lab)) * 100
    
    print(f"raw channel {chan} accuracy: {acc}%")

raw channel 1 accuracy: 36.80555555555556%
raw channel 2 accuracy: 35.15625%
raw channel 3 accuracy: 36.226851851851855%
raw channel 4 accuracy: 35.15625%


---
### CV to determine optimal smoothing for raw signals

In [36]:
def cross_val_raw(chan, sze, stp):
    
    dc.rms_smooth(sze, stp)
    dc.normalize_modalities(smooth=True)
    
    for s, gdict in dc.data_set_smooth.items():
            for g, a in gdict.items():
                subj_lab.append(s)
                gest_lab.append(int(g[0]))
                arrays.append(a[:, chan])

    # calculate dtw between all arrays and make predictions
    predicts = []
    for n, g1 in enumerate(arrays):
        g1_dtws = []
        for m, g2 in enumerate(arrays):
            g1_dtws.append(cumulated_euc_ts(g1, g2))
        g1_dtws = np.array(g1_dtws)
        pred_idx = np.argsort(g1_dtws)[1] # index of 2nd closest array by dtw; 1st closest is self
        predicts.append(gest_lab[pred_idx])

    acc = (sum(np.array(gest_lab) == np.array(predicts)) / len(gest_lab)) * 100
        
    return acc

In [53]:
tnsr[0,0,17]

37.057413928012515

In [43]:
size_vals = [5*x for x in range(5, 65, 5)]
step_vals = [x for x in range(5, 26)]

tnsr = np.zeros(shape=(1, len(size_vals), len(step_vals)))
chan=1

for z, sze in enumerate(size_vals):
    for t, stp in enumerate(step_vals):
        tnsr[z,t] = cross_val_raw(chan, sze, stp)

0


KeyboardInterrupt: 

In [50]:
tnsr[0,:,:]

array([[32.24344136, 32.52840909, 32.65438988, 32.65411793, 32.46527778,
        33.18326271, 32.80381944, 32.99464936, 33.42853943, 33.38018078,
        34.42111545, 34.76228632, 34.92476852, 35.61100746, 35.61580882,
        36.08343398, 36.97668651, 37.05741393,  0.        ,  0.        ,
         0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        

In [49]:
tnsr[0,:,[np.argmax(tnsr[0,:,:])]]

array([[37.05741393,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ,  0.        ]])

---
### Matching with bottleneck distance

In [None]:
from ripser import ripser, Rips
from persim import plot_diagrams, PersImage, bottleneck
from TDA_helper_fcns import sublevel_set_time_series_dist

In [None]:
px = 20
sd = 1

In [None]:
subj_lab = []
gest_lab = []
arrays = []

for chan in [1,2,3,4]:
    # get arrays of only one channel
    for s, gdict in dc.data_set_smooth.items():
        for g, a in gdict.items():
            subj_lab.append(s)
            gest_lab.append(int(g[0]))
            arrays.append(a[:, chan])

    # calculate bottleneck distance between all pds and make predictions
    predicts = []
    for n, g1 in enumerate(arrays):
        rips = Rips(maxdim=0, verbose=False) # initialize rips complex
        sls1 = sublevel_set_time_series_dist(g1)
        dgm1 = rips.fit_transform(sls1, distance_matrix=True)[0]
        g1_bottlenecks = []
        for m, g2 in enumerate(arrays):
            sls2 = sublevel_set_time_series_dist(g2)
            dgm2 = rips.fit_transform(sls2, distance_matrix=True)[0]
            distance_bottleneck, (matching, D) = bottleneck(dgm1, dgm2, matching=True)
            g1_bottlenecks.append(distance_bottleneck)
        g1_bottlenecks = np.array(g1_bottlenecks)
        pred_idx = np.argsort(g1_bottlenecks)[1] # index of 2nd closest array by dtw; 1st closest is self
        predicts.append(gest_lab[pred_idx])

    acc = (sum(np.array(gest_lab) == np.array(predicts)) / len(gest_lab)) * 100
    
    print(f"raw channel {chan} accuracy: {acc}%")