In [2]:
import numpy as np
from numpy import linalg as la
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale

from data_cube import DataCube

In [None]:
from tslearn.metrics import dtw, dtw_path

In [3]:
dc = DataCube(
    subjects="all",
    gestures=["3", "4", "5", "6"],
    channels=["2", "4", "6", "8"],
    data_grp="parsed"
)
dc.load_data()
dc.rms_smooth(100, 50)
dc.normalize_modalities(smooth=True)

---
### Matching with Dynamic Time Warping

In [None]:
subj_lab = []
gest_lab = []
arrays = []

for chan in [1,2,3,4]:
    # get arrays of only one channel
    for s, gdict in dc.data_set_smooth.items():
        for g, a in gdict.items():
            subj_lab.append(s)
            gest_lab.append(int(g[0]))
            arrays.append(a[:, chan])

    # calculate dtw between all arrays and make predictions
    predicts = []
    for n, g1 in enumerate(arrays):
        g1_dtws = []
        for m, g2 in enumerate(arrays):
            g1_dtws.append(dtw(g1, g2))
        g1_dtws = np.array(g1_dtws)
        pred_idx = np.argsort(g1_dtws)[1] # index of 2nd closest array by dtw; 1st closest is self
        predicts.append(gest_lab[pred_idx])

    acc = (sum(np.array(gest_lab) == np.array(predicts)) / len(gest_lab)) * 100
    
    print(f"raw channel {chan} accuracy: {acc}%")

---
### Matching with Cumulated Euclidean Distance

In [None]:
def cumulated_euc_ts(i, j):
    """
    cumulated version of the time series w/ euclidean distance
    in which we take the sum values over time as time increases
    and then apply the chosen metric.
    i, j - arrays of data points
    """
    # abs equivalent to ((i-j)**2)**0.5 in scalar case
    return abs(i.sum() - j.sum())

In [None]:
subj_lab = []
gest_lab = []
arrays = []

for chan in [1,2,3,4]:
    # get arrays of only one channel
    for s, gdict in dc.data_set_smooth.items():
        for g, a in gdict.items():
            subj_lab.append(s)
            gest_lab.append(int(g[0]))
            arrays.append(a[:, chan])

    # calculate dtw between all arrays and make predictions
    predicts = []
    for n, g1 in enumerate(arrays):
        g1_dtws = []
        for m, g2 in enumerate(arrays):
            g1_dtws.append(cumulated_euc_ts(g1, g2))
        g1_dtws = np.array(g1_dtws)
        pred_idx = np.argsort(g1_dtws)[1] # index of 2nd closest array by dtw; 1st closest is self
        predicts.append(gest_lab[pred_idx])

    acc = (sum(np.array(gest_lab) == np.array(predicts)) / len(gest_lab)) * 100
    
    print(f"raw channel {chan} accuracy: {acc}%")

---
### Matching with bottleneck distance

In [6]:
from ripser import lower_star_img, Rips
from persim import plot_diagrams, PersImage, bottleneck
from TDA_helper_fcns import sublevel_set_time_series_dist

In [8]:
subj_lab = []
gest_lab = []
arrays = []

rips = Rips(maxdim=0, verbose=False) # initialize rips complex

for chan in [1,2,3,4]:
    # get arrays of only one channel
    for s, gdict in dc.data_set_smooth.items():
        for g, a in gdict.items():
            subj_lab.append(s)
            gest_lab.append(int(g[0]))
            arrays.append(a[:, chan])

    # calculate bottleneck distance between all pds and make predictions
    predicts = []
    for n, g1 in enumerate(arrays):
        if n % 100 == 0: print(n)
        sls1 = sublevel_set_time_series_dist(g1)
        dgm1 = rips.fit_transform(sls1, distance_matrix=True)[0]
        g1_bottlenecks = []
        for m, g2 in enumerate(arrays):
            sls2 = sublevel_set_time_series_dist(g2)
            dgm2 = rips.fit_transform(sls2, distance_matrix=True)[0]
            distance_bottleneck, (matching, D) = bottleneck(dgm1, dgm2, matching=True)
            g1_bottlenecks.append(distance_bottleneck)
        g1_bottlenecks = np.array(g1_bottlenecks)
        pred_idx = np.argsort(g1_bottlenecks)[1] # index of 2nd closest array by dtw; 1st closest is self
        predicts.append(gest_lab[pred_idx])

    acc = (sum(np.array(gest_lab) == np.array(predicts)) / len(gest_lab)) * 100
    
    print(f"raw channel {chan} accuracy: {acc}%")

0
100
200
300
400
500
raw channel 1 accuracy: 23.09027777777778%
0
100
200
300
400
500
600
700
800
900
1000
1100
raw channel 2 accuracy: 24.04513888888889%
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
raw channel 3 accuracy: 24.71064814814815%
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
raw channel 4 accuracy: 24.82638888888889%


---