In [76]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 
from numpy.linalg import norm

# Load Data and set important variables

In [77]:
# TODO change warnings

In [78]:
# Select which recording type to process: "EEG" or "EMG".
# The value is used to build the input/output paths and as a prefix of the feature column names.

data_type = "EEG"
#data_type = "EMG"

In [79]:
# Choose the subject (individual) to process; the identifier is used in the input and output paths
subject = "m292"

In [80]:
# Labels to process; each label is looked up under the key "Label_<label>" in the per-label dictionaries
label_list = [0, 1, 2, 3, 4]

In [81]:
# Load the persistence diagrams of all labels for the chosen subject and data type

diagrams_path = 'Embeddings_and_Persistence_Diagrams/'+str(subject)+'/'+str(data_type)+'/Persistence_Diagrams_All_Labels.npy'

# NOTE(review): allow_pickle=True unpickles arbitrary objects, so only load files from a trusted source.
# .item() unwraps the 0-d object array back into the stored dict.
persistence_diagrams = np.load(diagrams_path, allow_pickle=True).item()

In [82]:
# TODO do this in Preprocessing_And_Computing_...
# Keep only element [0] of every stored entry, so each label maps to a plain list of diagrams.
# NOTE: this cell rebinds persistence_diagrams to its own result; re-running it without
# re-running the loading cell would apply the [0] indexing a second time.

reshaped_persistence_diagrams = {}

for label in label_list:
    label_key = "Label_"+str(label)
    unwrapped_diagrams = []
    for stored_entry in list(persistence_diagrams[label_key]):
        unwrapped_diagrams.append(stored_entry[0])
    reshaped_persistence_diagrams[label_key] = unwrapped_diagrams

persistence_diagrams = reshaped_persistence_diagrams

# HeatKernel Intensity

In a way, the heat kernel shows an "average distribution" of the persistence diagrams for each label, separated by homology dimension (i.e. by the dimension of the holes).

In [83]:
# Heat-kernel transformer applied to every diagram in the cells below.
# NOTE(review): sigma and n_bins are hard-coded; presumably tuned to the scale of this data - confirm.
HK = HeatKernel(sigma=0.00003, n_bins=100)

In [84]:
def heat_kernel_intensity(heatkernel, homology_dimension):
    """ Computes the mean intensity of a heat kernel for one homology dimension.

    Only strictly positive values are averaged, because otherwise the mean would
    always be zero.

    Parameters:
    - heatkernel (array-like): heat kernels of all homology dimensions; only the first
      sample is used, i.e. the values are read from heatkernel[0][homology_dimension]
    - homology_dimension (int): Which homology dimension to look at (0, 1 or 2)

    Returns:
    - mean of the positive entries of the selected heat kernel, or NaN if it has no
      positive entry (returned explicitly, so no "mean of empty slice" warning is raised)
    """

    values = np.asarray(heatkernel[0][homology_dimension], dtype=float)

    # Boolean masking flattens in row-major order, i.e. the same order the nested loops visited.
    positives = values[values > 0]

    if positives.size == 0:
        # Callers detect this case with a NaN check, so keep NaN as the "no data" marker.
        return float("nan")

    return np.mean(positives)

In [85]:
# Mean heat-kernel intensity of every diagram, stored per homology dimension and per label:
# kernel_intensity_dim<d>["Label_<label>"] is a list with one value per diagram.
kernel_intensity_dim0 = {}
kernel_intensity_dim1 = {}
kernel_intensity_dim2 = {}

# Index in this list == homology dimension
kernel_densities = [kernel_intensity_dim0, kernel_intensity_dim1, kernel_intensity_dim2]

for label in label_list:
    label_key = "Label_"+str(label)

    # Initialize intensity lists of our label
    for intensities_of_dim in kernel_densities:
        intensities_of_dim[label_key] = []

    for diagram in persistence_diagrams[label_key]:
        # The heat kernel is fitted on each diagram individually
        heatkernel = HK.fit_transform([diagram.astype("float")])

        for homology_dim, intensities_of_dim in enumerate(kernel_densities):
            # Compute each intensity once. It is NaN when the heat kernel has no positive
            # entry; store 0 instead, for every dimension, so that no NaN reaches the features.
            intensity = heat_kernel_intensity(heatkernel, homology_dim)
            intensities_of_dim[label_key].append(0 if np.isnan(intensity) else intensity)

# L1 norms of Features

Using the L1 norms of some signatures as additional ML features improves the accuracy slightly.

In [86]:
# Collects the L1 norms per signature: one entry per signature key, filled in the cells below
L1_norms = {}

In [87]:
def compute_L1_norm_for_signature(persistence_diagrams, label_list, SG):
    """
    Compute the L1 norm of a signature for every diagram, label and homology dimension.

    Parameters:
    - persistence_diagrams (dict): maps "Label_<label>" to the diagrams of that label
    - label_list (list): labels to process
    - SG: signature transformer; its fit_transform is called on one diagram at a time

    Returns:
    - dict mapping "Label_<label>" to a list of three lists (homology dimensions 0, 1, 2),
      each holding one norm (numpy.linalg.norm with ord=1) per diagram
    """

    norms_by_label = {}

    for label in label_list:
        label_key = "Label_"+str(label)

        # One list of norms per homology dimension
        norms_per_dim = [[], [], []]

        for diagram in persistence_diagrams[label_key]:
            signature = SG.fit_transform([diagram.astype("float")])
            for homology_dim, norms_of_dim in enumerate(norms_per_dim):
                norms_of_dim.append(norm(signature[0][homology_dim], 1))

        norms_by_label[label_key] = norms_per_dim

    return norms_by_label
    

## Persistence Landscape

In [88]:
# Persistence-landscape transformer with default parameters
PL = PersistenceLandscape()

In [89]:
# L1 norms of the persistence landscapes.
# NOTE(review): they are stored under the key "PD" although the transformer is PL; the key ends up
# in the feature column names, so check downstream consumers before renaming it.
L1_norms["PD"] = compute_L1_norm_for_signature(persistence_diagrams, label_list, PL)

## Betti Curve

In [90]:
# Betti-curve transformer with default parameters
BC = BettiCurve()

In [91]:
# L1 norms of the Betti curves, per label and homology dimension
L1_norms["BC"] = compute_L1_norm_for_signature(persistence_diagrams, label_list, BC)

# Save Signature Features

In [92]:
def create_feature_df(data_type, kernel_densities, L1_norms, num_diagrams, label):
    """
    Build the feature DataFrame of a single label.

    Parameters:
    - data_type (str): prefix used for every feature column name
    - kernel_densities (list): heat-kernel intensities; kernel_densities[dim]["Label_<label>"]
      holds one value per diagram
    - L1_norms (dict): maps a signature name to a dict whose entry "Label_<label>" holds one
      list of norms per homology dimension
    - num_diagrams (int): number of diagrams of this label, i.e. number of rows
    - label (int): label for which the DataFrame is created

    Returns:
    - Feature DataFrame (DataFrame) with one row per diagram: three kernel-intensity columns,
      three L1-norm columns per signature and a constant "Label" column
    """

    label_key = "Label_"+str(label)
    column_prefix = str(data_type)
    homology_dims = range(3)

    features = pd.DataFrame(index=np.arange(0, num_diagrams))

    # Heat-kernel intensities, one column per homology dimension
    for dim in homology_dims:
        features[column_prefix+"_Kernel_Intensity_Dim"+str(dim)] = kernel_densities[dim][label_key]

    # L1 norms, one column per signature and homology dimension
    for signature_name, norms_by_label in L1_norms.items():
        norms_per_dim = norms_by_label[label_key]
        for dim in homology_dims:
            features[column_prefix+"_L1_Norm_"+str(signature_name)+"Dim"+str(dim)] = norms_per_dim[dim]

    # Label
    features["Label"] = label

    return features

In [93]:
# One feature DataFrame per label, keyed by "Label_<label>"
test_dataframes = {}  # not filled in this cell
dataframes = {}

for label in label_list:
    label_key = "Label_"+str(label)
    diagram_count = len(persistence_diagrams[label_key])
    dataframes[label_key] = create_feature_df(data_type, kernel_densities, L1_norms, diagram_count, label)

In [94]:
# Concatenate the per-label feature DataFrames (in label_list order) and save them as CSV.
# The labels come from label_list instead of being spelled out, so this cell stays in sync
# with the labels processed above.
feature_df = pd.concat([dataframes["Label_"+str(label)] for label in label_list], ignore_index=True)
feature_df.to_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")