In [52]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 

In [53]:
# Choose whether to process EEG or EMG data.

data_type = "EEG" # Interpolated into the load/save file paths and used as prefix of the feature column names
#data_type = "EMG"  # alternative setting

In [54]:
# Choose the subject (individual) to process; the value is interpolated into the load/save file paths.

subject = "m292"

In [55]:
# Labels to process; each label is looked up under the key "Label_<label>".
label_list = [0, 1, 2, 3, 4]

# Should be the same value as in the file which creates the persistence diagrams (PDs).
# NOTE(review): not referenced in any of the visible cells - confirm it is still needed.
n_folds = 5

In [56]:
# Load the persistence diagrams of the chosen subject and data type.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load files from a trusted source.

persistence_diagrams = np.load(
    f'Embeddings_and_Persistence_Diagrams/{subject}/{data_type}/Persistence_Diagrams_All_Labels.npy',
    allow_pickle=True,
).item()  # .item() unwraps the stored object so that we get the dict back

# Summary Statistics

In [45]:
def compute_summary_statistics(persistence_diagrams):
    """
    Compute summary statistics for each persistence diagram of a list

    Every diagram is transformed on its own: each transformer is re-fitted
    on a one-element list containing just that diagram.

    Parameters:
    - persistence_diagrams (list): persistence diagrams

    Returns:
    Tuple of three lists, each holding one entry per input diagram
    (the raw `fit_transform` output for that diagram):
    - Persistence entropies
    - Amplitudes
    - Numbers of points
    """

    # Transformers are used with their default settings
    entropy_transformer = PersistenceEntropy()
    amplitude_transformer = Amplitude()
    n_points_transformer = NumberOfPoints()

    persistence_entropies = []
    amplitudes = []
    nos_points = []

    for diagram in persistence_diagrams:
        # Wrap the diagram in a list so the transformer sees a collection of one diagram
        single_diagram = [diagram]
        persistence_entropies.append(entropy_transformer.fit_transform(single_diagram))
        amplitudes.append(amplitude_transformer.fit_transform(single_diagram))
        nos_points.append(n_points_transformer.fit_transform(single_diagram))

    return persistence_entropies, amplitudes, nos_points

In [46]:
# Initialize dicts with labels as key ("Label_<label>")
persistence_entropies = {}
amplitudes = {}
nos_points = {}


for label in label_list:

    label_key = "Label_"+str(label)

    # Keep element 0 of every stored entry (presumably drops a leading axis of length 1 - verify against the file that creates the diagrams)
    reshaped_persistence_diagrams = [persistence_diagram[0] for persistence_diagram in persistence_diagrams[label_key]]

    # Compute all statistics once per label and unpack them, instead of
    # recomputing everything for each of the three result dicts
    (
        persistence_entropies[label_key],
        amplitudes[label_key],
        nos_points[label_key],
    ) = compute_summary_statistics(reshaped_persistence_diagrams)

In [47]:
def compute_largest_persistence(persistence_diagrams, homology_dimensions=(0, 1, 2)):
    """
    Computes persistence of the most prominent point of each dimension in each diagram

    Parameters:
    - persistence_diagrams (list): persistence diagrams; each diagram is indexed as a
      2D array whose columns 0, 1 and 2 are read as birth, death and homology dimension
    - homology_dimensions (iterable of int): homology dimensions to evaluate,
      defaults to (0, 1, 2)

    Returns:
    List with one list per requested homology dimension (in the given order).
    Each inner list has exactly one value per input diagram: the largest
    (death - birth) among the points of that dimension, or 0.0 if the diagram
    has no point of that dimension.
    """

    largest_persistences = [] # will contain one list per homology dimension
    for homology_dimension in homology_dimensions:
        largest_persistences_of_hom_dim = []
        for diagram in persistence_diagrams:
            # only look at holes of our homology dimension
            condition = diagram[:, 2] == homology_dimension
            filtered_diagram = diagram[condition]

            if len(filtered_diagram) > 0:
                differences = filtered_diagram[:, 1] - filtered_diagram[:, 0]
                largest_persistences_of_hom_dim.append(np.max(differences))
            else:
                # Keep one entry per diagram so the lists stay aligned with the
                # diagrams (and with each other); no feature means zero persistence
                largest_persistences_of_hom_dim.append(0.0)

        largest_persistences.append(largest_persistences_of_hom_dim)

    return largest_persistences

In [48]:
# Largest persistences per label, keyed by "Label_<label>"
largest_persistences = dict()

for label in label_list:
    label_key = "Label_"+str(label)

    # Keep element 0 of every stored entry before handing the diagrams over
    reshaped_persistence_diagrams = [entry[0] for entry in persistence_diagrams[label_key]]

    largest_persistences[label_key] = compute_largest_persistence(reshaped_persistence_diagrams)

# Concatenate Features to one DataFrame and Save

In [49]:
def choose_column_in_matrix(matrix, i):
    """
    Pick entry `i` from the first element of every row

    Parameters:
    - matrix (iterable): rows, each of which is indexed as row[0][i]
    - i (int): position to read inside row[0]

    Returns:
    - List containing row[0][i] for every row, in input order
    """
    selected_values = []
    for row in matrix:
        first_entry = row[0]
        selected_values.append(first_entry[i])
    return selected_values

In [50]:
def create_feature_df(persistence_entropies, amplitudes, nos_points, largest_persistences, label):
    """
    Create one DataFrame holding the features of all requested labels

    Parameters:
    - persistence_entropies (dict): maps "Label_<label>" to the persistence entropies
      of that label; every entry is read as entry[0][dim] for dim in 0, 1, 2
    - amplitudes (dict): amplitudes, same layout as persistence_entropies
    - nos_points (dict): numbers of points, same layout as persistence_entropies
    - largest_persistences (dict): maps "Label_<label>" to three lists (one per
      homology dimension 0, 1, 2) holding one value per diagram
    - label (int or iterable of int): label, or labels, to include in the DataFrame

    Returns:
    - Feature DataFrame (DataFrame) with twelve feature columns prefixed by the
      global `data_type` plus a "Label" column. The frames of the individual
      labels are stacked in the given order and each keeps its own 0..n-1 index.
    """

    # Accept a single label as well as a collection of labels
    if np.iterable(label) and not isinstance(label, (str, bytes)):
        labels = list(label)
    else:
        labels = [label]

    prefix = str(data_type)
    homology_dimensions = (0, 1, 2)

    # (column name fragment, per-label statistic dict), in output column order
    matrix_statistics = [
        ("Persistence Entropy", persistence_entropies),
        ("Amplitude", amplitudes),
        ("No_Points", nos_points),
    ]

    label_frames = []
    for current_label in labels:
        label_key = "Label_"+str(current_label)
        columns = {}

        # One column per statistic and homology dimension
        for statistic_name, statistic_by_label in matrix_statistics:
            statistic_rows = list(statistic_by_label[label_key])
            for dim in homology_dimensions:
                column_name = prefix+"_"+statistic_name+"_Dim_"+str(dim)
                columns[column_name] = choose_column_in_matrix(statistic_rows, dim)

        # Largest persistences are already stored as one list per dimension
        for dim in homology_dimensions:
            columns[prefix+"_Largest_Persistence_Dim_"+str(dim)] = largest_persistences[label_key][dim]

        feature_df = pd.DataFrame(columns)

        # Label
        feature_df["Label"] = current_label

        label_frames.append(feature_df)

    if not label_frames:
        # pd.concat raises on an empty list; return an empty frame instead
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop
    return pd.concat(label_frames)


# Build the combined feature table for the labels in label_list
all_labels_feature_df = create_feature_df(
    persistence_entropies=persistence_entropies,
    amplitudes=amplitudes,
    nos_points=nos_points,
    largest_persistences=largest_persistences,
    label=label_list,
)


# Save

In [51]:
# Write the feature table (including its index) to the subject/data-type specific folder
all_labels_feature_df.to_csv(f"Features/{subject}/{data_type}/Topological_Summary_Statistics.csv")