In [2]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA


In [38]:
# Choose whether to process EEG or EMG data.
# The value selects the sub-directory the persistence diagrams are loaded from,
# prefixes the feature column names, and selects the sub-directory the CSV is saved to.

data_type = "EEG" # NOTE(review): only "EEG" is exercised here; EMG support is planned for the anesthesia data
#data_type = "EMG"

In [39]:
# Choose the subject (individual) whose persistence diagrams are loaded
# and whose feature table is written at the end of the notebook

subject = "294"

In [40]:
# Labels to process; the data of each label is looked up under the key "Label_<label>"
label_list = [1,2,3,4,5,7]

# NOTE(review): n_folds is not referenced in the cells visible here - confirm it is still needed
n_folds = 5 # This should be the same as in the file which creates PDs 

In [41]:
# Load the persistence diagrams of the chosen subject and data type.
# NOTE(review): allow_pickle=True unpickles the file content, which can execute
# arbitrary code - only load files that were produced by this project.

persistence_diagrams = np.load(
    f'Embeddings_and_Persistence_Diagrams/{subject}/{data_type}/Persistence_Diagrams_All_Labels.npy',
    allow_pickle=True,
).item()  # .item() turns the loaded object array back into the stored dict

In [42]:
# Flatten the diagrams and drop the rows whose birth and death are both 0

reshaped_persistence_diagrams = {}

for label in label_list:
    # Element 0 of every stored entry is the actual diagram (flattening step).
    # Rows with birth == 0 and death == 0 are then removed; such rows can occur
    # if there are no holes of homology dimension 1 or 2.
    reshaped_persistence_diagrams["Label_"+str(label)] = [
        flat_diagram[~((flat_diagram[:, 0] == 0) & (flat_diagram[:, 1] == 0))]
        for flat_diagram in (
            stored_entry[0] for stored_entry in list(persistence_diagrams["Label_"+str(label)])
        )
    ]

# From here on, persistence_diagrams refers to the flattened and filtered diagrams
persistence_diagrams = reshaped_persistence_diagrams

# Summary Statistics

In [43]:
def compute_summary_statistics(persistence_diagrams):
    """
    Compute four summary statistics for every persistence diagram in a list.

    Every non-empty diagram is handed on its own (as a one-element batch) to the
    gtda transformers PersistenceEntropy, Amplitude, NumberOfPoints and
    ComplexPolynomial (n_coefficients=1). A diagram without any points gets
    all-zero placeholder rows instead.

    Parameters:
    - persistence_diagrams (list): persistence diagrams

    Returns:
    Tuple of four lists with one entry per diagram:
    - Persistence entropies
    - Amplitudes
    - Numbers of points
    - Complex polynomial coefficients
    """

    entropy_transformer = PersistenceEntropy()
    amplitude_transformer = Amplitude()
    count_transformer = NumberOfPoints()
    polynomial_transformer = ComplexPolynomial(n_coefficients=1)

    persistence_entropies, amplitudes, nos_points, complex_polynomials = [], [], [], []

    for diagram in persistence_diagrams:
        if len(diagram) == 0:
            # Placeholder rows for an empty diagram: three zeros for the first
            # three statistics and six zeros for the complex polynomial
            # (presumably sized for homology dimensions 0-2 - TODO confirm)
            persistence_entropies.append([[0, 0, 0]])
            amplitudes.append([[0, 0, 0]])
            nos_points.append([[0, 0, 0]])
            complex_polynomials.append([[0, 0, 0, 0, 0, 0 ]])
            continue

        # The transformers are fitted on each diagram individually
        persistence_entropies.append(entropy_transformer.fit_transform([diagram]))
        amplitudes.append(amplitude_transformer.fit_transform([diagram]))
        nos_points.append(count_transformer.fit_transform([diagram]))
        complex_polynomials.append(polynomial_transformer.fit_transform([diagram]))

    return persistence_entropies, amplitudes, nos_points, complex_polynomials

In [44]:
# One dict per statistic, each keyed by "Label_<label>"
persistence_entropies = {}
amplitudes = {}
nos_points = {}
complex_polynomials = {}


for label in label_list:
    all_summary_statistics = compute_summary_statistics(persistence_diagrams["Label_"+str(label)])

    # Distribute the four returned lists over the per-statistic dicts
    (
        persistence_entropies["Label_"+str(label)],
        amplitudes["Label_"+str(label)],
        nos_points["Label_"+str(label)],
        complex_polynomials["Label_"+str(label)],
    ) = all_summary_statistics[:4]
    

In [45]:
def compute_largest_persistence(persistence_diagrams):
    """
    Find, per diagram and homology dimension, the persistence of the most prominent point.

    The persistence of a point is the difference of its second and first column
    (death - birth); the third column holds the homology dimension.

    Parameters:
    - persistence_diagrams (list): persistence diagrams (2-D arrays with three columns)

    Returns:
    Dictionary with the keys 0, 1 and 2 (homology dimension). Each value is a
    list with one entry per diagram: the largest persistence in that dimension,
    or 0 if the diagram has no point of that dimension.
    """

    dimensions = (0, 1, 2)
    largest_persistences = {dimension: [] for dimension in dimensions}

    for diagram in persistence_diagrams:
        for dimension in dimensions:
            # Keep only the points that belong to the current homology dimension
            points = diagram[diagram[:, 2] == dimension]

            if len(points) == 0:
                # No hole of this dimension in the diagram
                largest_persistences[dimension].append(0)
                continue

            lifetimes = points[:, 1] - points[:, 0]
            largest_persistences[dimension].append(np.max(lifetimes))

    return largest_persistences

In [46]:
# Largest persistences per label, keyed by "Label_<label>"
largest_persistences = {
    "Label_"+str(label): compute_largest_persistence(persistence_diagrams["Label_"+str(label)])
    for label in label_list
}

In [47]:
# Load the train / validation / final-test indices of several subjects
# NOTE(review): the dictionaries below are filled in this cell but are not read
# anywhere else in the visible cells - confirm whether this cell is still needed

subject_list = ["293", "294", "298"]

# One entry per subject, keyed by the subject string
train_indices_dict_all_subjects = {}
validation_indices_dict_all_subjects = {}
test_indices_dict_all_subjects = {}

def load_indices(subject):
    """
    Load the stored train, validation and final-test indices of one subject.

    Parameters:
    - subject: subject identifier; str(subject) is used as the directory name

    Returns:
    Tuple (train_indices, validation_indices, test_indices) with the object
    stored in each of the three .npy files (presumably dicts - TODO confirm).
    """
    split_directory = "Train_Test_Splitting/"+str(subject)+"/"

    def _load_stored_object(file_name):
        # NOTE(review): allow_pickle=True unpickles the file content, which can
        # execute arbitrary code - only load files produced by this project.
        return np.load(split_directory+file_name, allow_pickle=True).item()

    train_indices = _load_stored_object("Train_Indices_All_Labels_All_Folds.npy")
    validation_indices = _load_stored_object("Validation_Indices_All_Labels_All_Folds.npy")
    test_indices = _load_stored_object("Final_Test_Set_Indices_All_Labels.npy")

    return train_indices, validation_indices, test_indices


for current_subject in subject_list:
    # Store the three index collections of this subject in the per-subject dicts
    (
        train_indices_dict_all_subjects[current_subject],
        validation_indices_dict_all_subjects[current_subject],
        test_indices_dict_all_subjects[current_subject],
    ) = load_indices(current_subject)



# Concatenate Features to one DataFrame and Save

In [48]:
def extend_features(persistence_entropies, amplitudes, nos_points, complex_polynomials, largest_persistences,  label_list):
    """
    Earlier draft of the feature-padding step.

    NOTE(review): a function with the same name is defined again in the next
    cell. Once that cell has run, this definition is shadowed and never called;
    consider deleting this cell.

    It can happen (for instance, for EMG data) that even if most diagrams have holes up to dimension 1, 
    some of the diagrams (in the EMG data) only have 0-dimensional holes.
    This leads to irregular shapes for the features, which is why we extend the features for the "shorter" 
    diagrams.

    Known limitations of this draft:
    - largest_persistences is accepted but never used.
    - The per-label entry is overwritten for every feature type, so after the
      loop it only refers to the last one (the complex polynomials).
    - np.append is called without an axis and therefore returns a flattened
      1-D array; a padded row loses its nested [[...]] layout, and only a
      single zero is appended.
    - Padded rows are written back into the lists that were passed in.

    Returns:
    - all_final_features (list): one dictionary per label, each with the single key "Label_<label>"
    """

    all_final_features = []
    

    for label_idx, label in enumerate(label_list):
        all_final_features.append({})
        all_final_features[label_idx]["Label_"+str(label)] = []

        # The feature types that are padded here (largest_persistences is not among them)
        all_feature_types = [persistence_entropies["Label_"+str(label)], amplitudes["Label_"+str(label)], nos_points["Label_"+str(label)], complex_polynomials["Label_"+str(label)]]

        # Longest entropy feature vector of this label; used as target length for all feature types
        max_homology_dimension = max(len(diagram_entropy[0]) for diagram_entropy in persistence_entropies["Label_"+str(label)])

        # For each of the defined feature types
        for feature_type_list in all_feature_types:
            # Look at the values for each persistence diagram 
            for diagram_idx in range(len(feature_type_list)):

                # If the feature vector of this diagram is shorter than the longest one,
                # append a single zero (replaces the entry of the input list in place)
                if len(feature_type_list[diagram_idx][0]) < max_homology_dimension:
                    feature_type_list[diagram_idx] = np.append(feature_type_list[diagram_idx], [[0.]])

            # Overwrites the entry written for the previous feature type
            all_final_features[label_idx]["Label_"+str(label)] = (feature_type_list)


    return all_final_features



In [49]:
def _pad_feature_row(feature_row, target_length):
    """Return feature_row with its feature vector (feature_row[0]) zero-padded to target_length."""
    missing = target_length - len(feature_row[0])
    if missing <= 0:
        # Already long enough: hand the row back untouched
        return feature_row
    # np.append without an axis flattens its inputs, so the padded vector is
    # wrapped in a list again to keep the "row[0] is the feature vector" layout
    return [np.append(feature_row, [0.0] * missing)]


def extend_features(persistence_entropies, amplitudes, nos_points, complex_polynomials, largest_persistences,  label_list):
    """
    Zero-pad the per-diagram feature vectors so that all diagrams of a label have the same length.

    It can happen (for instance, for EMG data) that even if most diagrams have holes up to dimension 1, 
    some of the diagrams (in the EMG data) only have 0-dimensional holes.
    This leads to irregular shapes for the features, which is why we extend the features for the "shorter" 
    diagrams. Zeros are appended at the end, i.e. the missing entries are assumed
    to belong to the highest homology dimensions.

    Every short feature vector is padded up to the longest one of its label, no
    matter how many entries are missing. The input dictionaries and their lists
    are left unchanged; padded rows are collected in new lists.

    Parameters:
    - persistence_entropies, amplitudes, nos_points, complex_polynomials (dict):
      "Label_<label>" -> list of rows, where row[0] is the feature vector of one diagram
    - largest_persistences (dict): "Label_<label>" -> largest persistences; passed through unchanged
    - label_list (list): labels to process

    Returns:
    - all_final_features (list): List which contains dictionaries with the features in the following order:
    [persistence_entropies, amplitudes, nos_points, complex_polynomials, largest_persistences]
    """

    # Initialize return vector with a dictionary (with labels as keys) for each feature type
    all_final_features = [{}, {}, {}, {}, {}]

    # Feature types with one value per homology dimension
    per_dimension_features = [persistence_entropies, amplitudes, nos_points]

    for label in label_list:
        key = "Label_"+str(label)

        # The entropy vectors determine the target length of all per-dimension features
        # (default=0 keeps a label without diagrams from raising)
        max_homology_dimension = max((len(row[0]) for row in persistence_entropies[key]), default=0)

        for feature_type_idx, feature_dict in enumerate(per_dimension_features):
            all_final_features[feature_type_idx][key] = [
                _pad_feature_row(row, max_homology_dimension) for row in feature_dict[key]
            ]

        # Complex polynomial: has its own length (several coordinates per homology dimension)
        max_num_coordinates = max((len(row[0]) for row in complex_polynomials[key]), default=0)
        all_final_features[3][key] = [
            _pad_feature_row(row, max_num_coordinates) for row in complex_polynomials[key]
        ]

        # Largest persistences already have one list per homology dimension
        all_final_features[4][key] = largest_persistences[key]

    return all_final_features



In [50]:
# Pad the feature vectors; the result holds one dict per feature type
all_features = extend_features(
    persistence_entropies,
    amplitudes,
    nos_points,
    complex_polynomials,
    largest_persistences,
    label_list,
)

In [51]:
def choose_column_in_matrix(matrix, i):
    """
    Collect entry i of the feature vector of every row.

    Parameters:
    - matrix: sequence of rows, where row[0] is the feature vector of that row
    - i (int): position inside the feature vector

    Returns:
    - List with row[0][i] for every row, in the original order
    """
    column = []
    for row in matrix:
        feature_vector = row[0]
        column.append(feature_vector[i])
    return column

In [52]:
def create_feature_df(persistence_entropies, amplitudes, nos_points, complex_polynomials, largest_persistences,  label, column_prefix=None):
    """
    Create one feature DataFrame that covers all requested labels.

    Parameters:
    - persistence_entropies (dict): "Label_<label>" -> list of rows, where row[0] holds
      one persistence entropy per homology dimension
    - amplitudes (dict): same layout, amplitudes
    - nos_points (dict): same layout, numbers of points
    - complex_polynomials (dict): same layout, one entry per complex polynomial coordinate
    - largest_persistences (dict): "Label_<label>" -> mapping from homology dimension to a
      list with one value per diagram
    - label (int or iterable): the label, or the collection of labels, for which rows are
      created. (Previously this argument was ignored in favour of the global label_list.)
    - column_prefix (str, optional): prefix of the feature column names. Defaults to the
      notebook-level variable data_type.

    Returns:
    - Feature DataFrame (DataFrame): one row per diagram, the feature columns plus a
      "Label" column. The row index restarts at 0 for every label. Labels without
      diagrams are skipped; an empty DataFrame is returned if nothing is left.
    """

    # Accept a single label as well as a collection of labels
    labels = [label] if isinstance(label, (int, str, np.integer)) else list(label)

    # Only fall back to the global data_type if no prefix was given
    prefix = str(data_type) if column_prefix is None else str(column_prefix)

    per_label_frames = []

    for current_label in labels:
        key = "Label_"+str(current_label)

        entropy_rows = persistence_entropies[key]
        if len(entropy_rows) == 0:
            # Nothing to add for a label without diagrams
            continue

        amplitude_rows = amplitudes[key]
        point_count_rows = nos_points[key]
        polynomial_rows = complex_polynomials[key]

        feature_df = pd.DataFrame()

        # For all homology dimensions (2 or 3, depending on the data type), create a column for each feature
        num_homology_dimensions = len(entropy_rows[0][0])

        for hom_dim in range(num_homology_dimensions):
            feature_df[prefix+"_Persistence Entropy_Dim_"+str(hom_dim)] = [row[0][hom_dim] for row in entropy_rows]
            feature_df[prefix+"_Amplitude_Dim_"+str(hom_dim)] = [row[0][hom_dim] for row in amplitude_rows]
            feature_df[prefix+"_No_Points_Dim_"+str(hom_dim)] = [row[0][hom_dim] for row in point_count_rows]
            feature_df[prefix+"_Largest_Persistence_Dim_"+str(hom_dim)] = largest_persistences[key][hom_dim]

        # For all coordinates of the complex polynomial, create a column
        num_coordinates = len(polynomial_rows[0][0])

        for coord in range(num_coordinates):
            feature_df[prefix+"_Complex_Polynomial_Coord_"+str(coord)] = [row[0][coord] for row in polynomial_rows]

        # Add label as column
        feature_df["Label"] = current_label

        per_label_frames.append(feature_df)

    if not per_label_frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop
    return pd.concat(per_label_frames)


# The first five entries of all_features are, in order: persistence entropies,
# amplitudes, numbers of points, complex polynomials and largest persistences
all_labels_feature_df = create_feature_df(*all_features[:5], label_list)


# Save

In [53]:
# Write the feature table (including its row index) for the chosen subject and data type
all_labels_feature_df.to_csv(
    f"Features/{subject}/{data_type}/Topological_Summary_Statistics.csv"
)