In [1]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA

In [None]:
# Identifier of the subject to process; it is interpolated into the input path of the
# persistence diagrams and the output path of the feature CSV further down.
subject = "m300"

# Compute mean of summary statistics of persistence diagrams

In [None]:
# Labels to process; each label n selects the "Label_<n>" entry of the persistence diagrams.
label_list = list(range(5))

In [None]:
# Load persistence diagrams
# np.load returns an array here; .item() unwraps its single element (presumably a dict, since
# it is indexed below with keys of the form "Label_<n>" — TODO confirm against the producer).
# NOTE(review): allow_pickle=True unpickles the file contents, which can execute arbitrary
# code — only load files from a trusted source.

persistence_diagrams = np.load("Embeddings_and_Persistence_Diagrams/"+str(subject)+"/Persistence_Diagrams.npy", allow_pickle=True).item()

# Summary Statistics

In [None]:
def compute_summary_statistics(persistence_diagrams, amount_to_average=80):
    """
    Compute window-averaged summary statistics of a list of persistence diagrams

    The diagrams are split into consecutive, non-overlapping windows of
    ``amount_to_average`` diagrams. Every statistic is computed per diagram and
    then averaged over the window. Trailing diagrams that do not fill a complete
    window are ignored.

    Parameters:
    - persistence_diagrams (list): persistence diagrams; every entry must support
      ``.astype("float")`` (e.g. a NumPy array)
    - amount_to_average (int): number of consecutive diagrams per window. The
      default of 80 corresponds to 4 seconds of imaging data recorded at 20Hz.

    Returns:
    Tuple of three lists, each with one entry per window:
    - Persistence Entropies
    - Amplitudes
    - Numbers of Points

    Raises:
    - ValueError: if amount_to_average is smaller than 1
    """
    if amount_to_average < 1:
        raise ValueError("amount_to_average must be a positive integer")

    PE = PersistenceEntropy()
    AM = Amplitude()
    NP = NumberOfPoints()

    persistence_entropies = []
    amplitudes = []
    nos_points = []

    # Only complete windows are processed; a partial window at the end is dropped.
    n_windows = len(persistence_diagrams) // amount_to_average

    for idx in range(n_windows):
        start = amount_to_average * idx

        pers_entropies = []
        ampl = []
        no_p = []
        for counter in range(amount_to_average):
            # Convert the diagram once and reuse it for all three statistics.
            # The transformers are fitted on one diagram at a time, hence the
            # one-element list.
            diagram = [persistence_diagrams[start + counter].astype("float")]
            pers_entropies.append(PE.fit_transform(diagram))
            ampl.append(AM.fit_transform(diagram))
            no_p.append(NP.fit_transform(diagram))

        # Averaging over axis 0 keeps the per-diagram output shape of each transformer.
        persistence_entropies.append(np.mean(pers_entropies, axis=0))
        amplitudes.append(np.mean(ampl, axis=0))
        nos_points.append(np.mean(no_p, axis=0))

    return persistence_entropies, amplitudes, nos_points

In [None]:
# Summary statistics per label: maps each label to the tuple returned by
# compute_summary_statistics for that label's persistence diagrams.
feautures = {
    label: compute_summary_statistics(persistence_diagrams["Label_"+str(label)])
    for label in label_list
}

In [None]:
def compute_largest_persistence(persistence_diagrams, amount_to_average=80, homology_dimensions=(0, 1, 2)):
    """
    Computes the window-averaged persistence of the most prominent point of each
    homology dimension

    The diagrams are split into consecutive, non-overlapping windows of
    ``amount_to_average`` diagrams. For every diagram the largest persistence
    (column 1 minus column 0) among the points whose column 2 equals the homology
    dimension is taken, and these maxima are averaged over the window. Diagrams
    without any point of that dimension do not contribute to the average.
    Trailing diagrams that do not fill a complete window are ignored.

    Parameters:
    - persistence_diagrams (list): persistence diagrams, each a 2D array whose
      columns 0, 1 and 2 are read as described above
    - amount_to_average (int): number of consecutive diagrams per window. The
      default of 80 corresponds to 4 seconds of imaging data recorded at 20Hz.
    - homology_dimensions (iterable of int): homology dimensions to evaluate

    Returns:
    List with one list per homology dimension (3 lists for the default
    dimensions 0, 1 and 2). Each inner list holds one averaged largest
    persistence per window; the value is NaN if no diagram of the window has a
    point of that dimension.

    Raises:
    - ValueError: if amount_to_average is smaller than 1
    """
    if amount_to_average < 1:
        raise ValueError("amount_to_average must be a positive integer")

    # Only complete windows are processed; a partial window at the end is dropped.
    n_windows = len(persistence_diagrams) // amount_to_average

    largest_persistences = []  # one list per homology dimension
    for homology_dimension in homology_dimensions:
        average_largest_persistences_of_hom_dim = []

        for idx in range(n_windows):
            start = amount_to_average * idx
            largest_per_to_average = []

            for counter in range(amount_to_average):
                # Cast to float, as compute_summary_statistics does for the same diagrams
                diagram = np.asarray(persistence_diagrams[start + counter], dtype=float)

                # only look at holes of our homology dimension
                filtered_diagram = diagram[diagram[:, 2] == homology_dimension]

                if len(filtered_diagram) > 0:
                    differences = filtered_diagram[:, 1] - filtered_diagram[:, 0]
                    largest_per_to_average.append(np.max(differences))

            if largest_per_to_average:
                average_largest_persistences_of_hom_dim.append(np.mean(largest_per_to_average))
            else:
                # np.mean([]) would also give NaN, but with a RuntimeWarning;
                # make the "no points of this dimension" case explicit instead.
                average_largest_persistences_of_hom_dim.append(np.nan)

        largest_persistences.append(average_largest_persistences_of_hom_dim)

    return largest_persistences


In [None]:
# Largest persistences per label: maps each label to the per-dimension lists returned by
# compute_largest_persistence for that label's persistence diagrams.
largest_persistences = {
    label: compute_largest_persistence(persistence_diagrams["Label_"+str(label)])
    for label in label_list
}

# Concatenate Features to one DataFrame

In [None]:
def choose_column_in_matrix(matrix, i):
    """
    Collect entry ``i`` of the first element of every row in ``matrix``

    Parameters:
    - matrix (iterable): rows, each indexable so that ``row[0][i]`` is valid
    - i (int): index of the entry to pick from ``row[0]``

    Returns:
    - List with one picked entry per row, in row order
    """
    column = []
    for row in matrix:
        first_element = row[0]
        column.append(first_element[i])
    return column

In [None]:
def create_feature_df(subject, persistence_entropies, amplitudes, nos_points, persistences, label,
                      include_largest_persistences=False):
    """
    Create DataFrame for each label from features

    For every statistic, the entries of homology dimensions 0 and 1 become one
    column each; a constant "Label" column is appended last.

    Parameters:
    - subject: currently unused; kept so existing callers keep working
    - persistence_entropies (list): persistence entropies, one entry per row
    - amplitudes (list): amplitudes, one entry per row
    - nos_points (list): number of points, one entry per row
    - persistences (list): largest persistences, one list per homology dimension.
      Only used when include_largest_persistences is True.
    - label (int): Label for which we want to create a dataframe. 0, 1, 2, 3 or 4.
    - include_largest_persistences (bool): if True, add one
      "Largest_Persistence_Dim_<k>" column per list in ``persistences``.
      Defaults to False, which yields the same columns as before this option
      existed.

    Returns:
    - Feature DataFrame (DataFrame)

    Raises:
    - ValueError: raised by pandas if the column lengths do not match
    """
    # NOTE(review): until include_largest_persistences is enabled by the caller,
    # the largest persistences are computed but never reach the saved features —
    # confirm whether that is intended.
    per_dimension_features = (
        ("Persistence Entropy", persistence_entropies),
        ("Amplitude", amplitudes),
        ("No_Points", nos_points),
    )

    feature_df = pd.DataFrame()

    # Two columns per statistic (corresponding to hole dimensions 0 and 1)
    for name, values in per_dimension_features:
        rows = list(values)
        for dim in (0, 1):
            feature_df[name + "_Dim_" + str(dim)] = choose_column_in_matrix(rows, dim)

    if include_largest_persistences:
        for dim, values in enumerate(persistences):
            feature_df["Largest_Persistence_Dim_" + str(dim)] = list(values)

    # Label
    feature_df["Label"] = label

    return feature_df

In [None]:
# One feature DataFrame per label, built from that label's summary statistics
# (entropies, amplitudes, numbers of points) and largest persistences.
dataframes = {
    label: create_feature_df(
        subject,
        feautures[label][0],
        feautures[label][1],
        feautures[label][2],
        largest_persistences[label],
        label,
    )
    for label in label_list
}
    

In [None]:
# Concatenate and save features of training persistence diagrams
# The frames are stacked in the order of label_list (the same labels dataframes was built
# from) instead of a hard-coded 0..4, so both stay in sync if the labels change.
feature_df = pd.concat([dataframes[label] for label in label_list], ignore_index=True)

# The output directory "Features/<subject>/" must already exist; to_csv does not create it.
feature_df.to_csv("Features/"+str(subject)+"/Topological_Summary_Statistics.csv")