In [1]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, PersistenceImage 
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 
from numpy.linalg import norm
from scipy.stats import skew, kurtosis

pd.set_option('display.max_columns', None)

# Load Data and set important variables

In [2]:
# TODO change warnings
# TODO delete markdowns

In [3]:
# Choose if you want to look at EEG or EMG data

data_type = "EEG"
#data_type = "EMG"

In [4]:
# choose individuum
subject = "294"

In [5]:
label_list = [1,2,3,4,5,7]

In [6]:
# Load persistence diagrams

persistence_diagrams  = np.load('Embeddings_and_Persistence_Diagrams/'+str(subject)+'/'+str(data_type)+'/Persistence_Diagrams_All_Labels.npy', \
    allow_pickle=True).item() # .item() to convert the dtype to dict again

In [7]:
# TODO do this in Preprocessing_And_Computing_...

reshaped_persistence_diagrams = {}

for label in label_list:
    reshaped_persistence_diagrams["Label_"+str(label)] = [persistence_diagram[0] for persistence_diagram in list(persistence_diagrams["Label_"+str(label)])]

persistence_diagrams = reshaped_persistence_diagrams

# Define Function Wrapper for All Statistics

In [8]:
def signature_statistics_wrapper(func): 

    def compute_statistics_for_signature(*args, **kwargs):

        # Get variables
        signatures = kwargs.pop('signatures', None)
        label_list = kwargs.pop('label_list', None)
        persistence_diagrams = kwargs.pop('persistence_diagrams', None)

    
        overall_statistics = {}

            
        for label in label_list:

            # Initialize statistics list for label
            overall_statistics["Label_"+str(label)] = []

            for dim in range(3):

                statistics_for_dim = []

                # Take average of the statistics of 25 persistence diagrams
                for idx in range(int(len(persistence_diagrams["Label_"+str(label)]))):
                    # Initialize list for the statistics of each of the the 80 PD after idx

                    sgn = signatures[label][idx].astype("float")

                    statistics_for_dim.append(func(sgn, dim))


                overall_statistics["Label_"+str(label)].append(statistics_for_dim)
                    

        return overall_statistics

    return compute_statistics_for_signature

In [9]:
def precompute_signature(SG, label_list = label_list, persistence_diagrams = persistence_diagrams):
    
    signatures = {}
    for label in label_list:
        signatures[label] = []
        
        for idx in range(int(len(persistence_diagrams["Label_"+str(label)]))):
            
            signatures[label].append(SG.fit_transform([persistence_diagrams["Label_"+str(label)][idx].astype("float")]))

    return signatures

# HeatKernel Statistics

In a way, the Heat Kernel shows an "average distribution" of the persistence diagrams for each label, seperated per hole dimensionality.

In [10]:
HK = HeatKernel(sigma=0.00003, n_bins=100)

heatkernels = precompute_signature(HK)

In [11]:
heat_kernel_statistics = {}

## Intensity

In [12]:
@signature_statistics_wrapper
def heat_kernel_intensity(heatkernel, homology_dimension):
    """ Computes mean intensity of a heatkernel. Only takes positive values because otherwise the mean would
      always be zero.

    Parameters:
    - heatkernel (list of lists): heatkernel of all homology dimensions
    - homology_dimension (int): Which homology dimension to look at (0, 1 or 2)

    Returns:
    - mean intensity of heatkernel of homology dimension homology_dimension
    """
    
    positives =  [x for inner_list in heatkernel[0][homology_dimension] for x in inner_list if x > 0]
    
    return np.mean(positives) if len(positives) > 0 else 0

In [13]:
heat_kernel_statistics["intensity"] = heat_kernel_intensity(signatures = heatkernels, label_list = label_list, persistence_diagrams = persistence_diagrams, heatkernel = None, homology_dimension = None)

## Maximum and Minimum

In [14]:
@signature_statistics_wrapper
def heat_kernel_max(heatkernel, homology_dimension):
    """ Computes maximum and minimum of a heatkernel. 

    Parameters:
    - heatkernel (list of lists): heatkernel of all homology dimensions
    - homology_dimension (int): Which homology dimension to look at (0, 1 or 2)

    Returns:
    - mean intensity of heatkernel of homology dimension homology_dimension
    """
    
    positives =  [x for inner_list in heatkernel[0][homology_dimension] for x in inner_list if x > 0]
    
    return np.max(positives) if len(positives) > 0 else 0

In [15]:
@signature_statistics_wrapper
def heat_kernel_min(heatkernel, homology_dimension):
    """ Computes maximum and minimum of a heatkernel. Only takes positive values because otherwise the minimum
    would always be the negative of the maximum

    Parameters:
    - heatkernel (list of lists): heatkernel of all homology dimensions
    - homology_dimension (int): Which homology dimension to look at (0, 1 or 2)

    Returns:
    - mean intensity of heatkernel of homology dimension homology_dimension
    """
    
    positives =  [x for inner_list in heatkernel[0][homology_dimension] for x in inner_list if x > 0]
    
    return np.min(positives) if len(positives) > 0 else 0

In [16]:
heat_kernel_statistics["maximum"] = heat_kernel_max(signatures = heatkernels, label_list = label_list, persistence_diagrams = persistence_diagrams, heatkernel = None, homology_dimension = None)
heat_kernel_statistics["minimum"] = heat_kernel_min(signatures = heatkernels, label_list = label_list, persistence_diagrams = persistence_diagrams, heatkernel = None, homology_dimension = None)


def create_feature_df(data_type, heatkernel_statistics, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for stat in heatkernel_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Heatkernel_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = heatkernel_statistics[stat]["Label_"+str(label)][homology_dim]

    return feature_df

dataframes = {}

for label in label_list:
    dataframes["Label_"+str(label)] = create_feature_df(data_type, heat_kernel_statistics, len(heat_kernel_statistics["maximum"]["Label_"+str(label)][0]), label)
    
feature_df = pd.concat([dataframes["Label_"+str(1)], dataframes["Label_"+str(2)], dataframes["Label_"+str(3)], dataframes["Label_"+str(4)], dataframes["Label_"+str(5)], dataframes["Label_"+str(7)]], ignore_index=True)

# Persistence Image Statistics

In [17]:
PI = PersistenceImage(sigma=0.00003, n_bins=100)

images = precompute_signature(PI)

In [18]:
persistence_image_statistics = {}

In [19]:
persistence_image_statistics["intensity"] = heat_kernel_intensity(signatures = images, label_list = label_list, persistence_diagrams = persistence_diagrams, heatkernel = None, homology_dimension = None)
persistence_image_statistics["maximum"] = heat_kernel_max(signatures = images, label_list = label_list, persistence_diagrams = persistence_diagrams, heatkernel = None, homology_dimension = None)
persistence_image_statistics["minimum"] = heat_kernel_min(signatures = images, label_list = label_list, persistence_diagrams = persistence_diagrams, heatkernel = None, homology_dimension = None)

def create_feature_df(data_type, persistence_image_statistics, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for stat in persistence_image_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Persistence_image_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = persistence_image_statistics[stat]["Label_"+str(label)][homology_dim]

    return feature_df

dataframes = {}

for label in label_list:
    dataframes["Label_"+str(label)] = create_feature_df(data_type, persistence_image_statistics, len(persistence_image_statistics["intensity"]["Label_"+str(label)][0]), label)
    
feature_df_pi = pd.concat([dataframes["Label_"+str(1)], dataframes["Label_"+str(2)], dataframes["Label_"+str(3)], dataframes["Label_"+str(4)], dataframes["Label_"+str(5)], dataframes["Label_"+str(7)]], ignore_index=True)

concatenated_df = pd.concat([feature_df, feature_df_pi], axis=1)

concatenated_df = concatenated_df.loc[:, ~concatenated_df.columns.str.contains('^Unnamed')]

concatenated_df.to_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

# Betti Curve Features

In [20]:
BC = BettiCurve()

betti_curves = precompute_signature(BC)

In [21]:
betti_curve_statistics = {}

## L1 Norm

In [22]:
@signature_statistics_wrapper
def L1_norm(signature, homology_dimension):

    return norm(signature[0][0], homology_dimension)    

In [23]:
betti_curve_statistics["L1"] = L1_norm(signatures = betti_curves, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)

## Mean, Standard deviation, Skewness and Kurtosis

In [24]:
@signature_statistics_wrapper
def signature_mean(signature, homology_dimension):
    
    return statistics.mean(signature[0][homology_dimension])

In [25]:
@signature_statistics_wrapper
def signature_standard_deviation(signature, homology_dimension):
    
    return statistics.stdev(signature[0][homology_dimension])


In [26]:
@signature_statistics_wrapper
def signature_skewness(signature, homology_dimension):
    
    return skew(signature[0][homology_dimension])

In [27]:
@signature_statistics_wrapper
def signature_kurtosis(signature, homology_dimension):
    
    return kurtosis(signature[0][homology_dimension])

In [28]:
betti_curve_statistics["Mean"] = signature_mean(signatures = betti_curves, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
betti_curve_statistics["Standard_Deviation"] = signature_standard_deviation(signatures = betti_curves, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
betti_curve_statistics["Skewness"] = signature_skewness(signatures = betti_curves, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
betti_curve_statistics["Kurtosis"] = signature_kurtosis(signatures = betti_curves, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)

def create_feature_df(data_type, betti_curve_statistics, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for stat in betti_curve_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Betti_Curve_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = betti_curve_statistics[stat]["Label_"+str(label)][homology_dim]

    return feature_df

dataframes = {}

for label in label_list:
    dataframes["Label_"+str(label)] = create_feature_df(data_type, betti_curve_statistics, len(betti_curve_statistics["Mean"]["Label_"+str(label)][0]), label)
    
feature_df_bc = pd.concat([dataframes["Label_"+str(1)], dataframes["Label_"+str(2)], dataframes["Label_"+str(3)], dataframes["Label_"+str(4)], dataframes["Label_"+str(5)], dataframes["Label_"+str(7)]], ignore_index=True)

feature_df = pd.read_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

concatenated_df = pd.concat([feature_df, feature_df_bc], axis=1)

concatenated_df = concatenated_df.loc[:, ~concatenated_df.columns.str.contains('^Unnamed')]

concatenated_df.to_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

# Persistence Landscape Features

In [29]:
PL = PersistenceLandscape()
landscapes = precompute_signature(PL)

landscape_statistics = {}

In [30]:
landscape_statistics["Mean"] = signature_mean(signatures = landscapes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
landscape_statistics["Standard_Deviation"] = signature_standard_deviation(signatures = landscapes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
landscape_statistics["Skewness"] = signature_skewness(signatures = landscapes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
landscape_statistics["Kurtosis"] = signature_kurtosis(signatures = landscapes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)

def create_feature_df(data_type, landscape_statistics, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for stat in landscape_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Persistence_Landscape_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = landscape_statistics[stat]["Label_"+str(label)][homology_dim]

    return feature_df

feature_df = pd.read_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

dataframes = {}

for label in label_list:
    dataframes["Label_"+str(label)] = create_feature_df(data_type, landscape_statistics, len(landscape_statistics["Mean"]["Label_"+str(label)][0]), label)
    
feature_df_pl = pd.concat([dataframes["Label_"+str(1)], dataframes["Label_"+str(2)], dataframes["Label_"+str(3)], dataframes["Label_"+str(4)], dataframes["Label_"+str(5)], dataframes["Label_"+str(7)]], ignore_index=True)

concatenated_df = pd.concat([feature_df, feature_df_pl], axis=1)

concatenated_df = concatenated_df.loc[:, ~concatenated_df.columns.str.contains('^Unnamed')]

concatenated_df.to_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

# Silhouette Features

In [31]:
SH = Silhouette()
silhouettes = precompute_signature(SH)

silhouette_statistics = {}

L1 norm, mean, SD, Skewness, Kurtosis

In [32]:
silhouette_statistics["L1"] = L1_norm(signatures = silhouettes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)

silhouette_statistics["Mean"] = signature_mean(signatures = silhouettes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
silhouette_statistics["Standard_Deviation"] = signature_standard_deviation(signatures = silhouettes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
silhouette_statistics["Skewness"] = signature_skewness(signatures = silhouettes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)
silhouette_statistics["Kurtosis"] = signature_kurtosis(signatures = silhouettes, label_list = label_list, persistence_diagrams = persistence_diagrams, signature = None, homology_dimension = None)

def create_feature_df(data_type, silhouette_statistics, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for stat in silhouette_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Persistence_Silhouette_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = silhouette_statistics[stat]["Label_"+str(label)][homology_dim]

    return feature_df

feature_df = pd.read_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

dataframes = {}

for label in label_list:
    dataframes["Label_"+str(label)] = create_feature_df(data_type, silhouette_statistics, len(silhouette_statistics["Mean"]["Label_"+str(label)][0]), label)
    
feature_df_sh = pd.concat([dataframes["Label_"+str(1)], dataframes["Label_"+str(2)], dataframes["Label_"+str(3)], dataframes["Label_"+str(4)], dataframes["Label_"+str(5)], dataframes["Label_"+str(7)]], ignore_index=True)

concatenated_df = pd.concat([feature_df, feature_df_sh], axis=1)

concatenated_df = concatenated_df.loc[:, ~concatenated_df.columns.str.contains('^Unnamed')]

concatenated_df.to_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

feature_df = pd.read_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")

# Entries of vectorizations as direct features

In [33]:
def compute_vectorizations(persistence_diagrams, label_list, signatures):
    
    vectorizations = {}

    for label in label_list:
        
        vectorizations["Label_"+str(label)] = []
    
        for idx in range(len(persistence_diagrams["Label_"+str(label)])):
            
            signature = signatures[label][idx]

            vectorizations["Label_"+str(label)].append(signature)

    return vectorizations



In [34]:
def reshape_vectorizations_type_1(vectorizations, label_list):
    """
    Reshape vectorizations of shape (num_persistence_diagrams, 1, 3, 100) to separate dimensions for each label.

    Parameters:
    - vectorizations (dict): Dictionary containing vectorizations for each label.
    - label_list (list): List of labels.

    Returns:
    - reshaped_vectorizations (dict): Dictionary containing reshaped vectorizations for each label and dimension.
    """
    # Initialize dictionary to store reshaped vectorizations
    reshaped_vectorizations = {}
    
    # Iterate over each label
    for label in label_list:
        # Initialize dictionary to store reshaped vectorizations for the current label
        reshaped_vectorizations["Label_" + str(label)] = {}

        for coordinate_idx in range(100):

            # For each vectorization coordinate (there are 100), initialize one dictionary
            # which will contain lists of 74 vectorization coordinates (the "vectorization_idx"st coordinate
            # of the 74 persistence images) as values and the homology dimensions as keys
            reshaped_vectorizations["Label_" + str(label)]["Coordinate_" + str(coordinate_idx)] = {}
 
            for hom_dim in range(3):
                # Initialize list to store reshaped vectorizations for the current homology dimension
                reshaped_vectorizations["Label_" + str(label)]["Coordinate_" + str(coordinate_idx)]["Hom_Dim_" + str(hom_dim)] = []

    
    # Iterate over each label
    for label in label_list:
        # Iterate over each vectorization for the current label
        for coordinate_idx in range(100):
            
            for hom_dim in range(3):
                
                for vectorization_idx in range(len(vectorizations["Label_" + str(label)])):

                    # Append the component corresponding to the current homology dimension to the list
                    reshaped_vectorizations["Label_" + str(label)]["Coordinate_" + str(coordinate_idx)]["Hom_Dim_" + str(hom_dim)].append(
                        vectorizations["Label_" + str(label)][vectorization_idx][0][hom_dim][coordinate_idx])

    return reshaped_vectorizations


In [35]:
# Initialize vectorizations
vectorizations = {}

## Persistence Landscape

First precompute.

In [36]:
PL = PersistenceLandscape()

landscapes = precompute_signature(PL)

In [37]:
vectorizations_before_reshaping = compute_vectorizations(persistence_diagrams, label_list, landscapes)
vectorizations["PL"] = reshape_vectorizations_type_1(vectorizations_before_reshaping, label_list)

## Betti Curve

In [38]:
vectorizations_before_reshaping = compute_vectorizations(persistence_diagrams, label_list, betti_curves)
vectorizations["BC"] = reshape_vectorizations_type_1(vectorizations_before_reshaping, label_list)

## Silhouette

In [39]:
vectorizations_before_reshaping = compute_vectorizations(persistence_diagrams, label_list, silhouettes)
vectorizations["SH"] = reshape_vectorizations_type_1(vectorizations_before_reshaping, label_list)

# Save Signature Features

In [40]:
def create_feature_df(data_type, heat_kernel_statistics, betti_curve_statistics, silhouette_statistics, persistence_image_statistics, landscape_statistics, vectorizations, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for stat in heat_kernel_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_HeatKernel_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = heat_kernel_statistics[stat]["Label_"+str(label)][homology_dim]

    for stat in betti_curve_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Betti_Curve_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = betti_curve_statistics[stat]["Label_"+str(label)][homology_dim]

    for stat in silhouette_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Silhouette_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = silhouette_statistics[stat]["Label_"+str(label)][homology_dim]


    for stat in persistence_image_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Persistence_image_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = persistence_image_statistics[stat]["Label_"+str(label)][homology_dim]

    
    for stat in landscape_statistics.keys():
        for homology_dim in range(3):
            feature_df[str(data_type)+"_Persistence_Landscape_Statistic_"+str(stat)+"Dim"+str(homology_dim)] = landscape_statistics[stat]["Label_"+str(label)][homology_dim]


    # Vectorizations
    for signature in vectorizations.keys():
        for homology_dim in range(3):
            for coordinate_idx in range(100):
                feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
                "_Homology_Dim_"+str(homology_dim)] = vectorizations[signature]["Label_"+str(label)]["Coordinate_" + \
                str(coordinate_idx)]["Hom_Dim_" + str(homology_dim)]

    # Label
    feature_df["Label"] = label

    return feature_df

In [41]:
dataframes = {}

for label in label_list:
    dataframes[label] = create_feature_df(data_type, heat_kernel_statistics, betti_curve_statistics, silhouette_statistics, persistence_image_statistics, landscape_statistics, vectorizations, len(silhouette_statistics["Mean"]["Label_"+str(label)][0]), label)

  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(data_type)+"_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df[str(dat

In [42]:
# Concatenate and save features of training persistence diagrams
# Concatenate and save dataframes
feature_df = pd.concat([dataframes[1], dataframes[2], dataframes[3], \
                              dataframes[4], dataframes[5], dataframes[7]], ignore_index=True)
feature_df.to_csv("Features/"+str(subject)+"/"+str(data_type)+"/Signature_Statistics.csv")