In [1]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 

In [2]:
# Load the stored train/test arrays for labels 1, 3, 5 and 7. These are later
# passed to compute_summary_statistics as collections of persistence diagrams.
# NOTE(review): allow_pickle=True makes np.load unpickle object arrays, and
# unpickling can execute arbitrary code -- only load these .npy files if they
# come from a trusted source.
train_persistence_diagrams_label_1 = np.load("Embeddings_and_Persistence_Diagrams/Train_PD1.npy", allow_pickle=True)
test_persistence_diagrams_label_1 = np.load("Embeddings_and_Persistence_Diagrams/Test_PD1.npy", allow_pickle=True)

train_persistence_diagrams_label_3 = np.load("Embeddings_and_Persistence_Diagrams/Train_PD3.npy", allow_pickle=True)
test_persistence_diagrams_label_3 = np.load("Embeddings_and_Persistence_Diagrams/Test_PD3.npy", allow_pickle=True)

train_persistence_diagrams_label_5 = np.load("Embeddings_and_Persistence_Diagrams/Train_PD5.npy", allow_pickle=True)
test_persistence_diagrams_label_5 = np.load("Embeddings_and_Persistence_Diagrams/Test_PD5.npy", allow_pickle=True)

train_persistence_diagrams_label_7 = np.load("Embeddings_and_Persistence_Diagrams/Train_PD7.npy", allow_pickle=True)
test_persistence_diagrams_label_7 = np.load("Embeddings_and_Persistence_Diagrams/Test_PD7.npy", allow_pickle=True)


# Summary Statistics

In [3]:
def compute_summary_statistics(persistence_diagrams):
    """
    Compute summary statistics for every persistence diagram in a collection

    Each diagram is wrapped in a one-element batch and passed on its own to
    ``fit_transform`` of four transformers, so every list entry is the
    transformer output for that single-diagram batch.

    Parameters:
    - persistence_diagrams (iterable): persistence diagrams, processed in iteration order

    Returns:
    Tuple of four lists, each holding one entry per diagram:
    - Persistence Entropy (PersistenceEntropy)
    - Amplitude (Amplitude)
    - Number of Points (NumberOfPoints)
    - Complex Polynomials (ComplexPolynomial)
    """

    # Order matters: it fixes the position of each list in the returned tuple.
    transformers = (
        PersistenceEntropy(),
        Amplitude(),
        NumberOfPoints(),
        ComplexPolynomial(),
    )

    # One output list per transformer, aligned with `transformers`.
    collected = tuple([] for _ in transformers)

    for diagram in persistence_diagrams:
        for transformer, bucket in zip(transformers, collected):
            # Transformers are applied to a batch containing just this diagram.
            bucket.append(transformer.fit_transform([diagram]))

    return collected

In [4]:
# Compute the summary-statistic tuples for the train and test diagrams of each label.

# Label 1
train_feautures_label_1 = compute_summary_statistics(train_persistence_diagrams_label_1)
test_feautures_label_1 = compute_summary_statistics(test_persistence_diagrams_label_1)

# Label 3
train_feautures_label_3 = compute_summary_statistics(train_persistence_diagrams_label_3)
test_feautures_label_3 = compute_summary_statistics(test_persistence_diagrams_label_3)

# Label 5
train_feautures_label_5 = compute_summary_statistics(train_persistence_diagrams_label_5)
test_feautures_label_5 = compute_summary_statistics(test_persistence_diagrams_label_5)

# Label 7
train_feautures_label_7 = compute_summary_statistics(train_persistence_diagrams_label_7)
test_feautures_label_7 = compute_summary_statistics(test_persistence_diagrams_label_7)

# Concatenate Features into One DataFrame

In [5]:
def choose_column_in_matrix(matrix, i):
    """
    Pick entry ``i`` from the first element of every row of ``matrix``

    Parameters:
    - matrix (iterable): rows, each of which is indexed as ``row[0][i]``
    - i (int): position to read inside ``row[0]``

    Returns:
    - List with one selected value per row, in iteration order
    """
    selected = []
    for row in matrix:
        first_entry = row[0]
        selected.append(first_entry[i])
    return selected

In [6]:
def create_feature_df(persistence_entropies, amplitudes, nos_points, label):
    """
    Build the feature DataFrame for a single label

    Every feature contributes three columns, read from positions 0, 1 and 2
    of each entry via choose_column_in_matrix (one column per hole dimension).

    Parameters:
    - persistence_entropies (list): persistence entropies, one entry per diagram
    - amplitudes (list): amplitudes, one entry per diagram
    - nos_points (list): number of points, one entry per diagram
    - label (int): Label written to every row. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame) with nine feature columns followed by "Label"
    """

    # (column name, source values, position inside each entry), in output column order.
    column_layout = (
        ("Persistence Entropy_Dim_0", persistence_entropies, 0),
        ("Persistence Entropy_Dim_1", persistence_entropies, 1),
        ("Persistence Entropy_Dim_2", persistence_entropies, 2),
        ("Amplitude_Dim_0", amplitudes, 0),
        ("Amplitude_Dim_1", amplitudes, 1),
        ("Amplitude_Dim_2", amplitudes, 2),
        ("No_Points_Dim_0", nos_points, 0),
        ("No_Points_Dim_1", nos_points, 1),
        ("No_Points_Dim_2", nos_points, 2),
    )

    feature_df = pd.DataFrame()
    for column_name, source, dimension in column_layout:
        feature_df[column_name] = choose_column_in_matrix(list(source), dimension)

    # Scalar assignment: the same label is broadcast to every row.
    feature_df["Label"] = label

    return feature_df

In [7]:
# Build one feature DataFrame per label and split. Indices 0, 1 and 2 of each
# feature tuple are the persistence entropies, amplitudes and numbers of points
# returned by compute_summary_statistics. The last argument is the class label
# written to every row, so it must match the label the features belong to for
# both the train and the test frame.

# Create dataframes for label 1
train_df_label_1 = create_feature_df(train_feautures_label_1[0], train_feautures_label_1[1], train_feautures_label_1[2], 1)
test_df_label_1 = create_feature_df(test_feautures_label_1[0], test_feautures_label_1[1], test_feautures_label_1[2], 1)

# Create dataframes for label 3
train_df_label_3 = create_feature_df(train_feautures_label_3[0], train_feautures_label_3[1], train_feautures_label_3[2], 3)
test_df_label_3 = create_feature_df(test_feautures_label_3[0], test_feautures_label_3[1], test_feautures_label_3[2], 3)

# Create dataframes for label 5
train_df_label_5 = create_feature_df(train_feautures_label_5[0], train_feautures_label_5[1], train_feautures_label_5[2], 5)
test_df_label_5 = create_feature_df(test_feautures_label_5[0], test_feautures_label_5[1], test_feautures_label_5[2], 5)

# Create dataframes for label 7
train_df_label_7 = create_feature_df(train_feautures_label_7[0], train_feautures_label_7[1], train_feautures_label_7[2], 7)
test_df_label_7 = create_feature_df(test_feautures_label_7[0], test_feautures_label_7[1], test_feautures_label_7[2], 7)

In [8]:
# Concatenate and save features of training persistence diagrams
# NOTE(review): pd.concat is called without ignore_index, so each per-label frame
# keeps its own row index, and to_csv (which writes the index by default) stores
# repeated index values -- confirm downstream readers do not rely on a unique index.
train_feature_df = pd.concat([train_df_label_1, train_df_label_3, train_df_label_5, train_df_label_7])
train_feature_df.to_csv("Features/Train_Topological_Summary_Statistics.csv")

# Concatenate and save features of test persistence diagrams
test_feature_df = pd.concat([test_df_label_1, test_df_label_3, test_df_label_5, test_df_label_7])
test_feature_df.to_csv("Features/Test_Topological_Summary_Statistics.csv")
