In [64]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 

In [65]:
# Load persistence diagrams

train_short_persistence_diagrams_label_1 = np.load("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams1.npy", allow_pickle=True)
test_short_persistence_diagrams_label_1 = np.load("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams1.npy", allow_pickle=True)

train_short_persistence_diagrams_label_3 = np.load("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams3.npy", allow_pickle=True)
test_short_persistence_diagrams_label_3 = np.load("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams3.npy", allow_pickle=True)

train_short_persistence_diagrams_label_5 = np.load("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams5.npy", allow_pickle=True)
test_short_persistence_diagrams_label_5 = np.load("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams5.npy", allow_pickle=True)

train_short_persistence_diagrams_label_7 = np.load("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams7.npy", allow_pickle=True)
test_short_persistence_diagrams_label_7 = np.load("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams7.npy", allow_pickle=True)


# Load removed indices
removed_train_indices_label_1 = np.load("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices1.npy", allow_pickle=True)
removed_test_indices_label_1 = np.load("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices1.npy", allow_pickle=True)

removed_train_indices_label_3 = np.load("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices3.npy", allow_pickle=True)
removed_test_indices_label_3 = np.load("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices3.npy", allow_pickle=True)

removed_train_indices_label_5 = np.load("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices5.npy", allow_pickle=True)
removed_test_indices_label_5 = np.load("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices5.npy", allow_pickle=True)

removed_train_indices_label_7 = np.load("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices7.npy", allow_pickle=True)
removed_test_indices_label_7 = np.load("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices7.npy", allow_pickle=True)

In [75]:
def create_feature_df(persistence_diagrams, removed_indices, num_all_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_intensity_dim0 (int): intensity of heatkernel for homology dimension 0
    - kernel_intensity_dim1 (int): intensity of heatkernel for homology dimension 1
    - kernel_intensity_dim2 (int): intensity of heatkernel for homology dimension 2
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_all_diagrams))

    # Preliminary replace removed persistence diagrams with 0s 
    for idx in removed_indices:
        np.insert(persistence_diagrams, idx, np.zeros((200, 3)), axis = 0)
    
    for diagram in persistence_diagrams:
        for hole_idx, hole in enumerate(diagram):
            feature_df["Birth_"+str(hole_idx)] = hole[0]
            feature_df["Death_"+str(hole_idx)] = hole[1]
            feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
    
    # Label
    
    feature_df["Label"] = label

    return feature_df

In [76]:
max_size = 1932

# Label 1
train_df_label_1 = create_feature_df(train_short_persistence_diagrams_label_1, removed_train_indices_label_1, max_size, 1)
test_df_label_1 = create_feature_df(test_short_persistence_diagrams_label_1, removed_test_indices_label_1, max_size, 1)

# Label 3
train_df_label_3 = create_feature_df(train_short_persistence_diagrams_label_3, removed_train_indices_label_3, max_size, 3)
test_df_label_3 = create_feature_df(test_short_persistence_diagrams_label_3, removed_test_indices_label_3, max_size, 3)

# Label 5
train_df_label_5 = create_feature_df(train_short_persistence_diagrams_label_5, removed_train_indices_label_5, max_size, 5)
test_df_label_5 = create_feature_df(test_short_persistence_diagrams_label_5, removed_test_indices_label_5, max_size, 5)
# Label 3
train_df_label_7 = create_feature_df(train_short_persistence_diagrams_label_7, removed_train_indices_label_7, max_size, 7)
test_df_label_7 = create_feature_df(test_short_persistence_diagrams_label_7, removed_test_indices_label_7, max_size, 7)


  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
  feature_df["Birth_"+str(hole_idx)] = hole[0]
  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
  feature_df["Birth_"+str(hole_idx)] = hole[0]
  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
  feature_df["Birth_"+str(hole_idx)] = hole[0]
  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
  feature_df["Birth_"+str(hole_idx)] = hole[0]
  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
  feature_df["Birth_"+str(hole_idx)] = hole[0]
  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimension_"+str(hole_idx)] = hole[2]
  feature_df["Birth_"+str(hole_idx)] = hole[0]
  feature_df["Death_"+str(hole_idx)] = hole[1]
  feature_df["Homology_Dimens

In [77]:
# Concatenate and save features of training persistence diagrams
train_feature_df = pd.concat([train_df_label_1, train_df_label_3, train_df_label_5, train_df_label_7], ignore_index=True)
train_feature_df.to_csv("Features/Train_Direct_Persistence_Diagrams.csv")

# Concatenate and save features of training persistence diagrams
test_feature_df = pd.concat([test_df_label_1, test_df_label_3, test_df_label_5, test_df_label_7], ignore_index=True)
test_feature_df.to_csv("Features/Test_Direct_Persistence_Diagrams.csv")
