In [20]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 
from numpy.linalg import norm


In [21]:
def _load_saved_array(path):
    """Read one saved .npy file from disk.

    allow_pickle=True lets NumPy unpickle object arrays stored in the file,
    so these files should only ever come from a trusted source.
    """
    return np.load(path, allow_pickle=True)


# Persistence diagrams, one train/test pair per label (1, 3, 5, 7)
train_short_persistence_diagrams_label_1 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams1.npy")
test_short_persistence_diagrams_label_1 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams1.npy")

train_short_persistence_diagrams_label_3 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams3.npy")
test_short_persistence_diagrams_label_3 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams3.npy")

train_short_persistence_diagrams_label_5 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams5.npy")
test_short_persistence_diagrams_label_5 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams5.npy")

train_short_persistence_diagrams_label_7 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Shortened_Diagrams7.npy")
test_short_persistence_diagrams_label_7 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Shortened_Diagrams7.npy")


# Removed indices, one train/test pair per label; the feature cells below
# use them as the positions at which placeholder values are inserted.
removed_train_indices_label_1 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices1.npy")
removed_test_indices_label_1 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices1.npy")

removed_train_indices_label_3 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices3.npy")
removed_test_indices_label_3 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices3.npy")

removed_train_indices_label_5 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices5.npy")
removed_test_indices_label_5 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices5.npy")

removed_train_indices_label_7 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Train_Removed_Indices7.npy")
removed_test_indices_label_7 = _load_saved_array("Embeddings_and_Persistence_Diagrams/Test_Removed_Indices7.npy")

# HeatKernel

In a way, the heat kernel shows an "average distribution" of the persistence diagrams for each label, separated by hole (homology) dimension.

In [22]:
# Heat-kernel transformer shared by all label cells below (100 bins, sigma = 3e-5).
HK = HeatKernel(n_bins=100, sigma=3e-5)

In [23]:
def heat_kernel_intensity(heatkernel, homology_dimension):
    """ Computes the mean intensity of a heat kernel over its strictly positive entries.

    Only positive values are averaged because, per the original author's note,
    the mean over all entries would otherwise always be zero.

    Parameters:
    - heatkernel (nested lists or ndarray): heat kernels of all homology dimensions.
      Only the first sample, heatkernel[0], is read, and
      heatkernel[0][homology_dimension] is treated as a 2-D grid of numbers.
    - homology_dimension (int): Which homology dimension to look at (0, 1 or 2)

    Returns:
    - mean of the positive entries of the selected grid, or 0.0 when the grid has
      no positive entry at all (this case used to yield NaN together with a NumPy
      "Mean of empty slice" RuntimeWarning).
    """

    grid = np.asarray(heatkernel[0][homology_dimension])
    positives = grid[grid > 0]

    # np.mean of an empty selection is NaN; return a plain 0.0 instead so that no
    # NaN ends up in the feature table built from these values.
    if positives.size == 0:
        return 0.0

    return np.mean(positives)

### Label 1

In [24]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_1_kernel_intensity_dim0 = []
train_label_1_kernel_intensity_dim1 = []
train_label_1_kernel_intensity_dim2 = []
train_label_1_kernel_columns = (
    train_label_1_kernel_intensity_dim0,
    train_label_1_kernel_intensity_dim1,
    train_label_1_kernel_intensity_dim2,
)

for diagram in train_short_persistence_diagrams_label_1:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(train_label_1_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: diagrams that were too short and were removed get intensity 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_1:
    for column in train_label_1_kernel_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_1_kernel_intensity_dim0 = []
test_label_1_kernel_intensity_dim1 = []
test_label_1_kernel_intensity_dim2 = []
test_label_1_kernel_columns = (
    test_label_1_kernel_intensity_dim0,
    test_label_1_kernel_intensity_dim1,
    test_label_1_kernel_intensity_dim2,
)

for diagram in test_short_persistence_diagrams_label_1:
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(test_label_1_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: removed (too short) diagrams get intensity 0 at their recorded index.
for idx in removed_test_indices_label_1:
    for column in test_label_1_kernel_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check

### Label 3

In [25]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_3_kernel_intensity_dim0 = []
train_label_3_kernel_intensity_dim1 = []
train_label_3_kernel_intensity_dim2 = []
train_label_3_kernel_columns = (
    train_label_3_kernel_intensity_dim0,
    train_label_3_kernel_intensity_dim1,
    train_label_3_kernel_intensity_dim2,
)

for diagram in train_short_persistence_diagrams_label_3:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(train_label_3_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: diagrams that were too short and were removed get intensity 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_3:
    for column in train_label_3_kernel_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_3_kernel_intensity_dim0 = []
test_label_3_kernel_intensity_dim1 = []
test_label_3_kernel_intensity_dim2 = []
test_label_3_kernel_columns = (
    test_label_3_kernel_intensity_dim0,
    test_label_3_kernel_intensity_dim1,
    test_label_3_kernel_intensity_dim2,
)

for diagram in test_short_persistence_diagrams_label_3:
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(test_label_3_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: removed (too short) diagrams get intensity 0 at their recorded index.
for idx in removed_test_indices_label_3:
    for column in test_label_3_kernel_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check

### Label 5

In [26]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_5_kernel_intensity_dim0 = []
train_label_5_kernel_intensity_dim1 = []
train_label_5_kernel_intensity_dim2 = []
train_label_5_kernel_columns = (
    train_label_5_kernel_intensity_dim0,
    train_label_5_kernel_intensity_dim1,
    train_label_5_kernel_intensity_dim2,
)

for diagram in train_short_persistence_diagrams_label_5:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(train_label_5_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: diagrams that were too short and were removed get intensity 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_5:
    for column in train_label_5_kernel_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_5_kernel_intensity_dim0 = []
test_label_5_kernel_intensity_dim1 = []
test_label_5_kernel_intensity_dim2 = []
test_label_5_kernel_columns = (
    test_label_5_kernel_intensity_dim0,
    test_label_5_kernel_intensity_dim1,
    test_label_5_kernel_intensity_dim2,
)

for diagram in test_short_persistence_diagrams_label_5:
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(test_label_5_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: removed (too short) diagrams get intensity 0 at their recorded index.
for idx in removed_test_indices_label_5:
    for column in test_label_5_kernel_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check

In [27]:
### Label 7

In [28]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_7_kernel_intensity_dim0 = []
train_label_7_kernel_intensity_dim1 = []
train_label_7_kernel_intensity_dim2 = []
train_label_7_kernel_columns = (
    train_label_7_kernel_intensity_dim0,
    train_label_7_kernel_intensity_dim1,
    train_label_7_kernel_intensity_dim2,
)

for diagram in train_short_persistence_diagrams_label_7:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(train_label_7_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: diagrams that were too short and were removed get intensity 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_7:
    for column in train_label_7_kernel_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_7_kernel_intensity_dim0 = []
test_label_7_kernel_intensity_dim1 = []
test_label_7_kernel_intensity_dim2 = []
test_label_7_kernel_columns = (
    test_label_7_kernel_intensity_dim0,
    test_label_7_kernel_intensity_dim1,
    test_label_7_kernel_intensity_dim2,
)

for diagram in test_short_persistence_diagrams_label_7:
    heatkernel = HK.fit_transform([diagram])
    for dim, column in enumerate(test_label_7_kernel_columns):
        column.append(heat_kernel_intensity(heatkernel, dim))

# Preliminary: removed (too short) diagrams get intensity 0 at their recorded index.
for idx in removed_test_indices_label_7:
    for column in test_label_7_kernel_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)


# Persistence Landscape

Using the L1 norm of the persistence landscape as an additional ML feature slightly improves the accuracy.

In [29]:
# Persistence-landscape transformer, constructed without arguments (library defaults);
# the label cells below call PL.fit_transform on one diagram at a time.
PL = PersistenceLandscape()

#### Label 1

In [30]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_1_landscape_L1_norm_dim0 = []
train_label_1_landscape_L1_norm_dim1 = []
train_label_1_landscape_L1_norm_dim2 = []
train_label_1_landscape_columns = (
    train_label_1_landscape_L1_norm_dim0,
    train_label_1_landscape_L1_norm_dim1,
    train_label_1_landscape_L1_norm_dim2,
)

for diagram in train_short_persistence_diagrams_label_1:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(train_label_1_landscape_columns):
        # NOTE(review): norm(..., 1) is the sum of absolute values for a 1-D slice but the
        # matrix 1-norm for a 2-D slice - confirm the shape of persistence_landscape[0][dim].
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: diagrams that were too short and were removed get a norm of 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_1:
    for column in train_label_1_landscape_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_1_landscape_L1_norm_dim0 = []
test_label_1_landscape_L1_norm_dim1 = []
test_label_1_landscape_L1_norm_dim2 = []
test_label_1_landscape_columns = (
    test_label_1_landscape_L1_norm_dim0,
    test_label_1_landscape_L1_norm_dim1,
    test_label_1_landscape_L1_norm_dim2,
)

for diagram in test_short_persistence_diagrams_label_1:
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(test_label_1_landscape_columns):
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: removed (too short) diagrams get a norm of 0 at their recorded index.
for idx in removed_test_indices_label_1:
    for column in test_label_1_landscape_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check

#### Label 3

In [31]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_3_landscape_L1_norm_dim0 = []
train_label_3_landscape_L1_norm_dim1 = []
train_label_3_landscape_L1_norm_dim2 = []
train_label_3_landscape_columns = (
    train_label_3_landscape_L1_norm_dim0,
    train_label_3_landscape_L1_norm_dim1,
    train_label_3_landscape_L1_norm_dim2,
)

for diagram in train_short_persistence_diagrams_label_3:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(train_label_3_landscape_columns):
        # NOTE(review): norm(..., 1) is the sum of absolute values for a 1-D slice but the
        # matrix 1-norm for a 2-D slice - confirm the shape of persistence_landscape[0][dim].
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: diagrams that were too short and were removed get a norm of 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_3:
    for column in train_label_3_landscape_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_3_landscape_L1_norm_dim0 = []
test_label_3_landscape_L1_norm_dim1 = []
test_label_3_landscape_L1_norm_dim2 = []
test_label_3_landscape_columns = (
    test_label_3_landscape_L1_norm_dim0,
    test_label_3_landscape_L1_norm_dim1,
    test_label_3_landscape_L1_norm_dim2,
)

for diagram in test_short_persistence_diagrams_label_3:
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(test_label_3_landscape_columns):
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: removed (too short) diagrams get a norm of 0 at their recorded index.
for idx in removed_test_indices_label_3:
    for column in test_label_3_landscape_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check

#### Label 5

In [32]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_5_landscape_L1_norm_dim0 = []
train_label_5_landscape_L1_norm_dim1 = []
train_label_5_landscape_L1_norm_dim2 = []
train_label_5_landscape_columns = (
    train_label_5_landscape_L1_norm_dim0,
    train_label_5_landscape_L1_norm_dim1,
    train_label_5_landscape_L1_norm_dim2,
)

for diagram in train_short_persistence_diagrams_label_5:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(train_label_5_landscape_columns):
        # NOTE(review): norm(..., 1) is the sum of absolute values for a 1-D slice but the
        # matrix 1-norm for a 2-D slice - confirm the shape of persistence_landscape[0][dim].
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: diagrams that were too short and were removed get a norm of 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_5:
    for column in train_label_5_landscape_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_5_landscape_L1_norm_dim0 = []
test_label_5_landscape_L1_norm_dim1 = []
test_label_5_landscape_L1_norm_dim2 = []
test_label_5_landscape_columns = (
    test_label_5_landscape_L1_norm_dim0,
    test_label_5_landscape_L1_norm_dim1,
    test_label_5_landscape_L1_norm_dim2,
)

for diagram in test_short_persistence_diagrams_label_5:
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(test_label_5_landscape_columns):
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: removed (too short) diagrams get a norm of 0 at their recorded index.
for idx in removed_test_indices_label_5:
    for column in test_label_5_landscape_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check

#### Label 7

In [33]:
# TRAIN DATA
# One list per homology dimension; values are appended in diagram order.
train_label_7_landscape_L1_norm_dim0 = []
train_label_7_landscape_L1_norm_dim1 = []
train_label_7_landscape_L1_norm_dim2 = []
train_label_7_landscape_columns = (
    train_label_7_landscape_L1_norm_dim0,
    train_label_7_landscape_L1_norm_dim1,
    train_label_7_landscape_L1_norm_dim2,
)

for diagram in train_short_persistence_diagrams_label_7:
    # Each diagram is transformed on its own, wrapped as a one-element batch.
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(train_label_7_landscape_columns):
        # NOTE(review): norm(..., 1) is the sum of absolute values for a 1-D slice but the
        # matrix 1-norm for a 2-D slice - confirm the shape of persistence_landscape[0][dim].
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: diagrams that were too short and were removed get a norm of 0,
# inserted at their recorded index.
# NOTE(review): presumably the indices are ascending positions in the unfiltered data - confirm.
for idx in removed_train_indices_label_7:
    for column in train_label_7_landscape_columns:
        column.insert(idx, 0)


# TEST DATA
# Same procedure as for the training split.
test_label_7_landscape_L1_norm_dim0 = []
test_label_7_landscape_L1_norm_dim1 = []
test_label_7_landscape_L1_norm_dim2 = []
test_label_7_landscape_columns = (
    test_label_7_landscape_L1_norm_dim0,
    test_label_7_landscape_L1_norm_dim1,
    test_label_7_landscape_L1_norm_dim2,
)

for diagram in test_short_persistence_diagrams_label_7:
    persistence_landscape = PL.fit_transform([diagram])
    for dim, column in enumerate(test_label_7_landscape_columns):
        column.append(norm(persistence_landscape[0][dim], 1))

# Preliminary: removed (too short) diagrams get a norm of 0 at their recorded index.
for idx in removed_test_indices_label_7:
    for column in test_label_7_landscape_columns:
        column.insert(idx, 0)

  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)
  Xnew = check_array(X, **kwargs)


# Betti Curve

# Save Signature Features

In [34]:
def create_feature_df(kernel_intensity_dim0, kernel_intensity_dim1, kernel_intensity_dim2, landscape_L1_norm_dim0, landscape_L1_norm_dim1, landscape_L1_norm_dim2, label):
    """
    Assemble the signature features of one label into a DataFrame.

    Parameters:
    - kernel_intensity_dim0 (list): heat-kernel intensities for homology dimension 0
    - kernel_intensity_dim1 (list): heat-kernel intensities for homology dimension 1
    - kernel_intensity_dim2 (list): heat-kernel intensities for homology dimension 2
    - landscape_L1_norm_dim0 (list): landscape L1 norms for homology dimension 0
    - landscape_L1_norm_dim1 (list): landscape L1 norms for homology dimension 1
    - landscape_L1_norm_dim2 (list): landscape L1 norms for homology dimension 2
    - label (int): Label the features belong to (1, 3, 5 or 7); written to every row.

    Returns:
    - Feature DataFrame (DataFrame) with the six feature columns followed by "Label"
    """

    # Column name -> values, in the order the columns should appear
    feature_columns = {
        "Kernel_Intensity_Dim0": kernel_intensity_dim0,
        "Kernel_Intensity_Dim1": kernel_intensity_dim1,
        "Kernel_Intensity_Dim2": kernel_intensity_dim2,
        "Landscape_L1_Norm_Dim0": landscape_L1_norm_dim0,
        "Landscape_L1_Norm_Dim1": landscape_L1_norm_dim1,
        "Landscape_L1_Norm_Dim2": landscape_L1_norm_dim2,
    }

    feature_df = pd.DataFrame()
    for column_name, values in feature_columns.items():
        feature_df[column_name] = values

    # The scalar label is broadcast to all rows
    feature_df["Label"] = label

    return feature_df
    

In [35]:
# Label 1
train_df_label_1 = create_feature_df(
    train_label_1_kernel_intensity_dim0,
    train_label_1_kernel_intensity_dim1,
    train_label_1_kernel_intensity_dim2,
    train_label_1_landscape_L1_norm_dim0,
    train_label_1_landscape_L1_norm_dim1,
    train_label_1_landscape_L1_norm_dim2,
    label=1,
)
test_df_label_1 = create_feature_df(
    test_label_1_kernel_intensity_dim0,
    test_label_1_kernel_intensity_dim1,
    test_label_1_kernel_intensity_dim2,
    test_label_1_landscape_L1_norm_dim0,
    test_label_1_landscape_L1_norm_dim1,
    test_label_1_landscape_L1_norm_dim2,
    label=1,
)

# Label 3
train_df_label_3 = create_feature_df(
    train_label_3_kernel_intensity_dim0,
    train_label_3_kernel_intensity_dim1,
    train_label_3_kernel_intensity_dim2,
    train_label_3_landscape_L1_norm_dim0,
    train_label_3_landscape_L1_norm_dim1,
    train_label_3_landscape_L1_norm_dim2,
    label=3,
)
test_df_label_3 = create_feature_df(
    test_label_3_kernel_intensity_dim0,
    test_label_3_kernel_intensity_dim1,
    test_label_3_kernel_intensity_dim2,
    test_label_3_landscape_L1_norm_dim0,
    test_label_3_landscape_L1_norm_dim1,
    test_label_3_landscape_L1_norm_dim2,
    label=3,
)

# Label 5
train_df_label_5 = create_feature_df(
    train_label_5_kernel_intensity_dim0,
    train_label_5_kernel_intensity_dim1,
    train_label_5_kernel_intensity_dim2,
    train_label_5_landscape_L1_norm_dim0,
    train_label_5_landscape_L1_norm_dim1,
    train_label_5_landscape_L1_norm_dim2,
    label=5,
)
test_df_label_5 = create_feature_df(
    test_label_5_kernel_intensity_dim0,
    test_label_5_kernel_intensity_dim1,
    test_label_5_kernel_intensity_dim2,
    test_label_5_landscape_L1_norm_dim0,
    test_label_5_landscape_L1_norm_dim1,
    test_label_5_landscape_L1_norm_dim2,
    label=5,
)

# Label 7
train_df_label_7 = create_feature_df(
    train_label_7_kernel_intensity_dim0,
    train_label_7_kernel_intensity_dim1,
    train_label_7_kernel_intensity_dim2,
    train_label_7_landscape_L1_norm_dim0,
    train_label_7_landscape_L1_norm_dim1,
    train_label_7_landscape_L1_norm_dim2,
    label=7,
)
test_df_label_7 = create_feature_df(
    test_label_7_kernel_intensity_dim0,
    test_label_7_kernel_intensity_dim1,
    test_label_7_kernel_intensity_dim2,
    test_label_7_landscape_L1_norm_dim0,
    test_label_7_landscape_L1_norm_dim1,
    test_label_7_landscape_L1_norm_dim2,
    label=7,
)

In [36]:
# Stack the per-label training frames (labels 1, 3, 5, 7) with a fresh index and write them to CSV
train_frames = [train_df_label_1, train_df_label_3, train_df_label_5, train_df_label_7]
train_feature_df = pd.concat(train_frames, ignore_index=True)
train_feature_df.to_csv("Features/Train_Signature_Statistics.csv")

# Stack the per-label test frames the same way and write them to CSV
test_frames = [test_df_label_1, test_df_label_3, test_df_label_5, test_df_label_7]
test_feature_df = pd.concat(test_frames, ignore_index=True)
test_feature_df.to_csv("Features/Test_Signature_Statistics.csv")


In [None]:
# NOTE(review): this cell repeats the previous cell (In [36]) and rewrites the same two CSV files;
# it looks like a leftover copy - consider deleting it.

# Stack the per-label training frames (labels 1, 3, 5, 7) with a fresh index and write them to CSV
train_frames = [train_df_label_1, train_df_label_3, train_df_label_5, train_df_label_7]
train_feature_df = pd.concat(train_frames, ignore_index=True)
train_feature_df.to_csv("Features/Train_Signature_Statistics.csv")

# Stack the per-label test frames the same way and write them to CSV
test_frames = [test_df_label_1, test_df_label_3, test_df_label_5, test_df_label_7]
test_feature_df = pd.concat(test_frames, ignore_index=True)
test_feature_df.to_csv("Features/Test_Signature_Statistics.csv")