In [61]:
import numpy as np
import pyedflib
import statistics
import plotly.graph_objects as go
import pandas as pd
from gtda.time_series import SingleTakensEmbedding
from gtda.homology import VietorisRipsPersistence
from gtda.diagrams import PersistenceEntropy, Amplitude, NumberOfPoints, ComplexPolynomial, PersistenceLandscape, HeatKernel, Silhouette, BettiCurve, PairwiseDistance, ForgetDimension
from gtda.plotting import plot_point_cloud, plot_heatmap, plot_diagram
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA, FastICA
from gtda.pipeline import Pipeline 
from numpy.linalg import norm
from scipy.stats import skew, kurtosis

# Load Data and set important variables

In [62]:
# choose individuum
subject = "m292"

In [63]:
label_list = [0, 1, 2, 3, 4]

In [64]:
# Load persistence diagrams

persistence_diagrams = np.load("Embeddings_and_Persistence_Diagrams/"+str(subject)+"/Persistence_Diagrams.npy", allow_pickle=True).item()

In [65]:
def precompute_signature(SG, label_list = label_list, persistence_diagrams = persistence_diagrams):

    amount_to_average = 80  # The imaging data has 20Hz, therefore we have to average 80 images to get 4 seconds

    signatures = {}
    for label in label_list:
        signatures[label] = {}
        
        for idx in range(int(len(persistence_diagrams[label]))):
            
            signatures[label][idx] = SG.fit_transform([persistence_diagrams[label][idx].astype("float")])

    return signatures

# Entries of vectorizations as direct features

In [66]:
def compute_vectorizations(persistence_diagrams, label_list, signatures):
    
    vectorizations = {}

    for label in label_list:
        
        vectorizations["Label_"+str(label)] = []
    
        for idx in range(len(persistence_diagrams[label])):
            
            signature = signatures[label][idx]

            vectorizations["Label_"+str(label)].append(signature)

    return vectorizations



In [67]:
def reshape_vectorizations_type_1(vectorizations, label_list):
    """
    Reshape vectorizations of shape (num_persistence_diagrams, 1, 3, 100) to separate dimensions for each label.

    Parameters:
    - vectorizations (dict): Dictionary containing vectorizations for each label.
    - label_list (list): List of labels.

    Returns:
    - reshaped_vectorizations (dict): Dictionary containing reshaped vectorizations for each label and dimension.
    """

    amount_to_average = 80  # The imaging data has 20Hz, therefore we have to average 80 images to get 4 seconds

    # Initialize dictionary to store reshaped vectorizations
    reshaped_vectorizations = {}
    
    # Iterate over each label
    for label in label_list:
        # Initialize dictionary to store reshaped vectorizations for the current label
        reshaped_vectorizations["Label_" + str(label)] = {}

        for coordinate_idx in range(100):

            # For each vectorization coordinate (there are 100), initialize one dictionary
            # which will contain lists of 74 vectorization coordinates (the "vectorization_idx"st coordinate
            # of the 74 persistence images) as values and the homology dimensions as keys
            reshaped_vectorizations["Label_" + str(label)]["Coordinate_" + str(coordinate_idx)] = {}
 
            for hom_dim in range(2):
                # Initialize list to store reshaped vectorizations for the current homology dimension
                reshaped_vectorizations["Label_" + str(label)]["Coordinate_" + str(coordinate_idx)]["Hom_Dim_" + str(hom_dim)] = []

    
    # Iterate over each label
    for label in label_list:
        # Iterate over each vectorization for the current label
        for coordinate_idx in range(100):
            
            for hom_dim in range(2):
                
                for vectorization_idx in range(int(len(vectorizations["Label_" + str(label)])/amount_to_average)):

                    vectorizations_to_average = []

                    for counter in range(amount_to_average):
                        vct = vectorizations["Label_" + str(label)][amount_to_average* vectorization_idx + counter][0][hom_dim][coordinate_idx]
                        
                        vectorizations_to_average.append(vct)
                        
                    # Append the component corresponding to the current homology dimension to the list
                    reshaped_vectorizations["Label_" + str(label)]["Coordinate_" + str(coordinate_idx)]["Hom_Dim_" + str(hom_dim)].append(np.mean(vectorizations_to_average, axis=0))

    return reshaped_vectorizations


In [68]:
# Initialize vectorizations
vectorizations = {}

## Persistence Landscape

In [69]:
PL = PersistenceLandscape()

landscapes = precompute_signature(PL)

In [70]:
vectorizations_before_reshaping = compute_vectorizations(persistence_diagrams, label_list, landscapes)
vectorizations["PL"] = reshape_vectorizations_type_1(vectorizations_before_reshaping, label_list)

## Betti Curve

In [71]:
BC = BettiCurve()

betti_curves = precompute_signature(BC)

In [72]:
vectorizations_before_reshaping = compute_vectorizations(persistence_diagrams, label_list, betti_curves)
vectorizations["BC"] = reshape_vectorizations_type_1(vectorizations_before_reshaping, label_list)

## Silhouette

In [73]:
SH = Silhouette()
silhouettes = precompute_signature(SH)

In [74]:
vectorizations_before_reshaping = compute_vectorizations(persistence_diagrams, label_list, silhouettes)
vectorizations["SH"] = reshape_vectorizations_type_1(vectorizations_before_reshaping, label_list)

# Save Signature Features

In [75]:
def create_feature_df(vectorizations, num_diagrams, label):
    """
    Create DataFrame for each label from features

    Parameters:
    - kernel_densities (list): intensities of heatkernel
    - L1_norms (list): L1 norms of signatures
    - num_diagrams (int): How many diagrams are there in total?
    - label (int): Label for which we want to create a dataframe. 1, 3, 5 or 7.

    Returns:
    - Feature DataFrame (DataFrame)
    """
    
    feature_df = pd.DataFrame(index=np.arange(0, num_diagrams))

    for signature in vectorizations.keys():
        for hom_dim in range(2):
            for coordinate_idx in range(100):
                feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
                "_Homology_Dim_"+str(hom_dim)] = vectorizations[signature]["Label_"+str(label)]["Coordinate_" + \
                str(coordinate_idx)]["Hom_Dim_" + str(hom_dim)]

    # Label
    feature_df["Label"] = label

    return feature_df

In [76]:
dataframes = {}

for label in label_list:
    dataframes["Label_"+str(label)] = create_feature_df(vectorizations, 75, label)

  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+str(coordinate_idx)+\
  feature_df["BI_"+str(signature)+"_Vectorization_Coordinate_"+st

In [77]:
# Concatenate and save features of training persistence diagrams
# TODO make the creation of this dataframe nicer
feature_df = pd.concat([dataframes["Label_"+str(0)], dataframes["Label_"+str(1)], dataframes["Label_"+str(2)], dataframes["Label_"+str(3)], dataframes["Label_"+str(4)]], ignore_index=True)
feature_df.to_csv("Features/"+str(subject)+"/Signature_Statistics.csv")