In [1]:
import pandas as pd
import numpy as np
import h5py
from gtda.images import RadialFiltration
from gtda.homology import CubicalPersistence

In [2]:
# Subject identifier; it is interpolated into the input and output paths used below.
subject = "m300"

In [3]:
# Labels to process; each one selects a "run0<label>" directory of the subject.
label_list = [0, 1, 2, 3, 4]
n_folds = 5  # not referenced by the other cells visible in this notebook

# Map every label to the 'Data' entry of its HDF5 file.
# NOTE(review): the file handles are never closed here. The stored objects are
# iterated later on, so they presumably need the files to stay open -- confirm
# before adding a close().
dataframes = {}
for label in label_list:
    filename = f"Data/{subject}/run0{label}/Brain_Imaging_Data.h5"
    file = h5py.File(filename, mode="r")
    dataframes[label] = file["Data"]
    

# Compute persistence diagram for each time step separately

In [4]:
def compute_persistence_diagrams(dataframes, label_list, center=(150, 150)):
    """
    Compute one cubical persistence diagram per image, grouped by label.

    Every image is passed through a radial filtration and then through cubical
    persistence on its own (as a collection of one), so each time step gets
    its own diagram.

    Parameters:
    - dataframes (dict): Maps each label to an iterable of images.
    - label_list (list): Labels to process; each one must be a key of dataframes.
    - center (sequence): Coordinates handed to RadialFiltration as the center of
      the filtration. Defaults to (150, 150), the value that used to be
      hard-coded in this function.

    Returns:
    - persistence_diagrams (dict): Maps "Label_<label>" to the list of diagrams
      of that label, in the order in which the images were iterated.
    """

    # fit_transform refits a transformer on every call, so a single instance of
    # each can be shared by all images instead of rebuilding one per image.
    radial_filtration = RadialFiltration(center=np.array(center))
    cubical_persistence = CubicalPersistence(n_jobs=-1)

    persistence_diagrams = {}

    for label in label_list:
        diagrams_of_label = []

        for image in dataframes[label]:
            # The transformers work on collections, hence the one-element list
            # going in and the [0] coming out.
            filtration = radial_filtration.fit_transform([image])
            diagram = cubical_persistence.fit_transform(filtration)
            diagrams_of_label.append(diagram[0])

        persistence_diagrams["Label_" + str(label)] = diagrams_of_label

    return persistence_diagrams


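# Disabled: uncomment to recompute the diagrams from the raw data instead of loading the saved copy in the next cell.
#persistence_diagrams = compute_persistence_diagrams(dataframes, label_list)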

# Create extended persistence diagrams

In [5]:
# Load previously saved diagrams instead of recomputing them. .item() unwraps
# the single Python object stored in the loaded array (presumably the dict of
# diagrams keyed by "Label_<label>" -- verify against the cell that saved it).
# NOTE(review): allow_pickle=True can execute arbitrary code contained in the
# file; only load files that were produced by this project.
persistence_diagrams = np.load("Embeddings_and_Persistence_Diagrams/"+str(subject)+"/Persistence_Diagrams.npy", allow_pickle=True).item()

In [6]:
def find_largest_dimension(persistence_diagrams, label_list):
    """
    Find, for each hole dimension, the largest number of holes in a single diagram.

    Parameters:
    - persistence_diagrams (dictionary): Maps "Label_<label>" to the list of
      persistence diagrams of that label. A diagram is a sequence of holes and
      the 3rd entry of a hole is its dimension.
    - label_list (list): List of all labels to take into account.

    Returns:
    - longest_lengths_each_dimension_overall (dictionary): Maps "Hole_Dim_0" and
      "Hole_Dim_1" to the highest number of holes of that dimension found in
      one diagram, taken over all given labels. Both values are 0 if there are
      no labels or no diagrams.
    """

    # Start every dimension at 0, so that an empty label list (or labels
    # without diagrams) yields zeros instead of failing on an empty max().
    longest_lengths_each_dimension_overall = {
        "Hole_Dim_" + str(hole_dim): 0 for hole_dim in range(2)
    }

    for label in label_list:

        # Only look at persistence diagrams of current label
        for diagram in persistence_diagrams["Label_" + str(label)]:

            # Hole dimensions 0 and 1 are considered
            for hole_dim in range(2):
                key = "Hole_Dim_" + str(hole_dim)

                # Only count the holes of the current hole dimension
                # (the hole dimension is indicated by the 3rd entry of the hole)
                number_of_holes = sum(1 for hole in diagram if hole[2] == hole_dim)

                # The overall maximum equals the maximum of the per-label
                # maxima, so it can be tracked directly.
                longest_lengths_each_dimension_overall[key] = max(
                    longest_lengths_each_dimension_overall[key], number_of_holes
                )

    return longest_lengths_each_dimension_overall


In [7]:
# Highest hole count per dimension over all labels; passed on below as the common padding target.
longest_lengths_each_dimension_overall = find_largest_dimension(persistence_diagrams, label_list)

In [8]:
def extend_diagrams_to_largest_dimensions(persistence_diagrams, desired_lengths_each_dimension, label_list):
    """
    Extend all persistence diagrams to the desired number of holes per
    dimension by appending [0, 0, hole_dim] entries.

    The original holes keep their order; the padding for dimension 0 is
    appended first, followed by the padding for dimension 1. A diagram that
    already has at least the desired number of holes of a dimension gets no
    padding for that dimension (it is not truncated).

    Parameters:
    - persistence_diagrams (dict): List of persistence diagrams of each label,
      with "Label_<label>" as keys.
    - desired_lengths_each_dimension (dict): Contains the wished amount of holes
      for each dimension, with "Hole_Dim_0" and "Hole_Dim_1" as keys.
    - label_list (list): List of all labels.

    Returns:
    - extended_persistence_diagrams (dict): List of extended persistence
      diagrams for each label, with "Label_<label>" as keys. Each extended
      diagram is a new list; the input diagrams are not modified.
    """

    # Initialize dictionary of extended persistence diagrams (with labels as keys)
    extended_persistence_diagrams = {}

    for label in label_list:

        extended_diagrams_of_label = []

        # Only look at persistence diagrams of current label
        for diagram in persistence_diagrams["Label_" + str(label)]:

            # Shallow copy, so that appending does not touch the input diagram
            extended_diagram = list(diagram)

            for hole_dim in range(2):

                current_number_of_holes = sum(1 for hole in diagram if hole[2] == hole_dim)
                number_of_holes_to_append = (
                    desired_lengths_each_dimension["Hole_Dim_" + str(hole_dim)] - current_number_of_holes
                )

                # Create a separate row object per padding entry: repeating one
                # list with `*` would make all padding rows the same object, so
                # changing one of them later would change all of them.
                # range() of a non-positive number is empty, i.e. no padding.
                extended_diagram.extend(
                    [0, 0, hole_dim] for _ in range(number_of_holes_to_append)
                )

            extended_diagrams_of_label.append(extended_diagram)

        extended_persistence_diagrams["Label_" + str(label)] = extended_diagrams_of_label

    return extended_persistence_diagrams


In [None]:
# Pad the diagrams of every label to the same number of holes per dimension.
extended_persistence_diagrams = extend_diagrams_to_largest_dimensions(persistence_diagrams, longest_lengths_each_dimension_overall, label_list)

# Save persistence diagrams and embeddings

In [None]:
#np.save("Embeddings_and_Persistence_Diagrams/"+str(subject)+"/Persistence_Diagrams.npy", np.array(persistence_diagrams, dtype=object), allow_pickle=True)

In [None]:
# Store the extended diagrams in one .npz archive, with one array per label.
# The array names are built from label_list, so the archive always matches the
# labels that were processed instead of a hard-coded Label_0 ... Label_4 set.

np.savez(
    "Embeddings_and_Persistence_Diagrams/"+str(subject)+"/Extended_Persistence_Diagrams.npz",
    **{
        "Label_"+str(label): extended_persistence_diagrams["Label_"+str(label)]
        for label in label_list
    },
)
