In [1]:
import pandas as pd
import numpy as np
import pickle
import time

In [2]:
# Class labels to process; later cells iterate over this list to load the
# persistence diagrams and build one feature table per label.
label_list = [1, 3, 5, 7]

In [3]:
# Load persistence diagrams
#
# Both dictionaries map a class label to the array stored in that label's
# .npy file (one file per label and per split).
# NOTE: allow_pickle=True lets np.load unpickle arbitrary Python objects, so
# these files must come from a trusted source.
train_persistence_diagrams = dict()
test_persistence_diagrams = dict()

for label in label_list:
    train_path = "Embeddings_and_Persistence_Diagrams/Train_PD{!s}.npy".format(label)
    test_path = "Embeddings_and_Persistence_Diagrams/Test_PD{!s}.npy".format(label)

    train_persistence_diagrams[label] = np.load(train_path, allow_pickle=True)
    test_persistence_diagrams[label] = np.load(test_path, allow_pickle=True)

In [4]:
# Adcock-Carlsson coordinates

In [5]:
def carlsson_coordinates(persistence_diagrams, ymax=None):
    """Compute four Adcock-Carlsson coordinates for each persistence diagram.

    With ``x`` the first column and ``y`` the second column of a diagram
    (presumably birth and death values -- confirm against the code that wrote
    the diagrams), the features are::

        cc1 = sum(x * (y - x))
        cc2 = sum((ymax - y) * (y - x))
        cc3 = sum(x**2 * (y - x)**4)
        cc4 = sum((ymax - y)**2 * (y - x)**4)

    Parameters
    ----------
    persistence_diagrams : sequence of 2-D arrays
        Indexed with integers ``0 .. len - 1``. A diagram of length 0
        contributes all-zero features.
    ymax : float, optional
        Reference value used in cc2 and cc4. When ``None`` (the default) it is
        the largest value found in the second column of any diagram in
        ``persistence_diagrams``, never lower than 0. Pass an explicit value to
        put several collections (for example different classes, or train and
        test data) on one common scale.

    Returns
    -------
    tuple of four numpy.ndarray
        ``(cc1, cc2, cc3, cc4)``, each a float array with one entry per diagram.

    Notes
    -----
    Prints the ``ymax`` value that was used.
    """
    n = len(persistence_diagrams)
    # Zero-initialised, so empty diagrams need no explicit handling below.
    cc1 = np.zeros(n)
    cc2 = np.zeros(n)
    cc3 = np.zeros(n)
    cc4 = np.zeros(n)

    if ymax is None:
        # Largest second-column value over all non-empty diagrams (floor of 0).
        ymax = 0
        for i in range(n):
            if len(persistence_diagrams[i]) > 0:
                diagram_max = np.max(persistence_diagrams[i][:, 1])
                if ymax < diagram_max:
                    ymax = diagram_max
    print(ymax)

    for i in range(n):
        diagram = persistence_diagrams[i]
        if len(diagram) == 0:
            continue
        x = diagram[:, 0]
        y = diagram[:, 1]
        persistence = y - x
        # np.sum reduces inside numpy instead of looping over elements in Python.
        cc1[i] = np.sum(x * persistence)
        cc2[i] = np.sum((ymax - y) * persistence)
        cc3[i] = np.sum(x**2 * persistence**4)
        cc4[i] = np.sum((ymax - y)**2 * persistence**4)

    return cc1, cc2, cc3, cc4

In [6]:
def create_dataframe(X_features_cc1, X_features_cc2, X_features_cc3, X_features_cc4, label):
    """Assemble the four coordinate vectors and a class label into one table.

    The result has the columns ``AC_coordinate_cc1`` .. ``AC_coordinate_cc4``
    (filled from the four feature arguments, in that order) followed by a
    ``Label`` column in which every row holds ``label``.
    """
    named_columns = (
        ("AC_coordinate_cc1", X_features_cc1),
        ("AC_coordinate_cc2", X_features_cc2),
        ("AC_coordinate_cc3", X_features_cc3),
        ("AC_coordinate_cc4", X_features_cc4),
    )

    table = pd.DataFrame()
    for column_name, column_values in named_columns:
        table[column_name] = column_values

    # A scalar assignment is broadcast to every existing row.
    table["Label"] = label
    return table

In [9]:
# Per-class feature tables, keyed by label, for the train and test splits.
train_features_dataframes = dict()
test_features_dataframes = dict()

for label in label_list:
    # NOTE(review): carlsson_coordinates is called once per class and per
    # split, and by default derives ymax from the diagrams it is given (it
    # prints that value), so cc2/cc4 use a different ymax in every call --
    # confirm this is intended.

    # Train data Adcock-Carlsson coordinates
    train_coord_1, train_coord_2, train_coord_3, train_coord_4 = carlsson_coordinates(
        train_persistence_diagrams[label]
    )
    train_features_dataframes[label] = create_dataframe(
        train_coord_1, train_coord_2, train_coord_3, train_coord_4, label
    )

    # Test data Adcock-Carlsson coordinates
    test_coord_1, test_coord_2, test_coord_3, test_coord_4 = carlsson_coordinates(
        test_persistence_diagrams[label]
    )
    test_features_dataframes[label] = create_dataframe(
        test_coord_1, test_coord_2, test_coord_3, test_coord_4, label
    )

0.00509025389328599
0.0006078073056414723
0.0013039779150858521
0.0012807620223611593
0.0007453467696905136
0.0008103330619633198
0.010027261450886726
0.010001965798437595


In [10]:
# Concatenate and save dataframes
#
# The per-class tables are stacked in label_list order. The keys are taken
# from label_list instead of being repeated here, so this cell cannot drift
# out of sync with the labels that were actually processed.
train_feature_df = pd.concat(
    [train_features_dataframes[label] for label in label_list], ignore_index=True
)

test_feature_df = pd.concat(
    [test_features_dataframes[label] for label in label_list], ignore_index=True
)

# to_csv is left at its default index=True, so the row index is written as the
# first, unnamed column of each file.
train_feature_df.to_csv("Features/Train_Advanced_Features.csv")
test_feature_df.to_csv("Features/Test_Advanced_Features.csv")