In [1]:
import pandas as pd
import numpy as np
import pickle
import time

In [2]:
from gudhi.representations.vector_methods import Landscape


In [3]:
"""
# read persistence diagrams

objects = []
with (open("persistence//diagrams_0_1000.pickle", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break
"""

'\n# read persistence diagrams\n\nobjects = []\nwith (open("persistence//diagrams_0_1000.pickle", "rb")) as openfile:\n    while True:\n        try:\n            objects.append(pickle.load(openfile))\n        except EOFError:\n            break\n'

In [4]:
# Load the stored persistence diagrams for the two groups that are labelled
# 1 and 3 further down. allow_pickle=True lets NumPy unpickle object arrays,
# which can run arbitrary code, so only load .npy files from a trusted source.
persistence_diagrams1, persistence_diagrams3 = (
    np.load(npy_path, allow_pickle=True) for npy_path in ('PD1.npy', 'PD3.npy')
)


In [6]:
def _diagram_ymax(diagrams):
    """Return the largest column-1 value over all non-empty diagrams, floored at 0."""
    ymax = 0
    for dgm in diagrams:
        if len(dgm) > 0:
            top = np.max(dgm[:, 1])
            if ymax < top:
                ymax = top
    return ymax


def _carlsson_features(diagrams, ymax):
    """Compute the four coordinates of every diagram, with ``ymax`` as the y ceiling."""
    n = len(diagrams)
    cc1, cc2, cc3, cc4 = (np.zeros(n) for _ in range(4))
    for i, dgm in enumerate(diagrams):
        if len(dgm) == 0:
            continue  # empty diagram: all four coordinates stay at 0
        x = dgm[:, 0]
        y = dgm[:, 1]
        pers = y - x
        # The builtin sum is kept so the results stay identical to the
        # original implementation of this function.
        cc1[i] = sum(x * pers)
        cc2[i] = sum((ymax - y) * pers)
        cc3[i] = sum(x**2 * pers**4)
        cc4[i] = sum((ymax - y)**2 * pers**4)
    return cc1, cc2, cc3, cc4


def carlsson_coordinates(X_train, X_test):
    """Compute four Carlsson-coordinate features per diagram for two diagram sets.

    For a diagram with columns ``x`` (column 0) and ``y`` (column 1), and with
    ``ymax`` the largest ``y`` found in the set the diagram belongs to
    (never below 0), the features are::

        cc1 = sum(x * (y - x))
        cc2 = sum((ymax - y) * (y - x))
        cc3 = sum(x**2 * (y - x)**4)
        cc4 = sum((ymax - y)**2 * (y - x)**4)

    A diagram of length 0 yields 0 for all four features.

    Parameters
    ----------
    X_train, X_test : sequence of 2-D arrays
        One array per diagram, indexable as ``dgm[:, 0]`` and ``dgm[:, 1]``
        (presumably birth and death values -- confirm against the data source).

    Returns
    -------
    tuple of 8 numpy.ndarray
        ``(train_cc1, train_cc2, train_cc3, train_cc4,
        test_cc1, test_cc2, test_cc3, test_cc4)``; each array has one entry
        per diagram of the corresponding input.

    Notes
    -----
    Prints the training ``ymax`` and the elapsed wall-clock time.

    NOTE(review): the test features are scaled with the ``ymax`` of ``X_test``
    itself, not with the training ``ymax``. This is kept as-is; confirm it is
    intended.
    """
    start = time.time()

    train_ymax = _diagram_ymax(X_train)
    print(train_ymax)
    train_features = _carlsson_features(X_train, train_ymax)

    test_features = _carlsson_features(X_test, _diagram_ymax(X_test))

    print("Total Time (Carlsson Coordinates): ", time.time()-start)
    return train_features + test_features

def landscape_features(X_train, X_test, num_landscapes=5, resolution=100):
    """Vectorise both diagram sets with a landscape transformer fitted on X_train.

    One ``Landscape(num_landscapes, resolution)`` object is fitted on
    ``X_train`` and then used to transform ``X_train`` and ``X_test``.
    The elapsed wall-clock time is printed.

    Returns
    -------
    tuple
        ``(X_train_features, X_test_features)`` exactly as returned by the
        fitted object's ``transform``.
    """
    t0 = time.time()
    fitted = Landscape(num_landscapes, resolution).fit(X_train)
    train_vectors = fitted.transform(X_train)
    test_vectors = fitted.transform(X_test)
    print("Total Time (Landscape Features): ", time.time() - t0)
    return train_vectors, test_vectors

def persistence_image_features(X_train, X_test, pixels=[20,20], spread=1):
    """Flattened persistence-image vectors for the train and test diagrams.

    Each input is transformed by its own newly constructed
    ``PersImage(pixels=pixels, spread=spread)``. Every resulting image is
    flattened and the rows are stacked into one array per input. The elapsed
    wall-clock time is printed.

    NOTE(review): ``PersImage`` is not imported in the visible cells of this
    notebook; it has to be in scope before this function is called -- confirm
    where it comes from.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train_features, X_test_features)``.
    """
    t0 = time.time()

    def _vectorise(diagrams):
        # A separate PersImage instance is built for each input set.
        images = PersImage(pixels=pixels, spread=spread).transform(diagrams)
        return np.array([image.flatten() for image in images])

    train_vectors = _vectorise(X_train)
    test_vectors = _vectorise(X_test)
    print("Total Time (Persistence Images): ", time.time() - t0)
    return train_vectors, test_vectors

In [8]:
# Carlsson coordinates for the first diagram set. The same array is passed as
# both train and test, so the X_test_* results repeat the X_train_* ones.
(
    X_train_features_cc1,
    X_train_features_cc2,
    X_train_features_cc3,
    X_train_features_cc4,
    X_test_features_cc1,
    X_test_features_cc2,
    X_test_features_cc3,
    X_test_features_cc4,
) = carlsson_coordinates(persistence_diagrams1, persistence_diagrams1)

0.0005438531516119838
Total Time (Carlsson Coordinates):  0.03448200225830078


In [13]:
# Feature table for the first diagram set: one row per diagram, the four
# Carlsson coordinates as columns, plus a constant Label of 1.
features_df1 = pd.DataFrame(
    {
        "AC_coordinate_cc1": X_train_features_cc1,
        "AC_coordinate_cc2": X_train_features_cc2,
        "AC_coordinate_cc3": X_train_features_cc3,
        "AC_coordinate_cc4": X_train_features_cc4,
    }
)
features_df1["Label"] = 1

In [14]:
# Carlsson coordinates for the second diagram set (train and test are again
# the same array). NOTE: this rebinds the same X_train_features_* and
# X_test_features_* names that held the results for persistence_diagrams1,
# so features_df1 has to be built before this cell runs.
(
    X_train_features_cc1,
    X_train_features_cc2,
    X_train_features_cc3,
    X_train_features_cc4,
    X_test_features_cc1,
    X_test_features_cc2,
    X_test_features_cc3,
    X_test_features_cc4,
) = carlsson_coordinates(persistence_diagrams3, persistence_diagrams3)

0.0010116847697645426
Total Time (Carlsson Coordinates):  0.03249001502990723


In [15]:
# Feature table for the second diagram set: one row per diagram, the four
# Carlsson coordinates as columns, plus a constant Label of 3.
features_df3 = pd.DataFrame(
    {
        "AC_coordinate_cc1": X_train_features_cc1,
        "AC_coordinate_cc2": X_train_features_cc2,
        "AC_coordinate_cc3": X_train_features_cc3,
        "AC_coordinate_cc4": X_train_features_cc4,
    }
)
features_df3["Label"] = 3

In [17]:
# Stack the two per-label tables into one frame. ignore_index=True gives the
# result a fresh 0..n-1 index; without it each input keeps its own 0-based row
# labels, so the combined frame (and the CSV written from it) would contain
# duplicate index values.
feature_df = pd.concat([features_df1, features_df3], ignore_index=True)

In [18]:
# Write the combined feature table to disk. index=True is the pandas default,
# spelled out here: the frame's index is written as the first, unnamed column.
feature_df.to_csv("Advanced_Features.csv", index=True)