In [None]:
!pip install giotto-tda

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt, seaborn as sns
import os, gc
import sklearn
from gtda.time_series import SingleTakensEmbedding, PearsonDissimilarity
from gtda.homology import VietorisRipsPersistence
from gtda.plotting import plot_point_cloud
from gtda.diagrams import Amplitude, PersistenceEntropy
from gtda.plotting import plot_diagram

In [None]:
%%time
# Read the training table from the Feather file into `train` (the cell is timed
# by the %%time magic above) and preview its first rows.
train = pd.read_feather('../input/amexfeather/train_data.ftr')
train.head()

In [None]:
# Histogram of the `target` column over all rows of `train`.
# An explicit figure/axes pair is used so the plot can be labelled and stands
# on its own when the notebook is skimmed.
fig, ax = plt.subplots()
train["target"].hist(ax=ax)
ax.set(title="Distribution of target", xlabel="target", ylabel="Number of rows")
plt.show()

In [None]:
# Unique customer IDs appearing in rows with target == 1 (def_customer)
# and in rows with target == 0 (ndef_customer).
def_customer = train.loc[train["target"].eq(1), "customer_ID"].unique()
ndef_customer = train.loc[train["target"].eq(0), "customer_ID"].unique()

In [None]:
# Show every row of `train` belonging to the customer at index 1 of def_customer.
train.loc[train["customer_ID"].eq(def_customer[1])]

In [None]:
# Column groups: categorical features, binary features, and everything else
# (minus the ID, label and S_2 columns), which is treated as continuous.
cat_features = [
    'B_30', 'B_38',
    'D_114', 'D_116', 'D_117', 'D_120', 'D_126',
    'D_63', 'D_64', 'D_66', 'D_68',
]
bin_features = ['B_31', 'D_87']

# Every column that must NOT end up in cont_features.
non_cont_columns = set(cat_features) | set(bin_features) | {'customer_ID', 'target', 'S_2'}
cont_features = sorted(col for col in train.columns if col not in non_cont_columns)

Due to the size of the dataset, we use $200$ customers for illustration: the first $100$ from `def_customer` and the first $100$ from `ndef_customer`.

In [None]:
# Build one fixed-size array per sampled customer: each row holds one of the
# customer's rows from `train`, each column one feature from `cont_features`.
MAX_ROWS_PER_CUSTOMER = 13  # row capacity of every per-customer array
ncf = len(cont_features)

# First 100 IDs from def_customer followed by the first 100 from ndef_customer.
sample_ids = np.concatenate([def_customer[0:100], ndef_customer[0:100]])

# Scan the full training frame once and keep only the sampled customers' rows,
# instead of re-filtering all of `train` on every loop iteration.
sample_rows = train.loc[
    train["customer_ID"].isin(sample_ids),
    ["customer_ID"] + cont_features,
]

sample_customers = []
for cust_id in sample_ids:
    history = sample_rows.loc[sample_rows["customer_ID"] == cust_id, cont_features]
    n_rows = history.shape[0]
    if n_rows > MAX_ROWS_PER_CUSTOMER:
        # Fail with a clear message rather than an opaque broadcasting error.
        raise ValueError(
            f"customer {cust_id!r} has {n_rows} rows, "
            f"more than the expected maximum of {MAX_ROWS_PER_CUSTOMER}"
        )

    padded = np.zeros((MAX_ROWS_PER_CUSTOMER, ncf))
    # Missing values are encoded as -1; rows past n_rows stay all-zero.
    # NOTE(review): the all-zero padding rows are passed downstream together
    # with the real rows -- confirm that this is intended.
    padded[:n_rows, :] = history.fillna(-1).to_numpy()
    sample_customers.append(padded)

In [None]:
# Vietoris-Rips persistence transformer configured for homology dimensions 0, 1 and 2.
VR = VietorisRipsPersistence(homology_dimensions=[0, 1, 2])

## Vietoris-Rips Persistent Homology
Calculate persistence diagrams for the sampled customers.

In [None]:
# Fit the Vietoris-Rips transformer on the list of per-customer arrays and
# compute the resulting persistence diagrams in one call.
diagrams = VR.fit_transform(sample_customers)

In [None]:
# Inspect the dimensions of the computed diagrams array.
diagrams.shape

In [None]:
# Plot the first persistence diagram.
# Presumably this corresponds to sample_customers[0], i.e. the first ID taken
# from def_customer -- assumes fit_transform preserves input order; TODO confirm.
plot_diagram(diagrams[0])

In [None]:
# Plot the second persistence diagram.
# Presumably this corresponds to sample_customers[1], another ID taken from
# def_customer -- assumes fit_transform preserves input order; TODO confirm.
plot_diagram(diagrams[1])

In [None]:
# Plot the third persistence diagram.
# Presumably this corresponds to sample_customers[2], another ID taken from
# def_customer -- assumes fit_transform preserves input order; TODO confirm.
plot_diagram(diagrams[2])

## Persistence Entropy
Extract scalar features from the persistence diagrams.

In [None]:
# Turn the persistence diagrams into scalar features with a default-configured
# PersistenceEntropy transformer.
PE = PersistenceEntropy()
features = PE.fit_transform(diagrams)

In [None]:
# Show the full shape of the feature array rather than only its first-axis
# length, so the number of features per diagram is visible too.
features.shape

In [None]:
# Preview only the first rows instead of dumping the entire feature array.
features[:10]

## References
1. https://arxiv.org/pdf/2001.07588.pdf
2. https://arxiv.org/pdf/1701.07857.pdf