In [2]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
import seaborn as sns
import sys
import copy
from tqdm.notebook import tqdm
from numba import jit
from scipy import stats
import networkx as nx
import random
import re
from numba import njit

#plt.style.use('seaborn-deep')
# Global matplotlib styling for every figure in this notebook, applied as a
# single batch. Keys are set in the order listed.
plt.rcParams.update(
    {
        # Text is rendered through LaTeX, with the AMS packages available.
        "text.usetex": True,
        "text.latex.preamble": r'\usepackage{amssymb,amsmath}',
        # Figure canvas: (width, height) and resolution.
        "figure.figsize": (11.7, 8.3),
        "figure.dpi": 75,
        # Fonts: sans-serif family with an ordered list of fallbacks.
        "font.size": 24,
        "font.family": "sans-serif",
        "font.sans-serif": [
            "Fira Sans",
            'PT Sans',
            'Open Sans',
            'Roboto',
            'DejaVu Sans',
            'Liberation Sans',
            'sans-serif',
        ],
        # Legend appearance.
        "legend.frameon": True,
        "legend.fancybox": True,
        "legend.fontsize": "small",
        # Line and marker defaults.
        "lines.linewidth": 2.5,
        "lines.markersize": 14,
        "lines.markeredgewidth": 2,
        # Major tick lengths on both axes.
        "xtick.major.size": 8,
        "ytick.major.size": 8,
    }
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Taken from the giotto-tda example notebook "Classifying shapes": https://github.com/giotto-ai/giotto-tda/blob/master/examples/classifying_shapes.ipynb

In [3]:
from generate_datasets import make_point_clouds

# Generation settings for the synthetic data set.
num_samples = 10
num_points = 20

# Build the noisy synthetic point clouds together with their class labels.
point_clouds_basic, labels_basic = make_point_clouds(
    n_samples_per_shape=num_samples,
    n_points=num_points,
    noise=0.5,
)

# Show both array shapes as the cell output.
(point_clouds_basic.shape, labels_basic.shape)

((30, 400, 3), (30,))

In [4]:
from gtda.plotting import plot_point_cloud

# Visual sanity check: draw the very first generated point cloud.
first_cloud = point_clouds_basic[0]
plot_point_cloud(first_cloud)

In [5]:
# Draw the point cloud stored at position 10 of the synthetic data set.
cloud_at_10 = point_clouds_basic[10]
plot_point_cloud(cloud_at_10)

In [6]:
# Draw the final point cloud of the synthetic data set.
last_cloud = point_clouds_basic[-1]
plot_point_cloud(last_cloud)

In [7]:
from gtda.homology import VietorisRipsPersistence

# Homology dimensions to compute: 0 = connected components, 1 = loops, 2 = voids.
homology_dimensions = [0, 1, 2]

# Vietoris-Rips settings; edge collapse is switched on to speed up the
# H2 persistence calculation.
vr_settings = dict(
    metric="euclidean",
    homology_dimensions=homology_dimensions,
    n_jobs=6,
    collapse_edges=True,
)
persistence = VietorisRipsPersistence(**vr_settings)

# One persistence diagram per synthetic point cloud.
diagrams_basic = persistence.fit_transform(point_clouds_basic)

In [8]:
# Dimensions of the full synthetic data set array.
np.shape(point_clouds_basic)

(30, 400, 3)

In [9]:
from gtda.plotting import plot_diagram

# Persistence diagram computed for the point cloud at position 10.
diagram_at_10 = diagrams_basic[10]
plot_diagram(diagram_at_10)

In [10]:
# Persistence diagram computed for the final synthetic point cloud.
last_diagram = diagrams_basic[-1]
plot_diagram(last_diagram)

In [11]:
# Dimensions of a single point cloud (the one at position 2).
np.shape(point_clouds_basic[2])

(400, 3)

In [12]:
# Dimensions of the stacked persistence diagrams.
np.shape(diagrams_basic)

(30, 600, 3)

In [9]:
from gtda.diagrams import PersistenceEntropy

# Turn every persistence diagram into a small feature vector of
# persistence entropies (the topological feature matrix).
persistence_entropy = PersistenceEntropy()
X_basic = persistence_entropy.fit_transform(diagrams_basic)

# Expected dimensions: (n_point_clouds, n_homology_dims).
np.shape(X_basic)

(30, 3)

In [10]:
# X_basic has one row per point cloud and one column per homology dimension
# (three here), so the feature matrix itself is passed to the point-cloud
# plotter to inspect how the samples are laid out in entropy space.
plot_point_cloud(X_basic)

In [11]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the entropy features and score it on the
# out-of-bag samples (oob_score=True), so no separate test split is needed.
# random_state is pinned (same seed as the forest trained on the OpenML
# shapes later in this notebook) so that the bootstrap draws, and therefore
# the reported OOB score, are reproducible across kernel restarts.
rf = RandomForestClassifier(oob_score=True, random_state=42)
rf.fit(X_basic, labels_basic)

print(f"OOB score: {rf.oob_score_:.3f}")

OOB score: 1.000


In [12]:
from gtda.pipeline import Pipeline

# End-to-end pipeline: point clouds -> persistence diagrams -> entropy
# features -> random-forest classifier.
steps = [
    (
        "persistence",
        # Same Vietoris-Rips configuration as the stand-alone transformer used
        # in this notebook, including collapse_edges=True, which was missing
        # here and is what keeps the H2 computation fast.
        VietorisRipsPersistence(
            metric="euclidean",
            homology_dimensions=homology_dimensions,
            n_jobs=6,
            collapse_edges=True,
        ),
    ),
    ("entropy", PersistenceEntropy()),
    # Seeded so the out-of-bag score of the fitted pipeline is reproducible.
    ("model", RandomForestClassifier(oob_score=True, random_state=42)),
]

pipeline = Pipeline(steps)

In [13]:
# Train every stage of the pipeline directly on the raw point clouds.
pipeline.fit(X=point_clouds_basic, y=labels_basic)

In [14]:
# Out-of-bag accuracy of the random forest that sits at the end of the pipeline.
fitted_forest = pipeline.named_steps["model"]
fitted_forest.oob_score_

1.0

---
---

In [15]:
from openml.datasets.functions import get_dataset

# Fetch the "shapes" data set from OpenML; get_data returns a tuple whose
# first element is the data itself, requested here as a DataFrame.
shapes_dataset = get_dataset('shapes')
shapes_payload = shapes_dataset.get_data(dataset_format='dataframe')
df = shapes_payload[0]

# Preview the first rows (x, y, z coordinates plus the shape name in "target").
df.head()

Unnamed: 0,x,y,z,target
0,0.341007,0.318606,0.096725,human_arms_out9
1,0.329226,0.421601,0.056749,human_arms_out9
2,0.446869,0.648674,0.12409,human_arms_out9
3,0.314729,0.21786,0.070847,human_arms_out9
4,0.426678,0.919195,0.047609,human_arms_out9


In [16]:
import openml

In [17]:
# Pick out the rows belonging to the shape named "biplane0" and draw its
# x/y/z coordinates as a point cloud.
biplane_rows = df["target"] == "biplane0"
biplane_xyz = df.loc[biplane_rows, ["x", "y", "z"]].values
plot_point_cloud(biplane_xyz)

In [18]:
import numpy as np

# Split the long table into one coordinate array per shape, keeping the
# shapes in the order in which they first appear in the "target" column.
xyz_columns = ["x", "y", "z"]
shape_names = df["target"].unique()

clouds_per_shape = []
for shape in shape_names:
    rows_of_shape = df["target"] == shape
    clouds_per_shape.append(df.loc[rows_of_shape, xyz_columns].values)

# Stack into a single array with one point cloud per shape.
point_clouds = np.asarray(clouds_per_shape)
point_clouds.shape

(40, 400, 3)

In [19]:
# Vietoris-Rips persistence for the OpenML shapes, configured exactly like
# the transformer used on the synthetic clouds (edge collapse enabled).
rips_kwargs = {
    "metric": "euclidean",
    "homology_dimensions": homology_dimensions,
    "n_jobs": 6,
    "collapse_edges": True,
}
persistence = VietorisRipsPersistence(**rips_kwargs)

# One persistence diagram per shape.
persistence_diagrams = persistence.fit_transform(point_clouds)

In [20]:
# Starting index of each shape class in the diagrams array:
#   human_arms_out -> 0, vase -> 10, dining_chair -> 20, biplane -> 30
index = 30
selected_diagram = persistence_diagrams[index]
plot_diagram(selected_diagram)

In [21]:
# Topological feature matrix for the OpenML shapes: persistence entropy of
# every diagram, this time with normalisation switched on.
persistence_entropy = PersistenceEntropy(normalize=True)
X = persistence_entropy.fit_transform(persistence_diagrams)

# Inspect the feature vectors by plotting them as a point cloud.
plot_point_cloud(X)

In [22]:
# Derive one class label per point cloud from its shape name rather than
# hard-coding "40 clouds in four blocks of ten". Shape names look like
# "biplane0" or "human_arms_out9": stripping the trailing sample number
# leaves the class name. The names are taken in the same first-appearance
# order that was used to build `point_clouds`, so labels[i] matches X[i].
shape_names = pd.Series(df["target"].unique()).astype(str)
class_names = shape_names.str.replace(r"\d+$", "", regex=True)

# factorize numbers the classes 0, 1, 2, ... in order of first appearance;
# the float dtype matches the array previously created with np.zeros.
labels = pd.factorize(class_names)[0].astype(float)
assert len(labels) == len(X), "expected exactly one label per feature row"

# Seeded random forest, scored on its out-of-bag samples.
rf = RandomForestClassifier(oob_score=True, random_state=42)
rf.fit(X, labels)
rf.oob_score_

0.6