In [1]:
import numpy as np
import pandas as pd
# Data viz
from gtda.plotting import plot_point_cloud

# TDA magic
from gtda.mapper import (
    CubicalCover,
    make_mapper_pipeline,
    Projection,
    plot_static_mapper_graph,
    plot_interactive_mapper_graph,
    MapperInteractivePlotter
)

# ML tools
from sklearn import datasets
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import gtda

In [3]:
# Dataset whose precomputed metadata tables and embeddings are loaded below.
dataset = "CelebA"
# All inputs for this dataset live under the same directory.
embeddings_dir = f"../../embeddings/{dataset}"
metadata = pd.read_parquet(f"{embeddings_dir}/df_dataset.parquet")
metadata_identities = pd.read_parquet(f"{embeddings_dir}/df_dataset_identities.parquet")
metadata_attributes = pd.read_parquet(f"{embeddings_dir}/df_dataset_attributes.parquet")

MODEL_NAMES = ["retina_facenet","retina_arcface"]
# Presumably the distance metric paired with each entry of MODEL_NAMES;
# not used by the cells visible here -- TODO confirm.
MODEL_METRIC = ["euclidean", "cosine"]


# One embedding array per model, in MODEL_NAMES order.
embeddings = []
for model_name in MODEL_NAMES:
    # np.load on an .npz returns a lazy NpzFile holding an open file handle;
    # the context manager closes it once array 'a' has been read into memory.
    with np.load(f"{embeddings_dir}/embeddings_{model_name}.npz") as data_npz:
        embeddings.append(data_npz['a'])

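### Compute the barycenters of the identities

For each model, the barycenter of an identity is the mean of the embeddings of all rows of that identity that are kept for the model.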

In [6]:
# For every model, compute one barycenter (mean embedding) per identity, using
# only the rows flagged True in that model's "keep_<model_name>" column.
#   identities_ids[m]    -> sorted array of the identities kept for model m
#   datas_barycenters[m] -> array whose k-th row is the barycenter of
#                           identities_ids[m][k]
datas_barycenters = []
identities_ids = []
for m, model_name in enumerate(MODEL_NAMES):
    # Apply the keep-filter once per model instead of re-scanning the whole
    # metadata frame for every identity.
    kept = metadata.loc[metadata[f"keep_{model_name}"]==True]
    identities_ids.append(np.sort(kept["identity"].unique()))

    # Row labels of the kept rows, grouped by identity. As before, the labels
    # of `metadata` are used directly as row positions in the embedding array
    # (assumes metadata's index is aligned with the embedding rows -- TODO confirm).
    # NOTE: groupby drops missing keys, so a NaN identity would raise a KeyError
    # below instead of silently producing a NaN barycenter.
    rows_by_identity = kept.groupby("identity").groups

    datas_barycenters.append(np.array([
        embeddings[m][rows_by_identity[identity], :].mean(axis=0)
        for identity in identities_ids[-1]
    ]))

### Compute the MAPPER graph

In [7]:
# Define filter function – can be any scikit-learn transformer.
# random_state is pinned so that the 2-D projection (and therefore the Mapper
# graph built on top of it) is reproducible across runs when scikit-learn
# selects a randomized SVD solver.
pca = PCA(n_components=2, random_state=0)

filter_func = pca
# Define cover – n_intervals and overlap_frac set its resolution and overlap
cover = CubicalCover(n_intervals=50, overlap_frac=0.4)
# Choose clustering algorithm – default is DBSCAN
clusterer = DBSCAN()

# Configure parallelism of clustering step
n_jobs = 1

# Initialise pipeline
pipe = make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer,
    verbose=False,
    n_jobs=n_jobs,
)

### Facenet

In [8]:
# Model to visualise (index into MODEL_NAMES) and the attribute used as colour
i = 0
attribute = "Big_Nose"

# Sorted unique identities among the rows kept for this model
kept_rows = metadata[f"keep_{MODEL_NAMES[i]}"] == True
identities_ids = np.sort(metadata.loc[kept_rows, "identity"].unique())

# "avg_<attribute>" column of the identity table, indexed by those identities
avg_attribute = metadata_identities[f"avg_{attribute}"]
feat = np.array(avg_attribute[identities_ids])

# Static Mapper graph of this model's barycenters, coloured by the attribute
fig = plot_static_mapper_graph(
    pipe,
    datas_barycenters[i],
    color_data=feat,
)
fig.show(config={'scrollZoom': True})

### Arcface

In [45]:
# Model to visualise (index into MODEL_NAMES) and the attribute used as colour
i = 1
attribute = "Male"

# Sorted unique identities among the rows kept for this model
kept_rows = metadata[f"keep_{MODEL_NAMES[i]}"] == True
identities_ids = np.sort(metadata.loc[kept_rows, "identity"].unique())

# "avg_<attribute>" column of the identity table, indexed by those identities
avg_attribute = metadata_identities[f"avg_{attribute}"]
feat = np.array(avg_attribute[identities_ids])

# Static Mapper graph of this model's barycenters, coloured by the attribute
fig = plot_static_mapper_graph(
    pipe,
    datas_barycenters[i],
    color_data=feat,
)
fig.show(config={'scrollZoom': True})