In [1]:
# Imports
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import logistic as lgtc
import clef as clf
import hill
import damex as dmx
import peng as pg
import clef_asymptotic as clf_as
import utilities as ut

# Generate data

In [2]:
# Problem size: ambient dimension and number of faces to draw.
dim, nb_faces = 100, 80

# Draw the faces as random subsets of {1,...,d}; they are reused below
# both to simulate the data and as the reference for scoring.
faces = ut.gen_random_faces(dim, nb_faces)

In [3]:
# Simulation settings: sample number and dependence coefficient.
n = 20_000
as_dep = 0.5

# Simulate the sample from the faces drawn above.
X = lgtc.asym_logistic(dim, faces, n, as_dep)

# Standardize each marginal to pareto distribution.
V = ut.rank_transformation(X)

# Split train/test

In [4]:
# Hold out 10% of the standardized sample for scoring the estimated faces.
# `random_state` pins the shuffle so that the train/test membership (and hence
# every distance reported below) is the same on each Restart & Run All.
# NOTE(review): this only fixes the split; the data generation above has its
# own randomness and is not seeded here.
V_train, V_test = train_test_split(V, test_size=0.1, random_state=0)

# Sample sizes of each part; n_train is used later to derive k.
n_train, n_test = V_train.shape[0], V_test.shape[0]

## Extreme points

In [5]:
# Radius R: shared by the helpers in this cell and by the CLEF / DAMEX / scoring
# calls in the cells below.
R = 200
# Reference score: the distance helper evaluated on the faces used to simulate
# the data, against the held-out sample.
print(f'average dist to true faces = {ut.levenshtein_faces_radius(faces, R, V_test)}')
# NOTE(review): judging by the helper's name, this presumably keeps the points
# whose norm exceeds R and binarizes them -- confirm in utilities.py.
V_train_bin = ut.above_radius_bin(V_train, R)
print(f'nb extreme points train = {V_train_bin.shape[0]}')
# Same helper applied to the held-out sample.
V_test_bin = ut.above_radius_bin(V_test, R)
print(f'nb extreme points test = {V_test_bin.shape[0]}')

average dist to true faces = 0.5224360956503813
nb extreme points train = 4005
nb extreme points test = 462


# CLEF

In [6]:
# CLEF on the training sample at radius R, with its kappa threshold named
# explicitly before the call.
kappa_min = 0.05
faces_clf = clf.clef(V_train, R, kappa_min=kappa_min)

# Report how many faces were returned and how they score on the held-out sample.
print(f'nb maximal faces = {len(faces_clf)}')
print(f'average dist to clef faces = {ut.levenshtein_faces_radius(faces_clf, R, V_test)}')

nb maximal faces = 131
average dist to clef faces = 0.5027498235290443


# Hill

In [7]:
# Hyper-parameters for the Hill-based method: delta is fixed by hand,
# k is derived from the training size and the radius R.
delta = 0.005
k = int(n_train / R - 1)

faces_hill = hill.hill(V_train, delta, k)

# Report how many faces were returned and how they score on the held-out sample.
print(f'nb maximal faces: {len(faces_hill)}')
print(f'average dist to hill faces = {ut.levenshtein_faces_radius(faces_hill, R, V_test)}')

nb maximal faces: 99
average dist to hill faces = 0.5184386380814953


# DAMEX

In [8]:
# DAMEX hyper-parameters, passed positionally after the radius R.
eps, nb_min = 0.3, 10

faces_dmx = dmx.damex(V_train, R, eps, nb_min)

# Report how many faces were returned and how they score on the held-out sample.
print(f'nb faces: {len(faces_dmx)}')
print(f'average dist to damex faces = {ut.levenshtein_faces_radius(faces_dmx, R, V_test)}')

nb faces: 53
average dist to damex faces = 0.5948241693371563


# Peng

In [9]:
# Hyper-parameters for the Peng-based method. k is recomputed here with the
# same formula as in the Hill cell, so this cell does not rely on that one.
delta_p, rho_min = 0.05, 0.05
k = int(n_train / R - 1)

faces_peng = pg.peng(V_train, delta_p, k, rho_min=rho_min)

# Report how many faces were returned and how they score on the held-out sample.
print(f'nb maximal faces: {len(faces_peng)}')
print(f'average dist to peng faces = {ut.levenshtein_faces_radius(faces_peng, R, V_test)}')

nb maximal faces: 119
average dist to peng faces = 0.5096015035950101


# CLEF asymptotic

In [10]:
# Hyper-parameters for the asymptotic variant of CLEF.
# `k` is not set in this cell: it is the value computed in the Hill / Peng cells
# above, int(n_train/R - 1).
kappa_as_min = 0.05
delta_k = 0.05
faces_clf_as = clf_as.clef_as(V_train, delta_k, k, kappa_as_min)
print(f'nb maximal faces: {len(faces_clf_as)}')
# The label previously read "peng faces" (copied from the Peng cell) although the
# score is computed on the CLEF-asymptotic faces; the label now names the right method.
print(f'average dist to clef asymptotic faces = {ut.levenshtein_faces_radius(faces_clf_as, R, V_test)}')

nb maximal faces: 267
average dist to peng faces = 0.4858392989886497
