In [1]:
# Imports
from __future__ import division
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

import logistic as lgtc
import generate_alphas as ga
import extreme_data as xtr
import clef_algo as clf
import hill_estimator as hill
import utilities as ut
import damex_algo as dmx
import peng_estimator as pg
import kappa_estimator as kp

from sklearn.model_selection import ShuffleSplit

## Generate data

In [2]:
# Parameters of the random draw of the "true" faces (subsets of features)
# used later to simulate the data and to score every algorithm.
d = 100        # dimension handed to the face and data generators
K = 80         # presumably the number of faces to draw -- confirm in generate_alphas
max_size = 8   # presumably the maximal size of a face -- confirm in generate_alphas
p_geom = 0.25  # presumably the parameter of a geometric law on sizes -- confirm
# gen_random_alphas returns a sequence; only its first element is kept here.
list_charged_faces = ga.gen_random_alphas(d, K, max_size, p_geom, with_singlet=False)[0]
# The faces presumably have different lengths, which makes the list ragged.
# Recent NumPy releases refuse to build an array implicitly from a ragged
# list (np.save would raise ValueError), so convert explicitly to an object
# array before saving. list_charged_faces itself is left untouched.
np.save('saves/faces.npy', np.array(list_charged_faces, dtype=object))
# To reuse a previous draw instead of generating a new one (object arrays
# are pickled on disk, hence allow_pickle=True):
#list_charged_faces = list(np.load('saves/faces.npy', allow_pickle=True))

In [4]:
# Simulate n observations with the asymmetric logistic generator on the
# faces drawn above, cache them on disk, then rank-transform the sample.
n = int(2e4)
as_dep = 0.5
X = lgtc.asym_logistic(d, list_charged_faces, n, as_dep)
# The file name carries the characters of str(as_dep) after the first two
# (e.g. '5' for 0.5).
logistic_path = 'saves/logistic_' + str(as_dep)[2:] + '.npy'
np.save(logistic_path, X)
# To reload a cached sample instead of simulating a new one:
#X = np.load(logistic_path)
V = xtr.rank_transformation(X)

## Split train/test

In [5]:
# Single random 90% / 10% split of the rank-transformed sample; the fixed
# random_state makes the split itself reproducible.
rs = ShuffleSplit(n_splits=1, test_size=.1, random_state=0)
# n_splits=1, so the first (and only) pair of index arrays is the split.
ind_train, ind_test = next(rs.split(V))
V_train, V_test = V[ind_train], V[ind_test]
n_train, n_test = V_train.shape[0], V_test.shape[0]
print('nb train:', n_train)
print('nb test:', n_test)

nb train: 18000
nb test: 2000


## Levenshtein distance on true faces

In [None]:
# Grid of thresholds R on which the true faces are scored against the
# whole rank-transformed sample V.
Rs = np.array([10, 50, 100, 150, 200, 400, 600, 800, 1000, 2000, 3000, 5000, 7500, 10000])
Ds_1, Ns = ut.dist_levenshtein_Rs(list_charged_faces, d, Rs, V, eps=0.1)
# Smallest distance on the grid and the threshold at which it is reached.
print(min(Ds_1), Rs[np.argmin(Ds_1)])
plt.plot(Rs, Ds_1)
# Label the axes so the figure can be read on its own.
plt.xlabel('R')
plt.ylabel('Levenshtein distance')
# Alternative x-axis: plot against Ns and relabel the ticks with Rs.
#plt.xlim(Ns[0], Ns[-1])
#plt.xticks(Ns, Rs)
# Explicit show(), as in the other plotting cells; it also keeps the
# Line2D repr out of the cell output.
plt.show()

### Extreme points

In [7]:
# Threshold used for the rest of the notebook, and the derived k.
R = 200
k = int(n_train/R - 1)
print('k = ', k)
# A row is kept as "extreme" when strictly more than feats_min of its
# coordinates exceed R.
feats_min = 0

# Binary (0./1.) indicator of the coordinates above the threshold.
V_bin = 1.*(V > R)
print('nb extreme points:', np.sum(np.sum(V > R, axis=1) > feats_min))

# Split the indicators with the same indices as the train/test split, then
# keep only the extreme rows of each part.
V_bin_train, V_bin_test = V_bin[ind_train], V_bin[ind_test]

V_bin_train = V_bin_train[V_bin_train.sum(axis=1) > feats_min]
print('nb extreme train points:', np.sum(np.sum(V_train > R, axis=1) > feats_min))

V_bin_test = V_bin_test[V_bin_test.sum(axis=1) > feats_min]
print('nb extreme test points:', np.sum(np.sum(V_test > R, axis=1) > feats_min))

k =  89
nb extreme points: 4464
nb extreme train points: 4043
nb extreme test points: 421


In [None]:
# Reference score: the true faces evaluated on the extreme test points.
dist_true_faces = ut.dist_levenshtein_R(list_charged_faces, d, V_bin_test)
print(dist_true_faces)

## CLEF

In [None]:
# Run CLEF on the binary extreme train points.
kappa_min = 0.1
faces_clf = clf.clef_0(V_bin_train, kappa_min)
# Size of each group returned by check_errors when the recovered faces are
# compared with the true ones.
errors_clf = xtr.check_errors(list_charged_faces, faces_clf, d)
print([len(group) for group in errors_clf])
print('nb faces:', len(faces_clf))
# Score of the recovered faces on the extreme test points.
print(ut.dist_levenshtein_R(faces_clf, d, V_bin_test))

In [None]:
# Score the CLEF faces on the test sample for every threshold of the grid
# Rs and plot the distances against the Ns returned alongside them.
Ds_2, Ns = ut.dist_levenshtein_Rs(faces_clf, d, Rs, V_test)
fig, ax = plt.subplots()
ax.plot(Ns, Ds_2)
ax.set_xlim(Ns[0], Ns[-1])
plt.show()

## Hill

In [None]:
# k and the two auxiliary thresholds r_p / r_m, obtained by shifting k by
# +/- int(k**(3/4)).
k = int(n_train/R - 1)
k_shift = int(k**(3./4))
r_p = n_train/(k + k_shift + 1)
r_m = n_train/(k - k_shift + 1)
delta = 0.1
# Hill-based algorithm on the train sample and its three thresholdings.
faces_hill = hill.hill_0(
    V_train,
    V_train > R,
    V_train > r_p,
    V_train > r_m,
    delta,
    k,
    var_max=5,
    verbose=1,
)
# Size of each group returned by check_errors when the recovered faces are
# compared with the true ones.
errors_hill = xtr.check_errors(list_charged_faces, faces_hill, d)
print([len(group) for group in errors_hill])
print('nb faces:', len(faces_hill))
# Score of the recovered faces on the extreme test points.
print(ut.dist_levenshtein_R(faces_hill, d, V_bin_test))

In [None]:
# Score the Hill faces on the test sample for every threshold of the grid
# Rs and plot the distances against the Ns returned alongside them.
Ds_3, Ns = ut.dist_levenshtein_Rs(faces_hill, d, Rs, V_test)
fig, ax = plt.subplots()
ax.plot(Ns, Ds_3)
ax.set_xlim(Ns[0], Ns[-1])
plt.show()

## DAMEX

In [None]:
# DAMEX input: keep the train rows whose largest coordinate exceeds R, then
# flag the coordinates above the lowered threshold R*eps.
eps = 0.1
V_extreme_dmx = V_train[np.max(V_train, axis=1) > R]
V_bin_dmx = 1.*(V_extreme_dmx > R*eps)
# Drop the rows left with no flagged coordinate.
V_bin_dmx = V_bin_dmx[np.sum(V_bin_dmx, axis=1) > 0]
faces_dmx, mass = dmx.damex_0(V_bin_dmx)
# Keep as many leading faces as there are masses above 1.
nb_kept = np.sum(mass > 1)
faces_dmx = faces_dmx[:nb_kept]
# Size of each group returned by check_errors when the recovered faces are
# compared with the true ones.
errors_dmx = xtr.check_errors(list_charged_faces, faces_dmx, d)
print([len(group) for group in errors_dmx])
print('nb faces:', len(faces_dmx))
# Unused alternative: a DAMEX-style binarisation of the test sample.
#V_bin_dmx_test = 1.*(V_test[np.max(V_test, axis=1) > R] > R*eps)
#V_bin_dmx_test = V_bin_dmx_test[np.sum(V_bin_dmx_test, axis=1) > 1]
# Score of the recovered faces on the extreme test points.
print(ut.dist_levenshtein_R(faces_dmx, d, V_bin_test))

In [None]:
# Score the DAMEX faces on the test sample for every threshold of the grid
# Rs and plot the distances against the Ns returned alongside them.
Ds_4, Ns = ut.dist_levenshtein_Rs(faces_dmx, d, Rs, V_test)
fig, ax = plt.subplots()
ax.plot(Ns, Ds_4)
ax.set_xlim(Ns[0], Ns[-1])
plt.show()

## Peng

In [13]:
# k and the auxiliary thresholds: r_p / r_m shift k by +/- int(k**(3/4)),
# r_2 uses 2*k.
k = int(n_train/R - 1)
k_shift = int(k**(3./4))
r_p = n_train/(k + k_shift + 1)
r_m = n_train/(k - k_shift + 1)
delta_p = 0.3
r_2 = n_train/(2*k + 1)
# Peng-based algorithm on the four thresholdings of the train sample.
faces_peng = pg.peng_0(
    V_train > R,
    V_train > r_2,
    V_train > r_p,
    V_train > r_m,
    delta_p,
    k,
    rho_min=0.05,
)
# Size of each group returned by check_errors when the recovered faces are
# compared with the true ones.
errors_peng = xtr.check_errors(list_charged_faces, faces_peng, d)
print([len(group) for group in errors_peng])
print('nb faces:', len(faces_peng))
# Score of the recovered faces on the extreme test points.
print(ut.dist_levenshtein_R(faces_peng, d, V_bin_test))

2 : 514
3 : 506
4 : 285
5 : 98
6 : 20
[15, 66, 0, 147, 37]
nb faces: 162
0.551899894774004


## CLEF asymptotic

In [19]:
# Recompute k and the auxiliary thresholds in this cell, with the same
# formulas as the Hill and Peng cells, so that the result does not depend
# on which of those cells happened to run last.
k = int(n_train/R - 1)
r_p = n_train/(k + int(k**(3./4)) + 1)
r_m = n_train/(k - int(k**(3./4)) + 1)
kappa_as = 0.05
delta_k = 0.05
# Asymptotic CLEF variant on the train sample and its three thresholdings.
faces_kapas = kp.kappa_as_0(V_train, V_train > R, V_train > r_p, V_train > r_m, delta_k, k, kappa_as)
# Size of each group returned by check_errors when the recovered faces are
# compared with the true ones.
print(list(map(len, xtr.check_errors(list_charged_faces, faces_kapas, d))))
print('nb faces:', len(faces_kapas))
# Score of the recovered faces on the extreme test points.
print(ut.dist_levenshtein_R(faces_kapas, d, V_bin_test))

2 : 669
3 : 946
4 : 746
5 : 319
6 : 59
[30, 51, 8, 216, 90]
nb faces: 254
0.5000047128906987
