#  EagleEye tutorial


We import necessary libraries and set up custom plot configurations for consistency.

In [6]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Directory setup for custom modules
import sys
module_path = '../../eagleeye'
sys.path.append(module_path)
import EagleEye
from utils_EE import compute_the_null, partitioning_function

# Custom plotting settings
sns.set(style="darkgrid")
plt.rcParams.update({
    'axes.titlesize': 21,
    'axes.labelsize': 17,
    'xtick.labelsize': 17,
    'ytick.labelsize': 17,
    'legend.fontsize': 17,
    'axes.grid': True,
    'grid.alpha': 0.3,
    'grid.linestyle': '--',
    'grid.color': 'silver'
})


## 1. Generate the datasets 

In [7]:
# Functions for generating random points and data with anomalies
def generate_random_points(num_points, num_dimensions, radius, shift_factor=.5):
    """Generates random points in 3D with specified characteristic scale."""
    theta, phi = np.random.uniform(0, 2 * np.pi, (2, num_points))
    x = (radius + radius / 6 * np.cos(phi)) * np.cos(theta) + shift_factor
    y = (radius + radius / 6 * np.cos(phi)) * np.sin(theta) 
    z = (radius / 6) * np.sin(phi)
    if num_dimensions > 3:
        #noise = np.random.normal(0, radius / 2, (num_points, num_dimensions - 3))
        mean = np.random.normal(0, radius, num_dimensions - 3)
        covariance = np.eye(num_dimensions - 3) *radius**2  
        noise = np.random.multivariate_normal(mean, covariance, num_points)
        
        points = np.column_stack((x, y, z, noise))
    else:
        points = np.column_stack((x, y))
    return points



#def generate_donuts(dim, sizes,R, sig):
def generate_data_with_torus_anomalies(num_dimensions, cluster_sizes, anomaly_radius, shift_factors):
    samples = []
    
    samples.append(np.random.multivariate_normal(np.array([0] + [0] * (num_dimensions - 1)), np.eye(num_dimensions), sizes[0]))
    samples.append( generate_random_points(cluster_sizes[1], num_dimensions,anomaly_radius, shift_factors) )
    if len(sizes)>2:
        samples.append( generate_random_points(cluster_sizes[2], num_dimensions,anomaly_radius, shift_factors) )
    return np.vstack(samples)


def generate_gaussian_mixture(dim, sizes, means, covariances):
    samples = []
    for mean, cov, size in zip(means, covariances, sizes):
        samples.append(np.random.multivariate_normal(mean, cov, size))
    return np.vstack(samples)


#def setup_gaussian_components(dim=10,s1=3000,sig=0.5):
def setup_gaussian_components(num_dimensions=10, background_size=10000, shift_factors = 0.5, contamination_size=200):
    m1D1, m1D2 = -shift_factors, 0
    m2D1, m2D2 = +shift_factors, 0
    sizes = [background_size, contamination_size,1]
    means = [np.array([0] * (num_dimensions)), np.array([m1D1, m1D2] + [0] * (num_dimensions - 2)), np.array([m2D1, m2D2] + [0] * (num_dimensions - 2))]
    cstd2 = 0.01 + np.random.rand() * 0.02
    return num_dimensions, sizes, means, cstd2

In [8]:
#%% EagleEye hyperparameters

p       = .5

K_M     = 500

p_ext   = 1e-5

n_jobs  = 10

In [9]:
stats_null                     = compute_the_null(p=p, K_M=K_M)

In [10]:

contamination_sizes=[1000, 750, 500, 250, 150, 70]

results_10k = {
"Torous": {
    i: {"len_Pruned": None, "len_Repechaged": None,"Upsilon_star": None}
    for i in contamination_sizes
},
"Gaussian": {
    i: {"len_Pruned": None, "len_Repechaged": None,"Upsilon_star": None}
    for i in contamination_sizes
}
}


In [13]:
num_dimensions = 10
tot_samples = 10000

In [14]:

for contamination_size in contamination_sizes:
    background_size=tot_samples-contamination_size-1
    #loop over different center locations
    sig=1.
    sigma_a = .3

    dim, sizes, means, cstd2 = setup_gaussian_components(num_dimensions=num_dimensions, background_size=background_size, shift_factors = sig, contamination_size=contamination_size )
    cstd1 = sigma_a
    covariances = [np.eye(dim), sigma_a**2 * np.eye(dim), cstd2 * np.eye(dim)]

    X = np.random.multivariate_normal(np.array([0] + [0] * (dim - 1)), np.eye(dim), np.array(sizes).sum())

    test_data_G = generate_gaussian_mixture(dim, sizes, means, covariances)
    test_data_T = generate_data_with_torus_anomalies(num_dimensions=dim, cluster_sizes=sizes, anomaly_radius=sigma_a, shift_factors=sig)



    #VALIDATION            = reference_data.shape[0]


    result_dict, stats_null = EagleEye.Soar(X, test_data_T, K_M=K_M, p_ext=p_ext, n_jobs=n_jobs, stats_null=stats_null, result_dict_in={})

    #%% Cluter the Putative anomalies
    
    clusters = partitioning_function(X,test_data_T,result_dict,p_ext=p_ext,Z=2.65 )
    
    #%% Repêchage
    
    EE_book = EagleEye.Repechage(X,test_data_T,result_dict,clusters,p_ext=1e-5)
    

    result_dict_G, stats_null = EagleEye.Soar(X, test_data_G, K_M=K_M, p_ext=p_ext, n_jobs=n_jobs, stats_null=stats_null, result_dict_in={})

    #%% Cluter the Putative anomalies
    
    clusters_G = partitioning_function(X,test_data_G,result_dict,p_ext=p_ext,Z=2.65 )
    
    #%% Repêchage
    
    EE_book_G = EagleEye.Repechage(X,test_data_G,result_dict,clusters,p_ext=1e-5)
    
        
    results_10k['Torous'][contamination_size]['len_Repechaged'] = sum(len(EE_book['Y_OVER_clusters'][clust]['Repechaged']) if EE_book['Y_OVER_clusters'][clust]['Repechaged'] is not None else 0 for clust in range(len(EE_book['Y_OVER_clusters'])))
    results_10k['Torous'][contamination_size]['len_Pruned']  = sum(len(EE_book['Y_OVER_clusters'][clust]['Pruned']) if EE_book['Y_OVER_clusters'][clust]['Pruned'] is not None else 0 for clust in range(len(EE_book['Y_OVER_clusters'])))
    results_10k['Torous'][contamination_size]['Upsilon_star']  = result_dict['Upsilon_star_plus'][result_dict['p_ext']]

    results_10k['Gaussian'][contamination_size]['len_Repechaged'] = sum(len(EE_book_G['Y_OVER_clusters'][clust]['Repechaged']) if EE_book_G['Y_OVER_clusters'][clust]['Repechaged'] is not None else 0 for clust in range(len(EE_book_G['Y_OVER_clusters'])))
    results_10k['Gaussian'][contamination_size]['len_Pruned'] = sum(len(EE_book_G['Y_OVER_clusters'][clust]['Pruned']) if EE_book_G['Y_OVER_clusters'][clust]['Pruned'] is not None else 0 for clust in range(len(EE_book_G['Y_OVER_clusters'])))
    results_10k['Gaussian'][contamination_size]['Upsilon_star'] = result_dict['Upsilon_star_plus'][result_dict['p_ext']]


    

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering


  from .autonotebook import tqdm as notebook_tqdm


-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

<IPython.core.display.Math object>

alpha = 1


'DONE!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

<IPython.core.display.Math object>

alpha = 1


'DONE!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %
KNN completed: 50.00 %
-----------------------------------------------------------------
Flagging of putative anomalous points
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Pruning via iterative density equalization (IDE)
-----------------------------------------------------------------


<IPython.core.display.Math object>

'DONE!'

-----------------------------------------------------------------
Clustering
-----------------------------------------------------------------
-----------------------------------------------------------------
Repêchage
-----------------------------------------------------------------


<IPython.core.display.Math object>

alpha = 1


'DONE!'

'!!! No X-Overdensities found !!!'

Computed p-values for k=220
Computed p-values for k=221
Computed p-values for k=222
Computed p-values for k=223
Computed p-values for k=224
Computed p-values for k=225
Computed p-values for k=226
Computed p-values for k=227
Computed p-values for k=228
Computed p-values for k=229
Computed p-values for k=230
Computed p-values for k=231
Computed p-values for k=232
Computed p-values for k=233
Computed p-values for k=234
Computed p-values for k=235
Computed p-values for k=236
Computed p-values for k=237
Computed p-values for k=238
Computed p-values for k=239
Computed p-values for k=240
Computed p-values for k=241
Computed p-values for k=242
Computed p-values for k=243
Computed p-values for k=244
Computed p-values for k=245
Computed p-values for k=246
Computed p-values for k=247
Computed p-values for k=248
Computed p-values for k=249
Computed p-values for k=250
Computed p-values for k=251
Computed p-values for k=252
Computed p-values for k=253
Computed p-values for k=254
Computed p-values fo

Computed p-values for k=49
Computed p-values for k=50
Computed p-values for k=51
Computed p-values for k=52
Computed p-values for k=53
Computed p-values for k=54
Computed p-values for k=55
Computed p-values for k=56
Computed p-values for k=57
Computed p-values for k=58
Computed p-values for k=59
Computed p-values for k=60
Computed p-values for k=61
Computed p-values for k=62
Computed p-values for k=63
Computed p-values for k=64
Computed p-values for k=65
Computed p-values for k=66
Computed p-values for k=67
Computed p-values for k=68
Computed p-values for k=69
Computed p-values for k=70
Computed p-values for k=71
Computed p-values for k=72
Computed p-values for k=73
Computed p-values for k=74
Computed p-values for k=75
Computed p-values for k=76
Computed p-values for k=77
Computed p-values for k=78
Computed p-values for k=79
Computed p-values for k=80
Computed p-values for k=81
Computed p-values for k=82
Computed p-values for k=83
Computed p-values for k=84
Computed p-values for k=85
C

Computed p-values for k=366
Computed p-values for k=367
Computed p-values for k=368
Computed p-values for k=369
Computed p-values for k=370
Computed p-values for k=371
Computed p-values for k=372
Computed p-values for k=373
Computed p-values for k=374
Computed p-values for k=375
Computed p-values for k=376
Computed p-values for k=377
Computed p-values for k=378
Computed p-values for k=379
Computed p-values for k=380
Computed p-values for k=381
Computed p-values for k=382
Computed p-values for k=383
Computed p-values for k=384
Computed p-values for k=385
Computed p-values for k=386
Computed p-values for k=387
Computed p-values for k=388
Computed p-values for k=389
Computed p-values for k=390
Computed p-values for k=391
Computed p-values for k=392
Computed p-values for k=393
Computed p-values for k=394
Computed p-values for k=395
Computed p-values for k=396
Computed p-values for k=397
Computed p-values for k=398
Computed p-values for k=399
Computed p-values for k=400
Computed p-values fo

Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f4f8881b1a0>
Traceback (most recent call last):
  File "/u/s/sspringe/anaconda3/lib/python3.11/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/u/s/sspringe/anaconda3/lib/python3.11/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/u/s/sspringe/anaconda3/lib/python3.11/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
                   ^^^^^^^^^^^^^^^^^^
  File "/u/s/sspringe/anaconda3/lib/python3.11/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
             ^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no 

0.53 seconds for computing distances
ID estimation finished: selecting ID of [9.26]
kstar estimation started, Dthr = 23.92812698
0.01 seconds computing kstar
kstar-NN density estimation started
k-NN density estimation finished
Clustering started
init succeded
Raw identification of the putative centers:  0.000 sec
Further checking on centers:  0.001 sec 
Pruning of the centers wrongly identified in part one:  0.000 sec
Preliminary assignation finished:  0.003 sec
('Number of clusters before multimodality test=', 13)
Identification of the saddle points:  0.008 sec
Multimodality test finished:  0.000 sec
Final operations: 0.00022220611572265625 sec
Clustering finished, 1 clusters found
total time is, 0.01372671127319336
Computation of distances started
Computation of the distances up to 4 NNs started
0.01 seconds for computing distances
ID estimation finished: selecting ID of [3.7]
kstar estimation started, Dthr = 23.92812698
0.00 seconds computing kstar
kstar-NN density estimation starte

TypeError: unhashable type: 'numpy.ndarray'

In [15]:
results_10k

{'Torous': {1000: {'len_Pruned': 1019,
   'len_Repechaged': 1012,
   'Upsilon_star': 14.794602592183983},
  750: {'len_Pruned': 704,
   'len_Repechaged': 740,
   'Upsilon_star': 14.794602592183983},
  500: {'len_Pruned': 453,
   'len_Repechaged': 506,
   'Upsilon_star': 14.794602592183983},
  250: {'len_Pruned': 216,
   'len_Repechaged': 276,
   'Upsilon_star': 14.794602592183983},
  150: {'len_Pruned': 112,
   'len_Repechaged': 150,
   'Upsilon_star': 14.794602592183983},
  70: {'len_Pruned': 44,
   'len_Repechaged': 64,
   'Upsilon_star': 14.794602592183983}},
 'Gaussian': {1000: {'len_Pruned': 1019,
   'len_Repechaged': 1012,
   'Upsilon_star': 14.794602592183983},
  750: {'len_Pruned': 704,
   'len_Repechaged': 740,
   'Upsilon_star': 14.794602592183983},
  500: {'len_Pruned': 453,
   'len_Repechaged': 506,
   'Upsilon_star': 14.794602592183983},
  250: {'len_Pruned': 216,
   'len_Repechaged': 276,
   'Upsilon_star': 14.794602592183983},
  150: {'len_Pruned': 112,
   'len_Repechage

In [20]:
# define the null 
num_dimensions = 10
tot_samples_100k = 100000

In [21]:
results_100k = {
"Torous": {
    i: {"len_IE_extra": None, "len_From_test": None,"Upsilon_star": None}
    for i in contamination_sizes
},
"Gaussian": {
    i: {"len_IE_extra": None, "len_From_test": None,"Upsilon_star": None}
    for i in contamination_sizes
}
}


In [None]:
for contamination_size in contamination_sizes:
    background_size_100k=tot_samples_100k-contamination_size-1
    #loop over different center locations
    sig=1.
    sigma_a = .3

    dim, sizes, means, cstd2 = setup_gaussian_components(num_dimensions=num_dimensions, background_size=background_size_100k, shift_factors = sig, contamination_size=contamination_size )
    cstd1 = sigma_a
    covariances = [np.eye(dim), sigma_a**2 * np.eye(dim), cstd2 * np.eye(dim)]

    X = np.random.multivariate_normal(np.array([0] + [0] * (dim - 1)), np.eye(dim), np.array(sizes).sum())

    test_data_G = generate_gaussian_mixture(dim, sizes, means, covariances)
    test_data_T = generate_data_with_torus_anomalies(num_dimensions=dim, cluster_sizes=sizes, anomaly_radius=sigma_a, shift_factors=sig)


#VALIDATION            = reference_data.shape[0]


    result_dict, stats_null = EagleEye.Soar(X, test_data_T, K_M=K_M, p_ext=p_ext, n_jobs=n_jobs, stats_null=stats_null, result_dict_in={})

    #%% Cluter the Putative anomalies
    
    clusters = partitioning_function(X,test_data_T,result_dict,p_ext=p_ext,Z=2.65 )
    
    #%% Repêchage
    
    EE_book = EagleEye.Repechage(X,test_data_T,result_dict,clusters,p_ext=1e-5)
    

    result_dict_G, stats_null = EagleEye.Soar(X, test_data_G, K_M=K_M, p_ext=p_ext, n_jobs=n_jobs, stats_null=stats_null, result_dict_in={})

    #%% Cluter the Putative anomalies
    
    clusters_G = partitioning_function(X,test_data_G,result_dict,p_ext=p_ext,Z=2.65 )
    
    #%% Repêchage
    
    EE_book_G = EagleEye.Repechage(X,test_data_G,result_dict,clusters,p_ext=1e-5)
    
        
    results_100k['Torous'][contamination_size]['len_Repechaged'] = sum(len(EE_book['Y_OVER_clusters'][clust]['Repechaged']) if EE_book['Y_OVER_clusters'][clust]['Repechaged'] is not None else 0 for clust in range(len(EE_book['Y_OVER_clusters'])))
    results_100k['Torous'][contamination_size]['len_Pruned']  = sum(len(EE_book['Y_OVER_clusters'][clust]['Pruned']) if EE_book['Y_OVER_clusters'][clust]['Pruned'] is not None else 0 for clust in range(len(EE_book['Y_OVER_clusters'])))
    results_100k['Torous'][contamination_size]['Upsilon_star']  = result_dict['Upsilon_star_plus'][result_dict['p_ext']]

    results_100k['Gaussian'][contamination_size]['len_Repechaged'] = sum(len(EE_book_G['Y_OVER_clusters'][clust]['Repechaged']) if EE_book_G['Y_OVER_clusters'][clust]['Repechaged'] is not None else 0 for clust in range(len(EE_book_G['Y_OVER_clusters'])))
    results_100k['Gaussian'][contamination_size]['len_Pruned'] = sum(len(EE_book_G['Y_OVER_clusters'][clust]['Pruned']) if EE_book_G['Y_OVER_clusters'][clust]['Pruned'] is not None else 0 for clust in range(len(EE_book_G['Y_OVER_clusters'])))
    results_100k['Gaussian'][contamination_size]['Upsilon_star'] = result_dict['Upsilon_star_plus'][result_dict['p_ext']]

    

-----------------------------------------------------------------
Eagle...Soar!
-----------------------------------------------------------------
Compute the nearest neighbours
KNN completed: 0.00 %


In [None]:
TFRep_100k = [results_100k['Torous'][cluster]['len_Repechaged'] for cluster in contamination_sizes]
GFRep_100k = [results_100k['Gaussian'][cluster]['len_Repechaged'] for cluster in contamination_sizes]
TFPru_100k = [results_100k['Torous'][cluster]['len_Pruned'] for cluster in contamination_sizes]
GFPru_100k = [results_100k['Gaussian'][cluster]['len_Pruned'] for cluster in contamination_sizes]

In [None]:
plt.figure()
plt.scatter(contamination_sizes, TFPru_100k)
plt.scatter(contamination_sizes, GFPru_100k)
plt.plot(contamination_sizes,contamination_sizes, c='k')
plt.xlim([-30,1030])
plt.ylim([-30,1030])
plt.legend(['Pruned Torous 100k','Pruned Gaussian 100k'])
plt.xlabel('n_anomaly')
plt.ylabel('n_equalized')

In [None]:
plt.figure()
plt.scatter(contamination_sizes, np.array(TFPru_100k)/np.array(contamination_sizes))
plt.scatter(contamination_sizes, np.array(GFPru_100k)/np.array(contamination_sizes))

plt.xlim([-30,1030])
plt.ylim([0,2])
plt.legend(['Pruned Torous 100k','Pruned Gaussian 100k'])
plt.xlabel('n_anomaly')
plt.ylabel('proportion_equalized')

In [None]:
plt.figure()
plt.scatter(contamination_sizes, TFRep_100k)
plt.scatter(contamination_sizes, GFRep_100k)
plt.plot(contamination_sizes,contamination_sizes, c='k')
plt.xlim([-30,1030])
plt.ylim([-30,2030])
plt.legend(['Repechage Torous 100k','Repechage Gaussian 100k'])
plt.xlabel('n_anomaly')
plt.ylabel('n_repechage')

In [None]:
plt.figure()
plt.scatter(contamination_sizes, np.array(TFRep_100k)/np.array(contamination_sizes))
plt.scatter(contamination_sizes, np.array(GFRep_100k)/np.array(contamination_sizes))
plt.xlim([-30,1030])
plt.ylim([0,2])
plt.legend(['Repechage Torous 100k','Repechage Gaussian 100k'])
plt.xlabel('n_anomaly')
plt.ylabel('proportion_repechage')