k fold 교차검증 orbit

# Import

In [14]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import random
import pickle

import warnings
warnings.simplefilter("ignore", np.ComplexWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
from haversine import haversine
from IPython.display import HTML
import plotly.graph_objects as go
import copy 

import tqdm
from rpy2.robjects.packages import importr
from rpy2.robjects.vectors import FloatVector 

from pygsp import graphs, filters, plotting, utils

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, roc_curve, auc

In [15]:
from pyod.models.lof import LOF
from pyod.models.knn import KNN
from pyod.models.cblof import CBLOF
from pyod.models.ocsvm import OCSVM
from pyod.models.mcd import MCD
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.hbos import HBOS
from pyod.models.sos import SOS
from pyod.models.so_gaal import SO_GAAL
from pyod.models.mo_gaal import MO_GAAL
from pyod.models.lscp import LSCP

# Class

In [16]:
class Conf_matrx:
    def __init__(self,original,compare):
        self.original = original
        self.compare = compare
    def conf(self,name):
        self.name = name
        self.conf_matrix = confusion_matrix(self.original, self.compare)
        
        # fig, ax = plt.subplots(figsize=(5, 5))
        # ax.matshow(self.conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
        # for i in range(self.conf_matrix.shape[0]):
        #     for j in range(self.conf_matrix.shape[1]):
        #         ax.text(x=j, y=i,s=self.conf_matrix[i, j], va='center', ha='center', size='xx-large')
        # plt.xlabel('Predictions', fontsize=18)
        # plt.ylabel('Actuals', fontsize=18)
        # plt.title('Confusion Matrix of ' + str(name), fontsize=18)
        # plt.show()
        
        self.acc = accuracy_score(self.original, self.compare)
        self.pre = precision_score(self.original, self.compare)
        self.rec = recall_score(self.original, self.compare)
        self.f1 = f1_score(self.original, self.compare)
        
        # print('Accuracy: %.3f' % self.acc)
        # print('Precision: %.3f' % self.pre)
        # print('Recall: %.3f' % self.rec)
        # print('F1 Score: %.3f' % self.f1)

In [17]:
class Linear:
    def __init__(self,df):
        self.df = df
        self.y = df.y.to_numpy()
        self.x = df.x.to_numpy()
        self.n = len(self.y)
        self.W = w
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)      
    def fit(self,sd=20): # fit with ebayesthresh
        self._eigen()
        self.ybar = self.Psi.T @ self.y # fbar := graph fourier transform of f
        self.power = self.ybar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.ybar_threshed = np.where(self.power_threshed>0,self.ybar,0)
        self.yhat = self.Psi@self.ybar_threshed
        self.df = self.df.assign(yHat = self.yhat)
        self.df = self.df.assign(Residual = self.df.y- self.df.yHat)

In [18]:
class Orbit:
    def __init__(self,df):
        self.df = df 
        self.f = df.f.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.n = len(self.f)
        self.theta= None
    def get_distance(self):
        self.D = np.zeros([self.n,self.n])
        locations = np.stack([self.x, self.y],axis=1)
        for i in tqdm.tqdm(range(self.n)):
            for j in range(i,self.n):
                self.D[i,j]=np.linalg.norm(locations[i]-locations[j])
        self.D = self.D + self.D.T
    def get_weightmatrix(self,theta=1,beta=0.5,kappa=4000):
        self.theta = theta
        dist = np.where(self.D < kappa,self.D,0)
        self.W = np.exp(-(dist/self.theta)**2)
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)       
    def fit(self,sd=5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.f # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f- self.df.fHat)

In [19]:
class BUNNY:
    def __init__(self,df):
        self.df = df 
        self.f = df.f.to_numpy()
        self.z = df.z.to_numpy()
        self.x = df.x.to_numpy()
        self.y = df.y.to_numpy()
        self.noise = df.noise.to_numpy()
        self.fnoise = self.f + self.noise
        self.W = _W
        self.n = len(self.f)
        self.theta= None
    def _eigen(self):
        d= self.W.sum(axis=1)
        D= np.diag(d)
        self.L = np.diag(1/np.sqrt(d)) @ (D-self.W) @ np.diag(1/np.sqrt(d))
        self.lamb, self.Psi = np.linalg.eigh(self.L)
        self.Lamb = np.diag(self.lamb)       
    def fit(self,sd=5): # fit with ebayesthresh
        self._eigen()
        self.fbar = self.Psi.T @ self.fnoise # fbar := graph fourier transform of f
        self.power = self.fbar**2 
        ebayesthresh = importr('EbayesThresh').ebayesthresh
        self.power_threshed=np.array(ebayesthresh(FloatVector(self.power),sd=sd))
        self.fbar_threshed = np.where(self.power_threshed>0,self.fbar,0)
        self.fhat = self.Psi@self.fbar_threshed
        self.df = self.df.assign(fnoise = self.fnoise)
        self.df = self.df.assign(fHat = self.fhat)
        self.df = self.df.assign(Residual = self.df.f + self.df.noise - self.df.fHat)

In [54]:
class fortable:
    def __init__(self, df, clf, tab, outlier_true, conf_name = "Method"):
        self.df = df
        self.clf = clf
        self.conf_name = conf_name
        self.tabb = tab
        self.outlier_true = outlier_true
        
    def _forfit(self):
        if 'fnoise' in self.df.columns:
            self.clf.fit(self.df[['x', 'y','fnoise']])
        elif 'f' in self.df.columns:
            self.clf.fit(self.df[['x', 'y', 'f']])
        if 'f' not in self.df.columns:
            self.clf.fit(self.df[['x', 'y']])

    def _forlabels(self):
        self.labels = list(self.clf.labels_)

    def _forpredict(self):
        self.predict = self.clf.fit_predict(self.df)
        
    def comparison(self, compare_outlier = None, conf_outlier = None, gode = False):
        if gode == False:
            self._forfit()
            self._forlabels()
            if 'fnoise' in self.df.columns:
                compare_outlier = self.clf.decision_function(np.array(self.df[['x', 'y','fnoise']]))
            else:
                compare_outlier = self.clf.decision_function(self.df)
            if self.conf_name == "LOF":
                self._forpredict()
                conf_outlier = self.predict
            elif self.conf_name != "LOF":
                conf_outlier = self.labels
        elif gode == True:
            compare_outlier = compare_outlier
            conf_outlier = conf_outlier

        fpr, tpr, thresh = roc_curve(self.outlier_true, compare_outlier)       
        
        _conf = Conf_matrx(self.outlier_true, conf_outlier)
        _conf.conf(self.conf_name)
        
        _table = pd.concat([self.tabb,
                   pd.DataFrame({"Accuracy":[_conf.acc],"Precision":[_conf.pre],"Recall":[_conf.rec],"F1":[_conf.f1],"AUC":[auc(fpr, tpr)],"N":n, "Contamination": eta_sparsity},index = [_conf.name])])
        
        return _table

In [34]:
class fortable:
    def __init__(self, df, clf, tab, outlier_true, conf_name = "Method"):
        self.df = df
        self.clf = clf
        self.conf_name = conf_name
        self.tabb = tab
        self.outlier_true = outlier_true
        
    def _forfit(self):
        if 'fnoise' in self.df.columns:
            self.clf.fit(self.df[['x', 'y','fnoise']])
        elif 'f' in self.df.columns:
            self.clf.fit(self.df[['x', 'y', 'f']])
        if 'f' not in self.df.columns:
            self.clf.fit(self.df[['x', 'y']])

    def _forlabels(self):
        self.labels = list(self.clf.labels_)

    def _forpredict(self):
        self.predict = self.clf.fit_predict(self.df)
        
    def comparison(self, compare_outlier = None, conf_outlier = None, gode = False):
        if gode == False:
            self._forfit()
            self._forlabels()
            if 'fnoise' in self.df.columns:
                compare_outlier = self.clf.decision_function(np.array(self.df[['x', 'y','fnoise']]))
            else:
                compare_outlier = self.clf.decision_function(self.df)
            if self.conf_name == "LOF":
                self._forpredict()
                conf_outlier = self.predict
            elif self.conf_name != "LOF":
                conf_outlier = self.labels
        elif gode == True:
            compare_outlier = compare_outlier
            conf_outlier = conf_outlier

        fpr, tpr, thresh = roc_curve(self.outlier_true, compare_outlier)       
        
        _conf = Conf_matrx(self.outlier_true, conf_outlier)
        _conf.conf(self.conf_name)
        
        _table = pd.concat([self.tabb,
                   pd.DataFrame({"Accuracy":[_conf.acc],"Precision":[_conf.pre],"Recall":[_conf.rec],"F1":[_conf.f1],"AUC":[auc(fpr, tpr)],"N":n, "Contamination": eta_sparsity,"kappa":kappa},index = [_conf.name])])
        
        return _table

# Linear

In [None]:
# tab_linear = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])

In [None]:
n = 10000
eta_sparsity = 0.2
random_seed = 77

In [None]:
np.random.seed(6)

epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-7, -5, round(n*eta_sparsity/2)).round(15), np.random.uniform(5, 7, round(n*eta_sparsity/2)).round(15), np.repeat(0, n - round(n*eta_sparsity)))), n)
eta = signal + epsilon

outlier_true_linear= signal.copy()
outlier_true_linear = list(map(lambda x: 1 if x!=0 else 0,outlier_true_linear))

x_1 = np.linspace(0,2,n)
y1_1 = 5 * x_1
y_1 = y1_1 + eta # eta = signal + epsilon

_df=pd.DataFrame({'x':x_1, 'y':y_1})

w=np.zeros((n,n))

for i in range(n):
    for j in range(n):
        if i==j :
            w[i,j] = 0
        elif np.abs(i-j) <= 1 : 
            w[i,j] = 1

index_of_trueoutlier_bool = signal!=0

## GODE_Linear

In [None]:
_Linear = Linear(_df)
_Linear.fit(sd=20)

outlier_GODE_linear_old = (_Linear.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_linear_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
five_percent = sorted_data[index]
outlier_GODE_linear = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_linear_old))

tab_lin = fortable(_df, clf = None, tab =tab_linear, outlier_true=outlier_true_linear, conf_name ="GODE")

tab_linear = tab_lin.comparison(compare_outlier = outlier_GODE_linear_old, conf_outlier = outlier_GODE_linear, gode = True)

## LOF_Linear

In [None]:
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "LOF")

tab_linear = tab_lin.comparison()

## KNN_Linear

In [None]:
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "KNN")

tab_linear = tab_lin.comparison()

## CBLOF_Linear

In [None]:
clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)
tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "CBLOF")

tab_linear = tab_lin.comparison()

## OCSVM_Linear

In [None]:
np.random.seed(random_seed)
clf = OCSVM(nu=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "OCSVM")

tab_linear = tab_lin.comparison()

## MCD_Linear

In [None]:
clf = MCD(contamination=eta_sparsity, random_state = random_seed)
tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "MCD")

tab_linear = tab_lin.comparison()

## Feature Bagging_Linear

In [None]:
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "Feature Bagging")

tab_linear = tab_lin.comparison()

## ABOD_Linear

In [None]:
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "ABOD")

tab_linear = tab_lin.comparison()

## IForest_Linear

In [None]:
clf = IForest(contamination=eta_sparsity, random_state=random_seed)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "Isolation Forest")

tab_linear = tab_lin.comparison()

## HBOS_Linear

In [None]:
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "HBOS")

tab_linear = tab_lin.comparison()

## SOS_Linear

In [None]:
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "SOS")

tab_linear = tab_lin.comparison()

## LSCP_Linear

In [None]:
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors,contamination=eta_sparsity, random_state=random_seed)

tab_lin = fortable(_df, clf, tab_linear, outlier_true_linear, "LSCP")

tab_linear = tab_lin.comparison()

## tab_linear

In [None]:
round(tab_linear,3)#.to_csv('./Example_1_2.csv')

In [None]:
# tab_linear.to_csv('./Example_1_Dataset.csv')

# Orbit

In [25]:
# tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])

In [35]:
n = 1000
eta_sparsity = 0.05
random_seed=77

In [37]:
n_values = list([1000,5000,10000])  
eta_sparsity_list = list([0.01,0.05,0.1])
random_seed=77
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kapapa"])

In [38]:
np.random.seed(777)
epsilon = np.around(np.random.normal(size=n),15)
signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
eta = signal + epsilon
pi=np.pi
ang=np.linspace(-pi,pi-2*pi/n,n)
r=5+np.cos(np.linspace(0,12*pi,n))
vx=r*np.cos(ang)
vy=r*np.sin(ang)
f1=10*np.sin(np.linspace(0,6*pi,n))
f = f1 + eta
_df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f})
outlier_true_orbit = signal.copy()
outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))

## GODE_Orbit

In [40]:
n_values = list([1000,5000,10000])  
eta_sparsity_list = list([0.01,0.05,0.1])
random_seed=77
kappa=1.21

tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kapapa"])

In [41]:
# _Orbit = Orbit(_df)
# _Orbit.get_distance()

# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa) 
# _Orbit.fit(sd=15)

# outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
# sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
# index = int(len(sorted_data) * eta_sparsity)
# five_percent = sorted_data[index]
# outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))

# tab_orb = fortable(_df, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")

# tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)

100%|██████████| 1000/1000 [00:01<00:00, 764.14it/s]
  _table = pd.concat([self.tabb,


In [42]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])
    
for eta_sparsity in eta_sparsity_list:
    
    for n in n_values:

        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
    
        _df_orbit = _df[['x','y','f']]

        _Orbit = Orbit(_df_orbit)
        _Orbit.get_distance()
        
        _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=kappa) 
        _Orbit.fit(sd=15)
        
        outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
        sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
        index = int(len(sorted_data) * eta_sparsity)
        percent = sorted_data[index]
        outlier_GODE_orbit = list(map(lambda x: 1 if x > percent else 0,outlier_GODE_orbit_old))
        
        tab_orb = fortable(_df_orbit, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")
        
        tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)

100%|██████████| 1000/1000 [00:01<00:00, 733.98it/s]
  _table = pd.concat([self.tabb,
100%|██████████| 5000/5000 [00:31<00:00, 158.63it/s] 
100%|██████████| 10000/10000 [02:03<00:00, 80.67it/s] 
100%|██████████| 1000/1000 [00:01<00:00, 783.42it/s]
100%|██████████| 5000/5000 [00:30<00:00, 161.77it/s]
100%|██████████| 10000/10000 [02:02<00:00, 81.36it/s] 
100%|██████████| 1000/1000 [00:01<00:00, 814.47it/s]
100%|██████████| 5000/5000 [00:31<00:00, 161.02it/s]
100%|██████████| 10000/10000 [02:05<00:00, 79.47it/s] 


In [44]:
tab_orbit_gode = tab_orbit.copy()

In [None]:
# _Orbit = Orbit(_df)
# _Orbit.get_distance()

# _Orbit.get_weightmatrix(theta=(_Orbit.D[_Orbit.D>0].mean()),kappa=2500) 
# _Orbit.fit(sd=15)

# outlier_GODE_orbit_old = (_Orbit.df['Residual']**2).tolist()
# sorted_data = sorted(outlier_GODE_orbit_old,reverse=True)
# index = int(len(sorted_data) * eta_sparsity)
# five_percent = sorted_data[index]
# outlier_GODE_orbit = list(map(lambda x: 1 if x > five_percent else 0,outlier_GODE_orbit_old))

# tab_orb = fortable(_df, clf = None, tab =tab_orbit, outlier_true=outlier_true_orbit, conf_name ="GODE")

# tab_orbit = tab_orb.comparison(compare_outlier = outlier_GODE_orbit_old, conf_outlier = outlier_GODE_orbit, gode = True)

## LOF_Orbit

In [116]:
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LOF")

tab_orbit = tab_orb.comparison()



In [117]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC","N","Contamination","kappa"])

for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = LOF(contamination=eta_sparsity)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LOF")
        
        tab_orbit = tab_orb.comparison()

  _table = pd.concat([self.tabb,


In [118]:
tab_orbit_LOF = tab_orbit

## KNN_Orbit

In [119]:
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "kNN")

tab_orbit = tab_orb.comparison()

In [120]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = KNN(contamination=eta_sparsity)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "kNN")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_kNN = tab_orbit

  _table = pd.concat([self.tabb,


## CBLOF_Orbit

In [121]:
clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "CBLOF")

tab_orbit = tab_orb.comparison()

  super()._check_params_vs_input(X, default_n_init=10)


In [122]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)

        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "CBLOF")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_CBLOF = tab_orbit

  super()._check_params_vs_input(X, default_n_init=10)
  _table = pd.concat([self.tabb,
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


## OCSVM_Orbit

In [123]:
np.random.seed(random_seed)
clf = OCSVM(nu=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "OCSVM")

tab_orbit = tab_orb.comparison()



In [124]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = OCSVM(nu=eta_sparsity)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "OCSVM")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_OCSVM = tab_orbit

  _table = pd.concat([self.tabb,


## MCD_Orbit

In [92]:
clf = MCD(contamination=eta_sparsity , random_state = random_seed)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "MCD")

tab_orbit = tab_orb.comparison()

In [93]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        clf = MCD(contamination=eta_sparsity , random_state = random_seed)

        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "MCD")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_MCD = tab_orbit

  _table = pd.concat([self.tabb,


## Feature Bagging_Orbit

In [94]:
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Feature Bagging")

tab_orbit = tab_orb.comparison()

In [95]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)

        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Feature Bagging")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_Feature = tab_orbit

  _table = pd.concat([self.tabb,


## ABOD_Orbit

In [96]:
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "ABOD")

tab_orbit = tab_orb.comparison()

In [97]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = ABOD(contamination=eta_sparsity)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "ABOD")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_ABOD = tab_orbit

  _table = pd.concat([self.tabb,


## IForest_Orbit

In [98]:
clf = IForest(contamination=eta_sparsity,random_state=random_seed)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Isolation Forest")

tab_orbit = tab_orb.comparison()



In [99]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        clf = IForest(contamination=eta_sparsity,random_state=random_seed)

        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "Isolation Forest")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_Isolation = tab_orbit

  _table = pd.concat([self.tabb,


## HBOS_Orbit

In [100]:
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "HBOS")

tab_orbit = tab_orb.comparison()

In [101]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = HBOS(contamination=eta_sparsity)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "HBOS")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_HBOS = tab_orbit

  _table = pd.concat([self.tabb,


## SOS_Orbit

In [102]:
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "SOS")

tab_orbit = tab_orb.comparison()

In [103]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        np.random.seed(random_seed)
        clf = SOS(contamination=eta_sparsity)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "SOS")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_SOS = tab_orbit

  _table = pd.concat([self.tabb,


## LSCP_Orbit

In [104]:
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors,contamination=eta_sparsity, random_state=random_seed)

tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LSCP")

tab_orbit = tab_orb.comparison()



In [105]:
tab_orbit = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])
for n in n_values:
    for eta_sparsity in eta_sparsity_list:
        np.random.seed(777)
        epsilon = np.around(np.random.normal(size=n),15)
        signal = np.random.choice(np.concatenate((np.random.uniform(-4, -1, round(n * eta_sparsity / 2)).round(15), np.random.uniform(1, 4, round(n * eta_sparsity / 2)).round(15), np.repeat(0, n - round(n * eta_sparsity)))), n)
        eta = signal + epsilon
        pi=np.pi
        ang=np.linspace(-pi,pi-2*pi/n,n)
        r=5+np.cos(np.linspace(0,12*pi,n))
        vx=r*np.cos(ang)
        vy=r*np.sin(ang)
        f1=10*np.sin(np.linspace(0,6*pi,n))
        f = f1 + eta
        _df = pd.DataFrame({'x' : vx, 'y' : vy, 'f' : f,'f1':f1})
        outlier_true_orbit = signal.copy()
        outlier_true_orbit = list(map(lambda x: 1 if x!=0 else 0,outlier_true_orbit))
        index_of_trueoutlier_bool = signal!=0
            
        detectors = [KNN(), LOF(), OCSVM()]
        clf = LSCP(detectors,contamination=eta_sparsity, random_state=random_seed)
        
        tab_orb = fortable(_df[['x','y','f']], clf, tab_orbit, outlier_true_orbit, "LSCP")
        
        tab_orbit = tab_orb.comparison()
tab_orbit_LSCP = tab_orbit

  _table = pd.concat([self.tabb,


## tab_orbit

In [None]:
round(tab_orbit,3)

In [126]:
pd.concat([tab_orbit_gode.iloc[:,:-1],
           tab_orbit_LOF,
           tab_orbit_kNN,
          tab_orbit_CBLOF,
          tab_orbit_OCSVM,
          tab_orbit_MCD,
          tab_orbit_Feature,
          tab_orbit_ABOD,
          tab_orbit_Isolation,
          tab_orbit_HBOS,
          tab_orbit_SOS,
          tab_orbit_LSCP]).to_csv('./Example_2_Dataset.csv')

# Bunny

In [None]:
# tab_bunny = pd.DataFrame(columns=["Accuracy","Precision","Recall","F1","AUC"])

In [None]:
eta_sparsity = 0.2
random_seed=77
n = 2503

In [None]:
with open("../../2_research/Bunny.pkl", "rb") as file:
    loaded_obj = pickle.load(file)

In [None]:
_df = pd.DataFrame({'x':loaded_obj['x'],'y':loaded_obj['y'],'z':loaded_obj['z'],'fnoise':loaded_obj['f']+loaded_obj['noise'],'f':loaded_obj['f'],'noise':loaded_obj['noise']})
outlier_true_bunny = loaded_obj['unif'].copy()
outlier_true_bunny = list(map(lambda x: 1 if x !=0  else 0,outlier_true_bunny))
index_of_trueoutlier_bool_bunny = loaded_obj['unif']!=0
_W = loaded_obj['W'].copy()

## GODE_Bunny

In [None]:
_BUNNY = BUNNY(_df)
_BUNNY.fit(sd=20)

outlier_GODE_bunny_old = (_BUNNY.df['Residual']**2).tolist()
sorted_data = sorted(outlier_GODE_bunny_old,reverse=True)
index = int(len(sorted_data) * eta_sparsity)
n_percent = sorted_data[index]
outlier_GODE_bunny = list(map(lambda x: 1 if x > n_percent else 0,outlier_GODE_bunny_old))

tab_bun = fortable(_df, clf = None, tab =tab_bunny, outlier_true=outlier_true_bunny, conf_name ="GODE")

tab_bunny = tab_bun.comparison(compare_outlier = outlier_GODE_bunny_old, conf_outlier = outlier_GODE_bunny, gode = True)

## LOF_Bunny

In [None]:
np.random.seed(random_seed)
clf = LOF(contamination=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "LOF")

tab_bunny = tab_bun.comparison()

## KNN_Bunny

In [None]:
np.random.seed(random_seed)
clf = KNN(contamination=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "kNN")

tab_bunny = tab_bun.comparison()

## CBLOF_Bunny

In [None]:
clf = CBLOF(contamination=eta_sparsity,random_state=random_seed)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "CBLOF")

tab_bunny = tab_bun.comparison()

## OCSVM_Bunny

In [None]:
np.random.seed(random_seed)
clf = OCSVM(nu=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "OCSVM")

tab_bunny = tab_bun.comparison()

## MCD_Bunny

In [None]:
clf = MCD(contamination=eta_sparsity , random_state = random_seed)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "MCD")

tab_bunny = tab_bun.comparison()

## Feature Bagging_Bunny

In [None]:
clf = FeatureBagging(contamination=eta_sparsity, random_state=random_seed)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "Feature Bagging")

tab_bunny = tab_bun.comparison()

## ABOD_Bunny

In [None]:
np.random.seed(random_seed)
clf = ABOD(contamination=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "ABOD")

tab_bunny = tab_bun.comparison()

## IForest_Bunny

In [None]:
clf = IForest(contamination=eta_sparsity,random_state=random_seed)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "Isolation Forest")

tab_bunny = tab_bun.comparison()

## HBOS_Bunny

In [None]:
np.random.seed(random_seed)
clf = HBOS(contamination=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "HBOS")

tab_bunny = tab_bun.comparison()

## SOS_Bunny

In [None]:
np.random.seed(random_seed)
clf = SOS(contamination=eta_sparsity)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "SOS")

tab_bunny = tab_bun.comparison()

## LSCP_Bunny

In [None]:
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(detectors,contamination=eta_sparsity, random_state=random_seed)

tab_bun = fortable(_df, clf, tab_bunny, outlier_true_bunny, "LSCP")

tab_bunny = tab_bun.comparison()

## tab_bunny

In [None]:
round(tab_bunny,3)

In [None]:
# tab_bunny.to_csv('./Example_3_Dataset.csv')