In [1]:
import pyod

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [3]:
import os
import sys
from sklearn.model_selection import train_test_split

In [4]:
from scipy.io import loadmat # for matlab file

# Importing PYOD packages and methods

In [5]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM

In [6]:
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS

In [7]:
from pyod.models.abod import ABOD


In [8]:
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging



# Performance matrics packages

In [9]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

# Define Data file and read X & Y

In [10]:
my_list=['arrhythmia','cardio','glass','ionosphere','letter','lympho','mnist','musk','optdigits','pendigits','pima','satellite','satimage-2','shuttle','vertebral','vowels','wbc']

In [11]:
my_list

['arrhythmia',
 'cardio',
 'glass',
 'ionosphere',
 'letter',
 'lympho',
 'mnist',
 'musk',
 'optdigits',
 'pendigits',
 'pima',
 'satellite',
 'satimage-2',
 'shuttle',
 'vertebral',
 'vowels',
 'wbc']

# How to load Matfile

In [12]:
data=loadmat("D:\\LetsUpgrade\\Projects\\Anomaly Detection\\cardio.mat")

In [13]:
len(data)

5

In [14]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [15]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [16]:
data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

In [17]:
type(data)

dict

# Independent Features

In [18]:
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

# Dependent Variable

In [19]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [20]:
data['X'].shape

(1831, 21)

In [21]:
df_columns=['Data','Samples','Dimensions','outlier perc','ABOD','CBLOF','FB','HBOS','IFores','KNN','LOF','MCD','OCSVM','PCA']

# ROC Curve: Region of characteristics

In [22]:
roc_df=pd.DataFrame(columns=df_columns)

In [23]:
roc_df

Unnamed: 0,Data,Samples,Dimensions,outlier perc,ABOD,CBLOF,FB,HBOS,IFores,KNN,LOF,MCD,OCSVM,PCA


# Performance Evaluation table

In [24]:
prn_df=pd.DataFrame(columns=df_columns)

# Time DataFrame

In [25]:
time_df=pd.DataFrame(columns=df_columns)

# Exploring all matfiles

In [26]:
from time import time
for mat_file in my_list: # list created
    print("\n...Processing",mat_file,"...")
    mat=loadmat(os.path.join("D:\\LetsUpgrade\\Projects\\Anomaly Detection",mat_file)) #path of data
    X=mat['X']# Independent variable
    y=mat['y'].ravel() # dependent variable, ravel convert 2D into 1D
    outlier_fraction=np.count_nonzero(y)/len(y) # entire count of y value
    outlier_percentage=round(outlier_fraction*100,ndigits=4) # to round the value
    
    # construct the conatiner for storing the result
    roc_list=[mat_file[:-4],X.shape[0],X.shape[1],outlier_percentage]
    prn_list=[mat_file[:-4],X.shape[0],X.shape[1],outlier_percentage]
    time_list=[mat_file[:-4],X.shape[0],X.shape[1],outlier_percentage]
    
    # split the data
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=42)
    
    # standardizing the data for procesing
    X_train_norm,X_test_norm= standardizer(X_train,X_test)
    
    # define the algorithms
    classifier={'Angle Based outlier detector':ABOD(contamination=outlier_fraction),
               'Cluster Based outlier Factor':CBLOF(contamination=outlier_fraction,check_estimator=False,random_state=42),
               'Feature Bagging':FeatureBagging(contamination=outlier_fraction,random_state=42),
               'Histogram Based outlier Detection':HBOS(contamination=outlier_fraction),
               'Isolation Forest':IForest(contamination=outlier_fraction,random_state=42),
               'K Nearest Neighbors':KNN(contamination=outlier_fraction),
               'Local outlier Factor':LOF(contamination=outlier_fraction),
                'Minimum Covariance Determinat':MCD(contamination=outlier_fraction,random_state=42),
               'Principal Component Analysis':PCA(contamination=outlier_fraction,random_state=42),
                'One Class SVM':OCSVM(contamination=outlier_fraction)
               }
    for  clf_name, clf in classifier.items():
        t0=time() # time before training
        clf.fit(X_train_norm) # Fitting the model
        test_scores=clf.decision_function(X_test_norm) # Testing the model
        t1=time()# when testing is completed
        duration=round(t1-t0,ndigits=4) # Total time
        time_list.append(duration)
        
        roc=round(roc_auc_score(y_test,test_scores),ndigits=4)
        prn=round(precision_n_scores(y_test,test_scores),ndigits=4)
        
        print('{clf_name} ROC:{roc}, precision @ rank n:{prn},'
              'execution time:{duration}s'.format(
              clf_name=clf_name,roc=roc,prn=prn,duration=duration))
        roc_list.append(roc)
        prn_list.append(prn)
    
    temp_df=pd.DataFrame(time_list).transpose()
    temp_df.columns=df_columns
    time_df=pd.concat([time_df,temp_df],axis=0)
    
    temp_df=pd.DataFrame(roc_list).transpose()
    temp_df.columns=df_columns
    roc_df=pd.concat([roc_df,temp_df],axis=0)
    
    temp_df=pd.DataFrame(prn_list).transpose()
    temp_df.columns=df_columns
    prn_df=pd.concat([prn_df,temp_df],axis=0)
    
    


...Processing arrhythmia ...
Angle Based outlier detector ROC:0.7687, precision @ rank n:0.3571,execution time:7.6262s




Cluster Based outlier Factor ROC:0.7803, precision @ rank n:0.4643,execution time:6.1275s
Feature Bagging ROC:0.7775, precision @ rank n:0.4643,execution time:1.666s
Histogram Based outlier Detection ROC:0.8511, precision @ rank n:0.5714,execution time:9.2048s
Isolation Forest ROC:0.8184, precision @ rank n:0.5357,execution time:2.5644s
K Nearest Neighbors ROC:0.782, precision @ rank n:0.5,execution time:0.2978s
Local outlier Factor ROC:0.7787, precision @ rank n:0.4643,execution time:0.1949s




Minimum Covariance Determinat ROC:0.8228, precision @ rank n:0.4286,execution time:2.8682s
Principal Component Analysis ROC:0.8, precision @ rank n:0.5,execution time:0.2149s
One Class SVM ROC:0.7986, precision @ rank n:0.5,execution time:0.0989s

...Processing cardio ...
Angle Based outlier detector ROC:0.5928, precision @ rank n:0.2838,execution time:2.0997s




Cluster Based outlier Factor ROC:0.8609, precision @ rank n:0.5541,execution time:0.5277s
Feature Bagging ROC:0.6407, precision @ rank n:0.2162,execution time:3.3659s
Histogram Based outlier Detection ROC:0.87, precision @ rank n:0.5135,execution time:0.02s
Isolation Forest ROC:0.9314, precision @ rank n:0.527,execution time:1.5231s
K Nearest Neighbors ROC:0.7642, precision @ rank n:0.4054,execution time:0.6066s
Local outlier Factor ROC:0.6432, precision @ rank n:0.2162,execution time:0.3228s




Minimum Covariance Determinat ROC:0.8587, precision @ rank n:0.4324,execution time:2.5804s
Principal Component Analysis ROC:0.9581, precision @ rank n:0.6216,execution time:0.031s
One Class SVM ROC:0.9462, precision @ rank n:0.527,execution time:0.3208s

...Processing glass ...
Angle Based outlier detector ROC:0.7805, precision @ rank n:0.25,execution time:0.3348s




Cluster Based outlier Factor ROC:0.9116, precision @ rank n:0.25,execution time:0.4697s
Feature Bagging ROC:0.7805, precision @ rank n:0.25,execution time:0.2918s
Histogram Based outlier Detection ROC:0.7256, precision @ rank n:0.25,execution time:0.037s
Isolation Forest ROC:0.7622, precision @ rank n:0.25,execution time:1.3212s
K Nearest Neighbors ROC:0.8902, precision @ rank n:0.25,execution time:0.031s
Local outlier Factor ROC:0.7287, precision @ rank n:0.25,execution time:0.009s




Minimum Covariance Determinat ROC:0.8079, precision @ rank n:0.0,execution time:0.1599s
Principal Component Analysis ROC:0.747, precision @ rank n:0.25,execution time:0.008s
One Class SVM ROC:0.753, precision @ rank n:0.25,execution time:0.009s

...Processing ionosphere ...
Angle Based outlier detector ROC:0.9247, precision @ rank n:0.8868,execution time:0.3338s
Cluster Based outlier Factor ROC:0.8846, precision @ rank n:0.7736,execution time:0.1939s




Feature Bagging ROC:0.9337, precision @ rank n:0.8113,execution time:0.2429s
Histogram Based outlier Detection ROC:0.5154, precision @ rank n:0.3585,execution time:0.038s
Isolation Forest ROC:0.8538, precision @ rank n:0.6792,execution time:1.587s
K Nearest Neighbors ROC:0.9226, precision @ rank n:0.8868,execution time:0.1299s
Local outlier Factor ROC:0.9312, precision @ rank n:0.7736,execution time:0.02s
Minimum Covariance Determinat ROC:0.9659, precision @ rank n:0.9057,execution time:0.4417s
Principal Component Analysis ROC:0.8068, precision @ rank n:0.6226,execution time:0.007s
One Class SVM ROC:0.8799, precision @ rank n:0.7736,execution time:0.013s

...Processing letter ...
Angle Based outlier detector ROC:0.8949, precision @ rank n:0.3721,execution time:1.7569s




Cluster Based outlier Factor ROC:0.7641, precision @ rank n:0.2558,execution time:0.3928s
Feature Bagging ROC:0.8948, precision @ rank n:0.4651,execution time:3.0521s
Histogram Based outlier Detection ROC:0.6248, precision @ rank n:0.093,execution time:0.039s
Isolation Forest ROC:0.6031, precision @ rank n:0.0698,execution time:1.4221s
K Nearest Neighbors ROC:0.8689, precision @ rank n:0.3488,execution time:0.4397s
Local outlier Factor ROC:0.8781, precision @ rank n:0.3721,execution time:0.2938s
Minimum Covariance Determinat ROC:0.8256, precision @ rank n:0.2326,execution time:5.7165s
Principal Component Analysis ROC:0.5142, precision @ rank n:0.1163,execution time:0.015s
One Class SVM ROC:0.5827, precision @ rank n:0.1395,execution time:0.2558s

...Processing lympho ...
Angle Based outlier detector ROC:0.9138, precision @ rank n:0.0,execution time:0.1169s




Cluster Based outlier Factor ROC:1.0, precision @ rank n:1.0,execution time:0.3818s
Feature Bagging ROC:1.0, precision @ rank n:1.0,execution time:0.1009s
Histogram Based outlier Detection ROC:1.0, precision @ rank n:1.0,execution time:0.015s
Isolation Forest ROC:1.0, precision @ rank n:1.0,execution time:0.9334s
K Nearest Neighbors ROC:0.9828, precision @ rank n:0.5,execution time:0.024s
Local outlier Factor ROC:1.0, precision @ rank n:1.0,execution time:0.009s




Minimum Covariance Determinat ROC:0.8966, precision @ rank n:0.5,execution time:0.1999s
Principal Component Analysis ROC:1.0, precision @ rank n:1.0,execution time:0.009s
One Class SVM ROC:1.0, precision @ rank n:1.0,execution time:0.007s

...Processing mnist ...
Angle Based outlier detector ROC:0.7682, precision @ rank n:0.3643,execution time:26.0899s




Cluster Based outlier Factor ROC:0.8583, precision @ rank n:0.4126,execution time:3.268s
Feature Bagging ROC:0.6929, precision @ rank n:0.3346,execution time:144.0028s
Histogram Based outlier Detection ROC:0.5578, precision @ rank n:0.0967,execution time:0.1499s
Isolation Forest ROC:0.8257, precision @ rank n:0.316,execution time:5.1308s
K Nearest Neighbors ROC:0.8423, precision @ rank n:0.4349,execution time:15.7093s
Local outlier Factor ROC:0.7012, precision @ rank n:0.3309,execution time:14.7279s




Minimum Covariance Determinat ROC:0.8498, precision @ rank n:0.2268,execution time:9.0054s
Principal Component Analysis ROC:0.8613, precision @ rank n:0.3903,execution time:0.3668s
One Class SVM ROC:0.8622, precision @ rank n:0.4015,execution time:10.1777s

...Processing musk ...
Angle Based outlier detector ROC:0.1012, precision @ rank n:0.0294,execution time:5.8884s




Cluster Based outlier Factor ROC:1.0, precision @ rank n:1.0,execution time:0.6807s
Feature Bagging ROC:0.5627, precision @ rank n:0.1471,execution time:27.4691s
Histogram Based outlier Detection ROC:1.0, precision @ rank n:1.0,execution time:0.1979s
Isolation Forest ROC:1.0, precision @ rank n:1.0,execution time:3.5468s
K Nearest Neighbors ROC:0.7729, precision @ rank n:0.2059,execution time:4.1964s
Local outlier Factor ROC:0.5567, precision @ rank n:0.1471,execution time:3.6857s
Minimum Covariance Determinat ROC:1.0, precision @ rank n:1.0,execution time:36.2097s
Principal Component Analysis ROC:1.0, precision @ rank n:1.0,execution time:0.3348s
One Class SVM ROC:1.0, precision @ rank n:1.0,execution time:2.4555s

...Processing optdigits ...
Angle Based outlier detector ROC:0.4601, precision @ rank n:0.0,execution time:9.1144s




Cluster Based outlier Factor ROC:0.7186, precision @ rank n:0.0,execution time:0.9654s
Feature Bagging ROC:0.4174, precision @ rank n:0.0492,execution time:38.5942s
Histogram Based outlier Detection ROC:0.8962, precision @ rank n:0.1803,execution time:0.0979s
Isolation Forest ROC:0.7042, precision @ rank n:0.0328,execution time:2.6873s
K Nearest Neighbors ROC:0.3717, precision @ rank n:0.0,execution time:5.1508s
Local outlier Factor ROC:0.4191, precision @ rank n:0.0164,execution time:4.887s




Minimum Covariance Determinat ROC:0.3802, precision @ rank n:0.0,execution time:4.819s
Principal Component Analysis ROC:0.507, precision @ rank n:0.0,execution time:0.1829s
One Class SVM ROC:0.4817, precision @ rank n:0.0,execution time:4.0905s

...Processing pendigits ...
Angle Based outlier detector ROC:0.677, precision @ rank n:0.0725,execution time:7.8422s




Cluster Based outlier Factor ROC:0.9682, precision @ rank n:0.3623,execution time:1.0543s
Feature Bagging ROC:0.4491, precision @ rank n:0.0725,execution time:15.3535s
Histogram Based outlier Detection ROC:0.9261, precision @ rank n:0.2609,execution time:0.027s
Isolation Forest ROC:0.9497, precision @ rank n:0.3188,execution time:2.2876s
K Nearest Neighbors ROC:0.7603, precision @ rank n:0.1594,execution time:1.9518s
Local outlier Factor ROC:0.4578, precision @ rank n:0.0725,execution time:1.557s
Minimum Covariance Determinat ROC:0.8376, precision @ rank n:0.1159,execution time:7.2026s
Principal Component Analysis ROC:0.9402, precision @ rank n:0.3478,execution time:0.029s
One Class SVM ROC:0.946, precision @ rank n:0.3333,execution time:3.0301s

...Processing pima ...
Angle Based outlier detector ROC:0.665, precision @ rank n:0.4314,execution time:0.6386s




Cluster Based outlier Factor ROC:0.6752, precision @ rank n:0.4608,execution time:0.2179s
Feature Bagging ROC:0.6414, precision @ rank n:0.4118,execution time:0.3088s
Histogram Based outlier Detection ROC:0.7029, precision @ rank n:0.5392,execution time:0.01s
Isolation Forest ROC:0.679, precision @ rank n:0.4608,execution time:1.1143s
K Nearest Neighbors ROC:0.7123, precision @ rank n:0.5,execution time:0.1429s
Local outlier Factor ROC:0.6621, precision @ rank n:0.4706,execution time:0.031s
Minimum Covariance Determinat ROC:0.6977, precision @ rank n:0.4804,execution time:0.1979s
Principal Component Analysis ROC:0.6444, precision @ rank n:0.451,execution time:0.008s
One Class SVM ROC:0.6423, precision @ rank n:0.4412,execution time:0.035s

...Processing satellite ...
Angle Based outlier detector ROC:0.5598, precision @ rank n:0.3723,execution time:7.3804s




Cluster Based outlier Factor ROC:0.7158, precision @ rank n:0.5346,execution time:0.8465s
Feature Bagging ROC:0.5329, precision @ rank n:0.3572,execution time:23.5455s
Histogram Based outlier Detection ROC:0.7574, precision @ rank n:0.5585,execution time:0.056s
Isolation Forest ROC:0.6803, precision @ rank n:0.5245,execution time:2.3945s
K Nearest Neighbors ROC:0.6755, precision @ rank n:0.4818,execution time:4.4822s
Local outlier Factor ROC:0.5339, precision @ rank n:0.3572,execution time:2.5454s
Minimum Covariance Determinat ROC:0.7864, precision @ rank n:0.6692,execution time:7.7702s
Principal Component Analysis ROC:0.5762, precision @ rank n:0.4642,execution time:0.059s
One Class SVM ROC:0.6437, precision @ rank n:0.5119,execution time:3.6368s

...Processing satimage-2 ...
Angle Based outlier detector ROC:0.8675, precision @ rank n:0.3333,execution time:6.399s




Cluster Based outlier Factor ROC:0.9998, precision @ rank n:0.9259,execution time:0.8095s
Feature Bagging ROC:0.5683, precision @ rank n:0.1111,execution time:18.1738s
Histogram Based outlier Detection ROC:0.9809, precision @ rank n:0.6667,execution time:0.058s
Isolation Forest ROC:0.9955, precision @ rank n:0.7778,execution time:2.2586s
K Nearest Neighbors ROC:0.9595, precision @ rank n:0.4444,execution time:2.9382s
Local outlier Factor ROC:0.5699, precision @ rank n:0.1111,execution time:2.3955s
Minimum Covariance Determinat ROC:0.9965, precision @ rank n:0.6296,execution time:8.3408s
Principal Component Analysis ROC:0.9834, precision @ rank n:0.8148,execution time:0.0999s
One Class SVM ROC:0.9997, precision @ rank n:0.8889,execution time:3.8846s

...Processing shuttle ...
Angle Based outlier detector ROC:0.6096, precision @ rank n:0.1853,execution time:67.2428s




Cluster Based outlier Factor ROC:0.5871, precision @ rank n:0.2339,execution time:1.9688s
Feature Bagging ROC:0.4867, precision @ rank n:0.073,execution time:167.4187s
Histogram Based outlier Detection ROC:0.9842, precision @ rank n:0.9896,execution time:0.062s
Isolation Forest ROC:0.9979, precision @ rank n:0.9628,execution time:9.7835s
K Nearest Neighbors ROC:0.643, precision @ rank n:0.2153,execution time:25.896s
Local outlier Factor ROC:0.529, precision @ rank n:0.1323,execution time:33.3024s






Minimum Covariance Determinat ROC:0.9901, precision @ rank n:0.7375,execution time:38.894s
Principal Component Analysis ROC:0.9904, precision @ rank n:0.9542,execution time:0.0949s
One Class SVM ROC:0.9922, precision @ rank n:0.9578,execution time:167.7065s

...Processing vertebral ...
Angle Based outlier detector ROC:0.3968, precision @ rank n:0.0909,execution time:0.1719s
Cluster Based outlier Factor ROC:0.3604, precision @ rank n:0.0909,execution time:0.1419s




Feature Bagging ROC:0.4118, precision @ rank n:0.0909,execution time:0.1169s
Histogram Based outlier Detection ROC:0.3166, precision @ rank n:0.0,execution time:0.007s
Isolation Forest ROC:0.3765, precision @ rank n:0.0,execution time:0.8535s
K Nearest Neighbors ROC:0.384, precision @ rank n:0.0,execution time:0.048s
Local outlier Factor ROC:0.4342, precision @ rank n:0.0909,execution time:0.009s




Minimum Covariance Determinat ROC:0.4139, precision @ rank n:0.0,execution time:0.2491s
Principal Component Analysis ROC:0.3615, precision @ rank n:0.0,execution time:0.005s
One Class SVM ROC:0.3947, precision @ rank n:0.0909,execution time:0.005s

...Processing vowels ...
Angle Based outlier detector ROC:0.9515, precision @ rank n:0.5,execution time:1.1173s




Cluster Based outlier Factor ROC:0.942, precision @ rank n:0.3182,execution time:0.3358s
Feature Bagging ROC:0.9113, precision @ rank n:0.2727,execution time:0.8125s
Histogram Based outlier Detection ROC:0.6365, precision @ rank n:0.0,execution time:0.013s
Isolation Forest ROC:0.744, precision @ rank n:0.0909,execution time:1.0216s
K Nearest Neighbors ROC:0.9763, precision @ rank n:0.4545,execution time:0.2538s
Local outlier Factor ROC:0.9119, precision @ rank n:0.2727,execution time:0.0899s
Minimum Covariance Determinat ROC:0.8905, precision @ rank n:0.1818,execution time:2.2896s
Principal Component Analysis ROC:0.5675, precision @ rank n:0.0909,execution time:0.008s
One Class SVM ROC:0.7763, precision @ rank n:0.1364,execution time:0.1179s

...Processing wbc ...
Angle Based outlier detector ROC:0.9181, precision @ rank n:0.5455,execution time:0.2918s
Cluster Based outlier Factor ROC:0.9291, precision @ rank n:0.4545,execution time:0.1839s




Feature Bagging ROC:0.9555, precision @ rank n:0.6364,execution time:0.2468s
Histogram Based outlier Detection ROC:0.9594, precision @ rank n:0.6364,execution time:0.033s
Isolation Forest ROC:0.951, precision @ rank n:0.6364,execution time:0.9214s
K Nearest Neighbors ROC:0.9426, precision @ rank n:0.7273,execution time:0.061s
Local outlier Factor ROC:0.9536, precision @ rank n:0.6364,execution time:0.021s
Minimum Covariance Determinat ROC:0.9226, precision @ rank n:0.4545,execution time:0.2089s
Principal Component Analysis ROC:0.9329, precision @ rank n:0.6364,execution time:0.007s
One Class SVM ROC:0.9433, precision @ rank n:0.6364,execution time:0.014s


In [27]:
roc_df

Unnamed: 0,Data,Samples,Dimensions,outlier perc,ABOD,CBLOF,FB,HBOS,IFores,KNN,LOF,MCD,OCSVM,PCA
0,arrhyt,452,274,14.6018,0.7687,0.7803,0.7775,0.8511,0.8184,0.782,0.7787,0.8228,0.8,0.7986
0,ca,1831,21,9.6122,0.5928,0.8609,0.6407,0.87,0.9314,0.7642,0.6432,0.8587,0.9581,0.9462
0,g,214,9,4.2056,0.7805,0.9116,0.7805,0.7256,0.7622,0.8902,0.7287,0.8079,0.747,0.753
0,ionosp,351,33,35.8974,0.9247,0.8846,0.9337,0.5154,0.8538,0.9226,0.9312,0.9659,0.8068,0.8799
0,le,1600,32,6.25,0.8949,0.7641,0.8948,0.6248,0.6031,0.8689,0.8781,0.8256,0.5142,0.5827
0,ly,148,18,4.0541,0.9138,1.0,1.0,1.0,1.0,0.9828,1.0,0.8966,1.0,1.0
0,m,7603,100,9.2069,0.7682,0.8583,0.6929,0.5578,0.8257,0.8423,0.7012,0.8498,0.8613,0.8622
0,,3062,166,3.1679,0.1012,1.0,0.5627,1.0,1.0,0.7729,0.5567,1.0,1.0,1.0
0,optdi,5216,64,2.8758,0.4601,0.7186,0.4174,0.8962,0.7042,0.3717,0.4191,0.3802,0.507,0.4817
0,pendi,6870,16,2.2707,0.677,0.9682,0.4491,0.9261,0.9497,0.7603,0.4578,0.8376,0.9402,0.946


In [28]:
prn_df

Unnamed: 0,Data,Samples,Dimensions,outlier perc,ABOD,CBLOF,FB,HBOS,IFores,KNN,LOF,MCD,OCSVM,PCA
0,arrhyt,452,274,14.6018,0.3571,0.4643,0.4643,0.5714,0.5357,0.5,0.4643,0.4286,0.5,0.5
0,ca,1831,21,9.6122,0.2838,0.5541,0.2162,0.5135,0.527,0.4054,0.2162,0.4324,0.6216,0.527
0,g,214,9,4.2056,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0.0,0.25,0.25
0,ionosp,351,33,35.8974,0.8868,0.7736,0.8113,0.3585,0.6792,0.8868,0.7736,0.9057,0.6226,0.7736
0,le,1600,32,6.25,0.3721,0.2558,0.4651,0.093,0.0698,0.3488,0.3721,0.2326,0.1163,0.1395
0,ly,148,18,4.0541,0.0,1.0,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0
0,m,7603,100,9.2069,0.3643,0.4126,0.3346,0.0967,0.316,0.4349,0.3309,0.2268,0.3903,0.4015
0,,3062,166,3.1679,0.0294,1.0,0.1471,1.0,1.0,0.2059,0.1471,1.0,1.0,1.0
0,optdi,5216,64,2.8758,0.0,0.0,0.0492,0.1803,0.0328,0.0,0.0164,0.0,0.0,0.0
0,pendi,6870,16,2.2707,0.0725,0.3623,0.0725,0.2609,0.3188,0.1594,0.0725,0.1159,0.3478,0.3333


In [29]:
time_df

Unnamed: 0,Data,Samples,Dimensions,outlier perc,ABOD,CBLOF,FB,HBOS,IFores,KNN,LOF,MCD,OCSVM,PCA
0,arrhyt,452,274,14.6018,7.6262,6.1275,1.666,9.2048,2.5644,0.2978,0.1949,2.8682,0.2149,0.0989
0,ca,1831,21,9.6122,2.0997,0.5277,3.3659,0.02,1.5231,0.6066,0.3228,2.5804,0.031,0.3208
0,g,214,9,4.2056,0.3348,0.4697,0.2918,0.037,1.3212,0.031,0.009,0.1599,0.008,0.009
0,ionosp,351,33,35.8974,0.3338,0.1939,0.2429,0.038,1.587,0.1299,0.02,0.4417,0.007,0.013
0,le,1600,32,6.25,1.7569,0.3928,3.0521,0.039,1.4221,0.4397,0.2938,5.7165,0.015,0.2558
0,ly,148,18,4.0541,0.1169,0.3818,0.1009,0.015,0.9334,0.024,0.009,0.1999,0.009,0.007
0,m,7603,100,9.2069,26.0899,3.268,144.003,0.1499,5.1308,15.7093,14.7279,9.0054,0.3668,10.1777
0,,3062,166,3.1679,5.8884,0.6807,27.4691,0.1979,3.5468,4.1964,3.6857,36.2097,0.3348,2.4555
0,optdi,5216,64,2.8758,9.1144,0.9654,38.5942,0.0979,2.6873,5.1508,4.887,4.819,0.1829,4.0905
0,pendi,6870,16,2.2707,7.8422,1.0543,15.3535,0.027,2.2876,1.9518,1.557,7.2026,0.029,3.0301
