#PROJECT-1
#"ANAMOLY DETECTION"

In [46]:
import pandas as pd
import numpy as np
import os
import sys
from scipy.io import loadmat
from sklearn.model_selection import train_test_split

In [47]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

In [48]:
from pyod.utils.utility import standardizer,precision_n_scores
from sklearn.metrics import roc_auc_score

In [49]:
mat_file=["arrhythmia.mat",
         "cardio.mat",
         "glass.mat",
         "ionosphere.mat",
         "letter.mat",
         "lympho.mat",
         "mnist.mat",
         "musk.mat",
         "optdigits.mat",
         "pendigits.mat",
         "pima.mat",
         "satellite.mat",
         "satimage-2.mat",
         "shuttle.mat",
         "vertebral.mat",
         "vowels.mat",
         "wbc.mat"]

In [50]:
data=loadmat(mat_file[0])
print(data.items())


dict_items([('__header__', b'MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Tue May 17 11:53:12 2016'), ('__version__', '1.0'), ('__globals__', []), ('X', array([[ 75. ,   0. , 190. , ...,   2.9,  23.3,  49.4],
       [ 56. ,   1. , 165. , ...,   2.1,  20.4,  38.8],
       [ 54. ,   0. , 172. , ...,   3.4,  12.3,  49. ],
       ...,
       [ 36. ,   0. , 166. , ...,   1. , -44.2, -33.2],
       [ 32. ,   1. , 155. , ...,   2.4,  25. ,  46.6],
       [ 78. ,   1. , 160. , ...,   1.6,  21.3,  32.8]])), ('y', array([[1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [51]:
type(data["X"]),data["X"].shape

(numpy.ndarray, (452, 274))

In [52]:
type(data["y"]),data["y"].shape

(numpy.ndarray, (452, 1))

EXPLORING THE MAT-FILES


In [53]:
df_columns=["Data","Samples","Dimensions","Outlier Perc","PCA","MCD","OCSVM","LOF","CBLOF","KNN","ABOD","HBOS","IForest","FeatureBagging"]
time_df=pd.DataFrame(columns=df_columns)
prn_df=pd.DataFrame(columns=df_columns)
roc_df=pd.DataFrame(columns=df_columns)

from time import time
random_state=np.random.RandomState(50)
for file in mat_file:
    print("PROCESSING... ",file)
    mat=loadmat(file)
    x=mat["X"]
    y=mat["y"].ravel()
    outliers_fraction=np.count_nonzero(y)/len(y)
    outliers_percentage=round(outliers_fraction*100,ndigits=4)
    roc_list=[file[:-4],x.shape[0],x.shape[1],outliers_percentage]
    prn_list=[file[:-4],x.shape[0],x.shape[1],outliers_percentage]
    time_list=[file[:-4],x.shape[0],x.shape[1],outliers_percentage]
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=random_state)
    x_train_norm,x_test_norm=standardizer(x_train,x_test)
    
    
    classifiers={
        
        "Principal Componenet Analysis": PCA(contamination=outliers_fraction,random_state=random_state),
        "Minimum Covariance Determinant": MCD(contamination=outliers_fraction,random_state=random_state),
        "One Class SVM": OCSVM(contamination=outliers_fraction),
        "Local Outlier Factor": LOF(contamination=outliers_fraction),
        "Cluster-Based Local Outlier Factor": CBLOF(contamination=outliers_fraction,random_state=random_state,check_estimator=False),
        "K Nearest Neighbor":KNN(contamination=outliers_fraction),
        "Angle based Outlier Detection":ABOD(contamination=outliers_fraction),
        "Histogram Based Outlier Detection": HBOS(contamination=outliers_fraction),
        "Isolation Forest":IForest(contamination=outliers_fraction),
        "Feature Bagging": FeatureBagging(contamination=outliers_fraction,random_state=random_state)
           
        
    }
    for clf_name, clf in classifiers.items():
        t0=time()
        clf.fit(x_train_norm)
        test_scores=clf.decision_function(x_test_norm)
        t1=time()
        duration=round(t1-t0,ndigits=4)
        time_list.append(duration)
        
        roc=round(roc_auc_score(y_test,test_scores),ndigits=4)
        prn=round(precision_n_scores(y_test,test_scores),ndigits=4)
        
        print("{clf_name}\n ROC:{roc}\n precision @ rank n:{prn}\n execution time:{duration}s".format(clf_name=clf_name, roc=roc,prn=prn,duration=duration))
        
        roc_list.append(roc)
        prn_list.append(prn)
    
    
    temp_df=pd.DataFrame(time_list).transpose()
    temp_df.columns=df_columns
    time_df=pd.concat([time_df,temp_df],axis=0)
    
    temp_df=pd.DataFrame(prn_list).transpose()
    temp_df.columns=df_columns
    prn_df=pd.concat([prn_df,temp_df],axis=0)
     
    temp_df=pd.DataFrame(roc_list).transpose()
    temp_df.columns=df_columns
    roc_df=pd.concat([roc_df,temp_df],axis=0)
        
        
        

PROCESSING...  arrhythmia.mat
Principal Componenet Analysis
 ROC:0.7166
 precision @ rank n:0.2857
 execution time:0.1251s




Minimum Covariance Determinant
 ROC:0.7477
 precision @ rank n:0.2857
 execution time:1.3783s
One Class SVM
 ROC:0.7155
 precision @ rank n:0.2857
 execution time:0.0591s
Local Outlier Factor
 ROC:0.7172
 precision @ rank n:0.2143
 execution time:0.0981s




Cluster-Based Local Outlier Factor
 ROC:0.7184
 precision @ rank n:0.2857
 execution time:0.1832s
K Nearest Neighbor
 ROC:0.7359
 precision @ rank n:0.2857
 execution time:0.1321s
Angle based Outlier Detection
 ROC:0.7196
 precision @ rank n:0.3571
 execution time:0.3143s
Histogram Based Outlier Detection
 ROC:0.7547
 precision @ rank n:0.3571
 execution time:0.1361s
Isolation Forest
 ROC:0.7213
 precision @ rank n:0.3571
 execution time:0.7587s
Feature Bagging
 ROC:0.7061
 precision @ rank n:0.2143
 execution time:0.8048s
PROCESSING...  cardio.mat
Principal Componenet Analysis
 ROC:0.9487
 precision @ rank n:0.5556
 execution time:0.007s




Minimum Covariance Determinant
 ROC:0.8665
 precision @ rank n:0.4815
 execution time:1.0419s
One Class SVM
 ROC:0.9383
 precision @ rank n:0.4815
 execution time:0.1712s
Local Outlier Factor
 ROC:0.5982
 precision @ rank n:0.2037
 execution time:0.1832s




Cluster-Based Local Outlier Factor
 ROC:0.875
 precision @ rank n:0.5556
 execution time:0.2783s
K Nearest Neighbor
 ROC:0.7942
 precision @ rank n:0.4074
 execution time:0.3053s
Angle based Outlier Detection
 ROC:0.6406
 precision @ rank n:0.2222
 execution time:1.058s
Histogram Based Outlier Detection
 ROC:0.8682
 precision @ rank n:0.4815
 execution time:0.014s
Isolation Forest
 ROC:0.9141
 precision @ rank n:0.4259
 execution time:0.7808s
Feature Bagging
 ROC:0.6092
 precision @ rank n:0.2037
 execution time:1.3653s
PROCESSING...  glass.mat
Principal Componenet Analysis
 ROC:0.6505
 precision @ rank n:0.3333
 execution time:0.005s
Minimum Covariance Determinant
 ROC:0.7742
 precision @ rank n:0.0
 execution time:0.1361s
One Class SVM
 ROC:0.5968
 precision @ rank n:0.3333
 execution time:0.006s




Local Outlier Factor
 ROC:0.7796
 precision @ rank n:0.0
 execution time:0.009s
Cluster-Based Local Outlier Factor
 ROC:0.7742
 precision @ rank n:0.3333
 execution time:0.0911s
K Nearest Neighbor
 ROC:0.8011
 precision @ rank n:0.3333
 execution time:0.015s
Angle based Outlier Detection
 ROC:0.7312
 precision @ rank n:0.3333
 execution time:0.0971s
Histogram Based Outlier Detection
 ROC:0.7473
 precision @ rank n:0.0
 execution time:0.005s
Isolation Forest
 ROC:0.7043
 precision @ rank n:0.0
 execution time:0.4955s
Feature Bagging
 ROC:0.8172
 precision @ rank n:0.3333
 execution time:0.0621s
PROCESSING...  ionosphere.mat
Principal Componenet Analysis
 ROC:0.7728
 precision @ rank n:0.5676
 execution time:0.005s
Minimum Covariance Determinant
 ROC:0.9671
 precision @ rank n:0.8649
 execution time:0.1121s
One Class SVM
 ROC:0.8026
 precision @ rank n:0.6486
 execution time:0.012s
Local Outlier Factor
 ROC:0.8813
 precision @ rank n:0.7297
 execution time:0.013s
Cluster-Based Local Out



Angle based Outlier Detection
 ROC:0.9287
 precision @ rank n:0.8108
 execution time:0.1632s
Histogram Based Outlier Detection
 ROC:0.6063
 precision @ rank n:0.4054
 execution time:0.018s
Isolation Forest
 ROC:0.8308
 precision @ rank n:0.6486
 execution time:0.5225s
Feature Bagging
 ROC:0.8766
 precision @ rank n:0.7027
 execution time:0.1291s
PROCESSING...  letter.mat
Principal Componenet Analysis
 ROC:0.5285
 precision @ rank n:0.1351
 execution time:0.011s
Minimum Covariance Determinant
 ROC:0.8091
 precision @ rank n:0.2432
 execution time:2.0549s
One Class SVM
 ROC:0.5945
 precision @ rank n:0.1622
 execution time:0.1441s
Local Outlier Factor
 ROC:0.8781
 precision @ rank n:0.4595
 execution time:0.1962s




Cluster-Based Local Outlier Factor
 ROC:0.7671
 precision @ rank n:0.2703
 execution time:0.2182s
K Nearest Neighbor
 ROC:0.8686
 precision @ rank n:0.4054
 execution time:0.2703s
Angle based Outlier Detection
 ROC:0.8636
 precision @ rank n:0.4054
 execution time:0.8507s
Histogram Based Outlier Detection
 ROC:0.6022
 precision @ rank n:0.1351
 execution time:0.021s
Isolation Forest
 ROC:0.6589
 precision @ rank n:0.1351
 execution time:0.6997s
Feature Bagging
 ROC:0.8546
 precision @ rank n:0.4595
 execution time:1.4013s
PROCESSING...  lympho.mat
Principal Componenet Analysis
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.003s
Minimum Covariance Determinant
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.0601s
One Class SVM
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.002s
Local Outlier Factor
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.005s
Cluster-Based Local Outlier Factor
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.0771s
K Nearest Neighbor
 ROC:1.0
 prec




Angle based Outlier Detection
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.0711s
Histogram Based Outlier Detection
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.01s
Isolation Forest
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.4794s
Feature Bagging
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.052s
PROCESSING...  mnist.mat
Principal Componenet Analysis
 ROC:0.8529
 precision @ rank n:0.3832
 execution time:0.2152s




Minimum Covariance Determinant
 ROC:0.8461
 precision @ rank n:0.2664
 execution time:5.6133s
One Class SVM
 ROC:0.8537
 precision @ rank n:0.3925
 execution time:7.7184s
Local Outlier Factor
 ROC:0.7021
 precision @ rank n:0.3131
 execution time:10.4277s




Cluster-Based Local Outlier Factor
 ROC:0.851
 precision @ rank n:0.4252
 execution time:1.4694s
K Nearest Neighbor
 ROC:0.8432
 precision @ rank n:0.4299
 execution time:11.2956s
Angle based Outlier Detection
 ROC:0.7657
 precision @ rank n:0.3832
 execution time:13.7109s
Histogram Based Outlier Detection
 ROC:0.571
 precision @ rank n:0.1168
 execution time:0.1001s
Isolation Forest
 ROC:0.7934
 precision @ rank n:0.3271
 execution time:3.5393s
Feature Bagging
 ROC:0.695
 precision @ rank n:0.3131
 execution time:72.1385s
PROCESSING...  musk.mat
Principal Componenet Analysis
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.2061s
Minimum Covariance Determinant
 ROC:1.0
 precision @ rank n:1.0
 execution time:22.0086s
One Class SVM
 ROC:1.0
 precision @ rank n:1.0
 execution time:1.7066s
Local Outlier Factor
 ROC:0.5718
 precision @ rank n:0.2593
 execution time:3.244s




Cluster-Based Local Outlier Factor
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.6016s
K Nearest Neighbor
 ROC:0.8072
 precision @ rank n:0.2963
 execution time:3.6264s
Angle based Outlier Detection
 ROC:0.1734
 precision @ rank n:0.1111
 execution time:4.3571s
Histogram Based Outlier Detection
 ROC:1.0
 precision @ rank n:1.0
 execution time:0.1211s
Isolation Forest
 ROC:1.0
 precision @ rank n:1.0
 execution time:2.4062s
Feature Bagging
 ROC:0.5641
 precision @ rank n:0.2593
 execution time:22.0256s
PROCESSING...  optdigits.mat
Principal Componenet Analysis
 ROC:0.5331
 precision @ rank n:0.0
 execution time:0.0711s




Minimum Covariance Determinant
 ROC:0.3694
 precision @ rank n:0.0
 execution time:2.3009s
One Class SVM
 ROC:0.5249
 precision @ rank n:0.0
 execution time:2.3382s
Local Outlier Factor
 ROC:0.4769
 precision @ rank n:0.0667
 execution time:2.3182s




Cluster-Based Local Outlier Factor
 ROC:0.7677
 precision @ rank n:0.0
 execution time:0.6556s
K Nearest Neighbor
 ROC:0.3911
 precision @ rank n:0.0
 execution time:2.6915s
Angle based Outlier Detection
 ROC:0.4781
 precision @ rank n:0.0
 execution time:4.5633s
Histogram Based Outlier Detection
 ROC:0.8617
 precision @ rank n:0.2889
 execution time:0.0571s
Isolation Forest
 ROC:0.7918
 precision @ rank n:0.0667
 execution time:1.8297s
Feature Bagging
 ROC:0.4728
 precision @ rank n:0.0667
 execution time:17.4604s
PROCESSING...  pendigits.mat
Principal Componenet Analysis
 ROC:0.9156
 precision @ rank n:0.2
 execution time:0.0171s
Minimum Covariance Determinant
 ROC:0.8033
 precision @ rank n:0.05
 execution time:3.6053s
One Class SVM
 ROC:0.9098
 precision @ rank n:0.225
 execution time:2.0259s
Local Outlier Factor
 ROC:0.4664
 precision @ rank n:0.1
 execution time:0.9109s




Cluster-Based Local Outlier Factor
 ROC:0.9538
 precision @ rank n:0.25
 execution time:0.3964s
K Nearest Neighbor
 ROC:0.7258
 precision @ rank n:0.1
 execution time:1.012s
Angle based Outlier Detection
 ROC:0.6474
 precision @ rank n:0.075
 execution time:3.3712s
Histogram Based Outlier Detection
 ROC:0.8779
 precision @ rank n:0.225
 execution time:0.017s
Isolation Forest
 ROC:0.9327
 precision @ rank n:0.175
 execution time:1.2261s
Feature Bagging
 ROC:0.4611
 precision @ rank n:0.1
 execution time:5.9042s
PROCESSING...  pima.mat
Principal Componenet Analysis
 ROC:0.6135
 precision @ rank n:0.4937
 execution time:0.004s
Minimum Covariance Determinant
 ROC:0.6576
 precision @ rank n:0.5063
 execution time:1.029s
One Class SVM
 ROC:0.6096
 precision @ rank n:0.4937
 execution time:0.022s
Local Outlier Factor
 ROC:0.6236
 precision @ rank n:0.4304
 execution time:0.022s
Cluster-Based Local Outlier Factor
 ROC:0.6373
 precision @ rank n:0.443
 execution time:0.1432s




K Nearest Neighbor
 ROC:0.6885
 precision @ rank n:0.519
 execution time:0.0651s
Angle based Outlier Detection
 ROC:0.6692
 precision @ rank n:0.5063
 execution time:0.3273s
Histogram Based Outlier Detection
 ROC:0.647
 precision @ rank n:0.4937
 execution time:0.006s
Isolation Forest
 ROC:0.6347
 precision @ rank n:0.519
 execution time:0.5365s
Feature Bagging
 ROC:0.6289
 precision @ rank n:0.4557
 execution time:0.1972s
PROCESSING...  satellite.mat
Principal Componenet Analysis
 ROC:0.5783
 precision @ rank n:0.4609
 execution time:0.042s
Minimum Covariance Determinant
 ROC:0.7994
 precision @ rank n:0.6762
 execution time:4.1699s
One Class SVM
 ROC:0.6544
 precision @ rank n:0.5104
 execution time:2.5714s
Local Outlier Factor
 ROC:0.5613
 precision @ rank n:0.4003
 execution time:1.7307s




Cluster-Based Local Outlier Factor
 ROC:0.6873
 precision @ rank n:0.5199
 execution time:0.5004s
K Nearest Neighbor
 ROC:0.6789
 precision @ rank n:0.4928
 execution time:1.8307s
Angle based Outlier Detection
 ROC:0.5744
 precision @ rank n:0.4083
 execution time:4.0068s
Histogram Based Outlier Detection
 ROC:0.747
 precision @ rank n:0.5486
 execution time:0.035s
Isolation Forest
 ROC:0.6862
 precision @ rank n:0.5518
 execution time:1.6145s
Feature Bagging
 ROC:0.5574
 precision @ rank n:0.4003
 execution time:12.2905s
PROCESSING...  satimage-2.mat
Principal Componenet Analysis
 ROC:0.9529
 precision @ rank n:0.8
 execution time:0.035s
Minimum Covariance Determinant
 ROC:0.9954
 precision @ rank n:0.64
 execution time:3.6814s
One Class SVM
 ROC:0.9931
 precision @ rank n:0.88
 execution time:2.0269s
Local Outlier Factor
 ROC:0.5157
 precision @ rank n:0.16
 execution time:1.5955s




Cluster-Based Local Outlier Factor
 ROC:0.9977
 precision @ rank n:0.92
 execution time:0.4935s
K Nearest Neighbor
 ROC:0.9586
 precision @ rank n:0.44
 execution time:1.6916s
Angle based Outlier Detection
 ROC:0.8022
 precision @ rank n:0.12
 execution time:3.6795s
Histogram Based Outlier Detection
 ROC:0.9506
 precision @ rank n:0.72
 execution time:0.033s
Isolation Forest
 ROC:0.9921
 precision @ rank n:0.88
 execution time:1.4403s
Feature Bagging
 ROC:0.5104
 precision @ rank n:0.16
 execution time:11.7269s
PROCESSING...  shuttle.mat
Principal Componenet Analysis
 ROC:0.9872
 precision @ rank n:0.9524
 execution time:0.0651s






Minimum Covariance Determinant
 ROC:0.9906
 precision @ rank n:0.7572
 execution time:18.3141s
One Class SVM
 ROC:0.9895
 precision @ rank n:0.9561
 execution time:127.5872s
Local Outlier Factor
 ROC:0.5163
 precision @ rank n:0.1204
 execution time:21.437s




Cluster-Based Local Outlier Factor
 ROC:0.6044
 precision @ rank n:0.2269
 execution time:1.126s
K Nearest Neighbor
 ROC:0.6402
 precision @ rank n:0.2157
 execution time:15.3864s
Angle based Outlier Detection
 ROC:0.6182
 precision @ rank n:0.1849
 execution time:33.7195s
Histogram Based Outlier Detection
 ROC:0.9824
 precision @ rank n:0.9435
 execution time:0.039s
Isolation Forest
 ROC:0.9966
 precision @ rank n:0.9599
 execution time:5.8545s
Feature Bagging
 ROC:0.4189
 precision @ rank n:0.0644
 execution time:114.8498s
PROCESSING...  vertebral.mat
Principal Componenet Analysis
 ROC:0.4128
 precision @ rank n:0.0
 execution time:0.003s
Minimum Covariance Determinant
 ROC:0.4739
 precision @ rank n:0.0909
 execution time:0.0931s
One Class SVM
 ROC:0.5261
 precision @ rank n:0.0909
 execution time:0.004s
Local Outlier Factor
 ROC:0.4844
 precision @ rank n:0.1818
 execution time:0.005s




Cluster-Based Local Outlier Factor
 ROC:0.3979
 precision @ rank n:0.0
 execution time:0.0901s
K Nearest Neighbor
 ROC:0.4262
 precision @ rank n:0.0909
 execution time:0.016s
Angle based Outlier Detection
 ROC:0.4784
 precision @ rank n:0.2727
 execution time:0.1001s
Histogram Based Outlier Detection
 ROC:0.3353
 precision @ rank n:0.0
 execution time:0.005s
Isolation Forest
 ROC:0.4247
 precision @ rank n:0.0909
 execution time:0.4995s
Feature Bagging
 ROC:0.5216
 precision @ rank n:0.1818
 execution time:0.0621s
PROCESSING...  vowels.mat
Principal Componenet Analysis
 ROC:0.6762
 precision @ rank n:0.1875
 execution time:0.004s
Minimum Covariance Determinant
 ROC:0.6529
 precision @ rank n:0.0
 execution time:1.2312s
One Class SVM
 ROC:0.7914
 precision @ rank n:0.4375
 execution time:0.0811s
Local Outlier Factor
 ROC:0.955
 precision @ rank n:0.5
 execution time:0.0671s




Cluster-Based Local Outlier Factor
 ROC:0.8999
 precision @ rank n:0.4375
 execution time:0.1561s
K Nearest Neighbor
 ROC:0.9654
 precision @ rank n:0.5625
 execution time:0.1351s
Angle based Outlier Detection
 ROC:0.9745
 precision @ rank n:0.625
 execution time:0.6586s
Histogram Based Outlier Detection
 ROC:0.7044
 precision @ rank n:0.1875
 execution time:0.009s
Isolation Forest
 ROC:0.7594
 precision @ rank n:0.1875
 execution time:0.6246s
Feature Bagging
 ROC:0.9592
 precision @ rank n:0.5625
 execution time:0.5295s
PROCESSING...  wbc.mat
Principal Componenet Analysis
 ROC:0.985
 precision @ rank n:0.6667
 execution time:0.004s
Minimum Covariance Determinant
 ROC:0.979
 precision @ rank n:0.6667
 execution time:0.1081s
One Class SVM
 ROC:0.985
 precision @ rank n:0.6667
 execution time:0.01s
Local Outlier Factor
 ROC:0.994
 precision @ rank n:0.6667
 execution time:0.015s




Cluster-Based Local Outlier Factor
 ROC:0.985
 precision @ rank n:0.6667
 execution time:0.1271s
K Nearest Neighbor
 ROC:0.985
 precision @ rank n:0.6667
 execution time:0.0331s
Angle based Outlier Detection
 ROC:0.9369
 precision @ rank n:0.0
 execution time:0.1691s
Histogram Based Outlier Detection
 ROC:0.991
 precision @ rank n:0.6667
 execution time:0.016s
Isolation Forest
 ROC:0.988
 precision @ rank n:0.6667
 execution time:0.5195s
Feature Bagging
 ROC:0.994
 precision @ rank n:0.6667
 execution time:0.1351s


In [54]:
time_df

Unnamed: 0,Data,Samples,Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,ABOD,HBOS,IForest,FeatureBagging
0,arrhythmia,452,274,14.6018,0.1251,1.3783,0.0591,0.0981,0.1832,0.1321,0.3143,0.1361,0.7587,0.8048
0,cardio,1831,21,9.6122,0.007,1.0419,0.1712,0.1832,0.2783,0.3053,1.058,0.014,0.7808,1.3653
0,glass,214,9,4.2056,0.005,0.1361,0.006,0.009,0.0911,0.015,0.0971,0.005,0.4955,0.0621
0,ionosphere,351,33,35.8974,0.005,0.1121,0.012,0.013,0.0841,0.03,0.1632,0.018,0.5225,0.1291
0,letter,1600,32,6.25,0.011,2.0549,0.1441,0.1962,0.2182,0.2703,0.8507,0.021,0.6997,1.4013
0,lympho,148,18,4.0541,0.003,0.0601,0.002,0.005,0.0771,0.01,0.0711,0.01,0.4794,0.052
0,mnist,7603,100,9.2069,0.2152,5.6133,7.7184,10.4277,1.4694,11.2956,13.7109,0.1001,3.5393,72.1385
0,musk,3062,166,3.1679,0.2061,22.0086,1.7066,3.244,0.6016,3.6264,4.3571,0.1211,2.4062,22.0256
0,optdigits,5216,64,2.8758,0.0711,2.3009,2.3382,2.3182,0.6556,2.6915,4.5633,0.0571,1.8297,17.4604
0,pendigits,6870,16,2.2707,0.0171,3.6053,2.0259,0.9109,0.3964,1.012,3.3712,0.017,1.2261,5.9042


In [55]:
prn_df

Unnamed: 0,Data,Samples,Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,ABOD,HBOS,IForest,FeatureBagging
0,arrhythmia,452,274,14.6018,0.2857,0.2857,0.2857,0.2143,0.2857,0.2857,0.3571,0.3571,0.3571,0.2143
0,cardio,1831,21,9.6122,0.5556,0.4815,0.4815,0.2037,0.5556,0.4074,0.2222,0.4815,0.4259,0.2037
0,glass,214,9,4.2056,0.3333,0.0,0.3333,0.0,0.3333,0.3333,0.3333,0.0,0.0,0.3333
0,ionosphere,351,33,35.8974,0.5676,0.8649,0.6486,0.7297,0.6486,0.8378,0.8108,0.4054,0.6486,0.7027
0,letter,1600,32,6.25,0.1351,0.2432,0.1622,0.4595,0.2703,0.4054,0.4054,0.1351,0.1351,0.4595
0,lympho,148,18,4.0541,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.3832,0.2664,0.3925,0.3131,0.4252,0.4299,0.3832,0.1168,0.3271,0.3131
0,musk,3062,166,3.1679,1.0,1.0,1.0,0.2593,1.0,0.2963,0.1111,1.0,1.0,0.2593
0,optdigits,5216,64,2.8758,0.0,0.0,0.0,0.0667,0.0,0.0,0.0,0.2889,0.0667,0.0667
0,pendigits,6870,16,2.2707,0.2,0.05,0.225,0.1,0.25,0.1,0.075,0.225,0.175,0.1


In [56]:
roc_df

Unnamed: 0,Data,Samples,Dimensions,Outlier Perc,PCA,MCD,OCSVM,LOF,CBLOF,KNN,ABOD,HBOS,IForest,FeatureBagging
0,arrhythmia,452,274,14.6018,0.7166,0.7477,0.7155,0.7172,0.7184,0.7359,0.7196,0.7547,0.7213,0.7061
0,cardio,1831,21,9.6122,0.9487,0.8665,0.9383,0.5982,0.875,0.7942,0.6406,0.8682,0.9141,0.6092
0,glass,214,9,4.2056,0.6505,0.7742,0.5968,0.7796,0.7742,0.8011,0.7312,0.7473,0.7043,0.8172
0,ionosphere,351,33,35.8974,0.7728,0.9671,0.8026,0.8813,0.8559,0.9181,0.9287,0.6063,0.8308,0.8766
0,letter,1600,32,6.25,0.5285,0.8091,0.5945,0.8781,0.7671,0.8686,0.8636,0.6022,0.6589,0.8546
0,lympho,148,18,4.0541,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
0,mnist,7603,100,9.2069,0.8529,0.8461,0.8537,0.7021,0.851,0.8432,0.7657,0.571,0.7934,0.695
0,musk,3062,166,3.1679,1.0,1.0,1.0,0.5718,1.0,0.8072,0.1734,1.0,1.0,0.5641
0,optdigits,5216,64,2.8758,0.5331,0.3694,0.5249,0.4769,0.7677,0.3911,0.4781,0.8617,0.7918,0.4728
0,pendigits,6870,16,2.2707,0.9156,0.8033,0.9098,0.4664,0.9538,0.7258,0.6474,0.8779,0.9327,0.4611
