In [1]:
import os
import sys
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.io import loadmat

In [2]:
from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

In [3]:
from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [4]:
mat_file_list = ['arrhythmia.mat',
                 'cardio.mat',
                 'glass.mat',
                 'ionosphere.mat',
                 'letter.mat',
                 'lympho.mat',
                 'mnist.mat',
                 'musk.mat',
                 'optdigits.mat',
                 'pendigits.mat',
                 'pima.mat',
                 'satellite.mat',
                 'satimage-2.mat',
                 'shuttle.mat',
                 'vertebral.mat',
                 'vowels.mat',
                 'wbc.mat']
mat_file_list

['arrhythmia.mat',
 'cardio.mat',
 'glass.mat',
 'ionosphere.mat',
 'letter.mat',
 'lympho.mat',
 'mnist.mat',
 'musk.mat',
 'optdigits.mat',
 'pendigits.mat',
 'pima.mat',
 'satellite.mat',
 'satimage-2.mat',
 'shuttle.mat',
 'vertebral.mat',
 'vowels.mat',
 'wbc.mat']

In [5]:
from scipy.io import loadmat

In [6]:
data=loadmat('C:\\Users\\Dhruvraj\\Desktop\\LETS UPGRADE\\LetsUpgrade AI ML\\csv\\anomaly\\cardio.mat')

In [7]:
data

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [8]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

In [9]:

data.values()

dict_values([b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC', '1.0', [], array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
        -0.28978574, -0.49329397],
       [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
        -0.25638541, -0.49329397],
       [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
        -0.25638541,  1.14001753],
       ...,
       [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
         0.24461959, -0.49329397],
       [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
         0.14441859, -0.49329397],
       [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
         3.58465295, -0.49329397]]), array([[0.],
       [0.],
       [0.],
       ...,
       [1.],
       [1.],
       [1.]])])

In [10]:
type(data['X']),data['X'].shape

(numpy.ndarray, (1831, 21))

In [11]:
type(data['y']),data['y'].shape

(numpy.ndarray, (1831, 1))

In [12]:
df_columns = ['Data', '#Samples', '# Dimensions', 'Outlier Perc',
              'ABOD', 'CBLOF', 'FB', 'HBOS', 'IForest', 'KNN', 'LOF', 'MCD',
              'OCSVM', 'PCA']

In [13]:
roc_df = pd.DataFrame(columns=df_columns)
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


In [14]:
prn_df = pd.DataFrame(columns=df_columns)
prn_df


Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


In [15]:
time_df = pd.DataFrame(columns=df_columns)
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA


In [17]:
from time import time
random_state = np.random.RandomState(30)

for mat_file in mat_file_list:
    print("\n... Processing", mat_file, '...')
    mat = loadmat('C:\\Users\\Dhruvraj\\Desktop\\LETS UPGRADE\\LetsUpgrade AI ML\\csv\\anomaly\\'+mat_file)

    X = mat['X']
    y = mat['y'].ravel()
    outliers_fraction = np.count_nonzero(y) / len(y)
    outliers_percentage = round(outliers_fraction * 100, ndigits=4)

    # construct containers for saving results
    roc_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    prn_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]
    time_list = [mat_file[:-4], X.shape[0], X.shape[1], outliers_percentage]

    # 60% data for training and 40% for testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                        random_state=random_state)

    # standardizing data for processing
    X_train_norm, X_test_norm = standardizer(X_train, X_test)

    classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
        contamination=outliers_fraction),
        'Cluster-based Local Outlier Factor': CBLOF(
            contamination=outliers_fraction, check_estimator=False,
            random_state=random_state),
        'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                          random_state=random_state),
        'Histogram-base Outlier Detection (HBOS)': HBOS(
            contamination=outliers_fraction),
        'Isolation Forest': IForest(contamination=outliers_fraction,
                                    random_state=random_state),
        'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
        'Local Outlier Factor (LOF)': LOF(
            contamination=outliers_fraction),
        'Minimum Covariance Determinant (MCD)': MCD(
            contamination=outliers_fraction, random_state=random_state),
        'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
        'Principal Component Analysis (PCA)': PCA(
            contamination=outliers_fraction, random_state=random_state),
    }

    for clf_name, clf in classifiers.items():
        t0 = time()
        clf.fit(X_train_norm)
        test_scores = clf.decision_function(X_test_norm)
        t1 = time()
        duration = round(t1 - t0, ndigits=4)
        time_list.append(duration)

        roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
        prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

        print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
              'execution time: {duration}s'.format(
            clf_name=clf_name, roc=roc, prn=prn, duration=duration))

        roc_list.append(roc)
        prn_list.append(prn)

    temp_df = pd.DataFrame(time_list).transpose()
    temp_df.columns = df_columns
    time_df = pd.concat([time_df, temp_df], axis=0)

    temp_df = pd.DataFrame(roc_list).transpose()
    temp_df.columns = df_columns
    roc_df = pd.concat([roc_df, temp_df], axis=0)

    temp_df = pd.DataFrame(prn_list).transpose()
    temp_df.columns = df_columns
    prn_df = pd.concat([prn_df, temp_df], axis=0)


... Processing arrhythmia.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7895, precision @ rank n:0.3333, execution time: 0.3473s




Cluster-based Local Outlier Factor ROC:0.7447, precision @ rank n:0.4167, execution time: 0.2942s
Feature Bagging ROC:0.7688, precision @ rank n:0.375, execution time: 1.299s
Histogram-base Outlier Detection (HBOS) ROC:0.801, precision @ rank n:0.4583, execution time: 0.1582s
Isolation Forest ROC:0.7853, precision @ rank n:0.5, execution time: 1.087s
K Nearest Neighbors (KNN) ROC:0.7625, precision @ rank n:0.4167, execution time: 0.3276s
Local Outlier Factor (LOF) ROC:0.776, precision @ rank n:0.375, execution time: 0.1588s




Minimum Covariance Determinant (MCD) ROC:0.7678, precision @ rank n:0.375, execution time: 9.0441s
One-class SVM (OCSVM) ROC:0.7649, precision @ rank n:0.4167, execution time: 0.1301s
Principal Component Analysis (PCA) ROC:0.7622, precision @ rank n:0.4167, execution time: 0.2907s

... Processing cardio.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6001, precision @ rank n:0.1746, execution time: 0.9424s




Cluster-based Local Outlier Factor ROC:0.7067, precision @ rank n:0.3016, execution time: 0.2648s
Feature Bagging ROC:0.6363, precision @ rank n:0.1905, execution time: 1.7135s
Histogram-base Outlier Detection (HBOS) ROC:0.857, precision @ rank n:0.4286, execution time: 0.0146s
Isolation Forest ROC:0.9152, precision @ rank n:0.4762, execution time: 1.2705s
K Nearest Neighbors (KNN) ROC:0.7787, precision @ rank n:0.3175, execution time: 0.4468s
Local Outlier Factor (LOF) ROC:0.5984, precision @ rank n:0.1746, execution time: 0.2376s




Minimum Covariance Determinant (MCD) ROC:0.8096, precision @ rank n:0.4444, execution time: 3.7981s
One-class SVM (OCSVM) ROC:0.9374, precision @ rank n:0.4444, execution time: 0.2234s
Principal Component Analysis (PCA) ROC:0.9504, precision @ rank n:0.5238, execution time: 0.019s

... Processing glass.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8312, precision @ rank n:0.1667, execution time: 0.2177s
Cluster-based Local Outlier Factor ROC:0.8979, precision @ rank n:0.1667, execution time: 0.0893s
Feature Bagging ROC:0.8938, precision @ rank n:0.3333, execution time: 0.0964s




Histogram-base Outlier Detection (HBOS) ROC:0.7562, precision @ rank n:0.1667, execution time: 0.008s
Isolation Forest ROC:0.7563, precision @ rank n:0.1667, execution time: 0.7435s
K Nearest Neighbors (KNN) ROC:0.8646, precision @ rank n:0.1667, execution time: 0.0399s
Local Outlier Factor (LOF) ROC:0.8958, precision @ rank n:0.3333, execution time: 0.0113s




Minimum Covariance Determinant (MCD) ROC:0.8271, precision @ rank n:0.1667, execution time: 0.1778s
One-class SVM (OCSVM) ROC:0.5125, precision @ rank n:0.1667, execution time: 0.0042s
Principal Component Analysis (PCA) ROC:0.6604, precision @ rank n:0.1667, execution time: 0.0081s

... Processing ionosphere.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9041, precision @ rank n:0.8235, execution time: 0.189s
Cluster-based Local Outlier Factor ROC:0.9111, precision @ rank n:0.7843, execution time: 0.1072s




Feature Bagging ROC:0.9017, precision @ rank n:0.7647, execution time: 0.1583s
Histogram-base Outlier Detection (HBOS) ROC:0.6017, precision @ rank n:0.3529, execution time: 0.0395s
Isolation Forest ROC:0.8458, precision @ rank n:0.6471, execution time: 1.461s
K Nearest Neighbors (KNN) ROC:0.9135, precision @ rank n:0.8235, execution time: 0.1168s
Local Outlier Factor (LOF) ROC:0.9065, precision @ rank n:0.7647, execution time: 0.0445s
Minimum Covariance Determinant (MCD) ROC:0.9519, precision @ rank n:0.8431, execution time: 0.9871s
One-class SVM (OCSVM) ROC:0.8388, precision @ rank n:0.7451, execution time: 0.0217s
Principal Component Analysis (PCA) ROC:0.79, precision @ rank n:0.5686, execution time: 0.0088s

... Processing letter.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8836, precision @ rank n:0.4571, execution time: 1.5575s




Cluster-based Local Outlier Factor ROC:0.7731, precision @ rank n:0.2857, execution time: 0.2145s
Feature Bagging ROC:0.8519, precision @ rank n:0.4, execution time: 1.7835s
Histogram-base Outlier Detection (HBOS) ROC:0.6493, precision @ rank n:0.0571, execution time: 0.0217s
Isolation Forest ROC:0.6337, precision @ rank n:0.0571, execution time: 0.9143s
K Nearest Neighbors (KNN) ROC:0.8868, precision @ rank n:0.3714, execution time: 0.2851s
Local Outlier Factor (LOF) ROC:0.8354, precision @ rank n:0.4, execution time: 0.1757s
Minimum Covariance Determinant (MCD) ROC:0.8454, precision @ rank n:0.1429, execution time: 11.0448s
One-class SVM (OCSVM) ROC:0.5951, precision @ rank n:0.1429, execution time: 0.2155s
Principal Component Analysis (PCA) ROC:0.5413, precision @ rank n:0.0571, execution time: 0.0235s

... Processing lympho.mat ...
Angle-based Outlier Detector (ABOD) ROC:1.0, precision @ rank n:1.0, execution time: 0.0847s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.1297s
Feature Bagging ROC:1.0, precision @ rank n:1.0, execution time: 0.1s
Histogram-base Outlier Detection (HBOS) ROC:1.0, precision @ rank n:1.0, execution time: 0.0124s
Isolation Forest ROC:1.0, precision @ rank n:1.0, execution time: 1.1073s
K Nearest Neighbors (KNN) ROC:1.0, precision @ rank n:1.0, execution time: 0.0381s
Local Outlier Factor (LOF) ROC:1.0, precision @ rank n:1.0, execution time: 0.0094s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 0.2088s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 0.0082s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.0058s

... Processing mnist.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7673, precision @ rank n:0.32, execution time: 17.9729s




Cluster-based Local Outlier Factor ROC:0.8091, precision @ rank n:0.3273, execution time: 1.2524s
Feature Bagging ROC:0.753, precision @ rank n:0.3527, execution time: 110.6102s
Histogram-base Outlier Detection (HBOS) ROC:0.5744, precision @ rank n:0.1055, execution time: 0.1189s
Isolation Forest ROC:0.8055, precision @ rank n:0.3345, execution time: 4.0109s
K Nearest Neighbors (KNN) ROC:0.8407, precision @ rank n:0.3891, execution time: 14.8087s
Local Outlier Factor (LOF) ROC:0.7291, precision @ rank n:0.3236, execution time: 14.794s




Minimum Covariance Determinant (MCD) ROC:0.8383, precision @ rank n:0.2873, execution time: 36.6786s
One-class SVM (OCSVM) ROC:0.8208, precision @ rank n:0.36, execution time: 11.4146s
Principal Component Analysis (PCA) ROC:0.8231, precision @ rank n:0.3527, execution time: 0.3455s

... Processing musk.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.2022, precision @ rank n:0.0882, execution time: 5.7861s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.4488s
Feature Bagging ROC:0.6164, precision @ rank n:0.3235, execution time: 25.8016s
Histogram-base Outlier Detection (HBOS) ROC:0.9999, precision @ rank n:0.9706, execution time: 0.0844s
Isolation Forest ROC:0.9976, precision @ rank n:0.8235, execution time: 2.0262s
K Nearest Neighbors (KNN) ROC:0.7879, precision @ rank n:0.2941, execution time: 4.4224s
Local Outlier Factor (LOF) ROC:0.6232, precision @ rank n:0.3235, execution time: 3.5104s
Minimum Covariance Determinant (MCD) ROC:1.0, precision @ rank n:1.0, execution time: 62.0611s
One-class SVM (OCSVM) ROC:1.0, precision @ rank n:1.0, execution time: 1.9192s
Principal Component Analysis (PCA) ROC:1.0, precision @ rank n:1.0, execution time: 0.2585s

... Processing optdigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5215, precision @ rank n:0.0308, execution time: 4.0647s




Cluster-based Local Outlier Factor ROC:0.7791, precision @ rank n:0.0, execution time: 0.4944s
Feature Bagging ROC:0.495, precision @ rank n:0.0154, execution time: 27.5305s
Histogram-base Outlier Detection (HBOS) ROC:0.8653, precision @ rank n:0.1846, execution time: 0.0661s
Isolation Forest ROC:0.6918, precision @ rank n:0.0154, execution time: 1.6053s
K Nearest Neighbors (KNN) ROC:0.3781, precision @ rank n:0.0, execution time: 3.5675s
Local Outlier Factor (LOF) ROC:0.5069, precision @ rank n:0.0308, execution time: 2.8576s




Minimum Covariance Determinant (MCD) ROC:0.4368, precision @ rank n:0.0, execution time: 7.5252s
One-class SVM (OCSVM) ROC:0.5062, precision @ rank n:0.0, execution time: 2.633s
Principal Component Analysis (PCA) ROC:0.5103, precision @ rank n:0.0, execution time: 0.0979s

... Processing pendigits.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6817, precision @ rank n:0.0508, execution time: 2.4217s




Cluster-based Local Outlier Factor ROC:0.9707, precision @ rank n:0.4068, execution time: 0.4509s
Feature Bagging ROC:0.5441, precision @ rank n:0.1186, execution time: 7.3058s
Histogram-base Outlier Detection (HBOS) ROC:0.9332, precision @ rank n:0.339, execution time: 0.0164s
Isolation Forest ROC:0.9594, precision @ rank n:0.3898, execution time: 1.4524s
K Nearest Neighbors (KNN) ROC:0.7682, precision @ rank n:0.1186, execution time: 1.1067s
Local Outlier Factor (LOF) ROC:0.5454, precision @ rank n:0.1017, execution time: 0.9792s
Minimum Covariance Determinant (MCD) ROC:0.8434, precision @ rank n:0.1017, execution time: 11.7817s
One-class SVM (OCSVM) ROC:0.9253, precision @ rank n:0.3729, execution time: 1.626s
Principal Component Analysis (PCA) ROC:0.9348, precision @ rank n:0.3898, execution time: 0.0243s

... Processing pima.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.7085, precision @ rank n:0.5586, execution time: 0.2191s
Cluster-based Local Outlier Factor ROC:0.6861, prec



Feature Bagging ROC:0.6317, precision @ rank n:0.4955, execution time: 0.2118s
Histogram-base Outlier Detection (HBOS) ROC:0.7, precision @ rank n:0.5586, execution time: 0.0098s
Isolation Forest ROC:0.7018, precision @ rank n:0.5405, execution time: 0.7332s
K Nearest Neighbors (KNN) ROC:0.7247, precision @ rank n:0.5766, execution time: 0.082s
Local Outlier Factor (LOF) ROC:0.6459, precision @ rank n:0.4775, execution time: 0.0241s
Minimum Covariance Determinant (MCD) ROC:0.7033, precision @ rank n:0.5495, execution time: 0.1704s
One-class SVM (OCSVM) ROC:0.6564, precision @ rank n:0.5045, execution time: 0.0187s
Principal Component Analysis (PCA) ROC:0.6786, precision @ rank n:0.5405, execution time: 0.0018s

... Processing satellite.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.5733, precision @ rank n:0.3863, execution time: 2.9641s




Cluster-based Local Outlier Factor ROC:0.7781, precision @ rank n:0.6149, execution time: 0.5192s
Feature Bagging ROC:0.5589, precision @ rank n:0.3814, execution time: 10.2603s
Histogram-base Outlier Detection (HBOS) ROC:0.7711, precision @ rank n:0.5807, execution time: 0.0339s
Isolation Forest ROC:0.6855, precision @ rank n:0.555, execution time: 1.1581s
K Nearest Neighbors (KNN) ROC:0.69, precision @ rank n:0.5024, execution time: 1.3966s
Local Outlier Factor (LOF) ROC:0.5599, precision @ rank n:0.3851, execution time: 1.4583s
Minimum Covariance Determinant (MCD) ROC:0.8099, precision @ rank n:0.6895, execution time: 7.074s
One-class SVM (OCSVM) ROC:0.6666, precision @ rank n:0.5416, execution time: 1.818s
Principal Component Analysis (PCA) ROC:0.6048, precision @ rank n:0.4841, execution time: 0.0321s

... Processing satimage-2.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.8432, precision @ rank n:0.1905, execution time: 2.2652s




Cluster-based Local Outlier Factor ROC:1.0, precision @ rank n:1.0, execution time: 0.284s
Feature Bagging ROC:0.5526, precision @ rank n:0.0, execution time: 7.1435s
Histogram-base Outlier Detection (HBOS) ROC:0.992, precision @ rank n:0.7619, execution time: 0.0284s
Isolation Forest ROC:0.999, precision @ rank n:0.9524, execution time: 1.0086s
K Nearest Neighbors (KNN) ROC:0.9556, precision @ rank n:0.2857, execution time: 1.2991s
Local Outlier Factor (LOF) ROC:0.5463, precision @ rank n:0.0, execution time: 1.0393s
Minimum Covariance Determinant (MCD) ROC:0.9954, precision @ rank n:0.5714, execution time: 6.6928s
One-class SVM (OCSVM) ROC:0.9999, precision @ rank n:0.9524, execution time: 1.4082s
Principal Component Analysis (PCA) ROC:0.9933, precision @ rank n:0.9048, execution time: 0.0244s

... Processing shuttle.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.6174, precision @ rank n:0.1845, execution time: 19.9351s




Cluster-based Local Outlier Factor ROC:0.592, precision @ rank n:0.2381, execution time: 0.7086s
Feature Bagging ROC:0.474, precision @ rank n:0.1006, execution time: 82.2874s
Histogram-base Outlier Detection (HBOS) ROC:0.9838, precision @ rank n:0.9754, execution time: 0.0133s
Isolation Forest ROC:0.9979, precision @ rank n:0.9413, execution time: 4.0996s
K Nearest Neighbors (KNN) ROC:0.6459, precision @ rank n:0.2042, execution time: 9.9672s
Local Outlier Factor (LOF) ROC:0.5236, precision @ rank n:0.1259, execution time: 11.32s






Minimum Covariance Determinant (MCD) ROC:0.9904, precision @ rank n:0.7489, execution time: 23.4201s
One-class SVM (OCSVM) ROC:0.9918, precision @ rank n:0.9544, execution time: 61.0575s
Principal Component Analysis (PCA) ROC:0.9897, precision @ rank n:0.9515, execution time: 0.04s

... Processing vertebral.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.4167, precision @ rank n:0.1667, execution time: 0.0664s




Cluster-based Local Outlier Factor ROC:0.437, precision @ rank n:0.0, execution time: 0.0593s
Feature Bagging ROC:0.4167, precision @ rank n:0.1667, execution time: 0.0353s
Histogram-base Outlier Detection (HBOS) ROC:0.2593, precision @ rank n:0.0, execution time: 0.0028s
Isolation Forest ROC:0.363, precision @ rank n:0.0, execution time: 0.5433s
K Nearest Neighbors (KNN) ROC:0.4352, precision @ rank n:0.0, execution time: 0.0174s
Local Outlier Factor (LOF) ROC:0.4204, precision @ rank n:0.1667, execution time: 0.0081s
Minimum Covariance Determinant (MCD) ROC:0.4556, precision @ rank n:0.0, execution time: 0.1187s
One-class SVM (OCSVM) ROC:0.4407, precision @ rank n:0.0, execution time: 0.0061s
Principal Component Analysis (PCA) ROC:0.2963, precision @ rank n:0.0, execution time: 0.0082s

... Processing vowels.mat ...




Angle-based Outlier Detector (ABOD) ROC:0.9209, precision @ rank n:0.4375, execution time: 0.3507s
Cluster-based Local Outlier Factor ROC:0.9396, precision @ rank n:0.4118, execution time: 0.1335s




Feature Bagging ROC:0.9415, precision @ rank n:0.3529, execution time: 0.3769s
Histogram-base Outlier Detection (HBOS) ROC:0.7061, precision @ rank n:0.1176, execution time: 0.0051s
Isolation Forest ROC:0.7372, precision @ rank n:0.1765, execution time: 0.7028s
K Nearest Neighbors (KNN) ROC:0.9672, precision @ rank n:0.5294, execution time: 0.0926s
Local Outlier Factor (LOF) ROC:0.9374, precision @ rank n:0.2941, execution time: 0.0374s
Minimum Covariance Determinant (MCD) ROC:0.7191, precision @ rank n:0.0588, execution time: 2.3377s
One-class SVM (OCSVM) ROC:0.7922, precision @ rank n:0.2941, execution time: 0.0387s
Principal Component Analysis (PCA) ROC:0.5946, precision @ rank n:0.1176, execution time: 0.0037s

... Processing wbc.mat ...
Angle-based Outlier Detector (ABOD) ROC:0.9005, precision @ rank n:0.3333, execution time: 0.1811s
Cluster-based Local Outlier Factor ROC:0.9075, precision @ rank n:0.4444, execution time: 0.1022s
Feature Bagging ROC:0.9192, precision @ rank n:0.44



Histogram-base Outlier Detection (HBOS) ROC:0.9503, precision @ rank n:0.6667, execution time: 0.011s
Isolation Forest ROC:0.9316, precision @ rank n:0.4444, execution time: 0.4551s
K Nearest Neighbors (KNN) ROC:0.9277, precision @ rank n:0.4444, execution time: 0.0213s
Local Outlier Factor (LOF) ROC:0.9239, precision @ rank n:0.5556, execution time: 0.008s
Minimum Covariance Determinant (MCD) ROC:0.9021, precision @ rank n:0.3333, execution time: 0.2224s
One-class SVM (OCSVM) ROC:0.9277, precision @ rank n:0.4444, execution time: 0.001s
Principal Component Analysis (PCA) ROC:0.9231, precision @ rank n:0.4444, execution time: 0.0014s


In [18]:
roc_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.7456,0.7901,0.7714,0.8159,0.8458,0.783,0.774,0.7882,0.7811,0.7811
0,cardio,1831,21,9.6122,0.6097,0.7981,0.5617,0.8472,0.9226,0.7219,0.5534,0.8604,0.9263,0.9463
0,glass,214,9,4.2056,0.7975,0.8988,0.8469,0.7309,0.6247,0.8691,0.8642,0.7877,0.4247,0.4889
0,ionosphere,351,33,35.8974,0.9355,0.89,0.8849,0.5674,0.8625,0.9129,0.8846,0.9536,0.8163,0.761
0,letter,1600,32,6.25,0.8728,0.7704,0.8493,0.6206,0.6468,0.8559,0.8401,0.7924,0.6219,0.5607
0,lympho,148,18,4.0541,0.8973,0.9241,0.9643,0.9955,0.9777,0.9598,0.9643,0.9375,0.9643,0.9732
0,mnist,7603,100,9.2069,0.7859,0.8365,0.7414,0.5761,0.8178,0.8527,0.743,0.8886,0.8418,0.8416
0,musk,3062,166,3.1679,0.2095,1.0,0.6355,0.9998,0.996,0.7444,0.6699,0.999,1.0,1.0
0,optdigits,5216,64,2.8758,0.5233,0.7906,0.4683,0.8565,0.7721,0.3835,0.4596,0.389,0.5178,0.5225
0,pendigits,6870,16,2.2707,0.6131,0.9707,0.3381,0.9171,0.9288,0.7533,0.3332,0.8312,0.9329,0.9433


In [19]:
prn_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,0.4516,0.4516,0.4194,0.4516,0.5806,0.4516,0.4194,0.4194,0.4516,0.4516
0,cardio,1831,21,9.6122,0.2576,0.4697,0.1212,0.3788,0.4545,0.3182,0.0909,0.4394,0.4545,0.5758
0,glass,214,9,4.2056,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,ionosphere,351,33,35.8974,0.8333,0.7917,0.7292,0.2917,0.6875,0.8333,0.7083,0.875,0.7083,0.5833
0,letter,1600,32,6.25,0.3158,0.1579,0.3158,0.0789,0.1316,0.2368,0.2895,0.1579,0.1316,0.0789
0,lympho,148,18,4.0541,0.5,0.5,0.5,0.75,0.5,0.5,0.5,0.5,0.5,0.5
0,mnist,7603,100,9.2069,0.3962,0.4075,0.3396,0.1057,0.3132,0.4151,0.3245,0.4415,0.3547,0.3472
0,musk,3062,166,3.1679,0.125,1.0,0.3125,0.9688,0.8125,0.375,0.3125,0.9375,1.0,1.0
0,optdigits,5216,64,2.8758,0.0455,0.0,0.0758,0.197,0.0455,0.0,0.0758,0.0,0.0,0.0
0,pendigits,6870,16,2.2707,0.0469,0.3906,0.0156,0.3281,0.3438,0.0469,0.0312,0.0938,0.3594,0.3125


In [20]:
time_df

Unnamed: 0,Data,#Samples,# Dimensions,Outlier Perc,ABOD,CBLOF,FB,HBOS,IForest,KNN,LOF,MCD,OCSVM,PCA
0,arrhythmia,452,274,14.6018,2.8495,3.1863,1.4847,3.7958,0.8897,0.2006,0.1657,6.8453,0.09,0.2944
0,cardio,1831,21,9.6122,0.8855,0.2418,1.5842,0.0114,0.825,0.3199,0.2752,3.4449,0.1723,0.0239
0,glass,214,9,4.2056,0.077,0.1017,0.062,0.017,0.7663,0.0312,0.0,0.0871,0.0,0.0
0,ionosphere,351,33,35.8974,0.1399,0.0996,0.139,0.0174,0.7288,0.0324,0.0081,0.54,0.0169,0.0
0,letter,1600,32,6.25,0.8709,0.2352,1.6283,0.0204,0.8817,0.2912,0.1858,10.9702,0.1612,0.0178
0,lympho,148,18,4.0541,0.0611,0.1028,0.071,0.0169,0.6629,0.0119,0.0086,0.1533,0.0,0.0043
0,mnist,7603,100,9.2069,15.621,1.494,94.089,0.116,4.1425,14.787,13.8581,31.5735,9.747,0.327
0,musk,3062,166,3.1679,4.8472,0.4844,24.2625,0.1137,2.413,3.7544,3.6666,134.129,2.5026,0.4518
0,optdigits,5216,64,2.8758,5.6729,0.5402,27.2354,0.0752,2.1165,3.7783,3.433,14.143,2.8119,0.116
0,pendigits,6870,16,2.2707,3.6925,0.4209,10.9487,0.0257,2.0733,1.8122,1.4726,6.5873,1.9457,0.0104
