# Run customized algorithms with ADBench
- Here we provide an example of testing 3 AD algorithms on 4 datasets; any customized algorithm can also be evaluated in ADBench.
- To reproduce the complete experimental results of ADBench, please run the code in the run.py file.

In [1]:
import pandas as pd

# Silence every warning for the rest of the session. The filter is installed
# before the project modules below are imported, so it is already active
# while they load.
import warnings
warnings.filterwarnings("ignore")

# import the necessary package
# (project-local modules: the dataset generator and the evaluation utilities)
from data_generator import DataGenerator
from myutils import Utils

# Shared instances reused by the cells below:
#   datagenerator - receives the dataset name and produces the data splits
#   utils         - provides metric(), which scores the predictions
datagenerator = DataGenerator()
utils = Utils()

- 3 algorithms: unsupervised IForest, semi-supervised DevNet and fully-supervised CatB
- 4 datasets: cardio, musk, optdigits and vowels

In [2]:
from baseline.PyOD import PYOD
from baseline.DevNet.run import DevNet
from baseline.Supervised import supervised
from baseline.REPEN.run import REPEN
from baseline.DevNet.run import DevNet
from baseline.PReNet.run import PReNet
from baseline.FEAWAD.run import FEAWAD
from baseline.DAGMM.run import DAGMM # Unsup
from baseline.DeepSAD.src.run import DeepSAD # Semi

# Datasets to evaluate, and the mapping from model name to the wrapper class
# that implements it (several names share the PYOD wrapper).
dataset_list = ['6_cardio.npz']
model_dict = {
    'DeepSVDD': PYOD,
    'DAGMM': DAGMM,
    'COPOD': PYOD,
    'ECOD': PYOD,
    'XGBOD': PYOD,
    'DeepSAD': DeepSAD,
    'REPEN': REPEN,
    'DevNet': DevNet,
    'PReNet': PReNet,
    'FEAWAD': FEAWAD,
}

# Empty result tables, one per metric: rows are datasets, columns are models.
df_AUCROC = pd.DataFrame(index=dataset_list, columns=list(model_dict))
df_AUCPR = pd.DataFrame(index=dataset_list, columns=list(model_dict))

In [None]:
# Fixed seed passed to every model so that runs are reproducible.
seed = 42

for dataset in dataset_list:
    # Arguments accepted by datagenerator.generator:
    #   la: ratio of labeled anomalies, from 0.0 to 1.0
    #   realistic_synthetic_mode: type of synthetic anomalies; can be local,
    #       global, dependency or cluster
    #   noise_type: injected data noise for testing model robustness; can be
    #       duplicated_anomalies, irrelevant_features or label_contamination

    # Select the dataset by name, then build its splits.
    # la=0.1: only 10% of the labeled anomalies are available.
    datagenerator.dataset = dataset
    data = datagenerator.generator(la=0.1, realistic_synthetic_mode=None, noise_type=None)

    for name, clf in model_dict.items():
        # Instantiate the model. DevNet uses early stopping and saves its
        # model parameters, so it additionally takes a save_suffix.
        clf = (
            clf(seed=seed, model_name=name, save_suffix='test')
            if name == 'DevNet'
            else clf(seed=seed, model_name=name)
        )

        # Train the model; unsupervised models discard the y labels.
        clf = clf.fit(X_train=data['X_train'], y_train=data['y_train'])

        # Predicted anomaly scores on the testing set. DAGMM's predict_score
        # takes the training data as an extra leading argument.
        score = (
            clf.predict_score(data['X_train'], data['X_test'])
            if name == "DAGMM"
            else clf.predict_score(data['X_test'])
        )

        # Evaluate the scores against the ground-truth test labels.
        result = utils.metric(y_true=data['y_test'], y_score=score)

        # Record both metrics in the (dataset, model) cell of each table.
        df_AUCROC.loc[dataset, name] = result['aucroc']
        df_AUCPR.loc[dataset, name] = result['aucpr']

current noise type: None
{'Samples': 1831, 'Features': 21, 'Anomalies': 176, 'Anomalies Ratio(%)': 9.61}
best param: None
Model: "model_47"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_32 (InputLayer)       [(None, 21)]              0         
                                                                 
 dense_11 (Dense)            (None, 64)                1344      
                                                                 
 net_output (Dense)          (None, 32)                2048      
                                                                 
 tf.math.subtract_11 (TFOpLa  (None, 32)               0         
 mbda)                                                           
                                                                 
 tf.math.pow_11 (TFOpLambda)  (None, 32)               0         
                                                                 
 t

In [11]:
# Display the AUC-ROC table (rows: datasets, columns: models).
df_AUCROC

Unnamed: 0,DeepSVDD,IForest,DevNet,CatB
6_cardio.npz,0.689495,0.944193,0.992901,0.983827
25_musk.npz,0.655909,1.0,1.0,1.0
26_optdigits.npz,0.324357,0.825365,1.0,0.996506
37_speech.npz,0.545598,0.484273,0.666871,0.569036
41_vowels.npz,0.559558,0.780727,0.91485,0.848973


In [12]:
# Display the AUC-PR table (rows: datasets, columns: models).
df_AUCPR

Unnamed: 0,DeepSVDD,IForest,DevNet,CatB
6_cardio.npz,0.323306,0.615718,0.953546,0.909086
25_musk.npz,0.183157,1.0,1.0,1.0
26_optdigits.npz,0.020052,0.076759,1.0,0.895495
37_speech.npz,0.01881,0.016013,0.083006,0.022035
41_vowels.npz,0.046331,0.389675,0.582586,0.533293


In [12]:
# Display the metric dict ('aucroc' / 'aucpr') left over from the most recent
# utils.metric call, i.e. the last (dataset, model) pair that was evaluated.
result

{'aucroc': 0.8489731437598735, 'aucpr': 0.5332927688096551}

In [6]:
import tensorflow as tf

# Sanity check of the TensorFlow installation and its GPU visibility.
print(tf.__version__)

# 1: physical GPU devices that TensorFlow can see
print('1: ', tf.config.list_physical_devices('GPU'))
# 2: whether this TensorFlow build has CUDA support. is_built_with_cuda is a
#    function and must be called; printing the bare attribute only shows the
#    function object instead of True/False.
print('2: ', tf.test.is_built_with_cuda())
# 3: name of the default GPU device (empty string when none is available)
print('3: ', tf.test.gpu_device_name())
# 4: devices currently visible to the runtime
print('4: ', tf.config.get_visible_devices())

2.9.1
1:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
2:  <function is_built_with_cuda at 0x0000016E30D45280>
3:  /device:GPU:0
4:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [7]:
import torch

# Pick the first CUDA device when CUDA is usable and at least one GPU is
# present; otherwise fall back to the CPU.
ngpu = torch.cuda.device_count()
if torch.cuda.is_available() and ngpu > 0:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [8]:
import torch
# Report whether PyTorch can use CUDA in this environment.
torch.cuda.is_available()


True