In [1]:
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# import the necessary package
from data_generator import DataGenerator
from myutils import Utils

datagenerator = DataGenerator()
utils = Utils()

In [2]:
from baseline.PyOD import PYOD
from baseline.Supervised import supervised
from baseline.DAGMM.run import DAGMM # Unsup
from baseline.DeepSAD.src.run import DeepSAD # Semi
from baseline.REPEN.run import REPEN
from baseline.DevNet.run import DevNet
from baseline.PReNet.run import PReNet
from baseline.FEAWAD.run import FEAWAD

# dataset and model list / dict
dataset_list = ['6_cardio.npz']
model_dict = {'DeepSVDD':PYOD, 'DAGMM':DAGMM, 'COPOD': PYOD, 'ECOD': PYOD}

# save the results
df_AUCROC = pd.DataFrame(data=None, index=dataset_list, columns = model_dict.keys())
df_AUCPR = pd.DataFrame(data=None, index=dataset_list, columns = model_dict.keys())

In [3]:
# seed for reproducible results
seed = 42

for dataset in dataset_list:
    '''
    la: ratio of labeled anomalies, from 0.0 to 1.0
    realistic_synthetic_mode: types of synthetic anomalies, can be local, global, dependency or cluster
    noise_type: inject data noises for testing model robustness, can be duplicated_anomalies, irrelevant_features or label_contamination
    '''
    
    # import the dataset
    datagenerator.dataset = dataset # specify the dataset name
    data = datagenerator.generator(la=0.1, realistic_synthetic_mode=None, noise_type=None) # only 10% labeled anomalies are available
    for name, clf in model_dict.items():
        # model initialization
        print("*************************", name)
        if name == 'DevNet':
            clf = clf(seed=seed, model_name=name, save_suffix='test') # DevNet use early stopping to save the model parameter
        else:
            clf = clf(seed=seed, model_name=name)
        
        # training, for unsupervised models the y label will be discarded
        clf = clf.fit(X_train=data['X_train'], y_train=data['y_train'])
        
        # output predicted anomaly score on testing set
        score = clf.predict_score(data['X_test'])
#         if name =="DAGMM":
#             score = clf.predict_score(data['X_train'], data['X_test'])
#         else:
#             score = clf.predict_score(data['X_test'])

        # evaluation
        result = utils.metric(y_true=data['y_test'], y_score=score)
        
        # save results
        df_AUCROC.loc[dataset, name] = result['aucroc']
        df_AUCPR.loc[dataset, name] = result['aucpr']

current noise type: None
{'Samples': 1831, 'Features': 21, 'Anomalies': 176, 'Anomalies Ratio(%)': 9.61}
************************* DeepSVDD
best param: None
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 21)]              0         
                                                                 
 dense_1 (Dense)             (None, 64)                1344      
                                                                 
 net_output (Dense)          (None, 32)                2048      
                                                                 
 tf_op_layer_sub_1 (TensorFl  [(None, 32)]             0         
 owOpLayer)                                                      
                                                                 
 tf_op_layer_pow_1 (TensorFl  [(None, 32)]             0         
 owOpLayer)                       

ValueError: ('Error when checking model target: expected no data, but got:', array([[ 0.33103447, -0.23026209, -0.2054641 , ...,  0.22376698,
        -0.62831557, -0.51489416],
       [ 0.43623297,  0.69637158, -0.2054641 , ...,  1.04373026,
        -0.29046045, -0.51489416],
       [-0.82614903,  1.59547714, -0.2054641 , ..., -0.45953576,
        -0.45938801, -0.51489416],
       ...,
       [-0.61575203, -0.93031368, -0.2054641 , ..., -0.11788439,
        -0.62831557, -0.51489416],
       [-1.03654603,  0.08321484, -0.2054641 , ..., -0.73285685,
        -0.56074455, -0.51489416],
       [ 0.12063747, -0.93031368, -0.14054555, ..., -0.39120548,
        -0.4256025 ,  1.12177934]]))

In [35]:
df_AUCROC

Unnamed: 0,COPOD,ECOD,DeepSVDD,DAGMM
6_cardio.npz,0.928363,0.942827,0.689495,0.704567


In [36]:
df_AUCPR

Unnamed: 0,COPOD,ECOD,DeepSVDD,DAGMM
6_cardio.npz,0.604146,0.592825,0.323306,0.20108


In [None]:
# Printing performance metrics 
metrics_values = [['Accuracy', accuracy_score(y_test, y_pred)],
                 ['Precision', precision_score(y_test, y_pred)],
                 ['Recall', recall_score(y_test, y_pred)],
                 ['F1_score', f1_score(y_test, y_pred)]]
metrics_values_df = pd.DataFrame(metrics_values, columns=['Metrics', 'Result'])
print(metrics_values_df)

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)

# Visualization
ax = plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax)
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Actual Values')
ax.set_title('Confusion Matrix')
ax.xaxis.set_ticklabels(['Normal', 'Fraud'])
ax.yaxis.set_ticklabels(['Normal', 'Fraud'])