In [1]:
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

from sklearn import preprocessing

from pyod.models.ocsvm import OCSVM
from pyod.models.deep_svdd import DeepSVDD
from pyod.models.base import BaseDetector
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.metrics import balanced_accuracy_score

In [2]:
dataset = loadmat('shuttle.mat')

In [3]:
dataset

{'__header__': b'MATLAB 5.0 MAT-file, Platform: MACI64, Created on: Tue Apr 26 18:16:23 2016',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 50,  21,  77, ...,  27,  48,  22],
        [ 53,   0,  82, ...,  29,  30,   2],
        [ 37,   0,  76, ...,  40,  48,   8],
        ...,
        [ 49,   0,  87, ...,  38,  41,   2],
        [ 80,   0,  84, ...,   4, 120, 116],
        [ 37,   0, 103, ...,  66,  85,  20]], dtype=int16),
 'y': array([[1],
        [0],
        [0],
        ...,
        [0],
        [1],
        [0]], dtype=uint8)}

In [4]:
X_train, X_test, y_train, y_test = train_test_split(dataset['X'], dataset['y'], train_size=0.5)

In [5]:
normalizer = preprocessing.Normalizer()
X_train = normalizer.fit_transform(X_train)
X_test = normalizer.transform(X_test)

In [6]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((24548, 9), (24549, 9), (24548, 1), (24549, 1))

In [7]:
def evaluate_model(model_type: BaseDetector, model_args: dict, train_data, test_data, train_labels, test_labels):
    model = model_type(**model_args)
    model.fit(train_data)
    y_pred = model.predict(test_data)
    auc = roc_auc_score(test_labels, y_pred)
    bal_acc = balanced_accuracy_score(test_labels, y_pred)
    print(f'Model {model_type.__name__} AUC: {auc}, Balanced Accuracy: {bal_acc}')

In [10]:
evaluate_model(OCSVM, {'contamination': 0.1}, X_train, X_test, y_train, y_test)
evaluate_model(OCSVM, {'contamination': 0.2}, X_train, X_test, y_train, y_test)
evaluate_model(DeepSVDD, {'n_features': X_train.shape[1], 'epochs': 10, 'hidden_neurons': [64, 32, 16]}, X_train, X_test, y_train, y_test)
evaluate_model(DeepSVDD, {'n_features': X_train.shape[1], 'epochs': 10, 'hidden_neurons': [128, 64, 32]}, X_train, X_test, y_train, y_test)
evaluate_model(DeepSVDD, {'n_features': X_train.shape[1], 'epochs': 10, 'hidden_neurons': [1024, 512, 256, 128, 64]}, X_train, X_test, y_train, y_test)

Model OCSVM AUC: 0.975447403924457, Balanced Accuracy: 0.9754474039244571
Model OCSVM AUC: 0.9274310145744227, Balanced Accuracy: 0.9274310145744227
Epoch 1/10, Loss: 172.73445318639278
Epoch 2/10, Loss: 173.59464502334595
Epoch 3/10, Loss: 172.70170405507088
Epoch 4/10, Loss: 174.2941930219531
Epoch 5/10, Loss: 172.52147118002176
Epoch 6/10, Loss: 174.22789814323187
Epoch 7/10, Loss: 174.05153877288103
Epoch 8/10, Loss: 174.4821264743805
Epoch 9/10, Loss: 175.1611326932907
Epoch 10/10, Loss: 175.51774188876152
Model DeepSVDD AUC: 0.9781373625388011, Balanced Accuracy: 0.9781373625388012
Epoch 1/10, Loss: 128.89461359754205
Epoch 2/10, Loss: 130.39587535709143
Epoch 3/10, Loss: 130.38112896680832
Epoch 4/10, Loss: 129.9434836320579
Epoch 5/10, Loss: 129.52119014039636
Epoch 6/10, Loss: 130.28410729020834
Epoch 7/10, Loss: 130.0515279993415
Epoch 8/10, Loss: 129.79421920329332
Epoch 9/10, Loss: 129.71573555469513
Epoch 10/10, Loss: 129.65603530034423
Model DeepSVDD AUC: 0.98421974277842