### Ex. 2.1.

In [197]:
import matplotlib as mpl
mpl.use("TkAgg")
import matplotlib.pyplot as plt
import numpy as np
import os
from pyod.models.base import BaseDetector
from pyod.models.deep_svdd import DeepSVDD
from pyod.models.ocsvm import OCSVM
from pyod.utils.data import generate_data
from sklearn.metrics import confusion_matrix, roc_auc_score

In [198]:
def compute_ba(model: BaseDetector, X: np.ndarray, y: np.ndarray) -> float:
    preds = model.predict(X)
    tn, fp, fn, tp = confusion_matrix(y_true=y, y_pred=preds).ravel()
    tpr = tp / (tp + fn)
    tnr = tn / (tn + fp)
    balanced_acc = (tpr + tnr) / 2

    return balanced_acc


def compute_roc_auc(model: BaseDetector, X: np.ndarray, y: np.ndarray) -> float:
    scores = model.decision_function(X)

    return roc_auc_score(y_true=y, y_score=scores)

In [199]:
def pipeline(data: tuple[np.ndarray], model: BaseDetector, figtitle: str) -> None:
    # Generate the data
    X_train, X_test, y_train, y_test = data
    # Fit and predict
    model.fit(X_train)
    preds_test = model.predict(X_test)
    preds_train = model.predict(X_train)
    print(f"Balanced Accuracy: {compute_ba(model, X_test, y_test):.6f}")
    print(f"ROC AUC: {compute_roc_auc(model, X_test, y_test):.6f}")
    # Plot
    # Train & GT
    fig = plt.figure(figsize=(7, 7))
    ax = fig.add_subplot(2, 2, 1, projection='3d')
    ax.scatter(*X_train[y_train == 0].T, c="blue")
    ax.scatter(*X_train[y_train == 1].T, c="red")
    ax.set_title("Train data & GT")
    # Train & preds
    ax = fig.add_subplot(2, 2, 2, projection='3d')
    ax.scatter(*X_train[preds_train == 0].T, c="blue")
    ax.scatter(*X_train[preds_train == 1].T, c="red")
    ax.set_title("Train data & preds")
    # Test & GT
    ax = fig.add_subplot(2, 2, 3, projection='3d')
    ax.scatter(*X_test[y_test == 0].T, c="blue")
    ax.scatter(*X_test[y_test == 1].T, c="red")
    ax.set_title("Test data & GT")
    # Test & preds
    ax = fig.add_subplot(2, 2, 4, projection='3d')
    ax.scatter(*X_test[preds_test == 0].T, c="blue")
    ax.scatter(*X_test[preds_test == 1].T, c="red")
    ax.set_title("Test data & preds")
    # Save and plot
    plt.suptitle(figtitle)
    plt.savefig(os.path.join("plots", figtitle))
    plt.show()

In [200]:
contamination = 0.15
data = generate_data(n_train=300, n_test=200, n_features=3, contamination=0.15)
pipeline(data, model=OCSVM(kernel="linear", contamination=contamination), figtitle="OCSVM Linear")
pipeline(data, model=OCSVM(kernel="rbf", contamination=contamination), figtitle="OCSVM RBF")
pipeline(data, model=DeepSVDD(n_features=3, contamination=contamination), figtitle="DeepSVDD")

Balanced Accuracy: 0.997059
ROC AUC: 1.000000
Balanced Accuracy: 1.000000
ROC AUC: 1.000000
Epoch 1/100, Loss: 7.551330149173737
Epoch 2/100, Loss: 7.313066691160202
Epoch 3/100, Loss: 7.401457488536835
Epoch 4/100, Loss: 7.960722357034683
Epoch 5/100, Loss: 7.430682003498077
Epoch 6/100, Loss: 7.517746388912201
Epoch 7/100, Loss: 7.799739062786102
Epoch 8/100, Loss: 7.655837118625641
Epoch 9/100, Loss: 7.528184473514557
Epoch 10/100, Loss: 7.644055008888245
Epoch 11/100, Loss: 7.542639076709747
Epoch 12/100, Loss: 7.38510075211525
Epoch 13/100, Loss: 7.4671937227249146
Epoch 14/100, Loss: 7.640761077404022
Epoch 15/100, Loss: 7.500731289386749
Epoch 16/100, Loss: 7.679833203554153
Epoch 17/100, Loss: 7.664327144622803
Epoch 18/100, Loss: 7.644629061222076
Epoch 19/100, Loss: 7.772035390138626
Epoch 20/100, Loss: 7.322074711322784
Epoch 21/100, Loss: 7.513035833835602
Epoch 22/100, Loss: 7.870510160923004
Epoch 23/100, Loss: 7.777105033397675
Epoch 24/100, Loss: 7.5382800698280334
Epoc

### Ex. 2.2.

In [201]:
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import OneClassSVM

In [202]:
def pyod_to_sklearn(labels: np.ndarray) -> np.ndarray:
    return -(labels * 2 - 1)


def sklearn_to_pyod(labels: np.ndarray) -> np.ndarray:
    return (-labels + 1) / 2


data = loadmat("cardio.mat")
X_cardio = data["X"]
y_cardio = data["y"]
X_train: np.ndarray
X_test: np.ndarray
y_train: np.ndarray
y_test: np.ndarray
X_train, X_test, y_train, y_test = train_test_split(X_cardio, y_cardio, train_size=0.4)
contamination = np.mean(y_train)
base_estimator = Pipeline([("stds", StandardScaler()), ("ocsvm", OneClassSVM())])
nu_values = np.hstack((np.linspace(0.1, 0.9, num=21), [contamination]))
param_grid = [
    {"ocsvm__kernel": ["linear"], "ocsvm__nu": nu_values},
    {"ocsvm__kernel": ["rbf"], "ocsvm__gamma": ["scale", "auto", *np.logspace(-1, -6, 6)], "ocsvm__nu": nu_values},
]
search = GridSearchCV(estimator=base_estimator, param_grid=param_grid, scoring="balanced_accuracy")
model = search.fit(X_train, pyod_to_sklearn(y_train))
score_train = model.score(X_train, pyod_to_sklearn(y_train))
score_test = model.score(X_test, pyod_to_sklearn(y_test))
print(f"Train Balanced Accuracy: {score_train:.6f}")
print(f"Test Balanced Accuracy: {score_test:.6f}")

Train Balanced Accuracy: 0.869500
Test Balanced Accuracy: 0.898416


In [203]:
# Print best params
print("Best params:")
for k, v in model.best_params_.items():
    print(f" - {k}: {v}")
# Create model with these params and train on the whole train dataset
best_model = OneClassSVM(
    kernel=model.best_params_.get("ocsvm__kernel"),
    gamma=model.best_params_.get("ocsvm__gamma"),
    nu=model.best_params_.get("ocsvm__nu"),
)
best_model.fit(X_train, pyod_to_sklearn(y_train))
# Predict on the test dataset
print(f"Train Balanced Accuracy: {compute_ba(best_model, X_train, pyod_to_sklearn(y_train)):.6f}")
print(f"Train ROC AUC: {compute_roc_auc(best_model, X_train, pyod_to_sklearn(y_train)):.6f}")
print(f"Test Balanced Accuracy: {compute_ba(best_model, X_test, pyod_to_sklearn(y_test)):.6f}")
print(f"Test ROC AUC: {compute_roc_auc(best_model, X_test, pyod_to_sklearn(y_test)):.6f}")

Best params:
 - ocsvm__gamma: 0.01
 - ocsvm__kernel: rbf
 - ocsvm__nu: 0.26
Train Balanced Accuracy: 0.868745
Train ROC AUC: 0.930291
Test Balanced Accuracy: 0.901073
Test ROC AUC: 0.943709


### Ex. 2.3.

In [None]:
import numpy as np
from pyod.models.base import BaseDetector
from pyod.models.deep_svdd import DeepSVDD
from pyod.models.ocsvm import OCSVM
from scipy.io import loadmat
from sklearn.model_selection import train_test_split

In [205]:
def pipeline(data: tuple[np.ndarray], model: BaseDetector, title: str) -> None:
    X_train, X_test, y_train, y_test = data
    model.fit(X_train, y_train)
    print(title)
    print(f"Balanced Accuracy: {compute_ba(model, X_test, y_test):.6f}")
    print(f"ROC AUC: {compute_roc_auc(model, X_test, y_test):.6f}")

In [206]:
data = loadmat("shuttle.mat")
X_shuttle = data["X"]
y_shuttle = data["y"]
mean_X = X_shuttle.mean(axis=0)
std_X = X_shuttle.std(axis=0)
X_shuttle = (X_shuttle - mean_X) / std_X
data = train_test_split(X_shuttle, y_shuttle, test_size=0.5)
pipeline(data, model=OCSVM(), title="OCSVM")
pipeline(data, model=DeepSVDD(n_features=9, hidden_neurons=[64, 32]), title="DeepSVDD(64, 32)")
pipeline(data, model=DeepSVDD(n_features=9, hidden_neurons=[32, 32, 32]), title="DeepSVDD(32, 32, 32, 32)")
pipeline(data, model=DeepSVDD(n_features=9, hidden_neurons=[32, 32, 16, 16]), title="DeepSVDD(32, 32, 32, 16, 16)")



OCSVM
Balanced Accuracy: 0.972311
ROC AUC: 0.992362




Epoch 1/100, Loss: 798.4233690053225
Epoch 2/100, Loss: 798.270662471652
Epoch 3/100, Loss: 798.0636148154736
Epoch 4/100, Loss: 798.120371311903
Epoch 5/100, Loss: 799.6661101877689
Epoch 6/100, Loss: 799.9474737942219
Epoch 7/100, Loss: 798.1457749605179
Epoch 8/100, Loss: 798.4264000803232
Epoch 9/100, Loss: 798.1017602980137
Epoch 10/100, Loss: 798.3201937377453
Epoch 11/100, Loss: 799.4625342339277
Epoch 12/100, Loss: 799.1642089635134
Epoch 13/100, Loss: 798.5627703368664
Epoch 14/100, Loss: 798.0874167606235
Epoch 15/100, Loss: 800.1436956524849
Epoch 16/100, Loss: 798.1009892970324
Epoch 17/100, Loss: 798.1205894649029
Epoch 18/100, Loss: 798.1185395866632
Epoch 19/100, Loss: 799.7452527135611
Epoch 20/100, Loss: 798.1427537798882
Epoch 21/100, Loss: 798.1214735209942
Epoch 22/100, Loss: 798.301298096776
Epoch 23/100, Loss: 798.3109539747238
Epoch 24/100, Loss: 798.5353368222713
Epoch 25/100, Loss: 799.843174919486
Epoch 26/100, Loss: 798.1318507194519
Epoch 27/100, Loss: 800.0



Epoch 1/100, Loss: 154.41167011857033
Epoch 2/100, Loss: 152.2140546925366
Epoch 3/100, Loss: 154.0693960674107
Epoch 4/100, Loss: 152.87555575370789
Epoch 5/100, Loss: 152.19404526799917
Epoch 6/100, Loss: 152.42597121745348
Epoch 7/100, Loss: 153.32916313037276
Epoch 8/100, Loss: 149.51813601329923
Epoch 9/100, Loss: 154.25612320750952
Epoch 10/100, Loss: 152.583402922377
Epoch 11/100, Loss: 153.9846686515957
Epoch 12/100, Loss: 149.0716383047402
Epoch 13/100, Loss: 150.07628136500716
Epoch 14/100, Loss: 152.06217650324106
Epoch 15/100, Loss: 154.7056689132005
Epoch 16/100, Loss: 153.0302333906293
Epoch 17/100, Loss: 150.97355984896421
Epoch 18/100, Loss: 153.64296123757958
Epoch 19/100, Loss: 153.87654077261686
Epoch 20/100, Loss: 152.9788662251085
Epoch 21/100, Loss: 163.78931798040867
Epoch 22/100, Loss: 150.68275920301676
Epoch 23/100, Loss: 147.66050252132118
Epoch 24/100, Loss: 151.87391224876046
Epoch 25/100, Loss: 155.00812098570168
Epoch 26/100, Loss: 151.2883891388774
Epoch



Epoch 1/100, Loss: 10.765669197513489
Epoch 2/100, Loss: 10.754960058024153
Epoch 3/100, Loss: 10.649487568996847
Epoch 4/100, Loss: 10.56897660868708
Epoch 5/100, Loss: 10.457548968726769
Epoch 6/100, Loss: 10.87233083031606
Epoch 7/100, Loss: 10.982156763784587
Epoch 8/100, Loss: 10.43108256754931
Epoch 9/100, Loss: 10.680455203633755
Epoch 10/100, Loss: 10.456777147948742
Epoch 11/100, Loss: 10.489611135562882
Epoch 12/100, Loss: 11.051403784542345
Epoch 13/100, Loss: 9.653907783562317
Epoch 14/100, Loss: 11.166488157701679
Epoch 15/100, Loss: 10.688390369294211
Epoch 16/100, Loss: 10.501363556832075
Epoch 17/100, Loss: 9.74162828316912
Epoch 18/100, Loss: 11.654084109002724
Epoch 19/100, Loss: 10.799046176718548
Epoch 20/100, Loss: 10.231317411991768
Epoch 21/100, Loss: 11.435890867142007
Epoch 22/100, Loss: 10.528125445358455
Epoch 23/100, Loss: 10.671361933229491
Epoch 24/100, Loss: 10.61713209352456
Epoch 25/100, Loss: 10.038359405880328
Epoch 26/100, Loss: 10.028846113826148
Ep