In [None]:
import numpy as np
import torch

import scipy.io
import mat73

import sparse

from tqdm.notebook import tqdm
from scipy.sparse.linalg import norm

import pandas as pd

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, f1_score, classification_report, roc_auc_score

import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Ask for the .mat file that stores the reconstruction errors under 'slice_fits'.
path = input('Enter reconstruction errors file path: ')
try:
    # scipy reads MATLAB files up to format v7.2.
    errors = scipy.io.loadmat(path)['slice_fits']
except NotImplementedError:
    # scipy raises NotImplementedError for v7.3 (HDF5) files; mat73 reads those.
    # Any other failure (missing file, missing key, ...) is left to surface
    # instead of being masked by a second, unrelated error.
    errors = mat73.loadmat(path)['slice_fits']

# Flatten so len(errors) counts samples whether MATLAB stored a row or a column
# vector. Assumes one error value per sample, which is how the later cells use
# it (reshaped to a single feature column) — TODO confirm.
errors = np.array(errors).ravel()

In [None]:
# Read the CSV chosen by the user and work on its raw values.
dataset = pd.read_csv(input('Enter dataset path: ')).to_numpy()

# First column -> data, second column -> labels.
data = dataset[:, 0]
labels = dataset[:, 1]

# Fixed-seed shuffle applied to both arrays together so pairs stay aligned.
# NOTE(review): presumably this reproduces the order in which `errors` were
# generated — confirm against the code that produced the .mat file.
data, labels = shuffle(data, labels, random_state=10)

# Keep only as many rows as there are reconstruction errors.
data = data[:len(errors)]
labels = labels[:len(errors)]

In [None]:
# Group the reconstruction errors by ground-truth label in a single pass.
# The previous comprehension rescanned every (label, error) pair once per
# *sample*, i.e. quadratic work; this builds the same mapping in linear time.
# Keys keep first-appearance order and each list keeps sample order.
vals = {}
for gtruth, value in zip(labels, errors):
    vals.setdefault(gtruth, []).append(value)

In [None]:
# Report how many reconstruction errors were collected for each label.
for label in vals:
    rec_errors = vals[label]
    print(f"Key: {label}, Length of List: {len(rec_errors)}")

In [None]:
# Overlay the reconstruction-error distributions of labels 0 and 1 on one axes.
# Each histogram is labelled and the axes are annotated so the figure can be
# read on its own.
fig, ax = plt.subplots()
for class_label in (0, 1):
    # np.ravel gives seaborn a flat vector even if the stored values are
    # one-element arrays.
    sns.histplot(np.ravel(vals[class_label]), ax=ax, label=f"label {class_label}")
ax.set(xlabel="Reconstruction error", title="Reconstruction error by ground-truth label")
ax.legend();

# Models

In [None]:
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.kde import KDE
from pyod.models.cblof import CBLOF
from pyod.models.gmm import GMM

from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN


from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize


In [None]:
# Hold out a third of the samples for testing. Each reconstruction error is
# turned into a one-feature row, and the labels are cast to int.
X_train, X_test, y_train, y_test = train_test_split(
    np.array(errors).reshape(-1, 1),
    labels.astype(int),
    test_size=0.33,
    random_state=42,
)

In [None]:
# Local Outlier Factor detector with the library defaults.
lof = LOF()
# Name used by the reporting cells that follow.
model_name = lof.__class__.__name__

lof.fit(X_train, y_train)

In [None]:
# Training-set labels and scores are stored on the fitted detector.
y_train_pred, y_train_scores = lof.labels_, lof.decision_scores_

# Outputs for the held-out samples.
y_test_pred = lof.predict(X_test)
y_test_scores = lof.decision_function(X_test)
y_test_proba = lof.predict_proba(X_test)

In [None]:
# Print the evaluation of the test-set outlier scores against the true labels.
evaluate_print(clf_name=model_name, y=y_test, y_pred=y_test_scores)

In [None]:
# Header followed by the per-class precision/recall/F1 table for the test set.
print(
    f'{model_name} Classification Report:\n',
    classification_report(y_test, y_test_pred),
    sep='\n',
)

## Isolation Forest

In [None]:
# Isolation Forest builds randomised trees; seed it so the scores reported
# below are reproducible across kernel restarts (the same seed is used for the
# train/test split and the sklearn classifiers in this notebook).
ifor = IForest(behaviour='new', random_state=42)

# Name used by the reporting cell that follows.
model_name = type(ifor).__name__

ifor.fit(X_train, y_train)

In [None]:
# Training-set labels and scores are stored on the fitted detector.
y_train_pred, y_train_scores = ifor.labels_, ifor.decision_scores_

# Outputs for the held-out samples.
y_test_pred = ifor.predict(X_test)
y_test_scores = ifor.decision_function(X_test)
y_test_proba = ifor.predict_proba(X_test)

# Print the evaluation of the test-set outlier scores against the true labels.
evaluate_print(clf_name=model_name, y=y_test, y_pred=y_test_scores)

## KDE

In [None]:
# Kernel-density-based detector with the library defaults.
kde = KDE()
# Name used by the reporting cell that follows.
model_name = kde.__class__.__name__

kde.fit(X_train, y_train)

In [None]:
# Training-set labels and scores are stored on the fitted detector.
y_train_pred, y_train_scores = kde.labels_, kde.decision_scores_

# Outputs for the held-out samples.
y_test_pred = kde.predict(X_test)
y_test_scores = kde.decision_function(X_test)
y_test_proba = kde.predict_proba(X_test)

# Print the evaluation of the test-set outlier scores against the true labels.
evaluate_print(clf_name=model_name, y=y_test, y_pred=y_test_scores)

## CBLOF

In [None]:
# Cluster-based LOF. Seeded so the clustering, and with it the reported scores,
# is reproducible across runs.
clf = CBLOF(random_state=42)
clf.fit(X_train, y_train)

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print('CBLOF', y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print('CBLOF', y_test, y_test_scores)

## GMM

In [None]:
# Gaussian-mixture-based detector with the library defaults.
clf = GMM()
clf.fit(X_train, y_train)

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores

# evaluate and print the results
# (the name was previously 'CBLOF', a copy-paste slip that mislabelled the
# printed GMM metrics)
print("\nOn Training Data:")
evaluate_print('GMM', y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print('GMM', y_test, y_test_scores)

## XGBOD

In [None]:
# XGBOD is given one base detector (an Isolation Forest) and is fitted with
# the labels. The base detector is seeded so its randomised trees, and hence
# the reported metrics, are reproducible across runs.
clf = XGBOD(estimator_list=[IForest(random_state=42)])
clf.fit(X_train, y_train)

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print('XGBOD', y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print('XGBOD', y_test, y_test_scores)

## KNN

In [None]:
# k-nearest-neighbours detector with the library defaults.
clf = KNN()
clf.fit(X_train, y_train)

# Training-set outputs stored on the fitted detector:
# labels are binary (0: inliers, 1: outliers), scores are the raw outlier scores.
y_train_pred, y_train_scores = clf.labels_, clf.decision_scores_

# Test-set outlier labels (0 or 1) and outlier scores.
y_test_pred = clf.predict(X_test)
y_test_scores = clf.decision_function(X_test)

# Print the evaluation for the training split, then for the test split.
print("\nOn Training Data:")
evaluate_print(clf_name='KNN', y=y_train, y_pred=y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name='KNN', y=y_test, y_pred=y_test_scores)

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow supervised baseline trained directly on the labels.
clf = DecisionTreeClassifier(criterion="gini", random_state=42, max_depth=3, min_samples_leaf=5)

clf.fit(X_train, y_train)

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
# Probability of the positive class. Column 1 corresponds to label 1 because
# the labels are binary {0, 1} and sklearn orders classes ascending.
y_test_scores = clf.predict_proba(X_test)[:, 1]

# Only report results for the class specified by pos_label (1)
print('F1:', f1_score(y_test, y_test_pred))
# ROC-AUC ranks samples by score; feeding hard 0/1 predictions collapses the
# curve to a single threshold, so the probabilities are used instead.
print('ROC-AUC:', roc_auc_score(y_test, y_test_scores))

## AnoGAN

In [None]:
from pyod.models.anogan import AnoGAN

# GAN-based detector with the library defaults.
clf = AnoGAN()
clf.fit(X_train, y_train)

# Training-set outputs stored on the fitted detector:
# labels are binary (0: inliers, 1: outliers), scores are the raw outlier scores.
y_train_pred, y_train_scores = clf.labels_, clf.decision_scores_

# Test-set outlier labels (0 or 1) and outlier scores.
y_test_pred = clf.predict(X_test)
y_test_scores = clf.decision_function(X_test)

# Print the evaluation for the training split, then for the test split.
print("\nOn Training Data:")
evaluate_print(clf_name='AnoGAN', y=y_train, y_pred=y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name='AnoGAN', y=y_test, y_pred=y_test_scores)

## SVM

In [None]:
from sklearn.svm import SVC

# Supervised SVC baseline with the default kernel and fixed gamma/C.
clf = SVC(gamma=2, C=1, random_state=42)

clf.fit(X_train, y_train)

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
# Signed distance to the decision boundary: a continuous score for ranking.
y_test_scores = clf.decision_function(X_test)

# Only report results for the class specified by pos_label (1)
print('F1:', f1_score(y_test, y_test_pred))
# ROC-AUC ranks samples by score; feeding hard 0/1 predictions collapses the
# curve to a single threshold, so the decision values are used instead.
print('ROC-AUC:', roc_auc_score(y_test, y_test_scores))

In [None]:
from sklearn.neural_network import MLPClassifier

# Supervised multi-layer perceptron baseline, seeded for reproducibility.
clf = MLPClassifier(random_state=42, max_iter=100)

clf.fit(X_train, y_train)

# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
# Probability of the positive class. Column 1 corresponds to label 1 because
# the labels are binary {0, 1} and sklearn orders classes ascending.
y_test_scores = clf.predict_proba(X_test)[:, 1]

# Only report results for the class specified by pos_label (1)
print('F1:', f1_score(y_test, y_test_pred))
# ROC-AUC ranks samples by score; feeding hard 0/1 predictions collapses the
# curve to a single threshold, so the probabilities are used instead.
print('ROC-AUC:', roc_auc_score(y_test, y_test_scores))