In [4]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.cof import COF
from pyod.models.hbos import HBOS
from pyod.models.pca import PCA
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.suod import SUOD

In [9]:
from pyod.models.combination import aom, moa, average, maximization
from pyod.utils.data import generate_data

In [10]:
import numpy as np
from jax import numpy as jnp

In [26]:
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.io import loadmat

from pyod.models.knn import KNN
from pyod.models.combination import aom, moa, average, maximization, median
from pyod.utils.utility import standardizer
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
import os
import sys


# Fixed seed so that data generation, the train/test split and the
# randomised bucket combinations (AOM/MOA) give the same numbers on every
# run of this cell.
RANDOM_SEED = 42

# Define data file and read X and y.
# Generate some data if the source data is missing.
mat_file = 'cardio.mat'
try:
    mat = loadmat(os.path.join('data', mat_file))
except (TypeError, IOError):
    # Both failure modes take the same fallback, so they share one handler.
    print('{data_file} does not exist. Use generated data'.format(
        data_file=mat_file))
    X, y = generate_data(train_only=True,
                         random_state=RANDOM_SEED)  # load data
else:
    X = mat['X']
    y = mat['y'].ravel()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=RANDOM_SEED)

# standardizing data for processing
X_train_norm, X_test_norm = standardizer(X_train, X_test)

# One kNN base detector per neighbourhood size. The detector count is
# derived from the list so the two can never drift apart.
k_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140,
          150, 160, 170, 180, 190, 200]
n_clf = len(k_list)  # number of base detectors

# Column i holds the outlier scores produced by the i-th detector.
train_scores = np.zeros([X_train.shape[0], n_clf])
test_scores = np.zeros([X_test.shape[0], n_clf])

print('Combining {n_clf} kNN detectors'.format(n_clf=n_clf))

for i, k in enumerate(k_list):
    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)

    train_scores[:, i] = clf.decision_scores_
    test_scores[:, i] = clf.decision_function(X_test_norm)

# Decision scores have to be normalized before combination
train_scores_norm, test_scores_norm = standardizer(train_scores,
                                                   test_scores)
# Combination by average
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)

# Combination by max
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)

# Combination by median (kept in its own variable so the maximization
# result above is not overwritten)
y_by_median = median(test_scores_norm)
evaluate_print('Combination by Median', y_test, y_by_median)

# Combination by aom (average of maximum over detector buckets)
y_by_aom = aom(test_scores_norm, n_buckets=5, random_state=RANDOM_SEED)
evaluate_print('Combination by AOM', y_test, y_by_aom)

# Combination by moa (maximum of average over detector buckets)
y_by_moa = moa(test_scores_norm, n_buckets=5, random_state=RANDOM_SEED)
evaluate_print('Combination by MOA', y_test, y_by_moa)

cardio.mat does not exist. Use generated data
Combining 20 kNN detectors
Combination by Average ROC:0.9999, precision @ rank n:0.9756
Combination by Maximization ROC:0.9999, precision @ rank n:0.9756
Combination by Median ROC:0.9999, precision @ rank n:0.9756
Combination by AOM ROC:0.9999, precision @ rank n:0.9756
Combination by MOA ROC:0.9999, precision @ rank n:0.9756


In [38]:
# Sanity check: average() on a one-row slice next to np.mean of that same row.
print(
    average(test_scores_norm[:1]),
    np.mean(test_scores_norm[0]),
)

[-0.36218036] -0.36218036368474915


In [42]:
# maximization() applied to a one-row slice of the normalised test scores.
print(
    maximization(test_scores_norm[:1])
)

[-0.3424924]


In [48]:
# AOM with its default arguments on a one-row slice of the normalised scores.
aom(test_scores_norm[:1])

array([-0.3503167])

In [49]:
# MOA with its default arguments on a one-row slice of the normalised scores.
moa(test_scores_norm[:1])

array([-0.35627745])

In [61]:
from sklearn.utils import shuffle

In [63]:
# Shuffle the integers 0..19 with sklearn's shuffle helper.
index_pool = range(20)
shuffled_list = shuffle(list(index_pool))

In [None]:
# average == average the scores of all estimators
# median == take the median of all estimators' scores
# average of maximization (AOM) == randomly divide the estimators into n groups, take the maximum score within each group, then average those maxima
# maximization of average (MOA) == randomly divide the estimators into n groups, take the average score within each group, then take the maximum of those averages

In [1]:
import numpy as np
import pandas as pd

In [6]:
# 240 uniform random samples, scaled from [0, 1) up to [0, 100).
t = 100 * np.random.random_sample(size=240)

In [9]:
# Bucket the samples into 20 equal-width bins.
pd.cut(x=t, bins=20)

[(60.316, 65.267], (94.973, 99.924], (55.365, 60.316], (15.756, 20.707], (50.413, 55.365], ..., (35.56, 40.511], (70.218, 75.169], (94.973, 99.924], (10.805, 15.756], (80.12, 85.071]]
Length: 240
Categories (20, interval[float64, right]): [(0.803, 5.854] < (5.854, 10.805] < (10.805, 15.756] < (15.756, 20.707] ... (80.12, 85.071] < (85.071, 90.022] < (90.022, 94.973] < (94.973, 99.924]]