<a href="https://colab.research.google.com/github/minghsu0107/ML/blob/master/machine-learning/novelty_detection_and_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## One Class SVM

In [2]:
from sklearn.svm import OneClassSVM
X = [[0], [0.44], [0.45], [0.46], [0.47], [1], [10], [20]]
clf = OneClassSVM(kernel='rbf', degree=3, gamma='auto', 
                  coef0=0.0, tol=0.001, nu=0.5, shrinking=True, 
                  cache_size=200, verbose=False, max_iter=-1, random_state=None)
clf.fit(X)

OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma='auto', kernel='rbf',
            max_iter=-1, nu=0.5, random_state=None, shrinking=True, tol=0.001,
            verbose=False)

In [3]:
clf.predict(X)

array([-1,  1,  1,  1,  1, -1, -1, -1])

In [4]:
clf.fit_predict(X)

array([-1,  1,  1,  1,  1, -1, -1, -1])

In [5]:
clf.score_samples(X)

array([1.36787944, 1.55479873, 1.55565497, 1.55635565, 1.5569007 ,
       1.36787944, 1.        , 1.        ])

In [0]:
clf.decision_function(X)

array([-2.74925564e-01,  5.28455635e-09,  8.06251090e-04,  1.35696904e-03,
       -3.21513634e-01])

## Isolation Forest

In [24]:
from sklearn.ensemble import IsolationForest
clf = IsolationForest(n_estimators=3)
X = [[0], [0.44], [0.45], [0.46], [0.47], [1], [10], [20]]
clf.fit(X)



IsolationForest(behaviour='old', bootstrap=False, contamination='legacy',
                max_features=1.0, max_samples='auto', n_estimators=3,
                n_jobs=None, random_state=None, verbose=0, warm_start=False)

In [25]:
clf.predict(X)



array([ 1,  1,  1,  1,  1,  1,  1, -1])

In [26]:
clf.fit_predict(X)



array([ 1,  1,  1,  1,  1,  1,  1, -1])

In [27]:
clf.score_samples(X)

array([-0.45205038, -0.3372726 , -0.3372726 , -0.3372726 , -0.3372726 ,
       -0.48487355, -0.65667444, -0.75549815])

In [28]:
clf.decision_function(X)

array([ 0.04794962,  0.1627274 ,  0.1627274 ,  0.1627274 ,  0.1627274 ,
        0.01512645, -0.15667444, -0.25549815])

## KNN

In [30]:
import numpy as np
from sklearn.datasets import make_classification
X, y = make_classification(10000)
X.shape, y.shape

((10000, 20), (10000,))

In [31]:
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(algorithm='brute', 
                           metric='mahalanobis', 
                           metric_params={'V': np.cov(X)})
clf.fit(X, y)

KNeighborsClassifier(algorithm='brute', leaf_size=30, metric='mahalanobis',
                     metric_params={'V': array([[ 1.45531483, -0.55200882, -0.42197689, ..., -0.29499374,
        -0.07983611,  0.59815703],
       [-0.55200882,  0.81993122, -0.16053144, ...,  0.24977534,
        -0.08559313, -0.20511078],
       [-0.42197689, -0.16053144,  1.12939036, ...,  0.19225978,
        -0.04673029, -0.18924043],
       ...,
       [-0.29499374,  0.24977534,  0.19225978, ...,  1.18757176,
        -0.28620764, -0.15804128],
       [-0.07983611, -0.08559313, -0.04673029, ..., -0.28620764,
         1.29589855, -0.18338022],
       [ 0.59815703, -0.20511078, -0.18924043, ..., -0.15804128,
        -0.18338022,  1.00379554]])},
                     n_jobs=None, n_neighbors=5, p=2, weights='uniform')

In [0]:
import time

def clock(func):
    def clocked(*args):
        t0 = time.perf_counter()
        result = func(*args)
        elapsed = time.perf_counter() - t0
        name = func.__name__
        arg_str = ', '.join(repr(arg) for arg in args)
        print('[%0.8fs] %s(%s) -> %r' % (elapsed, name, arg_str, result))
        return result
    return clocked

In [0]:
@clock
def run():
    clf.kneighbors(X[2:, :])

In [0]:
run()

[23.22153067s] run() -> None


In [34]:
clf2 = KNeighborsClassifier(algorithm='ball_tree')
clf2.fit(X, y)

KNeighborsClassifier(algorithm='ball_tree', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [0]:
@clock
def run2():
    clf2.kneighbors(X[2:, :])
    
@clock
def predict():
    print(clf2.predict(X))

In [36]:
run2()

[4.18227913s] run2() -> None


In [37]:
predict()

[0 1 1 ... 1 0 0]
[4.47854818s] predict() -> None


In [38]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf2, X, y,cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

Accuracy: 0.93 (+/- 0.01)


## Local Outlier Factor

In [6]:
from sklearn.neighbors import LocalOutlierFactor
X = [[0], [0.44], [0.45], [0.46], [0.47], [1], [10], [20]]
clf = LocalOutlierFactor(n_neighbors=3, contamination='auto')
clf.fit_predict(X)

array([-1,  1,  1,  1,  1, -1, -1, -1])

In [8]:
clf.kneighbors(X, return_distance=True)

(array([[0.00e+00, 4.40e-01, 4.50e-01],
        [0.00e+00, 1.00e-02, 2.00e-02],
        [0.00e+00, 1.00e-02, 1.00e-02],
        [0.00e+00, 1.00e-02, 1.00e-02],
        [0.00e+00, 1.00e-02, 2.00e-02],
        [0.00e+00, 5.30e-01, 5.40e-01],
        [0.00e+00, 9.00e+00, 9.53e+00],
        [0.00e+00, 1.00e+01, 1.90e+01]]), array([[0, 1, 2],
        [1, 2, 3],
        [2, 3, 1],
        [3, 4, 2],
        [4, 3, 2],
        [5, 4, 3],
        [6, 5, 4],
        [7, 6, 5]]))