### Half-Space Trees
* https://scikit-multiflow.github.io/
* `pip install -U scikit-multiflow`

In [42]:
# Imports
from skmultiflow.data import AnomalySineGenerator
from skmultiflow.anomaly_detection import HalfSpaceTrees
# Seeded data stream: 1000 samples requested, 250 of them anomalies
stream = AnomalySineGenerator(random_state=1, n_samples=1000, n_anomalies=250)
# Seeded Half-Space Trees detector that will be scored and trained online
half_space_trees = HalfSpaceTrees(random_state=1)
# Loop budget and running tallies
max_samples = 1000
n_samples = 0
# NOTE: despite its name, this counts every sample whose label is 1
# (all actual anomalies seen), not only the ones that were detected.
true_positives = 0
# Samples labelled 1 that the detector also predicted as 1
detected_anomalies = 0
# Each sample is scored first and only then used to update the model
while n_samples < max_samples and stream.has_more_samples():
    X, y = stream.next_sample()
    y_pred = half_space_trees.predict(X)
    if y[0] == 1:
        true_positives += 1
        # Adds 1 when the prediction agrees with the label, otherwise 0
        detected_anomalies += int(y_pred[0] == 1)
    half_space_trees.partial_fit(X, y)
    n_samples += 1
print('{} samples analyzed.'.format(n_samples))
report_template = 'Half-Space Trees correctly detected {} out of {} anomalies'
print(report_template.format(detected_anomalies, true_positives))

1000 samples analyzed.
Half-Space Trees correctly detected 157 out of 220 anomalies


### Welford Algorithm
* https://gist.github.com/alexalemi/2151722

In [43]:
import math
class Welford(object):
    """Running mean and standard deviation via Welford's online algorithm.

    Algorithm reference:
        http://www.johndcook.com/standard_deviation.html

    An instance is callable: pass a single number to fold in one
    observation, or any object exposing ``__iter__`` to fold in each of
    its elements in turn. ``None`` is silently ignored.

    Properties:
        mean     - running mean (0 before any value has been seen)
        std      - sample standard deviation (n - 1 denominator);
                   0 while fewer than two values have been seen
        meanfull - tuple (mean, std / sqrt(n)), i.e. the mean together
                   with the standard error of the mean; (mean, 0) when
                   no value has been seen yet

    Usage:
        >>> foo = Welford()
        >>> foo
        <Welford: 0 +- 0>
        >>> foo(range(10))
        >>> foo
        <Welford: 4.5 +- 3.0276503540974917>
        >>> foo.mean
        4.5
        >>> foo.std
        3.0276503540974917
    """

    def __init__(self, lst=None):
        """Start empty, then optionally consume ``lst`` (a value or an iterable)."""
        self.k = 0  # number of observations folded in so far
        self.M = 0  # running mean
        self.S = 0  # running sum of squared deviations from the mean

        self(lst)

    def update(self, x):
        """Fold a single observation ``x`` into the running statistics.

        ``None`` is treated as "no observation" and leaves the state untouched.
        """
        if x is None:
            return
        self.k += 1
        delta = x - self.M
        newM = self.M + delta * 1. / self.k
        # Uses the deviation from both the old and the new mean.
        newS = self.S + delta * (x - newM)
        self.M, self.S = newM, newS

    def consume(self, lst):
        """Fold every element of the iterable ``lst`` into the statistics."""
        for x in iter(lst):
            self.update(x)

    def __call__(self, x):
        """Dispatch to ``consume`` for objects with ``__iter__``, else ``update``."""
        if hasattr(x, "__iter__"):
            self.consume(x)
        else:
            self.update(x)

    @property
    def mean(self):
        """Running mean of the observations seen so far."""
        return self.M

    @property
    def meanfull(self):
        """Tuple of (mean, standard error of the mean).

        With no observations the error term is reported as 0 instead of
        dividing by sqrt(0), which used to raise ZeroDivisionError.
        """
        if self.k == 0:
            return self.mean, 0
        return self.mean, self.std / math.sqrt(self.k)

    @property
    def std(self):
        """Sample standard deviation; 0 with fewer than two observations.

        The k == 0 case is guarded as well as k == 1: without the guard the
        expression evaluates sqrt(0 / -1) and yields a misleading -0.0.
        """
        if self.k < 2:
            return 0
        return math.sqrt(self.S / (self.k - 1))

    def __repr__(self):
        return "<Welford: {} +- {}>".format(self.mean, self.std)

In [41]:
foo = Welford()
# Feed the same ten values three times, printing the running summary
# after each pass.
for _pass in range(3):
    foo(range(10))
    print(foo)
# Mean paired with std / sqrt(count) after all thirty values
print(foo.meanfull)

<Welford: 4.5 +- 3.0276503540974917>
<Welford: 4.499999999999999 +- 2.9468984587725093>
<Welford: 4.499999999999998 +- 2.9213837061606083>
(4.499999999999998, 0.5333692516640697)


In [68]:
# Observations to be scored one at a time against the running statistics
X = [13458.4938203836, 16049.4957449789, 19860.8045464739, 20169.7036589746, 18334.0489810167, 17648.5296324581, 17250.9967157409, 16695.4661875298, 15856.851747196, 15287.7706944907, 17994.4627253484, 17466.3685046983, 16327.5523300049, 15681.3499655485, 15937.5609079041, 15004.8706732747, 14691.5561787629, 19147.9313260361, 22893.9138627761, 23774.1139242753, 25745.3615012616, 26841.6405313642, 26294.5933524249, 56813.7444502739, 31244.4836055811, 23037.2198116763, 19023.2275023935, 17416.8217712373, 19150.0898907596, 19494.9275538184, 19609.2407898751, 17997.4979520362, 17280.8368701055, 16535.2102791069, 15801.0184371818, 15330.0276009174, 14994.0713348652, 41053.3742755135, 31539.1124986513]
welford = Welford()
# Show the accumulator while it is still empty
print(welford)
# Prime the statistics with three starting values before scoring
welford([12300, 14000, 15000])
for x in X:
    # The value is folded into the statistics before it is scored,
    # so it influences its own mean and spread.
    welford(x)
    # NOTE(review): the divisor is meanfull[1] (std / sqrt(count)), not
    # std itself, so this is not a conventional per-sample z-score —
    # confirm this is intended.
    z = 0 if welford.std == 0 else (x - welford.mean) / welford.meanfull[1]
    print(z)

<Welford: 0 +- -0.0>
-0.41084197204814643
2.9389021991340796
4.377035003278943
3.713511314343811
2.0669770534930687
1.4081083939235324
0.9889651668839295
0.3393608956209708
-0.7480189211002257
-1.545885986166138
2.5416460460042343
1.6802058677596858
-0.28321302335132154
-1.4703212287480325
-0.9700582076173557
-2.8663182361749744
-3.48722737120222
6.171006092896868
11.968457694765535
11.616672352583679
12.568646218545112
12.2361830099304
10.455294615950713
22.994605858231253
6.994833869722056
1.921923091591489
-0.6418593342696949
-1.7017503713743882
-0.5194793808714601
-0.2766681711749315
-0.19366667957443093
-1.4005906740257905
-1.9508303677280923
-2.536049462819023
-3.122411888664018
-3.5003588012012377
-3.7694851911478042
17.058728643525
9.163522664151595


![Q function](https://d20ohkaloyme4g.cloudfront.net/img/document_thumbnails/1b2089f274d051cf03a7f236e63b2971/thumb_1200_1697.png)