## Q1.

Add a `__setitem__` method to the Python linked-list implementation from the lecture (this past Wednesday).

In [4]:
#your code here
from doctest import run_docstring_examples as dtest
import numbers
import reprlib
class LL:
    """
    Singly linked list whose nodes are two-element lists ``[value, next]``.

    An empty list is represented by ``_headNode`` being ``None``. Indexing
    accepts only non-negative integers: any non-integer index (including a
    slice) raises ``TypeError``, and a negative or too-large index raises
    ``IndexError``.

    >>> A = LL()
    >>> A[0]
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> A.insert_front(1)
    >>> A[0]
    1
    >>> A.insert_back(2)
    >>> A[1]
    2
    >>> A
    LL([1,...])
    >>> myll = LL.from_components([1,2])
    >>> myll[1]
    1
    >>> len(myll)
    2
    >>> myll[2]
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1]
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> myll[1] = 7
    >>> myll[1]
    7
    >>> B = LL()
    >>> B.insert_back(3)
    >>> B[0]
    3
    """

    @classmethod
    def from_components(cls, components):
        """Build a list by pushing each component onto the front.

        Because every element is inserted at the front, the resulting list
        holds the components in reverse order (``[1, 2]`` becomes 2 -> 1).
        Any iterable is accepted; an empty one gives an empty list.
        """
        inst = cls()
        for c in components:
            inst.insert_front(c)
        return inst

    def __init__(self, head=None):
        """Create an empty list, or a one-element list holding ``head``.

        ``None`` means "no initial element", so ``None`` itself cannot be
        stored through the constructor.
        """
        if head is None:
            self._headNode = None
        else:
            self._headNode = [head, None]

    def insert_front(self, element):
        """Prepend ``element``; the previous head becomes its successor."""
        self._headNode = [element, self._headNode]

    def insert_back(self, element):
        """Append ``element`` after the current last node."""
        new_node = [element, None]
        if self._headNode is None:
            # Empty list: there is no tail to link from, so the new node
            # simply becomes the head.
            self._headNode = new_node
            return
        tail = self._headNode
        while tail[1] is not None:
            tail = tail[1]
        tail[1] = new_node

    def __repr__(self):
        """Return ``ClassName([first,...])``, showing only the first element.

        An empty list renders with nothing before the comma.
        """
        class_name = type(self).__name__
        if self._headNode is None:
            components = ""
        else:
            components = reprlib.repr(self._headNode[0])
        return '{}([{},...])'.format(class_name, components)

    def __len__(self):
        """Count the nodes by walking the chain from the head."""
        count = 0
        node = self._headNode
        while node is not None:
            count += 1
            node = node[1]
        return count

    def _node_at(self, index):
        """Return the node at ``index``, shared by item get and set.

        Raises:
            TypeError: ``index`` is not an integer.
            IndexError: the list is empty, or ``index`` is negative or past
                the last node.
        """
        class_name = type(self).__name__
        if not isinstance(index, numbers.Integral):
            msg = '{class_name} indices must be integers'
            raise TypeError(msg.format(class_name=class_name))
        node = self._headNode
        if node is None:
            msg = 'trying to index an empty {class_name}'
            raise IndexError(msg.format(class_name=class_name))
        out_of_range = '{class_name} index out of range'
        if index < 0:
            # Positions count up from 0, so a negative index can never
            # match; fail now instead of walking the whole list first.
            raise IndexError(out_of_range.format(class_name=class_name))
        position = 0
        while position != index:
            node = node[1]
            if node is None:
                raise IndexError(out_of_range.format(class_name=class_name))
            position += 1
        return node

    def __getitem__(self, index):
        """Return the value stored at position ``index``."""
        return self._node_at(index)[0]

    def __setitem__(self, index, element):
        """Replace the value stored at position ``index`` with ``element``."""
        self._node_at(index)[0] = element
            

In [18]:
# Example showing use of the __setitem__ method:

myLinkedList = LL()
# insert_front prepends, so after these three calls the list reads 6, 6, 5.
myLinkedList.insert_front(5)
myLinkedList.insert_front(6)
myLinkedList.insert_front(6)
print("Length: ",len(myLinkedList))
# Overwrite the last element (index 2, currently 5) in place.
myLinkedList[2]=4
# Read the element back; as the cell's final expression its value is displayed.
myLinkedList[2]

Length:  3


4

## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [1]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    """Generate a stream of noisy 1-D values centred on 1.0e09.

    Each value is ``1.0e09`` plus Gaussian noise with mean 0 whose standard
    deviation is redrawn for every sample as ``m * random()``.

    Args:
        m: scale factor for the per-sample noise standard deviation.
        stop: last index to produce, inclusive, so ``stop + 1`` values are
            yielded in total. ``None`` (the default) makes the stream
            endless. A negative ``stop`` yields nothing.

    Yields:
        float: the next noisy sample.
    """
    for i in count():
        # Compare against None explicitly: a truthiness test would treat
        # stop=0 as "no limit" and never terminate.
        if stop is not None and i > stop:
            break
        yield 1.0e09 + normalvariate(0, m * random())
        

Here is an implementation of an online mean algorithm; see http://www.johndcook.com/blog/standard_deviation/ along with the link to http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/. (Convince yourselves of the formulas.)

In [2]:
def online_mean(iterator):
    """Yield the running mean after each value drawn from ``iterator``.

    The mean is updated incrementally, so only the current mean is kept
    between samples and the input is never stored. One mean is yielded per
    input value; an empty input yields nothing.
    """
    running = 0
    for seen, sample in enumerate(iterator, start=1):
        # Move the mean toward the new sample by 1/seen of the gap.
        running = running + (sample - running) / seen
        yield running

We use our generator functions to implement iterators:

In [3]:
# make_data stops only once its counter exceeds `stop`, so stop=10 gives 11 values (indices 0 through 10).
g = make_data(5, 10)
list(g)

[1000000004.5396862,
 999999999.5303862,
 1000000002.8798914,
 1000000001.2541183,
 999999999.8562678,
 1000000002.679568,
 1000000002.5060941,
 999999998.0273261,
 999999995.3439487,
 1000000000.7521349,
 1000000004.6209784]

In [4]:
# Calling a generator function only builds a lazy generator; no data is read yet.
g = online_mean(make_data(5, 100))
print(type(g))
# list() drives the generator to exhaustion, collecting one running mean per input value.
list(g)

<class 'generator'>


[1000000001.2137014,
 1000000000.6219122,
 1000000000.6693263,
 999999998.6083226,
 999999998.6633834,
 999999998.7978028,
 999999998.416349,
 999999998.6252012,
 999999999.0844879,
 999999999.2188281,
 999999999.2586182,
 999999999.3836002,
 999999999.6676482,
 999999999.7639234,
 999999999.9369645,
 999999999.8782458,
 999999999.8437614,
 999999999.9561348,
 999999999.9894805,
 1000000000.0144464,
 1000000000.0000138,
 999999999.9897428,
 1000000000.0940074,
 1000000000.0100394,
 1000000000.1134287,
 999999999.8597358,
 999999999.9580063,
 999999999.9568076,
 999999999.9271019,
 999999999.8705577,
 1000000000.0030863,
 999999999.9857712,
 1000000000.0163168,
 1000000000.0225146,
 1000000000.0537295,
 1000000000.0913427,
 1000000000.0115355,
 1000000000.017358,
 999999999.9930785,
 999999999.8482876,
 999999999.8124729,
 999999999.826397,
 999999999.8317163,
 999999999.8376387,
 999999999.901007,
 999999999.9143425,
 999999999.9254732,
 999999999.96317,
 999999999.9639083,
 999999999.

### 2.1

Implement the online standard deviation algorithm as a generator function of the following form:

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

In [38]:
# your code here
# Referenced Wikipedia - algorithms for calculating variance page
import math
def online_mean_dev(iterator):
    """Yield running statistics for each value of ``iterator`` after the first.

    Yields:
        tuple: ``(n, value, mu, stddev)`` where ``n`` is the 1-based count of
        values seen so far, ``value`` is the latest input, ``mu`` is the
        running mean and ``stddev`` is the sample standard deviation
        (divisor ``n - 1``). Nothing is yielded for the first value, since
        the sample deviation needs at least two points.
    """
    mean = 0
    sq_dev_sum = 0  # running sum of squared deviations from the mean
    for seen, value in enumerate(iterator, start=1):
        gap = value - mean
        mean = mean + gap / seen
        # Pair the deviation from the old mean with the deviation from the
        # updated mean to grow the accumulator in a single pass.
        sq_dev_sum += gap * (value - mean)
        if seen <= 1:
            continue
        spread = math.sqrt(sq_dev_sum / (seen - 1))
        yield (seen, value, mean, spread)
        

Here we generate a data stream of about 100000 elements and run this iterator on it (imagine running this on a time series being slowly read from disk).

In [39]:
# This only chains the two generators; values are produced as the result is iterated.
data_with_stats = online_mean_dev(make_data(5, 100000))

## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t):
```

which takes a tuple like the one yielded by your code above and returns True if the value lies within `level`·$\sigma$ of the mean.

In [40]:
#your code here
def is_ok(level, t):
    """Return True when the value in ``t`` lies strictly within ``level`` sigma.

    Args:
        level: number of standard deviations tolerated around the mean.
        t: a 4-tuple ``(n, value, mu, stddev)``; ``n`` is not used.

    Returns:
        bool: whether ``|value - mu|`` is strictly less than
        ``level * stddev``.
    """
    _n, value, mu, stddev = t
    deviation = abs(value - mu)
    tolerance = level * stddev
    return deviation < tolerance

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [41]:
from itertools import filterfalse
# Fix the threshold at 5 standard deviations, leaving a one-argument predicate over the stats tuples.
pred = lambda t: is_ok(5, t)
# filterfalse keeps the items for which pred is false, i.e. the tuples is_ok rejects.
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [42]:
# Consumes the whole lazy pipeline; the iterators are single-use, so they are exhausted afterwards.
list(anomalies)#materialize

[(1835, 999999985.4047824, 1000000000.0213615, 2.8663928324583314),
 (1984, 1000000016.493598, 999999999.9977401, 2.8969970469980386),
 (2122, 999999985.1916033, 999999999.9659057, 2.9005692160547323),
 (5217, 999999984.9747802, 999999999.9604595, 2.881714488262054),
 (5493, 999999985.386237, 999999999.9549211, 2.8858165730444814),
 (7514, 999999984.4401242, 999999999.9410933, 2.911229519679254),
 (11643, 999999985.3843204, 999999999.9593089, 2.9112379855307404),
 (11928, 999999984.6468571, 999999999.9577291, 2.9137738506712263),
 (15772, 1000000015.050465, 999999999.9813043, 2.9131542747276424),
 (16486, 1000000014.7177752, 999999999.98445, 2.915524946112416),
 (21880, 1000000014.8627565, 999999999.9908035, 2.910260998904454),
 (23845, 1000000014.6529014, 999999999.9922757, 2.90986031441394),
 (26595, 1000000021.120691, 999999999.9983298, 2.9083382503840647),
 (27209, 999999983.8309938, 1000000000.0023828, 2.9117781638082096),
 (28427, 999999985.3971981, 1000000000.0015264, 2.90935728

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would a shorter "window" of anomaly detection, such as the 100 points around the time in question, pick up? How might you create an algorithm that does window-based averaging? (Hint: the window size is small compared to the time-series size.)

Finally, think a bit about how you might implement all of this in a production environment. Remember that incoming streaming data might get backed up while you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).