## Q1.

Add a __setitem__ to the python linked list implementation from the lecture (this past wednesday).

In [14]:
#your code here
from doctest import run_docstring_examples as dtest
import numbers
import reprlib
import math
class LL:
    """
    >>> A = LL()  
    >>> A[0]
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> A.insert_front(1)
    >>> A[0]
    1
    >>> A.insert_back(2)
    >>> A[1]
    2
    >>> A
    LL([1,...])
    >>> myll = LL.from_components([1,2])
    >>> myll[1]
    1
    >>> len(myll)
    2
    >>> myll[2]
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1]
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    """
    @classmethod
    def from_components(cls, components):
        inst = cls(components[0])
        for c in components[1:]:
            inst.insert_front(c)
        return inst
        
    def __init__(self, head=None):
        if head is None:
            self._headNode = None
        else:
            self._headNode = [head, None]
            
    def insert_front(self, element):
        new_node = [element, None]
        new_node[1] = self._headNode
        self._headNode = new_node
        
    def insert_back(self, element):
        new_node = [element, None]
        curr_ptr = self._headNode
        while curr_ptr[1] is not None:
            curr_ptr = curr_ptr[1]
        curr_ptr[1]= new_node
        
    def __repr__(self):
        class_name = type(self).__name__
        if len(self)==0:
            components=""
        else:
            components = reprlib.repr(self[0])
        return '{}([{},...])'.format(class_name,components)


    def __len__(self):
        curr_ptr = self._headNode
        count = 0
        if curr_ptr==None:
            return 0
        while 1:
            count = count + 1
            if curr_ptr[1] is None:
                break
            curr_ptr = curr_ptr[1]
        return count    
    
    def __getitem__(self, index):
        class_name = type(self).__name__
        if isinstance(index, numbers.Integral): 
            curr_ptr = self._headNode
            if curr_ptr==None:
                msg = 'trying to index an empty {class_name}' 
                raise IndexError(msg.format(class_name=class_name))
            next_ptr = self._headNode[1]
            count = 0
            while 1:
                if index == count:
                    return curr_ptr[0]
                if curr_ptr[1] is None:
                    msg = '{class_name} index out of range' 
                    raise IndexError(msg.format(class_name=class_name))       
                count += 1
                curr_ptr = curr_ptr[1]
        else:
            msg = '{class_name} indices must be integers' 
            raise TypeError(msg.format(class_name=class_name))
            
    def __setitem__(self, index, item):
        class_name = type(self).__name__
        if isinstance(index, numbers.Integral): 
            curr_ptr = self._headNode
            if curr_ptr==None:
                msg = 'trying to index an empty {class_name}' 
                raise IndexError(msg.format(class_name=class_name))
            next_ptr = self._headNode[1]
            count = 0
            while 1:
                if index == count:
                    curr_ptr[0] = item
                    return
                if curr_ptr[1] is None:
                    msg = '{class_name} index out of range' 
                    raise IndexError(msg.format(class_name=class_name))       
                count += 1
                curr_ptr = curr_ptr[1]
        else:
            msg = '{class_name} indices must be integers' 
            raise TypeError(msg.format(class_name=class_name))
        

In [15]:
myll=LL.from_components([1,2,32,-4,5])
myll[1]=3
print(myll[1])

3


## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [16]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    for _ in count():
        if stop and _ > stop:
            break
        yield 1.0e09 + normalvariate(0, m*random() )
        

Here is an implementation of an online mean algorithm..see http://www.johndcook.com/blog/standard_deviation/ and the link to http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ in-between. (Convince yourselves of the formulas...)

In [17]:
def online_mean(iterator):
    n = 0
    mu = 0
    for value in iterator:
        n += 1
        delta = value - mu
        mu = mu + delta/n
        yield mu

We use out generator functions to implement iterators:

In [18]:
g = make_data(5, 10)
list(g)

[1000000002.1771753,
 999999999.1167346,
 1000000000.1426084,
 999999997.8251112,
 1000000000.4011536,
 999999999.9735706,
 1000000002.0596654,
 1000000001.1196812,
 999999991.8793676,
 1000000001.0972152,
 999999999.2756776]

In [19]:
g = online_mean(make_data(5, 100))
print(type(g))
list(g)

<class 'generator'>


[999999999.9542464,
 1000000001.9719157,
 1000000000.3295027,
 1000000000.2437505,
 1000000000.8702788,
 1000000001.1020806,
 1000000000.9280082,
 1000000000.6201838,
 1000000000.3499814,
 1000000000.4991113,
 1000000000.4877763,
 1000000000.7805257,
 1000000000.691952,
 1000000000.1508715,
 1000000000.5979754,
 1000000000.1472319,
 1000000000.4968293,
 1000000000.4777606,
 1000000000.2707669,
 1000000000.3462082,
 1000000000.4931278,
 1000000000.471048,
 1000000000.4200879,
 1000000000.6053184,
 1000000000.6657466,
 1000000000.6433913,
 1000000000.5307808,
 1000000000.4626964,
 1000000000.447032,
 1000000000.3584279,
 1000000000.3490415,
 1000000000.3481492,
 1000000000.2993726,
 1000000000.3701665,
 1000000000.3193688,
 1000000000.3811215,
 1000000000.4423641,
 1000000000.568913,
 1000000000.7597395,
 1000000000.6689653,
 1000000000.6843696,
 1000000000.6722895,
 1000000000.6701565,
 1000000000.6701251,
 1000000000.6694323,
 1000000000.760179,
 1000000000.7019613,
 1000000000.6310574

### 2.1

Implement the standard deviation algorithm as a generator function as

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

In [20]:
# your code here
def online_mean_dev(iterator):
    s = 0
    n = 0
    for value in iterator:
        n += 1
        if(n == 1):
            oldM = newM = value
            oldS = 0.0
        else:
            newM = oldM + (value - oldM)/n
            newS = oldS + (value - oldM) * (value - newM)
            
            oldM = newM
            oldS = newS
        if n > 1:
            yield (n, value, newM, math.sqrt(newS/(n-1)))
        else:
            yield (1, value, value, 0.0)
        
        
        

Here we make 100000 element data, and run this iterator on it (imagine running this on a time-series being slowly read from disk

In [21]:
data_with_stats = online_mean_dev(make_data(5, 100000))

## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t)
```

which takes a tuple like the one yielded by your code above and returns True if the value is inbetween `level`-$\sigma$ of the mean.

In [22]:
#your code here
def is_ok(level, t):
    value = t[1]
    mean = t[2]
    sigma = t[3]
    return level-sigma <= value <= level+sigma

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [23]:
from itertools import filterfalse
pred = lambda t: is_ok(5, t)
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [24]:
list(anomalies)#materialize

[(1, 999999999.9779623, 999999999.9779623, 0.0),
 (2, 1000000000.0073965, 999999999.9926794, 0.020813125319043734),
 (3, 1000000001.6426429, 1000000000.5426672, 0.9527205646178869),
 (4, 1000000000.7436835, 1000000000.5929213, 0.7843593158801201),
 (5, 999999999.3844577, 1000000000.3512286, 0.8680388798095776),
 (6, 1000000002.1305275, 1000000000.6477784, 1.0632233857818199),
 (7, 999999999.8404384, 1000000000.5324441, 1.0174235256548367),
 (8, 1000000004.9114027, 1000000001.079814, 1.8122311506942248),
 (9, 1000000003.6145673, 1000000001.3614532, 1.894081593636684),
 (10, 999999999.3030477, 1000000001.1556126, 1.900692579738644),
 (11, 999999989.5725417, 1000000000.1026062, 3.930447474985047),
 (12, 999999999.9844947, 1000000000.0927635, 3.7476897287445903),
 (13, 999999999.6667683, 1000000000.0599947, 3.5900844221381694),
 (14, 999999998.573437, 999999999.953812, 3.47204751223357),
 (15, 999999994.0334477, 999999999.559121, 3.678416580918804),
 (16, 999999998.0525013, 999999999.46495

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would a shorter "window" of anomaly detection, like 100 points around the time in question pick? How might you create an algorithm which does window based averaging? (hint: the window size is small compared to the time series size). 

Finally think a bit of how you might implement all of this in a production environment..remember that data streaming in might get backed up when you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).