## Q1.

Add a `__setitem__` method to the Python linked-list implementation from the lecture (this past Wednesday).

In [112]:
#your code here
#copy the previous class LL and add setitem to it.
from doctest import run_docstring_examples as dtest
import numbers
import reprlib
class LL:
    """
    Singly linked list whose nodes are two-item lists ``[value, next_node]``.

    ``_headNode`` holds the first node, or ``None`` when the list is empty.
    Items are read and overwritten by non-negative integer position; slices
    are rejected with ``TypeError`` and negative positions are out of range.

    >>> A = LL()
    >>> A[0]
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> A.insert_front(1)
    >>> A[0]
    1
    >>> A.insert_back(2)
    >>> A[1]
    2
    >>> A
    LL([1,...])
    >>> myll = LL.from_components([1,2])
    >>> myll[1]
    1
    >>> len(myll)
    2
    >>> myll[2]
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1]
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> myll[1] = 7
    >>> myll[1]
    7
    >>> B = LL()
    >>> B.insert_back(3)
    >>> B[0]
    3
    """

    @classmethod
    def from_components(cls, components):
        """
        Build a list by pushing each component onto the front, in order.

        Because every component is inserted at the front, the resulting list
        is in reverse order: the first component ends up at the tail.  An
        empty iterable yields an empty list.
        """
        inst = cls()
        for c in components:
            inst.insert_front(c)
        return inst

    def __init__(self, head=None):
        """Create an empty list, or a one-element list holding *head*."""
        if head is None:
            self._headNode = None
        else:
            self._headNode = [head, None]

    def insert_front(self, element):   #O(1)
        """Make *element* the new first item."""
        self._headNode = [element, self._headNode]

    def insert_back(self, element):     #O(n)
        """Append *element* after the current last item."""
        new_node = [element, None]
        if self._headNode is None:
            # No tail to link from: the new node becomes the head.
            self._headNode = new_node
            return
        curr_ptr = self._headNode
        while curr_ptr[1] is not None:
            curr_ptr = curr_ptr[1]
        curr_ptr[1] = new_node

    def __repr__(self):
        """Return an abbreviated representation showing only the first item."""
        class_name = type(self).__name__
        if len(self) == 0:
            components = ""
        else:
            components = reprlib.repr(self[0])
        return '{}([{},...])'.format(class_name, components)

    def __len__(self):
        """Count the nodes by walking the chain from the head."""
        count = 0
        curr_ptr = self._headNode
        while curr_ptr is not None:
            count += 1
            curr_ptr = curr_ptr[1]
        return count

    def _node_at(self, index):
        """
        Return the node at position *index* (shared by get/set item).

        Raises TypeError when *index* is not an integer, and IndexError when
        the list is empty or *index* does not match any position.
        """
        class_name = type(self).__name__
        if not isinstance(index, numbers.Integral):
            msg = '{class_name} indices must be integers'
            raise TypeError(msg.format(class_name=class_name))
        curr_ptr = self._headNode
        if curr_ptr is None:
            msg = 'trying to index an empty {class_name}'
            raise IndexError(msg.format(class_name=class_name))
        count = 0
        while curr_ptr is not None:
            if index == count:
                return curr_ptr
            count += 1
            curr_ptr = curr_ptr[1]
        # Walked off the end without a match (also the case for negatives).
        msg = '{class_name} index out of range'
        raise IndexError(msg.format(class_name=class_name))

    def __getitem__(self, index):
        """Return the value stored at integer position *index*."""
        return self._node_at(index)[0]

    def __setitem__(self, index, element):
        """Overwrite the value stored at integer position *index*."""
        self._node_at(index)[0] = element
            

In [118]:
# Exercise LL.__setitem__: build the list 5 -> 4 -> 3, then overwrite
# the value at position 2 and confirm the change by reading it back.
A = LL()
A.insert_front(5)
A[0]  # value is discarded here; only a cell's last expression is displayed
A.insert_back(4)
A.insert_back(3)

old_element = A[2]
print("old_element:", old_element, sep="")

A[2] = 5
new_element = A[2]
print("new_element:", new_element, sep="")

old_element:3
new_element:5


## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [1]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    """
    Generate a stream of noisy values centred on 1.0e09.

    Each value is ``1.0e09`` plus Gaussian noise whose standard deviation is
    itself drawn uniformly from ``[0, m)``.

    m    -- upper bound on the per-sample noise standard deviation.
    stop -- last index to emit (inclusive), so ``stop + 1`` values are
            produced; ``None`` (the default) makes the stream unbounded.
    """
    for i in count():
        # Compare against None explicitly so that stop=0 ends the stream
        # after one value instead of being mistaken for "no limit".
        if stop is not None and i > stop:
            break
        yield 1.0e09 + normalvariate(0, m * random())
        

My remarks: _ has 3 main conventional uses in Python:

To hold the result of the last executed statement in an interactive interpreter session. This precedent was set by the standard CPython interpreter, and other interpreters have followed suit

For translation lookup in i18n (imported from the corresponding C conventions, I believe), as in code like: `raise forms.ValidationError(_("Please enter a correct username"))`

As a general purpose "throwaway" variable name to indicate that part of a function result is being deliberately ignored, as in code like: label, has_label, _ = text.partition(':')

Here is an implementation of an online mean algorithm. See http://www.johndcook.com/blog/standard_deviation/ and the link to http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ in between. (Convince yourselves of the formulas.)

In [2]:
def online_mean(iterator):
    """
    Yield the running mean after each value drawn from *iterator*.

    The mean is updated incrementally from the previous mean and the new
    value, so no earlier values are kept.
    """
    running = 0
    for seen, sample in enumerate(iterator, start=1):
        # Move the estimate toward the new sample by 1/seen of the gap.
        running = running + (sample - running) / seen
        yield running

We use our generator functions to implement iterators:

In [3]:
# Draw a short, bounded sample from the data stream and materialize it so
# the individual values can be inspected.
g = make_data(5, 10)
list(g)

[999999992.9965278,
 1000000004.5457895,
 999999996.373355,
 999999998.9955627,
 999999999.1063766,
 1000000000.2265998,
 999999999.7131544,
 999999998.5092816,
 999999996.4040016,
 999999999.5817575,
 999999999.3223431]

In [56]:
# Chain the two generators: online_mean consumes make_data lazily.
g = online_mean(make_data(5, 100))
print(type(g))
# list(g) drains the whole stream; the slice then keeps the first 10 means.
list(g)[:10]

<class 'generator'>


[999999999.6965046,
 1000000000.426052,
 1000000000.4709922,
 999999999.7576729,
 999999999.9805667,
 999999999.8493887,
 999999999.8656821,
 1000000000.01072,
 1000000000.0257503,
 1000000000.0606006]

### 2.1

Implement the standard deviation algorithm as a generator function as

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

#### My Remarks:

$M_k = M_{k-1} + (x_k - M_{k-1})/k$

$S_k = S_{k-1} + (x_k - M_{k-1})(x_k - M_k)$

In [5]:
# your code here
import math
def online_mean_dev(iterator):
    """
    Yield ``(n, value, mean, stddev)`` for each value drawn from *iterator*.

    ``n`` is the 1-based count of values seen so far, ``mean`` the running
    mean and ``stddev`` the running sample standard deviation (divisor
    ``n - 1``).  For the very first value ``stddev`` is reported as 0.
    """
    mean = 0
    sum_sq = 0
    for n, value in enumerate(iterator, start=1):
        previous_mean = mean
        mean = (value - previous_mean) / n + previous_mean
        # Accumulate squared deviations using both the old and the new mean.
        sum_sq = sum_sq + (value - previous_mean) * (value - mean)
        stddev = math.sqrt(sum_sq / (n - 1)) if n > 1 else 0
        yield (n, value, mean, stddev)

Here we generate roughly 100000 data points and run this iterator on them (imagine running this on a time series being slowly read from disk).

In [108]:
# Build the statistics stream lazily; nothing is computed until it is iterated.
data_with_stats = online_mean_dev(make_data(5, 100000))
# Left disabled on purpose: list(data_with_stats)[0:10] would exhaust the
# generator, leaving nothing for any later consumer of data_with_stats.

## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t)
```

which takes a tuple like the one yielded by your code above and returns True if the value is within `level`-$\sigma$ of the mean.

In [109]:
#your code here
def is_ok(level, t):
    """
    Return True when the value in *t* lies within ``level`` sigma of the mean.

    *t* is a 4-tuple ``(n, value, mu, stddev)``.  A zero ``stddev`` is always
    accepted, since the deviation cannot be expressed in sigmas.
    """
    _count, value, mu, stddev = t
    if stddev == 0:
        return True
    deviation_in_sigmas = math.fabs(value - mu) / stddev
    # Outliers are strictly beyond the threshold; the boundary itself is ok.
    return not deviation_in_sigmas > level

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [110]:
from itertools import filterfalse
def pred(t):
    """Return the result of is_ok for tuple *t* at a fixed level of 5."""
    return is_ok(5, t)


# filterfalse keeps the items for which pred is false, i.e. the anomalies.
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [111]:
list(anomalies)  # materialize: pulls every item through the lazy filter

[(4981, 999999984.5056592, 999999999.9352322, 2.8525157695907333),
 (9617, 999999985.0695803, 999999999.9564091, 2.8771790448578565),
 (10984, 1000000014.8385775, 999999999.9663814, 2.89460291803623),
 (11662, 999999982.9632908, 999999999.9555298, 2.895591077537978),
 (16935, 999999985.0432805, 999999999.975334, 2.904416329357956),
 (21922, 1000000014.7192584, 999999999.9752358, 2.903694426189055),
 (31260, 1000000014.5527397, 999999999.9731972, 2.8905592520573973),
 (34327, 1000000015.0233517, 999999999.9745582, 2.8973529545900045),
 (42720, 1000000015.4256437, 999999999.9896747, 2.8896762295040905),
 (45021, 1000000014.6754482, 999999999.9927558, 2.8890507485014556),
 (47733, 1000000014.4856101, 999999999.9883006, 2.8880552472256036),
 (47769, 999999982.6505095, 999999999.9877168, 2.8890504887125084),
 (48945, 999999983.7104702, 999999999.9859334, 2.881964811704661),
 (49567, 999999982.9099447, 999999999.985068, 2.8840618843820645),
 (50728, 999999984.5860844, 999999999.9863853, 2.88

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would a shorter "window" of anomaly detection, like 100 points around the time in question pick? How might you create an algorithm which does window based averaging? (hint: the window size is small compared to the time series size). 

Finally think a bit of how you might implement all of this in a production environment..remember that data streaming in might get backed up when you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).