## Q1.

Add a `__setitem__` method to the Python linked list implementation from the lecture (this past Wednesday).

In [4]:
#your code here
from doctest import run_docstring_examples as dtest
import numbers
import reprlib
class LL:
    """
    Singly linked list built from two-element lists ``[value, next_node]``.

    ``_headNode`` holds the first node, or ``None`` when the list is empty.
    Integer indexing (reading and assignment) walks the chain from the head;
    slices and other non-integer indices raise ``TypeError``.

    >>> A = LL()  
    >>> A[0]
    Traceback (most recent call last):
        ...
    IndexError: trying to index an empty LL
    >>> A.insert_front(1)
    >>> A[0]
    1
    >>> A.insert_back(2)
    >>> A[1]
    2
    >>> A
    LL([1,...])
    >>> A[1] = 5
    >>> A[1]
    5
    >>> A[2] = 0
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> B = LL()
    >>> B.insert_back(3)
    >>> B[0]
    3
    >>> myll = LL.from_components([1,2])
    >>> myll[1]
    1
    >>> len(myll)
    2
    >>> myll[2]
    Traceback (most recent call last):
        ...
    IndexError: LL index out of range
    >>> myll[0:1]
    Traceback (most recent call last):
        ...
    TypeError: LL indices must be integers
    >>> len(LL.from_components([]))
    0
    """
    @classmethod
    def from_components(cls, components):
        """
        Build a list from a sequence of values.

        The first value seeds the list and every later value is inserted at
        the FRONT, so the resulting list holds the values in reverse order
        (``from_components([1, 2])`` gives 2 followed by 1). An empty
        sequence yields an empty list.
        """
        if len(components) == 0:
            return cls()
        inst = cls(components[0])
        for c in components[1:]:
            inst.insert_front(c)
        return inst

    def __init__(self, head=None):
        """
        Create an empty list, or a one-element list holding ``head``.

        ``None`` is the "no head" sentinel, so ``LL(None)`` is an empty list.
        """
        if head is None:
            self._headNode = None
        else:
            self._headNode = [head, None]

    def insert_front(self, element):
        """Prepend ``element``; the old head becomes the second node."""
        self._headNode = [element, self._headNode]

    def insert_back(self, element):
        """Append ``element`` after the current last node."""
        new_node = [element, None]
        if self._headNode is None:
            # Empty list: there is no tail to link from, so the new node is the head.
            self._headNode = new_node
            return
        curr_ptr = self._headNode
        while curr_ptr[1] is not None:
            curr_ptr = curr_ptr[1]
        curr_ptr[1] = new_node

    def __repr__(self):
        """Abbreviated representation showing only the first element."""
        class_name = type(self).__name__
        if self._headNode is None:
            components = ""
        else:
            components = reprlib.repr(self._headNode[0])
        return '{}([{},...])'.format(class_name, components)

    def __len__(self):
        """Count the nodes by walking the chain from the head."""
        count = 0
        curr_ptr = self._headNode
        while curr_ptr is not None:
            count += 1
            curr_ptr = curr_ptr[1]
        return count

    def _node_at(self, index):
        """
        Return the node (``[value, next_node]``) at position ``index``.

        Shared by ``__getitem__`` and ``__setitem__`` so both validate and
        traverse identically.

        Raises:
            TypeError: ``index`` is not an integer (e.g. a slice).
            IndexError: the list is empty, or ``index`` is past the last
                node. Negative indices are not supported and are reported
                as out of range.
        """
        class_name = type(self).__name__
        if not isinstance(index, numbers.Integral):
            msg = '{class_name} indices must be integers'
            raise TypeError(msg.format(class_name=class_name))
        curr_ptr = self._headNode
        if curr_ptr is None:
            msg = 'trying to index an empty {class_name}'
            raise IndexError(msg.format(class_name=class_name))
        count = 0
        while curr_ptr is not None:
            if index == count:  # hit the index we want
                return curr_ptr
            count += 1
            curr_ptr = curr_ptr[1]
        msg = '{class_name} index out of range'
        raise IndexError(msg.format(class_name=class_name))

    def __getitem__(self, index):
        """Return the value stored at integer position ``index``."""
        return self._node_at(index)[0]

    def __setitem__(self, index, value):
        """Replace the value stored at integer position ``index`` with ``value``."""
        self._node_at(index)[0] = value


In [9]:
# do some tests here
# Walk the list through a series of mutations, printing its contents after each one.
LL_test = LL()
print(LL_test)
print('\n')

# A single element inserted at the front.
LL_test.insert_front(1)
print(LL_test[0])
print('\n')

# Append at the back.
LL_test.insert_back(3)
print(LL_test[0], LL_test[1], sep='\n')
print('\n')

# A new head shifts the existing elements one position down.
LL_test.insert_front(5)
print(LL_test[0], LL_test[1], LL_test[2], sep='\n')
print('\n')

# Overwrite the last element through __setitem__.
LL_test[2] = 7
print(LL_test[0], LL_test[1], LL_test[2], sep='\n')
print('\n')

LL([,...])


1


1
3


5
1
3


5
1
7




## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [10]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    """
    Generate a stream of noisy 1-D samples centred on 1.0e09.

    Each sample is ``1.0e09 + normalvariate(0, m * random())``, i.e. Gaussian
    noise whose standard deviation is itself drawn uniformly from ``[0, m)``.

    Args:
        m: upper bound on the per-sample noise standard deviation.
        stop: last sample index to produce (inclusive), so ``stop + 1`` values
            are yielded. ``None`` (the default) gives an endless stream; a
            negative value gives an empty one.
    """
    for index in count():
        # Compare against None explicitly: a plain truthiness test would treat
        # stop=0 as "no limit" and never terminate.
        if stop is not None and index > stop:
            break
        yield 1.0e09 + normalvariate(0, m*random() )

#

Here is an implementation of an online mean algorithm. See http://www.johndcook.com/blog/standard_deviation/ and, linked from there, http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ for background. (Convince yourselves of the formulas...)

In [11]:
def online_mean(iterator):
    """
    Yield the running mean of ``iterator`` after each value is consumed.

    Applies the incremental update
    ``mean_k = mean_{k-1} + (x_k - mean_{k-1}) / k``,
    so no previously seen values are kept.
    """
    running_mean = 0
    for seen, sample in enumerate(iterator, start=1):
        running_mean = running_mean + (sample - running_mean) / seen
        yield running_mean

We use our generator functions to implement iterators:

In [12]:
# Build a sample generator with m=5 and stop=10, then materialize it so the
# notebook displays the generated values.
g = make_data(5, 10)
list(g)

[1000000001.0399555,
 1000000000.2334644,
 1000000002.3151091,
 999999999.5416874,
 999999999.3099096,
 999999996.2672925,
 1000000001.4120167,
 1000000002.997442,
 999999999.8163553,
 1000000000.5604103,
 1000000002.217348]

In [15]:
# Feed a make_data stream into online_mean; the result is itself a generator.
g = online_mean(make_data(5, 100))
print(type(g))
# Materializing it displays the running mean after each consumed sample.
list(g)

<class 'generator'>


[1000000001.9752347,
 1000000001.763298,
 1000000001.3202178,
 999999998.4871863,
 999999998.7721341,
 1000000000.1265864,
 1000000000.0290396,
 999999999.9606212,
 999999999.9784322,
 999999999.9537594,
 999999999.9224043,
 1000000000.1358185,
 999999999.993111,
 999999999.7588227,
 999999999.9592094,
 999999999.9689106,
 1000000000.2252524,
 1000000000.1895767,
 1000000000.1764816,
 1000000000.2784233,
 1000000000.3887349,
 1000000000.3164344,
 1000000000.2994766,
 1000000000.1113019,
 999999999.9900454,
 999999999.9540758,
 999999999.7418033,
 999999999.780638,
 999999999.9372736,
 1000000000.0718778,
 1000000000.1713707,
 1000000000.1675891,
 1000000000.1876358,
 1000000000.1036062,
 1000000000.0987314,
 1000000000.1110234,
 1000000000.1364968,
 1000000000.151045,
 1000000000.1454729,
 1000000000.1378688,
 1000000000.112916,
 1000000000.088737,
 1000000000.0395533,
 999999999.9949068,
 1000000000.1150222,
 1000000000.1135926,
 1000000000.1097603,
 1000000000.1322618,
 1000000000.18

### 2.1

Implement the standard deviation algorithm as a generator function as

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

In [25]:
# your code here
# consuming iterator and send out iterator...
#
# using the recurrences from the link provided
# (M_k: running mean, S_k: running sum of squared deviations)
# M_k = M_{k-1} + (x_k - M_{k-1}) / k
# S_k = S_{k-1} + (x_k - M_{k-1}) * (x_k - M_k)
#
import math

def online_mean_dev(iterator):
    """
    Yield running statistics for each value drawn from ``iterator``.

    For the k-th value x_k the running mean M and the running sum of squared
    deviations S are updated as

        M_k = M_{k-1} + (x_k - M_{k-1}) / k
        S_k = S_{k-1} + (x_k - M_{k-1}) * (x_k - M_k)

    Yields:
        ``(n, value, mu, stddev)`` where ``stddev`` is ``sqrt(S / (n - 1))``,
        or 0 for the very first value (no spread can be estimated from one
        sample).
    """
    mean = 0
    sq_dev_sum = 0

    for seen, sample in enumerate(iterator, start=1):
        # Offset from the mean *before* this sample is folded in.
        offset_from_old_mean = sample - mean
        mean = mean + offset_from_old_mean / seen

        # The second factor uses the already-updated mean.
        sq_dev_sum = sq_dev_sum + offset_from_old_mean * (sample - mean)

        spread = math.sqrt(sq_dev_sum / (seen - 1)) if seen > 1 else 0

        yield (seen, sample, mean, spread)
        

Here we make a data stream of roughly 100000 elements and run this iterator on it (imagine running this on a time series being slowly read from disk).

In [51]:
# Chain the two generators; both are lazy, so no samples are drawn until
# data_with_stats is consumed further down.
data_with_stats = online_mean_dev(make_data(5, 100000)) # full-size run
# For a quick sanity check, use a much shorter stream instead and materialize it:
#data_with_stats = online_mean_dev(make_data(5, 20))
#list(data_with_stats)

## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t)
```

which takes a tuple like the one yielded by your code above and returns True if the value lies within `level`-$\sigma$ of the mean.

In [48]:
#your code here
import math

def is_ok(level, t):
    """
    Tell whether a sample lies strictly within ``level`` standard deviations
    of the running mean.

    Args:
        level: allowed distance from the mean, in units of sigma.
        t: a ``(n, value, mu, stddev)`` tuple.

    Returns:
        True when ``|value - mu| / stddev < level``, and also whenever
        ``stddev`` is 0 (no spread to compare against); False otherwise.
    """
    _, observed, mean, sigma = t

    # A zero spread cannot be divided by; such samples are always accepted.
    if sigma == 0:
        return True

    return bool(math.fabs(observed - mean) / sigma < level)

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [52]:
from itertools import filterfalse
def pred(t):
    """Return True when the stats tuple ``t`` is within 5 sigma of the mean."""
    return is_ok(5, t)

# filterfalse keeps exactly the tuples for which pred is false, i.e. the outliers.
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [53]:
# Consuming the lazy filter drives the whole generator pipeline; the displayed
# list holds the (n, value, mu, stddev) tuples that failed the is_ok check.
list(anomalies)#materialize

[(2763, 999999985.9468982, 999999999.9812105, 2.797967848102974),
 (10849, 999999984.6192626, 1000000000.0343418, 2.857632550534303),
 (13283, 1000000015.4091518, 1000000000.0254759, 2.8660162273817598),
 (13957, 1000000014.620393, 1000000000.0221534, 2.871257040103971),
 (14465, 999999985.0510014, 1000000000.0211911, 2.8795346939354194),
 (14929, 1000000015.4942422, 1000000000.0245999, 2.881653311317319),
 (16082, 999999985.5500134, 1000000000.0080868, 2.878863735206887),
 (21090, 999999983.726664, 1000000000.0115852, 2.8845338554231486),
 (23151, 1000000015.0397172, 1000000000.0120796, 2.884114959939939),
 (36485, 999999984.7150387, 1000000000.0160526, 2.8973567815487815),
 (40177, 999999983.7024192, 1000000000.0040559, 2.8903409195977092),
 (42492, 999999985.1947402, 1000000000.0047457, 2.895827684329287),
 (45374, 1000000014.9804556, 1000000000.0044757, 2.8923172036624116),
 (50841, 1000000017.797233, 1000000000.0003852, 2.8875777033885535),
 (56369, 1000000014.65807, 1000000000.00

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would anomaly detection over a shorter "window", say 100 points around the time in question, pick up? How might you create an algorithm which does window-based averaging? (Hint: the window size is small compared to the time series size.)

Finally, think a bit about how you might implement all of this in a production environment. Remember that data streaming in might get backed up while you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).