## Q1.

Add methods `__iter__` to your project Time Series class to iterate over values, a method `itertimes` to iterate over times, a method `itervalues` to iterate over values, and a method `iteritems` to iterate over time-value pairs. (This is a similar interface to python dictionaries). To test these, check both the types of the results and the answers you expect.

In [1]:
#your code here
import numpy as np

def f(a):
    return a

class LazyOperation():
    """
    An class that takes a function and an arbitrary number of positional arguments 
    or keyword arguments as input
    
    Parameters
    ----------
    function : an arbitrary function
    args : arbitrary positional arguments
    kwargs : arbitrary keyword arguments
   
    Returns
    -------
    eval(LazyOperation): value
        a value representing the result of evaluating function with arguments args and kwargs
    __str__ / __repr__:
        when printing LazyOperation, the class name is printed followed by the function name,
        the positional arguments and the keyword arguments 
    
    Examples
    --------
    >>> a = TimeSeries([0,5,10], [1,2,3])
    >>> b = TimeSeries([1,2,3], [5,8,9])
    >>> thunk = check_length(a,b)
    >>> thunk.eval()
    True
    >>> assert isinstance( lazy_add(1,2), LazyOperation ) == True
    >>> thunk = lazy_mul( lazy_add(1,2), 4)
    >>> thunk.eval()
    12
    """
      
    def __init__(self,function,*args,**kwargs):
        self.function = function
        self.args = args
        self.kwargs = kwargs
    def __str__(self):
        class_name = type(self).__name__
        function_name = self.function.__name__
        str_return = "{}( {}, args = {}, kwargs = {} )".format(class_name, function_name, self.args, self.kwargs)
        return str_return
    def eval(self):
        l = []
        for arg in self.args:
            if type(arg).__name__ == type(self).__name__:
                l += [arg.eval()]
            else:
                l += [arg]
        self.args = tuple(l)
        for kwarg in self.kwargs:
            if type(self.kwargs[kwarg]).__name__ == type(self).__name__:
                self.kwargs[kwarg] = self.kwargs[kwarg].eval()
        return self.function(*self.args,**self.kwargs)


class TimeSeries(): 
    """
    An class that takes a sequence of integers or floats as input
    
    Parameters
    ----------
    data : any finite numeric sequence
    time : any finite, monotonically increasing numeric sequence
   
    Returns
    -------
    len(TimeSeries): int
        an integer representing the length of the time series
    Timeseries[position:int]: number
        returns the value of the TimeSeries at position
    Timeseries[position:int] = value:int/float
        set value of TimeSeries at position to be value
    __str__ / __repr__:
        when printing TimeSeries, if the total length of the Timeseries is greater than 10
        the result shows the first ten elements and its total length, else it prints the 
        whole Timeseries
        
    Examples
    --------
    >>> a = TimeSeries([0,5,10], [1,2,3])
    >>> threes = TimeSeries(range(100),range(100))
    >>> len(a)
    3
    >>> a[10]
    3
    >>> a[10]=10
    >>> a[10]
    10
    >>> print(a)
    [(0, 1), (5, 2), (10, 10)]
    >>> print(threes)
    [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), ...], length=100
    >>> [v for v in TimeSeries([0,1,2],[1,3,5])]
    [1, 3, 5]
    >>> a = TimeSeries([0,5,10], [1,2,3])
    >>> b = TimeSeries([2.5,7.5], [100, -100])
    >>> print(a.interpolate([1])) 
    [(1, 1.2)]
    >>> print(a.interpolate(b.times()))
    [(2.5, 1.5), (7.5, 2.5)]
    >>> print(a.interpolate([-100,100]))
    [(-100, 1.0), (100, 3.0)]
    >>> b.mean()
    0.0
    >>> a.mean()
    2.0
    >>> a = TimeSeries([],[])
    >>> a.mean()
    nan
    >>> a = TimeSeries([1,2],[1,'a'])
    >>> a.mean()
    Traceback (most recent call last):
        ...
    TypeError: cannot perform reduce with flexible type
    
    Notes
    -----
    PRE: `data` is numeric
    
    """
    def __init__(self,time,data):
        if len(time)!=len(data):
            raise "Not the same length"
        self.time=np.array(time)
        self.data=np.array(data)
        self.index=0
        self.len=len(time)
        
    def __len__(self):
        return len(self.data)
    def __getitem__(self, time):
        if time in self.time:
            return int(self.data[np.where(self.time==time)])
        raise "Time does not exist"
    def __setitem__(self,time,value):
        if time not in self.time:
             raise "Time does not exist"
        self.data[np.where(self.time==time)]=value
    def __contains__(self, time):
        return time in self.time
    def __next__(self): 
        try:
            word = self.data[self.index] 
        except IndexError:
            raise StopIteration() 
        self.index += 1
        return word 
    def __iter__(self):
        return self    
    def itertimes(self):
        return iter(self.time)
    def itervalues(self):
        return iter(self.data)
    def iteritems(self):
        return iter(list(zip(self.time,self.data)))
    def __str__(self):
        if self.len>10:
            return '[{}, ...], length={}'.format(str(list(zip(self.time,self.data))[0:10])[1:-1], self.len)
        return '{}'.format(list(zip(self.time,self.data)))
    def __repr__(self):
        if self.len>10:
            return '[{}, ...], length={}'.format(str(list(zip(self.time,self.data))[0:10])[1:-1], self.len)
        return '{}'.format(list(zip(self.time,self.data)))
    def values(self):
        return list(self.data)
    def times(self):
        return list(self.time)
    def items(self):
        return list(zip(self.time,self.data))
    def interpolate(self,newtime):
        newvalue=np.interp(newtime,self.time,self.data)
        return TimeSeries(newtime,newvalue)
    @property
    def lazy(self):
        lazy_fun = LazyOperation(f,self)
        return lazy_fun
    def mean(self):
        return np.mean(self.data)
    def median(self):
        return np.median(self.data)


## Q2.

An online mean and standard deviation algorithm.

Below is a function to generate a potentially infinite stream of 1-D data.

In [1]:
from random import normalvariate, random
from itertools import count
def make_data(m, stop=None):
    for _ in count():
        if stop and _ > stop:
            break
        yield 1.0e09 + normalvariate(0, m*random() )
        

Here is an implementation of an online mean algorithm..see http://www.johndcook.com/blog/standard_deviation/ and the link to http://www.johndcook.com/blog/2008/09/26/comparing-three-methods-of-computing-standard-deviation/ in-between. (Convince yourselves of the formulas...)

In [2]:
def online_mean(iterator):
    n = 0
    mu = 0
    for value in iterator:
        n += 1
        delta = value - mu
        mu = mu + delta/n
        yield mu

We use out generator functions to implement iterators:

In [3]:
g = make_data(5, 10)
list(g)

[1000000006.314605,
 999999996.4336594,
 1000000000.9531634,
 999999995.715105,
 999999997.922358,
 999999998.6563528,
 1000000002.0435847,
 1000000000.0359453,
 1000000001.2967277,
 1000000001.50001,
 1000000000.1735404]

In [4]:
g = online_mean(make_data(5, 100))
print(type(g))
list(g)

<class 'generator'>


[1000000000.6555499,
 999999998.6159835,
 1000000001.1071713,
 1000000000.9390246,
 1000000000.323393,
 1000000000.1634421,
 999999999.4538594,
 999999999.4331237,
 999999999.8460852,
 999999999.815531,
 999999999.8424581,
 1000000000.3945439,
 1000000000.3868847,
 1000000000.078794,
 1000000000.1230247,
 1000000000.1642759,
 999999999.7851408,
 999999999.8366209,
 999999999.7881701,
 999999999.820433,
 999999999.8508124,
 999999999.8567665,
 999999999.9455887,
 999999999.9456179,
 999999999.9921421,
 1000000000.2514474,
 1000000000.3623946,
 1000000000.3315352,
 1000000000.3438259,
 1000000000.2435598,
 1000000000.115088,
 1000000000.1365296,
 1000000000.1571035,
 1000000000.1563869,
 1000000000.1282873,
 1000000000.111607,
 1000000000.0867754,
 999999999.9948819,
 999999999.9366332,
 999999999.9505067,
 1000000000.0718905,
 1000000000.0922825,
 1000000000.1421386,
 1000000000.0197759,
 1000000000.070217,
 1000000000.1984698,
 1000000000.1402993,
 1000000000.112825,
 1000000000.102664

### 2.1

Implement the standard deviation algorithm as a generator function as

```python
def online_mean_dev(iterator):
    BLA BLA
    if n > 1:
        stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)
```

In [9]:
# your code here
import math

def online_mean_dev(iterator):
    n = 0
    mu_1 = 0
    mu = 0
    dev_accum = 0
    stddev = 0
    for value in iterator:
        n += 1
        delta = value - mu
        mu = mu + delta/n
        prod = (value-mu)*(value-mu_1)
        dev_accum = dev_accum + prod
        mu_1 = mu
        if n > 1:
            stddev = math.sqrt(dev_accum/(n-1))
        yield (n, value, mu, stddev)

Here we make 100000 element data, and run this iterator on it (imagine running this on a time-series being slowly read from disk

In [15]:
data_with_stats = online_mean_dev(make_data(5, 100000))

## Q3.

Let's do Anomaly detection. Write a routine `is_ok`:

```python
def is_ok(level, t)
```

which takes a tuple like the one yielded by your code above and returns True if the value is inbetween `level`-$\sigma$ of the mean.

In [14]:
#your code here
def is_ok(level, t):
    return abs(t[1]-t[2])<level

We use this function to create a predicate passed through to `itertools.filterfalse` which is then used to obtain an iterator on the anomalies.

In [16]:
from itertools import filterfalse
pred = lambda t: is_ok(5, t)
anomalies = filterfalse(pred, data_with_stats)

We materialize the anomalies...

In [17]:
list(anomalies)#materialize

[(9, 999999994.4699967, 1000000000.2213477, 2.8989605533301774),
 (12, 1000000009.9442416, 1000000000.8314465, 3.816093863103389),
 (19, 999999995.2896675, 1000000000.6043333, 3.4657400063304435),
 (42, 1000000015.1205368, 1000000000.7059922, 3.722115883687841),
 (51, 1000000005.9218997, 1000000000.6949203, 3.5424021407097506),
 (91, 1000000007.2168533, 1000000000.7023329, 3.05706810298021),
 (93, 1000000006.9875977, 1000000000.7641236, 3.0937341878086326),
 (128, 1000000011.9100189, 1000000000.7329949, 3.034544645070366),
 (136, 999999993.0306716, 1000000000.5990481, 3.0484896933021357),
 (142, 1000000010.2697396, 1000000000.6340764, 3.14184400658044),
 (145, 1000000005.7504617, 1000000000.6804694, 3.1642429608810505),
 (149, 1000000007.8109115, 1000000000.703773, 3.181672896783604),
 (150, 1000000006.4707452, 1000000000.7422194, 3.2057483260609967),
 (158, 999999993.8131195, 1000000000.6551567, 3.208337623193107),
 (162, 999999994.4312352, 1000000000.6032213, 3.207435241195914),
 (16

## To think of, but not hand in

What kinds of anomalies will this algorithm pick up? What kinds would a shorter "window" of anomaly detection, like 100 points around the time in question pick? How might you create an algorithm which does window based averaging? (hint: the window size is small compared to the time series size). 

Finally think a bit of how you might implement all of this in a production environment..remember that data streaming in might get backed up when you handle an anomaly.

(Some inspiration might accrue if you look at the docs for `collections.deque`).