In [None]:
from datetime import datetime
import json
import random

# Running counter shared by every call to create_record().
i = 0
# Pool of names a generated record can carry.
record_names = ['Alice', 'Bob', 'Charlie']


def create_record():
    """Produce one synthetic record, serialized as a JSON string.

    Each call increments the module-level counter ``i`` and stores the new
    value in the record. The remaining fields are a name chosen at random
    from ``record_names``, a random float ``x`` from ``random.random()``,
    a random integer ``y`` between 0 and 10 inclusive, and the current
    ``datetime.now()`` rendered with ``str``.

    Returns
    -------
    str
        JSON object text with the keys ``name``, ``i``, ``x``, ``y`` and
        ``time``, in that order.
    """
    global i
    i = i + 1
    # The random draws happen in the order name, x, y so that a seeded
    # ``random`` module yields the same sequence of records.
    chosen_name = random.choice(record_names)
    x_value = random.random()
    y_value = random.randint(0, 10)
    timestamp = str(datetime.now())
    payload = dict(name=chosen_name, i=i, x=x_value, y=y_value, time=timestamp)
    return json.dumps(payload)

In [None]:
# Generate one sample record; the cell shows the returned JSON string.
# Note that every call also advances the global counter `i`.
create_record()

In [None]:
# Inspect the Python type of what create_record() returns (the result of json.dumps).
type(create_record())

### Basic Streams and Map

In [None]:
from streamz import Stream
from tornado.ioloop import IOLoop

# Root stream of the notebook: later cells derive other streams from it and
# push records into it with source.emit(...).
source = Stream()
source  # trailing expression so the notebook displays the stream object

In [None]:
# Derive a stream from `source` by mapping json.loads over it, so the JSON
# text produced by create_record() becomes a Python dict.
records = source.map(json.loads)
records

In [None]:
# Derive a stream that carries only the 'name' field of each parsed record.
names = records.map(lambda r: r['name'])
names

In [None]:
# Same pattern for the 'time' field; the mapped stream is displayed but not
# bound to a name.
records.map(lambda r: r['time'])

In [None]:
# Build a sample record and display it; nothing is pushed into `source` here.
record = create_record()
record

In [None]:
# Ask streamz to draw the graph of streams built from `source` so far.
source.visualize()

In [None]:
# Create a fresh record and send it through `source`, and from there to the
# streams derived from it above.
record = create_record()
source.emit(record)  # push data into front side of stream

### Continuous updates


In [None]:
from tornado import gen
from tornado.ioloop import IOLoop

async def f():
    """Endlessly feed ``source`` with newly generated records.

    Each iteration sleeps for 0.1 s via ``gen.sleep``, builds a record with
    ``create_record()`` and awaits ``source.emit(..., asynchronous=True)``.
    The loop has no exit condition, so the coroutine never returns.
    """
    pause_seconds = 0.100
    while True:
        await gen.sleep(pause_seconds)
        payload = create_record()
        await source.emit(payload, asynchronous=True)
        
# Hand the coroutine function f to the current Tornado IOLoop as a callback,
# so the loop starts the endless emit loop defined above.
IOLoop.current().add_callback(f)

### Accumulators

In [None]:
# Display the parsed-record stream again.
records

In [None]:
def add(acc, new):
    """Combine the running value ``acc`` with the incoming ``new`` using ``+``.

    Returns whatever ``acc + new`` evaluates to for the given operands.
    """
    combined = acc + new
    return combined

# Extract the 'x' value of each record and fold those values with add,
# starting from 0.
records.map(lambda d: d['x']).accumulate(add, start=0)

In [None]:
def accumulator(acc, new):
    """Return a copy of the count mapping ``acc`` with ``new`` counted once more.

    The input mapping is not modified: the function works on ``acc.copy()``.
    A key seen for the first time starts at 1; an existing key is incremented.
    """
    tallies = acc.copy()
    tallies[new] = tallies.get(new, 0) + 1
    return tallies
        
    
# Fold the names stream with accumulator, starting from an empty dict.
names.accumulate(accumulator, start={})

### Streams of Dataframes

In [None]:
import pandas as pd
# Group parsed records with timed_window('200ms'), then convert each batch to
# a list and build a pandas DataFrame from it.
batches = records.timed_window('200ms')
dfs = batches.map(list).map(pd.DataFrame)
dfs

In [None]:
def query(df):
    """Return the rows of ``df`` whose ``name`` column equals ``'Alice'``.

    Parameters
    ----------
    df : pandas.DataFrame
        One batch of records.

    Returns
    -------
    pandas.DataFrame
        The matching rows. A frame with zero rows is returned unchanged.
    """
    # A batch built from an empty list (``pd.DataFrame([])``) has no columns
    # at all, so looking up ``name`` on it would raise. Zero-row frames are
    # passed straight through; the filter could not match anything anyway.
    if len(df) == 0:
        return df
    # Bracket lookup always refers to the column, unlike attribute access.
    return df[df['name'] == 'Alice']

def aggregate(acc, new):
    """Add the sum of the ``x`` column of batch ``new`` to the running total.

    A batch of length zero leaves ``acc`` untouched and is returned as-is.
    """
    if not len(new):
        return acc
    batch_total = new.x.sum()
    return acc + batch_total

# Filter each batch frame with query, then fold the filtered frames with
# aggregate, starting from 0.
dfs.map(query).accumulate(aggregate, start=0)

### Streaming Dataframes

In [None]:
from streamz.dataframe import DataFrame

# One-row pandas frame built from a sample record; it is passed as `example`
# below. This create_record() call also advances the global counter `i`.
example = pd.DataFrame([json.loads(create_record())])

# Wrap the stream of pandas frames in a streamz DataFrame.
df = DataFrame(stream=dfs, example=example)
# df.tail(5)

In [None]:
# Select rows whose name is 'Alice' and sum their x column, expressed on the
# streaming dataframe rather than on individual batches.
df[df.name == 'Alice'].x.sum()

In [None]:
# Cast the 'time' column to the 'M8[ns]' (datetime64 nanosecond) dtype and
# make it the index.
# NOTE(review): this cell rebinds `df`, so running it a second time presumably
# fails because 'time' is no longer a column. Confirm before re-running.
df['time'] = df['time'].astype('M8[ns]')
df = df.set_index('time')
df.tail(5)

In [None]:
# Mean of x and y per name, computed over df.window('5s').
df.window('5s').groupby('name')[['x', 'y']].mean()

In [None]:
import streamz.dataframe.holoviews

In [None]:
# Same per-name means of x and y over window('5s'), drawn as a bar plot.
df.window('5s').groupby('name')[['x', 'y']].mean().plot.bar()

In [None]:
# Histogram of the x column of the streaming dataframe.
df.x.plot.hist()

In [None]:
# Draw the stream graph again, now including everything attached to `source`
# in the cells above.
source.visualize()