# Simple streamz example

In [None]:
def inc(x):
    """Return ``x`` incremented by one."""
    incremented = x + 1
    return incremented

def double(x):
    """Return twice ``x``, computed as ``2 * x``."""
    doubled = 2 * x
    return doubled

# Slightly more realistic example with JSON data

## Code to create random JSON data

In [None]:
from datetime import datetime
import json
import random

# Running count of records produced so far; create_record() bumps it.
i = 0
# Pool of names that create_record() samples from.
record_names = ['Alice', 'Bob', 'Charlie']

def create_record():
    """Build one random record and return it serialized as a JSON string.

    The record holds a randomly chosen name, the freshly incremented
    module-level counter ``i``, a float ``x`` from ``random.random()``, an
    integer ``y`` from ``random.randint(0, 10)`` and the result of
    ``str(datetime.now())`` under ``time``.
    """
    global i
    i = i + 1
    # Draw the random values in a fixed order: name, then x, then y.
    chosen_name = random.choice(record_names)
    x_value = random.random()
    y_value = random.randint(0, 10)
    timestamp = str(datetime.now())
    fields = dict(name=chosen_name, i=i, x=x_value, y=y_value, time=timestamp)
    return json.dumps(fields)

## Basic Streams and Map

In [None]:
from streamz import Stream

# Root stream of the pipeline; records are pushed into it with source.emit(...) in a later cell.
source = Stream()
# Bare expression as the cell's last line (presumably so the notebook displays the stream).
source

In [None]:
# Create stream of json-parsed records


In [None]:
# Create stream of names


In [None]:
# Push data into stream


### Continuous updates


In [None]:
# Push data into stream continuously

from tornado import gen
from tornado.ioloop import IOLoop

async def f():
    """Endlessly emit new records into ``source``, sleeping 0.1 s before each one."""
    pause_seconds = 0.100
    while True:
        await gen.sleep(pause_seconds)
        # Generate the record and emit it in one step; the emit itself is awaited.
        await source.emit(create_record(), asynchronous=True)

# Hand the coroutine function to the current tornado IOLoop via add_callback.
IOLoop.current().add_callback(f)

### Accumulators

In [None]:
records

In [None]:
# Sum the 'x' value of the records over time


In [None]:
# Count the number of occurrences of each name over time

def accumulator(acc, new):
    """Return a copy of the counts mapping ``acc`` with ``new`` tallied once more.

    ``acc`` itself is left unmodified; a key not seen before starts at 1.
    """
    counts = acc.copy()
    if new not in counts:
        # First sighting of this key.
        counts[new] = 1
        return counts
    counts[new] += 1
    return counts
        
# Run each emitted name through accumulator(), starting from an empty dict of counts.
# NOTE(review): `names` is not defined in the visible cells — presumably built in the
# "Create stream of names" cell; confirm before running.
names.accumulate(accumulator, start={})

### Streams of Dataframes

In [None]:
# Collect records over time, turn batches into pandas DataFrames

import pandas as pd
batches = 
dfs = 

In [None]:
# Compute: df[df.name == 'Alice'].x.sum()

def query(df):
    """Return only the rows of ``df`` whose ``name`` column equals 'Alice'."""
    is_alice = df.name == 'Alice'
    return df[is_alice]

def aggregate(acc, new):
    """Add the sum of ``new.x`` to the running total ``acc``.

    A ``new`` of length zero leaves ``acc`` unchanged.
    """
    batch_is_empty = len(new) == 0
    return acc if batch_is_empty else acc + new.x.sum()

# Apply query() to each batch (keeps only the 'Alice' rows), then combine the results
# with aggregate(), which adds each batch's x sum to a total starting at 0.
dfs.map(query).accumulate(aggregate, start=0)

### Streaming Dataframes

In [None]:
from streamz.dataframe import DataFrame

# One-row pandas frame built from a sample record; passed below as `example`.
# Note: calling create_record() here also advances the global counter `i`.
example = pd.DataFrame([json.loads(create_record())])

# Wrap the `dfs` stream in a streamz DataFrame, then show tail(5).
df = DataFrame(stream=dfs, example=example)
df.tail(5)

In [None]:
# Compute: df[df.name == 'Alice'].x.sum()


In [None]:
# Do a bit of data munging with Pandas syntax
# Presumably 'time' is still the string written by create_record() (str(datetime.now()));
# 'M8[ns]' is NumPy shorthand for the datetime64[ns] dtype.
df['time'] = df['time'].astype('M8[ns]')
# Use the converted timestamps as the index.
df = df.set_index('time')
df.tail(5)

In [None]:
# Window by 5s, groupby name, compute mean of x and y columns


In [None]:
# Produce a live plot of the computation above

import streamz.dataframe.holoviews

In [None]:
# Plot x series with line

In [None]:
# Line plot of the cumulative sum of the x series

In [None]:
# histogram of x series

In [None]:
# Draw the graph of streams connected to `source`.
# NOTE(review): streamz presumably needs graphviz for this — confirm it is installed.
source.visualize()