In [16]:
# Basic imports
import asyncio
import json
import logging
import os
import random
import threading
import time
from datetime import date, datetime

# Library imports
import nbimporter
import numpy as np
import pandas as pd
import pyarrow as pa

# pyEX is an easy-to-use IEX API interface built for Python
import pyEX

# The main course
import perspective

# Prefix every log record with its timestamp and emit INFO-level messages and above.
LOG_FORMAT = "%(asctime)s %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# Streaming Data Sources

Inside `datasources.ipynb`, there are a few streaming datasources that will feed live data to Perspective. 

Each datasource runs on its own subprocess and subthread in order to not block the main Jupyter thread from running, so cells can still be added and evaluated as normal. In the background, the datasource will fetch data, clean it (if necessary), and update the Perspective tables—which will display the new results in each widget in the notebook.

In [9]:
from datasources import IEXIntervalDataSource, IEXSSEDataSource, IEXStaticDataSource

Importing Jupyter notebook from datasources.ipynb


In [10]:
# Create a pyEX client against the IEX sandbox. Set the IEX_SANDBOX_TOKEN
# environment variable to use your own sandbox token; otherwise the example
# sandbox token below is used, so the notebook still runs out of the box.
token = os.environ.get("IEX_SANDBOX_TOKEN", "Tpk_ecc89ddf30a611e9958142010a80043c")
client = pyEX.Client(api_token=token, version="sandbox")

Create the schemas for the tables we are initializing—Perspective will infer data types from what data is passed in, but using a schema offers greater control.

In [13]:
# Column name -> type for batch quote data: identifiers first, then each
# price paired with the time it was recorded, then volumes.
batch_schema = dict(
    symbol=str,
    companyName=str,
    open=float,
    openTime=datetime,
    close=float,
    closeTime=datetime,
    high=float,
    highTime=datetime,
    low=float,
    lowTime=datetime,
    latestPrice=float,
    latestUpdate=datetime,
    latestVolume=int,
    volume=int,
)

# TODO: rename
# Column name -> type for the table that receives last-sale quotes.
last_schema = dict(
    symbol=str,
    price=float,
    time=datetime,
    size=int,
)
# Column name -> type for top-of-book data: bid/ask, last sale, and metadata.
tops_schema = dict(
    symbol=str,
    bidSize=int,
    bidPrice=float,
    askSize=int,
    askPrice=float,
    volume=int,
    lastSalePrice=float,
    lastSaleSize=int,
    lastSaleTime=datetime,
    lastUpdated=datetime,
    sector=str,
    securityType=str,
    seq=int,
)
# Column name -> type for the portfolio tables: how many shares we hold of
# each symbol, plus the price and time columns.
holdings_schema = dict(
    symbol=str,
    quantity=int,
    price=float,
    time=datetime,
)
# Column name -> type for chart data: one OHLC + volume row per date,
# tagged with its symbol and the quantity we hold.
charts_schema = dict(
    date=date,
    open=float,
    high=float,
    low=float,
    close=float,
    volume=int,
    symbol=str,
    quantity=int,
)

### Our Portfolio

For this demonstration, let's set up a fictional portfolio of stocks—it's one of the most natural use cases for streaming data, and it provides a way for us to join static and streaming data together intuitively. In a more comprehensive example, our holdings of individual stocks will probably change over time, but we'll keep it fixed for now.

In [19]:
# Tickers that make up the fictional portfolio.
symbols = [
    "AAPL",
    "MSFT",
    "AMZN",
    "TSLA",
    "SPY",
    "SNAP",
    "ZM",
    "JPM",
]

To save our portfolio, we're going to use two Perspective tables:

- `holdings_table`, which is indexed on `symbol` and will always return the latest value of our portfolio based on the prices for each component.
- `holdings_total_table`, which is not indexed, and will hold a history of prices and values for each symbol, allowing us to see the value of our portfolio over time.

Using `on_update`, we link the two tables together; whenever `holdings_table` updates from the datasource, it will pass the updated rows to `holdings_total_table`.

In [20]:
# Latest state of the portfolio: indexed on `symbol`.
holdings_table = perspective.Table(holdings_schema, index="symbol")

# Random position size between 5 and 10 shares (inclusive) for each symbol.
holdings = {ticker: random.randint(5, 10) for ticker in symbols}

# Seed the table with symbols and quantities only; no price or time is supplied here.
holdings_table.update({
    "symbol": symbols,
    "quantity": [holdings[ticker] for ticker in symbols],
})

# History of the portfolio: same schema, but not indexed.
holdings_total_table = perspective.Table(holdings_schema)


def update_total(port, delta):
    """Pass the rows that were updated in `holdings_table` on to `holdings_total_table`."""
    holdings_total_table.update(delta)


# Link the two tables: updates to `holdings_table` trigger `update_total`.
holdings_view = holdings_table.view()
holdings_view.on_update(update_total, mode="row")

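TODO: clean this up. The next cell creates a view over `holdings_total_table` with a computed `value` column, and defines a background thread that writes that view to a dated Arrow file every 60 seconds.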

In [None]:
# View used for saving: symbol, quantity and time, plus a `value` column
# computed as quantity * price.
save_holdings_view = holdings_total_table.view(
    columns=["symbol", "quantity", "value", "time"],
    computed_columns=[
        dict(column="value", computed_function_name="*", inputs=["quantity", "price"]),
    ],
)

# TODO: clean this up
async def _save():
    """Write `save_holdings_view` to a dated Arrow file every 60 seconds, forever.

    The file name embeds today's date and is recomputed on every pass. The
    file is opened in "wb" mode, so each pass overwrites that day's file with
    the view's current contents.
    """
    while True:
        filename = "portfolio_value_{0:%Y_%m_%d}.arrow".format(datetime.today())
        with open(filename, "wb") as sink:
            sink.write(save_holdings_view.to_arrow())
        logging.info("Saved %d rows to %s", holdings_total_table.size(), filename)
        await asyncio.sleep(60)

def save_to_arrow():
    """Run `_save()` on a private asyncio event loop until it finishes.

    Meant to be used as a thread target: it creates a new event loop, installs
    it as the current loop for the calling thread, and blocks while `_save()`
    runs. The loop is always closed afterwards, including when `_save()`
    raises, so its resources are not leaked.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(_save())
    finally:
        loop.close()


# Daemon thread: `_save()` loops forever, so a non-daemon thread would keep
# the interpreter alive when everything else has finished.
save_thread = threading.Thread(target=save_to_arrow, daemon=True)

In [None]:
# Start the background thread whose target is `save_to_arrow`.
save_thread.start()

Using our indexed `holdings_table`, we can create a new `PerspectiveWidget` to view the Table in Jupyterlab. Using `PerspectiveWidget`'s configuration options, we can set up the view to be exactly what we want—to show the latest price and value for our portfolio.

Notice the `computed_columns` field—the `value` of the portfolio isn't constructed elsewhere. Instead, the Perspective engine offers computed functions, which allow for column-wise computations to be applied on your data. As your data updates, the results of these "computed columns" update as well. Computed columns can be defined here in Python, or in the UI using a minimal expression language with autocomplete and syntax highlighting/type checking.

In [None]:
# One row per symbol showing price, quantity and the computed `value`
# (quantity * price), sorted by value in descending order.
holdings_widget = perspective.PerspectiveWidget(
    holdings_table,
    aggregates=dict(value="sum", price="last"),
    row_pivots=["symbol"],
    columns=["price", "quantity", "value"],
    sort=[["value", "desc"]],
    computed_columns=[
        dict(column="value", computed_function_name="*", inputs=["quantity", "price"]),
    ],
)
holdings_widget

We do the same for `holdings_total_table`—here, we see a line chart of the portfolio's value as new prices tick in, split by symbol so we can see how the total value is divided among its components.

In [None]:
# Line chart of the computed `value` (quantity * price), pivoted by time on
# the rows and split into one column group per symbol.
holdings_total_widget = perspective.PerspectiveWidget(
    holdings_total_table,
    plugin="y_line",
    row_pivots=["time"],
    column_pivots=["symbol"],
    aggregates=dict(quantity="last", price="last"),
    columns=["value"],
    computed_columns=[
        dict(column="value", computed_function_name="*", inputs=["quantity", "price"]),
    ],
)
holdings_total_widget

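TODO: Feed live quotes into the holdings, joining the "static" holdings data with streaming data. Below, `quotes_table` receives the quotes, and an `on_update` callback on its view forwards the updated rows to `holdings_table`.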

In [None]:
# Table that receives the streaming quotes.
quotes_table = perspective.Table(last_schema)


def update_holdings(port, delta):
    """Forward the rows that were updated in `quotes_table` to `holdings_table`."""
    holdings_table.update(delta)


# Link the tables: updates to `quotes_table` trigger `update_holdings`.
quotes_view = quotes_table.view()
quotes_view.on_update(update_holdings, mode="row")

In [None]:
# Last price per symbol, highest price first.
quotes_widget = perspective.PerspectiveWidget(
    quotes_table,
    row_pivots=["symbol"],
    columns=["price"],
    aggregates=dict(price="last"),
    sort=[["price", "desc"]],
)

In [None]:
# Bare last expression so that Jupyter renders the widget as this cell's output.
quotes_widget

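TODO: The ticks need to be cleaned into the right format before they are loaded into the table—here, the cleaner overwrites each tick's `time` with the current local time (this is probably only necessary for sandbox test data).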

In [None]:
def clean_quote(tick):
    """Stamp every quote in a batch with one shared "received at" timestamp.

    The timestamp is taken once per batch rather than once per quote, so all
    quotes that arrived together carry exactly the same `time`. Per-quote
    calls to `datetime.now()` differ by microseconds, which would give every
    symbol its own row when the data is later pivoted on `time`.

    Args:
        tick: an iterable of dict-like quotes; each one is modified in place.

    Returns:
        The same `tick` object that was passed in, with `time` overwritten on
        every quote.
    """
    received_at = datetime.now()
    for quote in tick:
        quote["time"] = received_at
    return tick

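TODO: Explain how we use the datasource. In short: we construct an `IEXIntervalDataSource` with the table to update, the pyEX method that fetches the data (`client.last`), a function that cleans each batch of data, and the symbols to request, and then call `start()` and `stop()` on it.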

In [None]:
# Datasource wiring `client.last` to `quotes_table`, cleaning each batch
# with `clean_quote`.
quotes = IEXIntervalDataSource(
    table=quotes_table,
    iex_source=client.last,
    data_cleaner=clean_quote,
    symbols=symbols,
)

In [None]:
# Start the quotes datasource. Per the notes at the top of this notebook,
# datasources do their work in the background so later cells stay runnable.
quotes.start()

In [None]:
# Stop the quotes datasource (presumably halts further updates to
# `quotes_table`; confirm against datasources.ipynb).
quotes.stop()

In [None]:
# Table for the chart rows; column names and types come from `charts_schema`.
charts_table = perspective.Table(charts_schema)

TODO: use OHLC to demonstrate UI features, computed UI, filters, etc.

In [None]:
# Widget configuration for an OHLC chart of SPY, pivoted by date. Also
# defines a computed `value` column (quantity * close).
ohlc_config = dict(
    plugin="d3_ohlc",
    row_pivots=["date"],
    columns=["open", "close", "high", "low"],
    aggregates=dict(quantity="last"),
    filters=[["symbol", "==", "SPY"]],
    computed_columns=[
        dict(column="value", computed_function_name="*", inputs=["quantity", "close"]),
    ],
)

# Widget configuration for a line chart of the computed `value` column
# (quantity * close), pivoted by date and split by symbol.
value_config = dict(
    plugin="y_line",
    row_pivots=["date"],
    column_pivots=["symbol"],
    columns=["value"],
    aggregates=dict(quantity="last"),
    computed_columns=[
        dict(column="value", computed_function_name="*", inputs=["quantity", "close"]),
    ],
)

# Build the charts widget from the OHLC configuration and display it.
charts_widget = perspective.PerspectiveWidget(charts_table, **ohlc_config)
charts_widget
# TODO: show computed time buckets here

In [None]:
def clean_charts(tick):
    """Flatten a per-symbol batch of chart data into a single list of rows.

    `tick` maps each symbol to a dict whose "chart" entry is a list of row
    dicts. Every row is tagged in place with its symbol and with the quantity
    held for that symbol (looked up in `holdings`), then collected in order.

    Returns:
        A list of the same row dicts, in the order they were encountered.
    """
    flattened = []
    for ticker, payload in tick.items():
        for bar in payload["chart"]:
            bar["symbol"] = ticker
            bar["quantity"] = holdings[ticker]
            flattened.append(bar)
    return flattened

In [None]:
# Datasource wiring `client.batch` to `charts_table`, cleaning the data
# with `clean_charts`.
# range_: 1d, 1m, 1y, etc.
charts = IEXStaticDataSource(
    charts_table,
    iex_source=client.batch,
    data_cleaner=clean_charts,
    symbols=symbols,
    fields="chart",
    range_="1y",
)

In [None]:
# Start the charts datasource (presumably fetches the chart data and loads it
# into `charts_table`; confirm against datasources.ipynb).
charts.start()

In [None]:
# Stop the charts datasource.
charts.stop()

In [None]:
# Load today's saved Arrow file into a new widget, sorted by time in
# descending order.
arrow_path = "portfolio_value_{0:%Y_%m_%d}.arrow".format(datetime.today())
with open(arrow_path, "rb") as arr:
    w = perspective.PerspectiveWidget(arr.read(), sort=[["time", "desc"]])
    display(w)

In [17]:
# TODO: remember to mention that all this code can be modularized and run as a tornado server for perspective in the browser