In [None]:
import os
import sys
import random
from datetime import date, datetime

import pandas as pd
import numpy as np
import pyarrow as pa

import perspective

### Introduction to Perspective

Perspective is an interactive visualization component for large, real-time datasets. Originally developed for J.P. Morgan's trading business, Perspective makes it simple to build real-time & user configurable analytics in both the browser and in Python. Use it to create reports, dashboards, notebooks and applications, with static data or streaming updates via Apache Arrow.

### Tables and Views

The `Table` is the core data container in Perspective. You can create a `Table` by initializing it with a dataset, or with a schema—a mapping of column names to data types.

If you create a `Table` with an `index` column, it will update with new data in-place based on the primary key values.

In [None]:
# Build a 100-row dummy dataset with float, string and datetime columns
data = pd.DataFrame({
    "a": np.random.rand(100),
    "b": np.ones(100),
    "c": list(map(str, range(100))),
    "d": [datetime.now() for _ in range(100)],
    "e": [datetime.today() for _ in range(100)],
})

# A perspective.Table is the base container for data; this one is loaded
# straight from the DataFrame and has no index.
table = perspective.Table(data)

# A second table is declared from a schema (column name -> type) only, and is
# primary keyed on column `a` through `index`.
schema_table = perspective.Table(
    {"a": float, "b": float, "c": str, "d": datetime},
    index="a",
)

The `View` allows you to query and transform data, as well as serialize the Table's data:

In [None]:
# create a view - a continuous query on the data
# This view keeps only rows where `a` is greater than 0.5 and sorts them by `a`, descending.
view = table.view(filter=[["a", ">", 0.5]], sort=[["a", "desc"]])

For a more detailed overview of the `Table` and `View` API, see the [Perspective User Guide](https://perspective.finos.org/docs/md/python.html).

### Updates

To append data or make in-place updates, call the Table's `update()` method with the dataset you want to add.

If the Table has an `index` set, like `schema_table` above, updates will occur in-place based on the index column. If the Table is unindexed, updates will append
data to the end of the Table.

In [None]:
# Update the unindexed table: `table` was created without an `index`, so these
# two rows are added to the end of it. Column `e` is not supplied for the new rows.
table.update([
    {"a": 1500, "b": 1, "c": "string", "d": datetime.now()},
    {"a": 2500, "b": 2, "c": "string2", "d": datetime.now()}
])

# Query it to see the new data, using a view sorted by `a` ascending
sorted_view = table.view(sort=[["a", "asc"]])

# Serialize the view to a pandas DataFrame; as the last expression it is rendered as the cell output
sorted_view.to_df()

In [None]:
# Send the same two rows again. `table` has no index, so this is another
# append rather than an overwrite of the rows added earlier.
table.update([
    {"a": 1500, "b": 1, "c": "string", "d": datetime.now()},
    {"a": 2500, "b": 2, "c": "string2", "d": datetime.now()}
])

In [None]:
# Re-serialize the existing view (no new view is created) to inspect the table after the second update
sorted_view.to_df()

In [None]:
# On indexed tables, updates with new primary keys will append
# `schema_table` is keyed on `a` and starts empty, so 1.5, 2.5 and 3.5 are all new keys.
# Column `d` is declared in the schema but not supplied here.
schema_table.update([
    {"a": 1.5, "b": 100, "c": "a"},
    {"a": 2.5, "b": 150, "c": "b"},
    {"a": 3.5, "b": 200, "c": "c"},
])

# An unconfigured view over the indexed table, serialized to a DataFrame for display
schema_view = schema_table.view()
schema_view.to_df()

In [None]:
# Updates with the same primary keys will overwrite
# Key 5.5 has not been seen before, so that row is appended; key 1.5 already
# exists from the previous update, so that row is replaced in place.
schema_table.update([
    {"a": 5.5, "b": 150, "c": "appended!"},
    {"a": 1.5, "b": 1500, "c": "overwritten!"},
])

# Rebinds `schema_view` to a newly created view before serializing it
schema_view = schema_table.view()
schema_view.to_df()

### `on_update` callbacks

Using `on_update`, you can execute a callback whenever the Table updates. This offers a world of possibilities through chaining together Perspective Tables, triggering actions on a UI, making calculations whenever the Table has updated, etc.

Here, we use `on_update` to update an indexed table from an unindexed table:

In [None]:
# Add an `on_update` callback, which fires with an Arrow-encoded binary of the updated rows
def callback(port, delta):
    """Forward updated rows from `view` into `schema_table`.

    Args:
        port: First positional argument supplied by `on_update`; not used here.
        delta: The updated rows, passed unchanged to `schema_table.update`.
    """
    # Update the indexed table with the updated rows, which will update in-place based on `index`
    schema_table.update(delta)

# Register on `view` (the view over the unindexed `table`) in "row" mode.
# NOTE(review): `view` was created with the filter a > 0.5, so presumably only
# rows that pass that filter reach the callback - confirm against the Perspective docs.
view.on_update(callback, mode="row")

Now we can update the unindexed table, and the indexed table will automatically update:

In [None]:
# Update the unindexed table, and query the indexed table
# The callback registered on `view` forwards these rows to `schema_table`, which
# is keyed on `a`: keys 1.5, 2.5 and 3.5 already exist there and are expected to
# be overwritten, while 6.5 is a new key and is expected to be appended.
table.update([
    {"a": 1.5, "b": 100, "c": "just updated"},
    {"a": 2.5, "b": 150, "c": "also new"},
    {"a": 3.5, "b": 200, "c": "me three"},
    {"a": 6.5, "b": 200, "c": "just appended"},
])

# Serialize the view on the indexed table to see the forwarded rows
schema_view.to_df()

### Serializing Data

Data can be retrieved from a `View` by calling one of its `to_*` methods: `to_df`, `to_arrow`, `to_dict`, `to_records`, and `to_csv`. When the underlying `Table` updates, the `View` is automatically notified with new data and will always return the most up-to-date dataset.

In [None]:
# get some data from the view - supported formats are dataframes, Apache Arrow, dicts of numpy arrays, row/columnar JSON, and CSV
# `end_row=10` is passed to limit how much of the filtered, sorted view is serialized
filtered = view.to_df(end_row=10)
display(filtered)

In [None]:
# append to the table with new data
# (`table` is unindexed, so the row is added even though a = 1.5 has been sent before)
table.update([{"a": 1.5, "b": 1, "c": "string", "d": datetime.now()}])

# re-query the data through the same `view` - no new view is needed to see the update
filtered = view.to_df(end_row=10)
display(filtered)

### Working with Arrows  

Perspective can load and create binaries in the Apache Arrow format - just call the `View`'s `to_arrow()` method. After the arrow is emitted, it can be saved to disk, coerced into a `pyarrow.Table`, and more. 

In [None]:
# Serialize the filtered view to Arrow and write the result to disk in binary mode
arrow = view.to_arrow()
with open("example.arrow", "wb") as file:
    file.write(arrow)

Opening arrows from the filesystem is easy:

In [None]:
# Read the saved Arrow file back in and hand its bytes to a PerspectiveWidget.
# The file handle gets its own name so that `arrow`, which holds the serialized
# bytes from `view.to_arrow()`, is not rebound to a (closed) file object.
with open("example.arrow", "rb") as arrow_file:
    w = perspective.PerspectiveWidget(arrow_file.read())

# The bytes are fully read at this point, so the widget is shown after the file is closed
display(w)

### PerspectiveWidget

`PerspectiveWidget` is a Jupyter widget that uses `<perspective-viewer>` to visualize and transform your dataset within the browser, with all the same features and performance as Perspective in a standalone web application.

In [None]:
# Show the indexed `schema_table` in an editable datagrid widget, row-pivoted
# on column `a` with `a` aggregated as "avg".
widget = perspective.PerspectiveWidget(
    schema_table,
    server=True,
    plugin="datagrid",
    row_pivots=["a"],
    aggregates={"a": "avg"},
    editable=True,
)
display(widget)

### Streaming data in Perspective

We've demonstrated how Perspective can be used with static datasets and schemas, but let's create a streaming example that shows how it performs over rapidly streaming data. The following datasource generates a batch of random rows every 50 milliseconds, driven by an asyncio event loop running on a background thread:

In [None]:
import asyncio
import threading

In [None]:
# Ticker symbols that generated rows draw their `name` from
SECURITIES = ["AAPL.N", "AMZN.N", "QQQ.N", "NVDA.N", "TSLA.N",
              "FB.N", "MSFT.N", "TLT.N", "XIV.N", "YY.N",
              "CSCO.N", "GOOGL.N", "PCLN.N"]

# Client names that generated rows draw their `client` from
CLIENTS = ["Homer", "Marge", "Bart", "Lisa", "Maggie",
           "Moe", "Lenny", "Carl", "Krusty"]

def data_source():
    """Generate one batch of five random rows.

    Returns:
        list[dict]: Five dicts, each with the keys `name`, `client`, `open`,
        `high`, `low`, `close`, `lastUpdate` and `date`. The four price
        fields are random floats scaled by a single modifier shared by the
        whole batch; `lastUpdate` is the current datetime and `date` is
        today's date.
    """
    # One scale factor per batch, so all rows in a batch share a price magnitude
    modifier = random.random() * random.randint(1, 50)
    return [
        {
            "name": random.choice(SECURITIES),
            "client": random.choice(CLIENTS),
            "open": (random.random() * 75 + random.randint(0, 9)) * modifier,
            "high": (random.random() * 105 + random.randint(1, 3)) * modifier,
            "low": (random.random() * 85 + random.randint(1, 3)) * modifier,
            "close": (random.random() * 90 + random.randint(1, 3)) * modifier,
            "lastUpdate": datetime.now(),
            "date": date.today()
        }
        for _ in range(5)
    ]

In [None]:
# Schema-only table that receives the streamed rows; the column names match the
# keys of the dicts returned by `data_source()`.
# NOTE(review): `limit=2500` presumably caps the number of rows the table keeps - confirm against the Perspective docs.
streaming_table = perspective.Table({
    "name": str,
    "client": str,
    "open": float,
    "high": float,
    "low": float,
    "close": float,
    "lastUpdate": datetime,
    "date": date
}, limit=2500)

In [None]:
# Widget over the streaming table, using the candlestick plugin, row-pivoted on
# `lastUpdate` and showing the four price columns.
streaming_widget = perspective.PerspectiveWidget(
    streaming_table,
    plugin="d3_candlestick",
    row_pivots=["lastUpdate"],
    columns=["open", "close", "high", "low"]
)

In [None]:
# Bare last expression: renders the widget as this cell's output
streaming_widget

In [None]:
async def _update(interval=0.05):
    """Push batches from `data_source()` into `streaming_table` until cancelled.

    Args:
        interval: Seconds to sleep between batches (default 0.05, i.e. 50 ms).
            The sleep is also the point where the coroutine yields to the
            event loop, which is what lets a cancellation be delivered.
    """
    while True:
        streaming_table.update(data_source())
        await asyncio.sleep(interval)


def update_table(duration=60):
    """Stream updates into `streaming_table` for a fixed time, then stop.

    Creates a private event loop, runs `_update()` on it and schedules the
    task's cancellation after `duration` seconds. This call blocks until the
    task ends, so it is meant to be used as a thread target.

    Args:
        duration: Seconds to stream before cancelling the task (default 60).
    """
    loop = asyncio.new_event_loop()
    task = loop.create_task(_update())
    loop.call_later(duration, task.cancel)

    try:
        loop.run_until_complete(task)
    except asyncio.CancelledError:
        # Expected path: the timer scheduled above cancelled the task
        print("Stopped streaming!")
    finally:
        # Release the loop's resources on every exit path, including errors from `_update`
        loop.close()

In [None]:
# `update_table` blocks while its event loop runs, so it is started on a
# separate thread rather than called directly in the cell.
thread = threading.Thread(target=update_table)
thread.start()

In [None]:
# Block this cell until the streaming thread has finished, i.e. until
# `update_table` has cancelled its task and returned.
thread.join()