In [2]:
import os
import sys
from datetime import date, datetime

import pandas as pd
import numpy as np
import pyarrow as pa

import perspective

### Perspective Basics

TODO: brief description

In [2]:
# Set up some dummy data
data = pd.DataFrame({
    "a": np.random.rand(100),
    "b": np.ones(100),
    "c": [str(i) for i in range(100)],
    "d": [datetime.now() for i in range(100)],
    "e": [datetime.today() for i in range(100)]
})

# create a perspective.Table - the base container for data
table = perspective.Table(data)

# create a view - a query on the data
view = table.view(filter=[["a", ">", 0.5]], sort=[["a", "desc"]])

#### Getting Data

Data can be retrieved from a `View` by calling one of its `to_*` methods: `to_df`, `to_arrow`, `to_dicts`, `to_records`, and `to_csv`. When the underlying `Table` updates, the `View` is automatically notified with new data and will always return the most up-to-date dataset.

In [3]:
# get some data from the view - supported formats are dataframes, Apache Arrow, dicts of numpy arrays, row/columnar JSON, and CSV
filtered = view.to_df(end_row=10)
display(filtered)

# append to the table with new data
table.update([{"a": 1.5, "b": 1, "c": "string", "d": datetime.now()}])

# re-query the data
filtered = view.to_df(end_row=10)
display("After update", filtered)

Unnamed: 0,index,a,b,c,d,e
0,14,0.980086,1.0,14,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
1,43,0.964105,1.0,43,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
2,62,0.958505,1.0,62,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
3,76,0.955317,1.0,76,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
4,42,0.953409,1.0,42,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
5,31,0.92974,1.0,31,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
6,84,0.928055,1.0,84,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
7,50,0.914237,1.0,50,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
8,11,0.901878,1.0,11,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
9,32,0.899002,1.0,32,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657


'After update'

Unnamed: 0,index,a,b,c,d,e
0,,1.5,1.0,string,2020-09-04 15:53:09.591,NaT
1,14.0,0.980086,1.0,14,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
2,43.0,0.964105,1.0,43,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
3,62.0,0.958505,1.0,62,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
4,76.0,0.955317,1.0,76,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
5,42.0,0.953409,1.0,42,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
6,31.0,0.92974,1.0,31,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
7,84.0,0.928055,1.0,84,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
8,50.0,0.914237,1.0,50,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657
9,11.0,0.901878,1.0,11,2020-09-04 11:53:08.657,2020-09-04 11:53:08.657


#### Schemas and Updates

In [4]:
# Create an indexed table from a schema - a mapping of column names to types, primary keyed by `a`
schema_table = perspective.Table({
    "a": float,
    "b": float,
    "c": str,
    "d": datetime
}, index="a")

# Add an `on_update` callback, which fires with an Arrow-encoded binary of the updated rows
def callback(port, delta):
    # Update the indexed table with the updated rows, which will update in-place based on `index`
    schema_table.update(delta)

view.on_update(callback, mode="row")

In [5]:
# Update the unindexed table, and query the indexed table
table.update([
    {"a": 1.5, "b": 1, "c": "string", "d": datetime.now()},
    {"a": 2.5, "b": 2, "c": "string2", "d": datetime.now()}
])

# Group values by `a`
pivoted_view = schema_table.view(row_pivots=["a"], aggregates={"c": "unique"})

# Get column-oriented JSON
pivoted_output = pivoted_view.to_columns()
display(pivoted_output)

{'__ROW_PATH__': [[], [1.5], [2.5]],
 'a': [4.0, 1.5, 2.5],
 'b': [3.0, 1.0, 2.0],
 'd': [2, 1, 1],
 'c': ['-', 'string', 'string2']}

In [6]:
# Updates with the same primary key will overwrite
table.update([
    {"a": 1.5, "b": 100, "c": "new string"}
])

pivoted_output = pivoted_view.to_columns()
display(pivoted_output)

{'__ROW_PATH__': [[], [1.5], [2.5]],
 'a': [4.0, 1.5, 2.5],
 'b': [102.0, 100.0, 2.0],
 'd': [2, 1, 1],
 'c': ['-', 'new string', 'string2']}

#### TODO: Working with Arrows

### PerspectiveWidget

TODO: Describe

In [8]:
# Create a PerspectiveWidget from the tables we just created
widget = perspective.PerspectiveWidget(schema_table, plugin="datagrid", row_pivots=["a"], aggregates={"a": "avg"}, editable=True)
display(widget)

PerspectiveError: Unrecognized `plugin`: datagrid

### Chaining Perspectives with `on_update`

In [10]:
# TODO