In [1]:
from progressivis_nb_widgets.nbwidgets import *

In [2]:
from progressivis.core import Scheduler, Every
from progressivis.table import PTable
from progressivis.vis import MCScatterPlot
from progressivis.io import ParquetLoader
from progressivis.table.constant import Constant
import asyncio as aio
import pyarrow as pa
import pyarrow.compute as pc
from functools import reduce

def _quiet(x): pass

# Short aliases for the element-wise strict comparisons of pyarrow.compute.
gt, lt = pc.greater, pc.less

def _filter(bat):
    """Keep only rows whose pickup and dropoff coordinates lie inside a fixed box.

    Every coordinate column must be strictly greater than its lower bound and
    strictly less than its upper bound (``gt``/``lt`` are strict comparisons).

    Args:
        bat: object supporting ``bat[column_name]`` and ``bat.filter(mask)``
            (presumably a pyarrow RecordBatch handed over by the loader —
            TODO confirm).

    Returns:
        The result of ``bat.filter`` applied to the conjunction of all bounds.
    """
    # (column name, exclusive lower bound, exclusive upper bound)
    bounds = (
        ('pickup_longitude', -74.08, -73.5),
        ('pickup_latitude', 40.55, 41.00),
        ('dropoff_longitude', -74.08, -73.5),
        ('dropoff_latitude', 40.55, 41.00),
    )
    conditions = []
    for name, low, high in bounds:
        column = bat[name]
        conditions.extend((gt(column, low), lt(column, high)))
    # AND all eight element-wise conditions together into a single mask.
    return bat.filter(reduce(pc.and_, conditions))


# Reuse a `scheduler` object when one already exists in the namespace;
# otherwise fall back to a fresh Scheduler.
try:
    scheduler
except NameError:
    s = Scheduler()
else:
    s = scheduler

# Common path prefix of the monthly Parquet files.
PREFIX = "../nyc-taxi/bk_500k_yellow_tripdata_2015"

# One path per month, with suffixes -01 through -06 (month number zero-padded).
URLS = [
    f"{PREFIX}-{month:0>2}.parquet"
    for month in range(1, 7)
]

# Single-column mapping used below as the data of the 'filenames' table.
FILENAMES = dict(filename=URLS)
# Constant module wrapping the table of Parquet file names to load.
CST = Constant(PTable('filenames', data=FILENAMES), scheduler=s)

# Loader restricted to the four coordinate columns, with `_filter` supplied as
# its `filter_` callback (presumably applied to each loaded batch — confirm).
# NOTE(review): `index_col` and `skipinitialspace` look like CSV-reader
# options — confirm that ParquetLoader actually uses them.
CSV = ParquetLoader(
    index_col=False,
    skipinitialspace=True,
    columns=[
        'pickup_longitude', 'pickup_latitude',
        'dropoff_longitude', 'dropoff_latitude',
    ],
    filter_=_filter,
    scheduler=s,
)  # TODO: reimplement filter in read_csv.py

# Feed the file-name table into the loader.
CSV.input.filenames = CST.output[0]

# `Every` module with the no-op `_quiet` callback, connected to the loader's result.
PR = Every(scheduler=s, proc=_quiet)
PR.input[0] = CSV.output.result


# Multi-class scatterplot with two classes, 'pickup' and 'dropoff'.
# Each tuple appears to be (class name, x column, y column) — TODO confirm
# against the MCScatterPlot documentation.
MULTICLASS = MCScatterPlot(
    scheduler=s,
    classes=[
        ('pickup', 'pickup_longitude', 'pickup_latitude'),
        ('dropoff', 'dropoff_longitude', 'dropoff_latitude'),
    ],
    approximate=True,
)
# Build the scatterplot's dependent modules on top of the Parquet loader.
MULTICLASS.create_dependent_modules(CSV)

# Create the dashboard object for scheduler `s`.
psboard = PsBoard(s, order='desc')
# Every visualisation must be registered with the dashboard:
# 1) When the widget provides a link_module() method:
psboard.register_visualisation(Scatterplot(disable=['init_centroids']), MULTICLASS)
# 2) When the widget is generic, or to override its link_module() method:
# psboard.register_visualisation(FooWidget(), bar_module, glue=glue_func)
# NB: glue_func(widget, module) -> [coroutine] must be provided.
# Start the application (the trailing `;` suppresses the cell's output value):

s.task_start();

Starting scheduler
# Scheduler added module(s): ['constant_1', 'dyn_var_1', 'dyn_var_2', 'every_1', 'histogram_index_1', 'histogram_index_2', 'histogram_index_3', 'histogram_index_4', 'max_1', 'max_2', 'max_3', 'max_4', 'mc_histogram2_d_1', 'mc_histogram2_d_2', 'mc_scatter_plot_1', 'merge_dict_1', 'merge_dict_2', 'merge_dict_3', 'merge_dict_4', 'min_1', 'min_2', 'min_3', 'min_4', 'parquet_loader_1', 'range_query2d_1', 'range_query2d_2', 'sample_1', 'sample_2']


#### Click _&#x1f441; mc_scatter_plot_1_ below to display the scatterplot:

In [3]:
# Render the dashboard widget created in the previous cell.
display(psboard)

PsBoard(children=(ControlPanel(children=(Button(description='Resume', disabled=True, icon='play', style=Button…























