In [1]:
from progressivis_nb_widgets.nbwidgets import *

In [2]:
import pandas as pd
from progressivis.core import Scheduler, Every
from progressivis.core.utils import gather_and_run
from progressivis.table import Table
from progressivis.vis import MCScatterPlot
from progressivis.io import CSVLoader
#from progressivis.datasets import get_dataset
from progressivis.table.constant import Constant
import asyncio as aio
import threading
import os

def _quiet(x): pass

def _filter(df):
    pklon = df['pickup_longitude']
    pklat = df['pickup_latitude']
    dolon = df['dropoff_longitude']
    dolat = df['dropoff_latitude']


    return df[(pklon > -74.08) & (pklon < -73.5) & (pklat > 40.55) & (pklat < 41.00) &
                  (dolon > -74.08) & (dolon < -73.5) & (dolat > 40.55) & (dolat < 41.00)]

# Reuse a pre-existing `scheduler` when one is already defined in the
# namespace; otherwise fall back to a fresh Scheduler instance.
try:
    scheduler
except NameError:
    s = Scheduler()
else:
    s = scheduler
# Relative path prefix of the NYC taxi CSV files.
PREFIX = '../../nyc-taxi/'

# File-name suffix appended to every entry (bzip2 extension).
SUFFIX = '.bz2'

# Yellow-cab trip files for months 01 through 06 of 2015, in ascending order.
URLS = [
    PREFIX + 'yellow_tripdata_2015-{:02d}.csv'.format(month) + SUFFIX
    for month in range(1, 7)
]

# One-column frame ('filename') listing the files in URLS, wrapped in a Table
# and exposed through a Constant module so it can feed the loader below.
FILENAMES = pd.DataFrame({'filename': URLS})
CST = Constant(Table('filenames', data=FILENAMES), scheduler=s)
# Only the four coordinate columns are requested; `_filter` is handed over as
# `filter_` (presumably applied by the loader to the data it reads — TODO
# confirm against CSVLoader).
CSV = CSVLoader(index_col=False, skipinitialspace=True,
                usecols=['pickup_longitude', 'pickup_latitude',
                             'dropoff_longitude', 'dropoff_latitude'],
                filter_=_filter, scheduler=s) # TODO: reimplement filter in read_csv.py

# Connect the constant's first output to the loader's `filenames` input.
CSV.input.filenames = CST.output[0]
# Every module with the no-op `_quiet` callback, connected to the loader's
# first output.
PR = Every(scheduler=s, proc=_quiet)
PR.input[0] = CSV.output[0]


# Scatter plot with two classes, 'pickup' and 'dropoff'. Each class is a
# 3-tuple — presumably (class name, x column, y column); TODO confirm
# against MCScatterPlot.
MULTICLASS = MCScatterPlot(scheduler=s, classes=[
    ('pickup', 'pickup_longitude', 'pickup_latitude'),
    ('dropoff', 'dropoff_longitude', 'dropoff_latitude')], approximate=True)
# Let the plot set up its supporting modules, using the CSV loader as input.
MULTICLASS.create_dependent_modules(CSV)

# Create the dashboard object for scheduler `s`.
psboard = PsBoard(s)
# Visualisations must be registered with the dashboard:
# 1) When the widget provides the link_module() method, do:
psboard.register_visualisation(Scatterplot(disable=['init_centroids']), MULTICLASS)
# 2) When the widget is generic, or to override the link_module() method, do:
# psboard.register_visualisation(FooWidget(), bar_module, glue=glue_func)
# NB: glue_func(widget, module) -> [coroutine] must be provided.
# Start the application: run the scheduler together with the dashboard's
# coroutines.

gather_and_run(s.start(), *psboard.coroutines)

# Scheduler added module(s): ['constant_1', 'csv_loader_1', 'dyn_var_1', 'dyn_var_2', 'every_1', 'histogram_index_1', 'histogram_index_2', 'histogram_index_3', 'histogram_index_4', 'max_1', 'max_2', 'max_3', 'max_4', 'mc_histogram2_d_1', 'mc_histogram2_d_2', 'mc_scatter_plot_1', 'merge_dict_1', 'merge_dict_2', 'merge_dict_3', 'merge_dict_4', 'min_1', 'min_2', 'min_3', 'min_4', 'range_query2d_1', 'range_query2d_2', 'sample_1', 'sample_2']


In [3]:
# Optional alternative (kept commented out): uncomment the three lines below
# and indent `display(psboard)` under `with sc:` to show the dashboard through
# a Sidecar object instead of inline.
#from sidecar import Sidecar
#sc = Sidecar(title='Sidecar Output')
#with sc:
display(psboard)

PsBoard(children=(ControlPanel(children=(Button(description='Resume', disabled=True, icon='play', style=Button…

In [4]:
import time
# Pause for 10 seconds before touching the dataflow — presumably so the
# scheduler started earlier has processed some data first (TODO confirm).
time.sleep(10)
def _length(x):
    print(len(x))

# Inside the scheduler's context manager, create a second Every module with
# the no-op `_quiet` callback, explicitly named "Every2Added", and connect it
# to the loader's first output.
with s:
    PR2 = Every(name="Every2Added", scheduler=s, proc=_quiet)
    PR2.input[0] = CSV.output[0]

# Scheduler added module(s): ['Every2Added']


In [5]:
# Pause for another 10 seconds, then remove the "Every2Added" module by name
# through the object returned by the scheduler's context manager.
time.sleep(10)
with s as dataflow:
    dataflow.delete_modules("Every2Added")


# Scheduler deleted module(s): {'Every2Added'}


In [6]:
# Show the modules known to the scheduler (the recorded output is a mapping
# from module name to module object).
s.modules()

{'constant_1': Module Constant: constant_1,
 'csv_loader_1': Module CSVLoader: csv_loader_1,
 'every_1': Module Every: every_1,
 'mc_scatter_plot_1': Module MCScatterPlot: mc_scatter_plot_1,
 'dyn_var_1': Module DynVar: dyn_var_1,
 'dyn_var_2': Module DynVar: dyn_var_2,
 'range_query2d_1': Module RangeQuery2d: range_query2d_1,
 'histogram_index_1': Module HistogramIndex: histogram_index_1,
 'histogram_index_2': Module HistogramIndex: histogram_index_2,
 'min_1': Module Min: min_1,
 'min_2': Module Min: min_2,
 'merge_dict_1': Module MergeDict: merge_dict_1,
 'max_1': Module Max: max_1,
 'max_2': Module Max: max_2,
 'merge_dict_2': Module MergeDict: merge_dict_2,
 'mc_histogram2_d_1': Module MCHistogram2D: mc_histogram2_d_1,
 'sample_1': Module Sample: sample_1,
 'range_query2d_2': Module RangeQuery2d: range_query2d_2,
 'histogram_index_3': Module HistogramIndex: histogram_index_3,
 'histogram_index_4': Module HistogramIndex: histogram_index_4,
 'min_3': Module Min: min_3,
 'min_4': Mod

In [7]:
# List the keys (module names) of the scheduler's dependency mapping.
# NOTE(review): `_dependencies` is a private attribute (leading underscore);
# used here for inspection only.
list(s._dependencies.keys())

['constant_1',
 'csv_loader_1',
 'every_1',
 'mc_scatter_plot_1',
 'dyn_var_1',
 'dyn_var_2',
 'range_query2d_1',
 'histogram_index_1',
 'histogram_index_2',
 'min_1',
 'min_2',
 'merge_dict_1',
 'max_1',
 'max_2',
 'merge_dict_2',
 'mc_histogram2_d_1',
 'sample_1',
 'range_query2d_2',
 'histogram_index_3',
 'histogram_index_4',
 'min_3',
 'min_4',
 'merge_dict_3',
 'max_3',
 'max_4',
 'merge_dict_4',
 'mc_histogram2_d_2',
 'sample_2']

In [8]:
# Dump the scheduler state; the recorded output is a dict with a 'modules'
# list of per-module entries. `short=False` presumably selects the detailed
# form — TODO confirm against Scheduler.to_json.
s.to_json(short=False)




{'modules': [{'is_running': True,
   'is_terminated': False,
   'run_number': 1697,
   'id': 'constant_1',
   'classname': 'constant',
   'is_visualization': False,
   'last_update': 1,
   'state': 'state_terminated',
   'quality': 0.0,
   'progress': [0, 0],
   'speed': [],
   'order': 0,
   'start_time': 0,
   'end_time': 0.020991300989408046,
   'input_slots': {'_params': None},
   'output_slots': {'result': [{'output_name': 'result',
      'output_module': 'constant_1',
      'input_name': 'filenames',
      'input_module': 'csv_loader_1'}],
    '_trace': None},
   'default_step_size': 100,
   'parameters': {'quantum': 0.5, 'debug': False}},
  {'is_running': True,
   'is_terminated': False,
   'run_number': 1697,
   'id': 'csv_loader_1',
   'classname': 'csv_loader',
   'is_visualization': False,
   'last_update': 1691,
   'state': 'state_ready',
   'quality': 0.0,
   'progress': [0, 0],
   'speed': [202409.42860491425,
    210701.02516210102,
    210701.02516210102,
    189463.672











































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































In [8]:
# Show the full dependency mapping: per module name, a dict of input slot
# name -> Slot (as seen in the recorded output).
# NOTE(review): private attribute; inspection only.
s._dependencies

{'constant_1': {},
 'csv_loader_1': {'filenames': Slot(constant_1[result]->csv_loader_1[filenames])},
 'every_1': {'df': Slot(csv_loader_1[result]->every_1[df])},
 'mc_scatter_plot_1': {'table.86f46049-68e4-4d9d-b7ca-bd49a54153f4': Slot(mc_histogram2_d_1[result]->mc_scatter_plot_1[table.86f46049-68e4-4d9d-b7ca-bd49a54153f4/table]),
  'table.47a0c20a-78ef-4b0f-867f-b528bb141d85': Slot(sample_1[result]->mc_scatter_plot_1[table.47a0c20a-78ef-4b0f-867f-b528bb141d85/table]),
  'table.9dfcc740-c9d7-42c0-a18a-b3a9892c1837': Slot(mc_histogram2_d_2[result]->mc_scatter_plot_1[table.9dfcc740-c9d7-42c0-a18a-b3a9892c1837/table]),
  'table.8a4ef005-0cc4-4252-8d08-3d6d7fa897ac': Slot(sample_2[result]->mc_scatter_plot_1[table.8a4ef005-0cc4-4252-8d08-3d6d7fa897ac/table])},
 'dyn_var_1': {},
 'dyn_var_2': {},
 'range_query2d_1': {'table': Slot(csv_loader_1[result]->range_query2d_1[table]),
  'lower': Slot(dyn_var_1[result]->range_query2d_1[lower]),
  'upper': Slot(dyn_var_2[result]->range_query2d_1[uppe

In [9]:
# Look up the CSV loader module in the scheduler by its name.
m = s["csv_loader_1"]

In [10]:
# Display the module's repr.
m

Module CSVLoader: csv_loader_1

In [11]:
# Inspect the loader's outgoing slots (private attribute; inspection only).
# NOTE(review): the recorded output still lists a slot to "Every2Added" even
# though that module was deleted in an earlier cell — confirm whether the
# stale entry is expected.
m._output_slots

{'result': [Slot(csv_loader_1[result]->every_1[df]),
  Slot(csv_loader_1[result]->range_query2d_1[table]),
  Slot(csv_loader_1[result]->histogram_index_1[table]),
  Slot(csv_loader_1[result]->histogram_index_2[table]),
  Slot(csv_loader_1[result]->range_query2d_2[table]),
  Slot(csv_loader_1[result]->histogram_index_3[table]),
  Slot(csv_loader_1[result]->histogram_index_4[table]),
  Slot(csv_loader_1[result]->Every2Added[df])],
 '_trace': None}