# Simple views demo

In [1]:
import warnings

# Mute all warnings to keep the demo output readable. The filter is installed
# before the progressivis import so that it also covers import-time warnings.
warnings.filterwarnings("ignore")

from progressivis import (
    SimpleCSVLoader,
    Corr,
    Sink,
    Constant,
    PTable,
)

# Bin count used for both axes of the 2D histogram created in the next cell.
RESOLUTION = 512

# One bz2-compressed CSV per file suffix 01..06 of the 2015 data set.
URLS = [
    f"https://www.aviz.fr/nyc-taxi/yellow_tripdata_2015-0{m}.csv.bz2"
    for m in range(1, 7)
]

# Constant module wrapping a one-column table ('filename') holding the URLs;
# its output is wired to the CSV loader's `filenames` input in the next cell.
cst = Constant(PTable('filenames', data={'filename': URLS}))

In [2]:
from progressivis import (CSVLoader, Histogram2D, ConstDict, Heatmap, PDict, Corr,
                          BinningIndexND, RangeQuery2D, Variable, Quantiles)
# NOTE(review): CSVLoader, ConstDict, Heatmap, PDict and Quantiles are not used
# in the cells visible here, and Corr is already imported by the first cell.

# Names of the two columns read from the CSV files and used by every module below
col_x = "pickup_longitude"
col_y = "pickup_latitude"
# Create a csv loader for the taxi data files, reading only these two columns
csv = SimpleCSVLoader(usecols=[col_x, col_y])
# The list of files to load comes from the `cst` module defined in the first cell
csv.input.filenames = cst.output.result
# Create an indexing module on the csv loader output columns
index = BinningIndexND()
# Feed the index with the loader output, restricted to the two coordinate columns
index.input.table = csv.output.result[col_x, col_y]
# Create a querying module working on the same pair of columns
query = RangeQuery2D(column_x=col_x,
                     column_y=col_y)
# Variable modules allow to dynamically modify their values; here, the query ranges
# (var_min feeds the lower bound of the query, var_max the upper bound)
var_min = Variable(name="var_min")
var_max = Variable(name="var_max")
query.input.lower = var_min.output.result
query.input.upper = var_max.output.result
# The query reads the index itself plus the min/max outputs the index publishes
query.input.index = index.output.result
query.input.min = index.output.min_out
query.input.max = index.output.max_out
# Create a module to compute the 2D histogram of the two columns specified
# with the given resolution (RESOLUTION bins on each axis)
histogram2d = Histogram2D(col_x, col_y, xbins=RESOLUTION, ybins=RESOLUTION)
# Connect the histogram to the query results (not directly to the csv loader)
# and to the query's min,max outputs, used as bounds to rescale
histogram2d.input.table = query.output.result
histogram2d.input.min = query.output.min
histogram2d.input.max = query.output.max
# Attach a Sink to the histogram output.
# NOTE(review): presumably needed so that this output has a consumer — confirm.
sink = Sink()
sink.input.inp = histogram2d.output.result
# Corr module in "CovarianceOnly" mode, fed with the full csv loader output
# (it bypasses the range query, unlike the histogram)
cov = Corr(mode="CovarianceOnly")
cov.input.table = csv.output.result
# `sink` is rebound here: the first Sink is no longer reachable by this name,
# but the scheduler log printed by the next cell still lists sink_1 and sink_2.
sink = Sink()
sink.input.inp = cov.output.result

In [3]:
import progressivis.core.aio as aio
# Start the scheduler
# (it is obtained from the csv loader module created in the previous cell)
scheduler = csv.scheduler()
scheduler.task_start()
# Wait one second before the cell returns.
# NOTE(review): presumably this leaves the scheduler time to register the
# modules listed in the cell output — confirm.
# Top-level `await` is not valid in a plain Python script; this line relies on
# the notebook kernel's support for awaiting directly in a cell.
await aio.sleep(1)

Starting scheduler
# Scheduler added module(s): ['binning_index_nd_1', 'constant_1', 'corr_1', 'histogram2_d_1', 'range_query2_d_1', 'simple_csv_loader_1', 'sink_1', 'sink_2', 'var_max', 'var_min']


## Creating views

In [None]:
from ipyprogressivis.views import show
# Display a view of the scheduler started in the previous code cell.
# NOTE(review): this cell has no saved output (`In [None]`), so what gets
# rendered cannot be checked from the notebook itself.
show(scheduler)

In [None]:
# Same call as in the previous cell, on the same scheduler.
# NOTE(review): presumably each call creates its own view — confirm that the
# repetition is intended.
show(scheduler)

In [None]:
# Third identical call on the same scheduler.
# NOTE(review): presumably opens one more view — confirm that the repetition
# is intended.
show(scheduler)