## Listing 1. Visualizing the Heatmap of a large data table with ProgressiVis

In [1]:
from progressivis import Scheduler
from progressivis.io import CSVLoader
from progressivis.stats import Histogram2D, Min, Max
from progressivis.vis import Heatmap

In [2]:
# Remote copies of the same monthly files (plain .csv), kept for reference:
# URLS = [f"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-0{n}.csv" for n in range(1,7)]

# Directory holding the local bz2-compressed copies. This is the only
# machine-specific value in the notebook: edit it to run elsewhere.
DATA_DIR = "/home/fekete/src/nyc-taxi"

# One file per month, January through June 2015 (01..06).
URLS = [f"{DATA_DIR}/yellow_tripdata_2015-{month:02d}.csv.bz2" for month in range(1, 7)]
def filter_(df):
    """Keep the rows whose pickup point lies strictly inside a lon/lat box.

    Args:
        df: a frame with 'pickup_longitude' and 'pickup_latitude' columns.

    Returns:
        The subset of ``df`` where -74.10 < longitude < -73.7 and
        40.60 < latitude < 41 (all bounds exclusive).
    """
    pickup_lon = df['pickup_longitude']
    pickup_lat = df['pickup_latitude']
    lon_in_box = (pickup_lon > -74.10) & (pickup_lon < -73.7)
    lat_in_box = (pickup_lat > 40.60) & (pickup_lat < 41)
    return df[lon_in_box & lat_in_box]


In [3]:
# Create the scheduler and also install it as the class-level default.
s = Scheduler()
Scheduler.default = s

# Source: load many compressed CSV files, reading only the two pickup
# coordinate columns. filter_ is handed to the loader (presumably applied to
# the loaded rows -- confirm against the CSVLoader documentation).
csv_module = CSVLoader(
    URLS,
    index_col=False,
    skipinitialspace=True,
    filter_=filter_,
    usecols=['pickup_longitude', 'pickup_latitude'],
)

# Computes the min value of each column.
min_module = Min()
min_module.input.table = csv_module.output.result

# Computes the max value of each column.
max_module = Max()
max_module.input.table = csv_module.output.result

# Compute a 2d histogram (256 x 256 bins) of the pickup coordinates; it is fed
# the loaded table plus the min and max module outputs.
histogram2d = Histogram2D(
    'pickup_longitude',
    'pickup_latitude',
    xbins=256,
    ybins=256,
)
histogram2d.input.table = csv_module.output.result
histogram2d.input.min = min_module.output.result
histogram2d.input.max = max_module.output.result

# Compute the Heatmap from the histogram output.
heatmap = Heatmap()
heatmap.input.array = histogram2d.output.result

**NB:** The results will appear below once all cells have been run.

In [4]:
import ipywidgets as ipw
from IPython.display import display
# Image widget showing the heatmap; created lazily on the first available image.
wg = None


async def _after_run(m, run_number):
    """Refresh the notebook image widget after a run of module ``m``.

    Args:
        m: the module that just ran; must provide ``get_image_bin()``.
        run_number: the run number supplied by the caller (unused here).

    Does nothing while ``m`` has no image yet. The first image creates and
    displays a 512x512 ``ipw.Image``; later images replace its ``value``.
    """
    global wg
    image_bytes = m.get_image_bin()
    if image_bytes is None:
        return
    if wg is not None:
        # Widget already on screen: swap in the new image.
        wg.value = image_bytes
        return
    wg = ipw.Image(value=image_bytes, width=512, height=512)
    display(wg)


# Register the callback on the heatmap module.
heatmap.on_after_run(_after_run)

In [5]:
# Start the scheduler. As the output below shows, the call returns a pending
# asyncio Task wrapping Scheduler.start() rather than a final result.
s.task_start()

<Task pending name='Task-5' coro=<Scheduler.start() running at /home/fekete/src/progressivis/progressivis/core/scheduler.py:273>>

Starting scheduler
# Scheduler added module(s): ['csv_loader_1', 'heatmap_1', 'histogram2_d_1', 'max_1', 'min_1']


Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\x00\x00\x00\x01\x00\x10\x00\x00\x00\x00)\x89+\xf…

In [6]:
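# Uncomment to stop the scheduler:
# s.task_stop()
# Uncomment to set the heatmap's `transform` parameter to another value:
# heatmap.params.transform = 4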

<Task pending name='Task-8' coro=<Scheduler.stop() running at /home/fekete/src/progressivis/progressivis/core/scheduler.py:610>>

Leaving run loop
