## Listing 1. Visualizing the Heatmap of a large data table with ProgressiVis

In [None]:
from progressivis import Scheduler
from progressivis.io import SimpleCSVLoader
from progressivis.stats import Histogram2D, Min, Max
from progressivis.datasets import get_dataset
from progressivis.vis import Heatmap
from progressivis.stats.blobs_table import BlobsPTable
# Create the scheduler and also store it as the Scheduler class default.
s = Scheduler()
Scheduler.default = s

# Monthly NYC yellow-taxi trip files (January to June 2015).  The list is
# not consumed by the code in this cell; it belongs to the alternative
# input described in the note below.
URLS = [
    f"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-0{n}.csv"
    for n in range(1, 7)
]
# Alternative input (disabled), loading many compressed CSV files:
#   CSVLoader(URLS, index_col=False, skipinitialspace=True,
#             usecols=['pickup_longitude', 'pickup_latitude'])

# Columns read from the demo dataset and used as the histogram axes.
cols = ["A", "B"]
csv_module = SimpleCSVLoader(
    get_dataset("bigfile_multiscale"),
    usecols=cols,
    throttle=100,
)

# Per-column minimum of the loaded table.
min_module = Min()
min_module.input.table = csv_module.output.result

# Per-column maximum of the loaded table.
max_module = Max()
max_module.input.table = csv_module.output.result

# 32x32 two-dimensional histogram over columns 'A' (x) and 'B' (y), fed
# with the table plus the min/max results.
histogram2d = Histogram2D(*cols, xbins=32, ybins=32)
histogram2d.input.table = csv_module.output.result
histogram2d.input.min = min_module.output.result
histogram2d.input.max = max_module.output.result

# The heatmap module takes the histogram result as its input array.
heatmap = Heatmap()
heatmap.input.array = histogram2d.output.result

**NB:** the results will appear below once all cells have been run:

In [None]:
from vega.widget import VegaWidget
from itertools import product
import scipy as sp
import numpy as np
# Vega-Lite (v4.8.1 schema) chart description without a "data" entry:
# rect marks placed at ordinal x/y positions and coloured by the
# quantitative field z.
spec_no_data = {
    "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json",
    "config": {
        "view": {"continuousHeight": 300, "continuousWidth": 400},
    },
    "encoding": {
        "color": {"field": "z", "type": "quantitative"},
        "x": {"field": "x", "type": "ordinal"},
        "y": {"field": "y", "type": "ordinal"},
        "size": {"value": 0.1},
    },
    "mark": "rect",
}
# Build the widget from the data-less spec; _after_run later assigns it a
# spec that also carries the heatmap values.
wg = VegaWidget(spec=spec_no_data)
async def _after_run(m, run_number):
    """Refresh the Vega widget from the module's latest binned histogram.

    The counts found at ``m.to_json()['chart']['buffers'][0]['binnedPixels']``
    are mapped through ``log10``, shifted and scaled, clipped to
    ``[0, 2**16]`` and truncated to integers.  One ``{y, x, z}`` record per
    cell is then written into a copy of ``spec_no_data`` that is assigned to
    ``wg.spec``.

    Args:
        m: Object whose ``to_json()`` result exposes a 2-D array of counts
            at the path above.
        run_number: Not used; accepted so the function matches the
            signature expected of the callback.
    """
    histo = np.asarray(m.to_json()['chart']['buffers'][0]['binnedPixels'])
    low = 0
    high = 2**16
    cmin = histo.min()

    # NOTE(review): the offset is the minimum of the *raw* counts while the
    # values are log10 counts, and the factor is the full output range rather
    # than range / (cmax - cmin).  Any cell whose log10 count exceeds
    # cmin by about 1 therefore saturates at `high`.  This mapping is kept
    # exactly as it was; confirm whether it is intended.
    #
    # Empty bins give log10(0) == -inf, which the clip below turns into 0,
    # so the divide-by-zero warning carries no information here.
    with np.errstate(divide='ignore'):
        data = (np.log10(histo) - cmin) * float(high - low) + 0.4999
    data = np.clip(data, 0, high)

    # Row index becomes y and column index becomes x; iteration is row-major.
    cells = [
        dict(y=i, x=j, z=int(value))
        for (i, j), value in np.ndenumerate(data)
    ]
    # Shallow copy of the base spec with the data attached, so that
    # spec_no_data itself is left untouched.
    wg.spec = {
        **spec_no_data,
        "data": {
            "name": "data",
            "values": cells,
        },
    }
# Show the widget in the cell output (display is assumed to be the helper
# that IPython/Jupyter injects into the notebook namespace).
display(wg)
# Attach the callback to the histogram module; presumably ProgressiVis calls
# it after each run of that module — confirm against the library docs.
histogram2d.after_run_proc = _after_run
# Start the scheduler.  The trailing semicolon keeps the notebook from
# echoing the value returned by the call.
s.task_start();