# Progressive Loading and Visualization with Interactions
This notebook shows the simplest code to progressively download the New York Yellow Taxi trips from 2015 (here, the file for January 2015). All the trips are geolocated, and the trip data is stored in multiple CSV files.
We progressively visualize the pickup locations (where people were picked up by the taxis).

First, we silence warning messages and define a few constants: the URL of the taxi file to download and the desired resolution of the final image.

In [None]:
# Optional: uncomment the two magics below to have libraries reloaded
# automatically when they are modified (they are disabled by default).
# %load_ext autoreload
# %autoreload 2
import warnings
# Avoid warning messages: ignore every warning for the rest of the notebook.
warnings.filterwarnings("ignore")

In [None]:
# Notebook-wide constants.
# Compressed CSV file of taxi trips that the loader downloads.
LARGE_TAXI_FILE: str = "https://www.aviz.fr/nyc-taxi/yellow_tripdata_2015-01.csv.bz2"
# Final image size: number of histogram bins along each axis.
RESOLUTION: int = 512

## Define NYC Bounds
Knowing the bounds in advance simplifies the code: they are used below to initialize the query range and the range sliders.
See https://en.wikipedia.org/wiki/Module:Location_map/data/USA_New_York_City

In [None]:
from dataclasses import dataclass
@dataclass
class Bounds:
    """Bounding box of New York City.

    ``top`` and ``bottom`` are paired with the latitude column and ``left``
    and ``right`` with the longitude column when the query range is
    initialized later in the notebook. The default values are presumably
    those of the Wikipedia location-map module linked in the text above
    (verify if exact bounds matter).
    """
    top: float = 40.92  # maximum latitude
    bottom: float = 40.49  # minimum latitude
    left: float = -74.27  # minimum longitude
    right: float = -73.68  # maximum longitude

# Single shared instance holding the default NYC bounds.
bounds = Bounds()

## Create Modules
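First, create the modules we need: a CSV loader, an index, a range query, two variables holding the query bounds, a 2D histogram, and a heatmap.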

In [None]:
from progressivis import (
    CSVLoader, Histogram2D, ConstDict, Heatmap, PDict,
    BinningIndexND, RangeQuery2D, Variable
)
import progressivis.core.aio as aio

# Names of the two CSV columns holding the pickup coordinates
col_x = "pickup_longitude"
col_y = "pickup_latitude"

# Load only the two pickup columns from the taxi file
csv = CSVLoader(LARGE_TAXI_FILE, usecols=[col_x, col_y])
# Create an indexing module on the csv loader output columns
index = BinningIndexND()
# Create a querying module working on the two pickup columns
query = RangeQuery2D(column_x=col_x, column_y=col_y)
# Variable modules hold values that can be modified dynamically; here, the
# lower (var_min) and upper (var_max) bounds of the query
var_min = Variable(name="var_min")
var_max = Variable(name="var_max")
# 2D histogram of the pickup coordinates with RESOLUTION bins on each axis
histogram2d = Histogram2D(col_x, col_y, xbins=RESOLUTION, ybins=RESOLUTION)
# Heatmap module; its input is connected to the histogram in the next cell
heatmap = Heatmap()

## Connect Modules

Then, connect the modules.

In [None]:
# Feed the index with the two pickup columns of the loaded table
# (creates one index per numeric column)
index.input.table = csv.output.result[col_x, col_y]
# The lower and upper bounds of the query come from the two Variable modules
query.input.lower = var_min.output.result
query.input.upper = var_max.output.result
# The query works on the index and also receives the index min/max outputs
query.input.index = index.output.result
query.input.min = index.output.min_out
query.input.max = index.output.max_out
# The histogram is computed on the query result and receives the query
# min/max outputs on its own min/max inputs
histogram2d.input.table = query.output.result
histogram2d.input.min = query.output.min
histogram2d.input.max = query.output.max
# The heatmap takes the histogram result as its input array
heatmap.input.array = histogram2d.output.result

## Visualize the Graph

In [None]:
# Draw the dataflow graph of the scheduler. This is best-effort: the graph is
# only a visual aid, so a missing `graphviz` package must not stop the notebook.
try:
    import graphviz
    src = csv.scheduler.to_graphviz()
    gvz = graphviz.Source(src)
    display(gvz)
except ImportError as exc:
    # Tell the reader why no graph is shown instead of failing silently.
    print(f"Dataflow graph not displayed: {exc}. "
          "Install the 'graphviz' Python package to see it.")

## Display the Heatmap

In [None]:
# Show the heatmap in the notebook. It is displayed before the scheduler is
# started; presumably the image refreshes as the histogram is updated (TODO confirm).
heatmap.display_notebook()

## Start the scheduler

In [None]:
# Start the scheduler the csv loader is attached to. The call is not awaited,
# so presumably it runs in the background while the next cells execute (TODO confirm).
csv.scheduler.task_start()

## Initialize the Variable values

In [None]:
# Give it a bit of time to start
# NOTE(review): the one-second pause is a heuristic; presumably the Variable
# modules only accept input once the scheduler is running — confirm.
await aio.sleep(1)

# Corners of the NYC bounding box, keyed by column name:
# (left, bottom) is the minimum and (right, top) the maximum
bnds_min = PDict({col_x: bounds.left, col_y: bounds.bottom})
bnds_max = PDict({col_x: bounds.right, col_y: bounds.top})

# Send the initial query range to the two Variable modules
await var_min.from_input(bnds_min)
await var_max.from_input(bnds_max)

## Create the Widgets

In [None]:
import ipywidgets as widgets
def _make_range_slider(column, description, steps=10, readout_format='.2f'):
    """Build a range slider spanning the initial bounds of one coordinate column.

    Parameters
    ----------
    column : str
        Key looked up in ``bnds_min`` and ``bnds_max`` to get the slider limits.
    description : str
        Label displayed next to the slider.
    steps : int
        Number of equal steps the full range is divided into.
    readout_format : str
        Format of the displayed values. Two decimals are used by default
        because one step (about 0.06 for the longitude and 0.04 for the
        latitude) is smaller than what a one-decimal readout can show, so
        several slider positions would otherwise display the same numbers.

    Returns
    -------
    widgets.FloatRangeSlider
        A slider initially selecting the whole range.
    """
    low, high = bnds_min[column], bnds_max[column]
    return widgets.FloatRangeSlider(
        value=[low, high],
        min=low,
        max=high,
        step=(high - low) / steps,
        description=description,
        disabled=False,
        # Only notify observers when the handle is released, not while dragging
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format=readout_format,
    )


# One slider per coordinate, both starting on the full NYC bounds
long_slider = _make_range_slider(col_x, 'Longitude:')
lat_slider = _make_range_slider(col_y, 'Latitude:')

# Strong references to the update tasks that are still running. The event loop
# only keeps weak references to tasks, so a task whose result is discarded can
# be garbage-collected before it finishes.
# NOTE(review): assumes aio.create_task returns an asyncio Task — confirm.
_observer_tasks = set()


def observer(_):
    """React to a slider change by sending the new range to the query.

    The change notification itself is ignored: the current values of both
    sliders are read and sent to ``var_min`` (lower corner) and ``var_max``
    (upper corner). Widget callbacks are synchronous, so the update is
    scheduled as a task instead of being awaited.
    """
    async def _coro():
        long_min, long_max = long_slider.value
        lat_min, lat_max = lat_slider.value
        await var_min.from_input({col_x: long_min, col_y: lat_min})
        await var_max.from_input({col_x: long_max, col_y: lat_max})
    task = aio.create_task(_coro())
    # Keep the task alive until it completes, then drop the reference
    _observer_tasks.add(task)
    task.add_done_callback(_observer_tasks.discard)


# Register the same callback on the "value" trait of both sliders, then show
# them stacked vertically (longitude first).
range_sliders = [long_slider, lat_slider]
for slider in range_sliders:
    slider.observe(observer, "value")
widgets.VBox(range_sliders)

## Show the modules
Displaying the scheduler shows all the modules and their states.

In [None]:
# Show what runs: the scheduler is the last expression of the cell,
# so the notebook displays its representation
csv.scheduler

## Stop the scheduler
To stop the scheduler, uncomment the line in the next cell and run it.

In [None]:
# csv.scheduler.task_stop()