In [1]:
from dask_yarn import YarnCluster
from dask.distributed import Client

# Launch a YARN-backed Dask cluster; every worker gets 1 vcore and 2 GiB of memory.
cluster = YarnCluster(
    environment="/home/hadoop/environment.tar.gz",
    worker_vcores=1,
    worker_memory="2GiB",
)

# Ask for 8 workers of that size.
cluster.scale(8)

# Attach a client to the cluster. Before running the cells that follow, wait
# until the Dask scheduler reports the workers as registered.
client = Client(cluster)

  from distributed.utils import (
  from distributed.utils import (
distributed.scheduler - INFO - Clear task state
distributed.scheduler - INFO -   Scheduler at:  tcp://172.31.15.79:42407
distributed.scheduler - INFO -   dashboard at:                    :39595
distributed.scheduler - INFO - Receive client connection: Client-217f14ac-5a34-11ec-871e-02a73e572171
distributed.core - INFO - Starting established connection


In [None]:
import dask.dataframe as dd

# Read the CSV from S3 as a Dask DataFrame, then persist it so that later
# cells reuse the loaded partitions.
csv_uri = 's3://bl-buildings/Outputs/final_df_mercator.csv'
df = dd.read_csv(csv_uri)
df = df.persist()

# Show the last rows as a quick check of the loaded data.
df.tail()

In [None]:
import numpy as np
import holoviews as hv
from holoviews import opts
# Load the matplotlib plotting backend for HoloViews.
# NOTE(review): the very next cell calls hv.extension('bokeh'), so this
# selection presumably does not stay active — confirm matplotlib is still needed.
hv.extension('matplotlib') # specify matplotlib as backend

In [None]:
from holoviews.element.tiles import StamenTerrain
# Load the bokeh plotting backend for HoloViews. A later cell attaches a
# bokeh HoverTool to the points, so those plots presumably rely on this backend.
hv.extension('bokeh')

In [None]:
# Figure geometry in pixels: 750 wide, with the height derived from a
# 1.2 : 1 width-to-height ratio (floor division, then truncated to int).
plot_width  = int(750)
plot_height = int(plot_width//1.2)

# Viewport: a 40,000-unit square centred on (-8832869, 5411602).
# NOTE(review): presumably Web Mercator metres, given the tile source and the
# "mercator" CSV name — confirm.
x_range, y_range = (-8832869-20000, -8832869+20000), (5411602-20000, 5411602+20000)

# NOTE(review): plot_options is not referenced by any cell visible here; it is
# kept in case another cell reads it.
plot_options = hv.Options(width=plot_width, height=plot_height, xaxis=None, yaxis=None)

# Default sizing/styling for every Points, Overlay and RGB element created
# after this cell.
opts.defaults(
    opts.Points(width=plot_width, height=plot_height, size=5, color='blue'),
    opts.Overlay(width=plot_width, height=plot_height, xaxis=None, yaxis=None),
    opts.RGB(width=plot_width, height=plot_height))

# Plot a 5% sample of the data. The sample is seeded so that a
# Restart & Run All draws the same subset (and therefore the same figure)
# every time; without random_state the plotted points change between runs.
samples = df.sample(frac=.05, random_state=42)

# Map tiles restricted to the viewport, with the sampled points drawn on top.
tiles = StamenTerrain().redim.range(x=x_range, y=y_range)
points = hv.Points(samples, ['x', 'y'])
(tiles * points)

In [None]:
from bokeh.models import HoverTool

# Rebuild the sampled points with the three travel-time columns attached as
# value dimensions, so they are available to the hover tool.
points = hv.Points(
    samples,
    kdims=['x', 'y'],
    vdims=['auto_time', 'bicycle_time', 'pedestrian_time'],
)

# One (label, '@column') tooltip entry per travel-time column.
tooltips = [
    (column, f'@{column}')
    for column in ('auto_time', 'bicycle_time', 'pedestrian_time')
]
hover = HoverTool(tooltips=tooltips)

# Overlay the hover-enabled points on the map tiles.
hover_points = points.opts(tools=[hover])
tiles * hover_points

In [None]:
import datashader as ds
from datashader import transfer_functions as tf
from datashader.colors import Greys9
# Reversed Greys9 palette with the final two colours of the reversed list removed.
Greys9_r = list(Greys9)[::-1][:-2]

# Blank canvas with the same pixel size and viewport as the earlier plots.
cvs = ds.Canvas(
    plot_width=plot_width,
    plot_height=plot_height,
    x_range=x_range,
    y_range=y_range,
)

# Pixel-level aggregation: the mean of auto_time over the points that fall in
# each pixel. (datashader ships other reductions as well -- sum, count, etc.)
agg = cvs.points(
    df[['x', 'y', 'auto_time']],
    x='x',
    y='y',
    agg=ds.mean(column="auto_time"),
)

# Colour each pixel from its aggregated mean auto_time using the grey ramp.
img = tf.shade(agg, cmap=Greys9_r)

In [None]:
# Display the shaded image built in the previous cell as this cell's output.
img

In [None]:
import holoviews.operation.datashader as hd
from datashader.colors import Hot
# Interactive plot via the bokeh backend.
hv.extension('bokeh')

# Points over the full dataset, keyed on x/y and carrying auto_time.
auto_time_points = hv.Points(df[['x', 'y', 'auto_time']], kdims=['x', 'y'])

# Datashade the points: aggregate the mean auto_time and colour it with the Hot palette.
shaded = hd.datashade(
    auto_time_points,
    cmap=Hot,
    aggregator=ds.mean(column="auto_time"),
)

# Apply dynspread, then style the result: black background, no axes, 900x500.
spread = hd.dynspread(shaded)
viz = spread.opts(bgcolor='black', xaxis=None, yaxis=None, width=900, height=500)
viz