Skip to content

Commit

Permalink
Added downsample_columns operation
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr committed Oct 5, 2016
1 parent 2be0aa2 commit 5f20ea5
Showing 1 changed file with 68 additions and 1 deletion.
69 changes: 68 additions & 1 deletion holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,17 @@

from ..core import (ElementOperation, NdOverlay, Overlay, GridMatrix,
HoloMap, Dataset, Element, Collator)
from ..core.util import find_minmax, group_sanitizer, label_sanitizer
from ..core.data import ArrayInterface, DictInterface
from ..core.util import find_minmax, group_sanitizer, label_sanitizer, pd
from ..element.chart import Histogram, Scatter
from ..element.raster import Raster, Image, RGB, QuadMesh
from ..element.path import Contours, Polygons
from ..streams import RangeXY

# Data interfaces that downsample_columns can sample rows from directly.
column_interfaces = [ArrayInterface, DictInterface]
if pd:
    # NOTE(review): `pd` presumably is falsy when pandas is unavailable, so
    # the pandas-backed interface is only imported and registered when it
    # can actually be used -- confirm against ..core.util.
    from ..core.data import PandasInterface
    column_interfaces += [PandasInterface]


def identity(x, k):
    """Return ``x`` unchanged; the second argument ``k`` is ignored."""
    return x
Expand Down Expand Up @@ -538,6 +545,66 @@ def _process(self, view, key=None):



class downsample_columns(ElementOperation):
    """
    Downsamples any column based Element by sampling a specified
    number of random rows from the data if the current view defined by
    the x_range and y_range contains more than max_samples. By default
    the operation returns a DynamicMap with a RangeXY stream allowing
    dynamic downsampling.
    """

    dynamic = param.Boolean(default=True, doc="""
        Enables dynamic processing by default.""")

    max_samples = param.Integer(default=800, doc="""
        Maximum number of samples to display at the same time.""")

    random_seed = param.Integer(default=42, doc="""
        Seed used to initialize randomization.""")

    streams = param.List(default=[RangeXY], doc="""
        List of streams that are applied if dynamic=True, allowing
        for dynamic interaction with the plot.""")

    x_range = param.NumericTuple(default=None, length=2, doc="""
        The x_range as a tuple of min and max x-value. Auto-ranges
        if set to None.""")

    y_range = param.NumericTuple(default=None, length=2, doc="""
        The y_range as a tuple of min and max y-value. Auto-ranges
        if set to None.""")

    def _process(self, element, key=None):
        """
        Slice the element to the current x/y ranges and, if more than
        max_samples rows remain, return a random subset of exactly
        max_samples rows drawn without replacement.

        The element must be a Dataset, otherwise a ValueError is
        raised. The key argument is accepted but not used here.
        Sampling is seeded with random_seed on every call, so repeated
        calls on the same data select the same rows.
        """
        if not isinstance(element, Dataset):
            raise ValueError("Cannot downsample non-Dataset types.")
        if element.interface not in column_interfaces:
            # Cast to an interface that supports row sampling. This must
            # clone the element being processed: the code previously
            # referenced an undefined ``plot`` object here, which raised
            # a NameError for any non-columnar interface.
            element = element.clone(datatype=['dataframe', 'dictionary'])

        xstart, xend = self.p.x_range if self.p.x_range else element.range(0)
        ystart, yend = self.p.y_range if self.p.y_range else element.range(1)

        # Slice element to current ranges
        xdim, ydim = element.dimensions(label=True)[0:2]
        sliced = element.select(**{xdim: (xstart, xend),
                                   ydim: (ystart, yend)})

        if len(sliced) <= self.p.max_samples:
            return sliced

        prng = np.random.RandomState(self.p.random_seed)
        # PandasInterface is only imported when pandas is available, so
        # guard on ``pd`` before looking the name up.
        if pd and element.interface is PandasInterface:
            data = sliced.data.sample(self.p.max_samples,
                                      random_state=prng)
        else:
            inds = prng.choice(len(sliced), self.p.max_samples, False)
            # ``element.interface`` is the interface class itself, so it
            # has to be compared by identity rather than with isinstance.
            if element.interface is DictInterface:
                data = {k: v[inds] for k, v in sliced.data.items()}
            else:
                data = sliced.data[inds, :]
        return element.clone(data)


#==================#
# Other operations #
#==================#
Expand Down

0 comments on commit 5f20ea5

Please sign in to comment.