Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make HeatMap more general #849

Merged
merged 22 commits into from Jan 9, 2017
Merged
Changes from 20 commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.
+228 −118
Diff settings

Always

Just for now

Copy path View file
@@ -607,14 +607,14 @@ def sort_topologically(graph):
}
sort_topologically(graph)
[set([1, 2]), set([3, 4]), set([5, 6])]
[[1, 2], [3, 4], [5, 6]]
"""
levels_by_name = {}
names_by_level = defaultdict(set)
names_by_level = defaultdict(list)

def add_level_to_name(name, level):
levels_by_name[name] = level
names_by_level[level].add(name)
names_by_level[level].append(name)


def walk_depth_first(name):
@@ -647,6 +647,30 @@ def walk_depth_first(name):
(names_by_level.get(i, None)
for i in itertools.count())))


def is_cyclic(graph):
    """
    Return True if the directed graph contains a cycle.

    The graph is represented as a dictionary mapping each node to an
    iterable of the nodes it has edges to, e.g.::

        {1: [2], 2: [3], 3: [1]}  # cyclic
        {1: [2], 2: [3], 3: []}   # acyclic

    This is the same representation used by sort_topologically and
    one_to_one.
    """
    # Nodes on the current depth-first path; seeing one again means
    # we followed a back-edge, i.e. found a cycle.
    path = set()

    def visit(vertex):
        path.add(vertex)
        for neighbour in graph.get(vertex, ()):
            if neighbour in path or visit(neighbour):
                return True
        # Backtrack: vertex is no longer on the active path.
        path.remove(vertex)
        return False

    return any(visit(v) for v in graph)


def one_to_one(graph, nodes):
    """Return True if graph contains only one to one mappings.
    Pass a graph as a dictionary mapping of edges for each node and
    a list of all nodes."""
    # One-to-one holds when every node has its own entry and the
    # union of all edge targets covers every node exactly once.
    targets = set()
    for connected in graph.values():
        targets.update(connected)
    return len(graph) == len(nodes) and len(targets) == len(nodes)


def get_overlay_spec(o, k, v):
"""
Gets the type.group.label + key spec from an Element in an Overlay.
@@ -996,3 +1020,13 @@ def dt64_to_dt(dt64):
"""
ts = (dt64 - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
return dt.datetime.utcfromtimestamp(ts)


def is_nan(x):
    """
    Checks whether value is NaN on arbitrary types.

    Returns False (rather than raising) for values np.isnan does not
    support, e.g. strings or None.
    """
    try:
        return np.isnan(x)
    except Exception:
        # np.isnan raises TypeError/ValueError for unsupported types;
        # catch Exception (not a bare except) so SystemExit and
        # KeyboardInterrupt still propagate.
        return False
Copy path View file
@@ -14,7 +14,7 @@
from ..core.util import pd
from .chart import Curve
from .tabular import Table
from .util import compute_edges, toarray
from .util import compute_edges, toarray, categorical_aggregate2d

try:
from ..core.data import PandasInterface
@@ -365,16 +365,14 @@ def dimension_values(self, dimension, expanded=True, flat=True):
return super(QuadMesh, self).dimension_values(idx)



class HeatMap(Dataset, Element2D):
"""
HeatMap is an atomic Element used to visualize two dimensional
parameter spaces. It supports sparse or non-linear spaces, dynamically
upsampling them to a dense representation, which can be visualized.
A HeatMap can be initialized with any dict or NdMapping type with
two-dimensional keys. Once instantiated the dense representation is
available via the .data property.
two-dimensional keys.
"""

group = param.String(default='HeatMap', constant=True)
@@ -383,85 +381,16 @@ class HeatMap(Dataset, Element2D):

vdims = param.List(default=[Dimension('z')])

def __init__(self, data, extents=None, **params):
    """
    Initialize the HeatMap from any tabular data format supported by
    Dataset, then compute and cache the dense 2D raster.

    Parameters
    ----------
    data : any Dataset-compatible data (dict, NdMapping, DataFrame, ...)
    extents : tuple or None
        (x0, y0, x1, y1) extents; when None they default to the shape
        of the computed raster.
    """
    super(HeatMap, self).__init__(data, **params)
    # Densify sparse input into a regular 2D aggregate; keep both the
    # aggregated dataset and the raw 2D array.
    data, self.raster = self._compute_raster()
    self.data = data.data
    self.interface = data.interface
    self.depth = 1
    if extents is None:
        # Default extents span one unit per category in each dimension.
        (d1, d2) = self.raster.shape[:2]
        self.extents = (0, 0, d2, d1)
    else:
        self.extents = extents


def _compute_raster(self):
    """
    Compute a dense 2D aggregate of the (possibly sparse) data.

    Returns a (dataset, array) pair where dataset is the aggregated
    Dataset and array is the flipped 2D value array.
    """
    # Already gridded data needs no densification; just extract and
    # flip the 2D value array.
    if self.interface.gridded:
        return self, np.flipud(self.dimension_values(2, flat=False))
    d1keys = self.dimension_values(0, False)
    d2keys = self.dimension_values(1, False)
    # NaN placeholders at every cross-product coordinate so missing
    # cells survive the aggregation below.
    coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys]
    dtype = 'dataframe' if pd else 'dictionary'
    dense_data = Dataset(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype])
    concat_data = self.interface.concatenate([dense_data, Dataset(self)], datatype=dtype)
    # nanmean over [real value, NaN placeholder] yields the real value;
    # suppress the warning emitted for all-NaN (truly missing) cells.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', r'Mean of empty slice')
        data = concat_data.aggregate(self.kdims, np.nanmean)
    array = data.dimension_values(2).reshape(len(d1keys), len(d2keys))
    # Transpose/flip so rows correspond to the second dimension,
    # top-to-bottom, matching image orientation.
    return data, np.flipud(array.T)


def __setstate__(self, state):
    """
    Restore a pickled HeatMap, upgrading data stored by older
    versions (NdMapping/Dataset under the legacy '_data' key) to the
    current Dataset-backed representation.
    """
    if '_data' in state:
        data = state['_data']
        if isinstance(data, NdMapping):
            # Legacy NdMapping: flatten (key, value) pairs into rows.
            items = [tuple(k)+((v,) if np.isscalar(v) else tuple(v))
                     for k, v in data.items()]
            kdims = state['kdims'] if 'kdims' in state else self.kdims
            vdims = state['vdims'] if 'vdims' in state else self.vdims
            data = Dataset(items, kdims=kdims, vdims=vdims).data
        elif isinstance(data, Dataset):
            data = data.data
            # NOTE(review): kdims/vdims are read from `data` *after* it
            # was rebound to the raw .data above — looks like these were
            # meant to come from the Dataset; verify against old pickles.
            kdims = data.kdims
            vdims = data.vdims
        # NOTE(review): if data is neither NdMapping nor Dataset,
        # kdims/vdims are unbound here and this raises NameError.
        state['data'] = data
        state['kdims'] = kdims
        state['vdims'] = vdims
    self.__dict__ = state

    # Re-derive the interface from the restored raw data type.
    if isinstance(self.data, NdElement):
        self.interface = NdElementInterface
    elif isinstance(self.data, np.ndarray):
        self.interface = ArrayInterface
    elif util.is_dataframe(self.data):
        self.interface = PandasInterface
    elif isinstance(self.data, dict):
        self.interface = DictInterface
    self.depth = 1
    # Recompute the dense raster; old pickles may predate it.
    data, self.raster = self._compute_raster()
    self.interface = data.interface
    self.data = data.data
    if 'extents' not in state:
        (d1, d2) = self.raster.shape[:2]
        self.extents = (0, 0, d2, d1)

    super(HeatMap, self).__setstate__(state)

def dense_keys(self):
    """
    Return the full cross-product of the two key dimensions' unique
    values as a list of two aligned tuples (all d1 values, all d2
    values), one pair per cell.
    """
    xs = self.dimension_values(0, False)
    ys = self.dimension_values(1, False)
    pairs = [(x, y) for x in xs for y in ys]
    return list(zip(*pairs))


def dframe(self, dense=False):
    """
    Return the data as a DataFrame; when dense=True, missing
    cross-product keys are filled with NaN before conversion.
    """
    if dense:
        keys1, keys2 = self.dense_keys()
        # Fill every (k1, k2) combination, defaulting absent cells to
        # NaN. (`product` is presumably itertools.product — imported
        # at module level, outside this view.)
        dense_map = self.clone({(k1, k2): self._data.get((k1, k2), np.NaN)
                                for k1, k2 in product(keys1, keys2)})
        return dense_map.dframe()
    return super(HeatMap, self).dframe()
self.gridded = categorical_aggregate2d(self)

@property
def raster(self):
    """
    Deprecated accessor for the 2D value array; use the `gridded`
    aggregate instead. Emits a deprecation warning on every access.
    """
    self.warning("The .raster attribute on HeatMap is deprecated, "
                 "the 2D aggregate is now computed dynamically "
                 "during plotting.")
    # Third dimension (index 2) of the gridded aggregate holds the
    # values; flat=False preserves the 2D shape.
    return self.gridded.dimension_values(2, flat=False)


class Image(SheetCoordinateSystem, Raster):
Copy path View file
@@ -1,10 +1,24 @@
import itertools

import param
import numpy as np

from ..core import Dataset, OrderedDict
from ..core.operation import ElementOperation
from ..core.util import (pd, is_nan, sort_topologically,
cartesian_product, is_cyclic, one_to_one)

try:
import dask
except:
dask = None

try:
import xarray as xr
except:
xr = None


def toarray(v, index_value=False):
"""
Interface helper function to turn dask Arrays into numpy arrays as
@@ -30,3 +44,98 @@ def compute_edges(edges):
raise ValueError('Centered bins have to be of equal width.')
edges -= width/2.
return np.concatenate([edges, [edges[-1]+width]])


def reduce_fn(x):
    """
    Aggregation function to get the first non-NaN value from a
    sequence (or pandas Series); returns NaN when every value is NaN.
    """
    # Unwrap a pandas Series to its underlying array when pandas is
    # available; otherwise iterate the input directly.
    values = x.values if pd and isinstance(x, pd.Series) else x
    for v in values:
        if not is_nan(v):
            return v
    return np.NaN


class categorical_aggregate2d(ElementOperation):
    """
    Generates a gridded Dataset of 2D aggregate arrays indexed by the
    first two dimensions of the passed Element, turning all remaining
    dimensions into value dimensions. The key dimensions of the
    gridded array are treated as categorical indices. Useful for data
    indexed by two independent categorical variables such as a table
    of population values indexed by country and year. Data that is
    indexed by continuous dimensions should be binned before
    aggregation. The aggregation will retain the global sorting order
    of both dimensions.

    >> table = Table([('USA', 2000, 282.2), ('UK', 2005, 58.89)],
                     kdims=['Country', 'Year'], vdims=['Population'])
    >> categorical_aggregate2d(table)
    Dataset({'Country': ['USA', 'UK'], 'Year': [2000, 2005],
             'Population': [[ 282.2 , np.NaN], [np.NaN, 58.89]]},
            kdims=['Country', 'Year'], vdims=['Population'])
    """

    datatype = param.List(['xarray', 'grid'] if xr else ['grid'], doc="""
        The grid interface types to use when constructing the gridded Dataset.""")

    def _get_coords(self, obj, xdim, ydim):
        """
        Compute the unique x- and y-coordinates of the aggregate,
        establishing a global ordering of y-values from the per-group
        orderings via topological sort.
        """
        d1keys = obj.dimension_values(xdim, False)
        d2keys = obj.dimension_values(ydim, False)

        # Record the y-value successor relationships observed within
        # each x-group; these partial orderings are then merged.
        grouped = obj.groupby(xdim, container_type=OrderedDict,
                              group_type=Dataset).values()
        orderings = OrderedDict()
        for group in grouped:
            vals = group.dimension_values(ydim)
            if len(vals) == 1:
                orderings[vals[0]] = [vals[0]]
            else:
                for i in range(len(vals)-1):
                    p1, p2 = vals[i:i+2]
                    orderings[p1] = [p2]
        if one_to_one(orderings, d2keys):
            # Orderings form a simple chain covering all keys: a plain
            # sort suffices.
            d2keys = np.sort(d2keys)
        elif not is_cyclic(orderings):
            # Merge the partial orderings topologically; cyclic
            # orderings are left in observed order.
            d2keys = list(itertools.chain(*sort_topologically(orderings)))
        return d1keys, d2keys

    def _process(self, obj, key=None):
        """
        Generates a categorical 2D aggregate by inserting NaNs at all
        cross-product locations that do not already have a value assigned.
        Returns a 2D gridded Dataset object.
        """
        if isinstance(obj, Dataset) and obj.interface.gridded:
            # Already gridded: nothing to aggregate.
            return obj
        elif obj.ndims > 2:
            raise ValueError("Cannot aggregate more than two dimensions")
        elif len(obj.dimensions()) < 3:
            raise ValueError("Must have at least two key dimensions to "
                             "aggregate over and one value dimension to "
                             "aggregate on.")

        dims = obj.dimensions()
        dim_labels = obj.dimensions(label=True)
        vdims = dims[2:]
        xdim, ydim = dim_labels[:2]
        d1keys, d2keys = self._get_coords(obj, xdim, ydim)
        shape = (len(d2keys), len(d1keys))
        nsamples = np.product(shape)

        # Pad data with NaNs at every cross-product location so that
        # missing cells survive the aggregation.
        ys, xs = cartesian_product([d2keys, d1keys])
        data = {xdim: xs.flatten(), ydim: ys.flatten()}
        for vdim in vdims:
            values = np.empty(nsamples)
            values[:] = np.NaN
            data[vdim.name] = values
        dtype = 'dataframe' if pd else 'dictionary'
        dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype])
        concat_data = obj.interface.concatenate([dense_data, Dataset(obj)], datatype=dtype)
        # reduce_fn returns the first non-NaN value, so real data wins
        # over the NaN padding wherever both are present.
        agg = concat_data.reindex([xdim, ydim]).aggregate([xdim, ydim], reduce_fn)

        # Convert the flat aggregate into a gridded dataset.
        grid_data = {xdim: d1keys, ydim: d2keys}
        for vdim in vdims:
            grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape)
        return agg.clone(grid_data, datatype=self.p.datatype)

@@ -15,6 +15,7 @@
from ..element.chart import Histogram, Scatter
from ..element.raster import Raster, Image, RGB, QuadMesh
from ..element.path import Contours, Polygons
from ..element.util import categorical_aggregate2d
from ..streams import RangeXY

column_interfaces = [ArrayInterface, DictInterface]
@@ -1,7 +1,13 @@
import numpy as np
import param

from ...core.util import cartesian_product
from bokeh.models.mappers import LinearColorMapper
try:
from bokeh.models.mappers import LogColorMapper
except ImportError:
LogColorMapper = None

from ...core.util import cartesian_product, is_nan, unique_array
from ...element import Image, Raster, RGB
from ..renderer import SkipRendering
from ..util import map_colors
@@ -130,27 +136,31 @@ class HeatmapPlot(ColorbarPlot):
def _axes_props(self, plots, subplots, element, ranges):
    """
    Build categorical axis properties (labels and x/y factor ranges)
    from the element's gridded aggregate.
    """
    dims = element.dimensions()
    labels = self._get_axis_labels(dims)
    # Use the gridded aggregate so the axis factors reflect the full,
    # globally-ordered categorical coordinates.
    agg = element.gridded
    xvals, yvals = [agg.dimension_values(i, False) for i in range(2)]
    if self.invert_yaxis: yvals = yvals[::-1]
    # Bokeh categorical ranges require string factors.
    plot_ranges = {'x_range': [str(x) for x in xvals],
                   'y_range': [str(y) for y in yvals]}
    return ('auto', 'auto'), labels, plot_ranges


def get_data(self, element, ranges=None, empty=False):
    """
    Build the ColumnDataSource data and glyph mapping for the heatmap,
    drawing values from the element's gridded aggregate.
    """
    x, y, z = element.dimensions(label=True)[:3]
    aggregate = element.gridded
    style = self.style[self.cyclic_index]
    cmapper = self._get_colormapper(element.vdims[0], element, ranges, style)
    if empty:
        # The glyph's fill_color references the 'zvalues' column, so it
        # must exist even in the empty source (was keyed on z before,
        # leaving the referenced column missing).
        data = {x: [], y: [], 'zvalues': []}
    else:
        zvals = aggregate.dimension_values(z)
        # Categorical axes expect string coordinates.
        xvals, yvals = [[str(v) for v in aggregate.dimension_values(i)]
                        for i in range(2)]
        data = {x: xvals, y: yvals, 'zvalues': zvals}

    if 'hover' in self.tools+self.default_tools:
        # NOTE(review): this also runs when empty=True, adding full
        # hover columns to an otherwise empty source — confirm intended.
        for vdim in element.vdims:
            data[vdim.name] = ['-' if is_nan(v) else vdim.pprint_value(v)
                               for v in aggregate.dimension_values(vdim)]
    return (data, {'x': x, 'y': y, 'fill_color': {'field': 'zvalues', 'transform': cmapper},
                   'height': 1, 'width': 1})


Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.