# pyviz/holoviews

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

# Make HeatMap more general #849

Merged
merged 22 commits into from Jan 9, 2017
Merged

# Make HeatMap more general #849

Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
Filter file types
Failed to load files and symbols.
+228 −118

#### Just for now

@@ -607,14 +607,14 @@ def sort_topologically(graph):
}
sort_topologically(graph)
[set([1, 2]), set([3, 4]), set([5, 6])]
[[1, 2], [3, 4], [5, 6]]
"""
levels_by_name = {}
names_by_level = defaultdict(set)
names_by_level = defaultdict(list)

levels_by_name[name] = level
names_by_level[level].append(name)

def walk_depth_first(name):
@@ -647,6 +647,30 @@ def walk_depth_first(name):
(names_by_level.get(i, None)
for i in itertools.count())))

def is_cyclic(graph):
    """Return True if the directed graph contains a cycle.

    The graph must be represented as a dictionary mapping each node
    to an iterable of the nodes it has edges to (the same adjacency
    representation used by sort_topologically and one_to_one).
    """
    # Nodes on the current DFS path; a neighbour already on the path
    # means we have walked back into ourselves, i.e. a cycle.
    path = set()

    def visit(vertex):
        # BUGFIX: the vertex must be recorded on the path before
        # exploring its neighbours, otherwise the later remove()
        # raises KeyError and cycles are never detected.
        path.add(vertex)
        for neighbour in graph.get(vertex, ()):
            if neighbour in path or visit(neighbour):
                return True
        path.remove(vertex)
        return False

    return any(visit(v) for v in graph)

def one_to_one(graph, nodes):
    """Return True if the graph contains only one-to-one mappings.

    The graph is a dictionary mapping each node to the nodes it has
    edges to; nodes is the list of all nodes. The mapping is one to
    one when every node has an entry and the edges, taken together,
    point at exactly as many distinct targets as there are nodes.
    """
    if len(graph) != len(nodes):
        return False
    targets = set()
    for edge_list in graph.values():
        targets.update(edge_list)
    return len(targets) == len(nodes)

def get_overlay_spec(o, k, v):
"""
Gets the type.group.label + key spec from an Element in an Overlay.
@@ -996,3 +1020,13 @@ def dt64_to_dt(dt64):
"""
ts = (dt64 - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
return dt.datetime.utcfromtimestamp(ts)

def is_nan(x):
    """
    Checks whether value is NaN on arbitrary types.

    Non-numeric values (strings, None, ...) make np.isnan raise,
    which is treated as "not NaN" and returns False.
    """
    try:
        return np.isnan(x)
    except Exception:
        # Narrowed from a bare except: so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
 @@ -14,7 +14,7 @@ from ..core.util import pd from .chart import Curve from .tabular import Table from .util import compute_edges, toarray from .util import compute_edges, toarray, categorical_aggregate2d try: from ..core.data import PandasInterface @@ -365,16 +365,14 @@ def dimension_values(self, dimension, expanded=True, flat=True): return super(QuadMesh, self).dimension_values(idx) class HeatMap(Dataset, Element2D): """ HeatMap is an atomic Element used to visualize two dimensional parameter spaces. It supports sparse or non-linear spaces, dynamically upsampling them to a dense representation, which can be visualized. A HeatMap can be initialized with any dict or NdMapping type with two-dimensional keys. Once instantiated the dense representation is available via the .data property. two-dimensional keys. """ group = param.String(default='HeatMap', constant=True) @@ -383,85 +381,16 @@ class HeatMap(Dataset, Element2D): vdims = param.List(default=[Dimension('z')]) def __init__(self, data, extents=None, **params): def __init__(self, data, **params): super(HeatMap, self).__init__(data, **params) data, self.raster = self._compute_raster() self.data = data.data self.interface = data.interface self.depth = 1 if extents is None: (d1, d2) = self.raster.shape[:2] self.extents = (0, 0, d2, d1) else: self.extents = extents def _compute_raster(self): if self.interface.gridded: return self, np.flipud(self.dimension_values(2, flat=False)) d1keys = self.dimension_values(0, False) d2keys = self.dimension_values(1, False) coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys] dtype = 'dataframe' if pd else 'dictionary' dense_data = Dataset(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype]) concat_data = self.interface.concatenate([dense_data, Dataset(self)], datatype=dtype) with warnings.catch_warnings(): warnings.filterwarnings('ignore', r'Mean of empty slice') data = concat_data.aggregate(self.kdims, np.nanmean) array = 
data.dimension_values(2).reshape(len(d1keys), len(d2keys)) return data, np.flipud(array.T) def __setstate__(self, state): if '_data' in state: data = state['_data'] if isinstance(data, NdMapping): items = [tuple(k)+((v,) if np.isscalar(v) else tuple(v)) for k, v in data.items()] kdims = state['kdims'] if 'kdims' in state else self.kdims vdims = state['vdims'] if 'vdims' in state else self.vdims data = Dataset(items, kdims=kdims, vdims=vdims).data elif isinstance(data, Dataset): data = data.data kdims = data.kdims vdims = data.vdims state['data'] = data state['kdims'] = kdims state['vdims'] = vdims self.__dict__ = state if isinstance(self.data, NdElement): self.interface = NdElementInterface elif isinstance(self.data, np.ndarray): self.interface = ArrayInterface elif util.is_dataframe(self.data): self.interface = PandasInterface elif isinstance(self.data, dict): self.interface = DictInterface self.depth = 1 data, self.raster = self._compute_raster() self.interface = data.interface self.data = data.data if 'extents' not in state: (d1, d2) = self.raster.shape[:2] self.extents = (0, 0, d2, d1) super(HeatMap, self).__setstate__(state) def dense_keys(self): d1keys = self.dimension_values(0, False) d2keys = self.dimension_values(1, False) return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys])) def dframe(self, dense=False): if dense: keys1, keys2 = self.dense_keys() dense_map = self.clone({(k1, k2): self._data.get((k1, k2), np.NaN) for k1, k2 in product(keys1, keys2)}) return dense_map.dframe() return super(HeatMap, self).dframe() self.gridded = categorical_aggregate2d(self) @property def raster(self): self.warning("The .raster attribute on HeatMap is deprecated, " "the 2D aggregate is now computed dynamically " "during plotting.") return self.gridded.dimension_values(2, flat=False) class Image(SheetCoordinateSystem, Raster):
@@ -1,10 +1,24 @@
import itertools

import param
import numpy as np

from ..core import Dataset, OrderedDict
from ..core.operation import ElementOperation
from ..core.util import (pd, is_nan, sort_topologically,
cartesian_product, is_cyclic, one_to_one)

try:
except:

try:
import xarray as xr
except:
xr = None

def toarray(v, index_value=False):
"""
Interface helper function to turn dask Arrays into numpy arrays as
@@ -30,3 +44,98 @@ def compute_edges(edges):
raise ValueError('Centered bins have to be of equal width.')
edges -= width/2.
return np.concatenate([edges, [edges[-1]+width]])

def reduce_fn(x):
    """
    Aggregation function to get the first non-zero value.
    """
    # Unwrap pandas Series into their underlying array when pandas
    # is available; anything else is iterated directly.
    if pd and isinstance(x, pd.Series):
        candidates = x.values
    else:
        candidates = x
    for candidate in candidates:
        if not is_nan(candidate):
            return candidate
    return np.NaN

class categorical_aggregate2d(ElementOperation):
    """
    Generates a gridded Dataset of 2D aggregate arrays indexed by the
    first two dimensions of the passed Element, turning all remaining
    dimensions into value dimensions. The key dimensions of the
    gridded array are treated as categorical indices. Useful for data
    indexed by two independent categorical variables such as a table
    of population values indexed by country and year. Data that is
    indexed by continuous dimensions should be binned before
    aggregation. The aggregation will retain the global sorting order
    of both dimensions.

    >> table = Table([('USA', 2000, 282.2), ('UK', 2005, 58.89)],
                     kdims=['Country', 'Year'], vdims=['Population'])
    >> categorical_aggregate2d(table)
    Dataset({'Country': ['USA', 'UK'], 'Year': [2000, 2005],
             'Population': [[ 282.2 , np.NaN], [np.NaN, 58.89]]},
            kdims=['Country', 'Year'], vdims=['Population'])
    """

    datatype = param.List(['xarray', 'grid'] if xr else ['grid'], doc="""
        The grid interface types to use when constructing the gridded Dataset.""")

    def _process(self, obj, key=None):
        """
        Generates a categorical 2D aggregate by inserting NaNs at all
        cross-product locations that do not already have a value assigned.
        Returns a 2D gridded Dataset object.
        """
        if isinstance(obj, Dataset) and obj.interface.gridded:
            # Already gridded, nothing to aggregate
            return obj
        elif obj.ndims > 2:
            raise ValueError("Cannot aggregate more than two dimensions")
        elif len(obj.dimensions()) < 3:
            # Fixed message: previously the implicit string concatenation
            # produced "aggregate overand" and dropped the word "least".
            raise ValueError("Must have at least two dimensions to aggregate "
                             "over and one value dimension to aggregate on.")

        dims = obj.dimensions()
        vdims = dims[2:]
        xdim, ydim = obj.dimensions(label=True)[:2]
        d1keys = obj.dimension_values(xdim, False)
        d2keys = obj.dimension_values(ydim, False)
        shape = (len(d2keys), len(d1keys))
        # np.prod instead of the deprecated np.product alias (removed
        # in NumPy 2.0)
        nsamples = np.prod(shape)

        # Determine global ordering of y-values: record the pairwise
        # orderings observed within each x-group; if they are already
        # one-to-one a plain sort suffices, otherwise (unless cyclic)
        # derive the order with a topological sort.
        grouped = obj.groupby(xdim, container_type=OrderedDict,
                              group_type=Dataset).values()
        orderings = OrderedDict()
        for group in grouped:
            vals = group.dimension_values(ydim)
            if len(vals) == 1:
                orderings[vals[0]] = [vals[0]]
            else:
                for i in range(len(vals)-1):
                    p1, p2 = vals[i:i+2]
                    orderings[p1] = [p2]
        if one_to_one(orderings, d2keys):
            d2keys = np.sort(d2keys)
        elif not is_cyclic(orderings):
            d2keys = list(itertools.chain(*sort_topologically(orderings)))

        # Pad data with NaNs at every cross-product location, then
        # aggregate so existing values win over the NaN placeholders
        ys, xs = cartesian_product([d2keys, d1keys])
        data = {xdim: xs.flatten(), ydim: ys.flatten()}
        for vdim in vdims:
            values = np.empty(nsamples)
            values[:] = np.NaN
            data[vdim.name] = values
        dtype = 'dataframe' if pd else 'dictionary'
        dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype])
        concat_data = obj.interface.concatenate([dense_data, Dataset(obj)], datatype=dtype)
        agg = concat_data.reindex([xdim, ydim]).aggregate([xdim, ydim], reduce_fn)

        # Convert tabular aggregate to a gridded dataset
        grid_data = {xdim: d1keys, ydim: d2keys}
        for vdim in vdims:
            grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape)
        return agg.clone(grid_data, datatype=self.p.datatype)

 @@ -15,6 +15,7 @@ from ..element.chart import Histogram, Scatter from ..element.raster import Raster, Image, RGB, QuadMesh from ..element.path import Contours, Polygons from ..element.util import categorical_aggregate2d from ..streams import RangeXY column_interfaces = [ArrayInterface, DictInterface]
 @@ -1,7 +1,13 @@ import numpy as np import param from ...core.util import cartesian_product from bokeh.models.mappers import LinearColorMapper try: from bokeh.models.mappers import LogColorMapper except ImportError: LogColorMapper = None from ...core.util import cartesian_product, is_nan, unique_array from ...element import Image, Raster, RGB from ..renderer import SkipRendering from ..util import map_colors @@ -130,27 +136,31 @@ class HeatmapPlot(ColorbarPlot): def _axes_props(self, plots, subplots, element, ranges): dims = element.dimensions() labels = self._get_axis_labels(dims) xvals, yvals = [element.dimension_values(i, False) for i in range(2)] agg = element.gridded xvals, yvals = [agg.dimension_values(i, False) for i in range(2)] if self.invert_yaxis: yvals = yvals[::-1] plot_ranges = {'x_range': [str(x) for x in xvals], 'y_range': [str(y) for y in yvals]} return ('auto', 'auto'), labels, plot_ranges def get_data(self, element, ranges=None, empty=False): x, y, z = element.dimensions(label=True) x, y, z = element.dimensions(label=True)[:3] aggregate = element.gridded style = self.style[self.cyclic_index] cmapper = self._get_colormapper(element.vdims[0], element, ranges, style) if empty: data = {x: [], y: [], z: [], 'color': []} data = {x: [], y: [], z: []} else: zvals = np.rot90(element.raster, 3).flatten() xvals, yvals = [[str(v) for v in element.dimension_values(i)] zvals = aggregate.dimension_values(z) xvals, yvals = [[str(v) for v in aggregate.dimension_values(i)] for i in range(2)] data = {x: xvals, y: yvals, z: zvals} data = {x: xvals, y: yvals, 'zvalues': zvals} return (data, {'x': x, 'y': y, 'fill_color': {'field': z, 'transform': cmapper}, if 'hover' in self.tools+self.default_tools: for vdim in element.vdims: data[vdim.name] = ['-' if is_nan(v) else vdim.pprint_value(v) for v in aggregate.dimension_values(vdim)] return (data, {'x': x, 'y': y, 'fill_color': {'field': 'zvalues', 'transform': cmapper}, 'height': 1, 'width': 1})
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.