diff --git a/CHANGELOG.md b/CHANGELOG.md index a5781877d1..10172c4b64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,15 @@ This project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +## [1.8.7] - 2015-10-01 +### Added +- The FigureFactory can now create dendrogram plots with `.create_dendrogram`. + ## [1.8.6] - 2015-09-28 +### Fixed - Saving "world_readable" to your config file via `plotly.tools.set_config` actually works. + +### Added - You can also save `auto_open` and `sharing` to the config file so that you can forget these keyword arguments in `py.iplot` and `py.plot`. diff --git a/plotly/tests/test_core/test_tools/test_figure_factory.py b/plotly/tests/test_core/test_tools/test_figure_factory.py index ff90b0d2ce..3616eb1e9c 100644 --- a/plotly/tests/test_core/test_tools/test_figure_factory.py +++ b/plotly/tests/test_core/test_tools/test_figure_factory.py @@ -3,7 +3,6 @@ import datetime from nose.tools import raises - import plotly.tools as tls from plotly.exceptions import PlotlyError from plotly.graph_objs import graph_objs diff --git a/plotly/tests/test_optional/optional_utils.py b/plotly/tests/test_optional/optional_utils.py index c31507bd47..684e25faea 100644 --- a/plotly/tests/test_optional/optional_utils.py +++ b/plotly/tests/test_optional/optional_utils.py @@ -3,7 +3,12 @@ import matplotlib # Force matplotlib to not use any Xwindows backend. 
class NumpyTestUtilsMixin(object):
    """
    Mixin adding a numpy-aware dict assertion to a `unittest.TestCase`.

    The methods call `self.fail`, `self.assertIsInstance` and
    `self._formatMessage`, so the mixin must be combined with a TestCase.

    """

    def _format_path(self, path):
        """
        Render a key path as chained subscripts, e.g. ('a', 0) -> "['a'][0]".

        :param (tuple) path: Keys/indices leading to a nested value.
        :rtype (str): The path formatted for use in failure messages.

        """
        return '[' + ']['.join(repr(p) for p in path) + ']'

    def assert_dict_equal(self, d1, d2, msg=None):
        """
        Assert that two (possibly nested) dictionaries hold equal values.

        Values are compared with `np.array_equal()` when either side is a
        numpy array and with `==` otherwise. Numeric lists that are not
        exactly equal still pass when they have the same shape and
        `np.allclose()` accepts them.

        :param (dict) d1: First dictionary; its nodes drive the traversal.
        :param (dict) d2: Second dictionary, compared node by node.
        :param (str|None) msg: Optional message handed to
            `self._formatMessage` together with the generated one.
        :raises: (AssertionError) Using TestCase's self.failureException

        """
        self.assertIsInstance(d1, dict, 'First argument is not a dictionary')
        self.assertIsInstance(d2, dict, 'Second argument is not a dictionary')

        # NOTE(review): node_generator presumably yields every nested dict of
        # d1 together with the key path leading to it -- confirm in
        # plotly.utils.
        for node, path in node_generator(d1):

            # first check that this sub-dict is contained in both dicts
            try:
                comp_node = get_by_path(d2, path)
            except (KeyError, IndexError):
                standard_msg = (
                    'Path {} exists in dict 1, but not dict 2.'
                    .format(self._format_path(path))
                )
                self.fail(self._formatMessage(msg, standard_msg))
            self.assertIsInstance(
                comp_node, dict,
                'Value at path {} is not a dict.'
                .format(self._format_path(path))
            )

            # check that each key in the first is contained in the second
            for key, val in node.items():
                if isinstance(val, dict):
                    continue  # this gets tested as its own node

                # check that the values at this key are equal
                val_path = path + (key, )
                try:
                    comp_val = comp_node[key]
                except KeyError:
                    standard_msg = (
                        'Path {} exists in dict 1, but not dict 2.'
                        .format(self._format_path(val_path))
                    )
                    self.fail(self._formatMessage(msg, standard_msg))

                # `==` on arrays is element-wise, so arrays need array_equal
                if (isinstance(val, np.ndarray) or
                        isinstance(comp_val, np.ndarray)):
                    if np.array_equal(val, comp_val):
                        continue
                elif val == comp_val:
                    continue

                # np.allclose broadcasts its arguments: without the shape
                # guard, lists of different length either raise ValueError
                # or (e.g. [1, 1, 1] vs [1]) wrongly compare as equal.
                if is_num_list(val) and is_num_list(comp_val):
                    if (np.shape(val) == np.shape(comp_val) and
                            np.allclose(val, comp_val)):
                        continue

                standard_msg = (
                    'Value comparison failed at path {}.\n'
                    '{} != {}'
                    .format(self._format_path(val_path), val, comp_val)
                )
                self.fail(self._formatMessage(msg, standard_msg))

            # finally, check that keys in the second are in the first
            for key in comp_node:
                if key not in node:
                    standard_msg = (
                        'Path {} exists in dict 2, but not dict 1.'
                        .format(self._format_path(path + (key, )))
                    )
                    self.fail(self._formatMessage(msg, standard_msg))
class TestDendrogram(NumpyTestUtilsMixin, TestCase):
    """Tests for `FigureFactory.create_dendrogram`."""

    # Expected (x, y) coordinates of the three links produced for the fixed
    # 4x4 observation matrix used by the deterministic tests.
    SIMPLE_TREE = [
        ([25., 25., 35., 35.], [0., 1., 1., 0.]),
        ([15., 15., 30., 30.], [0., 2.23606798, 2.23606798, 1.]),
        ([5., 5., 22.5, 22.5], [0., 3.60555128, 3.60555128, 2.23606798]),
    ]

    def _expected_trace(self, color, x=None, y=None):
        """Build an expected link trace; x/y are left out when `x` is None."""
        kwargs = dict(
            marker=go.Marker(color=color),
            mode='lines',
            xaxis='x',
            yaxis='y'
        )
        if x is not None:
            kwargs['x'] = np.array(x)
            kwargs['y'] = np.array(y)
        return go.Scatter(**kwargs)

    def _expected_layout(self, tickvals, ticktext=None):
        """Build the layout expected for a default (bottom) dendrogram.

        `ticktext` is omitted from the x-axis when None, for tests that
        check the labels separately.
        """
        xaxis = dict(
            mirror='allticks',
            rangemode='tozero',
            showgrid=False,
            showline=True,
            showticklabels=True,
            tickmode='array',
            ticks='outside',
            tickvals=tickvals,
            type='linear',
            zeroline=False
        )
        if ticktext is not None:
            xaxis['ticktext'] = np.array(ticktext)

        return go.Layout(
            autosize=False,
            height='100%',
            hovermode='closest',
            showlegend=False,
            width='100%',
            xaxis=go.XAxis(**xaxis),
            yaxis=go.YAxis(
                mirror='allticks',
                rangemode='tozero',
                showgrid=False,
                showline=True,
                showticklabels=True,
                ticks='outside',
                type='linear',
                zeroline=False
            )
        )

    def _assert_figure_equal(self, figure, expected):
        """Compare two figures trace by trace, then their layouts."""
        # comparing piecewise is a bit clearer when debugging tests.
        for trace, expected_trace in zip(figure['data'], expected['data']):
            self.assert_dict_equal(trace, expected_trace)
        self.assert_dict_equal(figure['layout'], expected['layout'])

    def test_default_dendrogram(self):
        X = np.array([[1, 2, 3, 4], [1, 1, 3, 4], [1, 2, 1, 4], [1, 2, 3, 1]])
        dendro = tls.FigureFactory.create_dendrogram(X=X)

        green, blue = 'rgb(61,153,112)', 'rgb(0,116,217)'
        expected_dendro = go.Figure(
            data=go.Data([
                self._expected_trace(color, x, y)
                for color, (x, y) in zip([green, green, blue],
                                         self.SIMPLE_TREE)
            ]),
            layout=self._expected_layout(
                tickvals=[5.0, 15.0, 25.0, 35.0],
                ticktext=['3', '2', '0', '1']
            )
        )

        self.assertEqual(len(dendro['data']), 3)
        self._assert_figure_equal(dendro, expected_dendro)

    def test_dendrogram_random_matrix(self):

        # create a random uncorrelated matrix
        X = np.random.rand(5, 5)

        # variable 2 is correlated with all the other variables
        X[2, :] = sum(X, 0)

        names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark']
        dendro = tls.FigureFactory.create_dendrogram(X, labels=names)

        green, blue = 'rgb(61,153,112)', 'rgb(0,116,217)'
        expected_dendro = go.Figure(
            data=go.Data([
                self._expected_trace(color)
                for color in [green, green, green, blue]
            ]),
            layout=self._expected_layout(
                tickvals=[5.0, 15.0, 25.0, 35.0, 45.0]
            )
        )

        self.assertEqual(len(dendro['data']), 4)

        # it's random, so we can only check that the values aren't equal;
        # popping the coordinates also removes them from the comparison below
        for coord in ('y', 'x'):
            vals = [trace.pop(coord) for trace in dendro['data']]
            for i, first in enumerate(vals):
                for j, second in enumerate(vals):
                    if i != j:
                        self.assertFalse(np.allclose(first, second))

        # we also need to check the ticktext manually
        xaxis_ticktext = dendro['layout']['xaxis'].pop('ticktext')
        self.assertEqual(xaxis_ticktext[0], 'John')

        self._assert_figure_equal(dendro, expected_dendro)

    def test_dendrogram_orientation(self):
        X = np.random.rand(5, 5)

        dendro_left = tls.FigureFactory.create_dendrogram(
            X, orientation='left')
        self.assertEqual(len(dendro_left['layout']['yaxis']['ticktext']), 5)
        tickvals_left = np.array(dendro_left['layout']['yaxis']['tickvals'])
        self.assertTrue((tickvals_left <= 0).all())

        dendro_right = tls.FigureFactory.create_dendrogram(
            X, orientation='right')
        tickvals_right = np.array(dendro_right['layout']['yaxis']['tickvals'])
        self.assertTrue((tickvals_right >= 0).all())

        dendro_bottom = tls.FigureFactory.create_dendrogram(
            X, orientation='bottom')
        self.assertEqual(len(dendro_bottom['layout']['xaxis']['ticktext']), 5)
        tickvals_bottom = np.array(
            dendro_bottom['layout']['xaxis']['tickvals']
        )
        self.assertTrue((tickvals_bottom >= 0).all())

        dendro_top = tls.FigureFactory.create_dendrogram(X, orientation='top')
        tickvals_top = np.array(dendro_top['layout']['xaxis']['tickvals'])
        self.assertTrue((tickvals_top <= 0).all())

    def test_dendrogram_colorscale(self):
        X = np.array([[1, 2, 3, 4],
                      [1, 1, 3, 4],
                      [1, 2, 1, 4],
                      [1, 2, 3, 1]])
        greyscale = [
            'rgb(0,0,0)',  # black
            'rgb(105,105,105)',  # dim grey
            'rgb(128,128,128)',  # grey
            'rgb(169,169,169)',  # dark grey
            'rgb(192,192,192)',  # silver
            'rgb(211,211,211)',  # light grey
            'rgb(220,220,220)',  # gainsboro
            'rgb(245,245,245)']  # white smoke

        dendro = tls.FigureFactory.create_dendrogram(X, colorscale=greyscale)

        grey, black = 'rgb(128,128,128)', 'rgb(0,0,0)'
        expected_dendro = go.Figure(
            data=go.Data([
                self._expected_trace(color, x, y)
                for color, (x, y) in zip([grey, grey, black],
                                         self.SIMPLE_TREE)
            ]),
            layout=self._expected_layout(
                tickvals=[5.0, 15.0, 25.0, 35.0],
                ticktext=['3', '2', '0', '1']
            )
        )

        self.assertEqual(len(dendro['data']), 3)
        self._assert_figure_equal(dendro, expected_dendro)
@staticmethod
def create_dendrogram(X, orientation="bottom", labels=None,
                      colorscale=None):
    """
    BETA function that returns a dendrogram Plotly figure object.

    :param (ndarray) X: Matrix of observations as array of arrays
    :param (str) orientation: 'top', 'right', 'bottom', or 'left'
    :param (list) labels: List of axis category labels(observation labels)
    :param (list) colorscale: Optional colorscale for dendrogram tree
        clusters
    :raises: (ImportError) If scipy, scipy.spatial or
        scipy.cluster.hierarchy could not be imported.
    :raises: (PlotlyError) If X is not a 2-dimensional array.
    :rtype (dict): A figure dictionary with 'layout' and 'data' keys.

    Example 1: Simple bottom oriented dendrogram
    ```
    import numpy as np

    import plotly.plotly as py
    from plotly.tools import FigureFactory as FF

    X = np.random.rand(5,5)
    dendro = FF.create_dendrogram(X)
    py.iplot(dendro, validate=False, height=300, width=1000)

    ```

    Example 2: Dendrogram to put on the left of the heatmap
    ```
    X = np.random.rand(5,5)
    names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark']
    dendro = FF.create_dendrogram(X, orientation='right', labels=names)

    py.iplot(dendro, validate=False, height=1000, width=300)
    ```

    """
    dependencies = (_scipy_imported and _scipy__spatial_imported and
                    _scipy__cluster__hierarchy_imported)

    if dependencies is False:
        # Implicit string concatenation keeps the source indentation out of
        # the message (a backslash continuation inside the literal does not).
        raise ImportError("FigureFactory.create_dendrogram requires scipy, "
                          "scipy.spatial and scipy.cluster.hierarchy")

    s = X.shape
    if len(s) != 2:
        # The exception must be raised; merely constructing it let invalid
        # input fall through to scipy.
        raise exceptions.PlotlyError("X should be 2-dimensional array.")

    dendrogram = _Dendrogram(X, orientation, labels, colorscale)

    return {'layout': dendrogram.layout,
            'data': dendrogram.data}
class _Dendrogram(FigureFactory):
    """
    Computes the traces and layout of a dendrogram figure.

    Refer to FigureFactory.create_dendrogram() for docstring.

    After construction, `data` holds the link traces, `layout` the layout
    dictionary, `labels` the leaf labels in plotting order and `leaves` the
    leaf ordering reported by scipy.

    """

    def __init__(self, X, orientation='bottom', labels=None, colorscale=None,
                 width="100%", height="100%", xaxis='xaxis', yaxis='yaxis'):
        """
        Cluster `X` and build the figure's data and layout.

        :param (ndarray) X: Matrix of observations as array of arrays
        :param (str) orientation: 'top', 'right', 'bottom', or 'left'
        :param (list) labels: List of observation labels
        :param (list) colorscale: Optional colors for the tree clusters
        :param width: Value stored under the layout's 'width' key
        :param height: Value stored under the layout's 'height' key
        :param (str) xaxis: Layout key of the x-axis, e.g. 'xaxis'
        :param (str) yaxis: Layout key of the y-axis, e.g. 'yaxis'

        """
        # TODO: protected until #282
        from plotly.graph_objs import graph_objs
        self.orientation = orientation
        self.labels = labels
        self.xaxis = xaxis
        self.yaxis = yaxis
        self.data = []
        self.leaves = []
        self.layout = {self.xaxis: {}, self.yaxis: {}}

        # Factor (+1 or -1) applied to the coordinates drawn on each axis;
        # it mirrors the tree according to the requested orientation.
        self.sign = {
            self.xaxis: 1 if self.orientation in ['left', 'bottom'] else -1,
            self.yaxis: 1 if self.orientation in ['right', 'bottom'] else -1,
        }

        (dd_traces, xvals, yvals,
         ordered_labels, leaves) = self.get_dendrogram_traces(X, colorscale)

        self.labels = ordered_labels
        self.leaves = leaves

        # Unique, sorted positions of every link corner that sits at height
        # zero; these become the tick positions of the label axis.
        self.zero_vals = sorted(set(
            xval for xval, yval in zip(xvals.flatten(), yvals.flatten())
            if yval == 0.0
        ))

        self.layout = self.set_figure_layout(width, height)
        self.data = graph_objs.Data(dd_traces)

    @staticmethod
    def _axis_reference(axis_key):
        """Map a layout axis key to a trace reference: 'xaxis2' -> 'x2'."""
        return axis_key.replace('axis', '')

    def get_color_dict(self, colorscale):
        """
        Returns colorscale used for dendrogram tree clusters.

        :param (list) colorscale: Colors to use for the plot in rgb format.
        :rtype (dict): A dict of default colors mapped to the user colorscale.

        """

        # These are the color codes returned for dendrograms
        # We're replacing them with nicer colors
        d = {'r': 'red',
             'g': 'green',
             'b': 'blue',
             'c': 'cyan',
             'm': 'magenta',
             'y': 'yellow',
             'k': 'black',
             'w': 'white'}
        default_colors = OrderedDict(sorted(d.items(), key=lambda t: t[0]))

        if colorscale is None:
            colorscale = [
                'rgb(0,116,217)',  # blue
                'rgb(35,205,205)',  # cyan
                'rgb(61,153,112)',  # green
                'rgb(40,35,35)',  # black
                'rgb(133,20,75)',  # magenta
                'rgb(255,65,54)',  # red
                'rgb(255,255,255)',  # white
                'rgb(255,220,0)']  # yellow

        # Colors are assigned to the color codes in alphabetical order; codes
        # beyond the end of a short colorscale keep their default name.
        for key, color in zip(list(default_colors), colorscale):
            default_colors[key] = color

        return default_colors

    def set_axis_layout(self, axis_key):
        """
        Sets and returns default axis object for dendrogram figure.

        :param (str) axis_key: E.g., 'xaxis', 'xaxis1', 'yaxis', yaxis1', etc.
        :rtype (dict): An axis_key dictionary with set parameters.

        """
        axis_defaults = {
            'type': 'linear',
            'ticks': 'outside',
            'mirror': 'allticks',
            'rangemode': 'tozero',
            'showticklabels': True,
            'zeroline': False,
            'showgrid': False,
            'showline': True,
        }

        # self.labels is a numpy array here, so test its length rather than
        # its truthiness.
        if len(self.labels) != 0:
            axis_key_labels = self.xaxis
            if self.orientation in ['left', 'right']:
                axis_key_labels = self.yaxis
            if axis_key_labels not in self.layout:
                self.layout[axis_key_labels] = {}
            # The ticks belong to the label axis, so they take that axis'
            # sign, independent of which axis is being configured.
            label_sign = self.sign[axis_key_labels]
            self.layout[axis_key_labels]['tickvals'] = \
                [zv*label_sign for zv in self.zero_vals]
            self.layout[axis_key_labels]['ticktext'] = self.labels
            self.layout[axis_key_labels]['tickmode'] = 'array'

        self.layout[axis_key].update(axis_defaults)

        return self.layout[axis_key]

    def set_figure_layout(self, width, height):
        """
        Sets and returns default layout object for dendrogram figure.

        :param width: Value stored under the layout's 'width' key
        :param height: Value stored under the layout's 'height' key
        :rtype (dict): The updated layout dictionary.

        """
        self.layout.update({
            'showlegend': False,
            'autosize': False,
            'hovermode': 'closest',
            'width': width,
            'height': height
        })

        self.set_axis_layout(self.xaxis)
        self.set_axis_layout(self.yaxis)

        return self.layout

    def get_dendrogram_traces(self, X, colorscale):
        """
        Calculates all the elements needed for plotting a dendrogram.

        :param (ndarray) X: Matrix of observations as array of arrays
        :param (list) colorscale: Color scale for dendrogram tree clusters
        :rtype (tuple): Contains all the traces in the following order:
            (a) trace_list: List of Plotly trace objects for dendrogram tree
            (b) icoord: All X points of the dendrogram tree as array of arrays
                with length 4
            (c) dcoord: All Y points of the dendrogram tree as array of arrays
                with length 4
            (d) ordered_labels: leaf labels in the order they are going to
                appear on the plot
            (e) P['leaves']: left-to-right traversal of the leaves

        """
        # TODO: protected until #282
        from plotly.graph_objs import graph_objs
        d = scs.distance.pdist(X)
        Z = sch.linkage(d, method='complete')
        P = sch.dendrogram(Z, orientation=self.orientation,
                           labels=self.labels, no_plot=True)

        # np.array replaces the deprecated scipy root-namespace alias.
        icoord = np.array(P['icoord'])
        dcoord = np.array(P['dcoord'])
        ordered_labels = np.array(P['ivl'])
        colors = self.get_color_dict(colorscale)

        # For 'top'/'bottom' trees the leaf positions run along x; for
        # 'left'/'right' trees the two coordinate sets swap roles.
        if self.orientation in ['top', 'bottom']:
            all_xs, all_ys = icoord, dcoord
        else:
            all_xs, all_ys = dcoord, icoord

        # Trace axis references are strings ('x', 'x2', ...). They are the
        # same for every link, so compute them once.
        x_ref = self._axis_reference(self.xaxis)
        y_ref = self._axis_reference(self.yaxis)

        trace_list = []

        # xs and ys are arrays of 4 points that make up the '∩' shapes
        # of the dendrogram tree
        for xs, ys, color_key in zip(all_xs, all_ys, P['color_list']):
            trace = graph_objs.Scatter(
                x=np.multiply(self.sign[self.xaxis], xs),
                y=np.multiply(self.sign[self.yaxis], ys),
                mode='lines',
                marker=graph_objs.Marker(color=colors[color_key])
            )
            trace['xaxis'] = x_ref
            trace['yaxis'] = y_ref

            trace_list.append(trace)

        return trace_list, icoord, dcoord, ordered_labels, P['leaves']