Programmatically define arbitrarily large style mappings (#2075)

* Programmatically define arbitrary dash specs
* Add unique default markers and update tests
* Don't pass MarkerStyle into plt.plot. This fails; see matplotlib/matplotlib#17432
* Update release notes
* Emphasize that default dashes/markers are unique.
* Add refs to github PRs
mwaskom · May 16, 2020 · 92f160a · 92f160a
1 parent 5fbb5c4
commit 92f160a
Show file tree

Hide file tree

Showing 5 changed files with 179 additions and 23 deletions.
diff --git a/doc/releases/v0.11.0.txt b/doc/releases/v0.11.0.txt
@@ -2,14 +2,18 @@
 v0.11.0 (Unreleased)
 --------------------
 
-- Enforced keyword-only arguments for most parameters of most functions and classes.
+- TODO stub for explaining improvements to variable specification. Make this good!  GH2017
 
-- Standardized the parameter names for the oldest functions (:func:`distplot`, :func:`kdeplot`, and :func:`rugplot`) to be `x` and `y`, as in other functions. Using the old names will warn now and break in the future.
+- Enforced keyword-only arguments for most parameters of most functions and classes.  GH2052
 
-- Added a ``tight_layout`` method to :class:`FacetGrid` and :class:`PairGrid`, which runs the :func:`matplotlib.pyplot.tight_layout` algorithm without interference from the external legend.
+- Standardized the parameter names for the oldest functions (:func:`distplot`, :func:`kdeplot`, and :func:`rugplot`) to be `x` and `y`, as in other functions. Using the old names will warn now and break in the future.  GH2060
 
-- Added an explicit warning in :func:`swarmplot` when more than 2% of the points are overlap in the "gutters" of the swarm.
+- Plots with a ``style`` semantic can now generate an infinite number of unique dashes and/or markers by default. Previously, an error would be raised if the ``style`` variable had more levels than could be mapped using the default lists. The existing defaults were slightly modified as part of this change; if you need to exactly reproduce plots from earlier versions, refer to the `old defaults <https://github.com/mwaskom/seaborn/blob/v0.10.1/seaborn/relational.py#L24>`_. GH2075
 
-- Added the ``axes_dict`` attribute to :class:`FacetGrid` for named access to the component axes.
+- Added a ``tight_layout`` method to :class:`FacetGrid` and :class:`PairGrid`, which runs the :func:`matplotlib.pyplot.tight_layout` algorithm without interference from the external legend. GH2073
 
-- Made :meth:`FacetGrid.set_axis_labels` clear labels from "interior" axes.
+- Added an explicit warning in :func:`swarmplot` when more than 2% of the points overlap in the "gutters" of the swarm. GH2045
+
+- Added the ``axes_dict`` attribute to :class:`FacetGrid` for named access to the component axes.  GH2046
+
+- Made :meth:`FacetGrid.set_axis_labels` clear labels from "interior" axes.  GH2046
diff --git a/seaborn/core.py b/seaborn/core.py
@@ -1,4 +1,6 @@
+import itertools
 from collections.abc import Iterable, Sequence, Mapping
+
 import numpy as np
 import pandas as pd
 
@@ -228,3 +230,100 @@ def establish_variables_longform(self, data=None, **kwargs):
         }
 
         return plot_data, variables
+
+
+def unique_dashes(n):
+    """Build an arbitrarily long list of unique dash styles for lines.
+
+    Parameters
+    ----------
+    n : int
+        Number of unique dash specs to generate.
+
+    Returns
+    -------
+    dashes : list of strings or tuples
+        Valid arguments for the ``dashes`` parameter on
+        :class:`matplotlib.lines.Line2D`. The first spec is a solid
+        line (``""``), the remainder are sequences of long and short
+        dashes.
+
+    """
+    # Start with dash specs that are well distinguishable
+    dashes = [
+        "",
+        (4, 1.5),
+        (1, 1),
+        (3, 1.25, 1.5, 1.25),
+        (5, 1, 1, 1),
+    ]
+
+    # Now programmatically build as many as we need
+    p = 3
+    while len(dashes) < n:
+
+        # Take combinations of long and short dashes
+        a = itertools.combinations_with_replacement([3, 1.25], p)
+        b = itertools.combinations_with_replacement([4, 1], p)
+
+        # Interleave the combinations, reversing one of the streams
+        segment_list = itertools.chain(*zip(
+            list(a)[1:-1][::-1],
+            list(b)[1:-1]
+        ))
+
+        # Now insert the gaps
+        for segments in segment_list:
+            gap = min(segments)
+            spec = tuple(itertools.chain(*((seg, gap) for seg in segments)))
+            dashes.append(spec)
+
+        p += 1
+
+    return dashes[:n]
+
+
+def unique_markers(n):
+    """Build an arbitrarily long list of unique marker styles for points.
+
+    Parameters
+    ----------
+    n : int
+        Number of unique marker specs to generate.
+
+    Returns
+    -------
+    markers : list of strings or tuples
+        Values for defining :class:`matplotlib.markers.MarkerStyle` objects.
+        All markers will be filled.
+
+    """
+    # Start with marker specs that are well distinguishable
+    markers = [
+        "o",
+        "X",
+        (4, 0, 45),
+        "P",
+        (4, 0, 0),
+        (4, 1, 0),
+        "^",
+        (4, 1, 45),
+        "v",
+    ]
+
+    # Now generate more from regular polygons of increasing order
+    s = 5
+    while len(markers) < n:
+        a = 360 / (s + 1) / 2
+        markers.extend([
+            (s + 1, 1, a),
+            (s + 1, 0, a),
+            (s, 1, 0),
+            (s, 0, 0),
+        ])
+        s += 1
+
+    # Convert to MarkerStyle object, using only exactly what we need
+    # markers = [mpl.markers.MarkerStyle(m) for m in markers[:n]]
+
+    return markers[:n]
diff --git a/seaborn/relational.py b/seaborn/relational.py
@@ -7,10 +7,10 @@
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 
-from .core import _VectorPlotter
-from . import utils
+from .core import (_VectorPlotter, unique_dashes, unique_markers)
 from .utils import (categorical_order, get_color_cycle, ci_to_errsize,
-                    remove_na, locator_to_legend_entries)
+                    remove_na, locator_to_legend_entries,
+                    ci as ci_func)
 from .algorithms import bootstrap
 from .palettes import (color_palette, cubehelix_palette,
                        _parse_cubehelix_args, QUAL_PALETTES)
@@ -36,12 +36,6 @@ class _RelationalPlotter(_VectorPlotter):
     # TODO this should match style of other defaults
     _default_size_range = 0, 1
 
-    # Defaults for style semantic
-    default_markers = ["o", "X", "s", "P", "D", "^", "v", "p"]
-    default_dashes = ["", (4, 1.5), (1, 1),
-                      (3, 1, 1.5, 1), (5, 1, 1, 1),
-                      (5, 1, 2, 1, 2, 1)]
-
     def categorical_to_palette(self, data, order, palette):
         """Determine colors when the hue variable is qualitative."""
         # -- Identify the order and name of the levels
@@ -250,6 +244,8 @@ def parse_hue(self, data, palette=None, order=None, norm=None):
         self.cmap = cmap
 
         # Update data as it may have changed dtype
+        # TODO This is messy! We need to rethink the order of operations
+        # to avoid changing the plot data after we have it.
         self.plot_data["hue"] = data
 
     def parse_size(self, data, sizes=None, order=None, norm=None):
@@ -373,11 +369,11 @@ def parse_style(self, data, markers=None, dashes=None, order=None):
                 levels = order
 
             markers = self.style_to_attributes(
-                levels, markers, self.default_markers, "markers"
+                levels, markers, unique_markers(len(levels)), "markers"
             )
 
             dashes = self.style_to_attributes(
-                levels, dashes, self.default_dashes, "dashes"
+                levels, dashes, unique_dashes(len(levels)), "dashes"
             )
 
         paths = {}
@@ -592,7 +588,7 @@ def bootstrapped_cis(vals):
                 return null_ci
 
             boots = bootstrap(vals, func=func, n_boot=n_boot, seed=seed)
-            cis = utils.ci(boots, ci)
+            cis = ci_func(boots, ci)
             return pd.Series(cis, ["low", "high"])
 
         # Group and get the aggregation estimate

diff --git a/seaborn/tests/test_core.py b/seaborn/tests/test_core.py
@@ -1,8 +1,13 @@
 import numpy as np
+import matplotlib as mpl
 
 from numpy.testing import assert_array_equal
 
-from ..core import _VectorPlotter
+from ..core import (
+    _VectorPlotter,
+    unique_dashes,
+    unique_markers,
+)
 
 
 class TestVectorPlotter:
@@ -33,3 +38,28 @@ def test_flat_variables(self, flat_data):
 
         assert p.variables["x"] == expected_x_name
         assert p.variables["y"] == expected_y_name
+
+
+class TestCoreFunc:
+
+    def test_unique_dashes(self):
+
+        n = 24
+        dashes = unique_dashes(n)
+
+        assert len(dashes) == n
+        assert len(set(dashes)) == n
+        assert dashes[0] == ""
+        for spec in dashes[1:]:
+            assert isinstance(spec, tuple)
+            assert not len(spec) % 2
+
+    def test_unique_markers(self):
+
+        n = 24
+        markers = unique_markers(n)
+
+        assert len(markers) == n
+        assert len(set(markers)) == n
+        for m in markers:
+            assert mpl.markers.MarkerStyle(m).is_filled()
diff --git a/seaborn/tests/test_relational.py b/seaborn/tests/test_relational.py
@@ -11,6 +11,11 @@
 from ..palettes import color_palette
 from ..utils import categorical_order
 
+from ..core import (
+    unique_dashes,
+    unique_markers,
+)
+
 from ..relational import (
     _RelationalPlotter,
     _LinePlotter,
@@ -860,8 +865,19 @@ def test_parse_style(self, long_df):
         # Test defaults
         markers, dashes = True, True
         p.parse_style(p.plot_data["style"], markers, dashes)
-        assert p.markers == dict(zip(p.style_levels, p.default_markers))
-        assert p.dashes == dict(zip(p.style_levels, p.default_dashes))
+
+        n = len(p.style_levels)
+        assert p.dashes == dict(zip(p.style_levels, unique_dashes(n)))
+
+        actual_marker_paths = {
+            k: mpl.markers.MarkerStyle(m).get_path()
+            for k, m in p.markers.items()
+        }
+        expected_marker_paths = {
+            k: mpl.markers.MarkerStyle(m).get_path()
+            for k, m in zip(p.style_levels, unique_markers(n))
+        }
+        assert actual_marker_paths == expected_marker_paths
 
         # Test lists
         markers, dashes = ["o", "s", "d"], [(1, 0), (1, 1), (2, 1, 3, 1)]
@@ -880,8 +896,19 @@ def test_parse_style(self, long_df):
         style_order = np.take(p.style_levels, [1, 2, 0])
         markers = dashes = True
         p.parse_style(p.plot_data["style"], markers, dashes, style_order)
-        assert p.markers == dict(zip(style_order, p.default_markers))
-        assert p.dashes == dict(zip(style_order, p.default_dashes))
+
+        n = len(style_order)
+        assert p.dashes == dict(zip(style_order, unique_dashes(n)))
+
+        actual_marker_paths = {
+            k: mpl.markers.MarkerStyle(m).get_path()
+            for k, m in p.markers.items()
+        }
+        expected_marker_paths = {
+            k: mpl.markers.MarkerStyle(m).get_path()
+            for k, m in zip(style_order, unique_markers(n))
+        }
+        assert actual_marker_paths == expected_marker_paths
 
         # Test too many levels with style lists
         markers, dashes = ["o", "s"], False