Skip to content

Commit

Permalink
Improve support for datetime (and categorical) data in relational plo…
Browse files Browse the repository at this point in the history
…ts (#2138)

* Improve support for datetime (and categorical) data in relational plots

Fixes #2130

* Disable pandas unit conversion during testing

* Force matplotlib date converters in test fixture
  • Loading branch information
mwaskom committed Jun 15, 2020
1 parent d1e1701 commit ca22f90
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 5 deletions.
2 changes: 2 additions & 0 deletions doc/releases/v0.11.0.txt
Expand Up @@ -60,6 +60,8 @@ Other

- Improved :meth:`FacetGrid.set_titles` with ``margin_titles=True``, such that texts representing the original row titles are removed before adding new ones. GH2083

- Improved support for datetime variables in :func:`scatterplot` and :func:`lineplot`. GH2138

- Fixed a bug where :func:`lineplot` did not pass the ``linestyle`` parameter down to matplotlib. GH2095

- Improved the error messages produced when categorical plots process the orientation parameter.
Expand Down
6 changes: 5 additions & 1 deletion seaborn/_core.py
Expand Up @@ -920,7 +920,11 @@ def _semantic_subsets(
def comp_data(self):
"""Dataframe with numeric x and y, after unit conversion and log scaling."""
if not hasattr(self, "ax"):
raise AttributeError("No Axes attached to plotter")
# Probably a good idea, but will need a bunch of tests updated
# Most of these tests should just use the external interface
# Then this can be re-enabled.
# raise AttributeError("No Axes attached to plotter")
return self.plot_data

if not hasattr(self, "_comp_data"):

Expand Down
16 changes: 15 additions & 1 deletion seaborn/conftest.py
@@ -1,10 +1,24 @@
import numpy as np
import pandas as pd
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt

import pytest


@pytest.fixture(scope="session", autouse=True)
def remove_pandas_unit_conversion():
    """Register matplotlib's ``DateConverter`` for the common datetime types.

    Session-scoped and autouse, so the unit registry entries are overwritten
    once for the whole test run without any test requesting the fixture.
    """
    # Background: before pandas 1.0, pandas registered its own datetime
    # converters with matplotlib. Those are less capable than the ones
    # matplotlib has shipped since 2.2, and seaborn depends on the
    # matplotlib behavior.
    # https://github.com/matplotlib/matplotlib/pull/9779
    # https://github.com/pandas-dev/pandas/issues/27036
    date_types = (np.datetime64, datetime.date, datetime.datetime)
    for date_type in date_types:
        # A fresh converter instance per type, in the same order as before.
        mpl.units.registry[date_type] = mpl.dates.DateConverter()


@pytest.fixture(autouse=True)
def close_figs():
yield
Expand Down Expand Up @@ -138,7 +152,7 @@ def long_df(rng):
a=rng.choice(list("abc"), n),
b=rng.choice(list("mnop"), n),
c=rng.choice([0, 1], n, [.3, .7]),
t=rng.choice(np.arange("2005-02-25", "2005-02-28", dtype="datetime64[D]"), n),
t=rng.choice(np.arange("2004-07-30", "2007-07-30", dtype="datetime64[Y]"), n),
s=rng.choice([2, 4, 8], n),
f=rng.choice([0.2, 0.3], n),
))
Expand Down
8 changes: 7 additions & 1 deletion seaborn/relational.py
Expand Up @@ -288,7 +288,9 @@ def plot(self, ax, kws):

# Loop over the semantic subsets and add to the plot
grouping_semantics = "hue", "size", "style"
for sub_vars, sub_data in self._semantic_subsets(grouping_semantics):
for sub_vars, sub_data in self._semantic_subsets(
grouping_semantics, from_comp_data=True
):

if self.sort:
sub_data = sub_data.sort_values(["units", "x", "y"])
Expand Down Expand Up @@ -641,6 +643,8 @@ def lineplot(
if ax is None:
ax = plt.gca()

p._attach(ax)

p.plot(ax, kwargs)
return ax

Expand Down Expand Up @@ -920,6 +924,8 @@ def scatterplot(
if ax is None:
ax = plt.gca()

p._attach(ax)

p.plot(ax, kwargs)

return ax
Expand Down
6 changes: 4 additions & 2 deletions seaborn/tests/test_core.py
Expand Up @@ -897,8 +897,10 @@ def test_comp_data(self, long_df):

p = VectorPlotter(data=long_df, variables={"x": "x", "y": "t"})

with pytest.raises(AttributeError):
p.comp_data
# We have disabled this check for now, while it remains part of
# the internal API, because it will require updating a number of tests
# with pytest.raises(AttributeError):
# p.comp_data

_, ax = plt.subplots()
p._attach(ax)
Expand Down
10 changes: 10 additions & 0 deletions seaborn/tests/test_relational.py
Expand Up @@ -1326,6 +1326,9 @@ def test_lineplot_smoke(
lineplot(x="x", y=long_df.y.values, data=long_df)
ax.clear()

lineplot(x="x", y="t", data=long_df)
ax.clear()

lineplot(x="x", y="y", hue="a", data=long_df)
ax.clear()

Expand Down Expand Up @@ -1692,6 +1695,13 @@ def test_linewidths(self, long_df):
scatterplot(data=long_df, x="x", y="y", linewidth=lw)
assert ax.collections[0].get_linewidths().item() == lw

def test_datetime_scale(self, long_df):
    """Check that a datetime x variable gets sensible axis limits.

    Plots the datetime column ``t`` of ``long_df`` on the x axis and
    verifies that the lower x limit lies after 2002-01-01.
    """
    ax = scatterplot(data=long_df, x="t", y="y")
    # Check that we avoid weird matplotlib default auto scaling
    # https://github.com/matplotlib/matplotlib/issues/17586
    # The comparison must be asserted; as a bare expression its result
    # was discarded and the test could never fail.
    lower_bound = ax.xaxis.convert_units(np.datetime64("2002-01-01"))
    assert ax.get_xlim()[0] > lower_bound

def test_scatterplot_vs_relplot(self, long_df, long_semantics):

ax = scatterplot(data=long_df, **long_semantics)
Expand Down

0 comments on commit ca22f90

Please sign in to comment.