From 8e91fea185741c5b242890b0c18f9674976906bc Mon Sep 17 00:00:00 2001 From: tarsur909 <68882529+tarsur909@users.noreply.github.com> Date: Sat, 29 Jul 2023 00:17:57 -0700 Subject: [PATCH 1/6] fix hardcoded scatter marker size issue #54204 --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/plotting/_matplotlib/core.py | 11 ++++++++--- pandas/tests/plotting/test_misc.py | 30 +++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 0fdec3175f635..3d7f154671f01 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -605,7 +605,7 @@ Plotting ^^^^^^^^ - Bug in :meth:`Series.plot` when invoked with ``color=None`` (:issue:`51953`) - Fixed UserWarning in :meth:`DataFrame.plot.scatter` when invoked with ``c="b"`` (:issue:`53908`) -- +- Fixed bug in :meth:`DataFrame.plot.scatter` wherein marker size was previously hardcoded to a default value (:issue:`54204`) Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index c62f73271577d..4b61964b1ffa0 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1199,9 +1199,14 @@ def _kind(self) -> Literal["scatter"]: def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: if s is None: - # hide the matplotlib default for size, in case we want to change - # the handling of this argument later - s = 20 + # The default size of the elements in a scatter plot + # is now based on the rcParam ``lines.markersize``. + # This means that if rcParams are temporarily changed, + # the marker size changes as well according to mpl.rc_context(). + if mpl.rcParams["_internal.classic_mode"]: + s = 20 + else: + s = mpl.rcParams["lines.markersize"] ** 2.0 elif is_hashable(s) and s in data.columns: s = data[s] super().__init__(data, x, y, s=s, **kwargs) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 53219e0d20b6d..f6d933716175f 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -662,3 +662,33 @@ def test_bar_plt_xaxis_intervalrange(self): (a.get_text() == b.get_text()) for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) + + def test_change_scatter_markersize_rcparams(self): + # GH 54204 + # Ensure proper use of lines.markersize to style pandas scatter + # plots like matplotlib does + df = DataFrame(data={"x": [1, 2, 3], "y": [1, 2, 3]}) + + pandas_default = df.plot.scatter( + x="x", y="y", title="pandas scatter, default rc marker size" + ) + + mpl_default = mpl.pyplot.scatter(df["x"], df["y"]) + + # verify that pandas and matplotlib scatter + # default marker size are the same (s = 6^2 = 36) + assert ( + pandas_default.collections[0].get_sizes()[0] == mpl_default.get_sizes()[0] + ) + + with mpl.rc_context({"lines.markersize": 10}): + pandas_changed = df.plot.scatter( + x="x", y="y", title="pandas scatter, changed rc marker size" + ) + mpl_changed = mpl.pyplot.scatter(df["x"], df["y"]) + + # verify that pandas and matplotlib scatter + # default marker size are the same (s = 10^2 = 100) + assert ( + pandas_changed.collections[0].get_sizes()[0] == mpl_changed.get_sizes()[0] + ) From fba89927a28e700d53f7e5d0a33cc7df1525f595 Mon Sep 17 00:00:00 2001 From: tarsur909 <68882529+tarsur909@users.noreply.github.com> Date: Mon, 31 Jul 2023 21:56:09 -0700 Subject: [PATCH 2/6] fix hardcoded scatter marker size issue #54204 --- pandas/plotting/_matplotlib/core.py | 12 ++++++++---- pandas/tests/plotting/test_misc.py | 6 ++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 4b61964b1ffa0..f3e9ec094168f 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1203,10 +1203,14 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs) -> None: # is now based on the rcParam ``lines.markersize``. # This means that if rcParams are temporarily changed, # the marker size changes as well according to mpl.rc_context(). - if mpl.rcParams["_internal.classic_mode"]: - s = 20 - else: - s = mpl.rcParams["lines.markersize"] ** 2.0 + warnings.warn( + """The default of s=20 is deprecated and + has changed to mpl.rcParams['lines.markersize']. + Specify `s` to suppress this warning""", + DeprecationWarning, + stacklevel=find_stack_level(), + ) + s = mpl.rcParams["lines.markersize"] ** 2.0 elif is_hashable(s) and s in data.columns: s = data[s] super().__init__(data, x, y, s=s, **kwargs) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index f6d933716175f..428073f7a7a04 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -663,10 +663,12 @@ def test_bar_plt_xaxis_intervalrange(self): for a, b in zip(s.plot.bar().get_xticklabels(), expected) ) + @pytest.mark.filterwarnings("default") def test_change_scatter_markersize_rcparams(self): # GH 54204 # Ensure proper use of lines.markersize to style pandas scatter - # plots like matplotlib does + # plots like matplotlib does. + # Will raise deprecation warnings. df = DataFrame(data={"x": [1, 2, 3], "y": [1, 2, 3]}) pandas_default = df.plot.scatter( @@ -688,7 +690,7 @@ def test_change_scatter_markersize_rcparams(self): mpl_changed = mpl.pyplot.scatter(df["x"], df["y"]) # verify that pandas and matplotlib scatter - # default marker size are the same (s = 10^2 = 100) + # changed marker size are the same (s = 10^2 = 100) assert ( pandas_changed.collections[0].get_sizes()[0] == mpl_changed.get_sizes()[0] ) From ad6717008ef0b9ce265b6e79e1983f77c321ff6f Mon Sep 17 00:00:00 2001 From: tarsur909 <68882529+tarsur909@users.noreply.github.com> Date: Mon, 31 Jul 2023 22:17:29 -0700 Subject: [PATCH 3/6] fix hardcoded scatter marker size issue #54204 --- .../getting_started/intro_tutorials/04_plotting.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst index ddc8a37911c98..9370ca54352ab 100644 --- a/doc/source/getting_started/intro_tutorials/04_plotting.rst +++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst @@ -117,11 +117,14 @@ standard Python to get an overview of the available plot methods: .. ipython:: python - [ - method_name - for method_name in dir(air_quality.plot) - if not method_name.startswith("_") - ] + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + [ + method_name + for method_name in dir(air_quality.plot) + if not method_name.startswith("_") + ] .. note:: In many development environments as well as IPython and From 5c3b676b2c549e94ee6ca8538fe687e869353f32 Mon Sep 17 00:00:00 2001 From: tarsur909 <68882529+tarsur909@users.noreply.github.com> Date: Mon, 31 Jul 2023 22:28:48 -0700 Subject: [PATCH 4/6] fix hardcoded scatter marker size issue #54204 --- doc/source/getting_started/intro_tutorials/04_plotting.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst index 9370ca54352ab..1a805d3071552 100644 --- a/doc/source/getting_started/intro_tutorials/04_plotting.rst +++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst @@ -103,7 +103,7 @@ I want to visually compare the :math:`NO_2` values measured in London versus Par .. ipython:: python @savefig 04_airqual_scatter.png - air_quality.plot.scatter(x="station_london", y="station_paris", alpha=0.5) + air_quality.plot.scatter(x="station_london", y="station_paris", s = 20, alpha=0.5) plt.show() .. raw:: html From d02538e8db6b27e8b60ab9054ffe3aca3defb7f0 Mon Sep 17 00:00:00 2001 From: tarsur909 <68882529+tarsur909@users.noreply.github.com> Date: Mon, 31 Jul 2023 22:40:31 -0700 Subject: [PATCH 5/6] fix hardcoded scatter marker size issue #54204 --- doc/source/user_guide/dsintro.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/dsintro.rst b/doc/source/user_guide/dsintro.rst index 4b0829e4a23b9..f359e9304ce24 100644 --- a/doc/source/user_guide/dsintro.rst +++ b/doc/source/user_guide/dsintro.rst @@ -570,7 +570,7 @@ greater than 5, calculate the ratio, and plot: SepalRatio=lambda x: x.SepalWidth / x.SepalLength, PetalRatio=lambda x: x.PetalWidth / x.PetalLength, ) - .plot(kind="scatter", x="SepalRatio", y="PetalRatio") + .plot(kind="scatter", x="SepalRatio", y="PetalRatio", s = 20) ) Since a function is passed in, the function is computed on the DataFrame From 1e20c1e3fb2ac6b6e46411313c915da06b3ea306 Mon Sep 17 00:00:00 2001 From: tarsur909 <68882529+tarsur909@users.noreply.github.com> Date: Mon, 31 Jul 2023 22:58:28 -0700 Subject: [PATCH 6/6] fix hardcoded scatter marker size issue #54204 --- doc/source/user_guide/visualization.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index 9081d13ef2cf1..53e9314e80b46 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -626,7 +626,7 @@ It is recommended to specify ``color`` and ``label`` keywords to distinguish eac ax = df.plot.scatter(x="a", y="b", color="DarkBlue", label="Group 1") @savefig scatter_plot_repeated.png - df.plot.scatter(x="c", y="d", color="DarkGreen", label="Group 2", ax=ax); + df.plot.scatter(x="c", y="d", color="DarkGreen", label="Group 2", ax=ax, s = 20); .. ipython:: python :suppress: