Skip to content

Commit

Permalink
Make Jitter do something by default (#3066)
Browse files Browse the repository at this point in the history
* Apply some jitter by default in Jitter

* Add Jitter API examples and update release notes

* Fix docstring
  • Loading branch information
mwaskom committed Oct 9, 2022
1 parent a23cf31 commit 54cab15
Show file tree
Hide file tree
Showing 6 changed files with 236 additions and 21 deletions.
178 changes: 178 additions & 0 deletions doc/_docstrings/objects.Jitter.ipynb
@@ -0,0 +1,178 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f2e5a85d-c710-492b-a4fc-09b45ae26471",
"metadata": {
"tags": [
"hide"
]
},
"outputs": [],
"source": [
"import seaborn.objects as so\n",
"from seaborn import load_dataset\n",
"penguins = load_dataset(\"penguins\")"
]
},
{
"cell_type": "raw",
"id": "14b5927c-42f1-4934-adee-3d380b8b3228",
"metadata": {},
"source": [
"When used without any arguments, a small amount of jitter will be applied along the orientation axis:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc1b4941-bbe6-4afc-b51a-0ac67cbe417d",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(penguins, \"species\", \"body_mass_g\")\n",
" .add(so.Dots(), so.Jitter())\n",
")"
]
},
{
"cell_type": "raw",
"id": "1101690e-6c19-4219-aa4e-180798454df1",
"metadata": {},
"source": [
"The `width` parameter controls the amount of jitter relative to the spacing between the marks:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4251b9d-8b11-4c2c-905c-2f3b523dee70",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(penguins, \"species\", \"body_mass_g\")\n",
" .add(so.Dots(), so.Jitter(.5))\n",
")"
]
},
{
"cell_type": "raw",
"id": "38aa639a-356e-4674-970b-53d55379b2b7",
"metadata": {},
"source": [
"The `width` parameter always applies to the orientation axis, so the direction of jitter will adapt along with the orientation:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cfe1c07-7e81-45a0-a989-240503046133",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(penguins, \"body_mass_g\", \"species\")\n",
" .add(so.Dots(), so.Jitter(.5))\n",
")"
]
},
{
"cell_type": "raw",
"id": "0f5de4cc-3383-4503-8b59-9c48230a12a5",
"metadata": {},
"source": [
"Because the `width` jitter is relative, it can be used when the orientation axis is numeric without further tweaking:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c94c41e8-29c4-4439-a5d1-0b8ffb244890",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(penguins[\"body_mass_g\"].round(-3), penguins[\"flipper_length_mm\"])\n",
" .add(so.Dots(), so.Jitter())\n",
")"
]
},
{
"cell_type": "raw",
"id": "dd982dfa-fd9f-4edc-8190-18f0e101ae1a",
"metadata": {},
"source": [
"In contrast to `width`, the `x` and `y` parameters always refer to specific axes and control the jitter in data units:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0f2e5ca-68ad-4439-a4ee-f32f65682e95",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(penguins[\"body_mass_g\"].round(-3), penguins[\"flipper_length_mm\"])\n",
" .add(so.Dots(), so.Jitter(x=100))\n",
")"
]
},
{
"cell_type": "raw",
"id": "a90ba526-8043-42ed-8f57-36445c163c0d",
"metadata": {},
"source": [
"Both `x` and `y` can be used in a single transform:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6c07ed1d-ac77-4b30-90a8-e1b8760f9fad",
"metadata": {},
"outputs": [],
"source": [
"(\n",
" so.Plot(\n",
" penguins[\"body_mass_g\"].round(-3),\n",
" penguins[\"flipper_length_mm\"].round(-1),\n",
" )\n",
" .add(so.Dots(), so.Jitter(x=200, y=5))\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb04c7a2-93f0-44cf-aacf-0eb436d0f14b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "py310",
"language": "python",
"name": "py310"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
2 changes: 2 additions & 0 deletions doc/whatsnew/v0.12.1.rst
Expand Up @@ -8,6 +8,8 @@ v0.12.1 (Unreleased)

- |Feature| The :class:`Band` and :class:`Range` marks will now cover the full extent of the data if `min` / `max` variables are not explicitly assigned or added in a transform (:pr:`3056`).

- |Enhancement| The :class:`Jitter` move now applies a small amount of jitter by default (:pr:`3066`).

- |Enhancement| Marks that sort along the orient axis (e.g. :class:`Line`) now use a stable algorithm (:pr:`3064`).

- |Fix| Make :class:`objects.PolyFit` robust to missing data (:pr:`3010`).
Expand Down
42 changes: 29 additions & 13 deletions seaborn/_core/moves.py
@@ -1,12 +1,15 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import ClassVar, Callable, Optional, Union
from typing import ClassVar, Callable, Optional, Union, cast

import numpy as np
from pandas import DataFrame

from seaborn._core.groupby import GroupBy
from seaborn._core.scales import Scale
from seaborn._core.typing import Default

default = Default()


@dataclass
Expand All @@ -24,35 +27,48 @@ def __call__(
@dataclass
class Jitter(Move):
"""
Random displacement of marks along either or both axes to reduce overplotting.
Random displacement along one or both axes to reduce overplotting.
Parameters
----------
width : float
Magnitude of jitter, relative to mark width, along the orientation axis.
If not provided, the default value will be 0 when `x` or `y` is set; otherwise,
a small amount of jitter (0.2) will be applied.
x : float
Magnitude of jitter, in data units, along the x axis.
y : float
Magnitude of jitter, in data units, along the y axis.
Examples
--------
.. include:: ../docstrings/objects.Jitter.rst
"""
width: float = 0
width: float | Default = default
x: float = 0
y: float = 0

seed: Optional[int] = None

# TODO what is the best way to have a reasonable default?
# The problem is that "reasonable" seems dependent on the mark
seed: int | None = None

def __call__(
self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
) -> DataFrame:

# TODO is it a problem that GroupBy is not used for anything here?
# Should we type it as optional?

data = data.copy()

rng = np.random.default_rng(self.seed)

def jitter(data, col, scale):
noise = rng.uniform(-.5, +.5, len(data))
offsets = noise * scale
return data[col] + offsets

if self.width is default:
width = 0.0 if self.x or self.y else 0.2
else:
width = cast(float, self.width)

if self.width:
data[orient] = jitter(data, orient, self.width * data["width"])
data[orient] = jitter(data, orient, width * data["width"])
if self.x:
data["x"] = jitter(data, "x", self.x)
if self.y:
Expand Down
18 changes: 10 additions & 8 deletions seaborn/_core/plot.py
Expand Up @@ -29,7 +29,13 @@
from seaborn._core.subplots import Subplots
from seaborn._core.groupby import GroupBy
from seaborn._core.properties import PROPERTIES, Property
from seaborn._core.typing import DataSource, VariableSpec, VariableSpecList, OrderSpec
from seaborn._core.typing import (
DataSource,
VariableSpec,
VariableSpecList,
OrderSpec,
Default,
)
from seaborn._core.rules import categorical_order
from seaborn._compat import set_scale_obj, set_layout_engine
from seaborn.rcmod import axes_style, plotting_context
Expand All @@ -47,6 +53,9 @@
from typing_extensions import TypedDict


default = Default()


# ---- Definitions for internal specs --------------------------------- #


Expand Down Expand Up @@ -79,13 +88,6 @@ class PairSpec(TypedDict, total=False):

# --- Local helpers ----------------------------------------------------------------

class Default:
def __repr__(self):
return "<default>"


default = Default()


@contextmanager
def theme_context(params: dict[str, Any]) -> Generator:
Expand Down
8 changes: 8 additions & 0 deletions seaborn/_core/typing.py
Expand Up @@ -29,3 +29,11 @@
ContinuousValueSpec = Union[
Tuple[float, float], List[float], Dict[Any, float], None,
]


class Default:
    """Marker type for "no explicit value was supplied".

    The class holds no state and defines no behavior beyond its printed
    form; a module-level instance is created from it to serve as the
    placeholder value.
    """

    def __repr__(self) -> str:
        # Fixed text so the placeholder is recognizable when displayed.
        return "<default>"


default = Default()
9 changes: 9 additions & 0 deletions tests/_core/test_moves.py
Expand Up @@ -78,6 +78,15 @@ def check_pos(self, res, df, var, limit):
assert (res[var] < df[var] + limit / 2).all()
assert (res[var] > df[var] - limit / 2).all()

def test_default(self, df):
    """Calling ``Jitter()`` with no arguments must displace the orient axis."""
    axis = "x"
    grouper = self.get_groupby(df, axis)
    jittered = Jitter()(df, grouper, axis, {})

    # Columns other than the orient axis are passed to check_same
    # (presumably asserting they are unchanged; helper not visible here).
    self.check_same(jittered, df, "y", "grp2", "width")

    # Bound the displacement using 0.2 times each row's "width" as the limit.
    self.check_pos(jittered, df, "x", 0.2 * df["width"])

    # Every row must actually have moved: the smallest offset is non-zero.
    displacement = (jittered["x"] - df["x"]).abs()
    assert displacement.min() > 0

def test_width(self, df):

width = .4
Expand Down

0 comments on commit 54cab15

Please sign in to comment.