Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Histogram refactoring and improvements (#3045)
* Transition histplot internatls to use _stats/histogram * Simplify the way we use Hist * Note status of Histogram class * Store stat value in Hist output and simplify interface * Add API examples for Hist * Add parameter validation for Hist.stat * Add some checks on common_norm/common_bin
- Loading branch information
Showing
8 changed files
with
386 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "59690096-a0ad-4ff3-b82c-0258d724035a", | ||
"metadata": { | ||
"tags": [ | ||
"hide" | ||
] | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import seaborn.objects as so\n", | ||
"from seaborn import load_dataset\n", | ||
"penguins = load_dataset(\"penguins\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "c345a35c-bac8-4163-ba40-e7c208df1033", | ||
"metadata": {}, | ||
"source": [ | ||
"For discrete or categorical variables, this stat is commonly combined with a :class:`Bar` mark:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6a96ac9b-1240-496d-9385-840205945208", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"so.Plot(penguins, \"island\").add(so.Bar(), so.Hist())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "1e5ff9d5-c6a9-4adc-a9be-0f155b1575be", | ||
"metadata": {}, | ||
"source": [ | ||
"When used to estimate a univariate distribution, it is better to use the :class:`Bars` mark:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7f3e3144-752a-4d71-9528-85eb1ed0a9a4", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p = so.Plot(penguins, \"flipper_length_mm\")\n", | ||
"p.add(so.Bars(), so.Hist())" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "008b9ffe-da74-4406-9756-4f70e333f33b", | ||
"metadata": {}, | ||
"source": [ | ||
"The granularity of the bins will influence whether the underlying distribution is accurately represented. Adjust it by setting the total number:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "27d221d5-add5-40a8-85d2-05102384dad1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Bars(), so.Hist(bins=20))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "fffebb54-0299-45c5-b7fb-6fcad6427239", | ||
"metadata": {}, | ||
"source": [ | ||
"Alternatively, specify the *width* of the bins:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "d036ca65-7dcf-45ac-a2d1-caafb9f922a7", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Bars(), so.Hist(binwidth=5))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "bc1e4bd3-2a16-42bd-9c13-a660dd381f66", | ||
"metadata": {}, | ||
"source": [ | ||
"By default, the transform returns the count of observations in each bin. The counts can be normalized, e.g. to show a proportion:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "dbf23712-2231-4226-8265-0e2a5299c4bb", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Bars(), so.Hist(stat=\"proportion\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "6c6fb23e-78c5-4630-a958-62cb4dee4ec8", | ||
"metadata": {}, | ||
"source": [ | ||
"When additional variables define groups, the default behavior is to normalize across all groups:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ac3fe4ef-56e3-4ec7-b580-596d2a3d924b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p = p.facet(\"island\")\n", | ||
"p.add(so.Bars(), so.Hist(stat=\"proportion\"))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "f7afc403-26cc-4325-a28a-913c2291aa35", | ||
"metadata": {}, | ||
"source": [ | ||
"Pass `common_norm=False` to normalize each distribution independently:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "b2029324-069f-4261-a178-1efad2fd0e88", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Bars(), so.Hist(stat=\"proportion\", common_norm=False))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "0f83401a-e456-4a14-af69-f1483c6c03c4", | ||
"metadata": {}, | ||
"source": [ | ||
"Or, with more than one grouping varible, specify a subset to normalize within:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "5c092262-8a8f-4a3e-8cae-9e0f23dd94ba", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Bars(), so.Hist(stat=\"proportion\", common_norm=[\"col\"]), color=\"sex\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "86532133-bf33-4674-9614-86ae3408aa51", | ||
"metadata": {}, | ||
"source": [ | ||
"When distributions overlap it may be easier to discern their shapes with an :class:`Area` mark:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "00b18ad8-52d4-460a-a012-d87c66b3e71e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Area(), so.Hist(), color=\"sex\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "raw", | ||
"id": "2b34d435-abbf-41aa-b219-91883d7d29f3", | ||
"metadata": {}, | ||
"source": [ | ||
"Or add :class:`Stack` move to represent a part-whole relationship:" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "3a7a0c05-d774-4f99-950f-5dc9865027c4", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"p.add(so.Bars(), so.Hist(), so.Stack(), color=\"sex\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e247e74b-2c09-40f0-8f45-9fa5f8264d78", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "py310", | ||
"language": "python", | ||
"name": "py310" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.