diff --git a/README.md b/README.md index f711f1fd751d..5a3a3ba68264 100644 --- a/README.md +++ b/README.md @@ -192,10 +192,13 @@ Install Polars with all optional dependencies. ```sh pip install 'polars[all]' -pip install 'polars[numpy,pandas,pyarrow]' # install a subset of all optional dependencies ``` -You can also install the dependencies directly. +You can also install a subset of all optional dependencies. + +```sh +pip install 'polars[numpy,pandas,pyarrow]' +``` | Tag | Description | | ---------- | ---------------------------------------------------------------------------- | @@ -209,6 +212,7 @@ You can also install the dependencies directly. | openpyxl | Support for reading from Excel files with native types | | deltalake | Support for reading from Delta Lake Tables | | pyiceberg | Support for reading from Apache Iceberg tables | +| plot | Support for plot functions on DataFrames | | timezone | Timezone support, only needed if are on Python<3.9 or you are on Windows | Releases happen quite often (weekly / every few days) at the moment, so updating polars regularly to get the latest bugfixes / features might not be a bad idea. diff --git a/docs/user-guide/installation.md b/docs/user-guide/installation.md index 525fb48c0e9d..3e86f76b80c6 100644 --- a/docs/user-guide/installation.md +++ b/docs/user-guide/installation.md @@ -55,6 +55,7 @@ pip install 'polars[numpy,fsspec]' | connectorx | Support for reading from SQL databases | | xlsx2csv | Support for reading from Excel files | | deltalake | Support for reading from Delta Lake Tables | +| plot | Support for plotting DataFrames | | timezone | Timezone support, only needed if 1. you are on Python < 3.9 and/or 2. 
you are on Windows, otherwise no dependencies will be installed | ### Rust diff --git a/py-polars/docs/source/reference/dataframe/attributes.rst b/py-polars/docs/source/reference/dataframe/attributes.rst index 086cc41597eb..3e0bbfa721bf 100644 --- a/py-polars/docs/source/reference/dataframe/attributes.rst +++ b/py-polars/docs/source/reference/dataframe/attributes.rst @@ -10,6 +10,7 @@ Attributes DataFrame.dtypes DataFrame.flags DataFrame.height + DataFrame.plot DataFrame.schema DataFrame.shape DataFrame.width diff --git a/py-polars/docs/source/reference/dataframe/index.rst b/py-polars/docs/source/reference/dataframe/index.rst index ffcc810cc829..509b1e3f5ede 100644 --- a/py-polars/docs/source/reference/dataframe/index.rst +++ b/py-polars/docs/source/reference/dataframe/index.rst @@ -16,6 +16,7 @@ This page gives an overview of all public DataFrame methods. group_by modify_select miscellaneous + plot .. currentmodule:: polars diff --git a/py-polars/docs/source/reference/dataframe/plot.rst b/py-polars/docs/source/reference/dataframe/plot.rst new file mode 100644 index 000000000000..c43b717971bf --- /dev/null +++ b/py-polars/docs/source/reference/dataframe/plot.rst @@ -0,0 +1,44 @@ +==== +Plot +==== + +Polars does not implement plotting logic itself, but instead defers to +hvplot. Please see the `hvplot reference gallery <https://hvplot.holoviz.org/reference/index.html>`_ +for more information and documentation. + +Examples +-------- +Scatter plot: + +.. code-block:: python + + df = pl.DataFrame( + { + "length": [1, 4, 6], + "width": [4, 5, 6], + "species": ["setosa", "setosa", "versicolor"], + } + ) + df.plot.scatter(x="length", y="width", by="species") + +Line plot: + +.. code-block:: python + + from datetime import date + df = pl.DataFrame( + { + "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 3)], + "stock_1": [1, 4, 6], + "stock_2": [1, 5, 2], + } + ) + df.plot.line(x="date", y=["stock_1", "stock_2"]) + +For more info on what you can pass, you can use ``hvplot.help``: + +.. 
code-block:: python + + import hvplot + hvplot.help('scatter') + diff --git a/py-polars/docs/source/reference/series/attributes.rst b/py-polars/docs/source/reference/series/attributes.rst index aed84405190e..aec1ae90d37a 100644 --- a/py-polars/docs/source/reference/series/attributes.rst +++ b/py-polars/docs/source/reference/series/attributes.rst @@ -15,3 +15,4 @@ Attributes Series.shape Series.str Series.flags + Series.plot diff --git a/py-polars/docs/source/reference/series/index.rst b/py-polars/docs/source/reference/series/index.rst index eb56ce20604e..07a27ca796ee 100644 --- a/py-polars/docs/source/reference/series/index.rst +++ b/py-polars/docs/source/reference/series/index.rst @@ -20,6 +20,7 @@ This page gives an overview of all public Series methods. list modify_select miscellaneous + plot string struct temporal diff --git a/py-polars/docs/source/reference/series/plot.rst b/py-polars/docs/source/reference/series/plot.rst new file mode 100644 index 000000000000..b6e01f685e80 --- /dev/null +++ b/py-polars/docs/source/reference/series/plot.rst @@ -0,0 +1,29 @@ +==== +Plot +==== + +Polars does not implement plotting logic itself, but instead defers to +hvplot. Please see the `hvplot reference gallery <https://hvplot.holoviz.org/reference/index.html>`_ +for more information and documentation. + +Examples +-------- +Histogram: + +.. code-block:: python + + s = pl.Series([1, 4, 2]) + s.plot.hist() + +KDE plot (note: in addition to ``hvplot``, this one also requires ``scipy``): + +.. code-block:: python + + s.plot.kde() + +For more info on what you can pass, you can use ``hvplot.help``: + +.. 
code-block:: python + + import hvplot + hvplot.help("hist") diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 987e12d3eae6..b55e4f0e554f 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -48,12 +48,14 @@ py_type_to_dtype, ) from polars.dependencies import ( + _HVPLOT_AVAILABLE, _PANDAS_AVAILABLE, _PYARROW_AVAILABLE, _check_for_numpy, _check_for_pandas, _check_for_pyarrow, dataframe_api_compat, + hvplot, ) from polars.dependencies import numpy as np from polars.dependencies import pandas as pd @@ -348,7 +350,7 @@ class DataFrame: """ - _accessors: ClassVar[set[str]] = set() + _accessors: ClassVar[set[str]] = {"plot"} def __init__( self, @@ -1116,6 +1118,52 @@ def _replace(self, column: str, new_column: Series) -> Self: self._df.replace(column, new_column._s) return self + @property + def plot(self) -> Any: + """ + Create a plot namespace. + + Polars does not implement plotting logic itself, but instead defers to + hvplot. Please see the `hvplot reference gallery <https://hvplot.holoviz.org/reference/index.html>`_ + for more information and documentation. + + Examples + -------- + Scatter plot: + + >>> df = pl.DataFrame( + ... { + ... "length": [1, 4, 6], + ... "width": [4, 5, 6], + ... "species": ["setosa", "setosa", "versicolor"], + ... } + ... ) + >>> df.plot.scatter(x="length", y="width", by="species") # doctest: +SKIP + + Line plot: + + >>> from datetime import date + >>> df = pl.DataFrame( + ... { + ... "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 3)], + ... "stock_1": [1, 4, 6], + ... "stock_2": [1, 5, 2], + ... } + ... 
) + >>> df.plot.line(x="date", y=["stock_1", "stock_2"]) # doctest: +SKIP + + For more info on what you can pass, you can use ``hvplot.help``: + + >>> import hvplot # doctest: +SKIP + >>> hvplot.help("scatter") # doctest: +SKIP + """ + if not _HVPLOT_AVAILABLE or parse_version(hvplot.__version__) < parse_version( + "0.9.1" + ): + raise ModuleUpgradeRequired("hvplot>=0.9.1 is required for `.plot`") + hvplot.post_patch() + return hvplot.plotting.core.hvPlotTabularPolars(self) + @property def shape(self) -> tuple[int, int]: """ diff --git a/py-polars/polars/dependencies.py b/py-polars/polars/dependencies.py index 4de9fbe001de..0eacfefd316b 100644 --- a/py-polars/polars/dependencies.py +++ b/py-polars/polars/dependencies.py @@ -12,6 +12,7 @@ _DELTALAKE_AVAILABLE = True _FSSPEC_AVAILABLE = True _GEVENT_AVAILABLE = True +_HVPLOT_AVAILABLE = True _HYPOTHESIS_AVAILABLE = True _NUMPY_AVAILABLE = True _PANDAS_AVAILABLE = True @@ -158,6 +159,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]: import deltalake import fsspec import gevent + import hvplot import hypothesis import numpy import pandas @@ -183,6 +185,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]: ) deltalake, _DELTALAKE_AVAILABLE = _lazy_import("deltalake") fsspec, _FSSPEC_AVAILABLE = _lazy_import("fsspec") + hvplot, _HVPLOT_AVAILABLE = _lazy_import("hvplot") hypothesis, _HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis") numpy, _NUMPY_AVAILABLE = _lazy_import("numpy") pandas, _PANDAS_AVAILABLE = _lazy_import("pandas") @@ -243,6 +246,7 @@ def _check_for_pydantic(obj: Any, *, check_type: bool = True) -> bool: "deltalake", "fsspec", "gevent", + "hvplot", "numpy", "pandas", "pydantic", @@ -260,6 +264,7 @@ def _check_for_pydantic(obj: Any, *, check_type: bool = True) -> bool: "_PYICEBERG_AVAILABLE", "_FSSPEC_AVAILABLE", "_GEVENT_AVAILABLE", + "_HVPLOT_AVAILABLE", "_HYPOTHESIS_AVAILABLE", "_NUMPY_AVAILABLE", "_PANDAS_AVAILABLE", diff --git a/py-polars/polars/series/series.py 
b/py-polars/polars/series/series.py index e45df2247a2b..be6af81bcafa 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -52,11 +52,13 @@ supported_numpy_char_code, ) from polars.dependencies import ( + _HVPLOT_AVAILABLE, _PYARROW_AVAILABLE, _check_for_numpy, _check_for_pandas, _check_for_pyarrow, dataframe_api_compat, + hvplot, ) from polars.dependencies import numpy as np from polars.dependencies import pandas as pd @@ -237,6 +239,7 @@ class Series: "str", "bin", "struct", + "plot", } def __init__( @@ -7486,6 +7489,38 @@ def struct(self) -> StructNameSpace: """Create an object namespace of all struct related methods.""" return StructNameSpace(self) + @property + def plot(self) -> Any: + """ + Create a plot namespace. + + Polars does not implement plotting logic itself, but instead defers to + hvplot. Please see the `hvplot reference gallery <https://hvplot.holoviz.org/reference/index.html>`_ + for more information and documentation. + + Examples + -------- + Histogram: + + >>> s = pl.Series([1, 4, 2]) + >>> s.plot.hist() # doctest: +SKIP + + KDE plot (note: in addition to ``hvplot``, this one also requires ``scipy``): + + >>> s.plot.kde() # doctest: +SKIP + + For more info on what you can pass, you can use ``hvplot.help``: + + >>> import hvplot # doctest: +SKIP + >>> hvplot.help("hist") # doctest: +SKIP + """ + if not _HVPLOT_AVAILABLE or parse_version(hvplot.__version__) < parse_version( + "0.9.1" + ): + raise ModuleUpgradeRequired("hvplot>=0.9.1 is required for `.plot`") + hvplot.post_patch() + return hvplot.plotting.core.hvPlotTabularPolars(self) + def _resolve_temporal_dtype( dtype: PolarsDataType | None, diff --git a/py-polars/polars/series/utils.py b/py-polars/polars/series/utils.py index 75d77f022375..2f8e6529ad2c 100644 --- a/py-polars/polars/series/utils.py +++ b/py-polars/polars/series/utils.py @@ -39,7 +39,14 @@ def expr_dispatch(cls: type[T]) -> type[T]: expr_lookup = _expr_lookup(namespace) for name in dir(cls): - if not name.startswith("_"): + if ( + # private 
+ not name.startswith("_") + # Avoid error when building docs + # https://github.com/pola-rs/polars/pull/13238#discussion_r1438787093 + # TODO: is there a better way to do this? + and name != "plot" + ): attr = getattr(cls, name) if callable(attr): attr = _undecorated(attr) diff --git a/py-polars/polars/utils/show_versions.py b/py-polars/polars/utils/show_versions.py index 464f5b237b10..d6ff8c4ee47f 100644 --- a/py-polars/polars/utils/show_versions.py +++ b/py-polars/polars/utils/show_versions.py @@ -24,6 +24,7 @@ def show_versions() -> None: connectorx: 0.3.2 deltalake: 0.13.0 fsspec: 2023.10.0 + hvplot: 0.9.1 gevent: 23.9.1 matplotlib: 3.8.2 numpy: 1.26.2 @@ -66,6 +67,7 @@ def _get_dependency_info() -> dict[str, str]: "deltalake", "fsspec", "gevent", + "hvplot", "matplotlib", "numpy", "openpyxl", diff --git a/py-polars/polars/utils/various.py b/py-polars/polars/utils/various.py index 7ae994ce24aa..f6964fd20486 100644 --- a/py-polars/polars/utils/various.py +++ b/py-polars/polars/utils/various.py @@ -399,7 +399,7 @@ def __get__( # type: ignore[override] return self.fget( # type: ignore[misc] instance if isinstance(instance, cls) else cls ) - except AttributeError: + except (AttributeError, ImportError): return None # type: ignore[return-value] diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index a9c6ecf37896..d8c17fbaf4a7 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -45,6 +45,7 @@ connectorx = ["connectorx >= 0.3.2"] deltalake = ["deltalake >= 0.14.0"] fsspec = ["fsspec"] gevent = ["gevent"] +plot = ["hvplot >= 0.9.1"] matplotlib = ["matplotlib"] numpy = ["numpy >= 1.16.0"] openpyxl = ["openpyxl >= 3.0.0"] @@ -58,7 +59,7 @@ timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_syst xlsx2csv = ["xlsx2csv >= 0.8.0"] xlsxwriter = ["xlsxwriter"] all = [ - 
"polars[pyarrow,pandas,numpy,fsspec,connectorx,xlsx2csv,deltalake,timezone,matplotlib,pydantic,pyiceberg,sqlalchemy,xlsxwriter,adbc,cloudpickle,gevent]", + "polars[pyarrow,pandas,numpy,fsspec,plot,connectorx,xlsx2csv,deltalake,timezone,pydantic,pyiceberg,sqlalchemy,xlsxwriter,adbc,cloudpickle,gevent]", ] [tool.maturin] @@ -88,6 +89,7 @@ module = [ "ezodf.*", "fsspec.*", "gevent", + "hvplot.*", "matplotlib.*", "moto.server", "openpyxl", diff --git a/py-polars/requirements-dev.txt b/py-polars/requirements-dev.txt index 17c6300ee26f..d97002186a6b 100644 --- a/py-polars/requirements-dev.txt +++ b/py-polars/requirements-dev.txt @@ -47,8 +47,10 @@ dataframe-api-compat >= 0.1.6 pyiceberg >= 0.5.0 # Csv zstandard -# Other +# Plotting +hvplot>=0.9.1 matplotlib +# Other gevent # ------- diff --git a/py-polars/tests/unit/namespaces/test_plot.py b/py-polars/tests/unit/namespaces/test_plot.py new file mode 100644 index 000000000000..c202f9969947 --- /dev/null +++ b/py-polars/tests/unit/namespaces/test_plot.py @@ -0,0 +1,42 @@ +from datetime import date + +import pytest + +import polars as pl +from polars.exceptions import PolarsPanicError + + +def test_dataframe_scatter() -> None: + df = pl.DataFrame( + { + "length": [1, 4, 6], + "width": [4, 5, 6], + "species": ["setosa", "setosa", "versicolor"], + } + ) + df.plot.scatter(x="length", y="width", by="species") + + +def test_dataframe_line() -> None: + df = pl.DataFrame( + { + "date": [date(2020, 1, 2), date(2020, 1, 3), date(2020, 1, 3)], + "stock_1": [1, 4, 6], + "stock_2": [1, 5, 2], + } + ) + df.plot.line(x="date", y=["stock_1", "stock_2"]) + + +def test_series_hist() -> None: + s = pl.Series("values", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) + s.plot.hist() + + +def test_empty_dataframe() -> None: + pl.DataFrame({"a": [], "b": []}).plot.scatter(x="a", y="b") + + +def test_unsupported_dtype() -> None: + with pytest.raises(PolarsPanicError): + pl.DataFrame({"a": [{1, 2}], "b": [4]}).plot.scatter(x="a", y="b")