
Commit

Merge remote-tracking branch 'upstream/main' into sty/ruff
mroeschke committed Jan 15, 2024
2 parents a902f8e + 1af1030 commit 359736e
Showing 31 changed files with 254 additions and 105 deletions.
36 changes: 0 additions & 36 deletions ci/code_checks.sh
@@ -73,50 +73,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX03 --ignore_functions \
pandas.Series.plot.line \
pandas.Series.to_sql \
pandas.Series.to_latex \
pandas.errors.DatabaseError \
pandas.errors.IndexingError \
pandas.errors.InvalidColumnName \
pandas.errors.PossibleDataLossError \
pandas.errors.PossiblePrecisionLoss \
pandas.errors.SettingWithCopyError \
pandas.errors.SettingWithCopyWarning \
pandas.errors.SpecificationError \
pandas.errors.UndefinedVariableError \
pandas.errors.ValueLabelTypeMismatch \
pandas.Timestamp.ceil \
pandas.Timestamp.floor \
pandas.Timestamp.round \
pandas.read_pickle \
pandas.ExcelWriter \
pandas.read_json \
pandas.io.json.build_table_schema \
pandas.DataFrame.to_latex \
pandas.io.formats.style.Styler.to_latex \
pandas.read_parquet \
pandas.DataFrame.to_sql \
pandas.read_stata \
pandas.core.resample.Resampler.pipe \
pandas.core.resample.Resampler.interpolate \
pandas.plotting.scatter_matrix \
pandas.pivot \
pandas.merge_asof \
pandas.wide_to_long \
pandas.Index.rename \
pandas.Index.droplevel \
pandas.Index.isin \
pandas.MultiIndex.names \
pandas.MultiIndex.droplevel \
pandas.IndexSlice \
pandas.Grouper \
pandas.io.formats.style.Styler.map \
pandas.io.formats.style.Styler.apply_index \
pandas.io.formats.style.Styler.map_index \
pandas.io.formats.style.Styler.format \
pandas.io.formats.style.Styler.format_index \
pandas.io.formats.style.Styler.relabel_index \
pandas.io.formats.style.Styler.hide \
pandas.io.formats.style.Styler.set_td_classes \
pandas.io.formats.style.Styler.set_tooltips \
pandas.io.formats.style.Styler.set_uuid \
pandas.io.formats.style.Styler.pipe \
@@ -126,9 +93,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.io.formats.style.Styler.text_gradient \
pandas.DataFrame.values \
pandas.DataFrame.groupby \
pandas.DataFrame.idxmax \
pandas.DataFrame.idxmin \
pandas.DataFrame.pivot \
pandas.DataFrame.sort_values \
pandas.DataFrame.plot.hexbin \
pandas.DataFrame.plot.line \
2 changes: 1 addition & 1 deletion doc/source/development/contributing.rst
@@ -19,7 +19,7 @@ Bug reports and enhancement requests
====================================

Bug reports and enhancement requests are an important part of making pandas more stable and
are curated through Github issues. When reporting and issue or request, please select the `appropriate
are curated through Github issues. When reporting an issue or request, please select the `appropriate
category and fill out the issue form fully <https://github.com/pandas-dev/pandas/issues/new/choose>`_
to ensure others and the core development team can fully understand the scope of the issue.

17 changes: 9 additions & 8 deletions doc/source/user_guide/io.rst
@@ -1704,7 +1704,7 @@ option parameter:

.. code-block:: python
storage_options = {"client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"}}}
storage_options = {"client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"}}
df = pd.read_json("s3://pandas-test/test-1", storage_options=storage_options)
More sample configurations and documentation can be found at `S3Fs documentation
@@ -3015,14 +3015,15 @@ Read in the content of the "books.xml" as instance of ``StringIO`` or
Even read XML from AWS S3 buckets such as NIH NCBI PMC Article Datasets providing
Biomedical and Life Science Journals:

.. ipython:: python
:okwarning:
.. code-block:: python
df = pd.read_xml(
"s3://pmc-oa-opendata/oa_comm/xml/all/PMC1236943.xml",
xpath=".//journal-meta",
)
df
>>> df = pd.read_xml(
... "s3://pmc-oa-opendata/oa_comm/xml/all/PMC1236943.xml",
... xpath=".//journal-meta",
...)
>>> df
journal-id journal-title issn publisher
0 Cardiovasc Ultrasound Cardiovascular Ultrasound 1476-7120 NaN
With `lxml`_ as default ``parser``, you access the full-featured XML library
that extends Python's ElementTree API. One powerful tool is ability to query
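
The two io.rst fixes above drop a stray closing brace from the ``storage_options`` example and turn the S3 XML snippet into a static code block so the docs build no longer hits the network. A minimal sketch of the corrected usage follows; it assumes s3fs is installed and that the endpoint and bucket paths shown in the docs are reachable, so treat it as illustrative rather than something to run verbatim:

    import pandas as pd

    # Corrected example: the braces are now balanced (the old doc line had a stray "}").
    storage_options = {"client_kwargs": {"endpoint_url": "http://127.0.0.1:5555"}}
    df = pd.read_json("s3://pandas-test/test-1", storage_options=storage_options)

    # The XML example is now shown as a static block rather than executed at
    # doc-build time; reading it for real still requires S3 access via s3fs.
    meta = pd.read_xml(
        "s3://pmc-oa-opendata/oa_comm/xml/all/PMC1236943.xml",
        xpath=".//journal-meta",
    )
    print(meta)
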
24 changes: 12 additions & 12 deletions pandas/_libs/tslibs/nattype.pyx
@@ -973,16 +973,16 @@ timedelta}, default 'raise'
A timestamp can be rounded using multiple frequency units:
>>> ts.round(freq='h') # hour
>>> ts.round(freq='h') # hour
Timestamp('2020-03-14 16:00:00')
>>> ts.round(freq='min') # minute
>>> ts.round(freq='min') # minute
Timestamp('2020-03-14 15:33:00')
>>> ts.round(freq='s') # seconds
>>> ts.round(freq='s') # seconds
Timestamp('2020-03-14 15:32:52')
>>> ts.round(freq='ms') # milliseconds
>>> ts.round(freq='ms') # milliseconds
Timestamp('2020-03-14 15:32:52.193000')
``freq`` can also be a multiple of a single unit, like '5min' (i.e. 5 minutes):
@@ -1062,16 +1062,16 @@ timedelta}, default 'raise'
A timestamp can be floored using multiple frequency units:
>>> ts.floor(freq='h') # hour
>>> ts.floor(freq='h') # hour
Timestamp('2020-03-14 15:00:00')
>>> ts.floor(freq='min') # minute
>>> ts.floor(freq='min') # minute
Timestamp('2020-03-14 15:32:00')
>>> ts.floor(freq='s') # seconds
>>> ts.floor(freq='s') # seconds
Timestamp('2020-03-14 15:32:52')
>>> ts.floor(freq='ns') # nanoseconds
>>> ts.floor(freq='ns') # nanoseconds
Timestamp('2020-03-14 15:32:52.192548651')
``freq`` can also be a multiple of a single unit, like '5min' (i.e. 5 minutes):
@@ -1151,16 +1151,16 @@ timedelta}, default 'raise'
A timestamp can be ceiled using multiple frequency units:
>>> ts.ceil(freq='h') # hour
>>> ts.ceil(freq='h') # hour
Timestamp('2020-03-14 16:00:00')
>>> ts.ceil(freq='min') # minute
>>> ts.ceil(freq='min') # minute
Timestamp('2020-03-14 15:33:00')
>>> ts.ceil(freq='s') # seconds
>>> ts.ceil(freq='s') # seconds
Timestamp('2020-03-14 15:32:53')
>>> ts.ceil(freq='us') # microseconds
>>> ts.ceil(freq='us') # microseconds
Timestamp('2020-03-14 15:32:52.192549')
``freq`` can also be a multiple of a single unit, like '5min' (i.e. 5 minutes):
24 changes: 12 additions & 12 deletions pandas/_libs/tslibs/timestamps.pyx
@@ -1973,16 +1973,16 @@ timedelta}, default 'raise'
A timestamp can be rounded using multiple frequency units:
>>> ts.round(freq='h') # hour
>>> ts.round(freq='h') # hour
Timestamp('2020-03-14 16:00:00')
>>> ts.round(freq='min') # minute
>>> ts.round(freq='min') # minute
Timestamp('2020-03-14 15:33:00')
>>> ts.round(freq='s') # seconds
>>> ts.round(freq='s') # seconds
Timestamp('2020-03-14 15:32:52')
>>> ts.round(freq='ms') # milliseconds
>>> ts.round(freq='ms') # milliseconds
Timestamp('2020-03-14 15:32:52.193000')
``freq`` can also be a multiple of a single unit, like '5min' (i.e. 5 minutes):
@@ -2064,16 +2064,16 @@ timedelta}, default 'raise'
A timestamp can be floored using multiple frequency units:
>>> ts.floor(freq='h') # hour
>>> ts.floor(freq='h') # hour
Timestamp('2020-03-14 15:00:00')
>>> ts.floor(freq='min') # minute
>>> ts.floor(freq='min') # minute
Timestamp('2020-03-14 15:32:00')
>>> ts.floor(freq='s') # seconds
>>> ts.floor(freq='s') # seconds
Timestamp('2020-03-14 15:32:52')
>>> ts.floor(freq='ns') # nanoseconds
>>> ts.floor(freq='ns') # nanoseconds
Timestamp('2020-03-14 15:32:52.192548651')
``freq`` can also be a multiple of a single unit, like '5min' (i.e. 5 minutes):
@@ -2153,16 +2153,16 @@ timedelta}, default 'raise'
A timestamp can be ceiled using multiple frequency units:
>>> ts.ceil(freq='h') # hour
>>> ts.ceil(freq='h') # hour
Timestamp('2020-03-14 16:00:00')
>>> ts.ceil(freq='min') # minute
>>> ts.ceil(freq='min') # minute
Timestamp('2020-03-14 15:33:00')
>>> ts.ceil(freq='s') # seconds
>>> ts.ceil(freq='s') # seconds
Timestamp('2020-03-14 15:32:53')
>>> ts.ceil(freq='us') # microseconds
>>> ts.ceil(freq='us') # microseconds
Timestamp('2020-03-14 15:32:52.192549')
``freq`` can also be a multiple of a single unit, like '5min' (i.e. 5 minutes):
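
The nattype.pyx and timestamps.pyx hunks above only reflow the shared ``round``/``floor``/``ceil`` docstrings; the behaviour they document is unchanged. As a quick sanity check, here is a small sketch reproducing the documented outputs with the same timestamp and the lowercase frequency aliases used in the examples:

    import pandas as pd

    ts = pd.Timestamp("2020-03-14T15:32:52.192548651")

    print(ts.round(freq="h"))     # Timestamp('2020-03-14 16:00:00')
    print(ts.floor(freq="min"))   # Timestamp('2020-03-14 15:32:00')
    print(ts.ceil(freq="s"))      # Timestamp('2020-03-14 15:32:53')

    # A multiple of a single unit also works, e.g. 5-minute rounding.
    print(ts.round(freq="5min"))  # Timestamp('2020-03-14 15:35:00')
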
13 changes: 12 additions & 1 deletion pandas/_typing.py
@@ -90,18 +90,29 @@
from typing import SupportsIndex

if sys.version_info >= (3, 10):
from typing import Concatenate # pyright: ignore[reportUnusedImport]
from typing import ParamSpec
from typing import TypeGuard # pyright: ignore[reportUnusedImport]
else:
from typing_extensions import TypeGuard # pyright: ignore[reportUnusedImport]
from typing_extensions import ( # pyright: ignore[reportUnusedImport]
Concatenate,
ParamSpec,
TypeGuard,
)

P = ParamSpec("P")

if sys.version_info >= (3, 11):
from typing import Self # pyright: ignore[reportUnusedImport]
else:
from typing_extensions import Self # pyright: ignore[reportUnusedImport]

else:
npt: Any = None
ParamSpec: Any = None
Self: Any = None
TypeGuard: Any = None
Concatenate: Any = None

HashableT = TypeVar("HashableT", bound=Hashable)
MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
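
The _typing.py change exports ``Concatenate`` and a module-level ``ParamSpec`` (``P``) alongside the existing ``TypeGuard``/``Self`` shims. For readers unfamiliar with these typing primitives, here is an illustrative sketch, not part of the diff, of what they enable: a pipe-style helper whose extra arguments are checked against the signature of the function being piped into (the ``pipe`` and ``add`` names below are hypothetical examples):

    import sys
    from typing import Callable, TypeVar

    if sys.version_info >= (3, 10):
        from typing import Concatenate, ParamSpec
    else:
        from typing_extensions import Concatenate, ParamSpec

    P = ParamSpec("P")
    T = TypeVar("T")
    S = TypeVar("S")


    def pipe(
        obj: S,
        func: Callable[Concatenate[S, P], T],
        *args: P.args,
        **kwargs: P.kwargs,
    ) -> T:
        # ``obj`` fills the first positional parameter of ``func``; the
        # remaining *args/**kwargs must match the rest of its signature.
        return func(obj, *args, **kwargs)


    def add(x: int, y: int) -> int:
        return x + y


    result: int = pipe(1, add, 2)   # OK: returns 3
    # pipe(1, add, "2")             # a type checker flags the str argument
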
36 changes: 33 additions & 3 deletions pandas/core/common.py
Expand Up @@ -24,6 +24,7 @@
TYPE_CHECKING,
Any,
Callable,
TypeVar,
cast,
overload,
)
@@ -51,7 +52,9 @@
from pandas._typing import (
AnyArrayLike,
ArrayLike,
Concatenate,
NpDtype,
P,
RandomState,
T,
)
@@ -463,8 +466,34 @@ def random_state(state: RandomState | None = None):
)


_T = TypeVar("_T") # Secondary TypeVar for use in pipe's type hints


@overload
def pipe(
obj: _T,
func: Callable[Concatenate[_T, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T:
...


@overload
def pipe(
obj: Any,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
...


def pipe(
obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
obj: _T,
func: Callable[Concatenate[_T, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
"""
Apply a function ``func`` to object ``obj`` either by passing obj as the
@@ -490,12 +519,13 @@ def pipe(
object : the return type of ``func``.
"""
if isinstance(func, tuple):
func, target = func
# Assigning to func_ so pyright understands that it's a callable
func_, target = func
if target in kwargs:
msg = f"{target} is both the pipe target and a keyword argument"
raise ValueError(msg)
kwargs[target] = obj
return func(*args, **kwargs)
return func_(*args, **kwargs)
else:
return func(obj, *args, **kwargs)

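
With the ``ParamSpec``/``Concatenate`` overloads above, the callable form of ``pipe`` is now fully type-checked, while the ``(callable, keyword)`` tuple form keeps its looser ``Callable[..., T]`` typing. A hedged usage sketch of the two forms follows; it calls the private helper ``pandas.core.common.pipe`` purely for illustration, and ``subtract`` is a hypothetical function, not pandas API:

    import pandas.core.common as com


    def subtract(minuend: int, subtrahend: int) -> int:
        return minuend - subtrahend


    # Callable form: the piped object becomes the first positional argument.
    print(com.pipe(10, subtract, 3))                  # 7

    # Tuple form: the piped object is passed as the named keyword instead,
    # which the second overload types as tuple[Callable[..., T], str].
    print(com.pipe(3, (subtract, "subtrahend"), 10))  # 7
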
26 changes: 23 additions & 3 deletions pandas/core/generic.py
@@ -50,6 +50,7 @@
Axis,
AxisInt,
CompressionOptions,
Concatenate,
DtypeArg,
DtypeBackend,
DtypeObj,
@@ -213,6 +214,7 @@
)

from pandas._libs.tslibs import BaseOffset
from pandas._typing import P

from pandas import (
DataFrame,
@@ -6175,13 +6177,31 @@ def sample(

return result

@overload
def pipe(
self,
func: Callable[Concatenate[Self, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T:
...

@overload
def pipe(
self,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
...

@final
@doc(klass=_shared_doc_kwargs["klass"])
def pipe(
self,
func: Callable[..., T] | tuple[Callable[..., T], str],
*args,
**kwargs,
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
r"""
Apply chainable functions that expect Series or DataFrames.
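
The matching overloads on ``NDFrame.pipe`` give the same treatment to the public method: the plain-callable form is checked via ``Concatenate[Self, P]``, and the tuple form covers callables that take the data somewhere other than the first argument. A short usage sketch; ``add_constant`` and ``scale`` are hypothetical helpers, not pandas API:

    import pandas as pd


    def add_constant(df: pd.DataFrame, value: int) -> pd.DataFrame:
        return df + value


    def scale(factor: int, data: pd.DataFrame) -> pd.DataFrame:
        # Takes the DataFrame as its second argument, so the tuple form is
        # needed to tell pipe() which keyword receives the data.
        return data * factor


    df = pd.DataFrame({"a": [1, 2, 3]})
    out = df.pipe(add_constant, value=1).pipe((scale, "data"), factor=10)
    print(out)
    #     a
    # 0  20
    # 1  30
    # 2  40
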
