Commit ba2e914

change dataframe docs

gboeker committed Apr 11, 2024
1 parent 8d44d87 commit ba2e914
Showing 38 changed files with 375 additions and 375 deletions.
4 changes: 0 additions & 4 deletions .circleci/config.yml
@@ -72,10 +72,6 @@ jobs:
no_output_timeout: 30m # Sometimes the tests won't generate any output; make sure the job doesn't get killed by that
command: |
pip3 install cibuildwheel==2.15.0
# When this is a nightly wheel build, allow picking up NumPy 2.0 dev wheels:
if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then
export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
fi
cibuildwheel --prerelease-pythons --output-dir wheelhouse
environment:
12 changes: 0 additions & 12 deletions .github/workflows/wheels.yml
@@ -148,18 +148,6 @@ jobs:
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

- name: Build nightly wheels (with NumPy pre-release)
if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
uses: pypa/cibuildwheel@v2.17.0
with:
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
env:
# The nightly wheels should be built with the NumPy 2.0 pre-releases
# which requires the additional URL.
CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

- name: Set up Python
uses: mamba-org/setup-micromamba@v1
with:
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -19,7 +19,7 @@ ci:
skip: [pylint, pyright, mypy]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.1
rev: v0.3.4
hooks:
- id: ruff
args: [--exit-non-zero-on-fix]
@@ -39,7 +39,7 @@ repos:
- id: ruff-format
exclude: ^scripts
- repo: https://github.com/jendrikseipp/vulture
rev: 'v2.10'
rev: 'v2.11'
hooks:
- id: vulture
entry: python scripts/run_vulture.py
@@ -93,11 +93,11 @@ repos:
args: [--disable=all, --enable=redefined-outer-name]
stages: [manual]
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
rev: v3.15.2
hooks:
- id: pyupgrade
args: [--py39-plus]
@@ -116,7 +116,7 @@
hooks:
- id: sphinx-lint
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v17.0.6
rev: v18.1.2
hooks:
- id: clang-format
files: ^pandas/_libs/src|^pandas/_libs/include
46 changes: 3 additions & 43 deletions ci/code_checks.sh
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.assign SA01" \
-i "pandas.DataFrame.at_time PR01" \
-i "pandas.DataFrame.axes SA01" \
-i "pandas.DataFrame.backfill PR01,SA01" \
-i "pandas.DataFrame.bfill SA01" \
-i "pandas.DataFrame.columns SA01" \
-i "pandas.DataFrame.copy SA01" \
@@ -99,12 +98,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.kurt RT03,SA01" \
-i "pandas.DataFrame.kurtosis RT03,SA01" \
-i "pandas.DataFrame.last_valid_index SA01" \
-i "pandas.DataFrame.mask RT03" \
-i "pandas.DataFrame.max RT03" \
-i "pandas.DataFrame.mean RT03,SA01" \
-i "pandas.DataFrame.median RT03,SA01" \
-i "pandas.DataFrame.min RT03" \
-i "pandas.DataFrame.pad PR01,SA01" \
-i "pandas.DataFrame.plot PR02,SA01" \
-i "pandas.DataFrame.pop SA01" \
-i "pandas.DataFrame.prod RT03" \
@@ -119,19 +116,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.sparse.to_dense SA01" \
-i "pandas.DataFrame.std PR01,RT03,SA01" \
-i "pandas.DataFrame.sum RT03" \
-i "pandas.DataFrame.swapaxes PR01,SA01" \
-i "pandas.DataFrame.swaplevel SA01" \
-i "pandas.DataFrame.to_feather SA01" \
-i "pandas.DataFrame.to_markdown SA01" \
-i "pandas.DataFrame.to_parquet RT03" \
-i "pandas.DataFrame.to_period SA01" \
-i "pandas.DataFrame.to_timestamp SA01" \
-i "pandas.DataFrame.tz_convert SA01" \
-i "pandas.DataFrame.tz_localize SA01" \
-i "pandas.DataFrame.unstack RT03" \
-i "pandas.DataFrame.value_counts RT03" \
-i "pandas.DataFrame.var PR01,RT03,SA01" \
-i "pandas.DataFrame.where RT03" \
-i "pandas.DatetimeIndex.ceil SA01" \
-i "pandas.DatetimeIndex.date SA01" \
-i "pandas.DatetimeIndex.day SA01" \
@@ -226,7 +215,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Index.to_list RT03" \
-i "pandas.Index.union PR07,RT03,SA01" \
-i "pandas.Index.unique RT03" \
-i "pandas.Index.value_counts RT03" \
-i "pandas.Index.view GL08" \
-i "pandas.Int16Dtype SA01" \
-i "pandas.Int32Dtype SA01" \
@@ -400,7 +388,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.list.flatten SA01" \
-i "pandas.Series.list.len SA01" \
-i "pandas.Series.lt PR07,SA01" \
-i "pandas.Series.mask RT03" \
-i "pandas.Series.max RT03" \
-i "pandas.Series.mean RT03,SA01" \
-i "pandas.Series.median RT03,SA01" \
@@ -477,17 +464,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.to_frame SA01" \
-i "pandas.Series.to_list RT03" \
-i "pandas.Series.to_markdown SA01" \
-i "pandas.Series.to_period SA01" \
-i "pandas.Series.to_string SA01" \
-i "pandas.Series.to_timestamp RT03,SA01" \
-i "pandas.Series.truediv PR07" \
-i "pandas.Series.tz_convert SA01" \
-i "pandas.Series.tz_localize SA01" \
-i "pandas.Series.unstack SA01" \
-i "pandas.Series.update PR07,SA01" \
-i "pandas.Series.value_counts RT03" \
-i "pandas.Series.var PR01,RT03,SA01" \
-i "pandas.Series.where RT03" \
-i "pandas.SparseDtype SA01" \
-i "pandas.Timedelta PR07,SA01" \
-i "pandas.Timedelta.as_unit SA01" \
@@ -681,60 +661,40 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.apply RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.cummax RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.cummin RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.cumprod RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.cumsum RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.filter RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.filter SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.hist RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.max SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.mean RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.median SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.min SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique RT03,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02,SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.rank RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.resample RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.skew RT03" \
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.transform RT03" \
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.agg RT03" \
-i "pandas.core.groupby.SeriesGroupBy.aggregate RT03" \
-i "pandas.core.groupby.SeriesGroupBy.apply RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cummax RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cummin RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cumprod RT03" \
-i "pandas.core.groupby.SeriesGroupBy.cumsum RT03" \
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.filter PR01,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.groups SA01" \
-i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
-i "pandas.core.groupby.SeriesGroupBy.max SA01" \
-i "pandas.core.groupby.SeriesGroupBy.mean RT03" \
-i "pandas.core.groupby.SeriesGroupBy.median SA01" \
-i "pandas.core.groupby.SeriesGroupBy.min SA01" \
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02,SA01" \
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
-i "pandas.core.groupby.SeriesGroupBy.rank RT03" \
-i "pandas.core.groupby.SeriesGroupBy.resample RT03" \
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
-i "pandas.core.groupby.SeriesGroupBy.skew RT03" \
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
-i "pandas.core.groupby.SeriesGroupBy.transform RT03" \
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
-i "pandas.core.resample.Resampler.ffill RT03" \
-i "pandas.core.resample.Resampler.get_group RT03,SA01" \
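For context, each -i entry above suppresses one known docstring-validation failure, written as "<qualified name> <error codes>"; deleting an entry asserts that the docstring now passes those checks. The code meanings below are assumptions from the usual numpydoc-style scheme (RT03: the return value lacks a description; SA01: there is no See Also section), and the method shown is only an illustrative sketch, not the actual fix in this commit:

    def first_valid_index(self):
        """
        Return index label of the first non-NA value.

        Returns
        -------
        scalar or None
            Label of the first non-NA entry, or None if all values are
            NA. A populated description here clears an RT03 error.

        See Also
        --------
        DataFrame.last_valid_index : Return index label of the last
            non-NA value. Having this section clears an SA01 error.
        """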
6 changes: 6 additions & 0 deletions doc/source/user_guide/groupby.rst
@@ -416,6 +416,12 @@ You can also include the grouping columns if you want to operate on them.
grouped[["A", "B"]].sum()
.. note::

The ``groupby`` operation in pandas drops the ``name`` field of the columns Index object
after the operation. This ensures consistent syntax between the different
column-selection methods within groupby operations.

.. _groupby.iterating-label:

Iterating through groups
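A minimal sketch of the behavior the added note describes; the expected output is taken from the note itself, not verified against this commit:

    import pandas as pd

    df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})
    df.columns.name = "cols"  # give the columns Index a name

    # Per the note, groupby drops the columns Index's name field,
    # whichever column-selection syntax is used.
    result = df.groupby("A")[["B"]].sum()
    print(result.columns.name)  # expected: None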
4 changes: 4 additions & 0 deletions doc/source/user_guide/indexing.rst
@@ -262,6 +262,10 @@ The most robust and consistent way of slicing ranges along arbitrary axes is
described in the :ref:`Selection by Position <indexing.integer>` section
detailing the ``.iloc`` method. For now, we explain the semantics of slicing using the ``[]`` operator.

.. note::

When the :class:`Series` has float indices, slicing will select by position.

With Series, the syntax works exactly as with an ndarray, returning a slice of
the values and the corresponding labels:

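A small illustration of the added note, hedged because the exact behavior depends on the pandas version; the output comments restate the note's claim rather than anything shown in this diff:

    import pandas as pd

    s = pd.Series([10, 20, 30, 40], index=[1.5, 2.5, 3.5, 4.5])

    # With a float index, an integer slice selects by position:
    print(s[1:3])  # expected: the elements at positions 1 and 2

    # Label-based slicing is still available through .loc:
    print(s.loc[1.5:2.5])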
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -207,6 +207,7 @@ Removal of prior version deprecations/changes
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
- All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
- Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
- Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
- Disallow passing a pandas type to :meth:`Index.view` (:issue:`55709`)
- Disallow units other than "s", "ms", "us", "ns" for datetime64 and timedelta64 dtypes in :func:`array` (:issue:`53817`)
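To illustrate the enforced replace change listed above (the whatsnew entry only says the call is disallowed; the exception type below is an assumption):

    import pandas as pd

    s = pd.Series([1, 2, 3])

    s.replace({1: 10})  # dict-like to_replace: no explicit value needed
    s.replace(1, 10)    # scalar to_replace with an explicit value: fine

    # Now disallowed: scalar to_replace with no value (this previously
    # implied a pad/backfill-style fill, deprecated under GH#33302).
    try:
        s.replace(1)
    except ValueError as err:  # assumed exception type
        print(err)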
11 changes: 2 additions & 9 deletions pandas/core/arrays/_mixins.py
@@ -210,15 +210,15 @@ def argmin(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[overri
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self._hasna:
raise NotImplementedError
raise ValueError("Encountered an NA value with skipna=False")
return nargminmax(self, "argmin", axis=axis)

# Signature of "argmax" incompatible with supertype "ExtensionArray"
def argmax(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override]
# override base class by adding axis keyword
validate_bool_kwarg(skipna, "skipna")
if not skipna and self._hasna:
raise NotImplementedError
raise ValueError("Encountered an NA value with skipna=False")
return nargminmax(self, "argmax", axis=axis)

def unique(self) -> Self:
@@ -296,13 +296,6 @@ def __getitem__(
result = self._from_backing_data(result)
return result

def _fill_mask_inplace(
self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
) -> None:
# (for now) when self.ndim == 2, we assume axis=0
func = missing.get_fill_func(method, ndim=self.ndim)
func(self._ndarray.T, limit=limit, mask=mask.T)

def _pad_or_backfill(
self,
*,
23 changes: 2 additions & 21 deletions pandas/core/arrays/base.py
@@ -885,7 +885,7 @@ def argmin(self, skipna: bool = True) -> int:
# 2. argmin itself : total control over sorting.
validate_bool_kwarg(skipna, "skipna")
if not skipna and self._hasna:
raise NotImplementedError
raise ValueError("Encountered an NA value with skipna=False")
return nargminmax(self, "argmin")

def argmax(self, skipna: bool = True) -> int:
@@ -919,7 +919,7 @@ def argmax(self, skipna: bool = True) -> int:
# 2. argmax itself : total control over sorting.
validate_bool_kwarg(skipna, "skipna")
if not skipna and self._hasna:
raise NotImplementedError
raise ValueError("Encountered an NA value with skipna=False")
return nargminmax(self, "argmax")

def interpolate(
@@ -2111,25 +2111,6 @@ def _where(self, mask: npt.NDArray[np.bool_], value) -> Self:
result[~mask] = val
return result

# TODO(3.0): this can be removed once GH#33302 deprecation is enforced
def _fill_mask_inplace(
self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
) -> None:
"""
Replace values in locations specified by 'mask' using pad or backfill.
See also
--------
ExtensionArray.fillna
"""
func = missing.get_fill_func(method)
npvalues = self.astype(object)
# NB: if we don't copy mask here, it may be altered inplace, which
# would mess up the `self[mask] = ...` below.
func(npvalues, limit=limit, mask=mask.copy())
new_values = self._from_sequence(npvalues, dtype=self.dtype)
self[mask] = new_values[mask]

def _rank(
self,
*,
4 changes: 2 additions & 2 deletions pandas/core/arrays/sparse/array.py
@@ -1623,13 +1623,13 @@ def _argmin_argmax(self, kind: Literal["argmin", "argmax"]) -> int:
def argmax(self, skipna: bool = True) -> int:
validate_bool_kwarg(skipna, "skipna")
if not skipna and self._hasna:
raise NotImplementedError
raise ValueError("Encountered an NA value with skipna=False")
return self._argmin_argmax("argmax")

def argmin(self, skipna: bool = True) -> int:
validate_bool_kwarg(skipna, "skipna")
if not skipna and self._hasna:
raise NotImplementedError
raise ValueError("Encountered an NA value with skipna=False")
return self._argmin_argmax("argmin")

# ------------------------------------------------------------------------
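The three array changes above all swap NotImplementedError for the same descriptive ValueError. A quick sketch of the resulting behavior; the nullable-integer array is just one convenient example of an array holding an NA:

    import pandas as pd

    arr = pd.array([1, None, 3], dtype="Int64")

    print(arr.argmax())  # NA skipped by default -> 2

    # With skipna=False and an NA present, a descriptive ValueError is
    # now raised instead of a bare NotImplementedError:
    try:
        arr.argmax(skipna=False)
    except ValueError as err:
        print(err)  # Encountered an NA value with skipna=False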
17 changes: 4 additions & 13 deletions pandas/core/base.py
@@ -735,13 +735,8 @@ def argmax(
nv.validate_minmax_axis(axis)
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

if skipna and len(delegate) > 0 and isna(delegate).all():
raise ValueError("Encountered all NA values")
elif not skipna and isna(delegate).any():
raise ValueError("Encountered an NA value with skipna=False")

if isinstance(delegate, ExtensionArray):
return delegate.argmax()
return delegate.argmax(skipna=skipna)
else:
result = nanops.nanargmax(delegate, skipna=skipna)
# error: Incompatible return value type (got "Union[int, ndarray]", expected
@@ -754,15 +749,10 @@
) -> int:
delegate = self._values
nv.validate_minmax_axis(axis)
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)

if skipna and len(delegate) > 0 and isna(delegate).all():
raise ValueError("Encountered all NA values")
elif not skipna and isna(delegate).any():
raise ValueError("Encountered an NA value with skipna=False")
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

if isinstance(delegate, ExtensionArray):
return delegate.argmin()
return delegate.argmin(skipna=skipna)
else:
result = nanops.nanargmin(delegate, skipna=skipna)
# error: Incompatible return value type (got "Union[int, ndarray]", expected
@@ -924,6 +914,7 @@ def value_counts(
Returns
-------
Series
Series containing counts of unique values.
See Also
--------
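The docstring change above just fills in the missing return description; for completeness, a tiny sketch of the Series that value_counts returns (the result's name and dtype are assumed from recent pandas behavior):

    import pandas as pd

    s = pd.Series(["a", "b", "a", "a"])

    print(s.value_counts())
    # a    3
    # b    1
    # Name: count, dtype: int64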
