Skip to content

Commit

Permalink
Validate dir for pd.errors and pd.util (pandas-dev#57140)
Browse files Browse the repository at this point in the history
* Ensure pandas.errors only imports its __all__

* Make public util API accessible

* Fix import

* Type input
  • Loading branch information
mroeschke committed Jan 31, 2024
1 parent 29a3682 commit cad0d87
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 78 deletions.
7 changes: 4 additions & 3 deletions doc/source/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ The following subpackages are public.
`pandas-stubs <https://github.com/pandas-dev/pandas-stubs>`_ package
which has classes in addition to those that occur in pandas for type-hinting.

In addition, public functions in ``pandas.io`` and ``pandas.tseries`` submodules
are mentioned in the documentation.
In addition, public functions in the ``pandas.io``, ``pandas.tseries``, and ``pandas.util`` submodules
are explicitly mentioned in the documentation. Further APIs in these modules are not guaranteed
to be stable.


.. warning::

The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed.
The ``pandas.core`` and ``pandas.compat`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed.

.. If you update this toctree, also update the manual toctree in the
.. main index.rst.template
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
from pandas.errors import (
ChainedAssignmentError,
InvalidIndexError,
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_msg,
_chained_assignment_warning_method_msg,
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@
InvalidIndexError,
SettingWithCopyError,
SettingWithCopyWarning,
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_warning_method_msg,
_check_cacher,
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
IndexingError,
InvalidIndexError,
LossySetitemError,
)
from pandas.errors.cow import (
_chained_assignment_msg,
_chained_assignment_warning_msg,
_check_cacher,
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
from pandas.errors import (
ChainedAssignmentError,
InvalidIndexError,
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_msg,
_chained_assignment_warning_method_msg,
Expand Down
76 changes: 1 addition & 75 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,81 +475,6 @@ class ChainedAssignmentError(Warning):
"""


# Text for plain chained assignment under Copy-on-Write mode: explains that the
# intermediate object behaves as a copy and recommends a single-step ``.loc``
# assignment. Ends with a documentation URL and no trailing newline.
_chained_assignment_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment.\n"
"When using the Copy-on-Write mode, such chained assignment never works "
"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
"the assignment in a single step.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy"
)


# Text for chained assignment done through an inplace method under
# Copy-on-Write mode: suggests calling the method on the original object.
# The text ends with a blank line ("\n\n").
_chained_assignment_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment using an inplace method.\n"
"When using the Copy-on-Write mode, such inplace method never works "
"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"For example, when doing 'df[col].method(value, inplace=True)', try "
"using 'df.method({col: value}, inplace=True)' instead, to perform "
"the operation inplace on the original object.\n\n"
)


# Warning text announcing the pandas 3.0 behaviour change for chained
# assignment. The "ChainedAssignmentError: " prefix is part of the message
# string itself, followed by an example and the recommended ``.loc`` form.
_chained_assignment_warning_msg = (
"ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
"You are setting values through chained assignment. Currently this works "
"in certain cases, but when using Copy-on-Write (which will become the "
"default behaviour in pandas 3.0) this will never work to update the "
"original DataFrame or Series, because the intermediate object on which "
"we are setting values will behave as a copy.\n"
"A typical example is when you are setting values in a column of a "
"DataFrame, like:\n\n"
'df["col"][row_indexer] = value\n\n'
'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
"assignment in a single step and ensure this keeps updating the original `df`.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy\n"
)


# Warning text for the inplace-method variant ahead of the pandas 3.0 change:
# offers both ``df.method({col: value}, inplace=True)`` and re-assignment
# (``df[col] = df[col].method(value)``) as replacements.
_chained_assignment_warning_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment using an inplace method.\n"
"The behavior will change in pandas 3.0. This inplace method will "
"never work because the intermediate object on which we are setting "
"values always behaves as a copy.\n\n"
"For example, when doing 'df[col].method(value, inplace=True)', try "
"using 'df.method({col: value}, inplace=True)' or "
"df[col] = df[col].method(value) instead, to perform "
"the operation inplace on the original object.\n\n"
)


def _check_cacher(obj) -> bool:
    """
    Tell whether ``obj`` is the exact object cached by its parent.

    ``obj._cacher`` is read as a pair: item 0 is the key used in the parent's
    ``_item_cache`` and item 1 is a callable that returns the parent (or
    ``None`` when the parent no longer exists).

    Returns
    -------
    bool
        True only if the parent's ``_item_cache`` holds ``obj`` itself under
        that key; False in every other situation, including when ``obj`` has
        no ``_cacher`` attribute.
    """
    # Selection paths that hand back a view set ``_cacher`` on the Series.
    # Most of them also fill ``_item_cache`` (adding one to the reference
    # count we care about), but iloc does not, so presence in the item cache
    # has to be verified explicitly.
    if hasattr(obj, "_cacher"):
        owner = obj._cacher[1]()
        # ``owner`` is None when the parent is already dead.
        if owner is not None and hasattr(owner, "_item_cache"):
            label = obj._cacher[0]
            if label in owner._item_cache:
                # iloc builds a new object, so require identity with the
                # cached entry rather than mere key membership.
                return owner._item_cache[label] is obj
    return False


class NumExprClobberingError(NameError):
"""
Exception raised when trying to use a built-in numexpr name as a variable name.
Expand Down Expand Up @@ -831,6 +756,7 @@ class InvalidComparison(Exception):
"AbstractMethodError",
"AttributeConflictWarning",
"CategoricalConversionWarning",
"ChainedAssignmentError",
"ClosedFileError",
"CSSWarning",
"DatabaseError",
Expand Down
74 changes: 74 additions & 0 deletions pandas/errors/cow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from typing import Any

# Text for plain chained assignment under Copy-on-Write mode: explains that the
# intermediate object behaves as a copy and recommends a single-step ``.loc``
# assignment. Ends with a documentation URL and no trailing newline.
_chained_assignment_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment.\n"
"When using the Copy-on-Write mode, such chained assignment never works "
"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
"the assignment in a single step.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy"
)


# Text for chained assignment done through an inplace method under
# Copy-on-Write mode: suggests calling the method on the original object.
# The text ends with a blank line ("\n\n").
_chained_assignment_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment using an inplace method.\n"
"When using the Copy-on-Write mode, such inplace method never works "
"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"For example, when doing 'df[col].method(value, inplace=True)', try "
"using 'df.method({col: value}, inplace=True)' instead, to perform "
"the operation inplace on the original object.\n\n"
)


# Warning text announcing the pandas 3.0 behaviour change for chained
# assignment. The "ChainedAssignmentError: " prefix is part of the message
# string itself, followed by an example and the recommended ``.loc`` form.
_chained_assignment_warning_msg = (
"ChainedAssignmentError: behaviour will change in pandas 3.0!\n"
"You are setting values through chained assignment. Currently this works "
"in certain cases, but when using Copy-on-Write (which will become the "
"default behaviour in pandas 3.0) this will never work to update the "
"original DataFrame or Series, because the intermediate object on which "
"we are setting values will behave as a copy.\n"
"A typical example is when you are setting values in a column of a "
"DataFrame, like:\n\n"
'df["col"][row_indexer] = value\n\n'
'Use `df.loc[row_indexer, "col"] = values` instead, to perform the '
"assignment in a single step and ensure this keeps updating the original `df`.\n\n"
"See the caveats in the documentation: "
"https://pandas.pydata.org/pandas-docs/stable/user_guide/"
"indexing.html#returning-a-view-versus-a-copy\n"
)

# Warning text for the inplace-method variant ahead of the pandas 3.0 change:
# offers both ``df.method({col: value}, inplace=True)`` and re-assignment
# (``df[col] = df[col].method(value)``) as replacements.
_chained_assignment_warning_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment using an inplace method.\n"
"The behavior will change in pandas 3.0. This inplace method will "
"never work because the intermediate object on which we are setting "
"values always behaves as a copy.\n\n"
"For example, when doing 'df[col].method(value, inplace=True)', try "
"using 'df.method({col: value}, inplace=True)' or "
"df[col] = df[col].method(value) instead, to perform "
"the operation inplace on the original object.\n\n"
)


def _check_cacher(obj: Any) -> bool:
    """
    Report whether ``obj`` is the entry its parent keeps in ``_item_cache``.

    ``obj._cacher`` is read as a pair: item 0 is the key used in the parent's
    ``_item_cache`` and item 1 is a callable that returns the parent (or
    ``None`` when the parent no longer exists).

    Parameters
    ----------
    obj : Any
        Object to inspect; it need not define ``_cacher``.

    Returns
    -------
    bool
        True only if the parent's ``_item_cache`` holds ``obj`` itself under
        the recorded key; False otherwise.
    """
    # Selection paths that return a view set ``_cacher`` on the Series. Most
    # of them also populate ``_item_cache`` (which adds one to the reference
    # count we care about), but iloc does not, so membership in the item
    # cache has to be checked explicitly.
    if not hasattr(obj, "_cacher"):
        return False

    parent = obj._cacher[1]()
    # The parent may already be dead, or may not keep an item cache at all.
    if parent is None or not hasattr(parent, "_item_cache"):
        return False

    key = obj._cacher[0]
    cache = parent._item_cache
    if key not in cache:
        return False

    # iloc creates a new object, so require identity with the cached entry.
    return obj is cache[key]
23 changes: 23 additions & 0 deletions pandas/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,29 @@ def test_api_extensions(self):
self.check(api_extensions, self.allowed_api_extensions)


class TestErrors(Base):
    """Namespace check for ``pd.errors`` against its own ``__all__``."""

    def test_errors(self):
        # NOTE(review): ``Base.check`` is defined outside this view; these
        # names are passed as ``ignored`` -- presumably so they are skipped
        # in the comparison. Confirm against ``Base``.
        skipped = ["ctypes", "cow"]
        expected = pd.errors.__all__
        self.check(pd.errors, expected, ignored=skipped)


class TestUtil(Base):
    """Namespace check for ``pd.util`` against its two hashing functions."""

    def test_util(self):
        expected = ["hash_array", "hash_pandas_object"]
        # NOTE(review): ``Base.check`` is defined outside this view; these
        # names are passed as ``ignored`` -- presumably so they are skipped
        # in the comparison. Confirm against ``Base``.
        skipped = [
            "_decorators",
            "_test_decorators",
            "_exceptions",
            "_validators",
            "capitalize_first_letter",
            "version",
            "_print_versions",
            "_tester",
        ]
        self.check(pd.util, expected, ignored=skipped)


class TestTesting(Base):
funcs = [
"assert_frame_equal",
Expand Down
4 changes: 4 additions & 0 deletions pandas/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,9 @@ def __getattr__(key: str):
raise AttributeError(f"module 'pandas.util' has no attribute '{key}'")


def __dir__():
    """
    List this module's global names followed by the two hashing functions.

    ``hash_array`` and ``hash_pandas_object`` are appended explicitly; the
    rest of the list comes from the module globals, in their current order.
    """
    return [*globals(), "hash_array", "hash_pandas_object"]


def capitalize_first_letter(s: str) -> str:
    """
    Return ``s`` with its first character upper-cased.

    Everything after the first character is left untouched, and an empty
    string is returned unchanged.
    """
    head, tail = s[:1], s[1:]
    return head.upper() + tail

0 comments on commit cad0d87

Please sign in to comment.