Skip to content

Commit

Permalink
[ENH] deep_equals - clearer return on diffs from dtypes and `inde…
Browse files Browse the repository at this point in the history
…x`, relaxation of `MultiIndex` equality check (#5560)

This PR improves error messaging in `deep_equals` and makes checks on
`pandas`-based indices stricter.

Changes:

* `dtypes` of `pd.DataFrame`-s are now checked explicitly
* providing a clearer return on `pd.DataFrame` `dtypes` not being equal
* `pd.Index` `dtypes` are now checked explicitly
* index equality check is slightly relaxed. `DataFrame.equals` may
consider `MultiIndex` as unequal due to irrelevant typing in some
`python` versions, so instead we now explicitly check equality of the
index names and values

This should help diagnose the `dask` inconsistency in the
`VectorizedDF.reconstruct` return; I suspect it is the `dtype`.
Example of test failure (only on certain python versions) here:
#5552
(update: this was due to `MultiIndex` type ambiguity, with the
relaxation of the `deep_equals` check this should be fine now)

Depends on #5561 which fixes inconsistencies uncovered through
the above.
  • Loading branch information
fkiraly committed Nov 24, 2023
1 parent 7c3cc4b commit 05306bb
Showing 1 changed file with 65 additions and 1 deletion.
66 changes: 65 additions & 1 deletion sktime/utils/_testing/deep_equals.py
Expand Up @@ -98,11 +98,67 @@ def ret(is_equal, msg="", string_arguments: list = None):
else:
return ret(x.equals(y), ".series_equals, x = {} != y = {}", [x, y])
elif isinstance(x, pd.DataFrame):
# check column names for equality
if not x.columns.equals(y.columns):
return ret(
False, f".columns, x.columns = {x.columns} != y.columns = {y.columns}"
)
# if columns are equal and at least one is object, recurse over Series
# check dtypes for equality
if not x.dtypes.equals(y.dtypes):
return ret(
False, f".dtypes, x.dtypes = {x.dtypes} != y.dtypes = {y.dtypes}"
)
# check index for equality
# we are not recursing due to ambiguity in integer index types
# which may differ from pandas version to pandas version
# and would upset the type check, e.g., RangeIndex(2) vs Index([0, 1])
xix = x.index
yix = y.index
if hasattr(xix, "dtype") and hasattr(xix, "dtype"):
if not xix.dtype == yix.dtype:
return ret(
False,
".index.dtype, x.index.dtype = {} != y.index.dtype = {}",
[xix.dtype, yix.dtype],
)
if hasattr(xix, "dtypes") and hasattr(yix, "dtypes"):
if not x.dtypes.equals(y.dtypes):
return ret(
False,
".index.dtypes, x.dtypes = {} != y.index.dtypes = {}",
[xix.dtypes, yix.dtypes],
)
ix_eq = xix.equals(yix)
if not ix_eq:
if not len(xix) == len(yix):
return ret(
False,
".index.len, x.index.len = {} != y.index.len = {}",
[len(xix), len(yix)],
)
if hasattr(xix, "name") and hasattr(yix, "name"):
if not xix.name == yix.name:
return ret(
False,
".index.name, x.index.name = {} != y.index.name = {}",
[xix.name, yix.name],
)
if hasattr(xix, "names") and hasattr(yix, "names"):
if not len(xix.names) == len(yix.names):
return ret(
False,
".index.names, x.index.names = {} != y.index.name = {}",
[xix.names, yix.names],
)
if not np.all(xix.names == yix.names):
return ret(
False,
".index.names, x.index.names = {} != y.index.name = {}",
[xix.names, yix.names],
)
elts_eq = np.all(xix == yix)
return ret(elts_eq, ".index.equals, x = {} != y = {}", [xix, yix])
# if columns, dtypes are equal and at least one is object, recurse over Series
if sum(x.dtypes == "object") > 0:
for c in x.columns:
is_equal, msg = deep_equals(x[c], y[c], return_msg=True)
Expand All @@ -112,6 +168,14 @@ def ret(is_equal, msg="", string_arguments: list = None):
else:
return ret(x.equals(y), ".df_equals, x = {} != y = {}", [x, y])
elif isinstance(x, pd.Index):
if hasattr(x, "dtype") and hasattr(y, "dtype"):
if not x.dtype == y.dtype:
return ret(False, f".dtype, x.dtype = {x.dtype} != y.dtype = {y.dtype}")
if hasattr(x, "dtypes") and hasattr(y, "dtypes"):
if not x.dtypes.equals(y.dtypes):
return ret(
False, f".dtypes, x.dtypes = {x.dtypes} != y.dtypes = {y.dtypes}"
)
return ret(x.equals(y), ".index_equals, x = {} != y = {}", [x, y])
elif isinstance(x, np.ndarray):
if x.dtype != y.dtype:
Expand Down

0 comments on commit 05306bb

Please sign in to comment.