Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a few test failures on big-endian systems #46681

Merged
merged 2 commits on Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/_testing/__init__.py
Expand Up @@ -8,6 +8,7 @@
import os
import re
import string
from sys import byteorder
from typing import (
TYPE_CHECKING,
Callable,
Expand Down Expand Up @@ -168,6 +169,8 @@
np.uint32,
]

ENDIAN = {"little": "<", "big": ">"}[byteorder]

NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
NP_NAT_OBJECTS = [
cls("NaT", unit)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_astype.py
Expand Up @@ -20,7 +20,7 @@ def test_astype():
tm.assert_numpy_array_equal(result, expected)

result = arr.astype("str")
expected = np.array(["True", "False", "<NA>"], dtype="<U5")
expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
tm.assert_numpy_array_equal(result, expected)

# no missing values
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_construction.py
Expand Up @@ -273,7 +273,7 @@ def test_to_numpy(box):

arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype="str")
expected = np.array([True, False, pd.NA], dtype="<U5")
expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
tm.assert_numpy_array_equal(result, expected)

# no missing values -> can convert to bool, otherwise raises
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/floating/test_to_numpy.py
Expand Up @@ -115,7 +115,7 @@ def test_to_numpy_string(box, dtype):
arr = con([0.0, 1.0, None], dtype="Float64")

result = arr.to_numpy(dtype="str")
expected = np.array([0.0, 1.0, pd.NA], dtype="<U32")
expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
tm.assert_numpy_array_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/integer/test_dtypes.py
Expand Up @@ -283,7 +283,7 @@ def test_to_numpy_na_raises(dtype):

def test_astype_str():
a = pd.array([1, 2, None], dtype="Int64")
expected = np.array(["1", "2", "<NA>"], dtype="<U21")
expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")

tm.assert_numpy_array_equal(a.astype(str), expected)
tm.assert_numpy_array_equal(a.astype("str"), expected)
Expand Down
147 changes: 116 additions & 31 deletions pandas/tests/frame/methods/test_to_records.py
Expand Up @@ -151,106 +151,176 @@ def test_to_records_with_categorical(self):
{},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Should have no effect in this case.
(
{"index": True},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Column dtype applied across the board. Index unaffected.
(
{"column_dtypes": "<U4"},
{"column_dtypes": f"{tm.ENDIAN}U4"},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "<U4"), ("B", "<U4"), ("C", "<U4")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}U4"),
("B", f"{tm.ENDIAN}U4"),
("C", f"{tm.ENDIAN}U4"),
],
),
),
# Index dtype applied across the board. Columns unaffected.
(
{"index_dtypes": "<U1"},
{"index_dtypes": f"{tm.ENDIAN}U1"},
np.rec.array(
[("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
dtype=[("index", "<U1"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}U1"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Pass in a type instance.
(
{"column_dtypes": str},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}U"),
("B", f"{tm.ENDIAN}U"),
("C", f"{tm.ENDIAN}U"),
],
),
),
# Pass in a dtype instance.
(
{"column_dtypes": np.dtype("unicode")},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}U"),
("B", f"{tm.ENDIAN}U"),
("C", f"{tm.ENDIAN}U"),
],
),
),
# Pass in a dictionary (name-only).
(
{"column_dtypes": {"A": np.int8, "B": np.float32, "C": "<U2"}},
{
"column_dtypes": {
"A": np.int8,
"B": np.float32,
"C": f"{tm.ENDIAN}U2",
}
},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "<U2")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", f"{tm.ENDIAN}U2"),
],
),
),
# Pass in a dictionary (indices-only).
(
{"index_dtypes": {0: "int16"}},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", "i2"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Ignore index mappings if index is not True.
(
{"index": False, "index_dtypes": "<U2"},
{"index": False, "index_dtypes": f"{tm.ENDIAN}U2"},
np.rec.array(
[(1, 0.2, "a"), (2, 1.5, "bc")],
dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Non-existent names / indices in mapping should not error.
(
{"index_dtypes": {0: "int16", "not-there": "float32"}},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", "i2"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Names / indices not in mapping default to array dtype.
(
{"column_dtypes": {"A": np.int8, "B": np.float32}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
),
),
# Names / indices not in dtype mapping default to array dtype.
(
{"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
),
),
# Mixture of everything.
(
{
"column_dtypes": {"A": np.int8, "B": np.float32},
"index_dtypes": "<U2",
"index_dtypes": f"{tm.ENDIAN}U2",
},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}U2"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
),
),
# Invalid dtype values.
Expand Down Expand Up @@ -299,7 +369,11 @@ def test_to_records_dtype(self, kwargs, expected):
{"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}},
np.rec.array(
[(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)],
dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")],
dtype=[
("a", f"{tm.ENDIAN}i4"),
("b", "i1"),
("c", f"{tm.ENDIAN}f8"),
],
),
),
# MultiIndex in the columns.
Expand All @@ -310,14 +384,17 @@ def test_to_records_dtype(self, kwargs, expected):
[("a", "d"), ("b", "e"), ("c", "f")]
),
),
{"column_dtypes": {0: "<U1", 2: "float32"}, "index_dtypes": "float32"},
{
"column_dtypes": {0: f"{tm.ENDIAN}U1", 2: "float32"},
"index_dtypes": "float32",
},
np.rec.array(
[(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)],
dtype=[
("index", "<f4"),
("('a', 'd')", "<U1"),
("('b', 'e')", "<i8"),
("('c', 'f')", "<f4"),
("index", f"{tm.ENDIAN}f4"),
("('a', 'd')", f"{tm.ENDIAN}U1"),
("('b', 'e')", f"{tm.ENDIAN}i8"),
("('c', 'f')", f"{tm.ENDIAN}f4"),
],
),
),
Expand All @@ -332,19 +409,22 @@ def test_to_records_dtype(self, kwargs, expected):
[("d", -4), ("d", -5), ("f", -6)], names=list("cd")
),
),
{"column_dtypes": "float64", "index_dtypes": {0: "<U2", 1: "int8"}},
{
"column_dtypes": "float64",
"index_dtypes": {0: f"{tm.ENDIAN}U2", 1: "int8"},
},
np.rec.array(
[
("d", -4, 1.0, 2.0, 3.0),
("d", -5, 4.0, 5.0, 6.0),
("f", -6, 7, 8, 9.0),
],
dtype=[
("c", "<U2"),
("c", f"{tm.ENDIAN}U2"),
("d", "i1"),
("('a', 'd')", "<f8"),
("('b', 'e')", "<f8"),
("('c', 'f')", "<f8"),
("('a', 'd')", f"{tm.ENDIAN}f8"),
("('b', 'e')", f"{tm.ENDIAN}f8"),
("('c', 'f')", f"{tm.ENDIAN}f8"),
],
),
),
Expand Down Expand Up @@ -374,13 +454,18 @@ def keys(self):

dtype_mappings = {
"column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}),
"index_dtypes": "<U2",
"index_dtypes": f"{tm.ENDIAN}U2",
}

result = df.to_records(**dtype_mappings)
expected = np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}U2"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
)
tm.assert_almost_equal(result, expected)

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/io/parser/test_c_parser_only.py
Expand Up @@ -144,9 +144,12 @@ def test_dtype_and_names_error(c_parser_only):
"the dtype timedelta64 is not supported for parsing",
{"dtype": {"A": "timedelta64", "B": "float64"}},
),
("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
(
f"the dtype {tm.ENDIAN}U8 is not supported for parsing",
{"dtype": {"A": "U8"}},
),
],
ids=["dt64-0", "dt64-1", "td64", "<U8"],
ids=["dt64-0", "dt64-1", "td64", f"{tm.ENDIAN}U8"],
)
def test_unsupported_dtype(c_parser_only, match, kwargs):
parser = c_parser_only
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/tools/test_to_timedelta.py
Expand Up @@ -198,7 +198,8 @@ def test_to_timedelta_on_missing_values(self):

actual = to_timedelta(Series(["00:00:01", np.nan]))
expected = Series(
[np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"
[np.timedelta64(1000000000, "ns"), timedelta_NaT],
dtype=f"{tm.ENDIAN}m8[ns]",
)
tm.assert_series_equal(actual, expected)

Expand Down