Skip to content

Commit

Permalink
Fix some arrow failures
Browse files: browse the repository at this point in the history
  • Loading branch information
mroeschke committed May 10, 2024
1 parent bd6bd56 commit e071ed6
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 1 deletion.
12 changes: 12 additions & 0 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,10 @@ def test_trailing_spaces(all_parsers, kwargs, expected_data):
data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa: E501
parser = all_parsers

if parser.engine == "pyarrow":
with pytest.raises(ValueError, match="the 'pyarrow' engine does not support"):
parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
return
expected = DataFrame(expected_data)
result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
tm.assert_frame_equal(result, expected)
Expand All @@ -473,6 +477,14 @@ def test_single_char_leading_whitespace(all_parsers):
a
b\n"""

if parser.engine == "pyarrow":
msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data),
skipinitialspace=True,
)
return
expected = DataFrame({"MyColumn": list("abab")})
result = parser.read_csv(StringIO(data), skipinitialspace=True, sep=r"\s+")
tm.assert_frame_equal(result, expected)
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/io/parser/test_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,8 @@ def test_header_missing_rows(all_parsers):
parser.read_csv(StringIO(data), header=[0, 1, 2])


# ValueError: the 'pyarrow' engine does not support regex separators
@xfail_pyarrow
def test_header_multiple_whitespaces(all_parsers):
# GH#54931
parser = all_parsers
Expand All @@ -693,6 +695,8 @@ def test_header_multiple_whitespaces(all_parsers):
tm.assert_frame_equal(result, expected)


# ValueError: the 'pyarrow' engine does not support regex separators
@xfail_pyarrow
def test_header_delim_whitespace(all_parsers):
# GH#54918
parser = all_parsers
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/io/parser/test_skiprows.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
@pytest.mark.parametrize(
"lineterminator",
["\n", "\r\n", "\r"], # "LF" # "CRLF" # "CR"
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/io/parser/usecols/test_usecols_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ def test_usecols_single_string(all_parsers):
parser.read_csv(StringIO(data), usecols="foo")


@skip_pyarrow # CSV parse error in one case, AttributeError in another
# ArrowKeyError: Column 'a' in include_columns does not exist in CSV file
@skip_pyarrow
@pytest.mark.parametrize(
"data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"]
)
Expand Down Expand Up @@ -254,6 +255,7 @@ def test_usecols_regex_sep(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow # Column 'a' in include_columns does not exist in CSV file
def test_usecols_with_whitespace(all_parsers):
parser = all_parsers
data = "a b c\n4 apple bat 5.7\n8 orange cow 10"
Expand Down

0 comments on commit e071ed6

Please sign in to comment.