Fix some arrow failures

pandas-dev · May 10, 2024 · e071ed6
1 parent bd6bd56
commit e071ed6
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 1 deletion.
diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
@@ -450,6 +450,10 @@ def test_trailing_spaces(all_parsers, kwargs, expected_data):
     data = "A B C  \nrandom line with trailing spaces    \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n   \n5.1,NaN,10.0\n"  # noqa: E501
     parser = all_parsers
 
+    if parser.engine == "pyarrow":
+        with pytest.raises(ValueError, match="the 'pyarrow' engine does not support"):
+            parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
+        return
     expected = DataFrame(expected_data)
     result = parser.read_csv(StringIO(data.replace(",", "  ")), **kwargs)
     tm.assert_frame_equal(result, expected)
@@ -473,6 +477,14 @@ def test_single_char_leading_whitespace(all_parsers):
 a
 b\n"""
 
+    if parser.engine == "pyarrow":
+        msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
+        with pytest.raises(ValueError, match=msg):
+            parser.read_csv(
+                StringIO(data),
+                skipinitialspace=True,
+            )
+        return
     expected = DataFrame({"MyColumn": list("abab")})
     result = parser.read_csv(StringIO(data), skipinitialspace=True, sep=r"\s+")
     tm.assert_frame_equal(result, expected)

diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
@@ -682,6 +682,8 @@ def test_header_missing_rows(all_parsers):
         parser.read_csv(StringIO(data), header=[0, 1, 2])
 
 
+# ValueError: the 'pyarrow' engine does not support regex separators
+@xfail_pyarrow
 def test_header_multiple_whitespaces(all_parsers):
     # GH#54931
     parser = all_parsers
@@ -693,6 +695,8 @@ def test_header_multiple_whitespaces(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+# ValueError: the 'pyarrow' engine does not support regex separators
+@xfail_pyarrow
 def test_header_delim_whitespace(all_parsers):
     # GH#54918
     parser = all_parsers

diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py
@@ -187,6 +187,7 @@ def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data):
     tm.assert_frame_equal(result, expected)
 
 
+@xfail_pyarrow  # ValueError: the 'pyarrow' engine does not support regex separators
 @pytest.mark.parametrize(
     "lineterminator",
     ["\n", "\r\n", "\r"],  # "LF"  # "CRLF"  # "CR"

diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -158,7 +158,8 @@ def test_usecols_single_string(all_parsers):
         parser.read_csv(StringIO(data), usecols="foo")
 
 
-@skip_pyarrow  # CSV parse error in one case, AttributeError in another
+# ArrowKeyError: Column 'a' in include_columns does not exist in CSV file
+@skip_pyarrow
 @pytest.mark.parametrize(
     "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"]
 )
@@ -254,6 +255,7 @@ def test_usecols_regex_sep(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@skip_pyarrow  # Column 'a' in include_columns does not exist in CSV file
 def test_usecols_with_whitespace(all_parsers):
     parser = all_parsers
     data = "a  b  c\n4  apple  bat  5.7\n8  orange  cow  10"