diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index f58180ff5dd..2197c1d2c67 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -987,10 +987,11 @@ def replace( raise NotImplementedError("`flags` parameter is not yet supported") if can_convert_to_column(pat) and can_convert_to_column(repl): - warnings.warn( - "`n` parameter is not supported when " - "`pat` and `repl` are list-like inputs" - ) + if n != -1: + warnings.warn( + "`n` parameter is not supported when " + "`pat` and `repl` are list-like inputs" + ) return self._return_or_inplace( libstrings.replace_multi_re( diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f36e1cae9f1..fa04706c011 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2231,18 +2231,18 @@ def update(self, other): If ``other`` contains NaNs the corresponding values are not updated in the original Series. - >>> s = cudf.Series([1, 2, 3]) + >>> s = cudf.Series([1.0, 2.0, 3.0]) >>> s - 0 1 - 1 2 - 2 3 - dtype: int64 - >>> s.update(cudf.Series([4, np.nan, 6], nan_as_null=False)) + 0 1.0 + 1 2.0 + 2 3.0 + dtype: float64 + >>> s.update(cudf.Series([4.0, np.nan, 6.0], nan_as_null=False)) >>> s - 0 4 - 1 2 - 2 6 - dtype: int64 + 0 4.0 + 1 2.0 + 2 6.0 + dtype: float64 ``other`` can also be a non-Series object type that is coercible into a Series diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index ceb08cb8058..456a8fa5463 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -6,7 +6,7 @@ import warnings from collections import defaultdict from contextlib import ExitStack -from typing import Dict, List, Tuple +from typing import Dict, List, Optional, Tuple from uuid import uuid4 from pyarrow import dataset as ds, parquet as pq @@ -1010,9 +1010,13 @@ def __init__( ) -> None: if isinstance(path, str) and path.startswith("s3://"): self.fs_meta = {"is_s3": True, "actual_path": path} - self.path = tempfile.TemporaryDirectory().name + self.dir_: Optional[ + tempfile.TemporaryDirectory + ] = tempfile.TemporaryDirectory() + self.path = self.dir_.name else: self.fs_meta = {} + self.dir_ = None self.path = path self.common_args = { @@ -1194,6 +1198,9 @@ def close(self, return_metadata=False): s3_file.put(local_path, s3_path, recursive=True) shutil.rmtree(self.path) + if self.dir_ is not None: + self.dir_.cleanup() + if return_metadata: return ( merge_parquet_filemetadata(metadata) diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index c7174adf342..36c13eeefdf 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -16,6 +16,7 @@ SIGNED_TYPES, _decimal_series, assert_eq, + expect_warning_if, ) data_ = [ @@ -200,7 +201,8 @@ def test_typecast_to_from_decimal(data, from_dtype, to_dtype): elif isinstance(to_dtype, Decimal64Dtype): expected = cudf.Series(Decimal64Column.from_arrow(pa_arr)) - got = s.astype(to_dtype) + with expect_warning_if(to_dtype.scale < s.dtype.scale, UserWarning): + got = s.astype(to_dtype) assert_eq(got, expected) diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py index 6f8305e6751..0981e850c10 100644 --- a/python/cudf/cudf/tests/test_df_protocol.py +++ b/python/cudf/cudf/tests/test_df_protocol.py @@ -40,7 +40,7 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol): ) if dtype[0] != _DtypeKind.BOOL: - array_from_dlpack = cp.fromDlpack(buf.__dlpack__()).get() + array_from_dlpack = cp.from_dlpack(buf.__dlpack__()).get() col_array = cp.asarray(cudfcol.data_array_view).get() assert_eq( array_from_dlpack[non_null_idxs.to_numpy()].flatten(), diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py index e779ac276a3..dbb5c548166 100644 --- a/python/cudf/cudf/tests/test_doctests.py +++ b/python/cudf/cudf/tests/test_doctests.py @@ -10,6 +10,8 @@ import cudf +pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning") + def _name_in_all(parent, name): return name in getattr(parent, "__all__", []) diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py index 98061f4e977..cc58b7ba1f5 100644 --- a/python/cudf/cudf/tests/test_duplicates.py +++ b/python/cudf/cudf/tests/test_duplicates.py @@ -4,8 +4,8 @@ import random import numpy as np +import pandas as pd import pytest -from pandas import DataFrame, MultiIndex, Series, date_range import cudf from cudf import concat @@ -40,7 +40,7 @@ def assert_df2(g, p): @pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) def test_duplicated_with_misspelled_column_name(subset): - df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + df = pd.DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) gdf = cudf.DataFrame.from_pandas(df) assert_exceptions_equal( @@ -59,7 +59,7 @@ def test_duplicated_with_misspelled_column_name(subset): [1, 2, 4, 5, 6, 6], [], ["a", "b", "s", "sd", "a", "b"], - Series(["aaa"] * 10, dtype="object"), + pd.Series(["aaa"] * 10, dtype="object"), ], ) def test_drop_duplicates_series(data, keep): @@ -73,7 +73,7 @@ def test_drop_duplicates_series(data, keep): def test_drop_duplicates(): - pdf = DataFrame( + pdf = pd.DataFrame( { "AAA": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], @@ -146,36 +146,40 @@ def test_drop_duplicates(): expected = pdf.drop_duplicates("E", keep="last") assert_df(result, expected) - pdf = DataFrame({"x": [7, 6, 3, 3, 4, 8, 0], "y": [0, 6, 5, 5, 9, 1, 2]}) + pdf = pd.DataFrame( + {"x": [7, 6, 3, 3, 4, 8, 0], "y": [0, 6, 5, 5, 9, 1, 2]} + ) gdf = cudf.DataFrame.from_pandas(pdf) assert_df(gdf.drop_duplicates(), pdf.drop_duplicates()) - pdf = DataFrame([[1, 0], [0, 2]]) + pdf = pd.DataFrame([[1, 0], [0, 2]]) gdf = cudf.DataFrame.from_pandas(pdf) assert_df(gdf.drop_duplicates(), pdf.drop_duplicates()) - pdf = DataFrame([[-2, 0], [0, -4]]) + pdf = pd.DataFrame([[-2, 0], [0, -4]]) gdf = cudf.DataFrame.from_pandas(pdf) assert_df(gdf.drop_duplicates(), pdf.drop_duplicates()) x = np.iinfo(np.int64).max / 3 * 2 - pdf = DataFrame([[-x, x], [0, x + 4]]) + pdf = pd.DataFrame([[-x, x], [0, x + 4]]) gdf = cudf.DataFrame.from_pandas(pdf) assert_df(gdf.drop_duplicates(), pdf.drop_duplicates()) - pdf = DataFrame([[-x, x], [x, x + 4]]) + pdf = pd.DataFrame([[-x, x], [x, x + 4]]) gdf = cudf.DataFrame.from_pandas(pdf) assert_df(gdf.drop_duplicates(), pdf.drop_duplicates()) - pdf = DataFrame([i] * 9 for i in range(16)) - pdf = pdf.append([[1] + [0] * 8], ignore_index=True) + pdf = pd.DataFrame([i] * 9 for i in range(16)) + pdf = pd.concat([pdf, pd.DataFrame([[1] + [0] * 8])], ignore_index=True) gdf = cudf.DataFrame.from_pandas(pdf) assert_df(gdf.drop_duplicates(), pdf.drop_duplicates()) @pytest.mark.skip(reason="cudf does not support duplicate column names yet") def test_drop_duplicates_with_duplicate_column_names(): - df = DataFrame([[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"]) + df = pd.DataFrame( + [[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"] + ) df = cudf.DataFrame.from_pandas(df) result0 = df.drop_duplicates() @@ -187,7 +191,7 @@ def test_drop_duplicates_with_duplicate_column_names(): def test_drop_duplicates_for_take_all(): - pdf = DataFrame( + pdf = pd.DataFrame( { "AAA": ["foo", "bar", "baz", "bar", "foo", "bar", "qux", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], @@ -224,7 +228,7 @@ def test_drop_duplicates_for_take_all(): def test_drop_duplicates_tuple(): - pdf = DataFrame( + pdf = pd.DataFrame( { ("AA", "AB"): [ "foo", @@ -265,11 +269,11 @@ def test_drop_duplicates_tuple(): @pytest.mark.parametrize( "df", [ - DataFrame(), - DataFrame(columns=[]), - DataFrame(columns=["A", "B", "C"]), - DataFrame(index=[]), - DataFrame(index=["A", "B", "C"]), + pd.DataFrame(), + pd.DataFrame(columns=[]), + pd.DataFrame(columns=["A", "B", "C"]), + pd.DataFrame(index=[]), + pd.DataFrame(index=["A", "B", "C"]), ], ) def test_drop_duplicates_empty(df): @@ -292,7 +296,7 @@ def get_pdf(n_dup): # create dataframe with n_dup duplicate rows rows = comb + shuf[:n_dup] random.Random(n_dup).shuffle(rows) - return DataFrame(rows) + return pd.DataFrame(rows) for i in range(5): pdf = get_pdf(i) @@ -328,7 +332,7 @@ def get_pdf(n_dup): def test_dataframe_drop_duplicates_method(): - pdf = DataFrame( + pdf = pd.DataFrame( [(1, 2, "a"), (2, 3, "b"), (3, 4, "c"), (2, 3, "d"), (3, 5, "c")], columns=["n1", "n2", "s1"], ) @@ -387,7 +391,7 @@ def test_dataframe_drop_duplicates_method(): def test_datetime_drop_duplicates(): date_df = cudf.DataFrame() - date_df["date"] = date_range("11/20/2018", periods=6, freq="D") + date_df["date"] = pd.date_range("11/20/2018", periods=6, freq="D") date_df["value"] = np.random.sample(len(date_df)) df = concat([date_df, date_df[:4]]) @@ -402,7 +406,7 @@ def test_datetime_drop_duplicates(): def test_drop_duplicates_NA(): # none - df = DataFrame( + df = pd.DataFrame( { "A": [None, None, "foo", "bar", "foo", "bar", "bar", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], @@ -439,7 +443,7 @@ def test_drop_duplicates_NA(): assert_df(result, expected) # nan - df = DataFrame( + df = pd.DataFrame( { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], @@ -481,7 +485,7 @@ def test_drop_duplicates_NA_for_take_all(): # pandas drop_duplicates calls in this function. # none - pdf = DataFrame( + pdf = pd.DataFrame( { "A": [None, None, "foo", "bar", "foo", "baz", "bar", "qux"], "C": [1.0, np.nan, np.nan, np.nan, 1.0, 2.0, 3, 1.0], @@ -531,7 +535,7 @@ def test_drop_duplicates_NA_for_take_all(): def test_drop_duplicates_inplace(): - orig = DataFrame( + orig = pd.DataFrame( { "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], "B": ["one", "one", "two", "two", "two", "two", "one", "two"], @@ -608,8 +612,8 @@ def test_drop_duplicates_multi_index(): ["one", "two", "one", "two", "one", "two", "one", "two"], ] - idx = MultiIndex.from_tuples(list(zip(*arrays)), names=["a", "b"]) - pdf = DataFrame(np.random.randint(0, 2, (8, 4)), index=idx) + idx = pd.MultiIndex.from_tuples(list(zip(*arrays)), names=["a", "b"]) + pdf = pd.DataFrame(np.random.randint(0, 2, (8, 4)), index=idx) gdf = cudf.DataFrame.from_pandas(pdf) expected = pdf.drop_duplicates() diff --git a/python/cudf/cudf/tests/test_hash_vocab.py b/python/cudf/cudf/tests/test_hash_vocab.py index dcf40417e4f..e081119ff89 100644 --- a/python/cudf/cudf/tests/test_hash_vocab.py +++ b/python/cudf/cudf/tests/test_hash_vocab.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2022, NVIDIA CORPORATION. import filecmp import os +import warnings import pytest @@ -20,6 +21,9 @@ def test_correct_bert_base_vocab_hash(datadir, tmpdir): groundtruth_path = os.path.join(datadir, "vocab-hash.txt") output_path = tmpdir.join("cudf-vocab-hash.txt") - hash_vocab(vocab_path, output_path) + with warnings.catch_warnings(): + # See https://github.com/rapidsai/cudf/issues/12403 + warnings.simplefilter(action="ignore", category=RuntimeWarning) + hash_vocab(vocab_path, output_path) assert filecmp.cmp(output_path, groundtruth_path, shallow=False) diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py index 4f3912ad9d4..94da7a50c2e 100644 --- a/python/cudf/cudf/tests/test_joining.py +++ b/python/cudf/cudf/tests/test_joining.py @@ -768,9 +768,9 @@ def test_merge_sort_on_indexes(kwargs): definitely_sorted.index.name = None assert_eq(gd_merge, definitely_sorted) elif left_index: - assert gd_merge["b"].is_monotonic + assert gd_merge["b"].is_monotonic_increasing elif right_index: - assert gd_merge["a"].is_monotonic + assert gd_merge["a"].is_monotonic_increasing @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 8267a4aa3af..1f340d63a71 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -512,7 +512,8 @@ def test_json_corner_case_with_escape_and_double_quote_char_with_strings(): ) def test_json_to_json_compare_contents(gdf, pdf): expected_json = pdf.to_json(lines=True, orient="records") - actual_json = gdf.to_json(lines=True, orient="records") + with pytest.warns(UserWarning): + actual_json = gdf.to_json(lines=True, orient="records") assert expected_json == actual_json @@ -988,7 +989,8 @@ def test_json_round_trip_gzip(): df = cudf.DataFrame({"a": [1, 2, 3], "b": ["abc", "def", "ghi"]}) bytes = BytesIO() with gzip.open(bytes, mode="wb") as fo: - df.to_json(fo, orient="records", lines=True) + with pytest.warns(UserWarning): + df.to_json(fo, orient="records", lines=True) bytes.seek(0) with gzip.open(bytes, mode="rb") as fo: written_df = cudf.read_json(fo, orient="records", lines=True) @@ -999,7 +1001,8 @@ def test_json_round_trip_gzip(): with gzip.open(bytes, mode="wb") as fo: fo.seek(loc) - df.to_json(fo, orient="records", lines=True) + with pytest.warns(UserWarning): + df.to_json(fo, orient="records", lines=True) bytes.seek(loc) with gzip.open(bytes, mode="rb") as fo: fo.seek(loc) diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py index 46b48b8244c..cf9e70d85c7 100644 --- a/python/cudf/cudf/tests/test_query.py +++ b/python/cudf/cudf/tests/test_query.py @@ -50,7 +50,7 @@ def test_query(data, fn, nulls): pdf["a"] = np.arange(nelem) pdf["b"] = np.random.random(nelem) * nelem if nulls: - pdf["a"][::2] = None + pdf.loc[::2, "a"] = None gdf = cudf.from_pandas(pdf) assert_eq(pdf.query(query_expr), gdf.query(query_expr)) diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py index 5c8773edd63..9bd67309ece 100644 --- a/python/cudf/cudf/tests/test_rank.py +++ b/python/cudf/cudf/tests/test_rank.py @@ -55,7 +55,8 @@ def test_rank_all_arguments( assert_eq(gdf["col1"].rank(**kwargs), pdf["col1"].rank(**kwargs)) assert_eq(gdf["col2"].rank(**kwargs), pdf["col2"].rank(**kwargs)) if numeric_only: - expect = pdf["str"].rank(**kwargs) + with pytest.warns(FutureWarning): + expect = pdf["str"].rank(**kwargs) got = gdf["str"].rank(**kwargs) assert expect.empty == got.empty expected = pdf.select_dtypes(include=np.number) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 97e6934b42e..5ba0bec3dc4 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -135,8 +135,8 @@ def test_integer_dataframe(x): ) @settings(deadline=None) def test_integer_series(x): - sr = cudf.Series(x) - ps = pd.Series(data=x) + sr = cudf.Series(x, dtype=int) + ps = pd.Series(data=x, dtype=int) assert repr(sr) == repr(ps) @@ -144,7 +144,7 @@ def test_integer_series(x): @given(st.lists(st.floats())) @settings(deadline=None) def test_float_dataframe(x): - gdf = cudf.DataFrame({"x": cudf.Series(x, nan_as_null=False)}) + gdf = cudf.DataFrame({"x": cudf.Series(x, dtype=float, nan_as_null=False)}) pdf = gdf.to_pandas() assert repr(gdf) == repr(pdf) @@ -152,8 +152,8 @@ def test_float_dataframe(x): @given(st.lists(st.floats())) @settings(deadline=None) def test_float_series(x): - sr = cudf.Series(x, nan_as_null=False) - ps = pd.Series(data=x) + sr = cudf.Series(x, dtype=float, nan_as_null=False) + ps = pd.Series(data=x, dtype=float) assert repr(sr) == repr(ps) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 6336565af52..37ffbab1676 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -389,7 +389,13 @@ def test_pivot_simple(index, column, data): pdf = pd.DataFrame({"index": index, "column": column, "data": data}) gdf = cudf.from_pandas(pdf) - expect = pdf.pivot("index", "column") + # In pandas 2.0 this will be a failure because pandas will require all of + # these as keyword arguments. Matching that check in cudf is a bit + # cumbersome and not worth the effort to match the warning, so this code + # just catches pandas's warning (rather than updating the signature) so + # that when it starts failing we know to update our impl of pivot. + with pytest.warns(FutureWarning): + expect = pdf.pivot("index", "column") got = gdf.pivot("index", "column") check_index_and_columns = expect.shape != (0, 0) diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 53318eef1c8..f921f27e931 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -340,7 +340,8 @@ def test_deserialize_cudf_0_16(datadir): fname = datadir / "pkl" / "stringColumnWithRangeIndex_cudf_0.16.pkl" expected = cudf.DataFrame({"a": ["hi", "hello", "world", None]}) - actual = pickle.load(open(fname, "rb")) + with open(fname, "rb") as f: + actual = pickle.load(f) assert_eq(expected, actual) diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py index 0f6e9f48e10..0c72a0d8fee 100644 --- a/python/cudf/cudf/tests/test_series.py +++ b/python/cudf/cudf/tests/test_series.py @@ -20,6 +20,7 @@ _create_pandas_series, assert_eq, assert_exceptions_equal, + expect_warning_if, gen_rand, ) @@ -101,8 +102,10 @@ def test_series_append_basic(data, others, ignore_index): other_ps = pd.Series(others) other_gs = cudf.Series(others) - expected = psr.append(other_ps, ignore_index=ignore_index) - actual = gsr.append(other_gs, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + expected = psr.append(other_ps, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + actual = gsr.append(other_gs, ignore_index=ignore_index) assert_eq(expected, actual) @@ -146,8 +149,10 @@ def test_series_append_basic_str(data, others, ignore_index): other_ps = pd.Series(others) other_gs = cudf.Series(others) - expected = psr.append(other_ps, ignore_index=ignore_index) - actual = gsr.append(other_gs, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + expected = psr.append(other_ps, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + actual = gsr.append(other_gs, ignore_index=ignore_index) assert_eq(expected, actual) @@ -197,8 +202,10 @@ def test_series_append_series_with_index(data, others, ignore_index): other_ps = others other_gs = cudf.from_pandas(others) - expected = psr.append(other_ps, ignore_index=ignore_index) - actual = gsr.append(other_gs, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + expected = psr.append(other_ps, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + actual = gsr.append(other_gs, ignore_index=ignore_index) assert_eq(expected, actual) @@ -211,14 +218,16 @@ def test_series_append_error_mixed_types(): match="cudf does not support mixed types, please type-cast " "both series to same dtypes.", ): - gsr.append(other) + with pytest.warns(FutureWarning): + gsr.append(other) with pytest.raises( TypeError, match="cudf does not support mixed types, please type-cast " "both series to same dtypes.", ): - gsr.append([gsr, other, gsr, other]) + with pytest.warns(FutureWarning): + gsr.append([gsr, other, gsr, other]) @pytest.mark.parametrize( @@ -276,8 +285,10 @@ def test_series_append_list_series_with_index(data, others, ignore_index): other_ps = others other_gs = [cudf.from_pandas(obj) for obj in others] - expected = psr.append(other_ps, ignore_index=ignore_index) - actual = gsr.append(other_gs, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + expected = psr.append(other_ps, ignore_index=ignore_index) + with pytest.warns(FutureWarning): + actual = gsr.append(other_gs, ignore_index=ignore_index) assert_eq(expected, actual) @@ -287,13 +298,15 @@ def test_series_append_existing_buffers(): # Add new buffer a2 = cudf.Series(np.arange(5)) - gs = gs.append(a2) + with pytest.warns(FutureWarning): + gs = gs.append(a2) assert len(gs) == 15 np.testing.assert_equal(gs.to_numpy(), np.hstack([a1, a2.to_numpy()])) # Ensure appending to previous buffer a3 = cudf.Series(np.arange(3)) - gs = gs.append(a3) + with pytest.warns(FutureWarning): + gs = gs.append(a3) assert len(gs) == 18 a4 = np.hstack([a1, a2.to_numpy(), a3.to_numpy()]) np.testing.assert_equal(gs.to_numpy(), a4) @@ -301,11 +314,13 @@ def test_series_append_existing_buffers(): # Appending different dtype a5 = cudf.Series(np.array([1, 2, 3], dtype=np.int32)) a6 = cudf.Series(np.array([4.5, 5.5, 6.5], dtype=np.float64)) - gs = a5.append(a6) + with pytest.warns(FutureWarning): + gs = a5.append(a6) np.testing.assert_equal( gs.to_numpy(), np.hstack([a5.to_numpy(), a6.to_numpy()]) ) - gs = cudf.Series(a6).append(a5) + with pytest.warns(FutureWarning): + gs = cudf.Series(a6).append(a5) np.testing.assert_equal( gs.to_numpy(), np.hstack([a6.to_numpy(), a5.to_numpy()]) ) @@ -469,7 +484,8 @@ def test_series_factorize(data, na_sentinel): gsr = cudf.Series(data) psr = gsr.to_pandas() - expected_labels, expected_cats = psr.factorize(na_sentinel=na_sentinel) + with pytest.warns(FutureWarning): + expected_labels, expected_cats = psr.factorize(na_sentinel=na_sentinel) actual_labels, actual_cats = gsr.factorize(na_sentinel=na_sentinel) assert_eq(expected_labels, actual_labels.get()) @@ -984,7 +1000,10 @@ def test_series_update(data, other): ps = gs.to_pandas() ps.update(p_other) - gs.update(g_other) + with expect_warning_if( + isinstance(other, cudf.Series) and other.isna().any(), UserWarning + ): + gs.update(g_other) assert_eq(gs, ps) @@ -1397,7 +1416,10 @@ def test_reset_index(level, drop, inplace, original_name, name): "test_reset_index_dup_level_name_exceptions" ) - expect = ps.reset_index(level=level, drop=drop, name=name, inplace=inplace) + with expect_warning_if(name is None and not drop): + expect = ps.reset_index( + level=level, drop=drop, name=name, inplace=inplace + ) got = gs.reset_index(level=level, drop=drop, name=name, inplace=inplace) if inplace: expect = ps @@ -1422,7 +1444,10 @@ def test_reset_index_dup_level_name(level, drop, inplace, original_name, name): "test_reset_index_dup_level_name_exceptions" ) - expect = ps.reset_index(level=level, drop=drop, inplace=inplace, name=name) + with expect_warning_if(name is None and not drop): + expect = ps.reset_index( + level=level, drop=drop, inplace=inplace, name=name + ) got = gs.reset_index(level=level, drop=drop, inplace=inplace, name=name) if inplace: expect = ps @@ -1448,7 +1473,8 @@ def test_reset_index_named(drop, inplace, original_name, name): "test_reset_index_dup_level_name_exceptions" ) - expect = ps.reset_index(drop=drop, inplace=inplace, name=name) + with expect_warning_if(name is None and not drop): + expect = ps.reset_index(drop=drop, inplace=inplace, name=name) got = gs.reset_index(drop=drop, inplace=inplace, name=name) if inplace: @@ -1539,7 +1565,13 @@ def test_autocorr(cudf_series, lag): psr = cudf_series.to_pandas() cudf_corr = cudf_series.autocorr(lag=lag) - pd_corr = psr.autocorr(lag=lag) + + # autocorrelation is undefined (nan) for less than two entries, but pandas + # short-circuits when there are 0 entries and bypasses the numpy function + # call that generates an error. + num_both_valid = (psr.notna() & psr.shift(lag).notna()).sum() + with expect_warning_if(num_both_valid == 1, RuntimeWarning): + pd_corr = psr.autocorr(lag=lag) assert_eq(pd_corr, cudf_corr) diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py index a182a5e7d24..68a56729c5a 100644 --- a/python/cudf/cudf/tests/test_sorting.py +++ b/python/cudf/cudf/tests/test_sorting.py @@ -14,6 +14,7 @@ NUMERIC_TYPES, assert_eq, assert_exceptions_equal, + expect_warning_if, ) sort_nelem_args = [2, 257] @@ -328,23 +329,27 @@ def _check_scatter_by_map(dfs, col): assert sr.iloc[0] == i assert nrows == nelem - _check_scatter_by_map( - df.scatter_by_map("a", map_size, keep_index=keep), df["a"] - ) + with pytest.warns(UserWarning): + _check_scatter_by_map( + df.scatter_by_map("a", map_size, keep_index=keep), df["a"] + ) _check_scatter_by_map( df.scatter_by_map("b", map_size, keep_index=keep), df["b"] ) _check_scatter_by_map( df.scatter_by_map("c", map_size, keep_index=keep), df["c"] ) - _check_scatter_by_map( - df.scatter_by_map("d", map_size, keep_index=keep), df["d"] - ) + with pytest.warns(UserWarning): + _check_scatter_by_map( + df.scatter_by_map("d", map_size, keep_index=keep), df["d"] + ) if map_size == 2 and nelem == 100: - df.scatter_by_map("a") # Auto-detect map_size + with pytest.warns(UserWarning): + df.scatter_by_map("a") # Auto-detect map_size with pytest.raises(ValueError): - df.scatter_by_map("a", map_size=1, debug=True) # Bad map_size + with pytest.warns(UserWarning): + df.scatter_by_map("a", map_size=1, debug=True) # Bad map_size # Test GenericIndex df2 = df.set_index("c") @@ -374,7 +379,8 @@ def test_dataframe_sort_values_kind(nelem, dtype, kind): df = DataFrame() df["a"] = aa = (100 * np.random.random(nelem)).astype(dtype) df["b"] = bb = (100 * np.random.random(nelem)).astype(dtype) - sorted_df = df.sort_values(by="a", kind=kind) + with expect_warning_if(kind != "quicksort", UserWarning): + sorted_df = df.sort_values(by="a", kind=kind) # Check sorted_index = np.argsort(aa, kind="mergesort") assert_eq(sorted_df.index.values, sorted_index) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 2a43adf5a5c..8939fcfeed9 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1842,7 +1842,14 @@ def test_string_wrap(data, width): ps = pd.Series(data) assert_eq( - gs.str.wrap(width=width), + gs.str.wrap( + width=width, + break_long_words=False, + expand_tabs=False, + replace_whitespace=True, + drop_whitespace=True, + break_on_hyphens=False, + ), ps.str.wrap( width=width, break_long_words=False, @@ -1857,7 +1864,14 @@ def test_string_wrap(data, width): pi = pd.Index(data) assert_eq( - gi.str.wrap(width=width), + gi.str.wrap( + width=width, + break_long_words=False, + expand_tabs=False, + replace_whitespace=True, + drop_whitespace=True, + break_on_hyphens=False, + ), pi.str.wrap( width=width, break_long_words=False, diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py index c3dfeac9a3f..e6658040663 100644 --- a/python/cudf/cudf/tests/test_testing.py +++ b/python/cudf/cudf/tests/test_testing.py @@ -339,15 +339,15 @@ def test_series_different_type_cases(dtype, check_exact, check_dtype): @pytest.mark.parametrize( - "index", - [cudf.Int8Index, cudf.Int16Index, cudf.Int32Index, cudf.Int64Index], + "dtype", + ["int8", "int16", "int32", "int64"], ) @pytest.mark.parametrize("exact", ["equiv", True, False]) -def test_range_index_and_int_index_eqaulity(index, exact): +def test_range_index_and_int_index_eqaulity(dtype, exact): pidx1 = pd.RangeIndex(0, stop=5, step=1) pidx2 = pd.Index([0, 1, 2, 3, 4]) idx1 = cudf.from_pandas(pidx1) - idx2 = index([0, 1, 2, 3, 4]) + idx2 = cudf.Index([0, 1, 2, 3, 4], dtype=dtype) kind = None try: