From 9f4fe10df09c564e0977ca76c08cdbd5ea543cca Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:13:54 -0700 Subject: [PATCH 1/8] STY: Enable RUFF001 --- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/parser/test_encoding.py | 4 ++-- pandas/tests/io/parser/test_read_fwf.py | 3 ++- pandas/tests/io/test_clipboard.py | 6 +++--- pandas/tests/series/methods/test_to_csv.py | 2 +- pandas/tests/strings/test_strings.py | 12 ++++++------ pandas/tseries/holiday.py | 2 +- pyproject.toml | 2 -- 9 files changed, 17 insertions(+), 18 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index f7d132a1c0bf0..70bb3e3e6b10e 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -957,7 +957,7 @@ def test_to_csv_path_is_none(self, float_frame): (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"), (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"), ( - DataFrame(5 * [[123, "Γειά σου", "Κόσμε"]], columns=["X", "Y", "Z"]), + DataFrame(5 * [[123, "Γειά oou", "Κόσμε"]], columns=["X", "Y", "Z"]), "cp737", ), ], diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 25b0e4a9f1de9..fb032decc9fb9 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -251,7 +251,7 @@ def test_read_json_from_to_json_results(self): "recommender_id": {"row_0": 3}, "recommender_name_jp": {"row_0": "浦田"}, "recommender_name_en": {"row_0": "Urata"}, - "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"}, + "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"}, "name_en": {"row_0": "Hakata Dolls Matsuo"}, } ) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 31c7994f39058..013072fafcc03 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -223,12 +223,12 @@ def test_encoding_named_temp_file(all_parsers): def test_parse_encoded_special_characters(encoding): # GH16218 Verify parsing of data with encoded special characters # Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a") - data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" + data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" encoded_data = BytesIO(data.encode(encoding)) result = read_csv(encoded_data, delimiter="\t", encoding=encoding) expected = DataFrame( - data=[[":foo", 0], ["bar", 1], ["baz", 2]], + data=[[":foo", 0], ["bar", 1], ["baz", 2]], columns=["a", "b"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 47794c09bf541..c93a9518b04a4 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -190,7 +190,8 @@ def test_read_csv_compat(): def test_bytes_io_input(): - result = read_fwf(BytesIO("שלום\nשלום".encode()), widths=[2, 2], encoding="utf8") + data = BytesIO("שלום\nשלlם").encode() + result = read_fwf(data, widths=[2, 2], encoding="utf8") expected = DataFrame([["של", "ום"]], columns=["של", "ום"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index ff81d0125144e..70144d65b99af 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -62,9 +62,9 @@ def df(request): data_type = request.param if data_type == "delims": - return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]}) + return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]}) elif data_type == "utf8": - return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]}) + return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]}) elif data_type == "utf16": return DataFrame( {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} @@ -402,7 +402,7 @@ def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc) @pytest.mark.single_cpu - @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."]) + @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑`...", "abcd..."]) @pytest.mark.xfail( (os.environ.get("DISPLAY") is None and not is_platform_mac()) or is_ci_environment(), diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 070ab872a4e5b..394fdd5c338bc 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -122,7 +122,7 @@ def test_to_csv_path_is_none(self): # GH 21241, 21118 (Series(["abc", "def", "ghi"], name="X"), "ascii"), (Series(["123", "你好", "世界"], name="中文"), "gb2312"), - (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"), + (Series(["123", "Γειά oou", "Κόσμε"], name="Ελληνικά"), "cp737"), ], ) def test_to_csv_compression(self, s, encoding, compression): diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index a5c4f8f7c8a4f..84b0eeeeaef6f 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -227,7 +227,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): # 0x2605: ★ not number # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY # 0xFF13: 3 Em 3 - ser = Series(["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype) + ser = Series(["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype) expected_dtype = "bool" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) result = getattr(ser.str, method)() @@ -246,7 +246,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): ], ) def test_isnumeric_unicode_missing(method, expected, any_string_dtype): - values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] + values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] ser = Series(values, dtype=any_string_dtype) expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) @@ -564,12 +564,12 @@ def test_decode_errors_kwarg(): "form, expected", [ ("NFKC", ["ABC", "ABC", "123", np.nan, "アイエ"]), - ("NFC", ["ABC", "ABC", "123", np.nan, "アイエ"]), + ("NFC", ["ABC", "ABC", "123", np.nan, "アイエ"]), ], ) def test_normalize(form, expected, any_string_dtype): ser = Series( - ["ABC", "ABC", "123", np.nan, "アイエ"], + ["ABC", "ABC", "123", np.nan, "アイエ"], index=["a", "b", "c", "d", "e"], dtype=any_string_dtype, ) @@ -580,7 +580,7 @@ def test_normalize(form, expected, any_string_dtype): def test_normalize_bad_arg_raises(any_string_dtype): ser = Series( - ["ABC", "ABC", "123", np.nan, "アイエ"], + ["ABC", "ABC", "123", np.nan, "アイエ"], index=["a", "b", "c", "d", "e"], dtype=any_string_dtype, ) @@ -589,7 +589,7 @@ def test_normalize_bad_arg_raises(any_string_dtype): def test_normalize_index(): - idx = Index(["ABC", "123", "アイエ"]) + idx = Index(["ABC", "123", "アイエ"]) expected = Index(["ABC", "123", "アイエ"]) result = idx.str.normalize("NFKC") tm.assert_index_equal(result, expected) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 70190b16767cf..44c21bc284121 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -570,7 +570,7 @@ def merge(self, other, inplace: bool = False): offset=DateOffset(weekday=MO(3)), ) USPresidentsDay = Holiday( - "Washington’s Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3)) + "Washington's Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3)) ) GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)]) diff --git a/pyproject.toml b/pyproject.toml index 6e82b200bb1c7..cf00adde09fa5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -326,8 +326,6 @@ ignore = [ "PLR0124", # Consider `elif` instead of `else` then `if` to remove indentation level "PLR5501", - # ambiguous-unicode-character-string - "RUF001", # ambiguous-unicode-character-docstring "RUF002", # ambiguous-unicode-character-comment From 80415434bdff976e88cdc0beaea308cb765db0fd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:18:36 -0700 Subject: [PATCH 2/8] more noqa --- pandas/io/stata.py | 2 +- pandas/tests/extension/test_arrow.py | 2 +- pandas/tests/io/test_stata.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 2181b33b315ae..054d73a8aba42 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -3768,7 +3768,7 @@ def _validate_variable_name(self, name: str) -> str: and c != "_" ) or 128 <= ord(c) < 192 - or c in {"×", "÷"} + or c in {"×", "÷"} # noqa: RUF001 ): name = name.replace(c, "_") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 197cdc3f436a1..5aa990d8c3ba3 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2104,7 +2104,7 @@ def test_str_slice_replace(start, stop, repl, exp): ["!|,", "isalnum", False], ["aaa", "isalpha", True], ["!!!", "isalpha", False], - ["٠", "isdecimal", True], + ["٠", "isdecimal", True], # noqa: RUF001 ["~!", "isdecimal", False], ["2", "isdigit", True], ["~", "isdigit", False], diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c4035ea867962..580373ba793f8 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -286,7 +286,7 @@ def test_read_dta18(self, datapath): ["Cat", "Bogota", "Bogotá", 1, 1.0, "option b Ünicode", 1.0], ["Dog", "Boston", "Uzunköprü", np.nan, np.nan, np.nan, np.nan], ["Plane", "Rome", "Tromsø", 0, 0.0, "option a", 0.0], - ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4], + ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4], # noqa: RUF001 ["", "", "", 0, 0.3332999, "option a", 1 / 3.0], ], columns=[ From ecdc7c1a5390ef98e2f5159d3c463abf2a372a77 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:21:09 -0700 Subject: [PATCH 3/8] Enable ruff003 --- pandas/io/formats/html.py | 2 +- pandas/tests/groupby/test_groupby.py | 2 +- pandas/tests/strings/test_strings.py | 2 +- pyproject.toml | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 151bde4e1c4c2..ce59985b8f352 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -94,7 +94,7 @@ def render(self) -> list[str]: self._write_table() if self.should_show_dimensions: - by = chr(215) # × + by = chr(215) # × # noqa: RUF003 self.write( f"
{len(self.frame)} rows {by} {len(self.frame.columns)} columns
" ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c8de1cd6785b6..6ffc975da4dd5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1402,7 +1402,7 @@ def test_groupby_dtype_inference_empty(): def test_groupby_unit64_float_conversion(): - # GH: 30859 groupby converts unit64 to floats sometimes + # GH: 30859 groupby converts unit64 to floats sometimes df = DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]}) result = df.groupby(["first", "second"])["value"].max() expected = Series( diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 84b0eeeeaef6f..6405526a57833 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -226,7 +226,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER # 0x2605: ★ not number # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY - # 0xFF13: 3 Em 3 + # 0xFF13: 3 Em 3 # noqa: RUF003 ser = Series(["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype) expected_dtype = "bool" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) diff --git a/pyproject.toml b/pyproject.toml index cf00adde09fa5..261dc7f48c238 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -328,8 +328,6 @@ ignore = [ "PLR5501", # ambiguous-unicode-character-docstring "RUF002", - # ambiguous-unicode-character-comment - "RUF003", # collection-literal-concatenation "RUF005", # pairwise-over-zipped (>=PY310 only) From fb0c8bca0e39225c2a9aba53151b1316c8ab3477 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:32:24 -0700 Subject: [PATCH 4/8] Enable ruff002 --- pandas/core/frame.py | 6 +++--- pandas/core/generic.py | 4 ++-- pandas/core/groupby/generic.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/reshape/merge.py | 2 +- pandas/core/tools/datetimes.py | 2 +- pandas/io/excel/_base.py | 4 ++-- pandas/io/formats/style.py | 2 +- pandas/io/sql.py | 6 +++--- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d5d485e903a6b..61f8af3b62666 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7134,7 +7134,7 @@ def value_counts( ascending : bool, default False Sort in ascending order. dropna : bool, default True - Don’t include counts of rows that contain NA values. + Don't include counts of rows that contain NA values. .. versionadded:: 1.3.0 @@ -9971,7 +9971,7 @@ def map( func : callable Python function, returns a single value from a single value. na_action : {None, 'ignore'}, default None - If ‘ignore’, propagate NaN values, without passing them to func. + If 'ignore', propagate NaN values, without passing them to func. **kwargs Additional keyword arguments to pass as keywords arguments to `func`. @@ -10057,7 +10057,7 @@ def applymap( func : callable Python function, returns a single value from a single value. na_action : {None, 'ignore'}, default None - If ‘ignore’, propagate NaN values, without passing them to func. + If 'ignore', propagate NaN values, without passing them to func. **kwargs Additional keyword arguments to pass as keywords arguments to `func`. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f6bea7d89a0b9..aa6578bbcaf66 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5633,7 +5633,7 @@ def filter( Keep labels from axis for which "like in label == True". regex : str (regular expression) Keep labels from axis for which re.search(regex, label) == True. - axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + axis : {0 or 'index', 1 or 'columns', None}, default None The axis to filter on, expressed either as an index (int) or axis name (str). By default this is the info axis, 'columns' for DataFrame. For `Series` this parameter is unused and defaults to `None`. @@ -5922,7 +5922,7 @@ def sample( np.random.Generator objects now accepted - axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + axis : {0 or 'index', 1 or 'columns', None}, default None Axis to sample. Accepts axis number or name. Default is stat axis for given data type. For `Series` this parameter is unused and defaults to `None`. ignore_index : bool, default False diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ceec715a40fd1..2ffdaa934e838 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2322,7 +2322,7 @@ def value_counts( ascending : bool, default False Sort in ascending order. dropna : bool, default True - Don’t include counts of rows that contain NA values. + Don't include counts of rows that contain NA values. Returns ------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index eb85acbc4b819..c07ca760cbc8e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -177,7 +177,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): yearfirst : bool, default False If True parse dates in `data` with the year first order. dtype : numpy.dtype or DatetimeTZDtype or str, default None - Note that the only NumPy dtype allowed is ‘datetime64[ns]’. + Note that the only NumPy dtype allowed is `datetime64[ns]`. copy : bool, default False Make a copy of input ndarray. name : label, default None diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e8fd3398c4db8..da253da1428bf 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2336,7 +2336,7 @@ def _factorize_keys( sort : bool, defaults to True If True, the encoding is done such that the unique elements in the keys are sorted. - how : {‘left’, ‘right’, ‘outer’, ‘inner’}, default ‘inner’ + how : {'left', 'right', 'outer', 'inner'}, default 'inner' Type of merge. Returns diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 95faea468fb5d..0360903424d54 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -916,7 +916,7 @@ def to_datetime( - **DataFrame/dict-like** are converted to :class:`Series` with :class:`datetime64` dtype. For each row a datetime is created from assembling the various dataframe columns. Column keys can be common abbreviations - like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or + like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or plurals of the same. The following causes are responsible for :class:`datetime.datetime` objects diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c310b2614fa5f..7c67f85ed3d1e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1268,14 +1268,14 @@ def __init__( @property def date_format(self) -> str: """ - Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’). + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). """ return self._date_format @property def datetime_format(self) -> str: """ - Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’). + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). """ return self._datetime_format diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index a45ea881d8dad..f77778ee45ae3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3489,7 +3489,7 @@ def highlight_quantile( Left bound, in [0, q_right), for the target quantile range. q_right : float, default 1 Right bound, in (q_left, 1], for the target quantile range. - interpolation : {‘linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’} + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} Argument passed to ``Series.quantile`` or ``DataFrame.quantile`` for quantile estimation. inclusive : {'both', 'neither', 'left', 'right'} diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 467e8d2c3ff58..2cf9d144eb91c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -441,7 +441,7 @@ def read_sql_query( rows to include in each chunk. dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or - {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}. + {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. .. versionadded:: 1.3.0 dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' @@ -597,7 +597,7 @@ def read_sql( .. versionadded:: 2.0 dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or - {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}. + {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query. .. versionadded:: 2.0.0 @@ -1759,7 +1759,7 @@ def read_query( of rows to include in each chunk. dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or - {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’} + {'a': np.float64, 'b': np.int32, 'c': 'Int64'} .. versionadded:: 1.3.0 From a5aa9336055cfaf3587a81a54c98959f0d97c3de Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:32:38 -0700 Subject: [PATCH 5/8] Enable ruff002 --- pyproject.toml | 2 -- web/pandas/about/governance.md | 4 ++-- web/pandas/community/coc.md | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 261dc7f48c238..4f63ed0179935 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -326,8 +326,6 @@ ignore = [ "PLR0124", # Consider `elif` instead of `else` then `if` to remove indentation level "PLR5501", - # ambiguous-unicode-character-docstring - "RUF002", # collection-literal-concatenation "RUF005", # pairwise-over-zipped (>=PY310 only) diff --git a/web/pandas/about/governance.md b/web/pandas/about/governance.md index 46480acc69c31..d8777d1d0c15d 100644 --- a/web/pandas/about/governance.md +++ b/web/pandas/about/governance.md @@ -128,7 +128,7 @@ In particular, the Core Team may: and merging pull requests. - Make decisions about the Services that are run by The Project and manage those Services for the benefit of the Project and Community. -- Make decisions when regular community discussion doesn’t produce consensus +- Make decisions when regular community discussion doesn't produce consensus on an issue in a reasonable time frame. ### Core Team membership @@ -157,7 +157,7 @@ they will be considered for removal from the Core Team. Before removal, inactive Member will be approached by the BDFL to see if they plan on returning to active participation. If not they will be removed immediately upon a Core Team vote. If they plan on returning to active participation soon, they will be -given a grace period of one year. If they don’t return to active participation +given a grace period of one year. If they don't return to active participation within that time period they will be removed by vote of the Core Team without further grace period. All former Core Team members can be considered for membership again at any time in the future, like any other Project Contributor. diff --git a/web/pandas/community/coc.md b/web/pandas/community/coc.md index f6d0c3543840e..22cd77859c557 100644 --- a/web/pandas/community/coc.md +++ b/web/pandas/community/coc.md @@ -21,7 +21,7 @@ Examples of unacceptable behavior by participants include: * Other unethical or unprofessional conduct Furthermore, we encourage inclusive behavior - for example, -please don’t say “hey guys!” but “hey everyone!”. +please don't say “hey guys!” but “hey everyone!”. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions From fe96467c3e241d9918ddf38be1b85d5061a0b1bb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:36:45 -0700 Subject: [PATCH 6/8] add another noqa --- pandas/tests/io/parser/test_encoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 013072fafcc03..f6dbb24f36f18 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -223,12 +223,12 @@ def test_encoding_named_temp_file(all_parsers): def test_parse_encoded_special_characters(encoding): # GH16218 Verify parsing of data with encoded special characters # Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a") - data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" + data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" # noqa: RUF001 encoded_data = BytesIO(data.encode(encoding)) result = read_csv(encoded_data, delimiter="\t", encoding=encoding) expected = DataFrame( - data=[[":foo", 0], ["bar", 1], ["baz", 2]], + data=[[":foo", 0], ["bar", 1], ["baz", 2]], # noqa: RUF001 columns=["a", "b"], ) tm.assert_frame_equal(result, expected) From c24889e597d8e0d0bf0949bb19540efd731e2b21 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:57:55 -0700 Subject: [PATCH 7/8] revert some intentional unicode --- pandas/tests/frame/methods/test_to_csv.py | 5 ++++- pandas/tests/series/methods/test_to_csv.py | 5 ++++- pandas/tests/strings/test_strings.py | 14 ++++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 70bb3e3e6b10e..475c33d86e6e7 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -957,7 +957,10 @@ def test_to_csv_path_is_none(self, float_frame): (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"), (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"), ( - DataFrame(5 * [[123, "Γειά oou", "Κόσμε"]], columns=["X", "Y", "Z"]), + DataFrame( + 5 * [[123, "Γειά σου", "Κόσμε"]], # noqa: RUF001 + columns=["X", "Y", "Z"], + ), "cp737", ), ], diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 394fdd5c338bc..76ca05a60eb7a 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -122,7 +122,10 @@ def test_to_csv_path_is_none(self): # GH 21241, 21118 (Series(["abc", "def", "ghi"], name="X"), "ascii"), (Series(["123", "你好", "世界"], name="中文"), "gb2312"), - (Series(["123", "Γειά oou", "Κόσμε"], name="Ελληνικά"), "cp737"), + ( + Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), # noqa: RUF001 + "cp737", + ), ], ) def test_to_csv_compression(self, s, encoding, compression): diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 6405526a57833..1e573bdfe8fb5 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -227,7 +227,9 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): # 0x2605: ★ not number # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY # 0xFF13: 3 Em 3 # noqa: RUF003 - ser = Series(["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype) + ser = Series( + ["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype # noqa: RUF001 + ) expected_dtype = "bool" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) result = getattr(ser.str, method)() @@ -246,7 +248,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): ], ) def test_isnumeric_unicode_missing(method, expected, any_string_dtype): - values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] + values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] # noqa: RUF001 ser = Series(values, dtype=any_string_dtype) expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) @@ -564,12 +566,12 @@ def test_decode_errors_kwarg(): "form, expected", [ ("NFKC", ["ABC", "ABC", "123", np.nan, "アイエ"]), - ("NFC", ["ABC", "ABC", "123", np.nan, "アイエ"]), + ("NFC", ["ABC", "ABC", "123", np.nan, "アイエ"]), # noqa: RUF001 ], ) def test_normalize(form, expected, any_string_dtype): ser = Series( - ["ABC", "ABC", "123", np.nan, "アイエ"], + ["ABC", "ABC", "123", np.nan, "アイエ"], # noqa: RUF001 index=["a", "b", "c", "d", "e"], dtype=any_string_dtype, ) @@ -580,7 +582,7 @@ def test_normalize(form, expected, any_string_dtype): def test_normalize_bad_arg_raises(any_string_dtype): ser = Series( - ["ABC", "ABC", "123", np.nan, "アイエ"], + ["ABC", "ABC", "123", np.nan, "アイエ"], # noqa: RUF001 index=["a", "b", "c", "d", "e"], dtype=any_string_dtype, ) @@ -589,7 +591,7 @@ def test_normalize_bad_arg_raises(any_string_dtype): def test_normalize_index(): - idx = Index(["ABC", "123", "アイエ"]) + idx = Index(["ABC", "123", "アイエ"]) # noqa: RUF001 expected = Index(["ABC", "123", "アイエ"]) result = idx.str.normalize("NFKC") tm.assert_index_equal(result, expected) From e484ee0a9dca73f7cbbcfbd7ca2547a350883179 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 31 Jul 2023 15:43:39 -0700 Subject: [PATCH 8/8] Fix test --- pandas/tests/io/parser/test_read_fwf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index c93a9518b04a4..c19f8d2792a35 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -190,7 +190,7 @@ def test_read_csv_compat(): def test_bytes_io_input(): - data = BytesIO("שלום\nשלlם").encode() + data = BytesIO("שלום\nשלום".encode()) # noqa: RUF001 result = read_fwf(data, widths=[2, 2], encoding="utf8") expected = DataFrame([["של", "ום"]], columns=["של", "ום"]) tm.assert_frame_equal(result, expected)