diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d8f6468aca83..3a07149fe8171 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -153,3 +153,12 @@ jobs: run: | source activate pandas-dev pytest pandas/tests/frame/methods --array-manager + pytest pandas/tests/arithmetic/ --array-manager + + # indexing subset (temporary since other tests don't pass yet) + pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager + pytest pandas/tests/frame/indexing/test_where.py --array-manager + pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_multi_index --array-manager + pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns --array-manager + pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups --array-manager + pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column --array-manager diff --git a/.github/workflows/database.yml b/.github/workflows/database.yml index f3ccd78266ba6..b34373b82af1a 100644 --- a/.github/workflows/database.yml +++ b/.github/workflows/database.yml @@ -170,3 +170,11 @@ jobs: - name: Print skipped tests run: python ci/print_skipped.py + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + files: /tmp/test_coverage.xml + flags: unittests + name: codecov-pandas + fail_ci_if_error: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e0df3434b2906..d433fb08209bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -138,7 +138,7 @@ repos: entry: python scripts/check_for_inconsistent_pandas_namespace.py language: python types: [python] - files: ^pandas/tests/ + files: ^pandas/tests/frame/ - id: FrameOrSeriesUnion name: Check for use of Union[Series, DataFrame] instead of 
FrameOrSeriesUnion alias entry: Union\[.*(Series,.*DataFrame|DataFrame,.*Series).*\] @@ -180,6 +180,12 @@ repos: language: pygrep types: [python] files: ^pandas/tests/ + - id: title-capitalization + name: Validate correct capitalization among titles in documentation + entry: python scripts/validate_rst_title_capitalization.py + language: python + types: [rst] + files: ^doc/source/(development|reference)/ - repo: https://github.com/asottile/yesqa rev: v1.2.2 hooks: diff --git a/MANIFEST.in b/MANIFEST.in index cf6a1835433a4..494ad69efbc56 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,9 +1,4 @@ -include MANIFEST.in -include LICENSE include RELEASE.md -include README.md -include setup.py -include pyproject.toml graft doc prune doc/build @@ -16,10 +11,12 @@ global-exclude *.bz2 global-exclude *.csv global-exclude *.dta global-exclude *.feather +global-exclude *.tar global-exclude *.gz global-exclude *.h5 global-exclude *.html global-exclude *.json +global-exclude *.jsonl global-exclude *.pickle global-exclude *.png global-exclude *.pyc @@ -40,6 +37,11 @@ global-exclude .DS_Store global-exclude .git* global-exclude \#* +# GH 39321 +# csv_dir_path fixture checks the existence of the directory +# exclude the whole directory to avoid running related tests in sdist +prune pandas/tests/io/parser/data + include versioneer.py include pandas/_version.py include pandas/io/formats/templates/*.tpl diff --git a/Makefile b/Makefile index 2c968234749f5..f47c50032f83c 100644 --- a/Makefile +++ b/Makefile @@ -25,16 +25,3 @@ doc: cd doc; \ python make.py clean; \ python make.py html - -check: - python3 scripts/validate_unwanted_patterns.py \ - --validation-type="private_function_across_module" \ - --included-file-extensions="py" \ - --excluded-file-paths=pandas/tests,asv_bench/ \ - pandas/ - - python3 scripts/validate_unwanted_patterns.py \ - --validation-type="private_import_across_module" \ - --included-file-extensions="py" \ - --excluded-file-paths=pandas/tests,asv_bench/,doc/ - 
pandas/ diff --git a/asv_bench/benchmarks/algos/__init__.py b/asv_bench/benchmarks/algos/__init__.py new file mode 100644 index 0000000000000..97c9ab09b9c6b --- /dev/null +++ b/asv_bench/benchmarks/algos/__init__.py @@ -0,0 +1,12 @@ +""" +algos/ directory is intended for individual functions from core.algorithms + +In many cases these algorithms are reachable in multiple ways: + algos.foo(x, y) + Series(x).foo(y) + Index(x).foo(y) + pd.array(x).foo(y) + +In most cases we profile the Series variant directly, trusting the performance +of the others to be highly correlated. +""" diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py new file mode 100644 index 0000000000000..5d81d9d0d45a3 --- /dev/null +++ b/asv_bench/benchmarks/algos/isin.py @@ -0,0 +1,317 @@ +import numpy as np + +from pandas.compat.numpy import np_version_under1p20 + +from pandas import ( + Categorical, + NaT, + Series, + date_range, +) + + +class IsIn: + + params = [ + "int64", + "uint64", + "object", + "Int64", + "boolean", + "bool", + "datetime64[ns]", + "category[object]", + "category[int]", + ] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10000 + + self.mismatched = [NaT.to_datetime64()] * 2 + + if dtype in ["boolean", "bool"]: + self.series = Series(np.random.randint(0, 2, N)).astype(dtype) + self.values = [True, False] + + elif dtype == "datetime64[ns]": + # Note: values here is much larger than non-dt64ns cases + + # dti has length=115777 + dti = date_range(start="2015-10-26", end="2016-01-01", freq="50s") + self.series = Series(dti) + self.values = self.series._values[::3] + self.mismatched = [1, 2] + + elif dtype in ["category[object]", "category[int]"]: + # Note: sizes are different in this case than others + np.random.seed(1234) + + n = 5 * 10 ** 5 + sample_size = 100 + + arr = list(np.random.randint(0, n // 10, size=n)) + if dtype == "category[object]": + arr = [f"s{i:04d}" for i in arr] + + self.values = np.random.choice(arr, sample_size) + 
self.series = Series(arr).astype("category") + + else: + self.series = Series(np.random.randint(1, 10, N)).astype(dtype) + self.values = [1, 2] + + self.cat_values = Categorical(self.values) + + def time_isin(self, dtype): + self.series.isin(self.values) + + def time_isin_categorical(self, dtype): + self.series.isin(self.cat_values) + + def time_isin_empty(self, dtype): + self.series.isin([]) + + def time_isin_mismatched_dtype(self, dtype): + self.series.isin(self.mismatched) + + +class IsinAlmostFullWithRandomInt: + params = [ + [np.float64, np.int64, np.uint64, np.object_], + range(10, 21), + ["inside", "outside"], + ] + param_names = ["dtype", "exponent", "title"] + + def setup(self, dtype, exponent, title): + M = 3 * 2 ** (exponent - 2) + # 0.77-the maximal share of occupied buckets + np.random.seed(42) + self.series = Series(np.random.randint(0, M, M)).astype(dtype) + + values = np.random.randint(0, M, M).astype(dtype) + if title == "inside": + self.values = values + elif title == "outside": + self.values = values + M + else: + raise ValueError(title) + + def time_isin(self, dtype, exponent, title): + self.series.isin(self.values) + + +class IsinWithRandomFloat: + params = [ + [np.float64, np.object], + [ + 1_300, + 2_000, + 7_000, + 8_000, + 70_000, + 80_000, + 750_000, + 900_000, + ], + ["inside", "outside"], + ] + param_names = ["dtype", "size", "title"] + + def setup(self, dtype, size, title): + np.random.seed(42) + self.values = np.random.rand(size) + self.series = Series(self.values).astype(dtype) + np.random.shuffle(self.values) + + if title == "outside": + self.values = self.values + 0.1 + + def time_isin(self, dtype, size, title): + self.series.isin(self.values) + + +class IsinWithArangeSorted: + params = [ + [np.float64, np.int64, np.uint64, np.object], + [ + 1_000, + 2_000, + 8_000, + 100_000, + 1_000_000, + ], + ] + param_names = ["dtype", "size"] + + def setup(self, dtype, size): + self.series = Series(np.arange(size)).astype(dtype) + self.values 
= np.arange(size).astype(dtype) + + def time_isin(self, dtype, size): + self.series.isin(self.values) + + +class IsinWithArange: + params = [ + [np.float64, np.int64, np.uint64, np.object], + [ + 1_000, + 2_000, + 8_000, + ], + [-2, 0, 2], + ] + param_names = ["dtype", "M", "offset_factor"] + + def setup(self, dtype, M, offset_factor): + offset = int(M * offset_factor) + np.random.seed(42) + tmp = Series(np.random.randint(offset, M + offset, 10 ** 6)) + self.series = tmp.astype(dtype) + self.values = np.arange(M).astype(dtype) + + def time_isin(self, dtype, M, offset_factor): + self.series.isin(self.values) + + +class IsInFloat64: + + params = [ + [np.float64, "Float64"], + ["many_different_values", "few_different_values", "only_nans_values"], + ] + param_names = ["dtype", "title"] + + def setup(self, dtype, title): + N_many = 10 ** 5 + N_few = 10 ** 6 + self.series = Series([1, 2], dtype=dtype) + + if title == "many_different_values": + # runtime is dominated by creation of the lookup-table + self.values = np.arange(N_many, dtype=np.float64) + elif title == "few_different_values": + # runtime is dominated by creation of the lookup-table + self.values = np.zeros(N_few, dtype=np.float64) + elif title == "only_nans_values": + # runtime is dominated by creation of the lookup-table + self.values = np.full(N_few, np.nan, dtype=np.float64) + else: + raise ValueError(title) + + def time_isin(self, dtype, title): + self.series.isin(self.values) + + +class IsInForObjects: + """ + A subset of the cartesian product of cases have special motivations: + + "nans" x "nans" + if nan-objects are different objects, + this has the potential to trigger O(n^2) running time + + "short" x "long" + running time dominated by the preprocessing + + "long" x "short" + running time dominated by look-up + + "long" x "long" + no dominating part + + "long_floats" x "long_floats" + because of nans floats are special + no dominating part + + """ + + variants = ["nans", "short", "long", 
"long_floats"] + + params = [variants, variants] + param_names = ["series_type", "vals_type"] + + def setup(self, series_type, vals_type): + N_many = 10 ** 5 + + if series_type == "nans": + ser_vals = np.full(10 ** 4, np.nan) + elif series_type == "short": + ser_vals = np.arange(2) + elif series_type == "long": + ser_vals = np.arange(N_many) + elif series_type == "long_floats": + ser_vals = np.arange(N_many, dtype=np.float_) + + self.series = Series(ser_vals).astype(object) + + if vals_type == "nans": + values = np.full(10 ** 4, np.nan) + elif vals_type == "short": + values = np.arange(2) + elif vals_type == "long": + values = np.arange(N_many) + elif vals_type == "long_floats": + values = np.arange(N_many, dtype=np.float_) + + self.values = values.astype(object) + + def time_isin(self, series_type, vals_type): + self.series.isin(self.values) + + +class IsInLongSeriesLookUpDominates: + params = [ + ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], + [5, 1000], + ["random_hits", "random_misses", "monotone_hits", "monotone_misses"], + ] + param_names = ["dtype", "MaxNumber", "series_type"] + + def setup(self, dtype, MaxNumber, series_type): + N = 10 ** 7 + + if not np_version_under1p20 and dtype in ("Int64", "Float64"): + raise NotImplementedError + + if series_type == "random_hits": + np.random.seed(42) + array = np.random.randint(0, MaxNumber, N) + if series_type == "random_misses": + np.random.seed(42) + array = np.random.randint(0, MaxNumber, N) + MaxNumber + if series_type == "monotone_hits": + array = np.repeat(np.arange(MaxNumber), N // MaxNumber) + if series_type == "monotone_misses": + array = np.arange(N) + MaxNumber + + self.series = Series(array).astype(dtype) + self.values = np.arange(MaxNumber).astype(dtype) + + def time_isin(self, dtypes, MaxNumber, series_type): + self.series.isin(self.values) + + +class IsInLongSeriesValuesDominate: + params = [ + ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], + 
["random", "monotone"], + ] + param_names = ["dtype", "series_type"] + + def setup(self, dtype, series_type): + N = 10 ** 7 + if series_type == "random": + np.random.seed(42) + vals = np.random.randint(0, 10 * N, N) + if series_type == "monotone": + vals = np.arange(N) + + self.values = vals.astype(dtype) + M = 10 ** 6 + 1 + self.series = Series(np.arange(M)).astype(dtype) + + def time_isin(self, dtypes, series_type): + self.series.isin(self.values) diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 7478efbf22609..488237a6f5a8b 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -4,7 +4,13 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + to_timedelta, +) import pandas._testing as tm from pandas.core.algorithms import checked_add_with_arr @@ -110,16 +116,26 @@ class FrameWithFrameWide: operator.add, operator.floordiv, operator.gt, - ] + ], + [ + # (n_rows, n_columns) + (1_000_000, 10), + (100_000, 100), + (10_000, 1000), + (1000, 10_000), + ], ] - param_names = ["op"] + param_names = ["op", "shape"] - def setup(self, op): + def setup(self, op, shape): # we choose dtypes so as to make the blocks # a) not perfectly match between right and left # b) appreciably bigger than single columns - n_cols = 2000 - n_rows = 500 + n_rows, n_cols = shape + + if op is operator.floordiv: + # floordiv is much slower than the other operations -> use less data + n_rows = n_rows // 10 # construct dataframe with 2 blocks arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8") @@ -131,7 +147,7 @@ def setup(self, op): df._consolidate_inplace() # TODO: GH#33198 the setting here shoudlnt need two steps - arr1 = np.random.randn(n_rows, n_cols // 4).astype("f8") + arr1 = np.random.randn(n_rows, max(n_cols // 4, 3)).astype("f8") arr2 = np.random.randn(n_rows, n_cols // 
2).astype("i8") arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8") df2 = pd.concat( @@ -145,11 +161,11 @@ def setup(self, op): self.left = df self.right = df2 - def time_op_different_blocks(self, op): + def time_op_different_blocks(self, op, shape): # blocks (and dtypes) are not aligned op(self.left, self.right) - def time_op_same_blocks(self, op): + def time_op_same_blocks(self, op, shape): # blocks (and dtypes) are aligned op(self.left, self.left) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 4e32b6e496929..268f25c3d12e3 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -220,25 +220,6 @@ def time_rank_int_cat_ordered(self): self.s_int_cat_ordered.rank() -class Isin: - - params = ["object", "int64"] - param_names = ["dtype"] - - def setup(self, dtype): - np.random.seed(1234) - n = 5 * 10 ** 5 - sample_size = 100 - arr = list(np.random.randint(0, n // 10, size=n)) - if dtype == "object": - arr = [f"s{i:04d}" for i in arr] - self.sample = np.random.choice(arr, sample_size) - self.series = pd.Series(arr).astype("category") - - def time_isin_categorical(self, dtype): - self.series.isin(self.sample) - - class IsMonotonic: def setup(self): N = 1000 diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index 7c43485f5ef45..5993b068feadf 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -1,6 +1,12 @@ import numpy as np -from pandas import DatetimeIndex, Index, MultiIndex, Series, Timestamp +from pandas import ( + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, +) from .pandas_vb_common import tm diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py index 3efcf46955e2b..9209e851289bb 100644 --- a/asv_bench/benchmarks/dtypes.py +++ b/asv_bench/benchmarks/dtypes.py @@ -5,7 +5,10 @@ import pandas as pd from pandas import DataFrame import pandas._testing as tm -from pandas.api.types import 
is_extension_array_dtype, pandas_dtype +from pandas.api.types import ( + is_extension_array_dtype, + pandas_dtype, +) from .pandas_vb_common import ( datetime_dtypes, diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index e0a2257b0ca1f..3367898101528 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -1,12 +1,21 @@ import numpy as np import pandas as pd -from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, +) from .pandas_vb_common import tm try: - from pandas.tseries.offsets import Hour, Nano + from pandas.tseries.offsets import ( + Hour, + Nano, + ) except ImportError: # For compatibility with older versions from pandas.core.datetools import * # noqa diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index dc6fd2ff61423..bd068cec4641b 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -3,7 +3,15 @@ import numpy as np -from pandas import DataFrame, MultiIndex, NaT, Series, date_range, isnull, period_range +from pandas import ( + DataFrame, + MultiIndex, + NaT, + Series, + date_range, + isnull, + period_range, +) from .pandas_vb_common import tm diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 47523005a877f..410668ca3c7cf 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,6 +1,12 @@ import numpy as np -from pandas import DataFrame, Series, date_range, factorize, read_csv +from pandas import ( + DataFrame, + Series, + date_range, + factorize, + read_csv, +) from pandas.core.algorithms import take_nd from .pandas_vb_common import tm diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 806cf38ad90b6..fb08c6fdeaedf 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -461,6 +461,29 @@ 
def time_dtype_as_field(self, dtype, method, application): self.as_field_method() +class GroupByCythonAgg: + """ + Benchmarks specifically targeting our cython aggregation algorithms + (using a big enough dataframe with simple key, so a large part of the + time is actually spent in the grouped aggregation). + """ + + param_names = ["dtype", "method"] + params = [ + ["float64"], + ["sum", "prod", "min", "max", "mean", "median", "var", "first", "last"], + ] + + def setup(self, dtype, method): + N = 1_000_000 + df = DataFrame(np.random.randn(N, 10), columns=list("abcdefghij")) + df["key"] = np.random.randint(0, 100, size=N) + self.df = df + + def time_frame_agg(self, dtype, method): + self.df.groupby("key").agg(method) + + class RankWithTies: # GH 21237 param_names = ["dtype", "tie_method"] diff --git a/asv_bench/benchmarks/hash_functions.py b/asv_bench/benchmarks/hash_functions.py index 3743882b936e2..394433f7c8f99 100644 --- a/asv_bench/benchmarks/hash_functions.py +++ b/asv_bench/benchmarks/hash_functions.py @@ -3,28 +3,6 @@ import pandas as pd -class IsinAlmostFullWithRandomInt: - params = [ - [np.float64, np.int64, np.uint64, np.object], - range(10, 21), - ] - param_names = ["dtype", "exponent"] - - def setup(self, dtype, exponent): - M = 3 * 2 ** (exponent - 2) - # 0.77-the maximal share of occupied buckets - np.random.seed(42) - self.s = pd.Series(np.random.randint(0, M, M)).astype(dtype) - self.values = np.random.randint(0, M, M).astype(dtype) - self.values_outside = self.values + M - - def time_isin(self, dtype, exponent): - self.s.isin(self.values) - - def time_isin_outside(self, dtype, exponent): - self.s.isin(self.values_outside) - - class UniqueForLargePyObjectInts: def setup(self): lst = [x << 32 for x in range(5000)] @@ -34,80 +12,6 @@ def time_unique(self): pd.unique(self.arr) -class IsinWithRandomFloat: - params = [ - [np.float64, np.object], - [ - 1_300, - 2_000, - 7_000, - 8_000, - 70_000, - 80_000, - 750_000, - 900_000, - ], - ] - param_names = 
["dtype", "M"] - - def setup(self, dtype, M): - np.random.seed(42) - self.values = np.random.rand(M) - self.s = pd.Series(self.values).astype(dtype) - np.random.shuffle(self.values) - self.values_outside = self.values + 0.1 - - def time_isin(self, dtype, M): - self.s.isin(self.values) - - def time_isin_outside(self, dtype, M): - self.s.isin(self.values_outside) - - -class IsinWithArangeSorted: - params = [ - [np.float64, np.int64, np.uint64, np.object], - [ - 1_000, - 2_000, - 8_000, - 100_000, - 1_000_000, - ], - ] - param_names = ["dtype", "M"] - - def setup(self, dtype, M): - self.s = pd.Series(np.arange(M)).astype(dtype) - self.values = np.arange(M).astype(dtype) - - def time_isin(self, dtype, M): - self.s.isin(self.values) - - -class IsinWithArange: - params = [ - [np.float64, np.int64, np.uint64, np.object], - [ - 1_000, - 2_000, - 8_000, - ], - [-2, 0, 2], - ] - param_names = ["dtype", "M", "offset_factor"] - - def setup(self, dtype, M, offset_factor): - offset = int(M * offset_factor) - np.random.seed(42) - tmp = pd.Series(np.random.randint(offset, M + offset, 10 ** 6)) - self.s = tmp.astype(dtype) - self.values = np.arange(M).astype(dtype) - - def time_isin(self, dtype, M, offset_factor): - self.s.isin(self.values) - - class Float64GroupIndex: # GH28303 def setup(self): diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index e17c985321c47..b6808ace629db 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,8 +1,14 @@ import numpy as np -from pandas import Series, to_numeric - -from .pandas_vb_common import lib, tm +from pandas import ( + Series, + to_numeric, +) + +from .pandas_vb_common import ( + lib, + tm, +) class ToNumeric: diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 12de9b121ef6d..5ff9431fbf8e4 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -1,12 +1,24 @@ -from io import BytesIO, StringIO +from io import 
( + BytesIO, + StringIO, +) import random import string import numpy as np -from pandas import Categorical, DataFrame, date_range, read_csv, to_datetime - -from ..pandas_vb_common import BaseIO, tm +from pandas import ( + Categorical, + DataFrame, + date_range, + read_csv, + to_datetime, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) class ToCSV(BaseIO): diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index 7efaeddecd423..3363b43f29b78 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -2,10 +2,19 @@ import numpy as np from odf.opendocument import OpenDocumentSpreadsheet -from odf.table import Table, TableCell, TableRow +from odf.table import ( + Table, + TableCell, + TableRow, +) from odf.text import P -from pandas import DataFrame, ExcelWriter, date_range, read_excel +from pandas import ( + DataFrame, + ExcelWriter, + date_range, + read_excel, +) from ..pandas_vb_common import tm diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py index 4ca399a293a4b..4a2c1c872e6eb 100644 --- a/asv_bench/benchmarks/io/hdf.py +++ b/asv_bench/benchmarks/io/hdf.py @@ -1,8 +1,16 @@ import numpy as np -from pandas import DataFrame, HDFStore, date_range, read_hdf - -from ..pandas_vb_common import BaseIO, tm +from pandas import ( + DataFrame, + HDFStore, + date_range, + read_hdf, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) class HDFStoreDataFrame(BaseIO): diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index ed0fb5b8fe342..00f3278ced98b 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -2,9 +2,18 @@ import numpy as np -from pandas import DataFrame, concat, date_range, read_json, timedelta_range - -from ..pandas_vb_common import BaseIO, tm +from pandas import ( + DataFrame, + concat, + date_range, + read_json, + timedelta_range, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) class 
ReadJSON(BaseIO): diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py index 656fe2197bc8a..c71cdcdcc5c59 100644 --- a/asv_bench/benchmarks/io/pickle.py +++ b/asv_bench/benchmarks/io/pickle.py @@ -1,8 +1,15 @@ import numpy as np -from pandas import DataFrame, date_range, read_pickle - -from ..pandas_vb_common import BaseIO, tm +from pandas import ( + DataFrame, + date_range, + read_pickle, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) class Pickle(BaseIO): diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py index b71bb832280b9..3cfa28de78c90 100644 --- a/asv_bench/benchmarks/io/sql.py +++ b/asv_bench/benchmarks/io/sql.py @@ -3,7 +3,12 @@ import numpy as np from sqlalchemy import create_engine -from pandas import DataFrame, date_range, read_sql_query, read_sql_table +from pandas import ( + DataFrame, + date_range, + read_sql_query, + read_sql_table, +) from ..pandas_vb_common import tm diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py index 9faafa82ff46e..4ae2745af8bff 100644 --- a/asv_bench/benchmarks/io/stata.py +++ b/asv_bench/benchmarks/io/stata.py @@ -1,8 +1,15 @@ import numpy as np -from pandas import DataFrame, date_range, read_stata - -from ..pandas_vb_common import BaseIO, tm +from pandas import ( + DataFrame, + date_range, + read_stata, +) + +from ..pandas_vb_common import ( + BaseIO, + tm, +) class Stata(BaseIO): diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index b0ad43ace88b5..27eaecff09d0f 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -2,7 +2,15 @@ import numpy as np -from pandas import DataFrame, MultiIndex, Series, concat, date_range, merge, merge_asof +from pandas import ( + DataFrame, + MultiIndex, + Series, + concat, + date_range, + merge, + merge_asof, +) from .pandas_vb_common import tm diff --git a/asv_bench/benchmarks/libs.py b/asv_bench/benchmarks/libs.py new 
file mode 100644 index 0000000000000..f5c2397945cea --- /dev/null +++ b/asv_bench/benchmarks/libs.py @@ -0,0 +1,42 @@ +""" +Benchmarks for code in pandas/_libs, excluding pandas/_libs/tslibs, +which has its own directory +""" +import numpy as np + +from pandas._libs.lib import ( + is_list_like, + is_scalar, +) + +from pandas import ( + NA, + NaT, +) + +# TODO: share with something in pd._testing? +scalars = [ + 0, + 1.0, + 1 + 2j, + True, + "foo", + b"bar", + None, + np.datetime64(123, "ns"), + np.timedelta64(123, "ns"), + NaT, + NA, +] +zero_dims = [np.array("123")] +listlikes = [np.array([1, 2, 3]), {0: 1}, {1, 2, 3}, [1, 2, 3], (1, 2, 3)] + + +class ScalarListLike: + params = scalars + zero_dims + listlikes + + def time_is_list_like(self, param): + is_list_like(param) + + def time_is_scalar(self, param): + is_scalar(param) diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py index 18dbb7eae0615..25df5b0214959 100644 --- a/asv_bench/benchmarks/multiindex_object.py +++ b/asv_bench/benchmarks/multiindex_object.py @@ -2,7 +2,12 @@ import numpy as np -from pandas import DataFrame, MultiIndex, RangeIndex, date_range +from pandas import ( + DataFrame, + MultiIndex, + RangeIndex, + date_range, +) from .pandas_vb_common import tm diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 74193ee62cfae..4f81aee62c202 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -2,7 +2,14 @@ Period benchmarks with non-tslibs dependencies. See benchmarks.tslibs.period for benchmarks that rely only on tslibs. 
""" -from pandas import DataFrame, Period, PeriodIndex, Series, date_range, period_range +from pandas import ( + DataFrame, + Period, + PeriodIndex, + Series, + date_range, + period_range, +) from pandas.tseries.frequencies import to_offset diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 5c718516360ed..11e43401f9395 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,7 +1,12 @@ import matplotlib import numpy as np -from pandas import DataFrame, DatetimeIndex, Series, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, +) try: from pandas.plotting import andrews_curves diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 03394e6fe08cb..65392f2cea65b 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,8 +1,18 @@ import numpy as np -from pandas import DataFrame, Index, MultiIndex, Series, date_range, period_range - -from .pandas_vb_common import lib, tm +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, + period_range, +) + +from .pandas_vb_common import ( + lib, + tm, +) class Reindex: diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index da1592a2f1ab0..faee9bc57464b 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -4,7 +4,13 @@ import numpy as np import pandas as pd -from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long +from pandas import ( + DataFrame, + MultiIndex, + date_range, + melt, + wide_to_long, +) from pandas.api.types import CategoricalDtype diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index b457bce8fe138..d05a28e0873d0 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -2,7 +2,11 @@ import numpy as np -from pandas import Categorical, NaT, Series, date_range +from 
pandas import ( + NaT, + Series, + date_range, +) from .pandas_vb_common import tm @@ -23,165 +27,6 @@ def time_constructor(self, data): Series(data=self.data, index=self.idx) -class IsIn: - - params = ["int64", "uint64", "object", "Int64"] - param_names = ["dtype"] - - def setup(self, dtype): - N = 10000 - self.s = Series(np.random.randint(1, 10, N)).astype(dtype) - self.values = [1, 2] - - def time_isin(self, dtypes): - self.s.isin(self.values) - - -class IsInBoolean: - - params = ["boolean", "bool"] - param_names = ["dtype"] - - def setup(self, dtype): - N = 10000 - self.s = Series(np.random.randint(0, 2, N)).astype(dtype) - self.values = [True, False] - - def time_isin(self, dtypes): - self.s.isin(self.values) - - -class IsInDatetime64: - def setup(self): - dti = date_range( - start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s" - ) - self.ser = Series(dti) - self.subset = self.ser._values[::3] - self.cat_subset = Categorical(self.subset) - - def time_isin(self): - self.ser.isin(self.subset) - - def time_isin_cat_values(self): - self.ser.isin(self.cat_subset) - - def time_isin_mismatched_dtype(self): - self.ser.isin([1, 2]) - - def time_isin_empty(self): - self.ser.isin([]) - - -class IsInFloat64: - - params = [np.float64, "Float64"] - param_names = ["dtype"] - - def setup(self, dtype): - N_many = 10 ** 5 - N_few = 10 ** 6 - self.small = Series([1, 2], dtype=dtype) - self.many_different_values = np.arange(N_many, dtype=np.float64) - self.few_different_values = np.zeros(N_few, dtype=np.float64) - self.only_nans_values = np.full(N_few, np.nan, dtype=np.float64) - - def time_isin_many_different(self, dtypes): - # runtime is dominated by creation of the lookup-table - self.small.isin(self.many_different_values) - - def time_isin_few_different(self, dtypes): - # runtime is dominated by creation of the lookup-table - self.small.isin(self.few_different_values) - - def time_isin_nan_values(self, dtypes): - # runtime is dominated by creation of the 
lookup-table - self.small.isin(self.few_different_values) - - -class IsInForObjects: - def setup(self): - self.s_nans = Series(np.full(10 ** 4, np.nan)).astype(object) - self.vals_nans = np.full(10 ** 4, np.nan).astype(object) - self.s_short = Series(np.arange(2)).astype(object) - self.s_long = Series(np.arange(10 ** 5)).astype(object) - self.vals_short = np.arange(2).astype(object) - self.vals_long = np.arange(10 ** 5).astype(object) - # because of nans floats are special: - self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float_)).astype(object) - self.vals_long_floats = np.arange(10 ** 5, dtype=np.float_).astype(object) - - def time_isin_nans(self): - # if nan-objects are different objects, - # this has the potential to trigger O(n^2) running time - self.s_nans.isin(self.vals_nans) - - def time_isin_short_series_long_values(self): - # running time dominated by the preprocessing - self.s_short.isin(self.vals_long) - - def time_isin_long_series_short_values(self): - # running time dominated by look-up - self.s_long.isin(self.vals_short) - - def time_isin_long_series_long_values(self): - # no dominating part - self.s_long.isin(self.vals_long) - - def time_isin_long_series_long_values_floats(self): - # no dominating part - self.s_long_floats.isin(self.vals_long_floats) - - -class IsInLongSeriesLookUpDominates: - params = [ - ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], - [5, 1000], - ["random_hits", "random_misses", "monotone_hits", "monotone_misses"], - ] - param_names = ["dtype", "MaxNumber", "series_type"] - - def setup(self, dtype, MaxNumber, series_type): - N = 10 ** 7 - if series_type == "random_hits": - np.random.seed(42) - array = np.random.randint(0, MaxNumber, N) - if series_type == "random_misses": - np.random.seed(42) - array = np.random.randint(0, MaxNumber, N) + MaxNumber - if series_type == "monotone_hits": - array = np.repeat(np.arange(MaxNumber), N // MaxNumber) - if series_type == "monotone_misses": - array = 
np.arange(N) + MaxNumber - self.series = Series(array).astype(dtype) - self.values = np.arange(MaxNumber).astype(dtype) - - def time_isin(self, dtypes, MaxNumber, series_type): - self.series.isin(self.values) - - -class IsInLongSeriesValuesDominate: - params = [ - ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"], - ["random", "monotone"], - ] - param_names = ["dtype", "series_type"] - - def setup(self, dtype, series_type): - N = 10 ** 7 - if series_type == "random": - np.random.seed(42) - vals = np.random.randint(0, 10 * N, N) - if series_type == "monotone": - vals = np.arange(N) - self.values = vals.astype(dtype) - M = 10 ** 6 + 1 - self.series = Series(np.arange(M)).astype(dtype) - - def time_isin(self, dtypes, series_type): - self.series.isin(self.values) - - class NSort: params = ["first", "last", "all"] diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 28ceb25eebd96..5006a0dbf1f98 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -2,7 +2,11 @@ import scipy.sparse import pandas as pd -from pandas import MultiIndex, Series, date_range +from pandas import ( + MultiIndex, + Series, + date_range, +) from pandas.arrays import SparseArray diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 7c75ad031e7cd..76257e1b40f1a 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -2,7 +2,11 @@ import numpy as np -from pandas import Categorical, DataFrame, Series +from pandas import ( + Categorical, + DataFrame, + Series, +) from .pandas_vb_common import tm diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index 207010b8cc943..9e221ee030e6d 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -5,7 +5,12 @@ import numpy as np -from pandas import DataFrame, Series, timedelta_range, to_timedelta +from pandas import ( + DataFrame, + Series, + timedelta_range, + 
to_timedelta, +) class ToTimedelta: diff --git a/asv_bench/benchmarks/tslibs/normalize.py b/asv_bench/benchmarks/tslibs/normalize.py index 9a206410d8775..292f57d7f5c77 100644 --- a/asv_bench/benchmarks/tslibs/normalize.py +++ b/asv_bench/benchmarks/tslibs/normalize.py @@ -1,5 +1,8 @@ try: - from pandas._libs.tslibs import is_date_array_normalized, normalize_i8_timestamps + from pandas._libs.tslibs import ( + is_date_array_normalized, + normalize_i8_timestamps, + ) except ImportError: from pandas._libs.tslibs.conversion import ( normalize_i8_timestamps, @@ -8,7 +11,10 @@ import pandas as pd -from .tslib import _sizes, _tzs +from .tslib import ( + _sizes, + _tzs, +) class Normalize: diff --git a/asv_bench/benchmarks/tslibs/period.py b/asv_bench/benchmarks/tslibs/period.py index 849e8ec864ac2..f2efee33c6da7 100644 --- a/asv_bench/benchmarks/tslibs/period.py +++ b/asv_bench/benchmarks/tslibs/period.py @@ -5,11 +5,17 @@ import numpy as np -from pandas._libs.tslibs.period import Period, periodarr_to_dt64arr +from pandas._libs.tslibs.period import ( + Period, + periodarr_to_dt64arr, +) from pandas.tseries.frequencies import to_offset -from .tslib import _sizes, _tzs +from .tslib import ( + _sizes, + _tzs, +) try: from pandas._libs.tslibs.vectorized import dt64arr_to_periodarr diff --git a/asv_bench/benchmarks/tslibs/resolution.py b/asv_bench/benchmarks/tslibs/resolution.py index 280be7932d4db..0d22ff77ee308 100644 --- a/asv_bench/benchmarks/tslibs/resolution.py +++ b/asv_bench/benchmarks/tslibs/resolution.py @@ -17,9 +17,15 @@ df.loc[key] = (val.average, val.stdev) """ -from datetime import timedelta, timezone - -from dateutil.tz import gettz, tzlocal +from datetime import ( + timedelta, + timezone, +) + +from dateutil.tz import ( + gettz, + tzlocal, +) import numpy as np import pytz diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py index 40f8e561f5238..86c8d735bdb27 100644 --- a/asv_bench/benchmarks/tslibs/timestamp.py +++ 
b/asv_bench/benchmarks/tslibs/timestamp.py @@ -1,6 +1,14 @@ -from datetime import datetime, timedelta, timezone - -from dateutil.tz import gettz, tzlocal, tzutc +from datetime import ( + datetime, + timedelta, + timezone, +) + +from dateutil.tz import ( + gettz, + tzlocal, + tzutc, +) import numpy as np import pytz diff --git a/asv_bench/benchmarks/tslibs/tslib.py b/asv_bench/benchmarks/tslibs/tslib.py index 5952a402bf89a..17beada916e46 100644 --- a/asv_bench/benchmarks/tslibs/tslib.py +++ b/asv_bench/benchmarks/tslibs/tslib.py @@ -15,9 +15,15 @@ val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz) df.loc[key] = (val.average, val.stdev) """ -from datetime import timedelta, timezone +from datetime import ( + timedelta, + timezone, +) -from dateutil.tz import gettz, tzlocal +from dateutil.tz import ( + gettz, + tzlocal, +) import numpy as np import pytz diff --git a/asv_bench/benchmarks/tslibs/tz_convert.py b/asv_bench/benchmarks/tslibs/tz_convert.py index c2c90024ca5bd..89b39c1f8919f 100644 --- a/asv_bench/benchmarks/tslibs/tz_convert.py +++ b/asv_bench/benchmarks/tslibs/tz_convert.py @@ -3,7 +3,10 @@ from pandas._libs.tslibs.tzconversion import tz_localize_to_utc -from .tslib import _sizes, _tzs +from .tslib import ( + _sizes, + _tzs, +) try: old_sig = False diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 597aced96eb18..251f450840ea9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -233,10 +233,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS02,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03 RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Validate correct capitalization among titles in documentation' ; echo $MSG - $BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development $BASE_DIR/doc/source/reference - RET=$(($RET + $?)) ; echo $MSG "DONE" - fi ### TYPING ### diff --git 
a/ci/deps/azure-37-locale_slow.yaml b/ci/deps/azure-37-locale_slow.yaml index 7f658fe62d268..0c47b1a72774f 100644 --- a/ci/deps/azure-37-locale_slow.yaml +++ b/ci/deps/azure-37-locale_slow.yaml @@ -18,7 +18,7 @@ dependencies: - lxml - matplotlib=3.0.0 - numpy=1.16.* - - openpyxl=2.6.0 + - openpyxl=3.0.0 - python-dateutil - python-blosc - pytz=2017.3 diff --git a/ci/deps/azure-37-minimum_versions.yaml b/ci/deps/azure-37-minimum_versions.yaml index f184ea87c89fe..9cc158b76cd41 100644 --- a/ci/deps/azure-37-minimum_versions.yaml +++ b/ci/deps/azure-37-minimum_versions.yaml @@ -19,7 +19,7 @@ dependencies: - numba=0.46.0 - numexpr=2.6.8 - numpy=1.16.5 - - openpyxl=2.6.0 + - openpyxl=3.0.0 - pytables=3.5.1 - python-dateutil=2.7.3 - pytz=2017.3 diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index e833ea1f1f398..53ee212360475 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -19,7 +19,7 @@ requirements: - pip - cython - numpy - - setuptools >=3.3 + - setuptools >=38.6.0 - python-dateutil >=2.7.3 - pytz run: diff --git a/doc/make.py b/doc/make.py index a81ba7afd9f81..76ce2aca2916c 100755 --- a/doc/make.py +++ b/doc/make.py @@ -39,7 +39,7 @@ class DocBuilder: def __init__( self, - num_jobs=0, + num_jobs="auto", include_api=True, whatsnew=False, single_doc=None, @@ -135,7 +135,7 @@ def _sphinx_build(self, kind: str): cmd = ["sphinx-build", "-b", kind] if self.num_jobs: - cmd += ["-j", str(self.num_jobs)] + cmd += ["-j", self.num_jobs] if self.warnings_are_errors: cmd += ["-W", "--keep-going"] if self.verbosity: @@ -304,7 +304,7 @@ def main(): "command", nargs="?", default="html", help=f"command to run: {joined}" ) argparser.add_argument( - "--num-jobs", type=int, default=0, help="number of jobs used by sphinx-build" + "--num-jobs", default="auto", help="number of jobs used by sphinx-build" ) argparser.add_argument( "--no-api", default=False, help="omit api and autosummary", action="store_true" diff --git a/doc/source/conf.py 
b/doc/source/conf.py index 7cd9743e463d0..66f5e631fa656 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -423,7 +423,7 @@ if include_api: intersphinx_mapping = { "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), - "matplotlib": ("https://matplotlib.org/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), "numpy": ("https://numpy.org/doc/stable/", None), "pandas-gbq": ("https://pandas-gbq.readthedocs.io/en/latest/", None), "py": ("https://pylib.readthedocs.io/en/latest/", None), diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index f3630a44d29cd..f48c4ff5d97af 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -612,7 +612,8 @@ For comparison, a full documentation build may take 15 minutes, but a single section may take 15 seconds. Subsequent builds, which only process portions you have changed, will be faster. -You can also specify to use multiple cores to speed up the documentation build:: +The build will automatically use the number of cores available on your machine +to speed up the documentation build. You can override this:: python make.py html --num-jobs 4 diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index bb89b91954518..4b69d5b0c8c77 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -476,6 +476,14 @@ storing numeric arrays with units. These arrays can be stored inside pandas' Series and DataFrame. Operations between Series and DataFrame columns which use pint's extension array are then units aware. +`Text Extensions for Pandas`_ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``Text Extensions for Pandas `` +provides extension types to cover common data structures for representing natural language +data, plus library integrations that convert the outputs of popular natural language +processing libraries into Pandas DataFrames. + .. 
_ecosystem.accessors: Accessors diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 49039f05b889a..1ee8e3401e7f4 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -219,7 +219,7 @@ Dependencies ================================================================ ========================== Package Minimum supported version ================================================================ ========================== -`setuptools `__ 24.2.0 +`setuptools `__ 38.6.0 `NumPy `__ 1.16.5 `python-dateutil `__ 2.7.3 `pytz `__ 2017.3 @@ -274,7 +274,7 @@ html5lib 1.0.1 HTML parser for read_html (see :ref lxml 4.3.0 HTML parser for read_html (see :ref:`note `) matplotlib 2.2.3 Visualization numba 0.46.0 Alternative execution engine for rolling operations -openpyxl 2.6.0 Reading / writing for xlsx files +openpyxl 3.0.0 Reading / writing for xlsx files pandas-gbq 0.12.0 Google Big Query access psycopg2 2.7 PostgreSQL engine for sqlalchemy pyarrow 0.15.0 Parquet, ORC, and feather reading / writing diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst index 07c856c96426d..180f833a2753d 100644 --- a/doc/source/user_guide/gotchas.rst +++ b/doc/source/user_guide/gotchas.rst @@ -178,6 +178,77 @@ To test for membership in the values, use the method :meth:`~pandas.Series.isin` For ``DataFrames``, likewise, ``in`` applies to the column axis, testing for membership in the list of column names. +.. _udf-mutation: + +Mutating with User Defined Function (UDF) methods +------------------------------------------------- + +This section applies to pandas methods that take a UDF. In particular, the methods +``.apply``, ``.aggregate``, ``.transform``, and ``.filter``. + +It is a general rule in programming that one should not mutate a container +while it is being iterated over. Mutation will invalidate the iterator, +causing unexpected behavior. Consider the example: + +.. 
ipython:: python + + values = [0, 1, 2, 3, 4, 5] + n_removed = 0 + for k, value in enumerate(values): + idx = k - n_removed + if value % 2 == 1: + del values[idx] + n_removed += 1 + else: + values[idx] = value + 1 + values + +One probably would have expected that the result would be ``[1, 3, 5]``. +When using a pandas method that takes a UDF, internally pandas is often +iterating over the +``DataFrame`` or other pandas object. Therefore, if the UDF mutates (changes) +the ``DataFrame``, unexpected behavior can arise. + +Here is a similar example with :meth:`DataFrame.apply`: + +.. ipython:: python + + def f(s): + s.pop("a") + return s + + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + try: + df.apply(f, axis="columns") + except Exception as err: + print(repr(err)) + +To resolve this issue, one can make a copy so that the mutation does +not apply to the container being iterated over. + +.. ipython:: python + + values = [0, 1, 2, 3, 4, 5] + n_removed = 0 + for k, value in enumerate(values.copy()): + idx = k - n_removed + if value % 2 == 1: + del values[idx] + n_removed += 1 + else: + values[idx] = value + 1 + values + +.. ipython:: python + + def f(s): + s = s.copy() + s.pop("a") + return s + + df = pd.DataFrame({"a": [1, 2, 3], 'b': [4, 5, 6]}) + df.apply(f, axis="columns") + ``NaN``, Integer ``NA`` values and ``NA`` type promotions --------------------------------------------------------- diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index d6934a3ca2a6c..67c74f9a04618 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -2853,14 +2853,12 @@ See the :ref:`cookbook` for some advanced strategies. The `xlrd `__ package is now only for reading old-style ``.xls`` files. 
- Before pandas 1.2.0, the default argument ``engine=None`` to :func:`~pandas.read_excel` + Before pandas 1.3.0, the default argument ``engine=None`` to :func:`~pandas.read_excel` would result in using the ``xlrd`` engine in many cases, including new - Excel 2007+ (``.xlsx``) files. - If `openpyxl `__ is installed, - many of these cases will now default to using the ``openpyxl`` engine. - See the :func:`read_excel` documentation for more details. + Excel 2007+ (``.xlsx``) files. pandas will now default to using the + `openpyxl `__ engine. - Thus, it is strongly encouraged to install ``openpyxl`` to read Excel 2007+ + It is strongly encouraged to install ``openpyxl`` to read Excel 2007+ (``.xlsx``) files. **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.** This is no longer supported, switch to using ``openpyxl`` instead. diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 5dac3a26424a8..ad8a23882e1e8 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -374,7 +374,7 @@ For example, after running the following, ``styled.xlsx`` renders as below: df.iloc[0, 2] = np.nan df styled = (df.style - .applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black') + .applymap(lambda val: 'color:red;' if val < 0 else 'color:black;') .highlight_max()) styled.to_excel('styled.xlsx', engine='openpyxl') diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst index 781054fc4de7c..490175914cef1 100644 --- a/doc/source/whatsnew/v0.8.0.rst +++ b/doc/source/whatsnew/v0.8.0.rst @@ -176,7 +176,7 @@ New plotting methods Vytautas Jancauskas, the 2012 GSOC participant, has added many new plot types. For example, ``'kde'`` is a new option: -.. ipython:: python +.. 
code-block:: python s = pd.Series( np.concatenate((np.random.randn(1000), np.random.randn(1000) * 0.5 + 3)) diff --git a/doc/source/whatsnew/v1.2.3.rst b/doc/source/whatsnew/v1.2.3.rst index e675b3ea921d1..4231b6d94b1b9 100644 --- a/doc/source/whatsnew/v1.2.3.rst +++ b/doc/source/whatsnew/v1.2.3.rst @@ -15,7 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`) - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 799bc88ffff4e..d5177075afda5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -8,6 +8,16 @@ including other versions of pandas. {{ header }} +.. warning:: + + When reading new Excel 2007+ (``.xlsx``) files, the default argument + ``engine=None`` to :func:`~pandas.read_excel` will now result in using the + `openpyxl `_ engine in all cases + when the option :attr:`io.excel.xlsx.reader` is set to ``"auto"``. + Previously, some cases would use the + `xlrd `_ engine instead. See + :ref:`What's new 1.2.0 ` for background on this change. + .. --------------------------------------------------------------------------- Enhancements @@ -55,7 +65,8 @@ Other enhancements - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`) - :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes (:issue:`35643`, :issue:`21266`, :issue:`39317`) - :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) -- :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None``. 
(:issue:`39359`) +- :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) +- :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. - Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`) @@ -164,6 +175,8 @@ If installed, we now require: +-----------------+-----------------+----------+---------+ | mypy (dev) | 0.800 | | X | +-----------------+-----------------+----------+---------+ +| setuptools | 38.6.0 | | X | ++-----------------+-----------------+----------+---------+ For `optional libraries `_ the general recommendation is to use the latest version. The following table lists the lowest version per library that is currently being tested throughout the development of pandas. 
@@ -186,7 +199,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | numba | 0.46.0 | | +-----------------+-----------------+---------+ -| openpyxl | 2.6.0 | | +| openpyxl | 3.0.0 | X | +-----------------+-----------------+---------+ | pyarrow | 0.15.0 | | +-----------------+-----------------+---------+ @@ -239,7 +252,7 @@ Deprecations - Deprecated :attr:`Rolling.is_datetimelike` (:issue:`38963`) - Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`) - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`) -- +- Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`) .. 
--------------------------------------------------------------------------- @@ -312,6 +325,8 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`Series.to_dict` with ``orient='records'`` now returns python native types (:issue:`25969`) +- Bug in :meth:`Series.view` and :meth:`Index.view` when converting between datetime-like (``datetime64[ns]``, ``datetime64[ns, tz]``, ``timedelta64``, ``period``) dtypes (:issue:`39788`) - - @@ -346,7 +361,9 @@ Indexing - Bug in setting ``timedelta64`` or ``datetime64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`, :issue:`39619`) - Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`) - Bug in setting ``datetime64`` values into a :class:`Series` with integer-dtype incorrectly casting the datetime64 values to integers (:issue:`39266`) +- Bug in setting ``np.datetime64("NaT")`` into a :class:`Series` with :class:`DatetimeTZDtype` incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) - Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`) +- Bug in :meth:`DatetimeIndex.insert` when inserting ``np.datetime64("NaT")`` into a timezone-aware index incorrectly treating the timezone-naive value as timezone-aware (:issue:`39769`) - Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`) - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`) - Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) @@ -418,6 +435,8 @@ 
Groupby/resample/rolling - Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`) - Bug in :meth:`core.window.rolling.RollingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.corr` where the groupby column would return 0 instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) - Bug in :meth:`core.window.expanding.ExpandingGroupby.corr` and :meth:`core.window.expanding.ExpandingGroupby.cov` where 1 would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) +- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Reshaping ^^^^^^^^^ @@ -460,6 +479,8 @@ Other - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) - Bug in :class:`Styler` where ``subset`` arg in methods raised an error for some valid multiindex slices (:issue:`33562`) - :class:`Styler` rendered HTML output minor alterations to support w3 good code standard (:issue:`39626`) +- Bug in :class:`Styler` where rendered HTML was missing a column class identifier for certain header cells (:issue:`39716`) +- Bug in :meth:`Styler.background_gradient` where text-color was not determined correctly (:issue:`39888`) - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 512b638fc4877..47913c2a1cf7d 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -49,9 +49,22 @@ """ from collections import namedtuple -from contextlib import 
ContextDecorator, contextmanager +from contextlib import ( + ContextDecorator, + contextmanager, +) import re -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, cast +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Tuple, + Type, + cast, +) import warnings from pandas._typing import F diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 080a84bef1e58..1a1b263ae356e 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1,8 +1,14 @@ import cython from cython import Py_ssize_t -from libc.math cimport fabs, sqrt -from libc.stdlib cimport free, malloc +from libc.math cimport ( + fabs, + sqrt, +) +from libc.stdlib cimport ( + free, + malloc, +) from libc.string cimport memmove import numpy as np @@ -46,7 +52,10 @@ from pandas._libs.khash cimport ( kh_resize_int64, khiter_t, ) -from pandas._libs.util cimport get_nat, numeric +from pandas._libs.util cimport ( + get_nat, + numeric, +) import pandas._libs.missing as missing diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 553ecbc58e745..43bf6d9dd1fee 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -2,7 +2,10 @@ import cython from cython import Py_ssize_t from cython cimport floating -from libc.stdlib cimport free, malloc +from libc.stdlib cimport ( + free, + malloc, +) import numpy as np @@ -27,9 +30,16 @@ from numpy.math cimport NAN cnp.import_array() from pandas._libs.algos cimport swap -from pandas._libs.util cimport get_nat, numeric +from pandas._libs.util cimport ( + get_nat, + numeric, +) -from pandas._libs.algos import groupsort_indexer, rank_1d, take_2d_axis1_float64_float64 +from pandas._libs.algos import ( + groupsort_indexer, + rank_1d, + take_2d_axis1_float64_float64, +) from pandas._libs.missing cimport checknull diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index f2af04d91a3e3..ead967386ed1d 100644 --- a/pandas/_libs/hashing.pyx +++ 
b/pandas/_libs/hashing.pyx @@ -3,11 +3,20 @@ import cython -from libc.stdlib cimport free, malloc +from libc.stdlib cimport ( + free, + malloc, +) import numpy as np -from numpy cimport import_array, ndarray, uint8_t, uint32_t, uint64_t +from numpy cimport ( + import_array, + ndarray, + uint8_t, + uint32_t, + uint64_t, +) import_array() diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index cc9341665b8db..735d8c07f4774 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -1,4 +1,7 @@ -from numpy cimport intp_t, ndarray +from numpy cimport ( + intp_t, + ndarray, +) from pandas._libs.khash cimport ( complex64_t, diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 3527fe2d8cd8d..1bbffaa7bb5d2 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -1,12 +1,26 @@ cimport cython -from cpython.mem cimport PyMem_Free, PyMem_Malloc -from cpython.ref cimport Py_INCREF, PyObject -from libc.stdlib cimport free, malloc +from cpython.mem cimport ( + PyMem_Free, + PyMem_Malloc, +) +from cpython.ref cimport ( + Py_INCREF, + PyObject, +) +from libc.stdlib cimport ( + free, + malloc, +) import numpy as np cimport numpy as cnp -from numpy cimport float64_t, ndarray, uint8_t, uint32_t +from numpy cimport ( + float64_t, + ndarray, + uint8_t, + uint32_t, +) from numpy.math cimport NAN cnp.import_array() diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index e31c3739f456d..cb7b9f990a98e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -28,7 +28,10 @@ from pandas._libs.tslibs.period cimport is_period_object from pandas._libs.tslibs.timedeltas cimport _Timedelta from pandas._libs.tslibs.timestamps cimport _Timestamp -from pandas._libs import algos, hashtable as _hash +from pandas._libs import ( + algos, + hashtable as _hash, +) from pandas._libs.missing import checknull diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 006fd34632d5a..150b7f62b4b26 
100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -12,7 +12,10 @@ cdef extern from "Python.h": import numpy as np cimport numpy as cnp -from numpy cimport NPY_INT64, int64_t +from numpy cimport ( + NPY_INT64, + int64_t, +) cnp.import_array() diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 10becdce5d6dd..9ed8b71c2ce17 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -1,7 +1,13 @@ import numbers -from operator import le, lt +from operator import ( + le, + lt, +) -from cpython.datetime cimport PyDateTime_IMPORT, PyDelta_Check +from cpython.datetime cimport ( + PyDateTime_IMPORT, + PyDelta_Check, +) PyDateTime_IMPORT diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3a11e7fbbdf33..9f2c82d760785 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -14,10 +14,16 @@ from cpython.datetime cimport ( ) from cpython.iterator cimport PyIter_Check from cpython.number cimport PyNumber_Check -from cpython.object cimport Py_EQ, PyObject_RichCompareBool +from cpython.object cimport ( + Py_EQ, + PyObject_RichCompareBool, +) from cpython.ref cimport Py_INCREF from cpython.sequence cimport PySequence_Check -from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM +from cpython.tuple cimport ( + PyTuple_New, + PyTuple_SET_ITEM, +) PyDateTime_IMPORT @@ -66,7 +72,12 @@ cdef extern from "src/parse_helper.h": int floatify(object, float64_t *result, int *maybe_int) except -1 from pandas._libs cimport util -from pandas._libs.util cimport INT64_MAX, INT64_MIN, UINT64_MAX, is_nan +from pandas._libs.util cimport ( + INT64_MAX, + INT64_MIN, + UINT64_MAX, + is_nan, +) from pandas._libs.tslib import array_to_datetime from pandas._libs.tslibs.period import Period @@ -80,7 +91,11 @@ from pandas._libs.missing cimport ( isnaobj, ) from pandas._libs.tslibs.conversion cimport convert_to_tsobject -from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT, checknull_with_nat +from 
pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, + checknull_with_nat, +) from pandas._libs.tslibs.offsets cimport is_offset_object from pandas._libs.tslibs.period cimport is_period_object from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64 @@ -1044,11 +1059,12 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: return ( - isinstance(obj, abc.Iterable) + # equiv: `isinstance(obj, abc.Iterable)` + hasattr(obj, "__iter__") and not isinstance(obj, type) # we do not count strings/unicode/bytes as list-like and not isinstance(obj, (str, bytes)) # exclude zero-dimensional numpy arrays, effectively scalars - and not (util.is_array(obj) and obj.ndim == 0) + and not cnp.PyArray_IsZeroDim(obj) # exclude sets if allow_sets is False and not (allow_sets is False and isinstance(obj, abc.Set)) ) diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd index ce8e8007e7630..9d32fcd3625db 100644 --- a/pandas/_libs/missing.pxd +++ b/pandas/_libs/missing.pxd @@ -1,4 +1,7 @@ -from numpy cimport ndarray, uint8_t +from numpy cimport ( + ndarray, + uint8_t, +) cpdef bint is_matching_na(object left, object right, bint nan_matches_none=*) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index d91d0261a1b33..f6f9e7410d34c 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -5,7 +5,12 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport float64_t, int64_t, ndarray, uint8_t +from numpy cimport ( + float64_t, + int64_t, + ndarray, + uint8_t, +) cnp.import_array() @@ -15,7 +20,10 @@ from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, is_null_datetimelike, ) -from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_value, + get_timedelta64_value, +) from pandas._libs.ops_dispatch import 
maybe_dispatch_ufunc_to_dunder_op from pandas.compat import IS64 diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index d1f897d237c1b..1e51a578c44ea 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -14,13 +14,20 @@ import cython from cython import Py_ssize_t import numpy as np -from numpy cimport import_array, ndarray, uint8_t +from numpy cimport ( + import_array, + ndarray, + uint8_t, +) import_array() from pandas._libs.missing cimport checknull -from pandas._libs.util cimport UINT8_MAX, is_nan +from pandas._libs.util cimport ( + UINT8_MAX, + is_nan, +) @cython.wraparound(False) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a72a2ff8eaf28..c4d98ccb88ba5 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1,22 +1,36 @@ # Copyright (c) 2012, Lambda Foundry, Inc. # See LICENSE for the license -from csv import QUOTE_MINIMAL, QUOTE_NONE, QUOTE_NONNUMERIC +from csv import ( + QUOTE_MINIMAL, + QUOTE_NONE, + QUOTE_NONNUMERIC, +) from errno import ENOENT import sys import time import warnings from libc.stdlib cimport free -from libc.string cimport strcasecmp, strlen, strncpy +from libc.string cimport ( + strcasecmp, + strlen, + strncpy, +) import cython from cython import Py_ssize_t from cpython.bytes cimport PyBytes_AsString -from cpython.exc cimport PyErr_Fetch, PyErr_Occurred +from cpython.exc cimport ( + PyErr_Fetch, + PyErr_Occurred, +) from cpython.object cimport PyObject from cpython.ref cimport Py_XDECREF -from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_Decode +from cpython.unicode cimport ( + PyUnicode_AsUTF8String, + PyUnicode_Decode, +) cdef extern from "Python.h": @@ -26,12 +40,22 @@ cdef extern from "Python.h": import numpy as np cimport numpy as cnp -from numpy cimport float64_t, int64_t, ndarray, uint8_t, uint64_t +from numpy cimport ( + float64_t, + int64_t, + ndarray, + uint8_t, + uint64_t, +) cnp.import_array() from pandas._libs cimport util -from pandas._libs.util cimport 
INT64_MAX, INT64_MIN, UINT64_MAX +from pandas._libs.util cimport ( + INT64_MAX, + INT64_MIN, + UINT64_MAX, +) import pandas._libs.lib as lib @@ -62,7 +86,12 @@ from pandas._libs.khash cimport ( khiter_t, ) -from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning +from pandas.errors import ( + DtypeWarning, + EmptyDataError, + ParserError, + ParserWarning, +) from pandas.core.dtypes.common import ( is_bool_dtype, diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 9b936eed785b4..7b786e9c0493d 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -1,6 +1,10 @@ from cython import Py_ssize_t -from cpython.dict cimport PyDict_Contains, PyDict_GetItem, PyDict_SetItem +from cpython.dict cimport ( + PyDict_Contains, + PyDict_GetItem, + PyDict_SetItem, +) cdef class CachedProperty: diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 25b41b020aee6..4d0bd4744be5d 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,17 +1,29 @@ from copy import copy -from libc.stdlib cimport free, malloc +from libc.stdlib cimport ( + free, + malloc, +) import numpy as np cimport numpy as cnp -from numpy cimport int64_t, ndarray +from numpy cimport ( + int64_t, + ndarray, +) cnp.import_array() -from pandas._libs.util cimport is_array, set_array_not_contiguous +from pandas._libs.util cimport ( + is_array, + set_array_not_contiguous, +) -from pandas._libs.lib import is_scalar, maybe_convert_objects +from pandas._libs.lib import ( + is_scalar, + maybe_convert_objects, +) cpdef check_result_array(object obj, Py_ssize_t cnt): @@ -97,9 +109,8 @@ cdef class SeriesBinGrouper(_BaseGrouper): ndarray arr, index, dummy_arr, dummy_index object values, f, bins, typ, ityp, name - def __init__(self, object series, object f, object bins, object dummy): + def __init__(self, object series, object f, object bins): - assert dummy is not None # always obj[:0] assert len(bins) > 0 # otherwise 
we get IndexError in get_result self.bins = bins @@ -115,6 +126,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): self.index = series.index.values self.name = series.name + dummy = series.iloc[:0] self.dummy_arr, self.dummy_index = self._check_dummy(dummy) # kludge for #1688 @@ -191,10 +203,7 @@ cdef class SeriesGrouper(_BaseGrouper): object f, labels, values, typ, ityp, name def __init__(self, object series, object f, object labels, - Py_ssize_t ngroups, object dummy): - - # in practice we always pass obj.iloc[:0] or equivalent - assert dummy is not None + Py_ssize_t ngroups): if len(series) == 0: # get_result would never assign `result` @@ -213,6 +222,7 @@ cdef class SeriesGrouper(_BaseGrouper): self.index = series.index.values self.name = series.name + dummy = series.iloc[:0] self.dummy_arr, self.dummy_index = self._check_dummy(dummy) self.ngroups = ngroups diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 7a2fa471b9ba8..ff15a2c720c2c 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -8,10 +8,17 @@ from numpy cimport import_array import_array() from pandas._libs.lib import is_complex -from pandas._libs.util cimport is_array, is_real_number_object + +from pandas._libs.util cimport ( + is_array, + is_real_number_object, +) from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.missing import array_equivalent, isna +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) cdef bint isiterable(obj): diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 9578fed2d1fd9..605e2135edc9f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -13,7 +13,11 @@ PyDateTime_IMPORT cimport numpy as cnp -from numpy cimport float64_t, int64_t, ndarray +from numpy cimport ( + float64_t, + int64_t, + ndarray, +) import numpy as np @@ -31,7 +35,11 @@ from pandas._libs.tslibs.np_datetime cimport ( pydate_to_dt64, pydatetime_to_dt64, ) -from pandas._libs.util cimport 
is_datetime64_object, is_float_object, is_integer_object +from pandas._libs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import parse_datetime_string @@ -53,6 +61,7 @@ from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.tslibs.timestamps import Timestamp # Note: this is the only non-tslibs intra-pandas dependency here + from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 6135e54a4502e..e38ed9a20e55b 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -28,7 +28,10 @@ ] from pandas._libs.tslibs import dtypes -from pandas._libs.tslibs.conversion import OutOfBoundsTimedelta, localize_pydatetime +from pandas._libs.tslibs.conversion import ( + OutOfBoundsTimedelta, + localize_pydatetime, +) from pandas._libs.tslibs.dtypes import Resolution from pandas._libs.tslibs.nattype import ( NaT, @@ -38,8 +41,15 @@ nat_strings, ) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.offsets import BaseOffset, Tick, to_offset -from pandas._libs.tslibs.period import IncompatibleFrequency, Period +from pandas._libs.tslibs.offsets import ( + BaseOffset, + Tick, + to_offset, +) +from pandas._libs.tslibs.period import ( + IncompatibleFrequency, + Period, +) from pandas._libs.tslibs.timedeltas import ( Timedelta, delta_to_nanoseconds, diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd index 388fd0c62b937..511c9f94a47d8 100644 --- a/pandas/_libs/tslibs/ccalendar.pxd +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -1,5 +1,8 @@ from cython cimport Py_ssize_t -from numpy cimport int32_t, int64_t +from numpy cimport ( + int32_t, + int64_t, +) ctypedef (int32_t, int32_t, int32_t) 
iso_calendar_t diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index d8c83daa661a3..2aa049559d9e9 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -5,7 +5,10 @@ Cython implementations of functions resembling the stdlib calendar module import cython -from numpy cimport int32_t, int64_t +from numpy cimport ( + int32_t, + int64_t, +) # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index c80be79a12d90..1b99e855da40f 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -1,5 +1,12 @@ -from cpython.datetime cimport datetime, tzinfo -from numpy cimport int32_t, int64_t, ndarray +from cpython.datetime cimport ( + datetime, + tzinfo, +) +from numpy cimport ( + int32_t, + int64_t, + ndarray, +) from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 0a22bd9b849a7..0646c58fa84b6 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -2,7 +2,12 @@ import cython import numpy as np cimport numpy as cnp -from numpy cimport int32_t, int64_t, intp_t, ndarray +from numpy cimport ( + int32_t, + int64_t, + intp_t, + ndarray, +) cnp.import_array() diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 2f25df9144f32..79d6a42075e83 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -9,13 +9,22 @@ from cython import Py_ssize_t import numpy as np cimport numpy as cnp -from numpy cimport int8_t, int32_t, int64_t, ndarray, uint32_t +from numpy cimport ( + int8_t, + int32_t, + int64_t, + ndarray, + uint32_t, +) cnp.import_array() from pandas._config.localization import set_locale -from pandas._libs.tslibs.ccalendar import DAYS_FULL, MONTHS_FULL +from 
pandas._libs.tslibs.ccalendar import ( + DAYS_FULL, + MONTHS_FULL, +) from pandas._libs.tslibs.ccalendar cimport ( dayofweek, diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index d5582d65a0c11..2879528b2c501 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -30,7 +30,10 @@ from numpy cimport int64_t cnp.import_array() cimport pandas._libs.tslibs.util as util -from pandas._libs.tslibs.np_datetime cimport get_datetime64_value, get_timedelta64_value +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_value, + get_timedelta64_value, +) # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 026fa719d1cc1..c2bbc4fe764fe 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -1,5 +1,11 @@ -from cpython.datetime cimport date, datetime -from numpy cimport int32_t, int64_t +from cpython.datetime cimport ( + date, + datetime, +) +from numpy cimport ( + int32_t, + int64_t, +) cdef extern from "numpy/ndarrayobject.h": diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 12aaaf4ce3977..418730277ed6b 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -8,7 +8,14 @@ from cpython.datetime cimport ( PyDateTime_GET_YEAR, PyDateTime_IMPORT, ) -from cpython.object cimport Py_EQ, Py_GE, Py_GT, Py_LE, Py_LT, Py_NE +from cpython.object cimport ( + Py_EQ, + Py_GE, + Py_GT, + Py_LE, + Py_LT, + Py_NE, +) PyDateTime_IMPORT diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 4dc14397a30f4..2d4704ad3bda6 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -24,7 +24,10 @@ from dateutil.relativedelta import relativedelta import numpy as np cimport numpy as cnp -from numpy cimport int64_t, ndarray +from numpy 
cimport ( + int64_t, + ndarray, +) cnp.import_array() @@ -57,7 +60,10 @@ from pandas._libs.tslibs.conversion cimport ( convert_datetime_to_tsobject, localize_pydatetime, ) -from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) from pandas._libs.tslibs.np_datetime cimport ( dt64_to_dtstruct, dtstruct_to_dt64, diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 5c3417ee2d93c..50b1804e1c5f9 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -9,7 +9,12 @@ from libc.string cimport strchr import cython from cython import Py_ssize_t -from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo +from cpython.datetime cimport ( + datetime, + datetime_new, + import_datetime, + tzinfo, +) from cpython.object cimport PyObject_Str from cpython.version cimport PY_VERSION_HEX @@ -31,7 +36,10 @@ cnp.import_array() # dateutil compat -from dateutil.parser import DEFAULTPARSER, parse as du_parse +from dateutil.parser import ( + DEFAULTPARSER, + parse as du_parse, +) from dateutil.relativedelta import relativedelta from dateutil.tz import ( tzlocal as _dateutil_tzlocal, @@ -43,9 +51,15 @@ from dateutil.tz import ( from pandas._config import get_option from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS -from pandas._libs.tslibs.nattype cimport c_NaT as NaT, c_nat_strings as nat_strings +from pandas._libs.tslibs.nattype cimport ( + c_NaT as NaT, + c_nat_strings as nat_strings, +) from pandas._libs.tslibs.offsets cimport is_offset_object -from pandas._libs.tslibs.util cimport get_c_string_buf_and_size, is_array +from pandas._libs.tslibs.util cimport ( + get_c_string_buf_and_size, + is_array, +) cdef extern from "../src/headers/portable.h": diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d518729b6ce67..165f51d06af6d 100644 --- a/pandas/_libs/tslibs/period.pyx +++ 
b/pandas/_libs/tslibs/period.pyx @@ -1,16 +1,32 @@ import warnings cimport numpy as cnp -from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompareBool -from numpy cimport int64_t, ndarray +from cpython.object cimport ( + Py_EQ, + Py_NE, + PyObject_RichCompareBool, +) +from numpy cimport ( + int64_t, + ndarray, +) import numpy as np cnp.import_array() -from libc.stdlib cimport free, malloc -from libc.string cimport memset, strlen -from libc.time cimport strftime, tm +from libc.stdlib cimport ( + free, + malloc, +) +from libc.string cimport ( + memset, + strlen, +) +from libc.time cimport ( + strftime, + tm, +) import cython @@ -54,7 +70,10 @@ from pandas._libs.tslibs.ccalendar cimport ( get_week_of_year, is_leapyear, ) -from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds, is_any_td_scalar +from pandas._libs.tslibs.timedeltas cimport ( + delta_to_nanoseconds, + is_any_td_scalar, +) from pandas._libs.tslibs.conversion import ensure_datetime64ns diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index bc4632ad028ab..ffa29b44a366a 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -5,14 +5,20 @@ import locale import re import time -from cpython.datetime cimport date, tzinfo +from cpython.datetime cimport ( + date, + tzinfo, +) from _thread import allocate_lock as _thread_allocate_lock import numpy as np import pytz -from numpy cimport int64_t, ndarray +from numpy cimport ( + int64_t, + ndarray, +) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 871819f82a672..9ebabd704475b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -3,12 +3,19 @@ import warnings import cython -from cpython.object cimport Py_EQ, Py_NE, PyObject_RichCompare +from cpython.object cimport ( + Py_EQ, + Py_NE, + PyObject_RichCompare, +) import numpy as np cimport numpy as cnp -from 
numpy cimport int64_t, ndarray +from numpy cimport ( + int64_t, + ndarray, +) cnp.import_array() @@ -24,7 +31,10 @@ PyDateTime_IMPORT cimport pandas._libs.tslibs.util as util from pandas._libs.tslibs.base cimport ABCTimestamp -from pandas._libs.tslibs.conversion cimport cast_from_unit, precision_from_unit +from pandas._libs.tslibs.conversion cimport ( + cast_from_unit, + precision_from_unit, +) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, @@ -47,7 +57,11 @@ from pandas._libs.tslibs.util cimport ( is_integer_object, is_timedelta64_object, ) -from pandas._libs.tslibs.fields import RoundTo, round_nsint64 + +from pandas._libs.tslibs.fields import ( + RoundTo, + round_nsint64, +) # ---------------------------------------------------------------------- # Constants diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 45aae3581fe79..eadd7c7022acb 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -1,4 +1,7 @@ -from cpython.datetime cimport datetime, tzinfo +from cpython.datetime cimport ( + datetime, + tzinfo, +) from numpy cimport int64_t from pandas._libs.tslibs.base cimport ABCTimestamp diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 5f6b614ac3d81..60ffa3dd46989 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -13,7 +13,12 @@ cimport cython import numpy as np cimport numpy as cnp -from numpy cimport int8_t, int64_t, ndarray, uint8_t +from numpy cimport ( + int8_t, + int64_t, + ndarray, + uint8_t, +) cnp.import_array() @@ -63,7 +68,10 @@ from pandas._libs.tslibs.fields import ( round_nsint64, ) -from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, cmp_scalar, @@ -74,8 +82,14 @@ from pandas._libs.tslibs.np_datetime cimport ( from 
pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.offsets cimport is_offset_object, to_offset -from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds, is_any_td_scalar +from pandas._libs.tslibs.offsets cimport ( + is_offset_object, + to_offset, +) +from pandas._libs.tslibs.timedeltas cimport ( + delta_to_nanoseconds, + is_any_td_scalar, +) from pandas._libs.tslibs.timedeltas import Timedelta diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 753c881ed505c..13f196a567952 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,4 +1,8 @@ -from cpython.datetime cimport datetime, timedelta, tzinfo +from cpython.datetime cimport ( + datetime, + timedelta, + tzinfo, +) cdef tzinfo utc_pytz diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 73d06d4641368..92065e1c3d4c5 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,6 +1,13 @@ -from datetime import timedelta, timezone +from datetime import ( + timedelta, + timezone, +) -from cpython.datetime cimport datetime, timedelta, tzinfo +from cpython.datetime cimport ( + datetime, + timedelta, + tzinfo, +) # dateutil compat @@ -24,7 +31,10 @@ from numpy cimport int64_t cnp.import_array() # ---------------------------------------------------------------------- -from pandas._libs.tslibs.util cimport get_nat, is_integer_object +from pandas._libs.tslibs.util cimport ( + get_nat, + is_integer_object, +) cdef int64_t NPY_NAT = get_nat() diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 1049682af08e8..8e82d8a180aa6 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -19,13 +19,24 @@ import numpy as np import pytz cimport numpy as cnp -from numpy cimport int64_t, intp_t, ndarray, uint8_t +from numpy cimport ( + int64_t, + intp_t, + ndarray, + uint8_t, +) 
cnp.import_array() -from pandas._libs.tslibs.ccalendar cimport DAY_NANOS, HOUR_NANOS +from pandas._libs.tslibs.ccalendar cimport ( + DAY_NANOS, + HOUR_NANOS, +) from pandas._libs.tslibs.nattype cimport NPY_NAT -from pandas._libs.tslibs.np_datetime cimport dt64_to_dtstruct, npy_datetimestruct +from pandas._libs.tslibs.np_datetime cimport ( + dt64_to_dtstruct, + npy_datetimestruct, +) from pandas._libs.tslibs.timezones cimport ( get_dst_info, get_utcoffset, diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 16d801f69df05..150516aadffc6 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -27,7 +27,10 @@ cdef extern from "Python.h": const char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) except NULL -from numpy cimport float64_t, int64_t +from numpy cimport ( + float64_t, + int64_t, +) cdef extern from "numpy/arrayobject.h": diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index c3c78ca54885a..30d9f5e64b282 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -1,21 +1,40 @@ import cython -from cpython.datetime cimport date, datetime, time, tzinfo +from cpython.datetime cimport ( + date, + datetime, + time, + tzinfo, +) import numpy as np -from numpy cimport int64_t, intp_t, ndarray +from numpy cimport ( + int64_t, + intp_t, + ndarray, +) from .conversion cimport normalize_i8_stamp from .dtypes import Resolution -from .nattype cimport NPY_NAT, c_NaT as NaT -from .np_datetime cimport dt64_to_dtstruct, npy_datetimestruct +from .nattype cimport ( + NPY_NAT, + c_NaT as NaT, +) +from .np_datetime cimport ( + dt64_to_dtstruct, + npy_datetimestruct, +) from .offsets cimport to_offset from .period cimport get_period_ordinal from .timestamps cimport create_timestamp_from_ts -from .timezones cimport get_dst_info, is_tzlocal, is_utc +from .timezones cimport ( + get_dst_info, + is_tzlocal, + is_utc, +) from .tzconversion cimport 
tz_convert_utc_to_tzlocal # ------------------------------------------------------------------------- diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 5e02e6119815c..5a95b0ec4e08a 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -8,7 +8,12 @@ from libcpp.deque cimport deque import numpy as np cimport numpy as cnp -from numpy cimport float32_t, float64_t, int64_t, ndarray +from numpy cimport ( + float32_t, + float64_t, + int64_t, + ndarray, +) cnp.import_array() diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 6a49a5bb34855..b8b9a8553161f 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -2,7 +2,10 @@ import numpy as np -from numpy cimport int64_t, ndarray +from numpy cimport ( + int64_t, + ndarray, +) # Cython routines for window indexers diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 06f180eef0c65..6577f3604d14b 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -1,8 +1,14 @@ import cython import numpy as np -from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_LENGTH -from numpy cimport ndarray, uint8_t +from cpython cimport ( + PyBytes_GET_SIZE, + PyUnicode_GET_LENGTH, +) +from numpy cimport ( + ndarray, + uint8_t, +) ctypedef fused pandas_string: str diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0b2be53131af6..6b88bd26627b0 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -99,10 +99,18 @@ use_numexpr, with_csv_dialect, ) -from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray, period_array +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, + period_array, +) if TYPE_CHECKING: - from pandas import PeriodIndex, TimedeltaIndex + from pandas import ( + PeriodIndex, + TimedeltaIndex, + ) _N = 30 _K = 4 @@ -207,8 +215,10 @@ def 
box_expected(expected, box_cls, transpose=True): if transpose: # for vector operations, we need a DataFrame to be a single-row, # not a single-column, in order to operate against non-DataFrame - # vectors of the same length. + # vectors of the same length. But convert to two rows to avoid + # single-row special cases in datetime arithmetic expected = expected.T + expected = pd.concat([expected] * 2, ignore_index=True) elif box_cls is PeriodArray: # the PeriodArray constructor is not as flexible as period_array expected = period_array(expected) @@ -559,7 +569,7 @@ def makeCustomIndex( "p": makePeriodIndex, }.get(idx_type) if idx_func: - # pandas\_testing.py:2120: error: Cannot call function of unknown type + # error: Cannot call function of unknown type idx = idx_func(nentries) # type: ignore[operator] # but we need to fill in the name if names: diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index 5f27b016b68a2..e327f48f9a888 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -1,11 +1,22 @@ import bz2 from functools import wraps import gzip -from typing import Any, Callable, Optional, Tuple +from typing import ( + Any, + Callable, + Optional, + Tuple, +) import zipfile -from pandas._typing import FilePathOrBuffer, FrameOrSeries -from pandas.compat import get_lzma_file, import_lzma +from pandas._typing import ( + FilePathOrBuffer, + FrameOrSeries, +) +from pandas.compat import ( + get_lzma_file, + import_lzma, +) import pandas as pd from pandas._testing._random import rands @@ -82,9 +93,8 @@ def dec(f): is_decorating = not kwargs and len(args) == 1 and callable(args[0]) if is_decorating: f = args[0] - # pandas\_testing.py:2331: error: Incompatible types in assignment - # (expression has type "List[]", variable has type - # "Tuple[Any, ...]") + # error: Incompatible types in assignment (expression has type + # "List[]", variable has type "Tuple[Any, ...]") args = [] # type: ignore[assignment] return dec(f) else: @@ -205,8 +215,7 @@ def 
wrapper(*args, **kwargs): except Exception as err: errno = getattr(err, "errno", None) if not errno and hasattr(errno, "reason"): - # pandas\_testing.py:2521: error: "Exception" has no attribute - # "reason" + # error: "Exception" has no attribute "reason" errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined] if errno in skip_errnos: @@ -396,7 +405,10 @@ def write_to_compressed(compression, path, data, dest="test"): def close(fignum=None): - from matplotlib.pyplot import close as _close, get_fignums + from matplotlib.pyplot import ( + close as _close, + get_fignums, + ) if fignum is None: for fignum in get_fignums(): diff --git a/pandas/_testing/_warnings.py b/pandas/_testing/_warnings.py index 6429f74637f01..ee32abe19278e 100644 --- a/pandas/_testing/_warnings.py +++ b/pandas/_testing/_warnings.py @@ -1,6 +1,12 @@ from contextlib import contextmanager import re -from typing import Optional, Sequence, Type, Union, cast +from typing import ( + Optional, + Sequence, + Type, + Union, + cast, +) import warnings @@ -163,7 +169,10 @@ def _is_unexpected_warning( def _assert_raised_with_correct_stacklevel( actual_warning: warnings.WarningMessage, ) -> None: - from inspect import getframeinfo, stack + from inspect import ( + getframeinfo, + stack, + ) caller = getframeinfo(stack()[4][0]) msg = ( diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 6b67459c47c38..829472f24852a 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1,4 +1,7 @@ -from typing import Union, cast +from typing import ( + Union, + cast, +) import warnings import numpy as np @@ -29,7 +32,10 @@ Series, TimedeltaIndex, ) -from pandas.core.algorithms import safe_sort, take_nd +from pandas.core.algorithms import ( + safe_sort, + take_nd, +) from pandas.core.arrays import ( DatetimeArray, ExtensionArray, diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index 71530b9537fc8..a14e87c04c913 100644 --- 
a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -5,7 +5,11 @@ from shutil import rmtree import string import tempfile -from typing import IO, Any, Union +from typing import ( + IO, + Any, + Union, +) import numpy as np diff --git a/pandas/_typing.py b/pandas/_typing.py index f03b3c9eaf65a..c50d532f40dd7 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,5 +1,14 @@ -from datetime import datetime, timedelta, tzinfo -from io import BufferedIOBase, RawIOBase, TextIOBase, TextIOWrapper +from datetime import ( + datetime, + timedelta, + tzinfo, +) +from io import ( + BufferedIOBase, + RawIOBase, + TextIOBase, + TextIOWrapper, +) from mmap import mmap from os import PathLike from typing import ( @@ -29,7 +38,11 @@ if TYPE_CHECKING: from typing import final - from pandas._libs import Period, Timedelta, Timestamp + from pandas._libs import ( + Period, + Timedelta, + Timestamp, + ) from pandas.core.dtypes.dtypes import ExtensionDtype @@ -37,14 +50,21 @@ from pandas.core.arrays.base import ExtensionArray # noqa: F401 from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame # noqa: F401 - from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy + from pandas.core.groupby.generic import ( + DataFrameGroupBy, + SeriesGroupBy, + ) from pandas.core.indexes.base import Index - from pandas.core.internals import ArrayManager, BlockManager + from pandas.core.internals import ( + ArrayManager, + BlockManager, + ) from pandas.core.resample import Resampler from pandas.core.series import Series from pandas.core.window.rolling import BaseWindow from pandas.io.formats.format import EngFormatter + from pandas.tseries.offsets import DateOffset else: # typing.final does not exist until py38 final = lambda x: x @@ -91,6 +111,7 @@ Suffixes = Tuple[str, str] Ordered = Optional[bool] JSONSerializable = Optional[Union[PythonScalar, List, Dict]] +Frequency = Union[str, "DateOffset"] Axes = Collection[Any] # dtypes diff --git 
a/pandas/api/__init__.py b/pandas/api/__init__.py index bebbb38b4aefa..c22f37f2ef292 100644 --- a/pandas/api/__init__.py +++ b/pandas/api/__init__.py @@ -1,2 +1,6 @@ """ public toolkit API """ -from pandas.api import extensions, indexers, types # noqa +from pandas.api import ( # noqa + extensions, + indexers, + types, +) diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 401e7081d2422..ea5f1ba926899 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -4,7 +4,10 @@ from pandas._libs.lib import no_default -from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) from pandas.core.accessor import ( register_dataframe_accessor, @@ -12,7 +15,10 @@ register_series_accessor, ) from pandas.core.algorithms import take -from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) __all__ = [ "no_default", diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index bcad9f1ddab09..eb2b4caddb7a6 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -17,7 +17,7 @@ "matplotlib": "2.2.3", "numexpr": "2.6.8", "odfpy": "1.3.0", - "openpyxl": "2.6.0", + "openpyxl": "3.0.0", "pandas_gbq": "0.12.0", "pyarrow": "0.15.0", "pytest": "5.0.1", diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py index a84dbb4a661e4..035963e8255ea 100644 --- a/pandas/compat/chainmap.py +++ b/pandas/compat/chainmap.py @@ -1,4 +1,9 @@ -from typing import ChainMap, MutableMapping, TypeVar, cast +from typing import ( + ChainMap, + MutableMapping, + TypeVar, + cast, +) _KT = TypeVar("_KT") _VT = TypeVar("_VT") diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index c47c31fabeb70..8934a02a8f5bc 100644 --- a/pandas/compat/numpy/function.py +++ 
b/pandas/compat/numpy/function.py @@ -16,11 +16,22 @@ easier to adjust to future upstream changes in the analogous numpy signatures. """ from distutils.version import LooseVersion -from typing import Any, Dict, Optional, Union +from typing import ( + Any, + Dict, + Optional, + Union, +) -from numpy import __version__, ndarray +from numpy import ( + __version__, + ndarray, +) -from pandas._libs.lib import is_bool, is_integer +from pandas._libs.lib import ( + is_bool, + is_integer, +) from pandas.errors import UnsupportedFunctionCall from pandas.util._validators import ( validate_args, diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index e6940d78dbaa2..9d48035213126 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -7,7 +7,10 @@ import copy import io import pickle as pkl -from typing import TYPE_CHECKING, Optional +from typing import ( + TYPE_CHECKING, + Optional, +) import warnings from pandas._libs.tslibs import BaseOffset @@ -15,7 +18,10 @@ from pandas import Index if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) def load_reduce(self): diff --git a/pandas/conftest.py b/pandas/conftest.py index bc455092ebe86..ce572e42abec6 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -19,27 +19,52 @@ """ from collections import abc -from datetime import date, datetime, time, timedelta, timezone +from datetime import ( + date, + datetime, + time, + timedelta, + timezone, +) from decimal import Decimal import operator import os -from dateutil.tz import tzlocal, tzutc +from dateutil.tz import ( + tzlocal, + tzutc, +) import hypothesis from hypothesis import strategies as st import numpy as np import pytest -from pytz import FixedOffset, utc +from pytz import ( + FixedOffset, + utc, +) import pandas.util._test_decorators as td -from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype +from pandas.core.dtypes.dtypes import ( + 
DatetimeTZDtype, + IntervalDtype, +) import pandas as pd -from pandas import DataFrame, Interval, Period, Series, Timedelta, Timestamp +from pandas import ( + DataFrame, + Interval, + Period, + Series, + Timedelta, + Timestamp, +) import pandas._testing as tm from pandas.core import ops -from pandas.core.indexes.api import Index, MultiIndex +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) # ---------------------------------------------------------------- @@ -1565,6 +1590,14 @@ def indexer_si(request): return request.param +@pytest.fixture(params=[tm.setitem, tm.loc]) +def indexer_sl(request): + """ + Parametrize over __setitem__, loc.__setitem__ + """ + return request.param + + @pytest.fixture def using_array_manager(request): """ diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 15c2a4a6c5c04..2b6dd379ea47c 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -4,7 +4,11 @@ that can be mixed into or pinned onto other pandas classes. """ -from typing import FrozenSet, List, Set +from typing import ( + FrozenSet, + List, + Set, +) import warnings from pandas.util._decorators import doc diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 4dbce8f75898f..0a4e03fa97402 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -20,22 +20,19 @@ Sequence, Tuple, Union, - cast, ) from pandas._typing import ( AggFuncType, - AggFuncTypeBase, - AggFuncTypeDict, - Axis, FrameOrSeries, - FrameOrSeriesUnion, ) -from pandas.core.dtypes.common import is_dict_like, is_list_like -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.common import ( + is_dict_like, + is_list_like, +) +from pandas.core.dtypes.generic import ABCSeries -from pandas.core.algorithms import safe_sort from pandas.core.base import SpecificationError import pandas.core.common as com from pandas.core.indexes.api import Index @@ -399,134 +396,3 @@ def validate_func_kwargs( no_arg_message 
= "Must provide 'func' or named aggregation **kwargs." raise TypeError(no_arg_message) return columns, func - - -def transform( - obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs -) -> FrameOrSeriesUnion: - """ - Transform a DataFrame or Series - - Parameters - ---------- - obj : DataFrame or Series - Object to compute the transform on. - func : string, function, list, or dictionary - Function(s) to compute the transform with. - axis : {0 or 'index', 1 or 'columns'} - Axis along which the function is applied: - - * 0 or 'index': apply function to each column. - * 1 or 'columns': apply function to each row. - - Returns - ------- - DataFrame or Series - Result of applying ``func`` along the given axis of the - Series or DataFrame. - - Raises - ------ - ValueError - If the transform function fails or does not transform. - """ - is_series = obj.ndim == 1 - - if obj._get_axis_number(axis) == 1: - assert not is_series - return transform(obj.T, func, 0, *args, **kwargs).T - - if is_list_like(func) and not is_dict_like(func): - func = cast(List[AggFuncTypeBase], func) - # Convert func equivalent dict - if is_series: - func = {com.get_callable_name(v) or v: v for v in func} - else: - func = {col: func for col in obj} - - if is_dict_like(func): - func = cast(AggFuncTypeDict, func) - return transform_dict_like(obj, func, *args, **kwargs) - - # func is either str or callable - func = cast(AggFuncTypeBase, func) - try: - result = transform_str_or_callable(obj, func, *args, **kwargs) - except Exception: - raise ValueError("Transform function failed") - - # Functions that transform may return empty Series/DataFrame - # when the dtype is not appropriate - if isinstance(result, (ABCSeries, ABCDataFrame)) and result.empty and not obj.empty: - raise ValueError("Transform function failed") - if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals( - obj.index - ): - raise ValueError("Function did not transform") - - return result - - -def 
transform_dict_like( - obj: FrameOrSeries, - func: AggFuncTypeDict, - *args, - **kwargs, -): - """ - Compute transform in the case of a dict-like func - """ - from pandas.core.reshape.concat import concat - - if len(func) == 0: - raise ValueError("No transform functions were provided") - - if obj.ndim != 1: - # Check for missing columns on a frame - cols = set(func.keys()) - set(obj.columns) - if len(cols) > 0: - cols_sorted = list(safe_sort(list(cols))) - raise SpecificationError(f"Column(s) {cols_sorted} do not exist") - - # Can't use func.values(); wouldn't work for a Series - if any(is_dict_like(v) for _, v in func.items()): - # GH 15931 - deprecation of renaming keys - raise SpecificationError("nested renamer is not supported") - - results: Dict[Hashable, FrameOrSeriesUnion] = {} - for name, how in func.items(): - colg = obj._gotitem(name, ndim=1) - try: - results[name] = transform(colg, how, 0, *args, **kwargs) - except Exception as err: - if str(err) in { - "Function did not transform", - "No transform functions were provided", - }: - raise err - - # combine results - if not results: - raise ValueError("Transform function failed") - return concat(results, axis=1) - - -def transform_str_or_callable( - obj: FrameOrSeries, func: AggFuncTypeBase, *args, **kwargs -) -> FrameOrSeriesUnion: - """ - Compute transform in the case of a string or callable func - """ - if isinstance(func, str): - return obj._try_aggregate_string_function(func, *args, **kwargs) - - if not args and not kwargs: - f = obj._get_cython_func(func) - if f: - return getattr(obj, f)() - - # Two possible ways to use a UDF - apply or call directly - try: - return obj.apply(func, args=args, **kwargs) - except Exception: - return func(obj, *args, **kwargs) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6088837550ecd..62172de5b7ec2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,13 +6,34 @@ import operator from textwrap import dedent -from typing 
import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast -from warnings import catch_warnings, simplefilter, warn +from typing import ( + TYPE_CHECKING, + Dict, + Optional, + Tuple, + Union, + cast, +) +from warnings import ( + catch_warnings, + simplefilter, + warn, +) import numpy as np -from pandas._libs import algos, hashtable as htable, iNaT, lib -from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj, FrameOrSeriesUnion +from pandas._libs import ( + algos, + hashtable as htable, + iNaT, + lib, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + DtypeObj, + FrameOrSeriesUnion, +) from pandas.util._decorators import doc from pandas.core.dtypes.cast import ( @@ -57,7 +78,10 @@ ABCSeries, ABCTimedeltaArray, ) -from pandas.core.dtypes.missing import isna, na_value_for_dtype +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) from pandas.core.construction import ( array, @@ -67,8 +91,16 @@ from pandas.core.indexers import validate_indices if TYPE_CHECKING: - from pandas import Categorical, DataFrame, Index, Series - from pandas.core.arrays import DatetimeArray, TimedeltaArray + from pandas import ( + Categorical, + DataFrame, + Index, + Series, + ) + from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, + ) _shared_docs: Dict[str, str] = {} @@ -231,7 +263,7 @@ def _reconstruct_data( return values -def _ensure_arraylike(values): +def _ensure_arraylike(values) -> ArrayLike: """ ensure that we are arraylike if not already """ @@ -291,7 +323,7 @@ def get_data_algo(values: ArrayLike): return htable, values -def _check_object_for_strings(values) -> str: +def _check_object_for_strings(values: np.ndarray) -> str: """ Check if we can use string hashtable instead of object hashtable. 
@@ -495,7 +527,11 @@ def f(c, v): def factorize_array( - values: np.ndarray, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None + values: np.ndarray, + na_sentinel: int = -1, + size_hint: Optional[int] = None, + na_value=None, + mask: Optional[np.ndarray] = None, ) -> Tuple[np.ndarray, np.ndarray]: """ Factorize an array-like to codes and uniques. @@ -875,7 +911,7 @@ def value_counts_arraylike(values, dropna: bool): return keys, counts -def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray: +def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarray: """ Return boolean ndarray denoting duplicate values. @@ -950,13 +986,13 @@ def mode(values, dropna: bool = True) -> Series: def rank( - values, + values: ArrayLike, axis: int = 0, method: str = "average", na_option: str = "keep", ascending: bool = True, pct: bool = False, -): +) -> np.ndarray: """ Rank the values along a given axis. @@ -1006,7 +1042,12 @@ def rank( return ranks -def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None): +def checked_add_with_arr( + arr: np.ndarray, + b, + arr_mask: Optional[np.ndarray] = None, + b_mask: Optional[np.ndarray] = None, +) -> np.ndarray: """ Perform array addition that checks for underflow and overflow. @@ -1019,9 +1060,9 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None): ---------- arr : array addend. b : array or scalar addend. 
- arr_mask : boolean array or None + arr_mask : np.ndarray[bool] or None, default None array indicating which elements to exclude from checking - b_mask : boolean array or boolean or None + b_mask : np.ndarray[bool] or None, default None array or scalar indicating which element(s) to exclude from checking Returns @@ -1374,7 +1415,9 @@ def get_indexer(current_indexer, other_indexer): def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None): - def wrapper(arr, indexer, out, fill_value=np.nan): + def wrapper( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan + ): if arr_dtype is not None: arr = arr.view(arr_dtype) if out_dtype is not None: @@ -1387,7 +1430,9 @@ def wrapper(arr, indexer, out, fill_value=np.nan): def _convert_wrapper(f, conv_dtype): - def wrapper(arr, indexer, out, fill_value=np.nan): + def wrapper( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan + ): if conv_dtype == object: # GH#39755 avoid casting dt64/td64 to integers arr = ensure_wrapped_if_datetimelike(arr) @@ -1397,7 +1442,9 @@ def wrapper(arr, indexer, out, fill_value=np.nan): return wrapper -def _take_2d_multi_object(arr, indexer, out, fill_value, mask_info): +def _take_2d_multi_object( + arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value, mask_info +) -> None: # this is not ideal, performance-wise, but it's better than raising # an exception (best to optimize in Cython to avoid getting here) row_idx, col_idx = indexer @@ -1420,7 +1467,14 @@ def _take_2d_multi_object(arr, indexer, out, fill_value, mask_info): out[i, j] = arr[u_, v] -def _take_nd_object(arr, indexer, out, axis: int, fill_value, mask_info): +def _take_nd_object( + arr: np.ndarray, + indexer: np.ndarray, + out: np.ndarray, + axis: int, + fill_value, + mask_info, +): if mask_info is not None: mask, needs_masking = mask_info else: @@ -1538,7 +1592,7 @@ def _take_nd_object(arr, indexer, out, axis: int, fill_value, mask_info): def _get_take_nd_function( - 
ndim: int, arr_dtype, out_dtype, axis: int = 0, mask_info=None + ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: int = 0, mask_info=None ): if ndim <= 2: tup = (arr_dtype.name, out_dtype.name) @@ -1573,7 +1627,9 @@ def func2(arr, indexer, out, fill_value=np.nan): return func2 -def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None): +def take( + arr, indices: np.ndarray, axis: int = 0, allow_fill: bool = False, fill_value=None +): """ Take elements from an array. @@ -1707,7 +1763,7 @@ def take_nd( arr, indexer, axis: int = 0, - out=None, + out: Optional[np.ndarray] = None, fill_value=lib.no_default, allow_fill: bool = True, ): @@ -2208,7 +2264,7 @@ def _sort_mixed(values): return np.concatenate([nums, np.asarray(strs, dtype=object)]) -def _sort_tuples(values: np.ndarray[tuple]): +def _sort_tuples(values: np.ndarray): """ Convert array of tuples (1d) to array or array (2d). We need to keep the columns separately as they contain different types and diff --git a/pandas/core/api.py b/pandas/core/api.py index 67e86c2076329..2677530455b07 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -1,6 +1,11 @@ # flake8: noqa -from pandas._libs import NaT, Period, Timedelta, Timestamp +from pandas._libs import ( + NaT, + Period, + Timedelta, + Timestamp, +) from pandas._libs.missing import NA from pandas.core.dtypes.dtypes import ( @@ -9,12 +14,24 @@ IntervalDtype, PeriodDtype, ) -from pandas.core.dtypes.missing import isna, isnull, notna, notnull +from pandas.core.dtypes.missing import ( + isna, + isnull, + notna, + notnull, +) -from pandas.core.algorithms import factorize, unique, value_counts +from pandas.core.algorithms import ( + factorize, + unique, + value_counts, +) from pandas.core.arrays import Categorical from pandas.core.arrays.boolean import BooleanDtype -from pandas.core.arrays.floating import Float32Dtype, Float64Dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) from 
pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, @@ -28,7 +45,10 @@ from pandas.core.arrays.string_ import StringDtype from pandas.core.construction import array from pandas.core.flags import Flags -from pandas.core.groupby import Grouper, NamedAgg +from pandas.core.groupby import ( + Grouper, + NamedAgg, +) from pandas.core.indexes.api import ( CategoricalIndex, DatetimeIndex, @@ -42,8 +62,14 @@ TimedeltaIndex, UInt64Index, ) -from pandas.core.indexes.datetimes import bdate_range, date_range -from pandas.core.indexes.interval import Interval, interval_range +from pandas.core.indexes.datetimes import ( + bdate_range, + date_range, +) +from pandas.core.indexes.interval import ( + Interval, + interval_range, +) from pandas.core.indexes.period import period_range from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexing import IndexSlice diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 828b460f84ec6..46b1e5b20ce3a 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -6,6 +6,7 @@ TYPE_CHECKING, Any, Dict, + Hashable, Iterator, List, Optional, @@ -37,10 +38,17 @@ is_list_like, is_sequence, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCNDFrame, + ABCSeries, +) from pandas.core.algorithms import safe_sort -from pandas.core.base import DataError, SpecificationError +from pandas.core.base import ( + DataError, + SpecificationError, +) import pandas.core.common as com from pandas.core.construction import ( array as pd_array, @@ -48,8 +56,15 @@ ) if TYPE_CHECKING: - from pandas import DataFrame, Index, Series - from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy + from pandas import ( + DataFrame, + Index, + Series, + ) + from pandas.core.groupby import ( + DataFrameGroupBy, + SeriesGroupBy, + ) from pandas.core.resample import Resampler from pandas.core.window.rolling import BaseWindow @@ -137,6 +152,7 @@ def 
f(x): else: f = func + self.orig_f: AggFuncType = func self.f: AggFuncType = f @property @@ -147,18 +163,14 @@ def index(self) -> Index: def apply(self) -> FrameOrSeriesUnion: pass - def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: + def agg(self) -> Optional[FrameOrSeriesUnion]: """ Provide an implementation for the aggregators. Returns ------- - tuple of result, how. - - Notes - ----- - how can be a string describe the required post-processing, or - None if not required. + Result of aggregation, or None if agg cannot be performed by + this method. """ obj = self.obj arg = self.f @@ -171,23 +183,146 @@ def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]: result = self.maybe_apply_str() if result is not None: - return result, None + return result if is_dict_like(arg): - return self.agg_dict_like(_axis), True + return self.agg_dict_like(_axis) elif is_list_like(arg): # we require a list, but not a 'str' - return self.agg_list_like(_axis=_axis), None - else: - result = None + return self.agg_list_like(_axis=_axis) if callable(arg): f = obj._get_cython_func(arg) if f and not args and not kwargs: - return getattr(obj, f)(), None + return getattr(obj, f)() # caller can react - return result, True + return None + + def transform(self) -> FrameOrSeriesUnion: + """ + Transform a DataFrame or Series. + + Returns + ------- + DataFrame or Series + Result of applying ``func`` along the given axis of the + Series or DataFrame. + + Raises + ------ + ValueError + If the transform function fails or does not transform. 
+ """ + obj = self.obj + func = self.orig_f + axis = self.axis + args = self.args + kwargs = self.kwargs + + is_series = obj.ndim == 1 + + if obj._get_axis_number(axis) == 1: + assert not is_series + return obj.T.transform(func, 0, *args, **kwargs).T + + if is_list_like(func) and not is_dict_like(func): + func = cast(List[AggFuncTypeBase], func) + # Convert func equivalent dict + if is_series: + func = {com.get_callable_name(v) or v: v for v in func} + else: + func = {col: func for col in obj} + + if is_dict_like(func): + func = cast(AggFuncTypeDict, func) + return self.transform_dict_like(func) + + # func is either str or callable + func = cast(AggFuncTypeBase, func) + try: + result = self.transform_str_or_callable(func) + except Exception: + raise ValueError("Transform function failed") + + # Functions that transform may return empty Series/DataFrame + # when the dtype is not appropriate + if ( + isinstance(result, (ABCSeries, ABCDataFrame)) + and result.empty + and not obj.empty + ): + raise ValueError("Transform function failed") + if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals( + obj.index + ): + raise ValueError("Function did not transform") + + return result + + def transform_dict_like(self, func): + """ + Compute transform in the case of a dict-like func + """ + from pandas.core.reshape.concat import concat + + obj = self.obj + args = self.args + kwargs = self.kwargs + + if len(func) == 0: + raise ValueError("No transform functions were provided") + + if obj.ndim != 1: + # Check for missing columns on a frame + cols = set(func.keys()) - set(obj.columns) + if len(cols) > 0: + cols_sorted = list(safe_sort(list(cols))) + raise SpecificationError(f"Column(s) {cols_sorted} do not exist") + + # Can't use func.values(); wouldn't work for a Series + if any(is_dict_like(v) for _, v in func.items()): + # GH 15931 - deprecation of renaming keys + raise SpecificationError("nested renamer is not supported") + + results: Dict[Hashable, 
FrameOrSeriesUnion] = {} + for name, how in func.items(): + colg = obj._gotitem(name, ndim=1) + try: + results[name] = colg.transform(how, 0, *args, **kwargs) + except Exception as err: + if str(err) in { + "Function did not transform", + "No transform functions were provided", + }: + raise err + + # combine results + if not results: + raise ValueError("Transform function failed") + return concat(results, axis=1) + + def transform_str_or_callable(self, func) -> FrameOrSeriesUnion: + """ + Compute transform in the case of a string or callable func + """ + obj = self.obj + args = self.args + kwargs = self.kwargs + + if isinstance(func, str): + return obj._try_aggregate_string_function(func, *args, **kwargs) + + if not args and not kwargs: + f = obj._get_cython_func(func) + if f: + return getattr(obj, f)() + + # Two possible ways to use a UDF - apply or call directly + try: + return obj.apply(func, args=args, **kwargs) + except Exception: + return func(obj, *args, **kwargs) def agg_list_like(self, _axis: int) -> FrameOrSeriesUnion: """ @@ -519,6 +654,35 @@ def apply(self) -> FrameOrSeriesUnion: return self.apply_standard() + def agg(self): + obj = self.obj + axis = self.axis + + # TODO: Avoid having to change state + self.obj = self.obj if self.axis == 0 else self.obj.T + self.axis = 0 + + result = None + try: + result = super().agg() + except TypeError as err: + exc = TypeError( + "DataFrame constructor called with " + f"incompatible data and dtype: {err}" + ) + raise exc from err + finally: + self.obj = obj + self.axis = axis + + if axis == 1: + result = result.T if result is not None else result + + if result is None: + result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) + + return result + def apply_empty_result(self): """ we have an empty result; at least 1 axis is 0 @@ -837,6 +1001,36 @@ def apply(self) -> FrameOrSeriesUnion: return self.apply_standard() + def agg(self): + result = super().agg() + if result is None: + f = self.f + args = 
self.args + kwargs = self.kwargs + + # string, list-like, and dict-like are entirely handled in super + assert callable(f) + + # we can be called from an inner function which + # passes this meta-data + kwargs.pop("_axis", None) + kwargs.pop("_level", None) + + # try a regular apply, this evaluates lambdas + # row-by-row; however if the lambda is expected a Series + # expression, e.g.: lambda x: x-x.quantile(0.25) + # this will fail, so we can try a vectorized evaluation + + # we cannot FIRST try the vectorized evaluation, because + # then .agg and .apply would have different semantics if the + # operation is actually defined on the Series, e.g. str + try: + result = self.obj.apply(f, *args, **kwargs) + except (ValueError, AttributeError, TypeError): + result = f(self.obj, *args, **kwargs) + + return result + def apply_empty_result(self) -> Series: obj = self.obj return obj._constructor(dtype=obj.dtype, index=obj.index).__finalize__( @@ -893,6 +1087,9 @@ def __init__( def apply(self): raise NotImplementedError + def transform(self): + raise NotImplementedError + class ResamplerWindowApply(Apply): axis = 0 @@ -916,3 +1113,6 @@ def __init__( def apply(self): raise NotImplementedError + + def transform(self): + raise NotImplementedError diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 917aace233ee5..cfa1f59f3d4ca 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -1,7 +1,10 @@ """ EA-compatible analogue to to np.putmask """ -from typing import Any, Tuple +from typing import ( + Any, + Tuple, +) import warnings import numpy as np @@ -14,7 +17,11 @@ find_common_type, infer_dtype_from, ) -from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_list_like +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, + is_list_like, +) from pandas.core.dtypes.missing import isna_compat from pandas.core.arrays import ExtensionArray diff --git 
a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index d0565dfff0eb1..201b9fdcc51cc 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -3,11 +3,19 @@ """ import operator import re -from typing import Any, Optional, Pattern, Union +from typing import ( + Any, + Optional, + Pattern, + Union, +) import numpy as np -from pandas._typing import ArrayLike, Scalar +from pandas._typing import ( + ArrayLike, + Scalar, +) from pandas.core.dtypes.common import ( is_datetimelike_v_numeric, diff --git a/pandas/core/array_algos/transforms.py b/pandas/core/array_algos/transforms.py index 371425f325d76..1dde9b221a90b 100644 --- a/pandas/core/array_algos/transforms.py +++ b/pandas/core/array_algos/transforms.py @@ -19,7 +19,7 @@ def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray new_values = new_values.T axis = new_values.ndim - axis - 1 - if np.prod(new_values.shape): + if new_values.size: new_values = np.roll(new_values, ensure_platform_int(periods), axis=axis) axis_indexer = [slice(None)] * values.ndim diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index cb185dcf78f63..3f45f503d0f62 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -5,7 +5,10 @@ ExtensionArray """ import operator -from typing import Any, Callable +from typing import ( + Any, + Callable, +) import warnings import numpy as np @@ -13,7 +16,10 @@ from pandas._libs import lib from pandas.core.construction import extract_array -from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op, roperator +from pandas.core.ops import ( + maybe_dispatch_ufunc_to_dunder_op, + roperator, +) from pandas.core.ops.common import unpack_zerodim_and_defer @@ -228,7 +234,7 @@ def _maybe_fallback(ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): return NotImplemented -def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any): +def array_ufunc(self, ufunc: np.ufunc, 
method: str, *inputs: Any, **kwargs: Any): """ Compatibility with numpy ufuncs. @@ -341,9 +347,7 @@ def reconstruct(result): result = result.__finalize__(self) return result - if self.ndim > 1 and ( - len(inputs) > 1 or ufunc.nout > 1 # type: ignore[attr-defined] - ): + if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1): # Just give up on preserving types in the complex case. # In theory we could preserve them for them. # * nout>1 is doable if BlockManager.apply took nout and @@ -367,7 +371,7 @@ def reconstruct(result): # Those can have an axis keyword and thus can't be called block-by-block result = getattr(ufunc, method)(np.asarray(inputs[0]), **kwargs) - if ufunc.nout > 1: # type: ignore[attr-defined] + if ufunc.nout > 1: result = tuple(reconstruct(x) for x in result) else: result = reconstruct(result) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index b6d98eb17eb6c..22f15ca9650db 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -11,7 +11,10 @@ from pandas.core.arrays.interval import IntervalArray from pandas.core.arrays.masked import BaseMaskedArray from pandas.core.arrays.numpy_ import PandasArray -from pandas.core.arrays.period import PeriodArray, period_array +from pandas.core.arrays.period import ( + PeriodArray, + period_array, +) from pandas.core.arrays.sparse import SparseArray from pandas.core.arrays.string_ import StringArray from pandas.core.arrays.timedeltas import TimedeltaArray diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index eb7c9e69d962b..825757ddffee4 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -1,7 +1,14 @@ from __future__ import annotations from functools import wraps -from typing import Any, Optional, Sequence, Type, TypeVar, Union +from typing import ( + Any, + Optional, + Sequence, + Type, + TypeVar, + Union, +) import numpy as np @@ -9,7 +16,10 @@ from pandas._typing import Shape from 
pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import cache_readonly, doc +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.common import is_dtype_equal @@ -17,7 +27,11 @@ from pandas.core.dtypes.missing import array_equivalent from pandas.core import missing -from pandas.core.algorithms import take, unique, value_counts +from pandas.core.algorithms import ( + take, + unique, + value_counts, +) from pandas.core.array_algos.transforms import shift from pandas.core.arrays.base import ExtensionArray from pandas.core.construction import extract_array @@ -389,7 +403,10 @@ def value_counts(self, dropna: bool = True): if self.ndim != 1: raise NotImplementedError - from pandas import Index, Series + from pandas import ( + Index, + Series, + ) if dropna: values = self[~self.isna()]._ndarray diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index a62a5ec4ec7f7..edc8fa14ca142 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -25,12 +25,22 @@ import numpy as np from pandas._libs import lib -from pandas._typing import ArrayLike, Dtype, Shape +from pandas._typing import ( + ArrayLike, + Dtype, + Shape, +) from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, Substitution -from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs +from pandas.util._decorators import ( + Appender, + Substitution, +) +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, +) from pandas.core.dtypes.cast import maybe_cast_to_extension_array from pandas.core.dtypes.common import ( @@ -41,13 +51,24 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import 
ABCDataFrame, ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.algorithms import factorize_array, isin, unique +from pandas.core.algorithms import ( + factorize_array, + isin, + unique, +) from pandas.core.missing import get_fill_func -from pandas.core.sorting import nargminmax, nargsort +from pandas.core.sorting import ( + nargminmax, + nargsort, +) _extension_array_shared_docs: Dict[str, str] = {} diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 86eafb34e847f..260cd08707473 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -1,13 +1,26 @@ from __future__ import annotations import numbers -from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union +from typing import ( + TYPE_CHECKING, + List, + Optional, + Tuple, + Type, + Union, +) import warnings import numpy as np -from pandas._libs import lib, missing as libmissing -from pandas._typing import ArrayLike, Dtype +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + Dtype, +) from pandas.compat.numpy import function as nv from pandas.core.dtypes.common import ( @@ -19,11 +32,17 @@ is_numeric_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + register_extension_dtype, +) from pandas.core.dtypes.missing import isna from pandas.core import ops -from pandas.core.arrays.masked import BaseMaskedArray, BaseMaskedDtype +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) if TYPE_CHECKING: import pyarrow @@ -596,7 +615,10 @@ def _logical_method(self, other, op): return BooleanArray(result, mask) def _cmp_method(self, other, op): - from pandas.arrays import FloatingArray, IntegerArray + from pandas.arrays import ( 
+ FloatingArray, + IntegerArray, + ) if isinstance(other, (IntegerArray, FloatingArray)): return NotImplemented diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0d1465da7297e..916d4f9f2fd28 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -22,12 +22,28 @@ from pandas._config import get_option -from pandas._libs import NaT, algos as libalgos, hashtable as htable +from pandas._libs import ( + NaT, + algos as libalgos, + hashtable as htable, +) from pandas._libs.lib import no_default -from pandas._typing import ArrayLike, Dtype, NpDtype, Ordered, Scalar +from pandas._typing import ( + ArrayLike, + Dtype, + NpDtype, + Ordered, + Scalar, +) from pandas.compat.numpy import function as nv -from pandas.util._decorators import cache_readonly, deprecate_kwarg -from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs +from pandas.util._decorators import ( + cache_readonly, + deprecate_kwarg, +) +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, +) from pandas.core.dtypes.cast import ( coerce_indexer_dtype, @@ -53,17 +69,40 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.generic import ABCIndex, ABCSeries -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) from pandas.core import ops -from pandas.core.accessor import PandasDelegate, delegate_names +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) import pandas.core.algorithms as algorithms -from pandas.core.algorithms import factorize, get_data_algo, take_nd, unique1d +from pandas.core.algorithms import ( + factorize, + get_data_algo, + take_nd, + unique1d, +) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -from 
pandas.core.base import ExtensionArray, NoNewAttributesMixin, PandasObject +from pandas.core.base import ( + ExtensionArray, + NoNewAttributesMixin, + PandasObject, +) import pandas.core.common as com -from pandas.core.construction import array, extract_array, sanitize_array +from pandas.core.construction import ( + array, + extract_array, + sanitize_array, +) from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.missing import interpolate_2d from pandas.core.ops.common import unpack_zerodim_and_defer @@ -522,7 +561,12 @@ def _from_inferred_categories( ------- Categorical """ - from pandas import Index, to_datetime, to_numeric, to_timedelta + from pandas import ( + Index, + to_datetime, + to_numeric, + to_timedelta, + ) cats = Index(inferred_categories) known_categories = ( @@ -1435,7 +1479,10 @@ def value_counts(self, dropna: bool = True): -------- Series.value_counts """ - from pandas import CategoricalIndex, Series + from pandas import ( + CategoricalIndex, + Series, + ) code, cat = self._codes, self.categories ncat, mask = (len(cat), code >= 0) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 162a69370bc61..bae22505145b5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,9 @@ from __future__ import annotations -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import operator from typing import ( TYPE_CHECKING, @@ -18,7 +21,10 @@ import numpy as np -from pandas._libs import algos, lib +from pandas._libs import ( + algos, + lib, +) from pandas._libs.tslibs import ( BaseOffset, IncompatibleFrequency, @@ -32,12 +38,30 @@ iNaT, to_offset, ) -from pandas._libs.tslibs.fields import RoundTo, round_nsint64 +from pandas._libs.tslibs.fields import ( + RoundTo, + round_nsint64, +) from pandas._libs.tslibs.timestamps import integer_op_not_supported -from pandas._typing import DatetimeLikeScalar, Dtype, DtypeObj, NpDtype 
+from pandas._typing import ( + ArrayLike, + DatetimeLikeScalar, + Dtype, + DtypeObj, + NpDtype, +) from pandas.compat.numpy import function as nv -from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning -from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.errors import ( + AbstractMethodError, + NullFrequencyError, + PerformanceWarning, +) +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, +) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -57,22 +81,51 @@ is_unsigned_integer_dtype, pandas_dtype, ) -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, +) -from pandas.core import nanops, ops -from pandas.core.algorithms import checked_add_with_arr, isin, unique1d +from pandas.core import ( + nanops, + ops, +) +from pandas.core.algorithms import ( + checked_add_with_arr, + isin, + unique1d, +) from pandas.core.arraylike import OpsMixin -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray, ravel_compat +from pandas.core.arrays._mixins import ( + NDArrayBackedExtensionArray, + ravel_compat, +) import pandas.core.common as com -from pandas.core.construction import array, extract_array -from pandas.core.indexers import check_array_indexer, check_setitem_lengths +from pandas.core.construction import ( + array, + extract_array, +) +from pandas.core.indexers import ( + check_array_indexer, + check_setitem_lengths, +) from pandas.core.ops.common import unpack_zerodim_and_defer -from pandas.core.ops.invalid import invalid_comparison, make_invalid_op +from pandas.core.ops.invalid import ( + invalid_comparison, + make_invalid_op, +) from pandas.tseries import frequencies if TYPE_CHECKING: - from pandas.core.arrays import 
DatetimeArray, TimedeltaArray + from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, + ) DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType] DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") @@ -350,12 +403,13 @@ def astype(self, dtype, copy=True): elif is_integer_dtype(dtype): # we deliberately ignore int32 vs. int64 here. # See https://github.com/pandas-dev/pandas/issues/24381 for more. + level = find_stack_level() warnings.warn( f"casting {self.dtype} values to int64 with .astype(...) is " "deprecated and will raise in a future version. " "Use .view(...) instead.", FutureWarning, - stacklevel=3, + stacklevel=level, ) values = self.asi8 @@ -381,9 +435,30 @@ def astype(self, dtype, copy=True): else: return np.asarray(self, dtype=dtype) - def view(self, dtype: Optional[Dtype] = None): + def view(self, dtype: Optional[Dtype] = None) -> ArrayLike: + # We handle datetime64, datetime64tz, timedelta64, and period + # dtypes here. Everything else we pass through to the underlying + # ndarray. 
if dtype is None or dtype is self.dtype: return type(self)(self._ndarray, dtype=self.dtype) + + if isinstance(dtype, type): + # we sometimes pass non-dtype objects, e.g np.ndarray; + # pass those through to the underlying ndarray + return self._ndarray.view(dtype) + + dtype = pandas_dtype(dtype) + if isinstance(dtype, (PeriodDtype, DatetimeTZDtype)): + cls = dtype.construct_array_type() + return cls._simple_new(self.asi8, dtype=dtype) + elif dtype == "M8[ns]": + from pandas.core.arrays import DatetimeArray + + return DatetimeArray._simple_new(self.asi8, dtype=dtype) + elif dtype == "m8[ns]": + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray._simple_new(self.asi8.view("m8[ns]"), dtype=dtype) return self._ndarray.view(dtype=dtype) # ------------------------------------------------------------------ @@ -445,8 +520,7 @@ def _validate_comparison_value(self, other): raise InvalidComparison(other) if isinstance(other, self._recognized_scalars) or other is NaT: - # pandas\core\arrays\datetimelike.py:432: error: Too many arguments - # for "object" [call-arg] + # error: Too many arguments for "object" other = self._scalar_type(other) # type: ignore[call-arg] try: self._check_compatible_with(other) @@ -497,8 +571,7 @@ def _validate_shift_value(self, fill_value): if is_valid_na_for_dtype(fill_value, self.dtype): fill_value = NaT elif isinstance(fill_value, self._recognized_scalars): - # pandas\core\arrays\datetimelike.py:746: error: Too many arguments - # for "object" [call-arg] + # error: Too many arguments for "object" fill_value = self._scalar_type(fill_value) # type: ignore[call-arg] else: # only warn if we're not going to raise @@ -506,8 +579,7 @@ def _validate_shift_value(self, fill_value): # kludge for #31971 since Period(integer) tries to cast to str new_fill = Period._from_ordinal(fill_value, freq=self.freq) else: - # pandas\core\arrays\datetimelike.py:753: error: Too many - # arguments for "object" [call-arg] + # error: Too many arguments for 
"object" new_fill = self._scalar_type(fill_value) # type: ignore[call-arg] # stacklevel here is chosen to be correct when called from @@ -562,8 +634,14 @@ def _validate_scalar( # GH#18295 value = NaT + elif isna(value): + # if we are dt64tz and value is dt64("NaT"), dont cast to NaT, + # or else we'll fail to raise in _unbox_scalar + msg = self._validation_error_message(value, allow_listlike) + raise TypeError(msg) + elif isinstance(value, self._recognized_scalars): - # error: Too many arguments for "object" [call-arg] + # error: Too many arguments for "object" value = self._scalar_type(value) # type: ignore[call-arg] else: @@ -1679,7 +1757,7 @@ def factorize(self, na_sentinel=-1, sort: bool = False): # TODO: overload __getitem__, a slice indexer returns same type as self # error: Incompatible types in assignment (expression has type # "Union[DatetimeLikeArrayMixin, Union[Any, Any]]", variable - # has type "TimelikeOps") [assignment] + # has type "TimelikeOps") uniques = uniques[::-1] # type: ignore[assignment] return codes, uniques # FIXME: shouldn't get here; we are ignoring sort diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 144a7186f5826..3982a7deca2bb 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,12 +1,24 @@ from __future__ import annotations -from datetime import datetime, time, timedelta, tzinfo -from typing import Optional, Union, cast +from datetime import ( + datetime, + time, + timedelta, + tzinfo, +) +from typing import ( + Optional, + Union, + cast, +) import warnings import numpy as np -from pandas._libs import lib, tslib +from pandas._libs import ( + lib, + tslib, +) from pandas._libs.tslibs import ( BaseOffset, NaT, @@ -47,7 +59,11 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ABCIndex, ABCPandasArray, ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCPandasArray, + ABCSeries, +) from 
pandas.core.dtypes.missing import isna from pandas.core.algorithms import checked_add_with_arr @@ -56,7 +72,11 @@ import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias -from pandas.tseries.offsets import BDay, Day, Tick +from pandas.tseries.offsets import ( + BDay, + Day, + Tick, +) _midnight = time(0, 0) @@ -464,10 +484,8 @@ def _generate_range( def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") - if not isna(value): - self._check_compatible_with(value, setitem=setitem) - return value.asm8 - return np.datetime64(value.value, "ns") + self._check_compatible_with(value, setitem=setitem) + return value.asm8 def _scalar_from_string(self, value): return Timestamp(value, tz=self.tz) @@ -598,6 +616,10 @@ def astype(self, dtype, copy=True): elif is_datetime64_ns_dtype(dtype): return astype_dt64_to_dt64tz(self, dtype, copy, via_utc=False) + elif self.tz is None and is_datetime64_dtype(dtype) and dtype != self.dtype: + # unit conversion e.g. 
datetime64[s] + return self._data.astype(dtype) + elif is_period_dtype(dtype): return self.to_period(freq=dtype.freq) return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index bc8f2af4f3801..a43b30f5043e2 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -1,12 +1,23 @@ from __future__ import annotations -from typing import List, Optional, Tuple, Type +from typing import ( + List, + Optional, + Tuple, + Type, +) import warnings import numpy as np -from pandas._libs import lib, missing as libmissing -from pandas._typing import ArrayLike, DtypeObj +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, +) from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly @@ -20,10 +31,16 @@ is_object_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + register_extension_dtype, +) from pandas.core.dtypes.missing import isna -from pandas.core.arrays.numeric import NumericArray, NumericDtype +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric @@ -303,7 +320,10 @@ def _values_for_argsort(self) -> np.ndarray: return self._data def _cmp_method(self, other, op): - from pandas.arrays import BooleanArray, IntegerArray + from pandas.arrays import ( + BooleanArray, + IntegerArray, + ) mask = None diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 363832ec89240..d62a05253b265 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -1,16 +1,33 @@ from __future__ import annotations -from typing import Dict, List, Optional, Tuple, Type +from typing import ( + Dict, + List, + Optional, + 
Tuple, + Type, +) import warnings import numpy as np -from pandas._libs import iNaT, lib, missing as libmissing -from pandas._typing import ArrayLike, Dtype, DtypeObj +from pandas._libs import ( + iNaT, + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, +) from pandas.compat.numpy import function as nv from pandas.util._decorators import cache_readonly -from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_dtype, @@ -23,8 +40,14 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core.arrays.masked import BaseMaskedArray, BaseMaskedDtype -from pandas.core.arrays.numeric import NumericArray, NumericDtype +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) +from pandas.core.arrays.numeric import ( + NumericArray, + NumericDtype, +) from pandas.core.ops import invalid_comparison from pandas.core.tools.numeric import to_numeric diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 0f3e028c34c05..4d165dac40397 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1,14 +1,24 @@ from __future__ import annotations import operator -from operator import le, lt +from operator import ( + le, + lt, +) import textwrap -from typing import Optional, Sequence, Type, TypeVar, cast +from typing import ( + Optional, + Sequence, + Type, + TypeVar, + cast, +) import numpy as np from pandas._config import get_option +from pandas._libs import NaT from pandas._libs.interval import ( VALID_CLOSED, Interval, @@ -16,14 +26,19 @@ intervals_to_interval_bounds, ) from pandas._libs.missing import NA -from pandas._typing import ArrayLike, Dtype, NpDtype +from pandas._typing import ( + ArrayLike, + Dtype, + NpDtype, +) from pandas.compat.numpy import function as nv from 
pandas.util._decorators import Appender from pandas.core.dtypes.cast import maybe_convert_platform from pandas.core.dtypes.common import ( is_categorical_dtype, - is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, is_dtype_equal, is_float_dtype, is_integer_dtype, @@ -43,10 +58,21 @@ ABCPeriodIndex, ABCSeries, ) -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) -from pandas.core.algorithms import isin, take, value_counts -from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs +from pandas.core.algorithms import ( + isin, + take, + value_counts, +) +from pandas.core.arrays.base import ( + ExtensionArray, + _extension_array_shared_docs, +) from pandas.core.arrays.categorical import Categorical import pandas.core.common as com from pandas.core.construction import ( @@ -56,7 +82,10 @@ ) from pandas.core.indexers import check_array_indexer from pandas.core.indexes.base import ensure_index -from pandas.core.ops import invalid_comparison, unpack_zerodim_and_defer +from pandas.core.ops import ( + invalid_comparison, + unpack_zerodim_and_defer, +) IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") @@ -169,7 +198,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): # Constructors def __new__( - cls, + cls: Type[IntervalArrayT], data, closed=None, dtype: Optional[Dtype] = None, @@ -197,7 +226,7 @@ def __new__( raise TypeError(msg) # might need to convert empty or purely na data - data = maybe_convert_platform_interval(data) + data = _maybe_convert_platform_interval(data) left, right, infer_closed = intervals_to_interval_bounds( data, validate_closed=closed is None ) @@ -214,14 +243,14 @@ def __new__( @classmethod def _simple_new( - cls, + cls: Type[IntervalArrayT], left, right, closed=None, - copy=False, + copy: bool = False, dtype: Optional[Dtype] = None, - verify_integrity=True, - ): + 
verify_integrity: bool = True, + ) -> IntervalArrayT: result = IntervalMixin.__new__(cls) if closed is None and isinstance(dtype, IntervalDtype): @@ -301,12 +330,18 @@ def _simple_new( @classmethod def _from_sequence( - cls, scalars, *, dtype: Optional[Dtype] = None, copy: bool = False - ): + cls: Type[IntervalArrayT], + scalars, + *, + dtype: Optional[Dtype] = None, + copy: bool = False, + ) -> IntervalArrayT: return cls(scalars, dtype=dtype, copy=copy) @classmethod - def _from_factorized(cls, values, original): + def _from_factorized( + cls: Type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT + ) -> IntervalArrayT: if len(values) == 0: # An empty array returns object-dtype here. We can't create # a new IA from an (empty) object-dtype array, so turn it into the @@ -362,9 +397,13 @@ def _from_factorized(cls, values, original): } ) def from_breaks( - cls, breaks, closed="right", copy: bool = False, dtype: Optional[Dtype] = None - ): - breaks = maybe_convert_platform_interval(breaks) + cls: Type[IntervalArrayT], + breaks, + closed="right", + copy: bool = False, + dtype: Optional[Dtype] = None, + ) -> IntervalArrayT: + breaks = _maybe_convert_platform_interval(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) @@ -433,10 +472,15 @@ def from_breaks( } ) def from_arrays( - cls, left, right, closed="right", copy=False, dtype: Optional[Dtype] = None - ): - left = maybe_convert_platform_interval(left) - right = maybe_convert_platform_interval(right) + cls: Type[IntervalArrayT], + left, + right, + closed="right", + copy: bool = False, + dtype: Optional[Dtype] = None, + ) -> IntervalArrayT: + left = _maybe_convert_platform_interval(left) + right = _maybe_convert_platform_interval(right) return cls._simple_new( left, right, closed, copy=copy, dtype=dtype, verify_integrity=True @@ -492,8 +536,12 @@ def from_arrays( } ) def from_tuples( - cls, data, closed="right", copy=False, dtype: Optional[Dtype] = None - ): + cls: 
Type[IntervalArrayT], + data, + closed="right", + copy: bool = False, + dtype: Optional[Dtype] = None, + ) -> IntervalArrayT: if len(data): left, right = [], [] else: @@ -548,7 +596,7 @@ def _validate(self): msg = "left side of interval must be <= right side" raise ValueError(msg) - def _shallow_copy(self, left, right): + def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT: """ Return a new IntervalArray with the replacement attributes @@ -565,7 +613,7 @@ def _shallow_copy(self, left, right): # Descriptive @property - def dtype(self): + def dtype(self) -> IntervalDtype: return self._dtype @property @@ -721,7 +769,9 @@ def argsort( ascending=ascending, kind=kind, na_position=na_position, **kwargs ) - def fillna(self, value=None, method=None, limit=None): + def fillna( + self: IntervalArrayT, value=None, method=None, limit=None + ) -> IntervalArrayT: """ Fill NA/NaN values using the specified method. @@ -759,7 +809,7 @@ def fillna(self, value=None, method=None, limit=None): right = self.right.fillna(value=value_right) return self._shallow_copy(left, right) - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): """ Cast to an ExtensionArray or NumPy array with dtype 'dtype'. 
@@ -863,7 +913,9 @@ def copy(self: IntervalArrayT) -> IntervalArrayT: def isna(self) -> np.ndarray: return isna(self._left) - def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: + def shift( + self: IntervalArrayT, periods: int = 1, fill_value: object = None + ) -> IntervalArray: if not len(self) or periods == 0: return self.copy() @@ -892,7 +944,15 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray: b = empty return self._concat_same_type([a, b]) - def take(self, indices, *, allow_fill=False, fill_value=None, axis=None, **kwargs): + def take( + self: IntervalArrayT, + indices, + *, + allow_fill: bool = False, + fill_value=None, + axis=None, + **kwargs, + ) -> IntervalArrayT: """ Take elements from the IntervalArray. @@ -999,9 +1059,12 @@ def _validate_setitem_value(self, value): if is_integer_dtype(self.dtype.subtype): # can't set NaN on a numpy integer array needs_float_conversion = True - elif is_datetime64_any_dtype(self.dtype.subtype): + elif is_datetime64_dtype(self.dtype.subtype): # need proper NaT to set directly on the numpy array value = np.datetime64("NaT") + elif is_datetime64tz_dtype(self.dtype.subtype): + # need proper NaT to set directly on the DatetimeArray array + value = NaT elif is_timedelta64_dtype(self.dtype.subtype): # need proper NaT to set directly on the numpy array value = np.timedelta64("NaT") @@ -1044,7 +1107,7 @@ def value_counts(self, dropna: bool = True): # --------------------------------------------------------------------- # Rendering Methods - def _format_data(self): + def _format_data(self) -> str: # TODO: integrate with categorical and make generic # name argument is unused here; just for compat with base / categorical @@ -1088,7 +1151,7 @@ def __repr__(self) -> str: template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" return template - def _format_space(self): + def _format_space(self) -> str: space = " " * (len(type(self).__name__) + 1) return 
f"\n{space}" @@ -1268,7 +1331,7 @@ def closed(self): ), } ) - def set_closed(self, closed): + def set_closed(self: IntervalArrayT, closed) -> IntervalArrayT: if closed not in VALID_CLOSED: msg = f"invalid option for 'closed': {closed}" raise ValueError(msg) @@ -1291,7 +1354,7 @@ def set_closed(self, closed): @Appender( _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs ) - def is_non_overlapping_monotonic(self): + def is_non_overlapping_monotonic(self) -> bool: # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) # we already require left <= right @@ -1404,7 +1467,7 @@ def __arrow_array__(self, type=None): @Appender( _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""} ) - def to_tuples(self, na_tuple=True): + def to_tuples(self, na_tuple=True) -> np.ndarray: tuples = com.asarray_tuplesafe(zip(self._left, self._right)) if not na_tuple: # GH 18756 @@ -1433,7 +1496,7 @@ def delete(self: IntervalArrayT, loc) -> IntervalArrayT: return self._shallow_copy(left=new_left, right=new_right) @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) - def repeat(self, repeats, axis=None): + def repeat(self: IntervalArrayT, repeats: int, axis=None) -> IntervalArrayT: nv.validate_repeat((), {"axis": axis}) left_repeat = self.left.repeat(repeats) right_repeat = self.right.repeat(repeats) @@ -1508,7 +1571,7 @@ def isin(self, values) -> np.ndarray: # GH#38353 instead of casting to object, operating on a # complex128 ndarray is much more performant. 
- # error: "ArrayLike" has no attribute "view" [attr-defined] + # error: "ArrayLike" has no attribute "view" left = self._combined.view("complex128") # type:ignore[attr-defined] right = values._combined.view("complex128") return np.in1d(left, right) @@ -1532,7 +1595,7 @@ def _combined(self) -> ArrayLike: return comb -def maybe_convert_platform_interval(values): +def _maybe_convert_platform_interval(values) -> ArrayLike: """ Try to do platform conversion, with special casing for IntervalArray. Wrapper around maybe_convert_platform that alters the default return diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index a6ed75c65b2e9..bae14f4e560c2 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1,13 +1,33 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, +) import numpy as np -from pandas._libs import lib, missing as libmissing -from pandas._typing import ArrayLike, Dtype, NpDtype, Scalar +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + ArrayLike, + Dtype, + NpDtype, + Scalar, +) from pandas.errors import AbstractMethodError -from pandas.util._decorators import cache_readonly, doc +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import ( @@ -18,10 +38,17 @@ is_string_dtype, pandas_dtype, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) from pandas.core import nanops -from pandas.core.algorithms import factorize_array, isin, take +from pandas.core.algorithms import ( + factorize_array, + isin, + take, +) from pandas.core.array_algos import masked_reductions from pandas.core.arraylike import OpsMixin from pandas.core.arrays 
import ExtensionArray @@ -377,7 +404,10 @@ def value_counts(self, dropna: bool = True) -> Series: -------- Series.value_counts """ - from pandas import Index, Series + from pandas import ( + Index, + Series, + ) from pandas.arrays import IntegerArray # compute counts on the data with no nans diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py index 69499bc7e4a77..57017e44a66e9 100644 --- a/pandas/core/arrays/numeric.py +++ b/pandas/core/arrays/numeric.py @@ -2,11 +2,19 @@ import datetime import numbers -from typing import TYPE_CHECKING, Any, List, Union +from typing import ( + TYPE_CHECKING, + Any, + List, + Union, +) import numpy as np -from pandas._libs import Timedelta, missing as libmissing +from pandas._libs import ( + Timedelta, + missing as libmissing, +) from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import ( @@ -18,7 +26,10 @@ ) from pandas.core import ops -from pandas.core.arrays.masked import BaseMaskedArray, BaseMaskedDtype +from pandas.core.arrays.masked import ( + BaseMaskedArray, + BaseMaskedDtype, +) if TYPE_CHECKING: import pyarrow diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 9999a9ed411d8..fd95ab987b18a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -1,19 +1,31 @@ from __future__ import annotations import numbers -from typing import Optional, Tuple, Union +from typing import ( + Optional, + Tuple, + Union, +) import numpy as np from numpy.lib.mixins import NDArrayOperatorsMixin from pandas._libs import lib -from pandas._typing import Dtype, NpDtype, Scalar +from pandas._typing import ( + Dtype, + NpDtype, + Scalar, +) from pandas.compat.numpy import function as nv +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.dtypes import PandasDtype from pandas.core.dtypes.missing import isna -from pandas.core import nanops, ops +from pandas.core import ( + nanops, + ops, +) from 
pandas.core.arraylike import OpsMixin from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.strings.object_array import ObjectStringArrayMixin @@ -86,6 +98,14 @@ def _from_sequence( dtype = dtype._dtype result = np.asarray(scalars, dtype=dtype) + if ( + result.ndim > 1 + and not hasattr(scalars, "dtype") + and (dtype is None or dtype == object) + ): + # e.g. list-of-tuples + result = construct_1d_object_array_from_listlike(scalars) + if copy and result is scalars: result = result.copy() return cls(result) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e0149f27ad6a6..109be2c67bb1a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -2,7 +2,15 @@ from datetime import timedelta import operator -from typing import Any, Callable, List, Optional, Sequence, Type, Union +from typing import ( + Any, + Callable, + List, + Optional, + Sequence, + Type, + Union, +) import numpy as np @@ -20,7 +28,10 @@ ) from pandas._libs.tslibs.dtypes import FreqGroup from pandas._libs.tslibs.fields import isleapyear_arr -from pandas._libs.tslibs.offsets import Tick, delta_to_tick +from pandas._libs.tslibs.offsets import ( + Tick, + delta_to_tick, +) from pandas._libs.tslibs.period import ( DIFFERENT_FREQ, IncompatibleFrequency, @@ -29,8 +40,15 @@ get_period_field_arr, period_asfreq_arr, ) -from pandas._typing import AnyArrayLike, Dtype, NpDtype -from pandas.util._decorators import cache_readonly, doc +from pandas._typing import ( + AnyArrayLike, + Dtype, + NpDtype, +) +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.core.dtypes.common import ( TD64NS_DTYPE, @@ -49,7 +67,10 @@ ABCSeries, ABCTimedeltaArray, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) import pandas.core.algorithms as algos from pandas.core.arrays import datetimelike as dtl diff --git a/pandas/core/arrays/sparse/__init__.py 
b/pandas/core/arrays/sparse/__init__.py index e9ff4b7d4ffc2..18294ead0329d 100644 --- a/pandas/core/arrays/sparse/__init__.py +++ b/pandas/core/arrays/sparse/__init__.py @@ -1,6 +1,9 @@ # flake8: noqa: F401 -from pandas.core.arrays.sparse.accessor import SparseAccessor, SparseFrameAccessor +from pandas.core.arrays.sparse.accessor import ( + SparseAccessor, + SparseFrameAccessor, +) from pandas.core.arrays.sparse.array import ( BlockIndex, IntIndex, diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index c0bc88dc54e43..c3d11793dbd8c 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -6,7 +6,10 @@ from pandas.core.dtypes.cast import find_common_type -from pandas.core.accessor import PandasDelegate, delegate_names +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) from pandas.core.arrays.sparse.array import SparseArray from pandas.core.arrays.sparse.dtype import SparseDtype diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4f68ed3d9a79d..a209037f9a9a6 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -6,16 +6,32 @@ from collections import abc import numbers import operator -from typing import Any, Callable, Optional, Sequence, Type, TypeVar, Union +from typing import ( + Any, + Callable, + Optional, + Sequence, + Type, + TypeVar, + Union, +) import warnings import numpy as np from pandas._libs import lib import pandas._libs.sparse as splib -from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex +from pandas._libs.sparse import ( + BlockIndex, + IntIndex, + SparseIndex, +) from pandas._libs.tslibs import NaT -from pandas._typing import Dtype, NpDtype, Scalar +from pandas._typing import ( + Dtype, + NpDtype, + Scalar, +) from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning @@ -37,8 +53,15 @@ is_string_dtype, pandas_dtype, ) -from 
pandas.core.dtypes.generic import ABCIndex, ABCSeries -from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, +) import pandas.core.algorithms as algos from pandas.core.arraylike import OpsMixin @@ -46,7 +69,10 @@ from pandas.core.arrays.sparse.dtype import SparseDtype from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.core.construction import extract_array, sanitize_array +from pandas.core.construction import ( + extract_array, + sanitize_array, +) from pandas.core.indexers import check_array_indexer from pandas.core.missing import interpolate_2d from pandas.core.nanops import check_below_min_count @@ -738,7 +764,10 @@ def value_counts(self, dropna: bool = True): ------- counts : Series """ - from pandas import Index, Series + from pandas import ( + Index, + Series, + ) keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna) fcounts = self.sp_index.ngaps diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 4c1c1b42ff6fa..948edcbd99e64 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -2,15 +2,28 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Tuple, + Type, +) import warnings import numpy as np -from pandas._typing import Dtype, DtypeObj +from pandas._typing import ( + Dtype, + DtypeObj, +) from pandas.errors import PerformanceWarning -from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( is_bool_dtype, @@ -20,7 +33,10 @@ 
is_string_dtype, pandas_dtype, ) -from pandas.core.dtypes.missing import isna, na_value_for_dtype +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) if TYPE_CHECKING: from pandas.core.arrays.sparse.array import SparseArray diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 56c678c88b9c7..ad2c5f75fc32c 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -3,7 +3,10 @@ Currently only includes to_coo helpers. """ -from pandas.core.indexes.api import Index, MultiIndex +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) from pandas.core.series import Series diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 65618ce32b6d7..6fd68050bc8dc 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -1,14 +1,28 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Type, Union +from typing import ( + TYPE_CHECKING, + Optional, + Type, + Union, +) import numpy as np -from pandas._libs import lib, missing as libmissing -from pandas._typing import Dtype, Scalar +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + Dtype, + Scalar, +) from pandas.compat.numpy import function as nv -from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, @@ -21,7 +35,11 @@ from pandas.core import ops from pandas.core.array_algos import masked_reductions -from pandas.core.arrays import FloatingArray, IntegerArray, PandasArray +from pandas.core.arrays import ( + FloatingArray, + IntegerArray, + PandasArray, +) from pandas.core.arrays.floating import FloatingDtype from pandas.core.arrays.integer import _IntegerDtype from pandas.core.construction import 
extract_array @@ -190,9 +208,8 @@ def __init__(self, values, copy=False): values = extract_array(values) super().__init__(values, copy=copy) - # pandas\core\arrays\string_.py:188: error: Incompatible types in - # assignment (expression has type "StringDtype", variable has type - # "PandasDtype") [assignment] + # error: Incompatible types in assignment (expression has type "StringDtype", + # variable has type "PandasDtype") self._dtype = StringDtype() # type: ignore[assignment] if not isinstance(values, type(self)): self._validate() @@ -386,7 +403,11 @@ def _cmp_method(self, other, op): _str_na_value = StringDtype.na_value def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): - from pandas.arrays import BooleanArray, IntegerArray, StringArray + from pandas.arrays import ( + BooleanArray, + IntegerArray, + StringArray, + ) from pandas.core.arrays.string_ import StringDtype if dtype is None: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 252e9a84022db..e2b0ad372bf88 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -1,12 +1,25 @@ from __future__ import annotations from distutils.version import LooseVersion -from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + Optional, + Sequence, + Type, + Union, +) import numpy as np -from pandas._libs import lib, missing as libmissing -from pandas._typing import Dtype, NpDtype +from pandas._libs import ( + lib, + missing as libmissing, +) +from pandas._typing import ( + Dtype, + NpDtype, +) from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.base import ExtensionDtype @@ -22,7 +35,10 @@ ) from pandas.core.arraylike import OpsMixin from pandas.core.arrays.base import ExtensionArray -from pandas.core.indexers import check_array_indexer, validate_indices +from pandas.core.indexers import ( + check_array_indexer, + validate_indices, +) from 
pandas.core.missing import get_fill_func try: @@ -615,7 +631,10 @@ def value_counts(self, dropna: bool = True) -> Series: -------- Series.value_counts """ - from pandas import Index, Series + from pandas import ( + Index, + Series, + ) vc = self._data.value_counts() diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 480aaf3d48f62..893644be23a0e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1,11 +1,18 @@ from __future__ import annotations from datetime import timedelta -from typing import List, Optional, Union +from typing import ( + List, + Optional, + Union, +) import numpy as np -from pandas._libs import lib, tslibs +from pandas._libs import ( + lib, + tslibs, +) from pandas._libs.tslibs import ( BaseOffset, NaT, @@ -42,12 +49,18 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex +from pandas.core.dtypes.generic import ( + ABCSeries, + ABCTimedeltaIndex, +) from pandas.core.dtypes.missing import isna from pandas.core import nanops from pandas.core.algorithms import checked_add_with_arr -from pandas.core.arrays import IntegerArray, datetimelike as dtl +from pandas.core.arrays import ( + IntegerArray, + datetimelike as dtl, +) from pandas.core.arrays._ranges import generate_regular_range import pandas.core.common as com from pandas.core.construction import extract_array diff --git a/pandas/core/base.py b/pandas/core/base.py index da8ed8a59f981..9b2efeff76926 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -19,11 +19,18 @@ import numpy as np import pandas._libs.lib as lib -from pandas._typing import Dtype, DtypeObj, IndexLabel +from pandas._typing import ( + Dtype, + DtypeObj, + IndexLabel, +) from pandas.compat import PYPY from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import cache_readonly, doc +from 
pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -32,12 +39,23 @@ is_object_dtype, is_scalar, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCSeries -from pandas.core.dtypes.missing import isna, remove_na_arraylike +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) from pandas.core import algorithms from pandas.core.accessor import DirNamesMixin -from pandas.core.algorithms import duplicated, unique1d, value_counts +from pandas.core.algorithms import ( + duplicated, + unique1d, + value_counts, +) from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ExtensionArray from pandas.core.construction import create_series_with_explicit_dtype @@ -95,8 +113,7 @@ def __sizeof__(self): either a value or Series of values """ if hasattr(self, "memory_usage"): - # pandas\core\base.py:84: error: "PandasObject" has no attribute - # "memory_usage" [attr-defined] + # error: "PandasObject" has no attribute "memory_usage" mem = self.memory_usage(deep=True) # type: ignore[attr-defined] return int(mem if is_scalar(mem) else mem.sum()) @@ -206,17 +223,14 @@ def _selection_list(self): @cache_readonly def _selected_obj(self): - # pandas\core\base.py:195: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" if self._selection is None or isinstance( self.obj, ABCSeries # type: ignore[attr-defined] ): - # pandas\core\base.py:194: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" return self.obj # type: ignore[attr-defined] else: - # pandas\core\base.py:204: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" return self.obj[self._selection] # type: ignore[attr-defined] 
@cache_readonly @@ -225,29 +239,22 @@ def ndim(self) -> int: @cache_readonly def _obj_with_exclusions(self): - # pandas\core\base.py:209: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" if self._selection is not None and isinstance( self.obj, ABCDataFrame # type: ignore[attr-defined] ): - # pandas\core\base.py:217: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" return self.obj.reindex( # type: ignore[attr-defined] columns=self._selection_list ) - # pandas\core\base.py:207: error: "SelectionMixin" has no attribute - # "exclusions" [attr-defined] + # error: "SelectionMixin" has no attribute "exclusions" if len(self.exclusions) > 0: # type: ignore[attr-defined] - # pandas\core\base.py:208: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] - - # pandas\core\base.py:208: error: "SelectionMixin" has no attribute - # "exclusions" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" + # error: "SelectionMixin" has no attribute "exclusions" return self.obj.drop(self.exclusions, axis=1) # type: ignore[attr-defined] else: - # pandas\core\base.py:210: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" return self.obj # type: ignore[attr-defined] def __getitem__(self, key): @@ -255,13 +262,11 @@ def __getitem__(self, key): raise IndexError(f"Column(s) {self._selection} already selected") if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): - # pandas\core\base.py:217: error: "SelectionMixin" has no attribute - # "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" if len( self.obj.columns.intersection(key) # type: ignore[attr-defined] ) != len(key): - # pandas\core\base.py:218: error: "SelectionMixin" has no - # attribute "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" bad_keys = list( 
set(key).difference(self.obj.columns) # type: ignore[attr-defined] ) @@ -269,13 +274,13 @@ def __getitem__(self, key): return self._gotitem(list(key), ndim=2) elif not getattr(self, "as_index", False): - # error: "SelectionMixin" has no attribute "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" if key not in self.obj.columns: # type: ignore[attr-defined] raise KeyError(f"Column not found: {key}") return self._gotitem(key, ndim=2) else: - # error: "SelectionMixin" has no attribute "obj" [attr-defined] + # error: "SelectionMixin" has no attribute "obj" if key not in self.obj: # type: ignore[attr-defined] raise KeyError(f"Column not found: {key}") return self._gotitem(key, ndim=1) @@ -601,8 +606,7 @@ def to_numpy( dtype='datetime64[ns]') """ if is_extension_array_dtype(self.dtype): - # pandas\core\base.py:837: error: Too many arguments for "to_numpy" - # of "ExtensionArray" [call-arg] + # error: Too many arguments for "to_numpy" of "ExtensionArray" return self.array.to_numpy( # type: ignore[call-arg] dtype, copy=copy, na_value=na_value, **kwargs ) @@ -914,13 +918,11 @@ def _map_values(self, mapper, na_action=None): # use the built in categorical series mapper which saves # time by mapping the categories instead of all values - # pandas\core\base.py:893: error: Incompatible types in - # assignment (expression has type "Categorical", variable has - # type "IndexOpsMixin") [assignment] + # error: Incompatible types in assignment (expression has type + # "Categorical", variable has type "IndexOpsMixin") self = cast("Categorical", self) # type: ignore[assignment] - # pandas\core\base.py:894: error: Item "ExtensionArray" of - # "Union[ExtensionArray, Any]" has no attribute "map" - # [union-attr] + # error: Item "ExtensionArray" of "Union[ExtensionArray, Any]" has no + # attribute "map" return self._values.map(mapper) # type: ignore[union-attr] values = self._values @@ -938,8 +940,7 @@ def _map_values(self, mapper, na_action=None): raise 
NotImplementedError map_f = lambda values, f: values.map(f) else: - # pandas\core\base.py:1142: error: "IndexOpsMixin" has no attribute - # "astype" [attr-defined] + # error: "IndexOpsMixin" has no attribute "astype" values = self.astype(object)._values # type: ignore[attr-defined] if na_action == "ignore": map_f = lambda values, f: lib.map_infer_mask( @@ -1177,8 +1178,7 @@ def memory_usage(self, deep=False): are not components of the array if deep=False or if used on PyPy """ if hasattr(self.array, "memory_usage"): - # pandas\core\base.py:1379: error: "ExtensionArray" has no - # attribute "memory_usage" [attr-defined] + # error: "ExtensionArray" has no attribute "memory_usage" return self.array.memory_usage(deep=deep) # type: ignore[attr-defined] v = self.array.nbytes @@ -1313,9 +1313,8 @@ def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: def drop_duplicates(self, keep="first"): duplicated = self.duplicated(keep=keep) - # pandas\core\base.py:1507: error: Value of type "IndexOpsMixin" is not - # indexable [index] + # error: Value of type "IndexOpsMixin" is not indexable return self[~duplicated] # type: ignore[index] - def duplicated(self, keep="first"): + def duplicated(self, keep: Union[str, bool] = "first") -> np.ndarray: return duplicated(self._values, keep=keep) diff --git a/pandas/core/common.py b/pandas/core/common.py index aa24e12bf2cf1..8625c5063382f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -4,7 +4,10 @@ Note: pandas.core.common is *not* part of the public API. 
""" -from collections import abc, defaultdict +from collections import ( + abc, + defaultdict, +) import contextlib from functools import partial import inspect @@ -25,7 +28,12 @@ import numpy as np from pandas._libs import lib -from pandas._typing import AnyArrayLike, NpDtype, Scalar, T +from pandas._typing import ( + AnyArrayLike, + NpDtype, + Scalar, + T, +) from pandas.compat import np_version_under1p18 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -35,9 +43,13 @@ is_extension_array_dtype, is_integer, ) -from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) from pandas.core.dtypes.inference import iterable_not_string -from pandas.core.dtypes.missing import isna, isnull, notnull # noqa +from pandas.core.dtypes.missing import isna class SettingWithCopyError(ValueError): @@ -137,7 +149,7 @@ def is_bool_indexer(key: Any) -> bool: return False -def cast_scalar_indexer(val, warn_float=False): +def cast_scalar_indexer(val, warn_float: bool = False): """ To avoid numpy DeprecationWarnings, cast float to integer where valid. @@ -268,10 +280,6 @@ def maybe_iterable_to_list(obj: Union[Iterable[T], T]) -> Union[Collection[T], T """ if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized): return list(obj) - # error: Incompatible return value type (got - # "Union[pandas.core.common., - # pandas.core.common.1, T]", expected - # "Union[Collection[T], T]") [return-value] obj = cast(Collection, obj) return obj @@ -288,7 +296,7 @@ def is_null_slice(obj) -> bool: ) -def is_true_slices(line): +def is_true_slices(line) -> List[bool]: """ Find non-trivial slices in "line": return a list of booleans with same length. """ @@ -296,7 +304,7 @@ def is_true_slices(line): # TODO: used only once in indexing; belongs elsewhere? 
-def is_full_slice(obj, line) -> bool: +def is_full_slice(obj, line: int) -> bool: """ We have a full length slice. """ diff --git a/pandas/core/computation/align.py b/pandas/core/computation/align.py index 5ad3e78a76866..94724d559e501 100644 --- a/pandas/core/computation/align.py +++ b/pandas/core/computation/align.py @@ -3,8 +3,19 @@ """ from __future__ import annotations -from functools import partial, wraps -from typing import TYPE_CHECKING, Dict, Optional, Sequence, Tuple, Type, Union +from functools import ( + partial, + wraps, +) +from typing import ( + TYPE_CHECKING, + Dict, + Optional, + Sequence, + Tuple, + Type, + Union, +) import warnings import numpy as np @@ -12,7 +23,10 @@ from pandas._typing import FrameOrSeries from pandas.errors import PerformanceWarning -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from pandas.core.base import PandasObject import pandas.core.common as com diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index 77a378369ca34..5b2dbed7af6ea 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -3,10 +3,19 @@ """ import abc -from typing import Dict, Type - -from pandas.core.computation.align import align_terms, reconstruct_object -from pandas.core.computation.ops import MATHOPS, REDUCTIONS +from typing import ( + Dict, + Type, +) + +from pandas.core.computation.align import ( + align_terms, + reconstruct_object, +) +from pandas.core.computation.ops import ( + MATHOPS, + REDUCTIONS, +) import pandas.io.formats.printing as printing diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 12f16343362e2..51fcbb02fd926 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -10,7 +10,10 @@ from pandas.util._validators import validate_bool_kwarg from pandas.core.computation.engines import ENGINES -from 
pandas.core.computation.expr import PARSERS, Expr +from pandas.core.computation.expr import ( + PARSERS, + Expr, +) from pandas.core.computation.parsing import tokenize_string from pandas.core.computation.scope import ensure_scope diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index babf8116a5588..02660539f4981 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -3,10 +3,20 @@ """ import ast -from functools import partial, reduce +from functools import ( + partial, + reduce, +) from keyword import iskeyword import tokenize -from typing import Callable, Optional, Set, Tuple, Type, TypeVar +from typing import ( + Callable, + Optional, + Set, + Tuple, + Type, + TypeVar, +) import numpy as np @@ -31,7 +41,10 @@ UndefinedVariableError, is_term, ) -from pandas.core.computation.parsing import clean_backtick_quoted_toks, tokenize_string +from pandas.core.computation.parsing import ( + clean_backtick_quoted_toks, + tokenize_string, +) from pandas.core.computation.scope import Scope import pandas.io.formats.printing as printing @@ -659,8 +672,7 @@ def visit_Call(self, node, side=None, **kwargs): raise if res is None: - # pandas\core\computation\expr.py:663: error: "expr" has no - # attribute "id" [attr-defined] + # error: "expr" has no attribute "id" raise ValueError( f"Invalid function call {node.func.id}" # type: ignore[attr-defined] ) @@ -684,8 +696,7 @@ def visit_Call(self, node, side=None, **kwargs): for key in node.keywords: if not isinstance(key, ast.keyword): - # pandas\core\computation\expr.py:684: error: "expr" has no - # attribute "id" [attr-defined] + # error: "expr" has no attribute "id" raise ValueError( "keyword error in function call " # type: ignore[attr-defined] f"'{node.func.id}'" diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 087b7f39e3374..05736578b6337 100644 --- a/pandas/core/computation/expressions.py +++ 
b/pandas/core/computation/expressions.py @@ -6,7 +6,11 @@ """ import operator -from typing import List, Optional, Set +from typing import ( + List, + Optional, + Set, +) import warnings import numpy as np @@ -76,7 +80,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): if op_str is not None: # required min elements (otherwise we are adding overhead) - if np.prod(a.shape) > _MIN_ELEMENTS: + if a.size > _MIN_ELEMENTS: # check for dtype compatibility dtypes: Set[str] = set() for o in [a, b]: diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 7b42b21cadc1f..2f7623060e7dc 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -8,19 +8,33 @@ from distutils.version import LooseVersion from functools import partial import operator -from typing import Callable, Iterable, Optional, Union +from typing import ( + Callable, + Iterable, + Optional, + Union, +) import numpy as np from pandas._libs.tslibs import Timestamp -from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.common import ( + is_list_like, + is_scalar, +) import pandas.core.common as com -from pandas.core.computation.common import ensure_decoded, result_type_many +from pandas.core.computation.common import ( + ensure_decoded, + result_type_many, +) from pandas.core.computation.scope import DEFAULT_GLOBALS -from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded +from pandas.io.formats.printing import ( + pprint_thing, + pprint_thing_encoded, +) REDUCTIONS = ("sum", "prod") @@ -71,8 +85,7 @@ def __init__(self, name: str, is_local: Optional[bool] = None): class Term: def __new__(cls, name, env, side=None, encoding=None): klass = Constant if not isinstance(name, str) else cls - # pandas\core\computation\ops.py:72: error: Argument 2 for "super" not - # an instance of argument 1 [misc] + # error: Argument 2 for "super" not an instance of argument 1 supr_new = super(Term, klass).__new__ # type: 
ignore[misc] return supr_new(klass) @@ -593,7 +606,7 @@ def __init__(self, func, args): self.func = func def __call__(self, env): - # pandas\core\computation\ops.py:592: error: "Op" not callable [operator] + # error: "Op" not callable operands = [op(env) for op in self.operands] # type: ignore[operator] with np.errstate(all="ignore"): return self.func.func(*operands) @@ -605,7 +618,10 @@ def __repr__(self) -> str: class FuncNode: def __init__(self, name: str): - from pandas.core.computation.check import NUMEXPR_INSTALLED, NUMEXPR_VERSION + from pandas.core.computation.check import ( + NUMEXPR_INSTALLED, + NUMEXPR_VERSION, + ) if name not in MATHOPS or ( NUMEXPR_INSTALLED diff --git a/pandas/core/computation/parsing.py b/pandas/core/computation/parsing.py index 3c2f7f2793358..f3321fc55ad80 100644 --- a/pandas/core/computation/parsing.py +++ b/pandas/core/computation/parsing.py @@ -6,7 +6,11 @@ from keyword import iskeyword import token import tokenize -from typing import Hashable, Iterator, Tuple +from typing import ( + Hashable, + Iterator, + Tuple, +) # A token value Python's tokenizer probably will never use. 
BACKTICK_QUOTED_STRING = 100 diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 6a3b95186d666..5e7fdb8dc9c7d 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -3,24 +3,42 @@ import ast from functools import partial -from typing import Any, Dict, Optional, Tuple +from typing import ( + Any, + Dict, + Optional, + Tuple, +) import numpy as np -from pandas._libs.tslibs import Timedelta, Timestamp +from pandas._libs.tslibs import ( + Timedelta, + Timestamp, +) from pandas.compat.chainmap import DeepChainMap from pandas.core.dtypes.common import is_list_like import pandas.core.common as com -from pandas.core.computation import expr, ops, scope as _scope +from pandas.core.computation import ( + expr, + ops, + scope as _scope, +) from pandas.core.computation.common import ensure_decoded from pandas.core.computation.expr import BaseExprVisitor -from pandas.core.computation.ops import UndefinedVariableError, is_term +from pandas.core.computation.ops import ( + UndefinedVariableError, + is_term, +) from pandas.core.construction import extract_array from pandas.core.indexes.base import Index -from pandas.io.formats.printing import pprint_thing, pprint_thing_encoded +from pandas.io.formats.printing import ( + pprint_thing, + pprint_thing_encoded, +) class PyTablesScope(_scope.Scope): diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index c2ba7f9892ef0..71d725051977f 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -131,17 +131,14 @@ def __init__( # scope when we align terms (alignment accesses the underlying # numpy array of pandas objects) - # pandas\core\computation\scope.py:132: error: Incompatible types - # in assignment (expression has type "ChainMap[str, Any]", variable - # has type "DeepChainMap[str, Any]") [assignment] + # error: Incompatible types in assignment (expression has type + # "ChainMap[str, Any]", 
variable has type "DeepChainMap[str, Any]") self.scope = self.scope.new_child( # type: ignore[assignment] (global_dict or frame.f_globals).copy() ) if not isinstance(local_dict, Scope): - # pandas\core\computation\scope.py:134: error: Incompatible - # types in assignment (expression has type "ChainMap[str, - # Any]", variable has type "DeepChainMap[str, Any]") - # [assignment] + # error: Incompatible types in assignment (expression has type + # "ChainMap[str, Any]", variable has type "DeepChainMap[str, Any]") self.scope = self.scope.new_child( # type: ignore[assignment] (local_dict or frame.f_locals).copy() ) @@ -150,8 +147,7 @@ def __init__( # assumes that resolvers are going from outermost scope to inner if isinstance(local_dict, Scope): - # pandas\core\computation\scope.py:140: error: Cannot determine - # type of 'resolvers' [has-type] + # error: Cannot determine type of 'resolvers' resolvers += tuple(local_dict.resolvers.maps) # type: ignore[has-type] self.resolvers = DeepChainMap(*resolvers) self.temps = {} @@ -239,8 +235,7 @@ def swapkey(self, old_key: str, new_key: str, new_value=None): for mapping in maps: if old_key in mapping: - # pandas\core\computation\scope.py:228: error: Unsupported - # target for indexed assignment ("Mapping[Any, Any]") [index] + # error: Unsupported target for indexed assignment ("Mapping[Any, Any]") mapping[new_key] = new_value # type: ignore[index] return @@ -260,10 +255,8 @@ def _get_vars(self, stack, scopes: List[str]): for scope, (frame, _, _, _, _, _) in variables: try: d = getattr(frame, "f_" + scope) - # pandas\core\computation\scope.py:247: error: Incompatible - # types in assignment (expression has type "ChainMap[str, - # Any]", variable has type "DeepChainMap[str, Any]") - # [assignment] + # error: Incompatible types in assignment (expression has type + # "ChainMap[str, Any]", variable has type "DeepChainMap[str, Any]") self.scope = self.scope.new_child(d) # type: ignore[assignment] finally: # won't remove it, but DECREF 
it @@ -331,13 +324,10 @@ def full_scope(self): vars : DeepChainMap All variables in this scope. """ - # pandas\core\computation\scope.py:314: error: Unsupported operand - # types for + ("List[Dict[Any, Any]]" and "List[Mapping[Any, Any]]") - # [operator] - - # pandas\core\computation\scope.py:314: error: Unsupported operand - # types for + ("List[Dict[Any, Any]]" and "List[Mapping[str, Any]]") - # [operator] + # error: Unsupported operand types for + ("List[Dict[Any, Any]]" and + # "List[Mapping[Any, Any]]") + # error: Unsupported operand types for + ("List[Dict[Any, Any]]" and + # "List[Mapping[str, Any]]") maps = ( [self.temps] + self.resolvers.maps # type: ignore[operator] diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8aa3d7900e8e9..dd75473da6d78 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -7,16 +7,34 @@ from __future__ import annotations from collections import abc -from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Optional, + Sequence, + Union, + cast, +) import numpy as np import numpy.ma as ma from pandas._libs import lib -from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime -from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj +from pandas._libs.tslibs import ( + IncompatibleFrequency, + OutOfBoundsDatetime, +) +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, +) -from pandas.core.dtypes.base import ExtensionDtype, registry +from pandas.core.dtypes.base import ( + ExtensionDtype, + registry, +) from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, construct_1d_ndarray_preserving_na, @@ -49,7 +67,11 @@ import pandas.core.common as com if TYPE_CHECKING: - from pandas import ExtensionArray, Index, Series + from pandas import ( + ExtensionArray, + Index, + Series, + ) def array( diff --git a/pandas/core/describe.py b/pandas/core/describe.py 
index dcafb3c3a8be5..3a872c6202e04 100644 --- a/pandas/core/describe.py +++ b/pandas/core/describe.py @@ -5,14 +5,29 @@ """ from __future__ import annotations -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Callable, List, Optional, Sequence, Union, cast +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Callable, + List, + Optional, + Sequence, + Union, + cast, +) import warnings import numpy as np from pandas._libs.tslibs import Timestamp -from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Hashable +from pandas._typing import ( + FrameOrSeries, + FrameOrSeriesUnion, + Hashable, +) from pandas.util._validators import validate_percentile from pandas.core.dtypes.common import ( @@ -27,7 +42,10 @@ from pandas.io.formats.format import format_percentiles if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) def describe_ndframe( diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 887bbc052b5c9..d83405803753a 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -4,14 +4,26 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Type, Union +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Tuple, + Type, + Union, +) import numpy as np from pandas._typing import DtypeObj from pandas.errors import AbstractMethodError -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) if TYPE_CHECKING: from pandas.core.arrays import ExtensionArray diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e27c519304e2e..669bfe08d42b0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -5,7 +5,11 @@ from __future__ import annotations from contextlib import suppress -from datetime import datetime, timedelta +from 
datetime import ( + date, + datetime, + timedelta, +) from typing import ( TYPE_CHECKING, Any, @@ -24,7 +28,10 @@ import numpy as np -from pandas._libs import lib, tslib +from pandas._libs import ( + lib, + tslib, +) from pandas._libs.tslibs import ( NaT, OutOfBoundsDatetime, @@ -36,7 +43,13 @@ iNaT, ints_to_pydatetime, ) -from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, + Scalar, +) from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg @@ -58,6 +71,7 @@ is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, is_dtype_equal, is_extension_array_dtype, is_float, @@ -95,7 +109,10 @@ if TYPE_CHECKING: from pandas import Series - from pandas.core.arrays import DatetimeArray, ExtensionArray + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + ) _int8_max = np.iinfo(np.int8).max _int16_max = np.iinfo(np.int16).max @@ -154,6 +171,29 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal return value +def maybe_box_native(value: Scalar) -> Scalar: + """ + If passed a scalar cast the scalar to a python native type. 
+ + Parameters + ---------- + value : scalar or Series + + Returns + ------- + scalar or Series + """ + if is_datetime_or_timedelta_dtype(value): + value = maybe_box_datetimelike(value) + elif is_float(value): + value = float(value) + elif is_integer(value): + value = int(value) + elif is_bool(value): + value = bool(value) + return value + + def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: """ Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting @@ -387,7 +427,10 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj: """ from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.floating import Float64Dtype - from pandas.core.arrays.integer import Int64Dtype, _IntegerDtype + from pandas.core.arrays.integer import ( + Int64Dtype, + _IntegerDtype, + ) if how in ["add", "cumsum", "sum", "prod"]: if dtype == np.dtype(bool): @@ -549,35 +592,51 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan): # returns tuple of (dtype, fill_value) if issubclass(dtype.type, np.datetime64): - if isinstance(fill_value, datetime) and fill_value.tzinfo is not None: - # Trying to insert tzaware into tznaive, have to cast to object - dtype = np.dtype(np.object_) - elif is_integer(fill_value) or is_float(fill_value): - dtype = np.dtype(np.object_) - else: + inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) + if inferred == dtype: + return dtype, fv + + # TODO(2.0): once this deprecation is enforced, this whole case + # becomes equivalent to: + # dta = DatetimeArray._from_sequence([], dtype="M8[ns]") + # try: + # fv = dta._validate_setitem_value(fill_value) + # return dta.dtype, fv + # except (ValueError, TypeError): + # return np.dtype(object), fill_value + if isinstance(fill_value, date) and not isinstance(fill_value, datetime): + # deprecate casting of date object to match infer_dtype_from_scalar + # and DatetimeArray._validate_setitem_value try: - fill_value = 
Timestamp(fill_value).to_datetime64() - except (TypeError, ValueError): - dtype = np.dtype(np.object_) - elif issubclass(dtype.type, np.timedelta64): - if ( - is_integer(fill_value) - or is_float(fill_value) - or isinstance(fill_value, str) - ): - # TODO: What about str that can be a timedelta? - dtype = np.dtype(np.object_) - else: + fv = Timestamp(fill_value).to_datetime64() + except OutOfBoundsDatetime: + pass + else: + warnings.warn( + "Using a `date` object for fill_value with `datetime64[ns]` " + "dtype is deprecated. In a future version, this will be cast " + "to object dtype. Pass `fill_value=Timestamp(date_obj)` instead.", + FutureWarning, + stacklevel=7, + ) + return dtype, fv + elif isinstance(fill_value, str): try: - fv = Timedelta(fill_value) - except ValueError: - dtype = np.dtype(np.object_) + # explicitly wrap in str to convert np.str_ + fv = Timestamp(str(fill_value)) + except (ValueError, TypeError): + pass else: - if fv is NaT: - # NaT has no `to_timedelta64` method - fill_value = np.timedelta64("NaT", "ns") - else: - fill_value = fv.to_timedelta64() + if fv.tz is None: + return dtype, fv.asm8 + + return np.dtype(object), fill_value + + elif issubclass(dtype.type, np.timedelta64): + inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True) + if inferred == dtype: + return dtype, fv + return np.dtype(object), fill_value elif is_float(fill_value): if issubclass(dtype.type, np.bool_): @@ -723,21 +782,22 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, if val is NaT or val.tz is None: dtype = np.dtype("M8[ns]") + val = val.to_datetime64() else: if pandas_dtype: dtype = DatetimeTZDtype(unit="ns", tz=val.tz) else: # return datetimetz as object return np.dtype(object), val - val = val.value elif isinstance(val, (np.timedelta64, timedelta)): try: - val = Timedelta(val).value + val = Timedelta(val) except (OutOfBoundsTimedelta, OverflowError): dtype = np.dtype(object) else: dtype = np.dtype("m8[ns]") + val = 
np.timedelta64(val.value, "ns") elif is_bool(val): dtype = np.dtype(np.bool_) @@ -1497,7 +1557,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): value = iNaT # we have an array of datetime or timedeltas & nulls - elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype): + elif value.size or not is_dtype_equal(value.dtype, dtype): _disallow_mismatched_datetimelike(value, dtype) try: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d24cff4ae81bb..0966d0b93cc25 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2,14 +2,26 @@ Common type operations. """ -from typing import Any, Callable, Union +from typing import ( + Any, + Callable, + Union, +) import warnings import numpy as np -from pandas._libs import Interval, Period, algos +from pandas._libs import ( + Interval, + Period, + algos, +) from pandas._libs.tslibs import conversion -from pandas._typing import ArrayLike, DtypeObj, Optional +from pandas._typing import ( + ArrayLike, + DtypeObj, + Optional, +) from pandas.core.dtypes.base import registry from pandas.core.dtypes.dtypes import ( @@ -19,7 +31,10 @@ IntervalDtype, PeriodDtype, ) -from pandas.core.dtypes.generic import ABCCategorical, ABCIndex +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCIndex, +) from pandas.core.dtypes.inference import ( # noqa:F401 is_array_like, is_bool, diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b766392e35601..42ac786ff315e 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -5,7 +5,10 @@ import numpy as np -from pandas._typing import ArrayLike, DtypeObj +from pandas._typing import ( + ArrayLike, + DtypeObj, +) from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( @@ -14,11 +17,17 @@ is_extension_array_dtype, is_sparse, ) -from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCSeries +from pandas.core.dtypes.generic import 
( + ABCCategoricalIndex, + ABCSeries, +) from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseArray -from pandas.core.construction import array, ensure_wrapped_if_datetimelike +from pandas.core.construction import ( + array, + ensure_wrapped_if_datetimelike, +) def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike: diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index deafc17f76e10..da3a9269cf2c4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -31,11 +31,25 @@ to_offset, tz_compare, ) -from pandas._typing import Dtype, DtypeObj, NpDtype, Ordered +from pandas._typing import ( + Dtype, + DtypeObj, + NpDtype, + Ordered, +) -from pandas.core.dtypes.base import ExtensionDtype, register_extension_dtype -from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCIndex -from pandas.core.dtypes.inference import is_bool, is_list_like +from pandas.core.dtypes.base import ( + ExtensionDtype, + register_extension_dtype, +) +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCIndex, +) +from pandas.core.dtypes.inference import ( + is_bool, + is_list_like, +) if TYPE_CHECKING: import pyarrow @@ -1032,7 +1046,10 @@ class IntervalDtype(PandasExtensionDtype): _cache: Dict[str_type, PandasExtensionDtype] = {} def __new__(cls, subtype=None, closed: Optional[str_type] = None): - from pandas.core.dtypes.common import is_string_dtype, pandas_dtype + from pandas.core.dtypes.common import ( + is_string_dtype, + pandas_dtype, + ) if closed is not None and closed not in {"right", "left", "both", "neither"}: raise ValueError("closed must be one of 'right', 'left', 'both', 'neither'") diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 47a6009590d8b..2de7b262c3533 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -1,7 +1,11 @@ """ define generic base classes for pandas objects """ from __future__ import 
annotations -from typing import TYPE_CHECKING, Type, cast +from typing import ( + TYPE_CHECKING, + Type, + cast, +) if TYPE_CHECKING: from pandas import ( diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index ef645313de614..3279007fcebe1 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -9,8 +9,15 @@ from pandas._libs import lib import pandas._libs.missing as libmissing -from pandas._libs.tslibs import NaT, Period, iNaT -from pandas._typing import ArrayLike, DtypeObj +from pandas._libs.tslibs import ( + NaT, + Period, + iNaT, +) +from pandas._typing import ( + ArrayLike, + DtypeObj, +) from pandas.core.dtypes.common import ( DT64NS_DTYPE, @@ -429,7 +436,7 @@ def array_equivalent( # NaNs can occur in float and complex arrays. if is_float_dtype(left.dtype) or is_complex_dtype(left.dtype): - if not (np.prod(left.shape) and np.prod(right.shape)): + if not (left.size and right.size): return True return ((left == right) | (isna(left) & isna(right))).all() @@ -604,7 +611,11 @@ def is_valid_na_for_dtype(obj, dtype: DtypeObj) -> bool: if not lib.is_scalar(obj) or not isna(obj): return False if dtype.kind == "M": - return not isinstance(obj, np.timedelta64) + if isinstance(dtype, np.dtype): + # i.e. 
not tzaware + return not isinstance(obj, np.timedelta64) + # we have to rule out tznaive dt64("NaT") + return not isinstance(obj, (np.timedelta64, np.datetime64)) if dtype.kind == "m": return not isinstance(obj, np.datetime64) if dtype.kind in ["i", "u", "f", "c"]: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 63d238da12101..2d6cfff561aab 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -44,7 +44,11 @@ from pandas._config import get_option -from pandas._libs import algos as libalgos, lib, properties +from pandas._libs import ( + algos as libalgos, + lib, + properties, +) from pandas._libs.lib import no_default from pandas._typing import ( AggFuncType, @@ -59,6 +63,7 @@ FloatFormatType, FormattersType, FrameOrSeriesUnion, + Frequency, IndexKeyFunc, IndexLabel, Level, @@ -91,7 +96,7 @@ find_common_type, infer_dtype_from_scalar, invalidate_string_dtypes, - maybe_box_datetimelike, + maybe_box_native, maybe_convert_platform, maybe_downcast_to_dtype, maybe_infer_to_datetimelike, @@ -119,16 +124,34 @@ is_sequence, pandas_dtype, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) -from pandas.core import algorithms, common as com, generic, nanops, ops +from pandas.core import ( + algorithms, + common as com, + generic, + nanops, + ops, +) from pandas.core.accessor import CachedAccessor -from pandas.core.aggregation import reconstruct_func, relabel_result, transform +from pandas.core.aggregation import ( + reconstruct_func, + relabel_result, +) from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseFrameAccessor -from pandas.core.construction import extract_array, sanitize_masked_array -from pandas.core.generic import NDFrame, _shared_docs +from pandas.core.construction import ( + extract_array, + sanitize_masked_array, +) +from pandas.core.generic import ( + NDFrame, + _shared_docs, +) from 
pandas.core.indexers import check_key_length from pandas.core.indexes import base as ibase from pandas.core.indexes.api import ( @@ -138,9 +161,18 @@ ensure_index, ensure_index_from_sequences, ) -from pandas.core.indexes.multi import MultiIndex, maybe_droplevels -from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable -from pandas.core.internals import ArrayManager, BlockManager +from pandas.core.indexes.multi import ( + MultiIndex, + maybe_droplevels, +) +from pandas.core.indexing import ( + check_bool_indexer, + convert_to_index_sliceable, +) +from pandas.core.internals import ( + ArrayManager, + BlockManager, +) from pandas.core.internals.construction import ( arrays_to_mgr, dataclasses_to_dicts, @@ -156,17 +188,30 @@ ) from pandas.core.reshape.melt import melt from pandas.core.series import Series -from pandas.core.sorting import get_group_index, lexsort_indexer, nargsort +from pandas.core.sorting import ( + get_group_index, + lexsort_indexer, + nargsort, +) from pandas.io.common import get_handle -from pandas.io.formats import console, format as fmt -from pandas.io.formats.info import BaseInfo, DataFrameInfo +from pandas.io.formats import ( + console, + format as fmt, +) +from pandas.io.formats.info import ( + BaseInfo, + DataFrameInfo, +) import pandas.plotting if TYPE_CHECKING: from typing import Literal - from pandas._typing import TimedeltaConvertibleTypes, TimestampConvertibleTypes + from pandas._typing import ( + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ) from pandas.core.groupby.generic import DataFrameGroupBy from pandas.core.resample import Resampler @@ -707,10 +752,11 @@ def _can_fast_transpose(self) -> bool: """ if isinstance(self._mgr, ArrayManager): return False - if self._mgr.any_extension_types: - # TODO(EA2D) special case would be unnecessary with 2D EAs + blocks = self._mgr.blocks + if len(blocks) != 1: return False - return len(self._mgr.blocks) == 1 + + return not self._mgr.any_extension_types # 
---------------------------------------------------------------------- # Rendering Methods @@ -1163,8 +1209,8 @@ def __len__(self) -> int: """ return len(self.index) - # pandas/core/frame.py:1146: error: Overloaded function signatures 1 and 2 - # overlap with incompatible return types [misc] + # error: Overloaded function signatures 1 and 2 overlap with incompatible return + # types @overload def dot(self, other: Series) -> Series: # type: ignore[misc] ... @@ -1610,7 +1656,7 @@ def to_dict(self, orient: str = "dict", into=dict): ( "data", [ - list(map(maybe_box_datetimelike, t)) + list(map(maybe_box_native, t)) for t in self.itertuples(index=False, name=None) ], ), @@ -1618,7 +1664,7 @@ def to_dict(self, orient: str = "dict", into=dict): ) elif orient == "series": - return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items()) + return into_c((k, v) for k, v in self.items()) elif orient == "records": columns = self.columns.tolist() @@ -1627,8 +1673,7 @@ def to_dict(self, orient: str = "dict", into=dict): for row in self.itertuples(index=False, name=None) ) return [ - into_c((k, maybe_box_datetimelike(v)) for k, v in row.items()) - for row in rows + into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows ] elif orient == "index": @@ -4021,8 +4066,8 @@ def lookup(self, row_labels, col_labels) -> np.ndarray: .. deprecated:: 1.2.0 DataFrame.lookup is deprecated, use DataFrame.melt and DataFrame.loc instead. - For an example see :meth:`~pandas.DataFrame.lookup` - in the user guide. + For further details see + :ref:`Looking up values by index/column labels `. 
Parameters ---------- @@ -4634,7 +4679,11 @@ def _replace_columnwise( @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift( - self, periods=1, freq=None, axis: Axis = 0, fill_value=lib.no_default + self, + periods=1, + freq: Optional[Frequency] = None, + axis: Axis = 0, + fill_value=lib.no_default, ) -> DataFrame: axis = self._get_axis_number(axis) @@ -4822,8 +4871,8 @@ def set_index( elif isinstance(col, (Index, Series)): # if Index then not MultiIndex (treated above) - # error: Argument 1 to "append" of "list" has incompatible - # type "Union[Index, Series]"; expected "Index" [arg-type] + # error: Argument 1 to "append" of "list" has incompatible type + # "Union[Index, Series]"; expected "Index" arrays.append(col) # type:ignore[arg-type] names.append(col.name) elif isinstance(col, (list, np.ndarray)): @@ -7313,7 +7362,10 @@ def stack(self, level: Level = -1, dropna: bool = True): dog kg NaN 2.0 m 3.0 NaN """ - from pandas.core.reshape.reshape import stack, stack_multiple + from pandas.core.reshape.reshape import ( + stack, + stack_multiple, + ) if isinstance(level, (tuple, list)): result = stack_multiple(self, level, dropna=dropna) @@ -7680,21 +7732,14 @@ def _gotitem( examples=_agg_examples_doc, ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + from pandas.core.apply import frame_apply + axis = self._get_axis_number(axis) relabeling, func, columns, order = reconstruct_func(func, **kwargs) - result = None - try: - result, how = self._aggregate(func, axis, *args, **kwargs) - except TypeError as err: - exc = TypeError( - "DataFrame constructor called with " - f"incompatible data and dtype: {err}" - ) - raise exc from err - if result is None: - return self.apply(func, axis=axis, args=args, **kwargs) + op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + result = op.agg() if relabeling: # This is to keep the order to columns occurrence unchanged, and also @@ -7710,25 +7755,6 @@ def aggregate(self, func=None, axis: 
Axis = 0, *args, **kwargs): return result - def _aggregate(self, arg, axis: Axis = 0, *args, **kwargs): - from pandas.core.apply import frame_apply - - op = frame_apply( - self if axis == 0 else self.T, - func=arg, - axis=0, - args=args, - kwargs=kwargs, - ) - result, how = op.agg() - - if axis == 1: - # NDFrame.aggregate returns a tuple, and we need to transpose - # only result - result = result.T if result is not None else result - - return result, how - agg = aggregate @doc( @@ -7739,7 +7765,10 @@ def _aggregate(self, arg, axis: Axis = 0, *args, **kwargs): def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> DataFrame: - result = transform(self, func, axis, *args, **kwargs) + from pandas.core.apply import frame_apply + + op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) + result = op.transform() assert isinstance(result, DataFrame) return result @@ -7814,6 +7843,12 @@ def apply( DataFrame.aggregate: Only perform aggregating type operations. DataFrame.transform: Only perform transforming type operations. + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`udf-mutation` + for more details. + Examples -------- >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) @@ -9446,7 +9481,7 @@ def quantile( @doc(NDFrame.asfreq, **_shared_doc_kwargs) def asfreq( self, - freq, + freq: Frequency, method=None, how: Optional[str] = None, normalize: bool = False, @@ -9492,7 +9527,11 @@ def resample( ) def to_timestamp( - self, freq=None, how: str = "start", axis: Axis = 0, copy: bool = True + self, + freq: Optional[Frequency] = None, + how: str = "start", + axis: Axis = 0, + copy: bool = True, ) -> DataFrame: """ Cast to DatetimeIndex of timestamps, at *beginning* of period. 
@@ -9525,7 +9564,9 @@ def to_timestamp( setattr(new_obj, axis_name, new_ax) return new_obj - def to_period(self, freq=None, axis: Axis = 0, copy: bool = True) -> DataFrame: + def to_period( + self, freq: Optional[Frequency] = None, axis: Axis = 0, copy: bool = True + ) -> DataFrame: """ Convert DataFrame from DatetimeIndex to PeriodIndex. @@ -9723,12 +9764,3 @@ def _reindex_for_setitem(value: FrameOrSeriesUnion, index: Index) -> ArrayLike: "incompatible index of inserted column with frame index" ) from err return reindexed_value - - -def _maybe_atleast_2d(value): - # TODO(EA2D): not needed with 2D EAs - - if is_extension_array_dtype(value): - return value - - return np.atleast_2d(np.asarray(value)) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ec37da66760c3..1b7c02cd7a05b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -33,7 +33,12 @@ from pandas._config import config from pandas._libs import lib -from pandas._libs.tslibs import Period, Tick, Timestamp, to_offset +from pandas._libs.tslibs import ( + Period, + Tick, + Timestamp, + to_offset, +) from pandas._typing import ( Axis, CompressionOptions, @@ -57,9 +62,18 @@ ) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv -from pandas.errors import AbstractMethodError, InvalidIndexError -from pandas.util._decorators import doc, rewrite_axis_style_signature -from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs +from pandas.errors import ( + AbstractMethodError, + InvalidIndexError, +) +from pandas.util._decorators import ( + doc, + rewrite_axis_style_signature, +) +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, +) from pandas.core.dtypes.common import ( ensure_int64, @@ -82,16 +96,33 @@ is_timedelta64_dtype, pandas_dtype, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, 
+) from pandas.core.dtypes.inference import is_hashable -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) -from pandas.core import arraylike, indexing, missing, nanops +from pandas.core import ( + arraylike, + indexing, + missing, + nanops, +) import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray -from pandas.core.base import PandasObject, SelectionMixin +from pandas.core.base import ( + PandasObject, + SelectionMixin, +) import pandas.core.common as com -from pandas.core.construction import create_series_with_explicit_dtype, extract_array +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, +) from pandas.core.describe import describe_ndframe from pandas.core.flags import Flags from pandas.core.indexes import base as ibase @@ -103,16 +134,27 @@ RangeIndex, ensure_index, ) -from pandas.core.internals import ArrayManager, BlockManager +from pandas.core.internals import ( + ArrayManager, + BlockManager, +) from pandas.core.missing import find_valid_index from pandas.core.ops import align_method_FRAME from pandas.core.reshape.concat import concat from pandas.core.shared_docs import _shared_docs from pandas.core.sorting import get_indexer_indexer -from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window +from pandas.core.window import ( + Expanding, + ExponentialMovingWindow, + Rolling, + Window, +) from pandas.io.formats import format as fmt -from pandas.io.formats.format import DataFrameFormatter, DataFrameRenderer +from pandas.io.formats.format import ( + DataFrameFormatter, + DataFrameRenderer, +) from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: @@ -695,9 +737,10 @@ def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: ).__finalize__(self, method="swapaxes") @final + @doc(klass=_shared_doc_kwargs["klass"]) def droplevel(self: FrameOrSeries, level, axis=0) -> 
FrameOrSeries: """ - Return DataFrame with requested index / column level(s) removed. + Return {klass} with requested index / column level(s) removed. .. versionadded:: 0.24.0 @@ -708,7 +751,7 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: If list-like, elements must be names or positional indexes of levels. - axis : {0 or 'index', 1 or 'columns'}, default 0 + axis : {{0 or 'index', 1 or 'columns'}}, default 0 Axis along which the level(s) is removed: * 0 or 'index': remove level(s) in column. @@ -716,8 +759,8 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: Returns ------- - DataFrame - DataFrame with requested index / column level(s) removed. + {klass} + {klass} with requested index / column level(s) removed. Examples -------- @@ -10530,8 +10573,7 @@ def _add_numeric_operations(cls): def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): return NDFrame.any(self, axis, bool_only, skipna, level, **kwargs) - # pandas\core\generic.py:10725: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.any = any # type: ignore[assignment] @doc( @@ -10547,13 +10589,11 @@ def any(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): return NDFrame.all(self, axis, bool_only, skipna, level, **kwargs) - # pandas\core\generic.py:10719: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method - # pandas\core\generic.py:10719: error: Incompatible types in assignment - # (expression has type "Callable[[Iterable[object]], bool]", variable - # has type "Callable[[NDFrame, Any, Any, Any, Any, KwArg(Any)], Any]") - # [assignment] + # error: Incompatible types in assignment (expression has type + # "Callable[[Iterable[object]], bool]", variable has type "Callable[[NDFrame, + # Any, Any, Any, Any, KwArg(Any)], Any]") cls.all = all # type: ignore[assignment] # error: Argument 1 
to "doc" has incompatible type "Optional[str]"; expected @@ -10571,8 +10611,7 @@ def all(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): def mad(self, axis=None, skipna=None, level=None): return NDFrame.mad(self, axis, skipna, level) - # pandas\core\generic.py:10736: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.mad = mad # type: ignore[assignment] @doc( @@ -10595,8 +10634,7 @@ def sem( ): return NDFrame.sem(self, axis, skipna, level, ddof, numeric_only, **kwargs) - # pandas\core\generic.py:10758: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.sem = sem # type: ignore[assignment] @doc( @@ -10618,8 +10656,7 @@ def var( ): return NDFrame.var(self, axis, skipna, level, ddof, numeric_only, **kwargs) - # pandas\core\generic.py:10779: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.var = var # type: ignore[assignment] @doc( @@ -10642,8 +10679,7 @@ def std( ): return NDFrame.std(self, axis, skipna, level, ddof, numeric_only, **kwargs) - # pandas\core\generic.py:10801: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.std = std # type: ignore[assignment] @doc( @@ -10658,8 +10694,7 @@ def std( def cummin(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cummin(self, axis, skipna, *args, **kwargs) - # pandas\core\generic.py:10815: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.cummin = cummin # type: ignore[assignment] @doc( @@ -10674,8 +10709,7 @@ def cummin(self, axis=None, skipna=True, *args, **kwargs): def cummax(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cummax(self, axis, skipna, *args, **kwargs) - # pandas\core\generic.py:10829: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.cummax = cummax # type: ignore[assignment] @doc( @@ -10690,8 +10724,7 @@ def 
cummax(self, axis=None, skipna=True, *args, **kwargs): def cumsum(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cumsum(self, axis, skipna, *args, **kwargs) - # pandas\core\generic.py:10843: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.cumsum = cumsum # type: ignore[assignment] @doc( @@ -10706,8 +10739,7 @@ def cumsum(self, axis=None, skipna=True, *args, **kwargs): def cumprod(self, axis=None, skipna=True, *args, **kwargs): return NDFrame.cumprod(self, axis, skipna, *args, **kwargs) - # pandas\core\generic.py:10857: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.cumprod = cumprod # type: ignore[assignment] @doc( @@ -10734,8 +10766,7 @@ def sum( self, axis, skipna, level, numeric_only, min_count, **kwargs ) - # pandas\core\generic.py:10883: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.sum = sum # type: ignore[assignment] @doc( @@ -10761,8 +10792,7 @@ def prod( self, axis, skipna, level, numeric_only, min_count, **kwargs ) - # pandas\core\generic.py:10908: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.prod = prod # type: ignore[assignment] cls.product = prod @@ -10779,8 +10809,7 @@ def prod( def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs) - # pandas\core\generic.py:10924: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.mean = mean # type: ignore[assignment] @doc( @@ -10796,8 +10825,7 @@ def mean(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): def skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs) - # pandas\core\generic.py:10939: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a 
method cls.skew = skew # type: ignore[assignment] @doc( @@ -10816,8 +10844,7 @@ def skew(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): def kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs) - # pandas\core\generic.py:10957: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.kurt = kurt # type: ignore[assignment] cls.kurtosis = kurt @@ -10836,8 +10863,7 @@ def median( ): return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs) - # pandas\core\generic.py:10975: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.median = median # type: ignore[assignment] @doc( @@ -10855,8 +10881,7 @@ def median( def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs) - # pandas\core\generic.py:10992: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.max = max # type: ignore[assignment] @doc( @@ -10874,8 +10899,7 @@ def max(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): def min(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs) - # pandas\core\generic.py:11009: error: Cannot assign to a method - # [assignment] + # error: Cannot assign to a method cls.min = min # type: ignore[assignment] @final diff --git a/pandas/core/groupby/__init__.py b/pandas/core/groupby/__init__.py index 0c5d2658978b4..8248f378e2c1a 100644 --- a/pandas/core/groupby/__init__.py +++ b/pandas/core/groupby/__init__.py @@ -1,4 +1,8 @@ -from pandas.core.groupby.generic import DataFrameGroupBy, NamedAgg, SeriesGroupBy +from pandas.core.groupby.generic import ( + DataFrameGroupBy, + NamedAgg, + SeriesGroupBy, +) from pandas.core.groupby.groupby import GroupBy 
from pandas.core.groupby.grouper import Grouper diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 594c5899209df..c169e29b74dbb 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -9,7 +9,10 @@ from pandas._typing import final -from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.common import ( + is_list_like, + is_scalar, +) from pandas.core.base import PandasObject @@ -57,8 +60,7 @@ def _gotitem(self, key, ndim, subset=None): """ # create a new object to prevent aliasing if subset is None: - # pandas\core\groupby\base.py:52: error: "GotItemMixin" has no - # attribute "obj" [attr-defined] + # error: "GotItemMixin" has no attribute "obj" subset = self.obj # type: ignore[attr-defined] # we need to make a shallow copy of ourselves @@ -70,22 +72,15 @@ def _gotitem(self, key, ndim, subset=None): # Try to select from a DataFrame, falling back to a Series try: - # pandas\core\groupby\base.py:60: error: "GotItemMixin" has no - # attribute "_groupby" [attr-defined] + # error: "GotItemMixin" has no attribute "_groupby" groupby = self._groupby[key] # type: ignore[attr-defined] except IndexError: - # pandas\core\groupby\base.py:62: error: "GotItemMixin" has no - # attribute "_groupby" [attr-defined] + # error: "GotItemMixin" has no attribute "_groupby" groupby = self._groupby # type: ignore[attr-defined] - # pandas\core\groupby\base.py:64: error: Too many arguments for - # "GotItemMixin" [call-arg] - - # pandas\core\groupby\base.py:64: error: Unexpected keyword argument - # "groupby" for "GotItemMixin" [call-arg] - - # pandas\core\groupby\base.py:64: error: Unexpected keyword argument - # "parent" for "GotItemMixin" [call-arg] + # error: Too many arguments for "GotItemMixin" + # error: Unexpected keyword argument "groupby" for "GotItemMixin" + # error: Unexpected keyword argument "parent" for "GotItemMixin" self = type(self)( subset, groupby=groupby, parent=self, **kwargs # type: 
ignore[call-arg] ) diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 64037f5757a38..c9dd420ec33df 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -1,4 +1,7 @@ -from typing import Optional, Tuple +from typing import ( + Optional, + Tuple, +) import numpy as np diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a7297923f1034..c1a277925de2a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -7,7 +7,10 @@ """ from __future__ import annotations -from collections import abc, namedtuple +from collections import ( + abc, + namedtuple, +) import copy from functools import partial from textwrap import dedent @@ -32,9 +35,20 @@ import numpy as np -from pandas._libs import lib, reduction as libreduction -from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion -from pandas.util._decorators import Appender, Substitution, doc +from pandas._libs import ( + lib, + reduction as libreduction, +) +from pandas._typing import ( + ArrayLike, + FrameOrSeries, + FrameOrSeriesUnion, +) +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) from pandas.core.dtypes.cast import ( find_common_type, @@ -46,23 +60,36 @@ ensure_platform_int, is_bool, is_categorical_dtype, + is_dict_like, is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar, needs_i8_conversion, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) -from pandas.core import algorithms, nanops +from pandas.core import ( + algorithms, + nanops, +) from pandas.core.aggregation import ( maybe_mangle_lambdas, reconstruct_func, validate_func_kwargs, ) from pandas.core.apply import GroupByApply -from pandas.core.arrays import Categorical, ExtensionArray -from pandas.core.base import DataError, SpecificationError +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) 
+from pandas.core.base import ( + DataError, + SpecificationError, +) import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.frame import DataFrame @@ -76,7 +103,11 @@ get_groupby, group_selection_context, ) -from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same +from pandas.core.indexes.api import ( + Index, + MultiIndex, + all_indexes_same, +) import pandas.core.indexes.base as ibase from pandas.core.internals import BlockManager from pandas.core.series import Series @@ -580,6 +611,12 @@ def filter(self, func, dropna=True, *args, **kwargs): dropna : Drop groups that do not pass the filter. True by default; if False, groups that evaluate False are filled with NaNs. + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`udf-mutation` + for more details. + Examples -------- >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', @@ -962,8 +999,8 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) func = maybe_mangle_lambdas(func) op = GroupByApply(self, func, args, kwargs) - result, how = op.agg() - if how is None: + result = op.agg() + if not is_dict_like(func) and result is not None: return result if result is None: @@ -982,7 +1019,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # try to treat as if we are passing a list try: - result, _ = GroupByApply( + result = GroupByApply( self, [func], args=(), kwargs={"_axis": self.axis} ).agg() @@ -1506,6 +1543,10 @@ def filter(self, func, dropna=True, *args, **kwargs): Each subframe is endowed the attribute 'name' in case you need to know which group you are working on. + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`udf-mutation` + for more details. 
+ Examples -------- >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5758762c13984..e939c184d501a 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -10,7 +10,10 @@ class providing the base-class of operations. from contextlib import contextmanager import datetime -from functools import partial, wraps +from functools import ( + partial, + wraps, +) import inspect from textwrap import dedent import types @@ -37,7 +40,10 @@ class providing the base-class of operations. from pandas._config.config import option_context -from pandas._libs import Timestamp, lib +from pandas._libs import ( + Timestamp, + lib, +) import pandas._libs.groupby as libgroupby from pandas._typing import ( F, @@ -50,7 +56,12 @@ class providing the base-class of operations. ) from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import Appender, Substitution, cache_readonly, doc +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, + doc, +) from pandas.core.dtypes.cast import maybe_downcast_numeric from pandas.core.dtypes.common import ( @@ -64,17 +75,35 @@ class providing the base-class of operations. 
is_scalar, is_timedelta64_dtype, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) from pandas.core import nanops import pandas.core.algorithms as algorithms -from pandas.core.arrays import Categorical, DatetimeArray -from pandas.core.base import DataError, PandasObject, SelectionMixin +from pandas.core.arrays import ( + Categorical, + DatetimeArray, +) +from pandas.core.base import ( + DataError, + PandasObject, + SelectionMixin, +) import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.core.groupby import base, numba_, ops -from pandas.core.indexes.api import CategoricalIndex, Index, MultiIndex +from pandas.core.groupby import ( + base, + numba_, + ops, +) +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, + MultiIndex, +) from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter from pandas.core.util.numba_ import NUMBA_FUNC_CACHE @@ -344,7 +373,7 @@ class providing the base-class of operations. in the subframe. If f also supports application to the entire subframe, then a fast path is used starting from the second chunk. * f must not mutate groups. Mutation is not supported and may - produce unexpected results. + produce unexpected results. See :ref:`udf-mutation` for more details. When using ``engine='numba'``, there will be no "fall back" behavior internally. The group data and group index will be passed as numpy arrays to the JITed @@ -447,6 +476,10 @@ class providing the base-class of operations. The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried. {examples} + +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`udf-mutation` +for more details. 
""" @@ -1513,11 +1546,12 @@ def mean(self, numeric_only: bool = True): 2 4.0 Name: B, dtype: float64 """ - return self._cython_agg_general( + result = self._cython_agg_general( "mean", alt=lambda x, axis: Series(x).mean(numeric_only=numeric_only), numeric_only=numeric_only, ) + return result.__finalize__(self.obj, method="groupby") @final @Substitution(name="groupby") @@ -1539,11 +1573,12 @@ def median(self, numeric_only=True): Series or DataFrame Median of values within each group. """ - return self._cython_agg_general( + result = self._cython_agg_general( "median", alt=lambda x, axis: Series(x).median(axis=axis, numeric_only=numeric_only), numeric_only=numeric_only, ) + return result.__finalize__(self.obj, method="groupby") @final @Substitution(name="groupby") diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c7dc6d021a4c3..89becb880c519 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -4,12 +4,22 @@ """ from __future__ import annotations -from typing import Dict, Hashable, List, Optional, Set, Tuple +from typing import ( + Dict, + Hashable, + List, + Optional, + Set, + Tuple, +) import warnings import numpy as np -from pandas._typing import FrameOrSeries, final +from pandas._typing import ( + FrameOrSeries, + final, +) from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly @@ -22,12 +32,22 @@ ) import pandas.core.algorithms as algorithms -from pandas.core.arrays import Categorical, ExtensionArray +from pandas.core.arrays import ( + Categorical, + ExtensionArray, +) import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.groupby import ops -from pandas.core.groupby.categorical import recode_for_groupby, recode_from_groupby -from pandas.core.indexes.api import CategoricalIndex, Index, MultiIndex +from pandas.core.groupby.categorical import ( + recode_for_groupby, + recode_from_groupby, +) +from pandas.core.indexes.api import 
( + CategoricalIndex, + Index, + MultiIndex, +) from pandas.core.series import Series from pandas.io.formats.printing import pprint_thing @@ -281,9 +301,8 @@ def _get_grouper(self, obj, validate: bool = True): a tuple of binner, grouper, obj (possibly sorted) """ self._set_grouper(obj) - # pandas\core\groupby\grouper.py:310: error: Value of type variable - # "FrameOrSeries" of "get_grouper" cannot be "Optional[Any]" - # [type-var] + # error: Value of type variable "FrameOrSeries" of "get_grouper" cannot be + # "Optional[Any]" self.grouper, _, self.obj = get_grouper( # type: ignore[type-var] self.obj, [self.key], @@ -370,8 +389,7 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): @final @property def groups(self): - # pandas\core\groupby\grouper.py:382: error: Item "None" of - # "Optional[Any]" has no attribute "groups" [union-attr] + # error: Item "None" of "Optional[Any]" has no attribute "groups" return self.grouper.groups # type: ignore[union-attr] @final diff --git a/pandas/core/groupby/numba_.py b/pandas/core/groupby/numba_.py index 5c983985628ad..3ba70baec1561 100644 --- a/pandas/core/groupby/numba_.py +++ b/pandas/core/groupby/numba_.py @@ -1,6 +1,12 @@ """Common utilities for Numba operations with groupby ops""" import inspect -from typing import Any, Callable, Dict, Optional, Tuple +from typing import ( + Any, + Callable, + Dict, + Optional, + Tuple, +) import numpy as np diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 1b1406fe9cd0f..5004d1fe08a5b 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -22,10 +22,20 @@ import numpy as np -from pandas._libs import NaT, iNaT, lib +from pandas._libs import ( + NaT, + iNaT, + lib, +) import pandas._libs.groupby as libgroupby import pandas._libs.reduction as libreduction -from pandas._typing import ArrayLike, F, FrameOrSeries, Shape, final +from pandas._typing import ( + ArrayLike, + F, + FrameOrSeries, + Shape, + final, +) from pandas.errors import 
AbstractMethodError from pandas.util._decorators import cache_readonly @@ -55,15 +65,25 @@ needs_i8_conversion, ) from pandas.core.dtypes.generic import ABCCategoricalIndex -from pandas.core.dtypes.missing import isna, maybe_fill +from pandas.core.dtypes.missing import ( + isna, + maybe_fill, +) import pandas.core.algorithms as algorithms from pandas.core.base import SelectionMixin import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.core.groupby import base, grouper -from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.groupby import ( + base, + grouper, +) +from pandas.core.indexes.api import ( + Index, + MultiIndex, + ensure_index, +) from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -722,11 +742,10 @@ def _aggregate_series_fast(self, obj: Series, func: F): group_index, _, ngroups = self.group_info # avoids object / Series creation overhead - dummy = obj.iloc[:0] indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer) group_index = algorithms.take_nd(group_index, indexer, allow_fill=False) - grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups, dummy) + grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups) result, counts = grouper.get_result() return result, counts @@ -925,8 +944,7 @@ def agg_series(self, obj: Series, func: F): # preempt SeriesBinGrouper from raising TypeError return self._aggregate_series_pure_python(obj, func) - dummy = obj[:0] - grouper = libreduction.SeriesBinGrouper(obj, func, self.bins, dummy) + grouper = libreduction.SeriesBinGrouper(obj, func, self.bins) return grouper.get_result() diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index c7011b4339fe7..86d6b772fe2e4 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -8,7 +8,11 @@ import numpy as np -from pandas._typing import Any, AnyArrayLike, 
ArrayLike +from pandas._typing import ( + Any, + AnyArrayLike, + ArrayLike, +) from pandas.core.dtypes.common import ( is_array_like, @@ -18,7 +22,10 @@ is_integer_dtype, is_list_like, ) -from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) if TYPE_CHECKING: from pandas.core.frame import DataFrame @@ -337,7 +344,7 @@ def length_of_indexer(indexer, target=None) -> int: raise AssertionError("cannot find the length of the indexer") -def deprecate_ndim_indexing(result, stacklevel=3): +def deprecate_ndim_indexing(result, stacklevel: int = 3): """ Helper function to raise the deprecation warning for multi-dimensional indexing on 1D Series/Index. diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 430d3ea8f5e33..017f58bff03e9 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -19,9 +19,19 @@ ) from pandas.core.dtypes.generic import ABCSeries -from pandas.core.accessor import PandasDelegate, delegate_names -from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray -from pandas.core.base import NoNewAttributesMixin, PandasObject +from pandas.core.accessor import ( + PandasDelegate, + delegate_names, +) +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) +from pandas.core.base import ( + NoNewAttributesMixin, + PandasObject, +) from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index d4f22e482af84..5656323b82fb7 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,7 +1,13 @@ import textwrap -from typing import List, Set +from typing import ( + List, + Set, +) -from pandas._libs import NaT, lib +from pandas._libs import ( + NaT, + lib, +) from pandas.errors import InvalidIndexError from pandas.core.indexes.base import ( diff 
--git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 789ca04b894cd..e633d6b28a8c5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -17,6 +17,7 @@ Sequence, Set, Tuple, + Type, TypeVar, Union, cast, @@ -25,19 +26,41 @@ import numpy as np -from pandas._libs import algos as libalgos, index as libindex, lib +from pandas._libs import ( + algos as libalgos, + index as libindex, + lib, +) import pandas._libs.join as libjoin -from pandas._libs.lib import is_datetime_array, no_default +from pandas._libs.lib import ( + is_datetime_array, + no_default, +) from pandas._libs.tslibs import ( IncompatibleFrequency, OutOfBoundsDatetime, Timestamp, tz_compare, ) -from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Shape, final +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, + Shape, + T, + final, +) from pandas.compat.numpy import function as nv -from pandas.errors import DuplicateLabelError, InvalidIndexError -from pandas.util._decorators import Appender, cache_readonly, doc +from pandas.errors import ( + DuplicateLabelError, + InvalidIndexError, +) +from pandas.util._decorators import ( + Appender, + cache_readonly, + doc, +) from pandas.core.dtypes.cast import ( can_hold_element, @@ -88,26 +111,45 @@ ABCTimedeltaIndex, ) from pandas.core.dtypes.inference import is_dict_like -from pandas.core.dtypes.missing import array_equivalent, is_valid_na_for_dtype, isna +from pandas.core.dtypes.missing import ( + array_equivalent, + is_valid_na_for_dtype, + isna, +) -from pandas.core import missing, ops +from pandas.core import ( + missing, + ops, +) from pandas.core.accessor import CachedAccessor import pandas.core.algorithms as algos from pandas.core.array_algos.putmask import ( setitem_datetimelike_compat, validate_putmask, ) -from pandas.core.arrays import Categorical, ExtensionArray -from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype +from pandas.core.arrays import 
( + Categorical, + ExtensionArray, +) +from pandas.core.arrays.datetimes import ( + tz_to_dtype, + validate_tz_from_dtype, +) from pandas.core.arrays.sparse import SparseDtype -from pandas.core.base import IndexOpsMixin, PandasObject +from pandas.core.base import ( + IndexOpsMixin, + PandasObject, +) import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.frozen import FrozenList from pandas.core.ops import get_op_result_name from pandas.core.ops.invalid import make_invalid_op -from pandas.core.sorting import ensure_key_mapped, nargsort +from pandas.core.sorting import ( + ensure_key_mapped, + nargsort, +) from pandas.core.strings import StringMethods from pandas.io.formats.printing import ( @@ -119,7 +161,14 @@ ) if TYPE_CHECKING: - from pandas import CategoricalIndex, IntervalIndex, MultiIndex, RangeIndex, Series + from pandas import ( + CategoricalIndex, + DataFrame, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + ) from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -232,16 +281,22 @@ class Index(IndexOpsMixin, PandasObject): # for why we need to wrap these instead of making them class attributes # Moreover, cython will choose the appropriate-dtyped sub-function # given the dtypes of the passed arguments - def _left_indexer_unique(self, left, right): + def _left_indexer_unique(self, left: np.ndarray, right: np.ndarray) -> np.ndarray: return libjoin.left_join_indexer_unique(left, right) - def _left_indexer(self, left, right): + def _left_indexer( + self, left: np.ndarray, right: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.left_join_indexer(left, right) - def _inner_indexer(self, left, right): + def _inner_indexer( + self, left: np.ndarray, right: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.inner_join_indexer(left, right) - def _outer_indexer(self, left, right): + 
def _outer_indexer( + self, left: np.ndarray, right: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.outer_join_indexer(left, right) _typ = "index" @@ -502,7 +557,7 @@ def asi8(self): return None @classmethod - def _simple_new(cls, values, name: Hashable = None): + def _simple_new(cls: Type[_IndexT], values, name: Hashable = None) -> _IndexT: """ We require that we have a dtype compat for the values. If we are passed a non-dtype compat, then coerce using the constructor. @@ -525,11 +580,11 @@ def _simple_new(cls, values, name: Hashable = None): return result @cache_readonly - def _constructor(self): + def _constructor(self: _IndexT) -> Type[_IndexT]: return type(self) @final - def _maybe_check_unique(self): + def _maybe_check_unique(self) -> None: """ Check that an Index has no duplicates. @@ -580,13 +635,13 @@ def _format_duplicate_message(self): # Index Internals Methods @final - def _get_attributes_dict(self): + def _get_attributes_dict(self) -> Dict[str_t, Any]: """ Return an attributes dict for my class. 
""" return {k: getattr(self, k, None) for k in self._attributes} - def _shallow_copy(self, values, name: Hashable = no_default): + def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT: """ Create a new Index with the same class as the caller, don't copy the data, use the same object attributes with passed in attributes taking @@ -660,11 +715,11 @@ def _reset_identity(self) -> None: self._id = _Identity(object()) @final - def _cleanup(self): + def _cleanup(self) -> None: self._engine.clear_mapping() @cache_readonly - def _engine(self): + def _engine(self) -> libindex.ObjectEngine: # property, for now, slow to look up # to avoid a reference cycle, bind `target_values` to a local variable, so @@ -747,6 +802,23 @@ def view(self, cls=None): # we need to see if we are subclassing an # index type here if cls is not None and not hasattr(cls, "_typ"): + dtype = cls + if isinstance(cls, str): + dtype = pandas_dtype(cls) + + if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion( + dtype + ): + if dtype.kind == "m" and dtype != "m8[ns]": + # e.g. m8[s] + return self._data.view(cls) + + arr = self._data.view("i8") + idx_cls = self._dtype_to_subclass(dtype) + arr_cls = idx_cls._data_cls + arr = arr_cls._simple_new(self._data.view("i8"), dtype=dtype) + return idx_cls._simple_new(arr, name=self.name) + result = self._data.view(cls) else: result = self._view() @@ -1197,7 +1269,7 @@ def to_flat_index(self): """ return self - def to_series(self, index=None, name=None): + def to_series(self, index=None, name: Hashable = None) -> Series: """ Create a Series with both index and values equal to the index keys. @@ -1260,7 +1332,7 @@ def to_series(self, index=None, name=None): return Series(self._values.copy(), index=index, name=name) - def to_frame(self, index: bool = True, name=None): + def to_frame(self, index: bool = True, name=None) -> DataFrame: """ Create a DataFrame with a column containing the Index. 
@@ -1375,10 +1447,10 @@ def _validate_names( return new_names - def _get_names(self): + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) - def _set_names(self, values, level=None): + def _set_names(self, values, level=None) -> None: """ Set new names on index. Each name has to be a hashable type. @@ -1579,14 +1651,14 @@ def nlevels(self) -> int: """ return 1 - def _sort_levels_monotonic(self): + def _sort_levels_monotonic(self: _IndexT) -> _IndexT: """ Compat with MultiIndex. """ return self @final - def _validate_index_level(self, level): + def _validate_index_level(self, level) -> None: """ Validate index level. @@ -2323,7 +2395,7 @@ def hasnans(self) -> bool: return False @final - def isna(self): + def isna(self) -> np.ndarray: """ Detect missing values. @@ -2381,7 +2453,7 @@ def isna(self): isnull = isna @final - def notna(self): + def notna(self) -> np.ndarray: """ Detect existing (non-missing) values. @@ -2459,7 +2531,7 @@ def fillna(self, value=None, downcast=None): return Index(result, name=self.name) return self._view() - def dropna(self, how="any"): + def dropna(self: _IndexT, how: str_t = "any") -> _IndexT: """ Return Index without NA/NaN values. @@ -2484,7 +2556,7 @@ def dropna(self, how="any"): # -------------------------------------------------------------------- # Uniqueness Methods - def unique(self, level=None): + def unique(self: _IndexT, level: Optional[Hashable] = None) -> _IndexT: """ Return unique values in the index. @@ -2492,12 +2564,13 @@ def unique(self, level=None): Parameters ---------- - level : int or str, optional, default None + level : int or hashable, optional Only return values from specified level (for MultiIndex). + If int, gets the level by integer position, else by level name. 
Returns ------- - Index without duplicates + Index See Also -------- @@ -2514,7 +2587,7 @@ def unique(self, level=None): return self._shallow_copy(result) @final - def drop_duplicates(self, keep="first"): + def drop_duplicates(self: _IndexT, keep: Union[str_t, bool] = "first") -> _IndexT: """ Return Index with duplicate values removed. @@ -2565,7 +2638,7 @@ def drop_duplicates(self, keep="first"): return super().drop_duplicates(keep=keep) - def duplicated(self, keep="first"): + def duplicated(self, keep: Union[str_t, bool] = "first") -> np.ndarray: """ Indicate duplicate index values. @@ -3151,12 +3224,12 @@ def symmetric_difference(self, other, result_name=None, sort=None): return Index(the_diff, name=result_name) @final - def _assert_can_do_setop(self, other): + def _assert_can_do_setop(self, other) -> bool: if not is_list_like(other): raise TypeError("Input must be Index or array-like") return True - def _convert_can_do_setop(self, other): + def _convert_can_do_setop(self, other) -> Tuple[Index, Hashable]: if not isinstance(other, Index): other = Index(other, name=self.name) result_name = self.name @@ -3339,7 +3412,7 @@ def _get_indexer( return ensure_platform_int(indexer) @final - def _check_indexing_method(self, method): + def _check_indexing_method(self, method: Optional[str_t]) -> None: """ Raise if we have a get_indexer `method` that is not supported or valid. 
""" @@ -3357,7 +3430,9 @@ def _check_indexing_method(self, method): raise ValueError("Invalid fill method") - def _convert_tolerance(self, tolerance, target): + def _convert_tolerance( + self, tolerance, target: Union[np.ndarray, Index] + ) -> np.ndarray: # override this method on subclasses tolerance = np.asarray(tolerance) if target.size != tolerance.size and tolerance.size > 1: @@ -3460,7 +3535,7 @@ def _filter_indexer_tolerance( # -------------------------------------------------------------------- # Indexer Conversion Methods - def _get_partial_string_timestamp_match_key(self, key): + def _get_partial_string_timestamp_match_key(self, key: T) -> T: """ Translate any partial string timestamp matches in key, returning the new key. @@ -3471,7 +3546,7 @@ def _get_partial_string_timestamp_match_key(self, key): return key @final - def _validate_positional_slice(self, key: slice): + def _validate_positional_slice(self, key: slice) -> None: """ For positional indexing, a slice must have either int or None for each of start, stop, and step. @@ -3572,7 +3647,7 @@ def _convert_listlike_indexer(self, keyarr): indexer = self._convert_list_indexer(keyarr) return indexer, keyarr - def _convert_arr_indexer(self, keyarr): + def _convert_arr_indexer(self, keyarr) -> np.ndarray: """ Convert an array-like indexer to the appropriate dtype. @@ -3617,13 +3692,13 @@ def _invalid_indexer(self, form: str_t, key) -> TypeError: # Reindex Methods @final - def _can_reindex(self, indexer): + def _validate_can_reindex(self, indexer: np.ndarray) -> None: """ Check if we are allowing reindexing with this particular indexer. 
Parameters ---------- - indexer : an integer indexer + indexer : an integer ndarray Raises ------ @@ -6071,14 +6146,14 @@ def ensure_index( if hasattr(index_like, "name"): # https://github.com/python/mypy/issues/1424 # error: Item "ExtensionArray" of "Union[ExtensionArray, - # Sequence[Any]]" has no attribute "name" [union-attr] + # Sequence[Any]]" has no attribute "name" # error: Item "Sequence[Any]" of "Union[ExtensionArray, Sequence[Any]]" - # has no attribute "name" [union-attr] - # error: "Sequence[Any]" has no attribute "name" [attr-defined] + # has no attribute "name" + # error: "Sequence[Any]" has no attribute "name" # error: Item "Sequence[Any]" of "Union[Series, Sequence[Any]]" has no - # attribute "name" [union-attr] + # attribute "name" # error: Item "Sequence[Any]" of "Union[Any, Sequence[Any]]" has no - # attribute "name" [union-attr] + # attribute "name" name = index_like.name # type: ignore[union-attr, attr-defined] return Index(index_like, name=name, copy=copy) @@ -6146,7 +6221,7 @@ def trim_front(strings: List[str]) -> List[str]: return strings -def _validate_join_method(method: str): +def _validate_join_method(method: str) -> None: if method not in ["left", "right", "inner", "outer"]: raise ValueError(f"do not recognize join method {method}") @@ -6358,7 +6433,7 @@ def get_unanimous_names(*indexes: Index) -> Tuple[Hashable, ...]: return names -def unpack_nested_dtype(other: Index) -> Index: +def unpack_nested_dtype(other: _IndexT) -> _IndexT: """ When checking if our dtype is comparable with another, we need to unpack CategoricalDtype to look at its categories.dtype. 
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 265170dd28a3b..13c53dfafed4d 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,4 +1,9 @@ -from typing import Any, Hashable, List, Optional +from typing import ( + Any, + Hashable, + List, + Optional, +) import warnings import numpy as np @@ -7,22 +12,42 @@ from pandas._libs import index as libindex from pandas._libs.lib import no_default -from pandas._typing import ArrayLike, Dtype -from pandas.util._decorators import Appender, doc +from pandas._typing import ( + ArrayLike, + Dtype, +) +from pandas.util._decorators import ( + Appender, + doc, +) from pandas.core.dtypes.common import ( ensure_platform_int, is_categorical_dtype, is_scalar, ) -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna, + notna, +) from pandas.core import accessor -from pandas.core.arrays.categorical import Categorical, contains +from pandas.core.arrays.categorical import ( + Categorical, + contains, +) from pandas.core.construction import extract_array import pandas.core.indexes.base as ibase -from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name -from pandas.core.indexes.extension import NDArrayBackedExtensionIndex, inherit_names +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, + maybe_extract_name, +) +from pandas.core.indexes.extension import ( + NDArrayBackedExtensionIndex, + inherit_names, +) _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update({"target_klass": "CategoricalIndex"}) @@ -318,8 +343,7 @@ def _format_attrs(self): "categories", ibase.default_pprint(self.categories, max_seq_items=max_categories), ), - # pandas\core\indexes\category.py:315: error: "CategoricalIndex" - # has no attribute "ordered" [attr-defined] + # error: "CategoricalIndex" has no attribute "ordered" ("ordered", 
self.ordered), # type: ignore[attr-defined] ] if self.name is not None: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 00f47c0aaf538..e1f2a40598963 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -17,11 +17,25 @@ import numpy as np -from pandas._libs import NaT, Timedelta, iNaT, join as libjoin, lib -from pandas._libs.tslibs import BaseOffset, Resolution, Tick +from pandas._libs import ( + NaT, + Timedelta, + iNaT, + join as libjoin, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + Resolution, + Tick, +) from pandas._typing import Callable from pandas.compat.numpy import function as nv -from pandas.util._decorators import Appender, cache_readonly, doc +from pandas.util._decorators import ( + Appender, + cache_readonly, + doc, +) from pandas.core.dtypes.common import ( is_bool_dtype, @@ -35,11 +49,18 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCSeries -from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin import pandas.core.common as com import pandas.core.indexes.base as ibase -from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core.indexes.base import ( + Index, + _index_shared_docs, +) from pandas.core.indexes.extension import ( NDArrayBackedExtensionIndex, inherit_names, @@ -606,12 +627,6 @@ def insert(self, loc: int, item): result._data._freq = self._get_insert_freq(loc, item) return result - def _validate_fill_value(self, value): - """ - Convert value to be insertable to ndarray. 
- """ - return self._data._validate_setitem_value(value) - # -------------------------------------------------------------------- # Join/Set Methods @@ -711,13 +726,9 @@ def _intersection(self, other: Index, sort=False) -> Index: result = self[:0] else: lslice = slice(*left.slice_locs(start, end)) - left_chunk = left._values[lslice] - # error: Argument 1 to "_simple_new" of "DatetimeIndexOpsMixin" has - # incompatible type "Union[ExtensionArray, Any]"; expected - # "Union[DatetimeArray, TimedeltaArray, PeriodArray]" [arg-type] - result = type(self)._simple_new(left_chunk) # type: ignore[arg-type] + result = left._values[lslice] - return self._wrap_setop_result(other, result) + return result def _can_fast_intersect(self: _T, other: _T) -> bool: # Note: we only get here with len(self) > 0 and len(other) > 0 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 2ef703de85dbe..9ea43d083f5b3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1,13 +1,29 @@ from __future__ import annotations -from datetime import date, datetime, time, timedelta, tzinfo +from datetime import ( + date, + datetime, + time, + timedelta, + tzinfo, +) import operator -from typing import TYPE_CHECKING, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Optional, + Tuple, +) import warnings import numpy as np -from pandas._libs import NaT, Period, Timestamp, index as libindex, lib +from pandas._libs import ( + NaT, + Period, + Timestamp, + index as libindex, + lib, +) from pandas._libs.tslibs import ( Resolution, ints_to_pydatetime, @@ -16,9 +32,15 @@ to_offset, ) from pandas._libs.tslibs.offsets import prefix_mapping -from pandas._typing import Dtype, DtypeObj +from pandas._typing import ( + Dtype, + DtypeObj, +) from pandas.errors import InvalidIndexError -from pandas.util._decorators import cache_readonly, doc +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.core.dtypes.common import ( 
DT64NS_DTYPE, @@ -28,15 +50,27 @@ ) from pandas.core.dtypes.missing import is_valid_na_for_dtype -from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype +from pandas.core.arrays.datetimes import ( + DatetimeArray, + tz_to_dtype, +) import pandas.core.common as com -from pandas.core.indexes.base import Index, get_unanimous_names, maybe_extract_name +from pandas.core.indexes.base import ( + Index, + get_unanimous_names, + maybe_extract_name, +) from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin from pandas.core.indexes.extension import inherit_names from pandas.core.tools.times import to_time if TYPE_CHECKING: - from pandas import DataFrame, Float64Index, PeriodIndex, TimedeltaIndex + from pandas import ( + DataFrame, + Float64Index, + PeriodIndex, + TimedeltaIndex, + ) def _new_DatetimeIndex(cls, d): diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index ea3678a7e15d9..f1418869713d6 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -1,18 +1,32 @@ """ Shared methods for Index subclasses backed by ExtensionArray. 
""" -from typing import List, TypeVar +from typing import ( + List, + TypeVar, + Union, +) import numpy as np from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import cache_readonly, doc - -from pandas.core.dtypes.common import is_dtype_equal, is_object_dtype, pandas_dtype -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries - -from pandas.core.arrays import ExtensionArray +from pandas.util._decorators import ( + cache_readonly, + doc, +) + +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_object_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) + +from pandas.core.arrays import IntervalArray from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.base import Index @@ -203,7 +217,7 @@ class ExtensionIndex(Index): # The base class already passes through to _data: # size, __len__, dtype - _data: ExtensionArray + _data: Union[IntervalArray, NDArrayBackedExtensionArray] __eq__ = _make_wrapped_comparison_op("__eq__") __ne__ = _make_wrapped_comparison_op("__ne__") @@ -226,9 +240,8 @@ def __getitem__(self, key): if result.ndim == 1: return type(self)(result, name=self.name) # Unpack to ndarray for MPL compat - # pandas\core\indexes\extension.py:220: error: "ExtensionArray" has - # no attribute "_data" [attr-defined] - result = result._data # type: ignore[attr-defined] + + result = result._ndarray # Includes cases where we get a 2D ndarray back for MPL compat deprecate_ndim_indexing(result) @@ -263,6 +276,12 @@ def insert(self, loc: int, item): # ExtensionIndex subclasses must override Index.insert raise AbstractMethodError(self) + def _validate_fill_value(self, value): + """ + Convert value to be insertable to underlying array. 
+ """ + return self._data._validate_setitem_value(value) + def _get_unique_index(self): if self.is_unique: return self @@ -296,6 +315,10 @@ def astype(self, dtype, copy=True): return self return self.copy() + if isinstance(dtype, np.dtype) and dtype.kind == "M" and dtype != "M8[ns]": + # For now Datetime supports this by unwrapping ndarray, but DTI doesn't + raise TypeError(f"Cannot cast {type(self._data).__name__} to dtype") + new_values = self._data.astype(dtype, copy=copy) # pass copy=False because any copying will be done in the diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index af353cf3fb5f7..ad512b8393166 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -2,20 +2,47 @@ from __future__ import annotations from functools import wraps -from operator import le, lt +from operator import ( + le, + lt, +) import textwrap -from typing import TYPE_CHECKING, Any, Hashable, List, Optional, Tuple, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + List, + Optional, + Tuple, + Union, + cast, +) import numpy as np from pandas._config import get_option from pandas._libs import lib -from pandas._libs.interval import Interval, IntervalMixin, IntervalTree -from pandas._libs.tslibs import BaseOffset, Timedelta, Timestamp, to_offset -from pandas._typing import Dtype, DtypeObj +from pandas._libs.interval import ( + Interval, + IntervalMixin, + IntervalTree, +) +from pandas._libs.tslibs import ( + BaseOffset, + Timedelta, + Timestamp, + to_offset, +) +from pandas._typing import ( + Dtype, + DtypeObj, +) from pandas.errors import InvalidIndexError -from pandas.util._decorators import Appender, cache_readonly +from pandas.util._decorators import ( + Appender, + cache_readonly, +) from pandas.util._exceptions import rewrite_exception from pandas.core.dtypes.cast import ( @@ -42,9 +69,15 @@ ) from pandas.core.dtypes.dtypes import IntervalDtype -from pandas.core.algorithms import take_nd, unique 
+from pandas.core.algorithms import ( + take_nd, + unique, +) from pandas.core.array_algos.putmask import validate_putmask -from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs +from pandas.core.arrays.interval import ( + IntervalArray, + _interval_shared_docs, +) import pandas.core.common as com from pandas.core.indexers import is_valid_positional_slice import pandas.core.indexes.base as ibase @@ -55,10 +88,19 @@ ensure_index, maybe_extract_name, ) -from pandas.core.indexes.datetimes import DatetimeIndex, date_range -from pandas.core.indexes.extension import ExtensionIndex, inherit_names +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + date_range, +) +from pandas.core.indexes.extension import ( + ExtensionIndex, + inherit_names, +) from pandas.core.indexes.multi import MultiIndex -from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range +from pandas.core.indexes.timedeltas import ( + TimedeltaIndex, + timedelta_range, +) from pandas.core.ops import get_op_result_name if TYPE_CHECKING: @@ -533,6 +575,10 @@ def _maybe_convert_i8(self, key): key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) if lib.is_period(key): key_i8 = key.ordinal + elif isinstance(key_i8, Timestamp): + key_i8 = key_i8.value + elif isinstance(key_i8, (np.datetime64, np.timedelta64)): + key_i8 = key_i8.view("i8") else: # DatetimeIndex/TimedeltaIndex key_dtype, key_i8 = key.dtype, Index(key.asi8) @@ -776,9 +822,7 @@ def _convert_list_indexer(self, keyarr): def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: if not isinstance(dtype, IntervalDtype): return False - if self.closed != dtype.closed: - return False - common_subtype = find_common_type([self.dtype.subtype, dtype.subtype]) + common_subtype = find_common_type([self.dtype, dtype]) return not is_object_dtype(common_subtype) # -------------------------------------------------------------------- @@ -990,9 +1034,6 @@ def func(self, other, sort=sort): # 
-------------------------------------------------------------------- - def _validate_fill_value(self, value): - return self._data._validate_setitem_value(value) - @property def _is_all_dates(self) -> bool: """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 26d59db1b08fd..1889821c79756 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -21,12 +21,29 @@ from pandas._config import get_option -from pandas._libs import algos as libalgos, index as libindex, lib +from pandas._libs import ( + algos as libalgos, + index as libindex, + lib, +) from pandas._libs.hashtable import duplicated_int64 -from pandas._typing import AnyArrayLike, DtypeObj, Scalar, Shape +from pandas._typing import ( + AnyArrayLike, + DtypeObj, + Scalar, + Shape, +) from pandas.compat.numpy import function as nv -from pandas.errors import InvalidIndexError, PerformanceWarning, UnsortedIndexError -from pandas.util._decorators import Appender, cache_readonly, doc +from pandas.errors import ( + InvalidIndexError, + PerformanceWarning, + UnsortedIndexError, +) +from pandas.util._decorators import ( + Appender, + cache_readonly, + doc, +) from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.dtypes.common import ( @@ -42,8 +59,15 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import ABCDataFrame, ABCDatetimeIndex, ABCTimedeltaIndex -from pandas.core.dtypes.missing import array_equivalent, isna +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, +) import pandas.core.algorithms as algos from pandas.core.arrays import Categorical @@ -72,7 +96,10 @@ ) if TYPE_CHECKING: - from pandas import CategoricalIndex, Series + from pandas import ( + CategoricalIndex, + Series, + ) _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update( @@ -1082,7 
+1109,7 @@ def _engine(self): return MultiIndexUIntEngine(self.levels, self.codes, offsets) @property - def _constructor(self): + def _constructor(self) -> Callable[..., MultiIndex]: return type(self).from_tuples @doc(Index._shallow_copy) @@ -1095,7 +1122,7 @@ def _view(self) -> MultiIndex: result = type(self)( levels=self.levels, codes=self.codes, - sortorder=None, + sortorder=self.sortorder, names=self.names, verify_integrity=False, ) @@ -1448,8 +1475,7 @@ def _set_names(self, names, level=None, validate=True): raise TypeError( f"{type(self).__name__}.name must be a hashable type" ) - # pandas\core\indexes\multi.py:1448: error: Cannot determine type - # of '__setitem__' [has-type] + # error: Cannot determine type of '__setitem__' self._names[lev] = name # type: ignore[has-type] # If .levels has been accessed, the names in our cache will be stale. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index bb4a4f62e59cf..96c8c1ab9b69c 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -1,10 +1,19 @@ -from typing import Hashable, Optional +from typing import ( + Hashable, + Optional, +) import warnings import numpy as np -from pandas._libs import index as libindex, lib -from pandas._typing import Dtype, DtypeObj +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._typing import ( + Dtype, + DtypeObj, +) from pandas.util._decorators import doc from pandas.core.dtypes.cast import astype_nansafe @@ -24,7 +33,10 @@ from pandas.core.dtypes.generic import ABCSeries import pandas.core.common as com -from pandas.core.indexes.base import Index, maybe_extract_name +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) _num_index_shared_docs = {} diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a9561cc477d4a..0c5dbec2094e5 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1,15 +1,35 @@ from __future__ import 
annotations -from datetime import datetime, timedelta -from typing import Any, Optional +from datetime import ( + datetime, + timedelta, +) +from typing import ( + Any, + Optional, +) import warnings import numpy as np -from pandas._libs import index as libindex, lib -from pandas._libs.tslibs import BaseOffset, Period, Resolution, Tick -from pandas._libs.tslibs.parsing import DateParseError, parse_time_string -from pandas._typing import Dtype, DtypeObj +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + BaseOffset, + Period, + Resolution, + Tick, +) +from pandas._libs.tslibs.parsing import ( + DateParseError, + parse_time_string, +) +from pandas._typing import ( + Dtype, + DtypeObj, +) from pandas.errors import InvalidIndexError from pandas.util._decorators import doc @@ -33,7 +53,10 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import maybe_extract_name from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin -from pandas.core.indexes.datetimes import DatetimeIndex, Index +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + Index, +) from pandas.core.indexes.extension import inherit_names from pandas.core.indexes.numeric import Int64Index @@ -166,21 +189,21 @@ def to_timestamp(self, freq=None, how="start") -> DatetimeIndex: return DatetimeIndex._simple_new(arr, name=self.name) # https://github.com/python/mypy/issues/1362 - # error: Decorated property not supported [misc] + # error: Decorated property not supported @property # type:ignore[misc] @doc(PeriodArray.hour.fget) def hour(self) -> Int64Index: return Int64Index(self._data.hour, name=self.name) # https://github.com/python/mypy/issues/1362 - # error: Decorated property not supported [misc] + # error: Decorated property not supported @property # type:ignore[misc] @doc(PeriodArray.minute.fget) def minute(self) -> Int64Index: return Int64Index(self._data.minute, name=self.name) # 
https://github.com/python/mypy/issues/1362 - # error: Decorated property not supported [misc] + # error: Decorated property not supported @property # type:ignore[misc] @doc(PeriodArray.second.fget) def second(self) -> Int64Index: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ee0b49aac3f79..a0f546a6bd748 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -3,7 +3,15 @@ from datetime import timedelta import operator from sys import getsizeof -from typing import TYPE_CHECKING, Any, Hashable, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + List, + Optional, + Tuple, + Type, +) import warnings import numpy as np @@ -12,7 +20,10 @@ from pandas._libs.lib import no_default from pandas._typing import Dtype from pandas.compat.numpy import function as nv -from pandas.util._decorators import cache_readonly, doc +from pandas.util._decorators import ( + cache_readonly, + doc, +) from pandas.util._exceptions import rewrite_exception from pandas.core.dtypes.common import ( @@ -31,7 +42,10 @@ from pandas.core.construction import extract_array import pandas.core.indexes.base as ibase from pandas.core.indexes.base import maybe_extract_name -from pandas.core.indexes.numeric import Float64Index, Int64Index +from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, +) from pandas.core.ops.common import unpack_zerodim_and_defer if TYPE_CHECKING: @@ -158,7 +172,7 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: # -------------------------------------------------------------------- @cache_readonly - def _constructor(self): + def _constructor(self) -> Type[Int64Index]: """ return the class to use for construction """ return Int64Index diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 79eb8de083958..a23dd10bc3c0e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,16 +1,32 @@ 
""" implement the TimedeltaIndex """ -from pandas._libs import index as libindex, lib -from pandas._libs.tslibs import Timedelta, to_offset -from pandas._typing import DtypeObj, Optional +from pandas._libs import ( + index as libindex, + lib, +) +from pandas._libs.tslibs import ( + Timedelta, + to_offset, +) +from pandas._typing import ( + DtypeObj, + Optional, +) from pandas.errors import InvalidIndexError -from pandas.core.dtypes.common import TD64NS_DTYPE, is_scalar, is_timedelta64_dtype +from pandas.core.dtypes.common import ( + TD64NS_DTYPE, + is_scalar, + is_timedelta64_dtype, +) from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.timedeltas import TimedeltaArray import pandas.core.common as com -from pandas.core.indexes.base import Index, maybe_extract_name +from pandas.core.indexes.base import ( + Index, + maybe_extract_name, +) from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin from pandas.core.indexes.extension import inherit_names diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cc7c5f666feda..cfe16627d5c64 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,7 +1,15 @@ from __future__ import annotations from contextlib import suppress -from typing import TYPE_CHECKING, Any, Hashable, List, Sequence, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Hashable, + List, + Sequence, + Tuple, + Union, +) import warnings import numpy as np @@ -10,7 +18,10 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim -from pandas.errors import AbstractMethodError, InvalidIndexError +from pandas.errors import ( + AbstractMethodError, + InvalidIndexError, +) from pandas.util._decorators import doc from pandas.core.dtypes.common import ( @@ -26,8 +37,15 @@ is_sequence, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries -from pandas.core.dtypes.missing 
import infer_fill_value, isna +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + infer_fill_value, + isna, +) import pandas.core.common as com from pandas.core.construction import array as pd_array @@ -40,7 +58,10 @@ from pandas.core.indexes.api import Index if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) # "null slice" _NS = slice(None, None) @@ -2371,7 +2392,7 @@ def is_label_like(key) -> bool: return not isinstance(key, slice) and not is_list_like_indexer(key) -def need_slice(obj) -> bool: +def need_slice(obj: slice) -> bool: """ Returns ------- @@ -2382,57 +2403,3 @@ def need_slice(obj) -> bool: or obj.stop is not None or (obj.step is not None and obj.step != 1) ) - - -def non_reducing_slice(slice_): - """ - Ensure that a slice doesn't reduce to a Series or Scalar. - - Any user-passed `subset` should have this called on it - to make sure we're always working with DataFrames. - """ - # default to column slice, like DataFrame - # ['A', 'B'] -> IndexSlices[:, ['A', 'B']] - kinds = (ABCSeries, np.ndarray, Index, list, str) - if isinstance(slice_, kinds): - slice_ = IndexSlice[:, slice_] - - def pred(part) -> bool: - """ - Returns - ------- - bool - True if slice does *not* reduce, - False if `part` is a tuple. - """ - # true when slice does *not* reduce, False when part is a tuple, - # i.e. 
MultiIndex slice - if isinstance(part, tuple): - # GH#39421 check for sub-slice: - return any((isinstance(s, slice) or is_list_like(s)) for s in part) - else: - return isinstance(part, slice) or is_list_like(part) - - if not is_list_like(slice_): - if not isinstance(slice_, slice): - # a 1-d slice, like df.loc[1] - slice_ = [[slice_]] - else: - # slice(a, b, c) - slice_ = [slice_] # to tuplize later - else: - slice_ = [part if pred(part) else [part] for part in slice_] - return tuple(slice_) - - -def maybe_numeric_slice(df, slice_, include_bool: bool = False): - """ - Want nice defaults for background_gradient that don't break - with non-numeric data. But if slice_ is passed go with that. - """ - if slice_ is None: - dtypes = [np.number] - if include_bool: - dtypes.append(bool) - slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns] - return slice_ diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index ff4e186e147d7..132598e03d6c0 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -11,7 +11,6 @@ ObjectBlock, TimeDeltaBlock, make_block, - safe_reshape, ) from pandas.core.internals.concat import concatenate_block_managers from pandas.core.internals.managers import ( @@ -31,7 +30,6 @@ "FloatBlock", "ObjectBlock", "TimeDeltaBlock", - "safe_reshape", "make_block", "DataManager", "ArrayManager", diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index a8493e647f39a..e09a434170780 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -3,31 +3,59 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + List, + Optional, + Tuple, + TypeVar, + Union, +) import numpy as np -from pandas._libs import algos as libalgos, lib -from pandas._typing import ArrayLike, DtypeObj, 
Hashable +from pandas._libs import lib +from pandas._typing import ( + ArrayLike, + DtypeObj, + Hashable, +) from pandas.util._validators import validate_bool_kwarg -from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from_scalar, +) from pandas.core.dtypes.common import ( is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_numeric_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas.core.dtypes.missing import array_equals, isna +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + array_equals, + isna, +) import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import extract_array from pandas.core.indexers import maybe_convert_indices -from pandas.core.indexes.api import Index, ensure_index +from pandas.core.indexes.api import ( + Index, + ensure_index, +) from pandas.core.internals.base import DataManager from pandas.core.internals.blocks import make_block @@ -377,28 +405,9 @@ def shift(self, periods: int, axis: int, fill_value) -> ArrayManager: ) def fillna(self, value, limit, inplace: bool, downcast) -> ArrayManager: - # TODO implement downcast - inplace = validate_bool_kwarg(inplace, "inplace") - - def array_fillna(array, value, limit, inplace): - - mask = isna(array) - if limit is not None: - limit = libalgos.validate_limit(None, limit=limit) - mask[mask.cumsum() > limit] = False - - # TODO could optimize for arrays that cannot hold NAs - # (like _can_hold_na on Blocks) - if not inplace: - array = array.copy() - - # np.putmask(array, mask, value) - if np.any(mask): - # TODO allow invalid value if there is 
nothing to fill? - array[mask] = value - return array - - return self.apply(array_fillna, value=value, limit=limit, inplace=inplace) + return self.apply_with_block( + "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast + ) def downcast(self) -> ArrayManager: return self.apply_with_block("downcast") @@ -454,7 +463,7 @@ def is_mixed_type(self) -> bool: @property def is_numeric_mixed_type(self) -> bool: - return False + return all(is_numeric_dtype(t) for t in self.get_dtypes()) @property def any_extension_types(self) -> bool: @@ -659,24 +668,53 @@ def idelete(self, indexer): def iset(self, loc: Union[int, slice, np.ndarray], value): """ - Set new item in-place. Does not consolidate. Adds new Block if not - contained in the current set of items + Set new column(s). + + This changes the ArrayManager in-place, but replaces (an) existing + column(s), not changing column values in-place). + + Parameters + ---------- + loc : integer, slice or boolean mask + Positional location (already bounds checked) + value : array-like """ + # single column -> single integer index if lib.is_integer(loc): - # TODO normalize array -> this should in theory not be needed? + # TODO the extract array should in theory not be needed? value = extract_array(value, extract_numpy=True) + + # TODO can we avoid needing to unpack this here? 
That means converting + # DataFrame into 1D array when loc is an integer if isinstance(value, np.ndarray) and value.ndim == 2: + assert value.shape[1] == 1 value = value[0, :] assert isinstance(value, (np.ndarray, ExtensionArray)) - # value = np.asarray(value) - # assert isinstance(value, np.ndarray) + assert value.ndim == 1 assert len(value) == len(self._axes[0]) self.arrays[loc] = value return - # TODO - raise Exception + # multiple columns -> convert slice or array to integer indices + elif isinstance(loc, slice): + indices = range( + loc.start if loc.start is not None else 0, + loc.stop if loc.stop is not None else self.shape_proper[1], + loc.step if loc.step is not None else 1, + ) + else: + assert isinstance(loc, np.ndarray) + assert loc.dtype == "bool" + indices = np.nonzero(loc)[0] + + assert value.ndim == 2 + assert value.shape[0] == len(self._axes[0]) + + for value_idx, mgr_idx in enumerate(indices): + value_arr = value[:, value_idx] + self.arrays[mgr_idx] = value_arr + return def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False): """ @@ -764,7 +802,7 @@ def _reindex_indexer( # some axes don't allow reindexing with dups if not allow_dups: - self._axes[axis]._can_reindex(indexer) + self._axes[axis]._validate_can_reindex(indexer) # if axis >= self.ndim: # raise IndexError("Requested axis not found in manager") diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 585a2dccf3acf..2ce91134f61d6 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -2,12 +2,18 @@ Base class for the internal managers. Both BlockManager and ArrayManager inherit from this class. 
""" -from typing import List, TypeVar +from typing import ( + List, + TypeVar, +) from pandas.errors import AbstractMethodError from pandas.core.base import PandasObject -from pandas.core.indexes.api import Index, ensure_index +from pandas.core.indexes.api import ( + Index, + ensure_index, +) T = TypeVar("T", bound="DataManager") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8ba6018e743bb..efda1f8038cb7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2,12 +2,22 @@ import inspect import re -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Type, Union, cast +from typing import ( + TYPE_CHECKING, + Any, + Callable, + List, + Optional, + Type, + Union, + cast, +) import numpy as np from pandas._libs import ( Interval, + NaT, Period, Timestamp, algos as libalgos, @@ -17,7 +27,13 @@ ) from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion -from pandas._typing import ArrayLike, Dtype, DtypeObj, Shape +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + Shape, + final, +) from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -32,22 +48,31 @@ soft_convert_objects, ) from pandas.core.dtypes.common import ( - DT64NS_DTYPE, - TD64NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, - is_integer, is_list_like, is_object_dtype, is_sparse, pandas_dtype, ) -from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, PandasDtype -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + ExtensionDtype, + PandasDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCPandasArray, + ABCSeries, +) +from pandas.core.dtypes.missing import ( 
+ is_valid_na_for_dtype, + isna, +) import pandas.core.algorithms as algos from pandas.core.array_algos.putmask import ( @@ -84,7 +109,10 @@ import pandas.core.missing as missing if TYPE_CHECKING: - from pandas import Float64Index, Index + from pandas import ( + Float64Index, + Index, + ) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray @@ -101,9 +129,6 @@ class Block(PandasObject): __slots__ = ["_mgr_locs", "values", "ndim"] is_numeric = False is_float = False - is_datetime = False - is_datetimetz = False - is_timedelta = False is_bool = False is_object = False is_extension = False @@ -144,7 +169,8 @@ def __init__(self, values, placement, ndim: int): f"placement implies {len(self.mgr_locs)}" ) - def _maybe_coerce_values(self, values): + @classmethod + def _maybe_coerce_values(cls, values): """ Ensure we have correctly-typed values. @@ -182,10 +208,17 @@ def _check_ndim(self, values, ndim): if ndim is None: ndim = values.ndim - if self._validate_ndim and values.ndim != ndim: + if self._validate_ndim: + if values.ndim != ndim: + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) + elif values.ndim > ndim: + # ExtensionBlock raise ValueError( "Wrong number of dimensions. " - f"values.ndim != ndim [{values.ndim} != {ndim}]" + f"values.ndim > ndim [{values.ndim} > {ndim}]" ) return ndim @@ -199,6 +232,7 @@ def _holder(self): """ return None + @final @property def _consolidate_key(self): return self._can_consolidate, self.dtype.name @@ -210,15 +244,11 @@ def is_view(self) -> bool: values = cast(np.ndarray, values) return values.base is not None + @final @property def is_categorical(self) -> bool: return self._holder is Categorical - @property - def is_datelike(self) -> bool: - """ return True if I am a non-datelike """ - return self.is_datetime or self.is_timedelta - def external_values(self): """ The array that Series.values returns (public attribute). 
@@ -251,6 +281,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: return self.values.astype(object) return self.values + @final def get_block_values_for_json(self) -> np.ndarray: """ This is used in the JSON C code. @@ -273,6 +304,7 @@ def mgr_locs(self, new_mgr_locs): self._mgr_locs = new_mgr_locs + @final def make_block(self, values, placement=None) -> Block: """ Create a new block, with type inference propagate any values that are @@ -281,18 +313,18 @@ def make_block(self, values, placement=None) -> Block: if placement is None: placement = self.mgr_locs if self.is_extension: - values = _block_shape(values, ndim=self.ndim) + values = ensure_block_shape(values, ndim=self.ndim) return make_block(values, placement=placement, ndim=self.ndim) - def make_block_same_class(self, values, placement=None, ndim=None) -> Block: + @final + def make_block_same_class(self, values, placement=None) -> Block: """ Wrap given values in a block of same type as self. """ if placement is None: placement = self.mgr_locs - if ndim is None: - ndim = self.ndim - return type(self)(values, placement=placement, ndim=ndim) + return type(self)(values, placement=placement, ndim=self.ndim) + @final def __repr__(self) -> str: # don't want to print out all of the items here name = type(self).__name__ @@ -305,12 +337,15 @@ def __repr__(self) -> str: return result + @final def __len__(self) -> int: return len(self.values) + @final def __getstate__(self): return self.mgr_locs.indexer, self.values + @final def __setstate__(self, state): self.mgr_locs = libinternals.BlockPlacement(state[0]) self.values = state[1] @@ -321,6 +356,7 @@ def _slice(self, slicer): return self.values[slicer] + @final def getitem_block(self, slicer, new_mgr_locs=None) -> Block: """ Perform __getitem__-like, return result as block. 
@@ -344,6 +380,7 @@ def getitem_block(self, slicer, new_mgr_locs=None) -> Block: def shape(self) -> Shape: return self.values.shape + @final @property def dtype(self) -> DtypeObj: return self.values.dtype @@ -362,6 +399,7 @@ def set_inplace(self, locs, values): """ self.values[locs] = values + @final def delete(self, loc) -> None: """ Delete given loc(-s) from block in-place. @@ -369,6 +407,7 @@ def delete(self, loc) -> None: self.values = np.delete(self.values, loc, 0) self.mgr_locs = self.mgr_locs.delete(loc) + @final def apply(self, func, **kwargs) -> List[Block]: """ apply the function to my values; return a block if we are not @@ -400,6 +439,7 @@ def reduce(self, func, ignore_failures: bool = False) -> List[Block]: nb = self.make_block(res_values) return [nb] + @final def _split_op_result(self, result) -> List[Block]: # See also: split_and_operate if is_extension_array_dtype(result) and result.ndim > 1: @@ -442,8 +482,7 @@ def fillna( if self._can_hold_element(value): nb = self if inplace else self.copy() putmask_inplace(nb.values, mask, value) - # TODO: should be nb._maybe_downcast? - return self._maybe_downcast([nb], downcast) + return nb._maybe_downcast([nb], downcast) if noop: # we can't process the value, but nothing to do @@ -461,6 +500,7 @@ def f(mask, val, idx): return self.split_and_operate(None, f, inplace) + @final def _split(self) -> List[Block]: """ Split a block into a list of single-column blocks. @@ -475,6 +515,7 @@ def _split(self) -> List[Block]: new_blocks.append(nb) return new_blocks + @final def split_and_operate( self, mask, f, inplace: bool, ignore_failures: bool = False ) -> List[Block]: @@ -507,7 +548,7 @@ def make_a_block(nv, ref_loc): else: # Put back the dimension that was taken from it and make # a block out of the result. 
- nv = _block_shape(nv, ndim=self.ndim) + nv = ensure_block_shape(nv, ndim=self.ndim) block = self.make_block(values=nv, placement=ref_loc) return block @@ -549,7 +590,8 @@ def _maybe_downcast(self, blocks: List[Block], downcast=None) -> List[Block]: # no need to downcast our float # unless indicated - if downcast is None and (self.is_float or self.is_datelike): + if downcast is None and self.dtype.kind in ["f", "m", "M"]: + # TODO: complex? more generally, self._can_hold_na? return blocks return extend_blocks([b.downcast(downcast) for b in blocks]) @@ -590,6 +632,7 @@ def f(mask, val, idx): return self.split_and_operate(None, f, False) + @final def astype(self, dtype, copy: bool = False, errors: str = "raise"): """ Coerce to the new dtype. @@ -636,17 +679,28 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): raise newb = self.make_block(new_values) - if newb.is_numeric and self.is_numeric: - if newb.shape != self.shape: - raise TypeError( - f"cannot set astype for copy = [{copy}] for dtype " - f"({self.dtype.name} [{self.shape}]) to different shape " - f"({newb.dtype.name} [{newb.shape}])" - ) + if newb.shape != self.shape: + raise TypeError( + f"cannot set astype for copy = [{copy}] for dtype " + f"({self.dtype.name} [{self.shape}]) to different shape " + f"({newb.dtype.name} [{newb.shape}])" + ) return newb def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike: values = self.values + if values.dtype.kind in ["m", "M"]: + values = self.array_values() + + if ( + values.dtype.kind in ["m", "M"] + and dtype.kind in ["i", "u"] + and isinstance(dtype, np.dtype) + and dtype.itemsize != 8 + ): + # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced + msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]" + raise TypeError(msg) if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype): return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True) @@ -682,6 +736,7 @@ def _can_hold_element(self, 
element: Any) -> bool: """ require the same dtype as ourselves """ raise NotImplementedError("Implemented on subclasses") + @final def should_store(self, value: ArrayLike) -> bool: """ Should we set self.values[indexer] = value inplace or do we need to cast? @@ -715,12 +770,16 @@ def to_native_types(self, na_rep="nan", quoting=None, **kwargs): return self.make_block(values) # block actions # + @final def copy(self, deep: bool = True): """ copy constructor """ values = self.values if deep: values = values.copy() - return self.make_block_same_class(values, ndim=self.ndim) + return self.make_block_same_class(values) + + # --------------------------------------------------------------------- + # Replace def replace( self, @@ -766,6 +825,7 @@ def replace( blocks = blk.convert(numeric=False, copy=not inplace) return blocks + @final def _replace_regex( self, to_replace, @@ -811,6 +871,7 @@ def _replace_regex( nbs = [block] return nbs + @final def _replace_list( self, src_list: List[Any], @@ -872,6 +933,58 @@ def _replace_list( rb = new_rb return rb + @final + def _replace_coerce( + self, + to_replace, + value, + mask: np.ndarray, + inplace: bool = True, + regex: bool = False, + ) -> List[Block]: + """ + Replace value corresponding to the given boolean array with another + value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + mask : np.ndarray[bool] + True indicate corresponding element is ignored. + inplace : bool, default True + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. 
+ + Returns + ------- + List[Block] + """ + if mask.any(): + if not regex: + nb = self.coerce_to_target_dtype(value) + if nb is self and not inplace: + nb = nb.copy() + putmask_inplace(nb.values, mask, value) + return [nb] + else: + regex = should_use_regex(regex, to_replace) + if regex: + return self._replace_regex( + to_replace, + value, + inplace=inplace, + convert=False, + mask=mask, + ) + return self.replace(to_replace, value, inplace=inplace, regex=False) + return [self] + + # --------------------------------------------------------------------- + def setitem(self, indexer, value): """ Attempt self.values[indexer] = value, possibly creating a new array. @@ -921,7 +1034,7 @@ def setitem(self, indexer, value): arr_value = value else: is_ea_value = False - arr_value = np.array(value) + arr_value = np.asarray(value) if transpose: values = values.T @@ -1055,6 +1168,7 @@ def f(mask, val, idx): new_blocks = self.split_and_operate(mask, f, True) return new_blocks + @final def coerce_to_target_dtype(self, other) -> Block: """ coerce the current block to a dtype compat for other @@ -1070,6 +1184,7 @@ def coerce_to_target_dtype(self, other) -> Block: return self.astype(new_dtype, copy=False) + @final def interpolate( self, method: str = "pad", @@ -1128,6 +1243,7 @@ def interpolate( **kwargs, ) + @final def _interpolate_with_fill( self, method: str = "pad", @@ -1152,9 +1268,10 @@ def _interpolate_with_fill( limit_area=limit_area, ) - blocks = [self.make_block_same_class(values, ndim=self.ndim)] + blocks = [self.make_block_same_class(values)] return self._maybe_downcast(blocks, downcast) + @final def _interpolate( self, method: str, @@ -1367,7 +1484,7 @@ def _unstack(self, unstacker, fill_value, new_placement): new_values = new_values.T[mask] new_placement = new_placement[mask] - blocks = [make_block(new_values, placement=new_placement)] + blocks = [make_block(new_values, placement=new_placement, ndim=2)] return blocks, mask def quantile( @@ -1402,55 +1519,6 @@ def 
quantile( return make_block(result, placement=self.mgr_locs, ndim=2) - def _replace_coerce( - self, - to_replace, - value, - mask: np.ndarray, - inplace: bool = True, - regex: bool = False, - ) -> List[Block]: - """ - Replace value corresponding to the given boolean array with another - value. - - Parameters - ---------- - to_replace : object or pattern - Scalar to replace or regular expression to match. - value : object - Replacement object. - mask : np.ndarray[bool] - True indicate corresponding element is ignored. - inplace : bool, default True - Perform inplace modification. - regex : bool, default False - If true, perform regular expression substitution. - - Returns - ------- - List[Block] - """ - if mask.any(): - if not regex: - nb = self.coerce_to_target_dtype(value) - if nb is self and not inplace: - nb = nb.copy() - putmask_inplace(nb.values, mask, value) - return [nb] - else: - regex = should_use_regex(regex, to_replace) - if regex: - return self._replace_regex( - to_replace, - value, - inplace=inplace, - convert=False, - mask=mask, - ) - return self.replace(to_replace, value, inplace=inplace, regex=False) - return [self] - class ExtensionBlock(Block): """ @@ -1538,12 +1606,15 @@ def putmask(self, mask, new) -> List[Block]: if isinstance(new, (np.ndarray, ExtensionArray)) and len(new) == len(mask): new = new[mask] - mask = safe_reshape(mask, new_values.shape) + if mask.ndim == new_values.ndim + 1: + # TODO(EA2D): unnecessary with 2D EAs + mask = mask.reshape(new_values.shape) new_values[mask] = new return [self.make_block(values=new_values)] - def _maybe_coerce_values(self, values): + @classmethod + def _maybe_coerce_values(cls, values): """ Unbox to an extension array. @@ -1608,8 +1679,8 @@ def setitem(self, indexer, value): be a compatible shape. """ if not self._can_hold_element(value): - # This is only relevant for DatetimeTZBlock, which has a - # non-trivial `_can_hold_element`. 
+ # This is only relevant for DatetimeTZBlock, ObjectValuesExtensionBlock, + # which has a non-trivial `_can_hold_element`. # https://github.com/pandas-dev/pandas/issues/24020 # Need a dedicated setitem until GH#24020 (type promotion in setitem # for extension arrays) is designed and implemented. @@ -1715,21 +1786,15 @@ def fillna( ) -> List[Block]: values = self.values if inplace else self.values.copy() values = values.fillna(value=value, limit=limit) - return [ - self.make_block_same_class( - values=values, placement=self.mgr_locs, ndim=self.ndim - ) - ] + return [self.make_block_same_class(values=values)] def interpolate( self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs ): values = self.values if inplace else self.values.copy() - return self.make_block_same_class( - values=values.fillna(value=fill_value, method=method, limit=limit), - placement=self.mgr_locs, - ) + new_values = values.fillna(value=fill_value, method=method, limit=limit) + return self.make_block_same_class(new_values) def diff(self, n: int, axis: int = 1) -> List[Block]: if axis == 0 and n != 0: @@ -1751,13 +1816,8 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Blo Dispatches to underlying ExtensionArray and re-boxes in an ExtensionBlock. 
""" - return [ - self.make_block_same_class( - self.values.shift(periods=periods, fill_value=fill_value), - placement=self.mgr_locs, - ndim=self.ndim, - ) - ] + new_values = self.values.shift(periods=periods, fill_value=fill_value) + return [self.make_block_same_class(new_values)] def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: @@ -1804,7 +1864,7 @@ def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: np.where(cond, self.values, other), dtype=dtype ) - return [self.make_block_same_class(result, placement=self.mgr_locs)] + return [self.make_block_same_class(result)] def _unstack(self, unstacker, fill_value, new_placement): # ExtensionArray-safe unstack. @@ -1931,38 +1991,59 @@ def to_native_types( return self.make_block(res) -class DatetimeLikeBlockMixin(HybridMixin, Block): - """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" - - @property - def _holder(self): - return DatetimeArray +class NDArrayBackedExtensionBlock(HybridMixin, Block): + """ + Block backed by an NDArrayBackedExtensionArray + """ - @property - def fill_value(self): - return np.datetime64("NaT", "ns") + def internal_values(self): + # Override to return DatetimeArray and TimedeltaArray + return self.array_values() def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ + values = self.array_values() if is_object_dtype(dtype): # DTA/TDA constructor and astype can handle 2D - return self._holder(self.values).astype(object) - return self.values - - def internal_values(self): - # Override to return DatetimeArray and TimedeltaArray - return self.array_values() - - def array_values(self): - return self._holder._simple_new(self.values) + values = values.astype(object) + # TODO(EA2D): reshape not needed with 2D EAs + return np.asarray(values).reshape(self.shape) def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta # 
TODO(EA2D): this can be removed if we ever have 2D EA return self.array_values().reshape(self.shape)[key] + def putmask(self, mask, new) -> List[Block]: + mask = extract_bool_array(mask) + + if not self._can_hold_element(new): + return self.astype(object).putmask(mask, new) + + # TODO(EA2D): reshape unnecessary with 2D EAs + arr = self.array_values().reshape(self.shape) + arr = cast("NDArrayBackedExtensionArray", arr) + arr.T.putmask(mask, new) + return [self] + + def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: + # TODO(EA2D): reshape unnecessary with 2D EAs + arr = self.array_values().reshape(self.shape) + + cond = extract_bool_array(cond) + + try: + res_values = arr.T.where(cond, other).T + except (ValueError, TypeError): + return super().where(other, cond, errors=errors, axis=axis) + + # TODO(EA2D): reshape not needed with 2D EAs + res_values = res_values.reshape(self.values.shape) + nb = self.make_block_same_class(res_values) + return [nb] + def diff(self, n: int, axis: int = 0) -> List[Block]: """ 1st discrete difference. 
@@ -1987,85 +2068,87 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: values = self.array_values().reshape(self.shape) new_values = values - values.shift(n, axis=axis) - return [ - TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer, ndim=self.ndim) - ] + return [self.make_block(new_values)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs - values = self.array_values() + values = self.array_values().reshape(self.shape) new_values = values.shift(periods, fill_value=fill_value, axis=axis) return [self.make_block_same_class(new_values)] - def to_native_types(self, na_rep="NaT", **kwargs): - """ convert to our native types format """ - arr = self.array_values() - - result = arr._format_native_types(na_rep=na_rep, **kwargs) - return self.make_block(result) - - def putmask(self, mask, new) -> List[Block]: - mask = extract_bool_array(mask) - - if not self._can_hold_element(new): - return self.astype(object).putmask(mask, new) - - # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values().reshape(self.shape) - arr = cast("NDArrayBackedExtensionArray", arr) - arr.T.putmask(mask, new) - return [self] - - def where(self, other, cond, errors="raise", axis: int = 0) -> List[Block]: - # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values().reshape(self.shape) - - cond = extract_bool_array(cond) + def fillna( + self, value, limit=None, inplace: bool = False, downcast=None + ) -> List[Block]: - try: - res_values = arr.T.where(cond, other).T - except (ValueError, TypeError): - return super().where(other, cond, errors=errors, axis=axis) + if not self._can_hold_element(value) and self.dtype.kind != "m": + # We support filling a DatetimeTZ with a `value` whose timezone + # is different by coercing to object. 
+ # TODO: don't special-case td64 + return self.astype(object).fillna(value, limit, inplace, downcast) - # TODO(EA2D): reshape not needed with 2D EAs - res_values = res_values.reshape(self.values.shape) - nb = self.make_block_same_class(res_values) - return [nb] + values = self.array_values() + values = values if inplace else values.copy() + new_values = values.fillna(value=value, limit=limit) + return [self.make_block_same_class(values=new_values)] -class DatetimeBlock(DatetimeLikeBlockMixin): - __slots__ = () - is_datetime = True +class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock): + """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" - @property - def _can_hold_na(self): - return True + is_numeric = False + _can_hold_na = True + _dtype: np.dtype + _holder: Type[Union[DatetimeArray, TimedeltaArray]] - def _maybe_coerce_values(self, values): + @classmethod + def _maybe_coerce_values(cls, values): """ Input validation for values passed to __init__. Ensure that - we have datetime64ns, coercing if necessary. + we have nanosecond datetime64/timedelta64, coercing if necessary. Parameters ---------- values : array-like - Must be convertible to datetime64 + Must be convertible to datetime64/timedelta64 Returns ------- - values : ndarray[datetime64ns] + values : ndarray[datetime64ns/timedelta64ns] Overridden by DatetimeTZBlock. 
""" - if values.dtype != DT64NS_DTYPE: - values = conversion.ensure_datetime64ns(values) + if values.dtype != cls._dtype: + # non-nano we will convert to nano + if values.dtype.kind != cls._dtype.kind: + # caller is responsible for ensuring td64/dt64 dtype + raise TypeError(values.dtype) # pragma: no cover + + values = cls._holder._from_sequence(values)._data - if isinstance(values, DatetimeArray): + if isinstance(values, cls._holder): values = values._data assert isinstance(values, np.ndarray), type(values) return values + def array_values(self): + return self._holder._simple_new(self.values) + + def to_native_types(self, na_rep="NaT", **kwargs): + """ convert to our native types format """ + arr = self.array_values() + + result = arr._format_native_types(na_rep=na_rep, **kwargs) + return self.make_block(result) + + +class DatetimeBlock(DatetimeLikeBlockMixin): + __slots__ = () + is_datetime = True + fill_value = np.datetime64("NaT", "ns") + _dtype = fill_value.dtype + _holder = DatetimeArray + def set_inplace(self, locs, values): """ See Block.set.__doc__ @@ -2081,24 +2164,25 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): values: DatetimeArray __slots__ = () - is_datetimetz = True is_extension = True + _can_hold_na = True + is_numeric = False + + _holder = DatetimeArray internal_values = Block.internal_values _can_hold_element = DatetimeBlock._can_hold_element to_native_types = DatetimeBlock.to_native_types diff = DatetimeBlock.diff - fill_value = np.datetime64("NaT", "ns") + fill_value = NaT where = DatetimeBlock.where putmask = DatetimeLikeBlockMixin.putmask + fillna = DatetimeLikeBlockMixin.fillna array_values = ExtensionBlock.array_values - @property - def _holder(self): - return DatetimeArray - - def _maybe_coerce_values(self, values): + @classmethod + def _maybe_coerce_values(cls, values): """ Input validation for values passed to __init__. Ensure that we have datetime64TZ, coercing if necessary. 
@@ -2112,8 +2196,8 @@ def _maybe_coerce_values(self, values): ------- values : DatetimeArray """ - if not isinstance(values, self._holder): - values = self._holder(values) + if not isinstance(values, cls._holder): + values = cls._holder(values) if values.tz is None: raise ValueError("cannot create a DatetimeTZBlock without a tz") @@ -2126,118 +2210,18 @@ def is_view(self) -> bool: # check the ndarray values of the DatetimeIndex values return self.values._data.base is not None - def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: - """ - Returns an ndarray of values. - - Parameters - ---------- - dtype : np.dtype - Only `object`-like dtypes are respected here (not sure - why). - - Returns - ------- - values : ndarray - When ``dtype=object``, then and object-dtype ndarray of - boxed values is returned. Otherwise, an M8[ns] ndarray - is returned. - - DatetimeArray is always 1-d. ``get_values`` will reshape - the return value to be the same dimensionality as the - block. - """ - values = self.values - if is_object_dtype(dtype): - values = values.astype(object) - - # TODO(EA2D): reshape unnecessary with 2D EAs - # Ensure that our shape is correct for DataFrame. - # ExtensionArrays are always 1-D, even in a DataFrame when - # the analogous NumPy-backed column would be a 2-D ndarray. - return np.asarray(values).reshape(self.shape) - def external_values(self): # NB: this is different from np.asarray(self.values), since that # return an object-dtype ndarray of Timestamps. - if self.is_datetimetz: - # avoid FutureWarning in .astype in casting from dt64t to dt64 - return self.values._data - return np.asarray(self.values.astype("datetime64[ns]", copy=False)) - - def fillna( - self, value, limit=None, inplace: bool = False, downcast=None - ) -> List[Block]: - # We support filling a DatetimeTZ with a `value` whose timezone - # is different by coercing to object. 
- if self._can_hold_element(value): - return super().fillna(value, limit, inplace, downcast) - - # different timezones, or a non-tz - return self.astype(object).fillna( - value, limit=limit, inplace=inplace, downcast=downcast - ) - - def _check_ndim(self, values, ndim): - """ - ndim inference and validation. - - This is overridden by the DatetimeTZBlock to check the case of 2D - data (values.ndim == 2), which should only be allowed if ndim is - also 2. - The case of 1D array is still allowed with both ndim of 1 or 2, as - if the case for other EAs. Therefore, we are only checking - `values.ndim > ndim` instead of `values.ndim != ndim` as for - consolidated blocks. - """ - if ndim is None: - ndim = values.ndim - - if values.ndim > ndim: - raise ValueError( - "Wrong number of dimensions. " - f"values.ndim != ndim [{values.ndim} != {ndim}]" - ) - return ndim + # Avoid FutureWarning in .astype in casting from dt64tz to dt64 + return self.values._data class TimeDeltaBlock(DatetimeLikeBlockMixin): __slots__ = () - is_timedelta = True - _can_hold_na = True - is_numeric = False + _holder = TimedeltaArray fill_value = np.timedelta64("NaT", "ns") - - def _maybe_coerce_values(self, values): - if values.dtype != TD64NS_DTYPE: - # non-nano we will convert to nano - if values.dtype.kind != "m": - # caller is responsible for ensuring timedelta64 dtype - raise TypeError(values.dtype) # pragma: no cover - - values = TimedeltaArray._from_sequence(values)._data - if isinstance(values, TimedeltaArray): - values = values._data - assert isinstance(values, np.ndarray), type(values) - return values - - @property - def _holder(self): - return TimedeltaArray - - def fillna( - self, value, limit=None, inplace: bool = False, downcast=None - ) -> List[Block]: - # TODO(EA2D): if we operated on array_values, TDA.fillna would handle - # raising here. 
- if is_integer(value): - # Deprecation GH#24694, GH#19233 - raise TypeError( - "Passing integers to fillna for timedelta64[ns] dtype is no " - "longer supported. To obtain the old behavior, pass " - "`pd.Timedelta(seconds=n)` instead." - ) - return super().fillna(value, limit=limit, inplace=inplace, downcast=downcast) + _dtype = fill_value.dtype class ObjectBlock(Block): @@ -2245,7 +2229,8 @@ class ObjectBlock(Block): is_object = True _can_hold_na = True - def _maybe_coerce_values(self, values): + @classmethod + def _maybe_coerce_values(cls, values): if issubclass(values.dtype.type, str): values = np.array(values, dtype=object) return values @@ -2467,35 +2452,15 @@ def extend_blocks(result, blocks=None) -> List[Block]: return blocks -def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: - """ guarantee the shape of the values to be at least 1 d """ +def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: + """ + Reshape if possible to have values.ndim == ndim. + """ if values.ndim < ndim: - shape = values.shape if not is_extension_array_dtype(values.dtype): # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 # block.shape is incorrect for "2D" ExtensionArrays # We can't, and don't need to, reshape. - # error: "ExtensionArray" has no attribute "reshape" - values = values.reshape(tuple((1,) + shape)) # type: ignore[attr-defined] - return values - -def safe_reshape(arr: ArrayLike, new_shape: Shape) -> ArrayLike: - """ - Reshape `arr` to have shape `new_shape`, unless it is an ExtensionArray, - in which case it will be returned unchanged (see gh-13012). 
- - Parameters - ---------- - arr : np.ndarray or ExtensionArray - new_shape : Tuple[int] - - Returns - ------- - np.ndarray or ExtensionArray - """ - if not is_extension_array_dtype(arr.dtype): - # Note: this will include TimedeltaArray and tz-naive DatetimeArray - # TODO(EA2D): special case will be unnecessary with 2D EAs - arr = np.asarray(arr).reshape(new_shape) - return arr + values = np.asarray(values).reshape(1, -1) + return values diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index a2c930f6d9b22..16440f7a4c2bf 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -2,27 +2,45 @@ import copy import itertools -from typing import TYPE_CHECKING, Dict, List, Sequence +from typing import ( + TYPE_CHECKING, + Dict, + List, + Sequence, +) import numpy as np from pandas._libs import internals as libinternals -from pandas._typing import ArrayLike, DtypeObj, Manager, Shape +from pandas._typing import ( + ArrayLike, + DtypeObj, + Manager, + Shape, +) from pandas.util._decorators import cache_readonly -from pandas.core.dtypes.cast import ensure_dtype_can_hold_na, find_common_type +from pandas.core.dtypes.cast import ( + ensure_dtype_can_hold_na, + find_common_type, +) from pandas.core.dtypes.common import ( - is_categorical_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_sparse, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna_all +from pandas.core.dtypes.missing import ( + is_valid_na_for_dtype, + isna_all, +) import pandas.core.algorithms as algos -from pandas.core.arrays import DatetimeArray, ExtensionArray +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, +) from pandas.core.internals.array_manager import ArrayManager from pandas.core.internals.blocks import make_block from pandas.core.internals.managers import BlockManager @@ -100,11 +118,8 @@ def concatenate_block_managers( else: b = 
make_block(values, placement=placement, ndim=blk.ndim) else: - b = make_block( - _concatenate_join_units(join_units, concat_axis, copy=copy), - placement=placement, - ndim=len(axes), - ) + new_values = _concatenate_join_units(join_units, concat_axis, copy=copy) + b = make_block(new_values, placement=placement, ndim=len(axes)) blocks.append(b) return BlockManager(blocks, axes) @@ -291,20 +306,15 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: if len(values) and values[0] is None: fill_value = None - if is_datetime64tz_dtype(blk_dtype) or is_datetime64tz_dtype( - empty_dtype - ): + if is_datetime64tz_dtype(empty_dtype): # TODO(EA2D): special case unneeded with 2D EAs i8values = np.full(self.shape[1], fill_value.value) return DatetimeArray(i8values, dtype=empty_dtype) - elif is_categorical_dtype(blk_dtype): - pass elif is_extension_array_dtype(blk_dtype): pass elif is_extension_array_dtype(empty_dtype): - missing_arr = empty_dtype.construct_array_type()._from_sequence( - [], dtype=empty_dtype - ) + cls = empty_dtype.construct_array_type() + missing_arr = cls._from_sequence([], dtype=empty_dtype) ncols, nrows = self.shape assert ncols == 1, ncols empty_arr = -1 * np.ones((nrows,), dtype=np.intp) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 878a5c9aafe5d..eb1a7a355f313 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -21,7 +21,12 @@ import numpy.ma as ma from pandas._libs import lib -from pandas._typing import Axis, DtypeObj, Manager, Scalar +from pandas._typing import ( + Axis, + DtypeObj, + Manager, + Scalar, +) from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, @@ -31,6 +36,7 @@ maybe_convert_platform, maybe_infer_to_datetimelike, maybe_upcast, + sanitize_to_nanoseconds, ) from pandas.core.dtypes.common import ( is_datetime64tz_dtype, @@ -49,9 +55,15 @@ ABCTimedeltaIndex, ) -from pandas.core import algorithms, 
common as com +from pandas.core import ( + algorithms, + common as com, +) from pandas.core.arrays import Categorical -from pandas.core.construction import extract_array, sanitize_array +from pandas.core.construction import ( + extract_array, + sanitize_array, +) from pandas.core.indexes import base as ibase from pandas.core.indexes.api import ( Index, @@ -154,7 +166,10 @@ def mgr_to_mgr(mgr, typ: str): Convert to specific type of Manager. Does not copy if the type is already correct. Does not guarantee a copy otherwise. """ - from pandas.core.internals import ArrayManager, BlockManager + from pandas.core.internals import ( + ArrayManager, + BlockManager, + ) new_mgr: Manager @@ -363,7 +378,7 @@ def convert(v): # this is equiv of np.asarray, but does object conversion # and platform dtype preservation try: - if is_list_like(values[0]) or hasattr(values[0], "len"): + if is_list_like(values[0]): values = np.array([convert(v) for v in values]) elif isinstance(values[0], np.ndarray) and values[0].ndim == 0: # GH#21861 @@ -813,8 +828,7 @@ def sanitize_index(data, index: Index): if isinstance(data, np.ndarray): - # coerce datetimelike types - if data.dtype.kind in ["M", "m"]: - data = sanitize_array(data, index, copy=False) + # coerce datetimelike types to ns + data = sanitize_to_nanoseconds(data) return data diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1c45b39ba990a..9c536abbc7559 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -19,12 +19,23 @@ import numpy as np -from pandas._libs import internals as libinternals, lib -from pandas._typing import ArrayLike, Dtype, DtypeObj, Shape +from pandas._libs import ( + internals as libinternals, + lib, +) +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + Shape, +) from pandas.errors import PerformanceWarning from pandas.util._validators import validate_bool_kwarg -from pandas.core.dtypes.cast import find_common_type, 
infer_dtype_from_scalar +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from_scalar, +) from pandas.core.dtypes.common import ( DT64NS_DTYPE, is_dtype_equal, @@ -32,14 +43,25 @@ is_list_like, ) from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries -from pandas.core.dtypes.missing import array_equals, isna +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCPandasArray, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + array_equals, + isna, +) import pandas.core.algorithms as algos from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import extract_array from pandas.core.indexers import maybe_convert_indices -from pandas.core.indexes.api import Float64Index, Index, ensure_index +from pandas.core.indexes.api import ( + Float64Index, + Index, + ensure_index, +) from pandas.core.internals.base import DataManager from pandas.core.internals.blocks import ( Block, @@ -47,12 +69,15 @@ DatetimeTZBlock, ExtensionBlock, ObjectValuesExtensionBlock, + ensure_block_shape, extend_blocks, get_block_type, make_block, - safe_reshape, ) -from pandas.core.internals.ops import blockwise_all, operate_blockwise +from pandas.core.internals.ops import ( + blockwise_all, + operate_blockwise, +) # TODO: flexible with index=None and/or items=None @@ -190,7 +215,7 @@ def make_empty(self: T, axes=None) -> T: assert isinstance(self, SingleBlockManager) # for mypy blk = self.blocks[0] arr = blk.values[:0] - nb = blk.make_block_same_class(arr, placement=slice(0, 0), ndim=1) + nb = blk.make_block_same_class(arr, placement=slice(0, 0)) blocks = [nb] else: blocks = [] @@ -942,12 +967,8 @@ def iget(self, i: int) -> SingleBlockManager: values = block.iget(self.blklocs[i]) # shortcut for select a single-dim from a 2-dim BM - return SingleBlockManager( - block.make_block_same_class( - values, placement=slice(0, len(values)), ndim=1 - ), - self.axes[1], - 
) + nb = type(block)(values, placement=slice(0, len(values)), ndim=1) + return SingleBlockManager(nb, self.axes[1]) def iget_values(self, i: int) -> ArrayLike: """ @@ -1017,7 +1038,7 @@ def value_getitem(placement): value = value.T if value.ndim == self.ndim - 1: - value = safe_reshape(value, (1,) + value.shape) + value = ensure_block_shape(value, ndim=2) def value_getitem(placement): return value @@ -1140,10 +1161,9 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False if value.ndim == 2: value = value.T - - if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype): + elif value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype): # TODO(EA2D): special case not needed with 2D EAs - value = safe_reshape(value, (1,) + value.shape) + value = ensure_block_shape(value, ndim=2) block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) @@ -1221,7 +1241,7 @@ def reindex_indexer( # some axes don't allow reindexing with dups if not allow_dups: - self.axes[axis]._can_reindex(indexer) + self.axes[axis]._validate_can_reindex(indexer) if axis >= self.ndim: raise IndexError("Requested axis not found in manager") diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index 8250db3f5d888..70d4f3b91c245 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -1,7 +1,12 @@ from __future__ import annotations from collections import namedtuple -from typing import TYPE_CHECKING, Iterator, List, Tuple +from typing import ( + TYPE_CHECKING, + Iterator, + List, + Tuple, +) import numpy as np diff --git a/pandas/core/missing.py b/pandas/core/missing.py index d0ad38235d7e5..9ae5f7d1b7497 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -4,12 +4,26 @@ from __future__ import annotations from functools import partial -from typing import TYPE_CHECKING, Any, List, Optional, Set, Union +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, 
+ Set, + Union, +) import numpy as np -from pandas._libs import algos, lib -from pandas._typing import ArrayLike, Axis, DtypeObj +from pandas._libs import ( + algos, + lib, +) +from pandas._typing import ( + ArrayLike, + Axis, + DtypeObj, +) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 4af1084033ce2..24e75a2bbeff2 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,15 +1,32 @@ import functools import itertools import operator -from typing import Any, Optional, Tuple, Union, cast +from typing import ( + Any, + Optional, + Tuple, + Union, + cast, +) import warnings import numpy as np from pandas._config import get_option -from pandas._libs import NaT, Timedelta, iNaT, lib -from pandas._typing import ArrayLike, Dtype, DtypeObj, F, Scalar +from pandas._libs import ( + NaT, + Timedelta, + iNaT, + lib, +) +from pandas._typing import ( + ArrayLike, + Dtype, + DtypeObj, + F, + Scalar, +) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import ( @@ -30,7 +47,11 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, +) from pandas.core.construction import extract_array diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 11ce2a1a3b8a3..8ace64fedacb9 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -6,7 +6,11 @@ from __future__ import annotations import operator -from typing import TYPE_CHECKING, Optional, Set +from typing import ( + TYPE_CHECKING, + Optional, + Set, +) import warnings import numpy as np @@ -15,8 +19,14 @@ from pandas._typing import Level from pandas.util._decorators import Appender -from pandas.core.dtypes.common import is_array_like, is_list_like 
-from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.common import ( + is_array_like, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from pandas.core.dtypes.missing import isna from pandas.core import algorithms @@ -37,7 +47,11 @@ make_flex_doc, ) from pandas.core.ops.invalid import invalid_comparison # noqa:F401 -from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401 +from pandas.core.ops.mask_ops import ( # noqa: F401 + kleene_and, + kleene_or, + kleene_xor, +) from pandas.core.ops.methods import add_flex_arithmetic_methods # noqa:F401 from pandas.core.ops.roperator import ( # noqa:F401 radd, @@ -55,7 +69,10 @@ ) if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) # ----------------------------------------------------------------------------- # constants diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 857840cf9d8b9..10807dffb026b 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -10,8 +10,16 @@ import numpy as np -from pandas._libs import Timedelta, Timestamp, lib, ops as libops -from pandas._typing import ArrayLike, Shape +from pandas._libs import ( + Timedelta, + Timestamp, + lib, + ops as libops, +) +from pandas._typing import ( + ArrayLike, + Shape, +) from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike, @@ -27,8 +35,15 @@ is_object_dtype, is_scalar, ) -from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndex, ABCSeries -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + notna, +) from pandas.core.construction import ensure_wrapped_if_datetimelike from pandas.core.ops import missing @@ -420,7 +435,10 @@ def _maybe_upcast_for_op(obj, shape: Shape): Be careful to 
call this *after* determining the `name` attribute to be attached to the result of the arithmetic operation. """ - from pandas.core.arrays import DatetimeArray, TimedeltaArray + from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, + ) if type(obj) is timedelta: # GH#22390 cast up to Timedelta to rely on Timedelta diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 25a38b3a373ae..2a76eb92120e7 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -7,7 +7,11 @@ from pandas._libs.lib import item_from_zerodim from pandas._typing import F -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) def unpack_zerodim_and_defer(name: str) -> Callable[[F], F]: diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py index 06ed321327e06..06ca6ed806f25 100644 --- a/pandas/core/ops/docstrings.py +++ b/pandas/core/ops/docstrings.py @@ -1,7 +1,10 @@ """ Templating for ops docstrings """ -from typing import Dict, Optional +from typing import ( + Dict, + Optional, +) def make_flex_doc(op_name: str, typ: str) -> str: diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 8fb81faf313d7..a9edb2d138246 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -1,11 +1,17 @@ """ Ops for masked arrays. 
""" -from typing import Optional, Union +from typing import ( + Optional, + Union, +) import numpy as np -from pandas._libs import lib, missing as libmissing +from pandas._libs import ( + lib, + missing as libmissing, +) def kleene_or( diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 4866905d32b83..700c4a946e2b2 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -3,7 +3,10 @@ """ import operator -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from pandas.core.ops.roperator import ( radd, diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index c33cb32dcec19..20b7510c33160 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -25,9 +25,17 @@ import numpy as np -from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar - -from pandas.core.ops.roperator import rdivmod, rfloordiv, rmod +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, + is_scalar, +) + +from pandas.core.ops.roperator import ( + rdivmod, + rfloordiv, + rmod, +) def fill_zeros(result, x, y): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 68f791ac0a837..2308f9edb4328 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -3,7 +3,14 @@ import copy from datetime import timedelta from textwrap import dedent -from typing import Callable, Dict, Optional, Tuple, Union, no_type_check +from typing import ( + Callable, + Dict, + Optional, + Tuple, + Union, + no_type_check, +) import numpy as np @@ -16,18 +23,35 @@ Timestamp, to_offset, ) -from pandas._typing import T, TimedeltaConvertibleTypes, TimestampConvertibleTypes +from pandas._typing import ( + T, + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, +) from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError -from pandas.util._decorators import 
Appender, Substitution, doc +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) import pandas.core.algorithms as algos from pandas.core.apply import ResamplerWindowApply from pandas.core.base import DataError -from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.groupby.base import GotItemMixin, ShallowMixin +from pandas.core.generic import ( + NDFrame, + _shared_docs, +) +from pandas.core.groupby.base import ( + GotItemMixin, + ShallowMixin, +) from pandas.core.groupby.generic import SeriesGroupBy from pandas.core.groupby.groupby import ( BaseGroupBy, @@ -38,12 +62,29 @@ from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper from pandas.core.indexes.api import Index -from pandas.core.indexes.datetimes import DatetimeIndex, date_range -from pandas.core.indexes.period import PeriodIndex, period_range -from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + date_range, +) +from pandas.core.indexes.period import ( + PeriodIndex, + period_range, +) +from pandas.core.indexes.timedeltas import ( + TimedeltaIndex, + timedelta_range, +) -from pandas.tseries.frequencies import is_subperiod, is_superperiod -from pandas.tseries.offsets import DateOffset, Day, Nano, Tick +from pandas.tseries.frequencies import ( + is_subperiod, + is_superperiod, +) +from pandas.tseries.offsets import ( + DateOffset, + Day, + Nano, + Tick, +) _shared_docs_kwargs: Dict[str, str] = {} @@ -96,9 +137,8 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs): self.as_index = True self.exclusions = set() self.binner = None - # pandas\core\resample.py:96: error: Incompatible types in assignment - # (expression has type "None", variable has type "BaseGrouper") - # [assignment] + # error: 
Incompatible types in assignment (expression has type "None", variable + # has type "BaseGrouper") self.grouper = None # type: ignore[assignment] if self.groupby is not None: @@ -301,7 +341,7 @@ def pipe( def aggregate(self, func, *args, **kwargs): self._set_binner() - result, how = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if result is None: how = func grouper = None @@ -419,8 +459,7 @@ def _apply_loffset(self, result): result : Series or DataFrame the result of resample """ - # pandas\core\resample.py:409: error: Cannot determine type of - # 'loffset' [has-type] + # error: Cannot determine type of 'loffset' needs_offset = ( isinstance( self.loffset, # type: ignore[has-type] @@ -431,8 +470,7 @@ def _apply_loffset(self, result): ) if needs_offset: - # pandas\core\resample.py:415: error: Cannot determine type of - # 'loffset' [has-type] + # error: Cannot determine type of 'loffset' result.index = result.index + self.loffset # type: ignore[has-type] self.loffset = None @@ -869,8 +907,7 @@ def std(self, ddof=1, *args, **kwargs): Standard deviation of values within each group. """ nv.validate_resampler_func("std", args, kwargs) - # pandas\core\resample.py:850: error: Unexpected keyword argument - # "ddof" for "_downsample" [call-arg] + # error: Unexpected keyword argument "ddof" for "_downsample" return self._downsample("std", ddof=ddof) # type: ignore[call-arg] def var(self, ddof=1, *args, **kwargs): @@ -888,8 +925,7 @@ def var(self, ddof=1, *args, **kwargs): Variance of values within each group. 
""" nv.validate_resampler_func("var", args, kwargs) - # pandas\core\resample.py:867: error: Unexpected keyword argument - # "ddof" for "_downsample" [call-arg] + # error: Unexpected keyword argument "ddof" for "_downsample" return self._downsample("var", ddof=ddof) # type: ignore[call-arg] @doc(GroupBy.size) @@ -948,11 +984,8 @@ def quantile(self, q=0.5, **kwargs): Return a DataFrame, where the coulmns are groupby columns, and the values are its quantiles. """ - # pandas\core\resample.py:920: error: Unexpected keyword argument "q" - # for "_downsample" [call-arg] - - # pandas\core\resample.py:920: error: Too many arguments for - # "_downsample" [call-arg] + # error: Unexpected keyword argument "q" for "_downsample" + # error: Too many arguments for "_downsample" return self._downsample("quantile", q=q, **kwargs) # type: ignore[call-arg] @@ -1005,8 +1038,7 @@ def __init__(self, obj, *args, **kwargs): for attr in self._attributes: setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) - # pandas\core\resample.py:972: error: Too many arguments for "__init__" - # of "object" [call-arg] + # error: Too many arguments for "__init__" of "object" super().__init__(None) # type: ignore[call-arg] self._groupby = groupby self._groupby.mutated = True @@ -1070,8 +1102,8 @@ def _downsample(self, how, **kwargs): return obj # do we have a regular frequency - # pandas\core\resample.py:1037: error: "BaseGrouper" has no - # attribute "binlabels" [attr-defined] + + # error: "BaseGrouper" has no attribute "binlabels" if ( (ax.freq is not None or ax.inferred_freq is not None) and len(self.grouper.binlabels) > len(ax) # type: ignore[attr-defined] diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py index 3c76eef809c7a..58d741c2c6988 100644 --- a/pandas/core/reshape/api.py +++ b/pandas/core/reshape/api.py @@ -1,8 +1,23 @@ # flake8: noqa from pandas.core.reshape.concat import concat -from pandas.core.reshape.melt import lreshape, melt, wide_to_long -from 
pandas.core.reshape.merge import merge, merge_asof, merge_ordered -from pandas.core.reshape.pivot import crosstab, pivot, pivot_table +from pandas.core.reshape.melt import ( + lreshape, + melt, + wide_to_long, +) +from pandas.core.reshape.merge import ( + merge, + merge_asof, + merge_ordered, +) +from pandas.core.reshape.pivot import ( + crosstab, + pivot, + pivot_table, +) from pandas.core.reshape.reshape import get_dummies -from pandas.core.reshape.tile import cut, qcut +from pandas.core.reshape.tile import ( + cut, + qcut, +) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index f9bff603cec38..92fc4a2e85163 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -23,7 +23,10 @@ from pandas.util._decorators import cache_readonly from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from pandas.core.dtypes.missing import isna from pandas.core.arrays.categorical import ( @@ -43,7 +46,10 @@ from pandas.core.internals import concatenate_block_managers if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) from pandas.core.generic import NDFrame # --------------------------------------------------------------------- diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index b5f8b2d02207b..80a44e8fda39b 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -1,27 +1,43 @@ from __future__ import annotations import re -from typing import TYPE_CHECKING, List, cast +from typing import ( + TYPE_CHECKING, + List, + cast, +) import warnings import numpy as np -from pandas.util._decorators import Appender, deprecate_kwarg +from pandas.util._decorators import ( + Appender, + deprecate_kwarg, +) -from pandas.core.dtypes.common import is_extension_array_dtype, is_list_like +from 
pandas.core.dtypes.common import ( + is_extension_array_dtype, + is_list_like, +) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.missing import notna from pandas.core.arrays import Categorical import pandas.core.common as com -from pandas.core.indexes.api import Index, MultiIndex +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) from pandas.core.reshape.concat import concat from pandas.core.reshape.util import tile_compat from pandas.core.shared_docs import _shared_docs from pandas.core.tools.numeric import to_numeric if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) @Appender(_shared_docs["melt"] % {"caller": "pd.melt(df, ", "other": "DataFrame.melt"}) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 8704d757c3289..79d018427aa33 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -8,12 +8,22 @@ from functools import partial import hashlib import string -from typing import TYPE_CHECKING, Optional, Tuple, cast +from typing import ( + TYPE_CHECKING, + Optional, + Tuple, + cast, +) import warnings import numpy as np -from pandas._libs import Timedelta, hashtable as libhashtable, join as libjoin, lib +from pandas._libs import ( + Timedelta, + hashtable as libhashtable, + join as libjoin, + lib, +) from pandas._typing import ( ArrayLike, FrameOrSeries, @@ -22,7 +32,10 @@ Suffixes, ) from pandas.errors import MergeError -from pandas.util._decorators import Appender, Substitution +from pandas.util._decorators import ( + Appender, + Substitution, +) from pandas.core.dtypes.common import ( ensure_float64, @@ -44,10 +57,20 @@ is_object_dtype, needs_i8_conversion, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas.core.dtypes.missing import isna, na_value_for_dtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + isna, + 
na_value_for_dtype, +) -from pandas import Categorical, Index, MultiIndex +from pandas import ( + Categorical, + Index, + MultiIndex, +) from pandas.core import groupby import pandas.core.algorithms as algos import pandas.core.common as com @@ -996,9 +1019,8 @@ def _get_merge_keys(self): """ left_keys = [] right_keys = [] - # pandas\core\reshape\merge.py:966: error: Need type annotation for - # 'join_names' (hint: "join_names: List[] = ...") - # [var-annotated] + # error: Need type annotation for 'join_names' (hint: "join_names: List[] + # = ...") join_names = [] # type: ignore[var-annotated] right_drop = [] left_drop = [] diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7ac98d7fcbd33..8feb379a82ada 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -16,17 +16,37 @@ import numpy as np -from pandas._typing import FrameOrSeriesUnion, IndexLabel -from pandas.util._decorators import Appender, Substitution +from pandas._typing import ( + AggFuncType, + AggFuncTypeBase, + AggFuncTypeDict, + FrameOrSeriesUnion, + IndexLabel, +) +from pandas.util._decorators import ( + Appender, + Substitution, +) from pandas.core.dtypes.cast import maybe_downcast_to_dtype -from pandas.core.dtypes.common import is_integer_dtype, is_list_like, is_scalar -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_list_like, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) import pandas.core.common as com from pandas.core.frame import _shared_docs from pandas.core.groupby import Grouper -from pandas.core.indexes.api import Index, MultiIndex, get_objs_combined_axis +from pandas.core.indexes.api import ( + Index, + MultiIndex, + get_objs_combined_axis, +) from pandas.core.reshape.concat import concat from pandas.core.reshape.util import cartesian_product from pandas.core.series import Series @@ -40,11 +60,11 @@ 
@Substitution("\ndata : DataFrame") @Appender(_shared_docs["pivot_table"], indents=1) def pivot_table( - data, + data: DataFrame, values=None, index=None, columns=None, - aggfunc="mean", + aggfunc: AggFuncType = "mean", fill_value=None, margins=False, dropna=True, @@ -58,7 +78,7 @@ def pivot_table( pieces: List[DataFrame] = [] keys = [] for func in aggfunc: - table = pivot_table( + _table = __internal_pivot_table( data, values=values, index=index, @@ -70,11 +90,42 @@ def pivot_table( margins_name=margins_name, observed=observed, ) - pieces.append(table) + pieces.append(_table) keys.append(getattr(func, "__name__", func)) - return concat(pieces, keys=keys, axis=1) + table = concat(pieces, keys=keys, axis=1) + return table.__finalize__(data, method="pivot_table") + + table = __internal_pivot_table( + data, + values, + index, + columns, + aggfunc, + fill_value, + margins, + dropna, + margins_name, + observed, + ) + return table.__finalize__(data, method="pivot_table") + +def __internal_pivot_table( + data: DataFrame, + values, + index, + columns, + aggfunc: Union[AggFuncTypeBase, AggFuncTypeDict], + fill_value, + margins: bool, + dropna: bool, + margins_name: str, + observed: bool, +) -> DataFrame: + """ + Helper of :func:`pandas.pivot_table` for any non-list ``aggfunc``. 
+ """ keys = index + columns values_passed = values is not None diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c335768748b26..543bf44e61216 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1,7 +1,11 @@ from __future__ import annotations import itertools -from typing import List, Optional, Union +from typing import ( + List, + Optional, + Union, +) import numpy as np @@ -28,7 +32,10 @@ from pandas.core.arrays import SparseArray from pandas.core.arrays.categorical import factorize_from_iterable from pandas.core.frame import DataFrame -from pandas.core.indexes.api import Index, MultiIndex +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) from pandas.core.series import Series from pandas.core.sorting import ( compress_group_index, @@ -422,7 +429,7 @@ def unstack(obj, level, fill_value=None): return obj.T.stack(dropna=False) elif not isinstance(obj.index, MultiIndex): # GH 36113 - # Give nicer error messages when unstack a Series whose + # Give nicer error messages when unstack a Series whose # Index is not a MultiIndex. 
raise ValueError( f"index must be a MultiIndex to unstack, {type(obj.index)} was passed" @@ -444,9 +451,10 @@ def _unstack_frame(obj, level, fill_value=None): mgr = obj._mgr.unstack(unstacker, fill_value=fill_value) return obj._constructor(mgr) else: - return _Unstacker( - obj.index, level=level, constructor=obj._constructor - ).get_result(obj._values, value_columns=obj.columns, fill_value=fill_value) + unstacker = _Unstacker(obj.index, level=level, constructor=obj._constructor) + return unstacker.get_result( + obj._values, value_columns=obj.columns, fill_value=fill_value + ) def _unstack_extension_series(series, level, fill_value): diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 969b416669023..89eba5bf41c78 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -3,7 +3,10 @@ """ import numpy as np -from pandas._libs import Timedelta, Timestamp +from pandas._libs import ( + Timedelta, + Timestamp, +) from pandas._libs.lib import infer_dtype from pandas.core.dtypes.common import ( @@ -24,7 +27,13 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna -from pandas import Categorical, Index, IntervalIndex, to_datetime, to_timedelta +from pandas import ( + Categorical, + Index, + IntervalIndex, + to_datetime, + to_timedelta, +) import pandas.core.algorithms as algos import pandas.core.nanops as nanops diff --git a/pandas/core/series.py b/pandas/core/series.py index 7d97c9f6189f3..34e9464006b30 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -25,7 +25,12 @@ from pandas._config import get_option -from pandas._libs import lib, properties, reshape, tslibs +from pandas._libs import ( + lib, + properties, + reshape, + tslibs, +) from pandas._libs.lib import no_default from pandas._typing import ( AggFuncType, @@ -41,11 +46,19 @@ ) from pandas.compat.numpy import function as nv from pandas.errors import InvalidIndexError -from pandas.util._decorators import Appender, 
Substitution, doc -from pandas.util._validators import validate_bool_kwarg, validate_percentile +from pandas.util._decorators import ( + Appender, + Substitution, + doc, +) +from pandas.util._validators import ( + validate_bool_kwarg, + validate_percentile, +) from pandas.core.dtypes.cast import ( convert_dtypes, + maybe_box_native, maybe_cast_to_extension_array, validate_numeric_casting, ) @@ -72,9 +85,15 @@ remove_na_arraylike, ) -from pandas.core import algorithms, base, generic, missing, nanops, ops +from pandas.core import ( + algorithms, + base, + generic, + missing, + nanops, + ops, +) from pandas.core.accessor import CachedAccessor -from pandas.core.aggregation import transform from pandas.core.apply import series_apply from pandas.core.arrays import ExtensionArray from pandas.core.arrays.categorical import CategoricalAccessor @@ -87,7 +106,10 @@ sanitize_array, ) from pandas.core.generic import NDFrame -from pandas.core.indexers import deprecate_ndim_indexing, unpack_1tuple +from pandas.core.indexers import ( + deprecate_ndim_indexing, + unpack_1tuple, +) from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.api import ( CategoricalIndex, @@ -104,7 +126,10 @@ from pandas.core.internals import SingleBlockManager from pandas.core.internals.construction import sanitize_index from pandas.core.shared_docs import _shared_docs -from pandas.core.sorting import ensure_key_mapped, nargsort +from pandas.core.sorting import ( + ensure_key_mapped, + nargsort, +) from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -112,7 +137,10 @@ import pandas.plotting if TYPE_CHECKING: - from pandas._typing import TimedeltaConvertibleTypes, TimestampConvertibleTypes + from pandas._typing import ( + TimedeltaConvertibleTypes, + TimestampConvertibleTypes, + ) from pandas.core.frame import DataFrame from pandas.core.groupby.generic import SeriesGroupBy @@ -1563,7 +1591,7 @@ def to_dict(self, 
into=dict): """ # GH16122 into_c = com.standardize_mapping(into) - return into_c(self.items()) + return into_c((k, maybe_box_native(v)) for k, v in self.items()) def to_frame(self, name=None) -> DataFrame: """ @@ -3973,27 +4001,7 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): func = dict(kwargs.items()) op = series_apply(self, func, args=args, kwargs=kwargs) - result, how = op.agg() - if result is None: - - # we can be called from an inner function which - # passes this meta-data - kwargs.pop("_axis", None) - kwargs.pop("_level", None) - - # try a regular apply, this evaluates lambdas - # row-by-row; however if the lambda is expected a Series - # expression, e.g.: lambda x: x-x.quantile(0.25) - # this will fail, so we can try a vectorized evaluation - - # we cannot FIRST try the vectorized evaluation, because - # then .agg and .apply would have different semantics if the - # operation is actually defined on the Series, e.g. str - try: - result = self.apply(func, *args, **kwargs) - except (ValueError, AttributeError, TypeError): - result = func(self, *args, **kwargs) - + result = op.agg() return result agg = aggregate @@ -4006,7 +4014,10 @@ def aggregate(self, func=None, axis=0, *args, **kwargs): def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> FrameOrSeriesUnion: - return transform(self, func, axis, *args, **kwargs) + # Validate axis argument + self._get_axis_number(axis) + result = series_apply(self, func=func, args=args, kwargs=kwargs).transform() + return result def apply( self, @@ -4044,6 +4055,12 @@ def apply( Series.agg: Only perform aggregating type operations. Series.transform: Only perform transforming type operations. + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`udf-mutation` + for more details. + Examples -------- Create a series with typical summer temperatures for each city. 
@@ -4111,8 +4128,7 @@ def apply( Helsinki 2.484907 dtype: float64 """ - op = series_apply(self, func, convert_dtype, args, kwargs) - return op.apply() + return series_apply(self, func, convert_dtype, args, kwargs).apply() def _reduce( self, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index ad2eafe7295b0..49eb87a3bc8ba 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -41,6 +41,10 @@ ----- `agg` is an alias for `aggregate`. Use the alias. +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`udf-mutation` +for more details. + A passed user-defined-function will be passed a Series for evaluation. {examples}""" @@ -296,6 +300,12 @@ {klass}.agg : Only perform aggregating type operations. {klass}.apply : Invoke function on a {klass}. +Notes +----- +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`udf-mutation` +for more details. 
+ Examples -------- >>> df = pd.DataFrame({{'A': range(3), 'B': range(1, 4)}}) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index cfbabab491ae4..67863036929b3 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -16,7 +16,11 @@ import numpy as np -from pandas._libs import algos, hashtable, lib +from pandas._libs import ( + algos, + hashtable, + lib, +) from pandas._libs.hashtable import unique_label_indices from pandas._typing import IndexKeyFunc diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py index e7bf94cdc08ea..2a324ebf77d9d 100644 --- a/pandas/core/sparse/api.py +++ b/pandas/core/sparse/api.py @@ -1,3 +1,6 @@ -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) __all__ = ["SparseArray", "SparseDtype"] diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 0e6ffa637f1ae..32a99c0a020b2 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -1,7 +1,11 @@ import codecs from functools import wraps import re -from typing import Dict, List, Optional +from typing import ( + Dict, + List, + Optional, +) import warnings import numpy as np @@ -16,7 +20,12 @@ is_integer, is_list_like, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCMultiIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) from pandas.core.dtypes.missing import isna from pandas.core.base import NoNewAttributesMixin @@ -233,7 +242,10 @@ def _wrap_result( fill_value=np.nan, returns_string=True, ): - from pandas import Index, MultiIndex + from pandas import ( + Index, + MultiIndex, + ) if not hasattr(result, "ndim") or not hasattr(result, "dtype"): if isinstance(result, ABCDataFrame): @@ -338,7 +350,10 @@ def _get_series_list(self, others): list of Series Others transformed into list of Series. 
""" - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) # self._orig is either Series or Index idx = self._orig if isinstance(self._orig, ABCIndex) else self._orig.index @@ -515,7 +530,11 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"): For more examples, see :ref:`here `. """ # TODO: dispatch - from pandas import Index, Series, concat + from pandas import ( + Index, + Series, + concat, + ) if isinstance(others, str): raise ValueError("Did you mean to supply a `sep` keyword?") @@ -2990,7 +3009,10 @@ def _str_extract_noexpand(arr, pat, flags=0): Index. """ - from pandas import DataFrame, array + from pandas import ( + DataFrame, + array, + ) regex = re.compile(pat, flags=flags) groups_or_na = _groups_or_na_fun(regex) diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index 08064244a2ff9..b8033668aa18f 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -1,5 +1,8 @@ import abc -from typing import Pattern, Union +from typing import ( + Pattern, + Union, +) import numpy as np diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 471f1e521b991..0a4543057c386 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -1,6 +1,12 @@ import re import textwrap -from typing import Optional, Pattern, Set, Union, cast +from typing import ( + Optional, + Pattern, + Set, + Union, + cast, +) import unicodedata import warnings @@ -9,9 +15,15 @@ import pandas._libs.lib as lib import pandas._libs.missing as libmissing import pandas._libs.ops as libops -from pandas._typing import Dtype, Scalar - -from pandas.core.dtypes.common import is_re, is_scalar +from pandas._typing import ( + Dtype, + Scalar, +) + +from pandas.core.dtypes.common import ( + is_re, + is_scalar, +) from pandas.core.dtypes.missing import isna from pandas.core.strings.base import BaseStringArrayMethods diff --git a/pandas/core/tools/datetimes.py 
b/pandas/core/tools/datetimes.py index b0df626da973a..18f9ece3e3812 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -35,7 +35,11 @@ guess_datetime_format, ) from pandas._libs.tslibs.strptime import array_strptime -from pandas._typing import AnyArrayLike, ArrayLike, Timezone +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Timezone, +) from pandas.core.dtypes.common import ( ensure_object, @@ -49,10 +53,16 @@ is_numeric_dtype, is_scalar, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from pandas.core.dtypes.missing import notna -from pandas.arrays import DatetimeArray, IntegerArray +from pandas.arrays import ( + DatetimeArray, + IntegerArray, +) from pandas.core import algorithms from pandas.core.algorithms import unique from pandas.core.arrays.datetimes import ( @@ -882,7 +892,11 @@ def _assemble_from_unit_mappings(arg, errors, tz): ------- Series """ - from pandas import DataFrame, to_numeric, to_timedelta + from pandas import ( + DataFrame, + to_numeric, + to_timedelta, + ) arg = DataFrame(arg) if not arg.columns.is_unique: diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 3807fdd47b54f..1032edcb22b46 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -13,7 +13,10 @@ is_scalar, needs_i8_conversion, ) -from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) import pandas as pd from pandas.core.arrays.numeric import NumericArray @@ -218,7 +221,10 @@ def to_numeric(arg, errors="raise", downcast=None): data = np.zeros(mask.shape, dtype=values.dtype) data[~mask] = values - from pandas.core.arrays import FloatingArray, IntegerArray + from pandas.core.arrays import ( + FloatingArray, + IntegerArray, + ) klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray values = klass(data, 
mask.copy()) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 0a274dcfd1d73..a335146265523 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -6,10 +6,16 @@ from pandas._libs import lib from pandas._libs.tslibs import NaT -from pandas._libs.tslibs.timedeltas import Timedelta, parse_timedelta_unit +from pandas._libs.tslibs.timedeltas import ( + Timedelta, + parse_timedelta_unit, +) from pandas.core.dtypes.common import is_list_like -from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) from pandas.core.arrays.timedeltas import sequence_to_td64ns diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py index 9b86a7325fa6d..d5ccae9abc385 100644 --- a/pandas/core/tools/times.py +++ b/pandas/core/tools/times.py @@ -1,11 +1,20 @@ -from datetime import datetime, time -from typing import List, Optional +from datetime import ( + datetime, + time, +) +from typing import ( + List, + Optional, +) import numpy as np from pandas._libs.lib import is_list_like -from pandas.core.dtypes.generic import ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCIndex, + ABCSeries, +) from pandas.core.dtypes.missing import notna diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 5af3f8f4e0a7f..6d375a92ea50a 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -13,7 +13,12 @@ is_extension_array_dtype, is_list_like, ) -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCMultiIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCMultiIndex, + ABCSeries, +) # 16 byte long hashing key _default_hash_key = "0123456789123456" @@ -155,7 +160,10 @@ def hash_tuples(vals, encoding="utf8", hash_key: str = _default_hash_key): elif not is_list_like(vals): raise TypeError("must be convertible to a list-of-tuples") - from pandas import 
Categorical, MultiIndex + from pandas import ( + Categorical, + MultiIndex, + ) if not isinstance(vals, ABCMultiIndex): vals = MultiIndex.from_tuples(vals) @@ -270,7 +278,11 @@ def hash_array( # then hash and rename categories. We allow skipping the categorization # when the values are known/likely to be unique. if categorize: - from pandas import Categorical, Index, factorize + from pandas import ( + Categorical, + Index, + factorize, + ) codes, categories = factorize(vals, sort=False) cat = Categorical(codes, Index(categories), ordered=False, fastpath=True) diff --git a/pandas/core/util/numba_.py b/pandas/core/util/numba_.py index ed920c174ea69..3da6a5cbf7326 100644 --- a/pandas/core/util/numba_.py +++ b/pandas/core/util/numba_.py @@ -1,7 +1,12 @@ """Common utilities for Numba operations""" from distutils.version import LooseVersion import types -from typing import Callable, Dict, Optional, Tuple +from typing import ( + Callable, + Dict, + Optional, + Tuple, +) import numpy as np diff --git a/pandas/core/window/__init__.py b/pandas/core/window/__init__.py index b3d0820fee4da..8f42cd782c67f 100644 --- a/pandas/core/window/__init__.py +++ b/pandas/core/window/__init__.py @@ -2,5 +2,12 @@ ExponentialMovingWindow, ExponentialMovingWindowGroupby, ) -from pandas.core.window.expanding import Expanding, ExpandingGroupby # noqa:F401 -from pandas.core.window.rolling import Rolling, RollingGroupby, Window # noqa:F401 +from pandas.core.window.expanding import ( # noqa:F401 + Expanding, + ExpandingGroupby, +) +from pandas.core.window.rolling import ( # noqa:F401 + Rolling, + RollingGroupby, + Window, +) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 8e935b7c05300..d85aa20de5ab4 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -5,7 +5,10 @@ import numpy as np -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from 
pandas.core.indexes.api import MultiIndex diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 633427369902d..518119b63209e 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -3,14 +3,21 @@ import datetime from functools import partial from textwrap import dedent -from typing import Optional, Union +from typing import ( + Optional, + Union, +) import warnings import numpy as np from pandas._libs.tslibs import Timedelta import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, TimedeltaConvertibleTypes +from pandas._typing import ( + FrameOrSeries, + FrameOrSeriesUnion, + TimedeltaConvertibleTypes, +) from pandas.compat.numpy import function as nv from pandas.util._decorators import doc @@ -35,7 +42,10 @@ GroupbyIndexer, ) from pandas.core.window.numba_ import generate_numba_groupby_ewma_func -from pandas.core.window.rolling import BaseWindow, BaseWindowGroupby +from pandas.core.window.rolling import ( + BaseWindow, + BaseWindowGroupby, +) def get_center_of_mass( diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index f91441de41448..64e092d853456 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -1,5 +1,12 @@ from textwrap import dedent -from typing import Any, Callable, Dict, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Optional, + Tuple, + Union, +) import numpy as np @@ -19,8 +26,15 @@ window_agg_numba_parameters, window_apply_parameters, ) -from pandas.core.window.indexers import BaseIndexer, ExpandingIndexer, GroupbyIndexer -from pandas.core.window.rolling import BaseWindowGroupby, RollingAndExpandingMixin +from pandas.core.window.indexers import ( + BaseIndexer, + ExpandingIndexer, + GroupbyIndexer, +) +from pandas.core.window.rolling import ( + BaseWindowGroupby, + RollingAndExpandingMixin, +) class Expanding(RollingAndExpandingMixin): diff --git 
a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py index a3b9695d777d9..f8e2734b99e20 100644 --- a/pandas/core/window/indexers.py +++ b/pandas/core/window/indexers.py @@ -1,6 +1,11 @@ """Indexer objects for computing start/end window bounds for rolling operations""" from datetime import timedelta -from typing import Dict, Optional, Tuple, Type +from typing import ( + Dict, + Optional, + Tuple, + Type, +) import numpy as np diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index aa69d4fa675cd..c9107c8ed0aa7 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -1,5 +1,11 @@ import functools -from typing import Any, Callable, Dict, Optional, Tuple +from typing import ( + Any, + Callable, + Dict, + Optional, + Tuple, +) import numpy as np diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b5714dbcd9e91..20bf0142b0855 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -24,9 +24,17 @@ import numpy as np -from pandas._libs.tslibs import BaseOffset, to_offset +from pandas._libs.tslibs import ( + BaseOffset, + to_offset, +) import pandas._libs.window.aggregations as window_aggregations -from pandas._typing import ArrayLike, Axis, FrameOrSeries, FrameOrSeriesUnion +from pandas._typing import ( + ArrayLike, + Axis, + FrameOrSeries, + FrameOrSeriesUnion, +) from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import doc @@ -50,14 +58,29 @@ from pandas.core.algorithms import factorize from pandas.core.apply import ResamplerWindowApply -from pandas.core.base import DataError, SelectionMixin +from pandas.core.base import ( + DataError, + SelectionMixin, +) import pandas.core.common as common from pandas.core.construction import extract_array -from pandas.core.groupby.base import GotItemMixin, ShallowMixin -from pandas.core.indexes.api import Index, MultiIndex +from 
pandas.core.groupby.base import ( + GotItemMixin, + ShallowMixin, +) +from pandas.core.indexes.api import ( + Index, + MultiIndex, +) from pandas.core.reshape.concat import concat -from pandas.core.util.numba_ import NUMBA_FUNC_CACHE, maybe_use_numba -from pandas.core.window.common import flex_binary_moment, zsqrt +from pandas.core.util.numba_ import ( + NUMBA_FUNC_CACHE, + maybe_use_numba, +) +from pandas.core.window.common import ( + flex_binary_moment, + zsqrt, +) from pandas.core.window.doc import ( _shared_docs, args_compat, @@ -84,7 +107,10 @@ ) if TYPE_CHECKING: - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) from pandas.core.internals import Block # noqa:F401 @@ -510,7 +536,7 @@ def calc(x): return self._apply_tablewise(homogeneous_func, name) def aggregate(self, func, *args, **kwargs): - result, how = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if result is None: return self.apply(func, raw=False, args=args, kwargs=kwargs) return result @@ -994,7 +1020,7 @@ def calc(x): axis="", ) def aggregate(self, func, *args, **kwargs): - result, how = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() + result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if result is None: # these must apply directly diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index ea60ae5c1d227..a0f6ddfd84d7b 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -6,7 +6,10 @@ from pandas._config.config import OptionError -from pandas._libs.tslibs import OutOfBoundsDatetime, OutOfBoundsTimedelta +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + OutOfBoundsTimedelta, +) class NullFrequencyError(ValueError): diff --git a/pandas/io/api.py b/pandas/io/api.py index 2d25ffe5f8a6b..2241f491b5d48 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -5,17 +5,35 @@ # flake8: noqa from 
pandas.io.clipboards import read_clipboard -from pandas.io.excel import ExcelFile, ExcelWriter, read_excel +from pandas.io.excel import ( + ExcelFile, + ExcelWriter, + read_excel, +) from pandas.io.feather_format import read_feather from pandas.io.gbq import read_gbq from pandas.io.html import read_html from pandas.io.json import read_json from pandas.io.orc import read_orc from pandas.io.parquet import read_parquet -from pandas.io.parsers import read_csv, read_fwf, read_table -from pandas.io.pickle import read_pickle, to_pickle -from pandas.io.pytables import HDFStore, read_hdf +from pandas.io.parsers import ( + read_csv, + read_fwf, + read_table, +) +from pandas.io.pickle import ( + read_pickle, + to_pickle, +) +from pandas.io.pytables import ( + HDFStore, + read_hdf, +) from pandas.io.sas import read_sas from pandas.io.spss import read_spss -from pandas.io.sql import read_sql, read_sql_query, read_sql_table +from pandas.io.sql import ( + read_sql, + read_sql_query, + read_sql_table, +) from pandas.io.stata import read_stata diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 233e58d14adf1..788fc62165c0c 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -44,7 +44,13 @@ import contextlib import ctypes -from ctypes import c_size_t, c_wchar, c_wchar_p, get_errno, sizeof +from ctypes import ( + c_size_t, + c_wchar, + c_wchar_p, + get_errno, + sizeof, +) import distutils.spawn import os import platform diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 97178261bdf72..54cb6b9f91137 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -4,7 +4,10 @@ from pandas.core.dtypes.generic import ABCDataFrame -from pandas import get_option, option_context +from pandas import ( + get_option, + option_context, +) def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover diff --git a/pandas/io/common.py b/pandas/io/common.py index e5a1f58ec6cd2..e5da12d7b1753 100644 --- 
a/pandas/io/common.py +++ b/pandas/io/common.py @@ -6,10 +6,27 @@ from collections import abc import dataclasses import gzip -from io import BufferedIOBase, BytesIO, RawIOBase, StringIO, TextIOWrapper +from io import ( + BufferedIOBase, + BytesIO, + RawIOBase, + StringIO, + TextIOWrapper, +) import mmap import os -from typing import IO, Any, AnyStr, Dict, List, Mapping, Optional, Tuple, Union, cast +from typing import ( + IO, + Any, + AnyStr, + Dict, + List, + Mapping, + Optional, + Tuple, + Union, + cast, +) from urllib.parse import ( urljoin, urlparse as parse_url, @@ -28,7 +45,10 @@ FilePathOrBuffer, StorageOptions, ) -from pandas.compat import get_lzma_file, import_lzma +from pandas.compat import ( + get_lzma_file, + import_lzma, +) from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.common import is_file_like @@ -325,7 +345,10 @@ def _get_filepath_or_buffer( err_types_to_retry_with_anon: List[Any] = [] try: import_optional_dependency("botocore") - from botocore.exceptions import ClientError, NoCredentialsError + from botocore.exceptions import ( + ClientError, + NoCredentialsError, + ) err_types_to_retry_with_anon = [ ClientError, diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py index 3bad493dee388..854e2a1ec3a73 100644 --- a/pandas/io/excel/__init__.py +++ b/pandas/io/excel/__init__.py @@ -1,4 +1,8 @@ -from pandas.io.excel._base import ExcelFile, ExcelWriter, read_excel +from pandas.io.excel._base import ( + ExcelFile, + ExcelWriter, + read_excel, +) from pandas.io.excel._odswriter import ODSWriter as _ODSWriter from pandas.io.excel._openpyxl import OpenpyxlWriter as _OpenpyxlWriter from pandas.io.excel._util import register_writer diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index f12a530ea6c34..9ad589d4583c6 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -7,24 +7,53 @@ from io import BytesIO import os from textwrap import fill -from typing import Any, Dict, 
Mapping, Optional, Union, cast +from typing import ( + Any, + Dict, + Mapping, + Optional, + Union, + cast, +) import warnings import zipfile from pandas._config import config from pandas._libs.parsers import STR_NA_VALUES -from pandas._typing import Buffer, DtypeArg, FilePathOrBuffer, StorageOptions -from pandas.compat._optional import get_version, import_optional_dependency +from pandas._typing import ( + Buffer, + DtypeArg, + FilePathOrBuffer, + StorageOptions, +) +from pandas.compat._optional import ( + get_version, + import_optional_dependency, +) from pandas.errors import EmptyDataError -from pandas.util._decorators import Appender, deprecate_nonkeyword_arguments, doc +from pandas.util._decorators import ( + Appender, + deprecate_nonkeyword_arguments, + doc, +) -from pandas.core.dtypes.common import is_bool, is_float, is_integer, is_list_like +from pandas.core.dtypes.common import ( + is_bool, + is_float, + is_integer, + is_list_like, +) from pandas.core.frame import DataFrame from pandas.core.shared_docs import _shared_docs -from pandas.io.common import IOHandles, get_handle, stringify_path, validate_header_arg +from pandas.io.common import ( + IOHandles, + get_handle, + stringify_path, + validate_header_arg, +) from pandas.io.excel._util import ( fill_mi_header, get_default_engine, @@ -129,11 +158,9 @@ ``pyxlsb`` will be used. .. versionadded:: 1.3.0 - - Otherwise if `openpyxl `_ is installed, - then ``openpyxl`` will be used. - - Otherwise if ``xlrd >= 2.0`` is installed, a ``ValueError`` will be raised. - - Otherwise ``xlrd`` will be used and a ``FutureWarning`` will be raised. This - case will raise a ``ValueError`` in a future version of pandas. + - Otherwise ``openpyxl`` will be used. + + .. versionchanged:: 1.3.0 converters : dict, default None Dict of functions for converting values in certain columns. 
Keys can @@ -893,8 +920,8 @@ def check_extension(cls, ext: str): """ if ext.startswith("."): ext = ext[1:] - # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" - # (not iterable) [attr-defined] + # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" (not + # iterable) if not any( ext in extension for extension in cls.supported_extensions # type: ignore[attr-defined] @@ -997,7 +1024,7 @@ class ExcelFile: Parameters ---------- path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath), - a file-like object, xlrd workbook or openpypl workbook. + a file-like object, xlrd workbook or openpyxl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. engine : str, default None @@ -1111,9 +1138,7 @@ def __init__( stacklevel = 2 warnings.warn( f"Your version of xlrd is {xlrd_version}. In xlrd >= 2.0, " - f"only the xls format is supported. As a result, the " - f"openpyxl engine will be used if it is installed and the " - f"engine argument is not specified. Install " + f"only the xls format is supported. 
Install " f"openpyxl instead.", FutureWarning, stacklevel=stacklevel, diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 8987d5bb42057..c5aa4a061a05b 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -1,8 +1,15 @@ -from typing import List, cast +from typing import ( + List, + cast, +) import numpy as np -from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions +from pandas._typing import ( + FilePathOrBuffer, + Scalar, + StorageOptions, +) from pandas.compat._optional import import_optional_dependency import pandas as pd @@ -78,7 +85,11 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: """ Parse an ODF Table into a list of lists """ - from odf.table import CoveredTableCell, TableCell, TableRow + from odf.table import ( + CoveredTableCell, + TableCell, + TableRow, + ) covered_cell_name = CoveredTableCell().qname table_cell_name = TableCell().qname diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 0bea19bec2cdd..d00e600b4e5d4 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,6 +1,14 @@ from collections import defaultdict import datetime -from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Union +from typing import ( + Any, + DefaultDict, + Dict, + List, + Optional, + Tuple, + Union, +) import pandas._libs.json as json from pandas._typing import StorageOptions @@ -55,7 +63,11 @@ def write_cells( """ Write the frame cells using odf """ - from odf.table import Table, TableCell, TableRow + from odf.table import ( + Table, + TableCell, + TableRow, + ) from odf.text import P sheet_name = self._get_sheet_name(sheet_name) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 3a753a707166e..be2c9b919a5c3 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -1,15 +1,26 @@ from __future__ import annotations -from distutils.version import 
LooseVersion import mmap -from typing import TYPE_CHECKING, Dict, List, Optional +from typing import ( + TYPE_CHECKING, + Dict, + List, + Optional, +) import numpy as np -from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions -from pandas.compat._optional import get_version, import_optional_dependency - -from pandas.io.excel._base import BaseExcelReader, ExcelWriter +from pandas._typing import ( + FilePathOrBuffer, + Scalar, + StorageOptions, +) +from pandas.compat._optional import import_optional_dependency + +from pandas.io.excel._base import ( + BaseExcelReader, + ExcelWriter, +) from pandas.io.excel._util import validate_freeze_panes if TYPE_CHECKING: @@ -216,7 +227,10 @@ def _convert_to_fill(cls, fill_dict): ------- fill : openpyxl.styles.Fill """ - from openpyxl.styles import GradientFill, PatternFill + from openpyxl.styles import ( + GradientFill, + PatternFill, + ) _pattern_fill_key_map = { "patternType": "fill_type", @@ -509,40 +523,23 @@ def get_sheet_by_index(self, index: int): def _convert_cell(self, cell, convert_float: bool) -> Scalar: - from openpyxl.cell.cell import TYPE_BOOL, TYPE_ERROR, TYPE_NUMERIC + from openpyxl.cell.cell import ( + TYPE_ERROR, + TYPE_NUMERIC, + ) if cell.value is None: return "" # compat with xlrd - elif cell.is_date: - return cell.value elif cell.data_type == TYPE_ERROR: return np.nan - elif cell.data_type == TYPE_BOOL: - return bool(cell.value) - elif cell.data_type == TYPE_NUMERIC: - # GH5394 - if convert_float: - val = int(cell.value) - if val == cell.value: - return val - else: - return float(cell.value) + elif not convert_float and cell.data_type == TYPE_NUMERIC: + return float(cell.value) return cell.value def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: - # GH 39001 - # Reading of excel file depends on dimension data being correct but - # writers sometimes omit or get it wrong - import openpyxl - - version = LooseVersion(get_version(openpyxl)) - - # There is no good way of 
determining if a sheet is read-only - # https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1605 - is_readonly = hasattr(sheet, "reset_dimensions") - if version >= "3.0.0" and is_readonly: + if self.book.read_only: sheet.reset_dimensions() data: List[List[Scalar]] = [] @@ -556,7 +553,7 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: # Trim trailing empty rows data = data[: last_row_with_data + 1] - if version >= "3.0.0" and is_readonly and len(data) > 0: + if self.book.read_only and len(data) > 0: # With dimension reset, openpyxl no longer pads rows max_width = max(len(data_row) for data_row in data) if min(len(data_row) for data_row in data) < max_width: diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py index f77a6bd5b1ad5..71ec189854f6d 100644 --- a/pandas/io/excel/_pyxlsb.py +++ b/pandas/io/excel/_pyxlsb.py @@ -1,6 +1,10 @@ from typing import List -from pandas._typing import FilePathOrBuffer, Scalar, StorageOptions +from pandas._typing import ( + FilePathOrBuffer, + Scalar, + StorageOptions, +) from pandas.compat._optional import import_optional_dependency from pandas.io.excel._base import BaseExcelReader diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 01ccc9d15a6a3..6612b681a9171 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -1,8 +1,14 @@ -from typing import List, MutableMapping +from typing import ( + List, + MutableMapping, +) from pandas.compat._optional import import_optional_dependency -from pandas.core.dtypes.common import is_integer, is_list_like +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) _writers: MutableMapping[str, str] = {} @@ -62,13 +68,6 @@ def get_default_engine(ext, mode="reader"): _default_writers["xlsx"] = "xlsxwriter" return _default_writers[ext] else: - if ( - import_optional_dependency("openpyxl", errors="ignore") is None - and import_optional_dependency("xlrd", errors="ignore") is not None - ): - # if no 
openpyxl but xlrd installed, return xlrd - # the version is handled elsewhere - _default_readers["xlsx"] = "xlrd" return _default_readers[ext] diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index d7bbec578d89d..849572cff813a 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -1,4 +1,8 @@ -from typing import Dict, List, Tuple +from typing import ( + Dict, + List, + Tuple, +) import pandas._libs.json as json from pandas._typing import StorageOptions diff --git a/pandas/io/excel/_xlwt.py b/pandas/io/excel/_xlwt.py index 9a725c15de61e..a8386242faf72 100644 --- a/pandas/io/excel/_xlwt.py +++ b/pandas/io/excel/_xlwt.py @@ -1,4 +1,7 @@ -from typing import TYPE_CHECKING, Dict +from typing import ( + TYPE_CHECKING, + Dict, +) import pandas._libs.json as json from pandas._typing import StorageOptions diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 422677771b4d0..3999f91a7b141 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -2,11 +2,18 @@ from typing import AnyStr -from pandas._typing import FilePathOrBuffer, StorageOptions +from pandas._typing import ( + FilePathOrBuffer, + StorageOptions, +) from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import doc -from pandas import DataFrame, Int64Index, RangeIndex +from pandas import ( + DataFrame, + Int64Index, + RangeIndex, +) from pandas.core import generic from pandas.io.common import get_handle diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index ea291bcbfa44c..bdd2b3d6e4c6a 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -69,8 +69,7 @@ def check_main(): return not hasattr(main, "__file__") or get_option("mode.sim_interactive") try: - # pandas\io\formats\console.py:72: error: Name '__IPYTHON__' is not - # defined [name-defined] + # error: Name '__IPYTHON__' is not defined return __IPYTHON__ or check_main() # 
type: ignore[name-defined] except NameError: return check_main() @@ -85,8 +84,7 @@ def in_ipython_frontend(): bool """ try: - # pandas\io\formats\console.py:86: error: Name 'get_ipython' is not - # defined [name-defined] + # error: Name 'get_ipython' is not defined ip = get_ipython() # type: ignore[name-defined] return "zmq" in str(type(ip)).lower() except NameError: diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index 8abe13db370ca..f27bae2c161f3 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -3,7 +3,10 @@ """ import re -from typing import Dict, Optional +from typing import ( + Dict, + Optional, +) import warnings diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index b027d8139f24b..1ec2f7bfdd4be 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -21,18 +21,32 @@ import numpy as np from pandas._libs.lib import is_list_like -from pandas._typing import IndexLabel, StorageOptions +from pandas._typing import ( + IndexLabel, + StorageOptions, +) from pandas.util._decorators import doc from pandas.core.dtypes import missing -from pandas.core.dtypes.common import is_float, is_scalar +from pandas.core.dtypes.common import ( + is_float, + is_scalar, +) -from pandas import DataFrame, Index, MultiIndex, PeriodIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, +) from pandas.core import generic import pandas.core.common as com from pandas.io.formats._color_data import CSS4_COLORS -from pandas.io.formats.css import CSSResolver, CSSWarning +from pandas.io.formats.css import ( + CSSResolver, + CSSWarning, +) from pandas.io.formats.format import get_level_lengths from pandas.io.formats.printing import pprint_thing @@ -475,7 +489,7 @@ def __init__( if not len(Index(cols).intersection(df.columns)): raise KeyError("passes columns are not ALL present dataframe") - if len(Index(cols).intersection(df.columns)) != len(cols): + if len(Index(cols).intersection(df.columns)) 
!= len(set(cols)): # Deprecated in GH#17295, enforced in 1.0.0 raise KeyError("Not all names specified in 'columns' are found") @@ -613,9 +627,8 @@ def _format_header(self) -> Iterable[ExcelCell]: "" ] * len(self.columns) if reduce(lambda x, y: x and y, map(lambda x: x != "", row)): - # pandas\io\formats\excel.py:618: error: Incompatible types in - # assignment (expression has type "Generator[ExcelCell, None, - # None]", variable has type "Tuple[]") [assignment] + # error: Incompatible types in assignment (expression has type + # "Generator[ExcelCell, None, None]", variable has type "Tuple[]") gen2 = ( # type: ignore[assignment] ExcelCell(self.rowcounter, colindex, val, self.header_style) for colindex, val in enumerate(row) @@ -760,7 +773,8 @@ def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]: series = self.df.iloc[:, colidx] for i, val in enumerate(series): if styles is not None: - xlstyle = self.style_converter(";".join(styles[i, colidx])) + css = ";".join([a + ":" + str(v) for (a, v) in styles[i, colidx]]) + xlstyle = self.style_converter(css) yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) def get_formatted_cells(self) -> Iterable[ExcelCell]: @@ -819,9 +833,8 @@ def write( if isinstance(writer, ExcelWriter): need_save = False else: - # pandas\io\formats\excel.py:808: error: Cannot instantiate - # abstract class 'ExcelWriter' with abstract attributes 'engine', - # 'save', 'supported_extensions' and 'write_cells' [abstract] + # error: Cannot instantiate abstract class 'ExcelWriter' with abstract + # attributes 'engine', 'save', 'supported_extensions' and 'write_cells' writer = ExcelWriter( # type: ignore[abstract] writer, engine=engine, storage_options=storage_options ) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 05d94366e6623..a1b6986079723 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -5,7 +5,10 @@ from __future__ import annotations from contextlib import 
contextmanager -from csv import QUOTE_NONE, QUOTE_NONNUMERIC +from csv import ( + QUOTE_NONE, + QUOTE_NONNUMERIC, +) import decimal from functools import partial from io import StringIO @@ -33,11 +36,19 @@ import numpy as np -from pandas._config.config import get_option, set_option +from pandas._config.config import ( + get_option, + set_option, +) from pandas._libs import lib from pandas._libs.missing import NA -from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT +from pandas._libs.tslibs import ( + NaT, + Timedelta, + Timestamp, + iNaT, +) from pandas._libs.tslibs.nattype import NaTType from pandas._typing import ( ArrayLike, @@ -66,23 +77,39 @@ is_scalar, is_timedelta64_dtype, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) from pandas.core.arrays.datetimes import DatetimeArray from pandas.core.arrays.timedeltas import TimedeltaArray from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.construction import extract_array -from pandas.core.indexes.api import Index, MultiIndex, PeriodIndex, ensure_index +from pandas.core.indexes.api import ( + Index, + MultiIndex, + PeriodIndex, + ensure_index, +) from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.reshape.concat import concat from pandas.io.common import stringify_path -from pandas.io.formats.printing import adjoin, justify, pprint_thing +from pandas.io.formats.printing import ( + adjoin, + justify, + pprint_thing, +) if TYPE_CHECKING: - from pandas import Categorical, DataFrame, Series + from pandas import ( + Categorical, + DataFrame, + Series, + ) common_docstring = """ @@ -989,7 +1016,10 @@ def to_html( render_links : bool, default False Convert URLs to HTML links. 
""" - from pandas.io.formats.html import HTMLFormatter, NotebookFormatter + from pandas.io.formats.html import ( + HTMLFormatter, + NotebookFormatter, + ) Klass = NotebookFormatter if notebook else HTMLFormatter @@ -1347,11 +1377,9 @@ def _value_formatter( def base_formatter(v): assert float_format is not None # for mypy - # pandas\io\formats\format.py:1411: error: "str" not callable - # [operator] - - # pandas\io\formats\format.py:1411: error: Unexpected keyword - # argument "value" for "__call__" of "EngFormatter" [call-arg] + # error: "str" not callable + # error: Unexpected keyword argument "value" for "__call__" of + # "EngFormatter" return ( float_format(value=v) # type: ignore[operator,call-arg] if notna(v) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index b4f7e3922f02f..6f4a6d87c7959 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -3,16 +3,32 @@ """ from textwrap import dedent -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union, cast +from typing import ( + Any, + Dict, + Iterable, + List, + Mapping, + Optional, + Tuple, + Union, + cast, +) from pandas._config import get_option from pandas._libs import lib -from pandas import MultiIndex, option_context +from pandas import ( + MultiIndex, + option_context, +) from pandas.io.common import is_url -from pandas.io.formats.format import DataFrameFormatter, get_level_lengths +from pandas.io.formats.format import ( + DataFrameFormatter, + get_level_lengths, +) from pandas.io.formats.printing import pprint_thing diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index b1675fa5c5375..2c1739998da08 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -1,6 +1,9 @@ from __future__ import annotations -from abc import ABC, abstractmethod +from abc import ( + ABC, + abstractmethod, +) import sys from typing import ( IO, @@ -16,7 +19,10 @@ from pandas._config import get_option -from pandas._typing import Dtype, 
FrameOrSeriesUnion +from pandas._typing import ( + Dtype, + FrameOrSeriesUnion, +) from pandas.core.indexes.api import Index diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index f6f3571955e6e..fce0814e979a4 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -1,8 +1,19 @@ """ Module for formatting output data in Latex. """ -from abc import ABC, abstractmethod -from typing import Iterator, List, Optional, Sequence, Tuple, Type, Union +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Iterator, + List, + Optional, + Sequence, + Tuple, + Type, + Union, +) import numpy as np diff --git a/pandas/io/formats/string.py b/pandas/io/formats/string.py index 1fe2ed9806535..622001f280885 100644 --- a/pandas/io/formats/string.py +++ b/pandas/io/formats/string.py @@ -2,7 +2,11 @@ Module for formatting output data in console (to string). """ from shutil import get_terminal_size -from typing import Iterable, List, Optional +from typing import ( + Iterable, + List, + Optional, +) import numpy as np diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 735fb345363c7..3b0d857217d43 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -27,23 +27,34 @@ from pandas._config import get_option from pandas._libs import lib -from pandas._typing import Axis, FrameOrSeries, FrameOrSeriesUnion, IndexLabel +from pandas._typing import ( + Axis, + FrameOrSeries, + FrameOrSeriesUnion, + IndexLabel, +) from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import doc from pandas.core.dtypes.common import is_float +from pandas.core.dtypes.generic import ABCSeries import pandas as pd -from pandas.api.types import is_dict_like, is_list_like +from pandas.api.types import ( + is_dict_like, + is_list_like, +) from pandas.core import generic import pandas.core.common as com from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from 
pandas.core.indexing import maybe_numeric_slice, non_reducing_slice +from pandas.core.indexes.api import Index jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") -CSSSequence = Sequence[Tuple[str, Union[str, int, float]]] -CSSProperties = Union[str, CSSSequence] + +CSSPair = Tuple[str, Union[str, int, float]] +CSSList = List[CSSPair] +CSSProperties = Union[str, CSSList] CSSStyles = List[Dict[str, CSSProperties]] try: @@ -158,13 +169,12 @@ def __init__( uuid_len: int = 5, ): # validate ordered args - if not isinstance(data, (pd.Series, pd.DataFrame)): - raise TypeError("``data`` must be a Series or DataFrame") - if data.ndim == 1: + if isinstance(data, pd.Series): data = data.to_frame() + if not isinstance(data, DataFrame): + raise TypeError("``data`` must be a Series or DataFrame") if not data.index.is_unique or not data.columns.is_unique: raise ValueError("style is not supported for non-unique indices.") - assert isinstance(data, DataFrame) self.data: DataFrame = data self.index: pd.Index = data.index self.columns: pd.Index = data.columns @@ -184,7 +194,7 @@ def __init__( # assign additional default vars self.hidden_index: bool = False self.hidden_columns: Sequence[int] = [] - self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list) + self.ctx: DefaultDict[Tuple[int, int], CSSList] = defaultdict(list) self.cell_context: Dict[str, Any] = {} self._todo: List[Tuple[Callable, Tuple, Dict]] = [] self.tooltips: Optional[_Tooltips] = None @@ -404,7 +414,8 @@ def _translate(self): clabels = [[x] for x in clabels] clabels = list(zip(*clabels)) - cellstyle_map = defaultdict(list) + cellstyle_map: DefaultDict[Tuple[CSSPair, ...], List[str]] = defaultdict(list) + head = [] for r in range(n_clvls): @@ -455,7 +466,7 @@ def _translate(self): } colspan = col_lengths.get((r, c), 0) if colspan > 1: - es["attributes"] = [f'colspan="{colspan}"'] + es["attributes"] = f'colspan="{colspan}"' row_es.append(es) head.append(row_es) @@ 
-474,8 +485,15 @@ def _translate(self): ) index_header_row.extend( - [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS])}] - * (len(clabels[0]) - len(hidden_columns)) + [ + { + "type": "th", + "value": BLANK_VALUE, + "class": " ".join([BLANK_CLASS, f"col{c}"]), + } + for c in range(len(clabels[0])) + if c not in hidden_columns + ] ) head.append(index_header_row) @@ -499,7 +517,7 @@ def _translate(self): } rowspan = idx_lengths.get((c, r), 0) if rowspan > 1: - es["attributes"] = [f'rowspan="{rowspan}"'] + es["attributes"] = f'rowspan="{rowspan}"' row_es.append(es) for c, col in enumerate(self.data.columns): @@ -511,28 +529,25 @@ def _translate(self): "value": value, "display_value": formatter(value), "is_visible": (c not in hidden_columns), + "attributes": "", } # only add an id if the cell has a style - props = [] + props: CSSList = [] if self.cell_ids or (r, c) in ctx: row_dict["id"] = "_".join(cs[1:]) - for x in ctx[r, c]: - # have to handle empty styles like [''] - if x.count(":"): - props.append(tuple(x.split(":"))) - else: - props.append(("", "")) + props.extend(ctx[r, c]) # add custom classes from cell context cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) row_dict["class"] = " ".join(cs) row_es.append(row_dict) - cellstyle_map[tuple(props)].append(f"row{r}_col{c}") + if props: # (), [] won't be in cellstyle_map, cellstyle respectively + cellstyle_map[tuple(props)].append(f"row{r}_col{c}") body.append(row_es) - cellstyle = [ + cellstyle: List[Dict[str, Union[CSSList, List[str]]]] = [ {"props": list(props), "selectors": selectors} for props, selectors in cellstyle_map.items() ] @@ -608,7 +623,7 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler row_locs = range(len(self.data)) col_locs = range(len(self.data.columns)) else: - subset = non_reducing_slice(subset) + subset = _non_reducing_slice(subset) if len(subset) == 1: subset = subset, self.data.columns @@ -738,19 +753,14 @@ def render(self, 
**kwargs) -> str: self._compute() # TODO: namespace all the pandas keys d = self._translate() - # filter out empty styles, every cell will have a class - # but the list of props may just be [['', '']]. - # so we have the nested anys below - trimmed = [x for x in d["cellstyle"] if any(any(y) for y in x["props"])] - d["cellstyle"] = trimmed d.update(kwargs) return self.template.render(**d) def _update_ctx(self, attrs: DataFrame) -> None: """ - Update the state of the Styler. + Update the state of the Styler for data cells. - Collects a mapping of {index_label: [': ']}. + Collects a mapping of {index_label: [('', ''), ..]}. Parameters ---------- @@ -759,20 +769,13 @@ def _update_ctx(self, attrs: DataFrame) -> None: Whitespace shouldn't matter and the final trailing ';' shouldn't matter. """ - coli = {k: i for i, k in enumerate(self.columns)} - rowi = {k: i for i, k in enumerate(self.index)} - for jj in range(len(attrs.columns)): - cn = attrs.columns[jj] - j = coli[cn] + for cn in attrs.columns: for rn, c in attrs[[cn]].itertuples(): if not c: continue - c = c.rstrip(";") - if not c: - continue - i = rowi[rn] - for pair in c.split(";"): - self.ctx[(i, j)].append(pair) + css_list = _maybe_convert_css_to_tuples(c) + i, j = self.index.get_loc(rn), self.columns.get_loc(cn) + self.ctx[(i, j)].extend(css_list) def _copy(self, deepcopy: bool = False) -> Styler: styler = Styler( @@ -834,7 +837,7 @@ def _apply( **kwargs, ) -> Styler: subset = slice(None) if subset is None else subset - subset = non_reducing_slice(subset) + subset = _non_reducing_slice(subset) data = self.data.loc[subset] if axis is not None: result = data.apply(func, axis=axis, result_type="expand", **kwargs) @@ -937,7 +940,7 @@ def _applymap(self, func: Callable, subset=None, **kwargs) -> Styler: func = partial(func, **kwargs) # applymap doesn't take kwargs? 
if subset is None: subset = pd.IndexSlice[:] - subset = non_reducing_slice(subset) + subset = _non_reducing_slice(subset) result = self.data.loc[subset].applymap(func) self._update_ctx(result) return self @@ -1287,7 +1290,7 @@ def hide_columns(self, subset) -> Styler: ------- self : Styler """ - subset = non_reducing_slice(subset) + subset = _non_reducing_slice(subset) hidden_df = self.data.loc[subset] self.hidden_columns = self.columns.get_indexer_for(hidden_df.columns) return self @@ -1296,33 +1299,6 @@ def hide_columns(self, subset) -> Styler: # A collection of "builtin" styles # ----------------------------------------------------------------------- - @staticmethod - def _highlight_null(v, null_color: str) -> str: - return f"background-color: {null_color}" if pd.isna(v) else "" - - def highlight_null( - self, - null_color: str = "red", - subset: Optional[IndexLabel] = None, - ) -> Styler: - """ - Shade the background ``null_color`` for missing values. - - Parameters - ---------- - null_color : str, default 'red' - subset : label or list of labels, default None - A valid slice for ``data`` to limit the style application to. - - .. versionadded:: 1.1.0 - - Returns - ------- - self : Styler - """ - self.applymap(self._highlight_null, null_color=null_color, subset=subset) - return self - def background_gradient( self, cmap="PuBu", @@ -1389,8 +1365,9 @@ def background_gradient( of the data is extended by ``low * (x.max() - x.min())`` and ``high * (x.max() - x.min())`` before normalizing. 
""" - subset = maybe_numeric_slice(self.data, subset) - subset = non_reducing_slice(subset) + if subset is None: + subset = self.data.select_dtypes(include=np.number).columns + self.apply( self._background_gradient, cmap=cmap, @@ -1451,7 +1428,7 @@ def relative_luminance(rgba) -> float: The relative luminance as a value from 0 to 1 """ r, g, b = ( - x / 12.92 if x <= 0.03928 else ((x + 0.055) / 1.055 ** 2.4) + x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4 for x in rgba[:3] ) return 0.2126 * r + 0.7152 * g + 0.0722 * b @@ -1623,8 +1600,9 @@ def bar( "(eg: color=['#d65f5f', '#5fba7d'])" ) - subset = maybe_numeric_slice(self.data, subset) - subset = non_reducing_slice(subset) + if subset is None: + subset = self.data.select_dtypes(include=np.number).columns + self.apply( self._bar, subset=subset, @@ -1638,8 +1616,39 @@ def bar( return self + def highlight_null( + self, + null_color: str = "red", + subset: Optional[IndexLabel] = None, + ) -> Styler: + """ + Shade the background ``null_color`` for missing values. + + Parameters + ---------- + null_color : str, default 'red' + subset : label or list of labels, default None + A valid slice for ``data`` to limit the style application to. + + .. versionadded:: 1.1.0 + + Returns + ------- + self : Styler + """ + + def f(data: DataFrame, props: str) -> np.ndarray: + return np.where(pd.isna(data).values, props, "") + + return self.apply( + f, axis=None, subset=subset, props=f"background-color: {null_color};" + ) + def highlight_max( - self, subset=None, color: str = "yellow", axis: Optional[Axis] = 0 + self, + subset: Optional[IndexLabel] = None, + color: str = "yellow", + axis: Optional[Axis] = 0, ) -> Styler: """ Highlight the maximum by shading the background. 
@@ -1658,10 +1667,19 @@ def highlight_max( ------- self : Styler """ - return self._highlight_handler(subset=subset, color=color, axis=axis, max_=True) + + def f(data: FrameOrSeries, props: str) -> np.ndarray: + return np.where(data == np.nanmax(data.values), props, "") + + return self.apply( + f, axis=axis, subset=subset, props=f"background-color: {color};" + ) def highlight_min( - self, subset=None, color: str = "yellow", axis: Optional[Axis] = 0 + self, + subset: Optional[IndexLabel] = None, + color: str = "yellow", + axis: Optional[Axis] = 0, ) -> Styler: """ Highlight the minimum by shading the background. @@ -1680,43 +1698,13 @@ def highlight_min( ------- self : Styler """ - return self._highlight_handler( - subset=subset, color=color, axis=axis, max_=False - ) - - def _highlight_handler( - self, - subset=None, - color: str = "yellow", - axis: Optional[Axis] = None, - max_: bool = True, - ) -> Styler: - subset = non_reducing_slice(maybe_numeric_slice(self.data, subset)) - self.apply( - self._highlight_extrema, color=color, axis=axis, subset=subset, max_=max_ - ) - return self - - @staticmethod - def _highlight_extrema( - data: FrameOrSeries, color: str = "yellow", max_: bool = True - ): - """ - Highlight the min or max in a Series or DataFrame. 
- """ - attr = f"background-color: {color}" - if max_: - extrema = data == np.nanmax(data.to_numpy()) - else: - extrema = data == np.nanmin(data.to_numpy()) + def f(data: FrameOrSeries, props: str) -> np.ndarray: + return np.where(data == np.nanmin(data.values), props, "") - if data.ndim == 1: # Series from .apply - return [attr if v else "" for v in extrema] - else: # DataFrame from .tee - return pd.DataFrame( - np.where(extrema, attr, ""), index=data.index, columns=data.columns - ) + return self.apply( + f, axis=axis, subset=subset, props=f"background-color: {color};" + ) @classmethod def from_custom_template(cls, searchpath, name): @@ -1740,8 +1728,8 @@ def from_custom_template(cls, searchpath, name): loader = jinja2.ChoiceLoader([jinja2.FileSystemLoader(searchpath), cls.loader]) # mypy doesn't like dynamically-defined classes - # error: Variable "cls" is not valid as a type [valid-type] - # error: Invalid base class "cls" [misc] + # error: Variable "cls" is not valid as a type + # error: Invalid base class "cls" class MyStyler(cls): # type:ignore[valid-type,misc] env = jinja2.Environment(loader=loader) template = env.get_template(name) @@ -2068,7 +2056,7 @@ def _maybe_wrap_formatter( raise TypeError(msg) -def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSSequence: +def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: """ Convert css-string to sequence of tuples format if needed. 'color:red; border:1px solid black;' -> [('color', 'red'), @@ -2088,3 +2076,44 @@ def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSSequence: f"for example 'attr: val;'. '{style}' was given." ) return style + + +def _non_reducing_slice(slice_): + """ + Ensure that a slice doesn't reduce to a Series or Scalar. + + Any user-passed `subset` should have this called on it + to make sure we're always working with DataFrames. 
+ """ + # default to column slice, like DataFrame + # ['A', 'B'] -> IndexSlices[:, ['A', 'B']] + kinds = (ABCSeries, np.ndarray, Index, list, str) + if isinstance(slice_, kinds): + slice_ = pd.IndexSlice[:, slice_] + + def pred(part) -> bool: + """ + Returns + ------- + bool + True if slice does *not* reduce, + False if `part` is a tuple. + """ + # true when slice does *not* reduce, False when part is a tuple, + # i.e. MultiIndex slice + if isinstance(part, tuple): + # GH#39421 check for sub-slice: + return any((isinstance(s, slice) or is_list_like(s)) for s in part) + else: + return isinstance(part, slice) or is_list_like(part) + + if not is_list_like(slice_): + if not isinstance(slice_, slice): + # a 1-d slice, like df.loc[1] + slice_ = [[slice_]] + else: + # slice(a, b, c) + slice_ = [slice_] # to tuplize later + else: + slice_ = [part if pred(part) else [part] for part in slice_] + return tuple(slice_) diff --git a/pandas/io/formats/templates/html.tpl b/pandas/io/formats/templates/html.tpl index b315c57a65cdf..65fc1dfbb37c4 100644 --- a/pandas/io/formats/templates/html.tpl +++ b/pandas/io/formats/templates/html.tpl @@ -39,7 +39,7 @@ {% for c in r %} {% if c.is_visible != False %} - <{{c.type}} class="{{c.class}}" {{c.attributes|join(" ")}}>{{c.value}} + <{{c.type}} class="{{c.class}}" {{c.attributes}}>{{c.value}} {% endif %} {% endfor %} @@ -56,7 +56,7 @@ {% for c in r %} {% if c.is_visible != False %} - <{{c.type}} {% if c.id is defined -%} id="T_{{uuid}}{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes|join(" ")}}>{{c.display_value}} + <{{c.type}} {% if c.id is defined -%} id="T_{{uuid}}{{c.id}}" {%- endif %} class="{{c.class}}" {{c.attributes}}>{{c.display_value}} {% endif %} {% endfor %} diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index 260d688ccb0cc..215d966609ab4 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -1,7 +1,14 @@ """ Google BigQuery support """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, 
List, Optional, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Union, +) from pandas.compat._optional import import_optional_dependency diff --git a/pandas/io/html.py b/pandas/io/html.py index c445ee81ec8ed..7541e5d62fd1e 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -8,11 +8,22 @@ import numbers import os import re -from typing import Dict, List, Optional, Pattern, Sequence, Tuple, Union +from typing import ( + Dict, + List, + Optional, + Pattern, + Sequence, + Tuple, + Union, +) from pandas._typing import FilePathOrBuffer from pandas.compat._optional import import_optional_dependency -from pandas.errors import AbstractMethodError, EmptyDataError +from pandas.errors import ( + AbstractMethodError, + EmptyDataError, +) from pandas.util._decorators import deprecate_nonkeyword_arguments from pandas.core.dtypes.common import is_list_like @@ -20,7 +31,12 @@ from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.frame import DataFrame -from pandas.io.common import is_url, stringify_path, urlopen, validate_header_arg +from pandas.io.common import ( + is_url, + stringify_path, + urlopen, + validate_header_arg, +) from pandas.io.formats.printing import pprint_thing from pandas.io.parsers import TextParser @@ -700,7 +716,11 @@ def _build_doc(self): pandas.io.html._HtmlFrameParser._build_doc """ from lxml.etree import XMLSyntaxError - from lxml.html import HTMLParser, fromstring, parse + from lxml.html import ( + HTMLParser, + fromstring, + parse, + ) parser = HTMLParser(recover=True, encoding=self.encoding) diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py index 48febb086c302..1de1abcdb9920 100644 --- a/pandas/io/json/__init__.py +++ b/pandas/io/json/__init__.py @@ -1,5 +1,13 @@ -from pandas.io.json._json import dumps, loads, read_json, to_json -from pandas.io.json._normalize import _json_normalize, json_normalize +from pandas.io.json._json import ( + dumps, + loads, + 
read_json, + to_json, +) +from pandas.io.json._normalize import ( + _json_normalize, + json_normalize, +) from pandas.io.json._table_schema import build_table_schema __all__ = [ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 497cf261fcece..635a493d03d61 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,9 +1,20 @@ -from abc import ABC, abstractmethod +from abc import ( + ABC, + abstractmethod, +) from collections import abc import functools from io import StringIO from itertools import islice -from typing import Any, Callable, Mapping, Optional, Tuple, Type, Union +from typing import ( + Any, + Callable, + Mapping, + Optional, + Tuple, + Type, + Union, +) import numpy as np @@ -18,11 +29,25 @@ StorageOptions, ) from pandas.errors import AbstractMethodError -from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments, doc +from pandas.util._decorators import ( + deprecate_kwarg, + deprecate_nonkeyword_arguments, + doc, +) -from pandas.core.dtypes.common import ensure_str, is_period_dtype +from pandas.core.dtypes.common import ( + ensure_str, + is_period_dtype, +) -from pandas import DataFrame, MultiIndex, Series, isna, notna, to_datetime +from pandas import ( + DataFrame, + MultiIndex, + Series, + isna, + notna, + to_datetime, +) from pandas.core import generic from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.generic import NDFrame @@ -37,7 +62,10 @@ stringify_path, ) from pandas.io.json._normalize import convert_to_line_delimits -from pandas.io.json._table_schema import build_table_schema, parse_table_schema +from pandas.io.json._table_schema import ( + build_table_schema, + parse_table_schema, +) from pandas.io.parsers.readers import validate_integer loads = json.loads diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 8dcc9fa490635..975eb263eca07 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -2,9 
+2,20 @@ # JSON normalization routines from __future__ import annotations -from collections import abc, defaultdict +from collections import ( + abc, + defaultdict, +) import copy -from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union +from typing import ( + Any, + DefaultDict, + Dict, + Iterable, + List, + Optional, + Union, +) import numpy as np diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 0499a35296490..4824dab764259 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -3,11 +3,21 @@ https://specs.frictionlessdata.io/json-table-schema/ """ -from typing import TYPE_CHECKING, Any, Dict, Optional, cast +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Optional, + cast, +) import warnings import pandas._libs.json as json -from pandas._typing import DtypeObj, FrameOrSeries, JSONSerializable +from pandas._typing import ( + DtypeObj, + FrameOrSeries, + JSONSerializable, +) from pandas.core.dtypes.common import ( is_bool_dtype, diff --git a/pandas/io/orc.py b/pandas/io/orc.py index a219be99540dc..df76156aac9eb 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -2,7 +2,11 @@ from __future__ import annotations import distutils -from typing import TYPE_CHECKING, List, Optional +from typing import ( + TYPE_CHECKING, + List, + Optional, +) from pandas._typing import FilePathOrBuffer diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 0a322059ed77c..183d753ddd60b 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -4,15 +4,29 @@ from distutils.version import LooseVersion import io import os -from typing import Any, AnyStr, Dict, List, Optional, Tuple +from typing import ( + Any, + AnyStr, + Dict, + List, + Optional, + Tuple, +) from warnings import catch_warnings -from pandas._typing import FilePathOrBuffer, StorageOptions +from pandas._typing import ( + FilePathOrBuffer, + StorageOptions, +) from pandas.compat._optional import 
import_optional_dependency from pandas.errors import AbstractMethodError from pandas.util._decorators import doc -from pandas import DataFrame, MultiIndex, get_option +from pandas import ( + DataFrame, + MultiIndex, + get_option, +) from pandas.core import generic from pandas.io.common import ( diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 8961fd0a7af06..2d17978b60327 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -24,8 +24,14 @@ import pandas._libs.parsers as parsers from pandas._libs.parsers import STR_NA_VALUES from pandas._libs.tslibs import parsing -from pandas._typing import DtypeArg, FilePathOrBuffer -from pandas.errors import ParserError, ParserWarning +from pandas._typing import ( + DtypeArg, + FilePathOrBuffer, +) +from pandas.errors import ( + ParserError, + ParserWarning, +) from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( @@ -49,11 +55,18 @@ from pandas.core import algorithms from pandas.core.arrays import Categorical -from pandas.core.indexes.api import Index, MultiIndex, ensure_index_from_sequences +from pandas.core.indexes.api import ( + Index, + MultiIndex, + ensure_index_from_sequences, +) from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.io.common import IOHandles, get_handle +from pandas.io.common import ( + IOHandles, + get_handle, +) from pandas.io.date_converters import generic_parser parser_defaults = { diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index d1d77c5e044be..135e093cdc1e0 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -3,7 +3,10 @@ from pandas.core.indexes.api import ensure_index_from_sequences -from pandas.io.parsers.base_parser import ParserBase, is_index_col +from pandas.io.parsers.base_parser import ( + ParserBase, + is_index_col, +) class 
CParserWrapper(ParserBase): @@ -25,29 +28,23 @@ def __init__(self, src: FilePathOrBuffer, **kwds): for key in ("storage_options", "encoding", "memory_map", "compression"): kwds.pop(key, None) if self.handles.is_mmap and hasattr(self.handles.handle, "mmap"): - # pandas\io\parsers.py:1861: error: Item "IO[Any]" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + # error: Item "IO[Any]" of "Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]" has no attribute "mmap" - # pandas\io\parsers.py:1861: error: Item "RawIOBase" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + # error: Item "RawIOBase" of "Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]" has no attribute "mmap" - # pandas\io\parsers.py:1861: error: Item "BufferedIOBase" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + # error: Item "BufferedIOBase" of "Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]" has no attribute "mmap" - # pandas\io\parsers.py:1861: error: Item "TextIOBase" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + # error: Item "TextIOBase" of "Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]" has no attribute "mmap" - # pandas\io\parsers.py:1861: error: Item "TextIOWrapper" of - # "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, - # TextIOWrapper, mmap]" has no attribute "mmap" [union-attr] + # error: Item "TextIOWrapper" of "Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]" has no attribute "mmap" - # pandas\io\parsers.py:1861: error: Item "mmap" of "Union[IO[Any], - # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]" has - # no 
attribute "mmap" [union-attr] + # error: Item "mmap" of "Union[IO[Any], RawIOBase, BufferedIOBase, + # TextIOBase, TextIOWrapper, mmap]" has no attribute "mmap" self.handles.handle = self.handles.handle.mmap # type: ignore[union-attr] try: diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 223acdea80ca6..37f553c724c9e 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -1,19 +1,39 @@ -from collections import abc, defaultdict +from collections import ( + abc, + defaultdict, +) import csv from io import StringIO import re import sys -from typing import DefaultDict, Iterator, List, Optional, Set, Tuple, cast +from typing import ( + DefaultDict, + Iterator, + List, + Optional, + Set, + Tuple, + cast, +) import numpy as np import pandas._libs.lib as lib -from pandas._typing import FilePathOrBuffer, Union -from pandas.errors import EmptyDataError, ParserError +from pandas._typing import ( + FilePathOrBuffer, + Union, +) +from pandas.errors import ( + EmptyDataError, + ParserError, +) from pandas.core.dtypes.common import is_integer -from pandas.io.parsers.base_parser import ParserBase, parser_defaults +from pandas.io.parsers.base_parser import ( + ParserBase, + parser_defaults, +) # BOM character (byte order mark) # This exists at the beginning of a file to indicate endianness @@ -217,10 +237,9 @@ def _read(): reader = _read() - # pandas\io\parsers.py:2427: error: Incompatible types in assignment - # (expression has type "_reader", variable has type "Union[IO[Any], - # RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap, None]") - # [assignment] + # error: Incompatible types in assignment (expression has type "_reader", + # variable has type "Union[IO[Any], RawIOBase, BufferedIOBase, TextIOBase, + # TextIOWrapper, mmap, None]") self.data = reader # type: ignore[assignment] def read(self, rows=None): @@ -278,8 +297,7 @@ def _exclude_implicit_index(self, alldata): # legacy def 
get_chunk(self, size=None): if size is None: - # pandas\io\parsers.py:2528: error: "PythonParser" has no attribute - # "chunksize" [attr-defined] + # error: "PythonParser" has no attribute "chunksize" size = self.chunksize # type: ignore[attr-defined] return self.read(rows=size) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index dc45336bb4c0f..edfc7ee0b6258 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -5,27 +5,54 @@ import csv import sys from textwrap import fill -from typing import Any, Dict, List, Optional, Set, Type +from typing import ( + Any, + Dict, + List, + Optional, + Set, + Type, +) import warnings import numpy as np import pandas._libs.lib as lib from pandas._libs.parsers import STR_NA_VALUES -from pandas._typing import DtypeArg, FilePathOrBuffer, StorageOptions, Union -from pandas.errors import AbstractMethodError, ParserWarning +from pandas._typing import ( + DtypeArg, + FilePathOrBuffer, + StorageOptions, + Union, +) +from pandas.errors import ( + AbstractMethodError, + ParserWarning, +) from pandas.util._decorators import Appender -from pandas.core.dtypes.common import is_file_like, is_float, is_integer, is_list_like +from pandas.core.dtypes.common import ( + is_file_like, + is_float, + is_integer, + is_list_like, +) from pandas.core import generic from pandas.core.frame import DataFrame from pandas.core.indexes.api import RangeIndex from pandas.io.common import validate_header_arg -from pandas.io.parsers.base_parser import ParserBase, is_index_col, parser_defaults +from pandas.io.parsers.base_parser import ( + ParserBase, + is_index_col, + parser_defaults, +) from pandas.io.parsers.c_parser_wrapper import CParserWrapper -from pandas.io.parsers.python_parser import FixedWidthFieldParser, PythonParser +from pandas.io.parsers.python_parser import ( + FixedWidthFieldParser, + PythonParser, +) _doc_read_csv_and_table = ( r""" diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 
2dcbaf38fa51a..785afce9e0214 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -3,7 +3,11 @@ from typing import Any import warnings -from pandas._typing import CompressionOptions, FilePathOrBuffer, StorageOptions +from pandas._typing import ( + CompressionOptions, + FilePathOrBuffer, + StorageOptions, +) from pandas.compat import pickle_compat as pc from pandas.util._decorators import doc diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8917be1f558b2..88b444acfea62 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -6,7 +6,10 @@ from contextlib import suppress import copy -from datetime import date, tzinfo +from datetime import ( + date, + tzinfo, +) import itertools import os import re @@ -29,11 +32,23 @@ import numpy as np -from pandas._config import config, get_option +from pandas._config import ( + config, + get_option, +) -from pandas._libs import lib, writers as libwriters +from pandas._libs import ( + lib, + writers as libwriters, +) from pandas._libs.tslibs import timezones -from pandas._typing import ArrayLike, DtypeArg, FrameOrSeries, FrameOrSeriesUnion, Shape +from pandas._typing import ( + ArrayLike, + DtypeArg, + FrameOrSeries, + FrameOrSeriesUnion, + Shape, +) from pandas.compat._optional import import_optional_dependency from pandas.compat.pickle_compat import patch_pickle from pandas.errors import PerformanceWarning @@ -65,18 +80,32 @@ concat, isna, ) -from pandas.core.arrays import Categorical, DatetimeArray, PeriodArray +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + PeriodArray, +) import pandas.core.common as com -from pandas.core.computation.pytables import PyTablesExpr, maybe_expression +from pandas.core.computation.pytables import ( + PyTablesExpr, + maybe_expression, +) from pandas.core.construction import extract_array from pandas.core.indexes.api import ensure_index from pandas.core.internals import BlockManager from pandas.io.common import stringify_path -from 
pandas.io.formats.printing import adjoin, pprint_thing +from pandas.io.formats.printing import ( + adjoin, + pprint_thing, +) if TYPE_CHECKING: - from tables import Col, File, Node + from tables import ( + Col, + File, + Node, + ) from pandas.core.internals import Block @@ -3410,8 +3439,8 @@ def queryables(self) -> Dict[str, Any]: (v.cname, v) for v in self.values_axes if v.name in set(self.data_columns) ] - # error: Unsupported operand types for + ("List[Tuple[str, IndexCol]]" - # and "List[Tuple[str, None]]") + # error: Unsupported operand types for + ("List[Tuple[str, IndexCol]]" and + # "List[Tuple[str, None]]") return dict(d1 + d2 + d3) # type: ignore[operator] def index_cols(self): diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 9853fa41d3fb9..392dfa22ee67b 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -14,13 +14,24 @@ http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm """ from collections import abc -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import struct -from typing import IO, Any, Union, cast +from typing import ( + IO, + Any, + Union, + cast, +) import numpy as np -from pandas.errors import EmptyDataError, OutOfBoundsDatetime +from pandas.errors import ( + EmptyDataError, + OutOfBoundsDatetime, +) import pandas as pd from pandas import isna diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 2ecfbed8cc83f..c71de542bbf77 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -10,7 +10,10 @@ from collections import abc from datetime import datetime import struct -from typing import IO, cast +from typing import ( + IO, + cast, +) import warnings import numpy as np diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 8888be02dd5ea..69da038929482 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -3,8 +3,17 @@ """ 
from __future__ import annotations -from abc import ABCMeta, abstractmethod -from typing import TYPE_CHECKING, Hashable, Optional, Union, overload +from abc import ( + ABCMeta, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Hashable, + Optional, + Union, + overload, +) from pandas._typing import FilePathOrBuffer diff --git a/pandas/io/spss.py b/pandas/io/spss.py index 79cdfbf15392a..fb0ecee995463 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -1,5 +1,9 @@ from pathlib import Path -from typing import Optional, Sequence, Union +from typing import ( + Optional, + Sequence, + Union, +) from pandas.compat._optional import import_optional_dependency diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 725cb633918e7..c028e1f5c5dbe 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -4,10 +4,24 @@ """ from contextlib import contextmanager -from datetime import date, datetime, time +from datetime import ( + date, + datetime, + time, +) from functools import partial import re -from typing import Any, Dict, Iterator, List, Optional, Sequence, Union, cast, overload +from typing import ( + Any, + Dict, + Iterator, + List, + Optional, + Sequence, + Union, + cast, + overload, +) import warnings import numpy as np @@ -15,11 +29,18 @@ import pandas._libs.lib as lib from pandas._typing import DtypeArg -from pandas.core.dtypes.common import is_datetime64tz_dtype, is_dict_like, is_list_like +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_dict_like, + is_list_like, +) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna -from pandas.core.api import DataFrame, Series +from pandas.core.api import ( + DataFrame, + Series, +) from pandas.core.base import PandasObject from pandas.core.tools.datetimes import to_datetime @@ -1036,7 +1057,11 @@ def _get_column_names_and_types(self, dtype_mapper): return column_names_and_types def _create_table_setup(self): - from sqlalchemy import Column, 
PrimaryKeyConstraint, Table + from sqlalchemy import ( + Column, + PrimaryKeyConstraint, + Table, + ) column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type) @@ -1186,7 +1211,14 @@ def _sqlalchemy_type(self, col): return Text def _get_dtype(self, sqltype): - from sqlalchemy.types import TIMESTAMP, Boolean, Date, DateTime, Float, Integer + from sqlalchemy.types import ( + TIMESTAMP, + Boolean, + Date, + DateTime, + Float, + Integer, + ) if isinstance(sqltype, Float): return float @@ -1517,7 +1549,10 @@ def to_sql( else: dtype = cast(dict, dtype) - from sqlalchemy.types import TypeEngine, to_instance + from sqlalchemy.types import ( + TypeEngine, + to_instance, + ) for col, my_type in dtype.items(): if not isinstance(to_instance(my_type), TypeEngine): diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 8f8c435fae4f3..462c7b41f4271 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -36,8 +36,16 @@ from pandas._libs.lib import infer_dtype from pandas._libs.writers import max_len_string_array -from pandas._typing import Buffer, CompressionOptions, FilePathOrBuffer, StorageOptions -from pandas.util._decorators import Appender, doc +from pandas._typing import ( + Buffer, + CompressionOptions, + FilePathOrBuffer, + StorageOptions, +) +from pandas.util._decorators import ( + Appender, + doc, +) from pandas.core.dtypes.common import ( ensure_object, diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 597217ec67b0e..1a5efee586ee1 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,15 +1,30 @@ from __future__ import annotations import importlib -from typing import TYPE_CHECKING, Optional, Sequence, Tuple, Union +from typing import ( + TYPE_CHECKING, + Optional, + Sequence, + Tuple, + Union, +) from pandas._config import get_option from pandas._typing import IndexLabel -from pandas.util._decorators import Appender, Substitution - -from pandas.core.dtypes.common import is_integer, is_list_like 
-from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.util._decorators import ( + Appender, + Substitution, +) + +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCSeries, +) from pandas.core.base import PandasObject diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py index e212127549355..b12ca6187c945 100644 --- a/pandas/plotting/_matplotlib/__init__.py +++ b/pandas/plotting/_matplotlib/__init__.py @@ -1,6 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Type +from typing import ( + TYPE_CHECKING, + Dict, + Type, +) from pandas.plotting._matplotlib.boxplot import ( BoxPlot, @@ -8,7 +12,10 @@ boxplot_frame, boxplot_frame_groupby, ) -from pandas.plotting._matplotlib.converter import deregister, register +from pandas.plotting._matplotlib.converter import ( + deregister, + register, +) from pandas.plotting._matplotlib.core import ( AreaPlot, BarhPlot, @@ -18,7 +25,12 @@ PiePlot, ScatterPlot, ) -from pandas.plotting._matplotlib.hist import HistPlot, KdePlot, hist_frame, hist_series +from pandas.plotting._matplotlib.hist import ( + HistPlot, + KdePlot, + hist_frame, + hist_series, +) from pandas.plotting._matplotlib.misc import ( andrews_curves, autocorrelation_plot, diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index eec4c409a81b6..1ec4efe7b4795 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -14,7 +14,10 @@ import pandas.core.common as com from pandas.io.formats.printing import pprint_thing -from pandas.plotting._matplotlib.core import LinePlot, MPLPlot +from pandas.plotting._matplotlib.core import ( + LinePlot, + MPLPlot, +) from pandas.plotting._matplotlib.style import get_standard_colors from pandas.plotting._matplotlib.tools import ( create_subplots, diff --git 
a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 3d2d69162c70a..677c3e791c72b 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -1,18 +1,35 @@ import contextlib import datetime as pydt -from datetime import datetime, timedelta, tzinfo +from datetime import ( + datetime, + timedelta, + tzinfo, +) import functools -from typing import Any, Dict, List, Optional, Tuple +from typing import ( + Any, + Dict, + List, + Optional, + Tuple, +) from dateutil.relativedelta import relativedelta import matplotlib.dates as dates -from matplotlib.ticker import AutoLocator, Formatter, Locator +from matplotlib.ticker import ( + AutoLocator, + Formatter, + Locator, +) from matplotlib.transforms import nonsingular import matplotlib.units as units import numpy as np from pandas._libs import lib -from pandas._libs.tslibs import Timestamp, to_offset +from pandas._libs.tslibs import ( + Timestamp, + to_offset, +) from pandas._libs.tslibs.dtypes import FreqGroup from pandas._libs.tslibs.offsets import BaseOffset @@ -24,10 +41,18 @@ is_nested_list_like, ) -from pandas import Index, Series, get_option +from pandas import ( + Index, + Series, + get_option, +) import pandas.core.common as com from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) import pandas.core.tools.datetimes as tools # constants diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7d743075674f1..3b0d59501ba05 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,6 +1,12 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Hashable, List, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Hashable, + List, + Optional, + Tuple, +) import warnings from 
matplotlib.artist import Artist @@ -29,7 +35,10 @@ ABCPeriodIndex, ABCSeries, ) -from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.missing import ( + isna, + notna, +) import pandas.core.common as com @@ -594,17 +603,14 @@ def _make_legend(self): if self.legend: if self.legend == "reverse": - # pandas\plotting\_matplotlib\core.py:578: error: - # Incompatible types in assignment (expression has type + # error: Incompatible types in assignment (expression has type # "Iterator[Any]", variable has type "List[Any]") - # [assignment] self.legend_handles = reversed( # type: ignore[assignment] self.legend_handles ) - # pandas\plotting\_matplotlib\core.py:579: error: - # Incompatible types in assignment (expression has type + # error: Incompatible types in assignment (expression has type # "Iterator[Optional[Hashable]]", variable has type - # "List[Optional[Hashable]]") [assignment] + # "List[Optional[Hashable]]") self.legend_labels = reversed( # type: ignore[assignment] self.legend_labels ) @@ -1149,10 +1155,9 @@ def _make_plot(self): it = self._iter_data(data=data, keep_index=True) else: x = self._get_xticks(convert_period=True) - # pandas\plotting\_matplotlib\core.py:1100: error: Incompatible - # types in assignment (expression has type "Callable[[Any, Any, - # Any, Any, Any, Any, KwArg(Any)], Any]", variable has type - # "Callable[[Any, Any, Any, Any, KwArg(Any)], Any]") [assignment] + # error: Incompatible types in assignment (expression has type + # "Callable[[Any, Any, Any, Any, Any, Any, KwArg(Any)], Any]", variable has + # type "Callable[[Any, Any, Any, Any, KwArg(Any)], Any]") plotf = self._plot # type: ignore[assignment] it = self._iter_data() @@ -1601,9 +1606,8 @@ def blank_labeler(label, value): if labels is not None: blabels = [blank_labeler(left, value) for left, value in zip(labels, y)] else: - # pandas\plotting\_matplotlib\core.py:1546: error: Incompatible - # types in assignment (expression has type "None", variable has - # type 
"List[Any]") [assignment] + # error: Incompatible types in assignment (expression has type "None", + # variable has type "List[Any]") blabels = None # type: ignore[assignment] results = ax.pie(y, labels=blabels, **kwds) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 018d19e81d5c4..3de467c77d289 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -4,12 +4,24 @@ import numpy as np -from pandas.core.dtypes.common import is_integer, is_list_like -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex -from pandas.core.dtypes.missing import isna, remove_na_arraylike +from pandas.core.dtypes.common import ( + is_integer, + is_list_like, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, +) +from pandas.core.dtypes.missing import ( + isna, + remove_na_arraylike, +) from pandas.io.formats.printing import pprint_thing -from pandas.plotting._matplotlib.core import LinePlot, MPLPlot +from pandas.plotting._matplotlib.core import ( + LinePlot, + MPLPlot, +) from pandas.plotting._matplotlib.tools import ( create_subplots, flatten_axes, diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 01ac3de4ff3bb..3d5f4af72db6c 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -1,7 +1,14 @@ from __future__ import annotations import random -from typing import TYPE_CHECKING, Dict, Hashable, List, Optional, Set +from typing import ( + TYPE_CHECKING, + Dict, + Hashable, + List, + Optional, + Set, +) import matplotlib.lines as mlines import matplotlib.patches as patches @@ -22,7 +29,10 @@ from matplotlib.axes import Axes from matplotlib.figure import Figure - from pandas import DataFrame, Series + from pandas import ( + DataFrame, + Series, + ) def scatter_matrix( diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 51916075018a3..8374988708701 
100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -3,11 +3,19 @@ from __future__ import annotations import functools -from typing import TYPE_CHECKING, Optional, cast +from typing import ( + TYPE_CHECKING, + Optional, + cast, +) import numpy as np -from pandas._libs.tslibs import BaseOffset, Period, to_offset +from pandas._libs.tslibs import ( + BaseOffset, + Period, + to_offset, +) from pandas._libs.tslibs.dtypes import FreqGroup from pandas._typing import FrameOrSeriesUnion @@ -23,12 +31,20 @@ TimeSeries_DateLocator, TimeSeries_TimedeltaFormatter, ) -from pandas.tseries.frequencies import get_period_alias, is_subperiod, is_superperiod +from pandas.tseries.frequencies import ( + get_period_alias, + is_subperiod, + is_superperiod, +) if TYPE_CHECKING: from matplotlib.axes import Axes - from pandas import DatetimeIndex, Index, Series + from pandas import ( + DatetimeIndex, + Index, + Series, + ) # --------------------------------------------------------------------- # Plotting functions and monkey patches diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index df94b71f5e7a9..500d570835493 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -2,7 +2,14 @@ from __future__ import annotations from math import ceil -from typing import TYPE_CHECKING, Iterable, List, Sequence, Tuple, Union +from typing import ( + TYPE_CHECKING, + Iterable, + List, + Sequence, + Tuple, + Union, +) import warnings import matplotlib.table @@ -12,7 +19,11 @@ from pandas._typing import FrameOrSeriesUnion from pandas.core.dtypes.common import is_list_like -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndex, + ABCSeries, +) from pandas.plotting._matplotlib import compat diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 58f44104b99d6..e0a860b9d8709 
100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -530,8 +530,7 @@ def reset(self): ------- None """ - # pandas\plotting\_misc.py:533: error: Cannot access "__init__" - # directly [misc] + # error: Cannot access "__init__" directly self.__init__() # type: ignore[misc] def _get_canonical_key(self, key): diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index 71804bded3e44..7b6cc9412e03d 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -59,5 +59,5 @@ def test_types(self): def test_deprecated_from_api_types(self): for t in self.deprecated: - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): getattr(types, t)(1) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index dd8904674428f..3ac9d98874f86 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -8,7 +8,14 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, notna +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + notna, +) import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.tests.frame.common import zip_frames diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py index c4959ee2c8962..7718ec5215499 100644 --- a/pandas/tests/apply/test_frame_transform.py +++ b/pandas/tests/apply/test_frame_transform.py @@ -4,7 +4,11 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels @@ -202,7 +206,7 @@ def test_transform_bad_dtype(op, 
frame_or_series): # tshift is deprecated warn = None if op != "tshift" else FutureWarning - with tm.assert_produces_warning(warn, check_stacklevel=False): + with tm.assert_produces_warning(warn): with pytest.raises(ValueError, match=msg): obj.transform(op) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index a5c40af5c7f35..bf8311f992ea5 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -1,4 +1,7 @@ -from collections import Counter, defaultdict +from collections import ( + Counter, + defaultdict, +) from itertools import chain import numpy as np @@ -7,7 +10,15 @@ from pandas.core.dtypes.common import is_number import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, timedelta_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + isna, + timedelta_range, +) import pandas._testing as tm from pandas.core.base import SpecificationError diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py index 73cc789c6eb3a..e67ea4f14e4ac 100644 --- a/pandas/tests/apply/test_series_transform.py +++ b/pandas/tests/apply/test_series_transform.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Series, concat +from pandas import ( + Series, + concat, +) import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.core.groupby.base import transformation_kernels diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index e26bb513838a5..386490623dc47 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -4,7 +4,12 @@ import numpy as np import pytest -from pandas import DataFrame, Index, Series, array as pd_array +from pandas import ( + DataFrame, + Index, + Series, + array as pd_array, +) import pandas._testing as tm from 
pandas.core.arrays import PandasArray diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index f507c6d4f45fb..d90592c68e351 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -2,7 +2,12 @@ import pytest import pandas as pd -from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index +from pandas import ( + Float64Index, + Int64Index, + RangeIndex, + UInt64Index, +) import pandas._testing as tm # ------------------------------------------------------------------ diff --git a/pandas/tests/arithmetic/test_array_ops.py b/pandas/tests/arithmetic/test_array_ops.py index 53cb10ba9fc5e..2c347d965bbf7 100644 --- a/pandas/tests/arithmetic/test_array_ops.py +++ b/pandas/tests/arithmetic/test_array_ops.py @@ -4,7 +4,10 @@ import pytest import pandas._testing as tm -from pandas.core.ops.array_ops import comparison_op, na_logical_op +from pandas.core.ops.array_ops import ( + comparison_op, + na_logical_op, +) def test_na_logical_op_2d(): diff --git a/pandas/tests/arithmetic/test_categorical.py b/pandas/tests/arithmetic/test_categorical.py index a978f763fbaaa..924f32b5ac9ac 100644 --- a/pandas/tests/arithmetic/test_categorical.py +++ b/pandas/tests/arithmetic/test_categorical.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import Categorical, Series +from pandas import ( + Categorical, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b2d88b3556388..f28407df24508 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1,8 +1,15 @@ # Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. 
# Specifically for datetime64 and datetime64tz dtypes -from datetime import datetime, time, timedelta -from itertools import product, starmap +from datetime import ( + datetime, + time, + timedelta, +) +from itertools import ( + product, + starmap, +) import operator import warnings @@ -28,7 +35,10 @@ date_range, ) import pandas._testing as tm -from pandas.core.arrays import DatetimeArray, TimedeltaArray +from pandas.core.arrays import ( + DatetimeArray, + TimedeltaArray, +) from pandas.core.ops import roperator from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, @@ -318,40 +328,40 @@ def test_dt64arr_timestamp_equality(self, box_with_array): box_with_array if box_with_array not in [pd.Index, pd.array] else np.ndarray ) - ser = Series([Timestamp("2000-01-29 01:59:00"), "NaT"]) + ser = Series([Timestamp("2000-01-29 01:59:00"), Timestamp("2000-01-30"), "NaT"]) ser = tm.box_expected(ser, box_with_array) result = ser != ser - expected = tm.box_expected([False, True], xbox) + expected = tm.box_expected([False, False, True], xbox) tm.assert_equal(result, expected) warn = FutureWarning if box_with_array is pd.DataFrame else None with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser != ser[0] - expected = tm.box_expected([False, True], xbox) + expected = tm.box_expected([False, True, True], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser != ser[1] - expected = tm.box_expected([True, True], xbox) + result = ser != ser[2] + expected = tm.box_expected([True, True, True], xbox) tm.assert_equal(result, expected) result = ser == ser - expected = tm.box_expected([True, False], xbox) + expected = tm.box_expected([True, True, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated result = ser == ser[0] - expected = 
tm.box_expected([True, False], xbox) + expected = tm.box_expected([True, False, False], xbox) tm.assert_equal(result, expected) with tm.assert_produces_warning(warn): # alignment for frame vs series comparisons deprecated - result = ser == ser[1] - expected = tm.box_expected([False, False], xbox) + result = ser == ser[2] + expected = tm.box_expected([False, False, False], xbox) tm.assert_equal(result, expected) @@ -1010,10 +1020,7 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(dti, box_with_array) expected = tm.box_expected(expected, box_with_array) - warn = None - if box_with_array is not pd.DataFrame or tz_naive_fixture is None: - warn = PerformanceWarning - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = obj - obj.astype(object) tm.assert_equal(result, expected) @@ -1276,7 +1283,7 @@ def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): ] ) vec = tm.box_expected(vec, box_with_array) - vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec # DateOffset relativedelta fastpath relative_kwargs = [ @@ -1401,7 +1408,7 @@ def test_dt64arr_add_sub_DateOffsets( ] ) vec = tm.box_expected(vec, box_with_array) - vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + vec_items = vec.iloc[0] if box_with_array is pd.DataFrame else vec offset_cls = getattr(pd.offsets, cls_name) @@ -1515,10 +1522,7 @@ def test_dt64arr_add_sub_offset_array( if box_other: other = tm.box_expected(other, box_with_array) - warn = PerformanceWarning - if box_with_array is pd.DataFrame and tz is not None: - warn = None - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): res = op(dtarr, other) tm.assert_equal(res, expected) @@ -2459,18 +2463,14 @@ def test_dti_addsub_object_arraylike( expected = DatetimeIndex(["2017-01-31", 
"2017-01-06"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox) - warn = PerformanceWarning - if box_with_array is pd.DataFrame and tz is not None: - warn = None - - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = dtarr + other tm.assert_equal(result, expected) expected = DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) expected = tm.box_expected(expected, xbox) - with tm.assert_produces_warning(warn): + with tm.assert_produces_warning(PerformanceWarning): result = dtarr - other tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index f4f258b559939..1e2622d6a8fcd 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -5,7 +5,10 @@ from decimal import Decimal from itertools import combinations import operator -from typing import Any, List +from typing import ( + Any, + List, +) import numpy as np import pytest @@ -532,13 +535,17 @@ def test_df_div_zero_series_does_not_commute(self): # ------------------------------------------------------------------ # Mod By Zero - def test_df_mod_zero_df(self): + def test_df_mod_zero_df(self, using_array_manager): # GH#3590, modulo as ints df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) # this is technically wrong, as the integer portion is coerced to float - # ### - first = Series([0, 0, 0, 0], dtype="float64") + first = Series([0, 0, 0, 0]) + if not using_array_manager: + # INFO(ArrayManager) BlockManager doesn't preserve dtype per column + # while ArrayManager performs op column-wise and thus preserves + # dtype if possible + first = first.astype("float64") second = Series([np.nan, np.nan, np.nan, 0]) expected = pd.DataFrame({"first": first, "second": second}) result = df % df diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index
a31c2e6d8c258..c5e086d24ec0c 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -9,7 +9,10 @@ import pytest import pandas as pd -from pandas import Series, Timestamp +from pandas import ( + Series, + Timestamp, +) import pandas._testing as tm from pandas.core import ops diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 4a2e8ba8219aa..f0c03fee50b39 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -6,11 +6,22 @@ import numpy as np import pytest -from pandas._libs.tslibs import IncompatibleFrequency, Period, Timestamp, to_offset +from pandas._libs.tslibs import ( + IncompatibleFrequency, + Period, + Timestamp, + to_offset, +) from pandas.errors import PerformanceWarning import pandas as pd -from pandas import PeriodIndex, Series, Timedelta, TimedeltaIndex, period_range +from pandas import ( + PeriodIndex, + Series, + Timedelta, + TimedeltaIndex, + period_range, +) import pandas._testing as tm from pandas.core import ops from pandas.core.arrays import TimedeltaArray diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 740ec3be4a1c6..b08442321f502 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1,11 +1,17 @@ # Arithmetic tests for DataFrame/Series/Index/Array classes that should # behave identically. 
-from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest -from pandas.errors import OutOfBoundsDatetime, PerformanceWarning +from pandas.errors import ( + OutOfBoundsDatetime, + PerformanceWarning, +) import pandas as pd from pandas import ( @@ -1748,7 +1754,9 @@ def test_tdarr_div_length_mismatch(self, box_with_array): # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ - def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array): + def test_td64arr_floordiv_td64arr_with_nat( + self, box_with_array, using_array_manager + ): # GH#35529 box = box_with_array xbox = np.ndarray if box is pd.array else box @@ -1761,6 +1769,11 @@ def test_td64arr_floordiv_td64arr_with_nat(self, box_with_array): expected = np.array([1.0, 1.0, np.nan], dtype=np.float64) expected = tm.box_expected(expected, xbox) + if box is DataFrame and using_array_manager: + # INFO(ArrayManager) floordiv returns integer, and ArrayManager + # performs ops column-wise and thus preserves int64 dtype for + # columns without missing values + expected[[0, 1]] = expected[[0, 1]].astype("int64") result = left // right @@ -2040,7 +2053,9 @@ def test_td64arr_rmul_numeric_array(self, box_with_array, vector, any_real_dtype [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], ids=lambda x: type(x).__name__, ) - def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype): + def test_td64arr_div_numeric_array( + self, box_with_array, vector, any_real_dtype, using_array_manager + ): # GH#4521 # divide/multiply by integers xbox = get_upcast_box(box_with_array, vector) @@ -2075,6 +2090,16 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype) expected = [tdser[n] / vector[n] for n in range(len(tdser))] expected = pd.Index(expected) # do dtype inference expected = tm.box_expected(expected, xbox) + assert tm.get_dtype(expected) ==
"m8[ns]" + + if using_array_manager and box_with_array is pd.DataFrame: + # TODO the behaviour is buggy here (third column with all-NaT + # as result doesn't get preserved as timedelta64 dtype). + # Reported at https://github.com/pandas-dev/pandas/issues/39750 + # Changing the expected instead of xfailing to continue to test + # the correct behaviour for the other columns + expected[2] = Series([pd.NaT, pd.NaT], dtype=object) + tm.assert_equal(result, expected) with pytest.raises(TypeError, match=pattern): diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index abf4ddd681d69..6899d821f80ad 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -6,7 +6,13 @@ from pandas.compat import PYPY -from pandas import Categorical, Index, NaT, Series, date_range +from pandas import ( + Categorical, + Index, + NaT, + Series, + date_range, +) import pandas._testing as tm from pandas.api.types import is_scalar diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py index 98b0f978c5f59..a6dea639488a2 100644 --- a/pandas/tests/arrays/categorical/test_api.py +++ b/pandas/tests/arrays/categorical/test_api.py @@ -3,7 +3,13 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Series, +) import pandas._testing as tm from pandas.core.arrays.categorical import recode_for_categories from pandas.tests.arrays.categorical.common import TestCategorical diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 556f8c24f2ab1..7c144c390a128 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -1,11 +1,20 @@ -from datetime import date, 
datetime +from datetime import ( + date, + datetime, +) import numpy as np import pytest -from pandas.compat import IS64, is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) -from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py index 6be9a424a5544..209891ba8f043 100644 --- a/pandas/tests/arrays/categorical/test_dtypes.py +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -3,7 +3,13 @@ from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import Categorical, CategoricalIndex, Index, Series, Timestamp +from pandas import ( + Categorical, + CategoricalIndex, + Index, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 36ed790eff63c..0a35f490ff210 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -6,7 +6,13 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, DataFrame, Index, Series, isna +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 328b5771e617c..4a00df2d783cf 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -5,7 +5,12 @@ import pytest import pandas as pd -from pandas import Categorical, DataFrame, Series, date_range +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, +) import pandas._testing as tm from 
pandas.tests.arrays.categorical.common import TestCategorical diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py index 9589216557cd5..4f65c8dfaf0be 100644 --- a/pandas/tests/arrays/categorical/test_sorting.py +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Categorical, Index +from pandas import ( + Categorical, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/arrays/categorical/test_take.py b/pandas/tests/arrays/categorical/test_take.py index 97d9db483c401..6cb54908724c9 100644 --- a/pandas/tests/arrays/categorical/test_take.py +++ b/pandas/tests/arrays/categorical/test_take.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Categorical, Index +from pandas import ( + Categorical, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/arrays/floating/conftest.py b/pandas/tests/arrays/floating/conftest.py index 1e80518e15941..9eab11516c295 100644 --- a/pandas/tests/arrays/floating/conftest.py +++ b/pandas/tests/arrays/floating/conftest.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas.core.arrays.floating import Float32Dtype, Float64Dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) @pytest.fixture(params=[Float32Dtype, Float64Dtype]) diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index a3eade98d99d6..4ce3dd35b538b 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -4,7 +4,10 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays import FloatingArray -from pandas.core.arrays.floating import Float32Dtype, Float64Dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) def test_uses_pandas_na(): diff --git a/pandas/tests/arrays/floating/test_repr.py 
b/pandas/tests/arrays/floating/test_repr.py index 8767b79242c83..a8868fd93747a 100644 --- a/pandas/tests/arrays/floating/test_repr.py +++ b/pandas/tests/arrays/floating/test_repr.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas.core.arrays.floating import Float32Dtype, Float64Dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) def test_dtypes(dtype): diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py index a7e31ede8e384..b48567d37ecaf 100644 --- a/pandas/tests/arrays/integer/test_construction.py +++ b/pandas/tests/arrays/integer/test_construction.py @@ -5,7 +5,11 @@ import pandas._testing as tm from pandas.api.types import is_integer from pandas.core.arrays import IntegerArray -from pandas.core.arrays.integer import Int8Dtype, Int32Dtype, Int64Dtype +from pandas.core.arrays.integer import ( + Int8Dtype, + Int32Dtype, + Int64Dtype, +) @pytest.fixture(params=[pd.array, IntegerArray._from_sequence]) diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py index 457117c167749..e3f59205aa07c 100644 --- a/pandas/tests/arrays/integer/test_dtypes.py +++ b/pandas/tests/arrays/integer/test_dtypes.py @@ -5,7 +5,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays.integer import Int8Dtype, UInt32Dtype +from pandas.core.arrays.integer import ( + Int8Dtype, + UInt32Dtype, +) def test_dtypes(dtype): diff --git a/pandas/tests/arrays/interval/test_astype.py b/pandas/tests/arrays/interval/test_astype.py index e118e40196e43..d7a2140f817f3 100644 --- a/pandas/tests/arrays/interval/test_astype.py +++ b/pandas/tests/arrays/interval/test_astype.py @@ -1,6 +1,11 @@ import pytest -from pandas import Categorical, CategoricalDtype, Index, IntervalIndex +from pandas import ( + Categorical, + CategoricalDtype, + Index, + IntervalIndex, +) import pandas._testing as tm diff --git 
a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_ops.py index 9c78c2a48b9ff..4853bec51106c 100644 --- a/pandas/tests/arrays/interval/test_ops.py +++ b/pandas/tests/arrays/interval/test_ops.py @@ -2,7 +2,12 @@ import numpy as np import pytest -from pandas import Interval, IntervalIndex, Timedelta, Timestamp +from pandas import ( + Interval, + IntervalIndex, + Timedelta, + Timestamp, +) import pandas._testing as tm from pandas.core.arrays import IntervalArray diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index 1d2833c5da276..88e11f57a7835 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -1,4 +1,7 @@ -from typing import Any, List +from typing import ( + Any, + List, +) import numpy as np import pytest diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 8dc9d2a996728..f4e803cf4405f 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -6,7 +6,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import PeriodArray, period_array +from pandas.core.arrays import ( + PeriodArray, + period_array, +) pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev") diff --git a/pandas/tests/arrays/period/test_constructors.py b/pandas/tests/arrays/period/test_constructors.py index 0a8a106767fb6..52543d91e8f2a 100644 --- a/pandas/tests/arrays/period/test_constructors.py +++ b/pandas/tests/arrays/period/test_constructors.py @@ -6,7 +6,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import PeriodArray, period_array +from pandas.core.arrays import ( + PeriodArray, + period_array, +) @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 
4c6781509f8af..10f5a7e9a1dc4 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -7,7 +7,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) class TestSeriesAccessor: diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py index 6cc730ce49840..79cf8298ab1a6 100644 --- a/pandas/tests/arrays/sparse/test_arithmetics.py +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -8,7 +8,10 @@ import pandas as pd import pandas._testing as tm from pandas.core import ops -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) @pytest.fixture(params=["integer", "block"]) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 7f1b0f49ebd1e..31522f24fc7a2 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -11,7 +11,10 @@ import pandas as pd from pandas import isna import pandas._testing as tm -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) class TestSparseArray: diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py index 992dff218415d..c1466882b8443 100644 --- a/pandas/tests/arrays/sparse/test_libsparse.py +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -8,7 +8,11 @@ from pandas import Series import pandas._testing as tm -from pandas.core.arrays.sparse import BlockIndex, IntIndex, make_sparse_index +from pandas.core.arrays.sparse import ( + BlockIndex, + IntIndex, + make_sparse_index, +) TEST_LENGTH = 20 diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py 
index 8c766b96ecb9a..d5254adc1ee24 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -9,7 +9,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays.string_arrow import ArrowStringArray, ArrowStringDtype +from pandas.core.arrays.string_arrow import ( + ArrowStringArray, + ArrowStringDtype, +) skip_if_no_pyarrow = td.skip_if_no("pyarrow", min_version="1.0.0") diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py index f14d5349dcea3..5d2b7c43f6765 100644 --- a/pandas/tests/arrays/test_array.py +++ b/pandas/tests/arrays/test_array.py @@ -21,8 +21,15 @@ StringArray, TimedeltaArray, ) -from pandas.core.arrays import PandasArray, period_array -from pandas.tests.extension.decimal import DecimalArray, DecimalDtype, to_decimal +from pandas.core.arrays import ( + PandasArray, + period_array, +) +from pandas.tests.extension.decimal import ( + DecimalArray, + DecimalDtype, + to_decimal, +) @pytest.mark.parametrize( diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index b43812b8024bb..070dec307f527 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1,16 +1,33 @@ import re -from typing import Type, Union +from typing import ( + Type, + Union, +) import numpy as np import pytest -from pandas._libs import NaT, OutOfBoundsDatetime, Timestamp +from pandas._libs import ( + NaT, + OutOfBoundsDatetime, + Timestamp, +) from pandas.compat import np_version_under1p18 import pandas as pd -from pandas import DatetimeIndex, Period, PeriodIndex, TimedeltaIndex +from pandas import ( + DatetimeIndex, + Period, + PeriodIndex, + TimedeltaIndex, +) import pandas._testing as tm -from pandas.core.arrays import DatetimeArray, PandasArray, PeriodArray, TimedeltaArray +from pandas.core.arrays import ( + DatetimeArray, + PandasArray, + PeriodArray, + TimedeltaArray, +) # TODO: more freq 
variants diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index d044b191cf279..e7f3e8c659316 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -9,7 +9,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.arrays import PeriodArray, period_array +from pandas.core.arrays import ( + PeriodArray, + period_array, +) # ---------------------------------------------------------------------------- # Dtype diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py index 697364fc87175..b042e29986c80 100644 --- a/pandas/tests/base/test_constructors.py +++ b/pandas/tests/base/test_constructors.py @@ -7,10 +7,17 @@ from pandas.compat import PYPY import pandas as pd -from pandas import DataFrame, Index, Series +from pandas import ( + DataFrame, + Index, + Series, +) import pandas._testing as tm from pandas.core.accessor import PandasDelegate -from pandas.core.base import NoNewAttributesMixin, PandasObject +from pandas.core.base import ( + NoNewAttributesMixin, + PandasObject, +) @pytest.fixture( diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index 67e519553517f..8be475d5a922a 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -1,11 +1,20 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_datetime64_dtype, is_timedelta64_dtype +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_timedelta64_dtype, +) from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd -from pandas import CategoricalIndex, Series, Timedelta, Timestamp, date_range +from pandas import ( + CategoricalIndex, + Series, + Timedelta, + Timestamp, + date_range, +) import pandas._testing as tm from pandas.core.arrays import ( DatetimeArray, diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index 
d02078814f60f..c0250e2b3e958 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -3,12 +3,22 @@ import numpy as np import pytest -from pandas.compat import IS64, PYPY +from pandas.compat import ( + IS64, + PYPY, +) -from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_object_dtype, +) import pandas as pd -from pandas import DataFrame, Index, Series +from pandas import ( + DataFrame, + Index, + Series, +) @pytest.mark.parametrize( diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 1a554c85e018b..4aefa4be176fb 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -3,7 +3,10 @@ from pandas._libs import iNaT -from pandas.core.dtypes.common import is_datetime64tz_dtype, needs_i8_conversion +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + needs_i8_conversion, +) import pandas as pd import pandas._testing as tm diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 37ee61417eeab..5b8179bdc10a9 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -2,31 +2,53 @@ from functools import reduce from itertools import product import operator -from typing import Dict, List, Type +from typing import ( + Dict, + List, + Type, +) import warnings import numpy as np import pytest -from pandas.compat import is_platform_windows, np_version_under1p17 +from pandas.compat import ( + is_platform_windows, + np_version_under1p17, +) from pandas.errors import PerformanceWarning import pandas.util._test_decorators as td -from pandas.core.dtypes.common import is_bool, is_list_like, is_scalar +from pandas.core.dtypes.common import ( + is_bool, + is_list_like, + is_scalar, +) import pandas as pd -from pandas import DataFrame, Series, compat, date_range +from pandas import ( + DataFrame, + Series, + 
compat, + date_range, +) import pandas._testing as tm from pandas.core.computation import pytables from pandas.core.computation.check import NUMEXPR_VERSION -from pandas.core.computation.engines import ENGINES, NumExprClobberingError +from pandas.core.computation.engines import ( + ENGINES, + NumExprClobberingError, +) import pandas.core.computation.expr as expr from pandas.core.computation.expr import ( BaseExprVisitor, PandasExprVisitor, PythonExprVisitor, ) -from pandas.core.computation.expressions import NUMEXPR_INSTALLED, USE_NUMEXPR +from pandas.core.computation.expressions import ( + NUMEXPR_INSTALLED, + USE_NUMEXPR, +) from pandas.core.computation.ops import ( ARITH_OPS_SYMS, SPECIAL_CASE_ARITH_OPS_SYMS, diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index e815a90207a08..21b1b7ed6ee65 100644 --- a/pandas/tests/config/test_localization.py +++ b/pandas/tests/config/test_localization.py @@ -4,7 +4,11 @@ import pytest -from pandas._config.localization import can_set_locale, get_locales, set_locale +from pandas._config.localization import ( + can_set_locale, + get_locales, + set_locale, +) from pandas.compat import is_platform_windows diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py index 4ff3375d1988d..eccd838a11331 100644 --- a/pandas/tests/dtypes/cast/test_construct_from_scalar.py +++ b/pandas/tests/dtypes/cast/test_construct_from_scalar.py @@ -4,7 +4,11 @@ from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas import Categorical, Timedelta, Timestamp +from pandas import ( + Categorical, + Timedelta, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py index e9057e9635f37..0c3e9841eba3e 100644 --- a/pandas/tests/dtypes/cast/test_downcast.py +++ 
b/pandas/tests/dtypes/cast/test_downcast.py @@ -5,7 +5,11 @@ from pandas.core.dtypes.cast import maybe_downcast_to_dtype -from pandas import DatetimeIndex, Series, Timestamp +from pandas import ( + DatetimeIndex, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py index 50f7c7c2e085a..8484b5525a92a 100644 --- a/pandas/tests/dtypes/cast/test_find_common_type.py +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -9,7 +9,10 @@ PeriodDtype, ) -from pandas import Categorical, Index +from pandas import ( + Categorical, + Index, +) @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/cast/test_infer_datetimelike.py b/pandas/tests/dtypes/cast/test_infer_datetimelike.py index f4253e9d9e37b..3c3844e69586d 100644 --- a/pandas/tests/dtypes/cast/test_infer_datetimelike.py +++ b/pandas/tests/dtypes/cast/test_infer_datetimelike.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import DataFrame, NaT, Series, Timestamp +from pandas import ( + DataFrame, + NaT, + Series, + Timestamp, +) @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py index a47c5555d3e9f..b08dc82a48fe3 100644 --- a/pandas/tests/dtypes/cast/test_infer_dtype.py +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -1,4 +1,8 @@ -from datetime import date, datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) import numpy as np import pytest @@ -105,13 +109,11 @@ def test_infer_from_scalar_tz(tz, pandas_dtype): if pandas_dtype: exp_dtype = f"datetime64[ns, {tz}]" - exp_val = dt.value else: exp_dtype = np.object_ - exp_val = dt assert dtype == exp_dtype - assert val == exp_val + assert val == dt @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py new file mode 100644 index 
0000000000000..3f62f31dac219 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -0,0 +1,40 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_box_native + +from pandas import ( + Interval, + Period, + Timedelta, + Timestamp, +) + + +@pytest.mark.parametrize( + "obj,expected_dtype", + [ + (b"\x00\x10", bytes), + (int(4), int), + (np.uint(4), int), + (np.int32(-4), int), + (np.uint8(4), int), + (float(454.98), float), + (np.float16(0.4), float), + (np.float64(1.4), float), + (np.bool_(False), bool), + (datetime(2005, 2, 25), datetime), + (np.datetime64("2005-02-25"), Timestamp), + (Timestamp("2005-02-25"), Timestamp), + (np.timedelta64(1, "D"), Timedelta), + (Timedelta(1, "D"), Timedelta), + (Interval(0, 1), Interval), + (Period("4Q2005"), Period), + ], +) +def test_maybe_box_native(obj, expected_dtype): + boxed_obj = maybe_box_native(obj) + result_dtype = type(boxed_obj) + assert result_dtype is expected_dtype diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 08303fc601b3e..786944816bcf6 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -24,6 +24,7 @@ from pandas.core.dtypes.missing import isna import pandas as pd +import pandas._testing as tm @pytest.fixture( @@ -403,7 +404,13 @@ def test_maybe_promote_any_with_datetime64( expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value - _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + warn = None + if type(fill_value) is datetime.date and dtype.kind == "M": + # Casting date to dt64 is deprecated + warn = FutureWarning + + with tm.assert_produces_warning(warn, check_stacklevel=False): + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) @pytest.mark.parametrize( diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a5522e503c7f4..2b689364c5002 
100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -724,7 +724,7 @@ def test_astype_nansafe(val, typ): msg = "Cannot convert NaT values to integer" with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): # datetimelike astype(int64) deprecated astype_nansafe(arr, dtype=typ) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c0a2a0c3a9897..bf83085058cfc 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -35,7 +35,10 @@ date_range, ) import pandas._testing as tm -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) class Base: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0f4cef772458f..046256535df57 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -5,7 +5,12 @@ """ import collections from collections import namedtuple -from datetime import date, datetime, time, timedelta +from datetime import ( + date, + datetime, + time, + timedelta, +) from decimal import Decimal from fractions import Fraction from io import StringIO @@ -16,7 +21,10 @@ import pytest import pytz -from pandas._libs import lib, missing as libmissing +from pandas._libs import ( + lib, + missing as libmissing, +) import pandas.util._test_decorators as td from pandas.core.dtypes import inference diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index f6566d205e65c..57efe8e4840f1 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -8,10 +8,20 @@ from pandas._config import config as cf from pandas._libs import missing as libmissing -from pandas._libs.tslibs import iNaT, is_null_datetimelike +from pandas._libs.tslibs 
import ( + iNaT, + is_null_datetimelike, +) -from pandas.core.dtypes.common import is_float, is_scalar -from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype +from pandas.core.dtypes.common import ( + is_float, + is_scalar, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) from pandas.core.dtypes.missing import ( array_equivalent, isna, @@ -22,7 +32,14 @@ ) import pandas as pd -from pandas import DatetimeIndex, Float64Index, NaT, Series, TimedeltaIndex, date_range +from pandas import ( + DatetimeIndex, + Float64Index, + NaT, + Series, + TimedeltaIndex, + date_range, +) import pandas._testing as tm now = pd.Timestamp.now() diff --git a/pandas/tests/extension/arrow/test_timestamp.py b/pandas/tests/extension/arrow/test_timestamp.py index 10e560b34a21c..819e5549d05ae 100644 --- a/pandas/tests/extension/arrow/test_timestamp.py +++ b/pandas/tests/extension/arrow/test_timestamp.py @@ -6,7 +6,10 @@ import pytest import pandas as pd -from pandas.api.extensions import ExtensionDtype, register_extension_dtype +from pandas.api.extensions import ( + ExtensionDtype, + register_extension_dtype, +) pytest.importorskip("pyarrow", minversion="0.13.0") diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index b63af0c22b450..ea4443010c6a6 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -4,7 +4,11 @@ import pytest import pandas as pd -from pandas.api.types import infer_dtype, is_object_dtype, is_string_dtype +from pandas.api.types import ( + infer_dtype, + is_object_dtype, + is_string_dtype, +) from pandas.tests.extension.base.base import BaseExtensionTests diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index bae8e9df72d41..2a27f670fa046 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -1,4 +1,7 @@ -from typing import Optional, Type +from typing 
import ( + Optional, + Type, +) import pytest diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index c7976c5800173..9e1c517704743 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -9,13 +9,26 @@ import numpy as np from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.common import is_dtype_equal, is_float, pandas_dtype +from pandas.core.dtypes.common import ( + is_dtype_equal, + is_float, + pandas_dtype, +) import pandas as pd -from pandas.api.extensions import no_default, register_extension_dtype -from pandas.api.types import is_list_like, is_scalar +from pandas.api.extensions import ( + no_default, + register_extension_dtype, +) +from pandas.api.types import ( + is_list_like, + is_scalar, +) from pandas.core.arraylike import OpsMixin -from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin +from pandas.core.arrays import ( + ExtensionArray, + ExtensionScalarOpsMixin, +) from pandas.core.indexers import check_array_indexer diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py index b6402b6c09526..7ebfd54a5b0d6 100644 --- a/pandas/tests/extension/json/__init__.py +++ b/pandas/tests/extension/json/__init__.py @@ -1,3 +1,7 @@ -from pandas.tests.extension.json.array import JSONArray, JSONDtype, make_data +from pandas.tests.extension.json.array import ( + JSONArray, + JSONDtype, + make_data, +) __all__ = ["JSONArray", "JSONDtype", "make_data"] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 5fcfe4faac55a..ca593da6d97bc 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -13,20 +13,30 @@ """ from __future__ import annotations -from collections import UserDict, abc +from collections import ( + UserDict, + abc, +) import itertools import numbers import random import string import sys -from typing import Any, 
Mapping, Type +from typing import ( + Any, + Mapping, + Type, +) import numpy as np from pandas.core.dtypes.common import pandas_dtype import pandas as pd -from pandas.api.extensions import ExtensionArray, ExtensionDtype +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, +) from pandas.api.types import is_bool_dtype diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 90a39f3b33e95..b8fa158083327 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -6,7 +6,11 @@ import pandas as pd import pandas._testing as tm from pandas.tests.extension import base -from pandas.tests.extension.json.array import JSONArray, JSONDtype, make_data +from pandas.tests.extension.json.array import ( + JSONArray, + JSONDtype, + make_data, +) @pytest.fixture diff --git a/pandas/tests/extension/list/__init__.py b/pandas/tests/extension/list/__init__.py index 1cd85657e0de4..0f3f2f3537788 100644 --- a/pandas/tests/extension/list/__init__.py +++ b/pandas/tests/extension/list/__init__.py @@ -1,3 +1,7 @@ -from pandas.tests.extension.list.array import ListArray, ListDtype, make_data +from pandas.tests.extension.list.array import ( + ListArray, + ListDtype, + make_data, +) __all__ = ["ListArray", "ListDtype", "make_data"] diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py index 998dad208033e..4715bbdad6428 100644 --- a/pandas/tests/extension/list/array.py +++ b/pandas/tests/extension/list/array.py @@ -15,7 +15,10 @@ from pandas.core.dtypes.base import ExtensionDtype import pandas as pd -from pandas.api.types import is_object_dtype, is_string_dtype +from pandas.api.types import ( + is_object_dtype, + is_string_dtype, +) from pandas.core.arrays import ExtensionArray diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py index 832bdf5bea3cf..295f08679c3eb 100644 --- 
a/pandas/tests/extension/list/test_list.py +++ b/pandas/tests/extension/list/test_list.py @@ -1,7 +1,11 @@ import pytest import pandas as pd -from pandas.tests.extension.list.array import ListArray, ListDtype, make_data +from pandas.tests.extension.list.array import ( + ListArray, + ListDtype, + make_data, +) @pytest.fixture diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 10e82a8c9bff1..3f1f2c02c79f7 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -19,7 +19,11 @@ import pytest import pandas as pd -from pandas import Categorical, CategoricalIndex, Timestamp +from pandas import ( + Categorical, + CategoricalIndex, + Timestamp, +) import pandas._testing as tm from pandas.api.types import CategoricalDtype from pandas.tests.extension import base diff --git a/pandas/tests/extension/test_floating.py b/pandas/tests/extension/test_floating.py index e6ab3a4de86f6..617dfc694741e 100644 --- a/pandas/tests/extension/test_floating.py +++ b/pandas/tests/extension/test_floating.py @@ -21,7 +21,10 @@ import pandas as pd import pandas._testing as tm from pandas.api.types import is_float_dtype -from pandas.core.arrays.floating import Float32Dtype, Float64Dtype +from pandas.core.arrays.floating import ( + Float32Dtype, + Float64Dtype, +) from pandas.tests.extension import base diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 4ee9cb89fc227..2305edc1e1327 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -18,7 +18,10 @@ import pandas as pd import pandas._testing as tm -from pandas.api.types import is_extension_array_dtype, is_integer_dtype +from pandas.api.types import ( + is_extension_array_dtype, + is_integer_dtype, +) from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py 
index a5b54bc153f5d..17f29e02a2883 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -16,7 +16,10 @@ import numpy as np import pytest -from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, + PandasDtype, +) import pandas as pd import pandas._testing as tm @@ -186,11 +189,6 @@ def test_getitem_scalar(self, data): # AssertionError super().test_getitem_scalar(data) - @skip_nested - def test_take_series(self, data): - # ValueError: PandasArray must be 1-dimensional. - super().test_take_series(data) - class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): def test_groupby_extension_apply( @@ -219,13 +217,6 @@ def test_shift_fill_value(self, data): # np.array shape inference. Shift implementation fails. super().test_shift_fill_value(data) - @skip_nested - @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) - @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) - def test_unique(self, data, box, method): - # Fails creating expected - super().test_unique(data, box, method) - @skip_nested def test_fillna_copy_frame(self, data_missing): # The "scalar" for this array isn't a scalar. @@ -241,31 +232,10 @@ def test_searchsorted(self, data_for_sorting, as_series): # Test setup fails. super().test_searchsorted(data_for_sorting, as_series) - @skip_nested - def test_where_series(self, data, na_value, as_frame): - # Test setup fails. 
- super().test_where_series(data, na_value, as_frame) - - @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) - def test_repeat(self, data, repeats, as_series, use_numpy, request): - if data.dtype.numpy_dtype == object and repeats != 0: - mark = pytest.mark.xfail(reason="mask shapes mismatch") - request.node.add_marker(mark) - super().test_repeat(data, repeats, as_series, use_numpy) - @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") def test_diff(self, data, periods): return super().test_diff(data, periods) - @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) - def test_equals(self, data, na_value, as_series, box, request): - # Fails creating with _from_sequence - if box is pd.DataFrame and data.dtype.numpy_dtype == object: - mark = pytest.mark.xfail(reason="AssertionError in _get_same_shape_values") - request.node.add_marker(mark) - - super().test_equals(data, na_value, as_series, box) - class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): divmod_exc = None @@ -286,8 +256,11 @@ def test_divmod_series_array(self, data): def test_arith_series_with_scalar(self, data, all_arithmetic_operators): super().test_arith_series_with_scalar(data, all_arithmetic_operators) - @skip_nested - def test_arith_series_with_array(self, data, all_arithmetic_operators): + def test_arith_series_with_array(self, data, all_arithmetic_operators, request): + opname = all_arithmetic_operators + if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]: + mark = pytest.mark.xfail(reason="Fails for object dtype") + request.node.add_marker(mark) super().test_arith_series_with_array(data, all_arithmetic_operators) @skip_nested @@ -322,11 +295,6 @@ def test_fillna_scalar(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_scalar(data_missing) - @skip_nested - def test_fillna_series_method(self, data_missing, fillna_method): - # Non-scalar "scalar" values. 
- super().test_fillna_series_method(data_missing, fillna_method) - @skip_nested def test_fillna_series(self, data_missing): # Non-scalar "scalar" values. @@ -355,20 +323,6 @@ def test_merge(self, data, na_value): # Fails creating expected (key column becomes a PandasDtype because) super().test_merge(data, na_value) - @skip_nested - def test_merge_on_extension_array(self, data): - # Fails creating expected - super().test_merge_on_extension_array(data) - - @skip_nested - def test_merge_on_extension_array_duplicates(self, data): - # Fails creating expected - super().test_merge_on_extension_array_duplicates(data) - - @skip_nested - def test_transpose_frame(self, data): - super().test_transpose_frame(data) - class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): @skip_nested diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 766910618d925..067fada5edcae 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -16,7 +16,10 @@ import numpy as np import pytest -from pandas.compat import IS64, is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import is_object_dtype diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 95ebaa4641d1b..65f228f2be411 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -1,6 +1,9 @@ from typing import List -from pandas import DataFrame, concat +from pandas import ( + DataFrame, + concat, +) def _check_mixed_float(df, dtype=None): diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py index c6c8515d3b89b..7d485ee62c7d2 100644 --- a/pandas/tests/frame/conftest.py +++ b/pandas/tests/frame/conftest.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import DataFrame, NaT, date_range +from pandas import ( + DataFrame, + NaT, + date_range, +) import pandas._testing as 
tm diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 6c9e6751fd9a4..72107d849f598 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -3,7 +3,12 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.construction import create_series_with_explicit_dtype diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index ed6333bac1caa..0d36f3bd80e26 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -7,7 +7,14 @@ from pandas.compat import is_platform_little_endian -from pandas import CategoricalIndex, DataFrame, Index, Interval, RangeIndex, Series +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + Interval, + RangeIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index b3e0783d7388f..464b24e45abf4 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -4,7 +4,12 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, DataFrame, Index, Series +from pandas import ( + Categorical, + DataFrame, + Index, + Series, +) import pandas._testing as tm msg1 = "Cannot setitem on a Categorical with a new category, set the categories first" diff --git a/pandas/tests/frame/indexing/test_delitem.py b/pandas/tests/frame/indexing/test_delitem.py index f6c7b6ed5d14d..fa10c9ef7b85a 100644 --- a/pandas/tests/frame/indexing/test_delitem.py +++ b/pandas/tests/frame/indexing/test_delitem.py @@ -3,7 +3,10 @@ import numpy as np 
import pytest -from pandas import DataFrame, MultiIndex +from pandas import ( + DataFrame, + MultiIndex, +) class TestDataFrameDelItem: diff --git a/pandas/tests/frame/indexing/test_get_value.py b/pandas/tests/frame/indexing/test_get_value.py index 9a2ec975f1e31..65a1c64a1578a 100644 --- a/pandas/tests/frame/indexing/test_get_value.py +++ b/pandas/tests/frame/indexing/test_get_value.py @@ -1,6 +1,9 @@ import pytest -from pandas import DataFrame, MultiIndex +from pandas import ( + DataFrame, + MultiIndex, +) class TestGetValue: diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 4282db6933371..7c48c412fd694 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -10,6 +10,7 @@ MultiIndex, Series, Timestamp, + concat, get_dummies, period_range, ) @@ -176,6 +177,87 @@ def test_getitem_bool_mask_categorical_index(self): with pytest.raises(TypeError, match=msg): df4[df4.index > 1] + @pytest.mark.parametrize( + "data1,data2,expected_data", + ( + ( + [[1, 2], [3, 4]], + [[0.5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], + ), + ( + [[1, 2], [3, 4]], + [[5, 6], [7, 8]], + [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], + ), + ), + ) + def test_getitem_bool_mask_duplicate_columns_mixed_dtypes( + self, + data1, + data2, + expected_data, + ): + # GH#31954 + + df1 = DataFrame(np.array(data1)) + df2 = DataFrame(np.array(data2)) + df = concat([df1, df2], axis=1) + + result = df[df > 2] + + exdict = {i: np.array(col) for i, col in enumerate(expected_data)} + expected = DataFrame(exdict).rename(columns={2: 0, 3: 1}) + tm.assert_frame_equal(result, expected) + + @pytest.fixture + def df_dup_cols(self): + dups = ["A", "A", "C", "D"] + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + return df + + def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self, df_dup_cols): + # `df.A > 6` is a DataFrame with a 
different shape from df + + # boolean with the duplicate raises + df = df_dup_cols + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df[df.A > 6] + + def test_getitem_boolean_series_with_duplicate_columns(self, df_dup_cols): + # boolean indexing + # GH#4879 + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + expected = df[df.C > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df.C > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + def test_getitem_boolean_frame_with_duplicate_columns(self, df_dup_cols): + + # where + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + # `df > 6` is a DataFrame with the same shape+alignment as df + expected = df[df > 6] + expected.columns = df_dup_cols.columns + + df = df_dup_cols + result = df[df > 6] + + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + class TestGetitemSlice: def test_getitem_slice_float64(self, frame_or_series): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 6808ffe65e561..9b6bdbf3a9d60 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1,4 +1,9 @@ -from datetime import date, datetime, time, timedelta +from datetime import ( + date, + datetime, + time, + timedelta, +) import re import numpy as np @@ -212,7 +217,7 @@ def test_setitem_multi_index(self): it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] cols = MultiIndex.from_product(it) - index = pd.date_range("20141006", periods=20) + index = date_range("20141006", periods=20) vals = np.random.randint(1, 1000, (len(index), len(cols))) df = DataFrame(vals, columns=cols, index=index) @@ -290,7 +295,7 @@ def test_getitem_boolean( # we are producing a warning that since the passed boolean # key 
is not the same as the given index, we will reindex # not sure this is really necessary - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + with tm.assert_produces_warning(UserWarning): indexer_obj = indexer_obj.reindex(datetime_frame.index[::-1]) subframe_obj = datetime_frame[indexer_obj] tm.assert_frame_equal(subframe_obj, subframe) @@ -1141,7 +1146,8 @@ def test_setitem_frame_mixed(self, float_string_frame): f.loc[key] = piece tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) - # rows unaligned + def test_setitem_frame_mixed_rows_unaligned(self, float_string_frame): + # GH#3216 rows unaligned f = float_string_frame.copy() piece = DataFrame( [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], @@ -1154,7 +1160,8 @@ def test_setitem_frame_mixed(self, float_string_frame): f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] ) - # key is unaligned with values + def test_setitem_frame_mixed_key_unaligned(self, float_string_frame): + # GH#3216 key is unaligned with values f = float_string_frame.copy() piece = f.loc[f.index[:2], ["A"]] piece.index = f.index[-2:] @@ -1163,7 +1170,8 @@ def test_setitem_frame_mixed(self, float_string_frame): piece["B"] = np.nan tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) - # ndarray + def test_setitem_frame_mixed_ndarray(self, float_string_frame): + # GH#3216 ndarray f = float_string_frame.copy() piece = float_string_frame.loc[f.index[:2], ["A", "B"]] key = (f.index[slice(-2, None)], ["A", "B"]) @@ -1349,7 +1357,7 @@ def test_loc_duplicates(self): # gh-17105 # insert a duplicate element to the index - trange = pd.date_range( + trange = date_range( start=Timestamp(year=2017, month=1, day=1), end=Timestamp(year=2017, month=1, day=5), ) @@ -1413,7 +1421,7 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): # GH 12981 # Assignment of unaligned offset-aware datetime series. 
# Make sure timezone isn't lost - column = Series(pd.date_range("2015-01-01", periods=3, tz="utc"), name="dates") + column = Series(date_range("2015-01-01", periods=3, tz="utc"), name="dates") df = DataFrame({"dates": column}) df["dates"] = column[[1, 0, 2]] tm.assert_series_equal(df["dates"], column) @@ -1466,7 +1474,7 @@ def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): result.loc[:, idxer] = expected tm.assert_frame_equal(result, expected) - def test_at_time_between_time_datetimeindex(self): + def test_loc_setitem_time_key(self): index = date_range("2012-01-01", "2012-01-05", freq="30min") df = DataFrame(np.random.randn(len(index), 5), index=index) akey = time(12, 0, 0) @@ -1474,20 +1482,6 @@ def test_at_time_between_time_datetimeindex(self): ainds = [24, 72, 120, 168] binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] - result = df.at_time(akey) - expected = df.loc[akey] - expected2 = df.iloc[ainds] - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, expected2) - assert len(result) == 4 - - result = df.between_time(bkey.start, bkey.stop) - expected = df.loc[bkey] - expected2 = df.iloc[binds] - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, expected2) - assert len(result) == 12 - result = df.copy() result.loc[akey] = 0 result = result.loc[akey] @@ -1524,26 +1518,11 @@ def test_loc_getitem_index_namedtuple(self): result = df.loc[IndexType("foo", "bar")]["A"] assert result == 1 - @pytest.mark.parametrize( - "tpl", - [ - (1,), - ( - 1, - 2, - ), - ], - ) + @pytest.mark.parametrize("tpl", [(1,), (1, 2)]) def test_loc_getitem_index_single_double_tuples(self, tpl): # GH 20991 idx = Index( - [ - (1,), - ( - 1, - 2, - ), - ], + [(1,), (1, 2)], name="A", tupleize_cols=False, ) @@ -1737,7 +1716,7 @@ def test_object_casting_indexing_wraps_datetimelike(): df = DataFrame( { "A": [1, 2], - "B": pd.date_range("2000", periods=2), + "B": date_range("2000", periods=2), "C": pd.timedelta_range("1 Day", 
periods=2), } ) diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 6e4deb5469777..921d277e09230 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -8,7 +8,10 @@ from pandas.errors import PerformanceWarning -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/frame/indexing/test_lookup.py b/pandas/tests/frame/indexing/test_lookup.py index 21d732695fba4..caab5feea853b 100644 --- a/pandas/tests/frame/indexing/test_lookup.py +++ b/pandas/tests/frame/indexing/test_lookup.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index bd541719c0877..afa8c757c23e4 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -4,7 +4,10 @@ import numpy as np -from pandas import DataFrame, isna +from pandas import ( + DataFrame, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 84def57f6b6e0..b8150c26aa6bb 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -3,7 +3,10 @@ from pandas.core.dtypes.common import is_float_dtype -from pandas import DataFrame, isna +from pandas import ( + DataFrame, + isna, +) class TestSetValue: diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9318764a1b5ad..6763113036de8 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -2,18 +2,30 @@ import pytest from pandas.core.dtypes.base import registry as ea_registry -from 
pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_interval_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) from pandas import ( Categorical, DataFrame, + DatetimeIndex, Index, Interval, + IntervalIndex, NaT, Period, PeriodIndex, Series, Timestamp, + cut, date_range, notna, period_range, @@ -395,6 +407,99 @@ def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self): with pytest.raises(ValueError, match=msg): df[["a", "b"]] = rhs + def test_setitem_intervals(self): + + df = DataFrame({"A": range(10)}) + ser = cut(df["A"], 5) + assert isinstance(ser.cat.categories, IntervalIndex) + + # B & D end up as Categoricals + # the remainder are converted to in-line objects + # containing an IntervalIndex.values + df["B"] = ser + df["C"] = np.array(ser) + df["D"] = ser.values + df["E"] = np.array(ser.values) + + assert is_categorical_dtype(df["B"].dtype) + assert is_interval_dtype(df["B"].cat.categories) + assert is_categorical_dtype(df["D"].dtype) + assert is_interval_dtype(df["D"].cat.categories) + + assert is_object_dtype(df["C"]) + assert is_object_dtype(df["E"]) + + # they compare equal as Index + # when converted to numpy objects + c = lambda x: Index(np.array(x)) + tm.assert_index_equal(c(df.B), c(df.B)) + tm.assert_index_equal(c(df.B), c(df.C), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.C), c(df.D), check_names=False) + + # B & D are the same Series + tm.assert_series_equal(df["B"], df["B"]) + tm.assert_series_equal(df["B"], df["D"], check_names=False) + + # C & E are the same Series + tm.assert_series_equal(df["C"], df["C"]) + tm.assert_series_equal(df["C"], df["E"], check_names=False) + + def test_setitem_categorical(self): + # GH#35369 + df = DataFrame({"h": Series(list("mn")).astype("category")}) + df.h =
df.h.cat.reorder_categories(["n", "m"]) + expected = DataFrame( + {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])} + ) + tm.assert_frame_equal(df, expected) + + +class TestSetitemTZAwareValues: + @pytest.fixture + def idx(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + return idx + + @pytest.fixture + def expected(self, idx): + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + return expected + + def test_setitem_dt64series(self, idx, expected): + # convert to utc + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + df["B"] = idx + + with tm.assert_produces_warning(FutureWarning) as m: + df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) + + result = df["B"] + comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") + tm.assert_series_equal(result, comp) + + def test_setitem_datetimeindex(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # assign to frame + df["B"] = idx + result = df["B"] + tm.assert_series_equal(result, expected) + + def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): + # setting a DataFrame column with a tzaware DTI retains the dtype + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + # object array of datetimes with a tz + df["B"] = idx.to_pydatetime() + result = df["B"] + tm.assert_series_equal(result, expected) + class TestDataFrameSetItemWithExpansion: def test_setitem_listlike_views(self): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 2f098426efaf9..bc84d7c70b01c 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -6,7 +6,14 @@ from pandas.core.dtypes.common import 
is_scalar import pandas as pd -from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range, isna +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + Timestamp, + date_range, + isna, +) import pandas._testing as tm @@ -499,6 +506,7 @@ def test_where_axis(self): assert return_value is None tm.assert_frame_equal(result, expected) + def test_where_axis_multiple_dtypes(self): # Multiple dtypes (=> multiple Blocks) df = pd.concat( [ diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 3be3ce15622b4..59c88af265c08 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,7 +3,14 @@ import numpy as np import pytest -from pandas import DataFrame, Index, IndexSlice, MultiIndex, Series, concat +from pandas import ( + DataFrame, + Index, + IndexSlice, + MultiIndex, + Series, + concat, +) import pandas._testing as tm import pandas.core.common as com diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 5dd4f7f8f8800..a6e6914ba701e 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -3,7 +3,12 @@ import pytz import pandas as pd -from pandas import DataFrame, Index, Series, date_range +from pandas import ( + DataFrame, + Index, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py index 36c875b8abe6f..ba58d88fb4863 100644 --- a/pandas/tests/frame/methods/test_append.py +++ b/pandas/tests/frame/methods/test_append.py @@ -4,7 +4,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series, Timestamp, date_range, timedelta_range +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + timedelta_range, +) import pandas._testing as tm # TODO td.skip_array_manager_not_yet_implemented diff --git 
a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py index 368ce88abe165..8a32841466b18 100644 --- a/pandas/tests/frame/methods/test_asfreq.py +++ b/pandas/tests/frame/methods/test_asfreq.py @@ -2,7 +2,13 @@ import numpy as np -from pandas import DataFrame, DatetimeIndex, Series, date_range, to_datetime +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, + to_datetime, +) import pandas._testing as tm from pandas.tseries import offsets diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 46f5a20f38941..35e958ff3a2b1 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -432,19 +432,11 @@ def test_astype_to_incorrect_datetimelike(self, unit): other = f"m8[{unit}]" df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) - msg = ( - fr"cannot astype a datetimelike from \[datetime64\[ns\]\] to " - fr"\[timedelta64\[{unit}\]\]" - fr"|(Cannot cast DatetimeArray to dtype timedelta64\[{unit}\])" - ) + msg = fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]" with pytest.raises(TypeError, match=msg): df.astype(other) - msg = ( - fr"cannot astype a timedelta from \[timedelta64\[ns\]\] to " - fr"\[datetime64\[{unit}\]\]" - fr"|(Cannot cast TimedeltaArray to dtype datetime64\[{unit}\])" - ) + msg = fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]" df = DataFrame(np.array([[1, 2, 3]], dtype=other)) with pytest.raises(TypeError, match=msg): df.astype(dtype) diff --git a/pandas/tests/frame/methods/test_at_time.py b/pandas/tests/frame/methods/test_at_time.py index 7ac3868e8ddf4..2d05176d20f5f 100644 --- a/pandas/tests/frame/methods/test_at_time.py +++ b/pandas/tests/frame/methods/test_at_time.py @@ -6,7 +6,10 @@ from pandas._libs.tslibs import timezones -from pandas import DataFrame, date_range +from pandas import ( + DataFrame, + date_range, +) import pandas._testing as tm @@ -110,3 +113,16 @@ def 
test_at_time_axis(self, axis): result.index = result.index._with_freq(None) expected.index = expected.index._with_freq(None) tm.assert_frame_equal(result, expected) + + def test_at_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + ainds = [24, 72, 120, 168] + + result = df.at_time(akey) + expected = df.loc[akey] + expected2 = df.iloc[ainds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 4 diff --git a/pandas/tests/frame/methods/test_between_time.py b/pandas/tests/frame/methods/test_between_time.py index 73722f36a0b86..0daa267767269 100644 --- a/pandas/tests/frame/methods/test_between_time.py +++ b/pandas/tests/frame/methods/test_between_time.py @@ -1,4 +1,7 @@ -from datetime import datetime, time +from datetime import ( + datetime, + time, +) import numpy as np import pytest @@ -6,7 +9,11 @@ from pandas._libs.tslibs import timezones import pandas.util._test_decorators as td -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm @@ -187,3 +194,16 @@ def test_between_time_axis_raises(self, axis): ts.columns = mask with pytest.raises(TypeError, match=msg): ts.between_time(stime, etime, axis=1) + + def test_between_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + bkey = slice(time(13, 0, 0), time(14, 0, 0)) + binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] + + result = df.between_time(bkey.start, bkey.stop) + expected = df.loc[bkey] + expected2 = df.iloc[binds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 12 diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index 
2da6c6e3f0a51..8a2374a414482 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 1325bfbda24c6..b4d8a53e4b23f 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -3,10 +3,18 @@ import numpy as np import pytest -from pandas.core.dtypes.cast import find_common_type, is_dtype_equal +from pandas.core.dtypes.cast import ( + find_common_type, + is_dtype_equal, +) import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_convert.py b/pandas/tests/frame/methods/test_convert.py index a00b2b5960884..13fec9829c3db 100644 --- a/pandas/tests/frame/methods/test_convert.py +++ b/pandas/tests/frame/methods/test_convert.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 1727a76c191ee..1dbcc36c83abc 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -3,7 +3,11 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Index, Series +from pandas import ( + DataFrame, + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index f8d729a215ba8..33b98fc5c2135 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ 
b/pandas/tests/frame/methods/test_cov_corr.py @@ -6,7 +6,11 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series, isna +from pandas import ( + DataFrame, + Series, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 15bafb7a835ba..0b4ce0dfa80fc 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -4,7 +4,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import Categorical, DataFrame, Series, Timestamp, date_range +from pandas import ( + Categorical, + DataFrame, + Series, + Timestamp, + date_range, +) import pandas._testing as tm # TODO(ArrayManager) quantile is needed for describe() @@ -277,7 +283,7 @@ def test_describe_tz_values(self, tz_naive_fixture): tm.assert_frame_equal(result, expected) def test_datetime_is_numeric_includes_datetime(self): - df = DataFrame({"a": pd.date_range("2012", periods=3), "b": [1, 2, 3]}) + df = DataFrame({"a": date_range("2012", periods=3), "b": [1, 2, 3]}) result = df.describe(datetime_is_numeric=True) expected = DataFrame( { @@ -385,3 +391,14 @@ def test_describe_when_include_all_exclude_not_allowed(self, exclude): msg = "exclude must be None when include is 'all'" with pytest.raises(ValueError, match=msg): df.describe(include="all", exclude=exclude) + + def test_describe_with_duplicate_columns(self): + df = DataFrame( + [[1, 1, 1], [2, 2, 2], [3, 3, 3]], + columns=["bar", "a", "a"], + dtype="float64", + ) + result = df.describe() + ser = df.iloc[:, 0].describe() + expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index b8328b43a6b13..75d93ed2aafc6 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -2,7 
+2,12 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, Timestamp, date_range +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, +) import pandas._testing as tm @@ -75,7 +80,7 @@ def test_diff_datetime_axis0_with_nat(self, tz): @pytest.mark.parametrize("tz", [None, "UTC"]) def test_diff_datetime_with_nat_zero_periods(self, tz): # diff on NaT values should give NaT, not timedelta64(0) - dti = pd.date_range("2016-01-01", periods=4, tz=tz) + dti = date_range("2016-01-01", periods=4, tz=tz) ser = Series(dti) df = ser.to_frame() @@ -173,7 +178,7 @@ def test_diff_axis(self): def test_diff_period(self): # GH#32995 Don't pass an incorrect axis - pi = pd.date_range("2016-01-01", periods=3).to_period("D") + pi = date_range("2016-01-01", periods=3).to_period("D") df = DataFrame({"A": pi}) result = df.diff(1, axis=1) diff --git a/pandas/tests/frame/methods/test_dot.py b/pandas/tests/frame/methods/test_dot.py index ecbec6b06e923..555e5f0e26eaf 100644 --- a/pandas/tests/frame/methods/test_dot.py +++ b/pandas/tests/frame/methods/test_dot.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index 4568cda24d5cf..dc9a1565aad1e 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -7,7 +7,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) import pandas._testing as tm @@ -21,7 +27,7 @@ def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level): # GH 8594 mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) - s = pd.Series([10, 20, 30], index=mi) + s = Series([10, 20, 30], index=mi) df = 
DataFrame([10, 20, 30], index=mi) with pytest.raises(KeyError, match=msg): @@ -34,7 +40,7 @@ def test_drop_raise_exception_if_labels_not_in_level(msg, labels, level): def test_drop_errors_ignore(labels, level): # GH 8594 mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) - s = pd.Series([10, 20, 30], index=mi) + s = Series([10, 20, 30], index=mi) df = DataFrame([10, 20, 30], index=mi) expected_s = s.drop(labels, level=level, errors="ignore") @@ -451,3 +457,13 @@ def test_drop_with_non_unique_multiindex(self): result = df.drop(index="x") expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]])) tm.assert_frame_equal(result, expected) + + def test_drop_with_duplicate_columns(self): + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + result = df.drop(["a"], axis=1) + expected = DataFrame([[1], [1], [1]], columns=["bar"]) + tm.assert_frame_equal(result, expected) + result = df.drop("a", axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py index b1d3890540bf9..10c1f37f4c9ba 100644 --- a/pandas/tests/frame/methods/test_drop_duplicates.py +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -4,7 +4,10 @@ import numpy as np import pytest -from pandas import DataFrame, NaT +from pandas import ( + DataFrame, + NaT, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py index ce98704b03106..e1302d4b73f2b 100644 --- a/pandas/tests/frame/methods/test_droplevel.py +++ b/pandas/tests/frame/methods/test_droplevel.py @@ -1,6 +1,10 @@ import pytest -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_dropna.py b/pandas/tests/frame/methods/test_dropna.py index 
9cbfee5e663ae..e28c716544209 100644 --- a/pandas/tests/frame/methods/test_dropna.py +++ b/pandas/tests/frame/methods/test_dropna.py @@ -5,7 +5,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_dtypes.py b/pandas/tests/frame/methods/test_dtypes.py index 840e23604939a..84841ad7a634e 100644 --- a/pandas/tests/frame/methods/test_dtypes.py +++ b/pandas/tests/frame/methods/test_dtypes.py @@ -5,7 +5,12 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd -from pandas import DataFrame, Series, date_range, option_context +from pandas import ( + DataFrame, + Series, + date_range, + option_context, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py index 7a1c16adc2a09..0b90914281d3b 100644 --- a/pandas/tests/frame/methods/test_duplicated.py +++ b/pandas/tests/frame/methods/test_duplicated.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py index ac9a66b4c7a6f..dddd6c6d2eaf2 100644 --- a/pandas/tests/frame/methods/test_equals.py +++ b/pandas/tests/frame/methods/test_equals.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import DataFrame, date_range +from pandas import ( + DataFrame, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py index 43469a093f827..70b9af358c1b9 100644 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ b/pandas/tests/frame/methods/test_first_and_last.py @@ -3,7 +3,10 @@ """ import pytest -from pandas import DataFrame, bdate_range +from pandas 
import ( + DataFrame, + bdate_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_first_valid_index.py b/pandas/tests/frame/methods/test_first_valid_index.py index 8d021f0e3954e..c2de1a6bb7b14 100644 --- a/pandas/tests/frame/methods/test_first_valid_index.py +++ b/pandas/tests/frame/methods/test_first_valid_index.py @@ -4,7 +4,11 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 25b2a41b4b3a5..8628b76f54b1d 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -1,7 +1,13 @@ import numpy as np import pandas as pd -from pandas import Categorical, DataFrame, Index, Series, Timestamp +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + Timestamp, +) import pandas._testing as tm from pandas.core.arrays import IntervalArray diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 2477ad79d8a2c..5c6fcec887dfb 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -3,7 +3,11 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py index 126c78a657c58..a5f285d31301b 100644 --- a/pandas/tests/frame/methods/test_is_homogeneous_dtype.py +++ b/pandas/tests/frame/methods/test_is_homogeneous_dtype.py @@ -3,7 +3,10 @@ import pandas.util._test_decorators as td -from pandas import Categorical, DataFrame +from pandas import ( + Categorical, + 
DataFrame, +) # _is_homogeneous_type always returns True for ArrayManager pytestmark = td.skip_array_manager_invalid_test diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index 5e50e63016f26..d2ebd09c4cc48 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_join.py b/pandas/tests/frame/methods/test_join.py index 42694dc3ff37c..90456ad949f59 100644 --- a/pandas/tests/frame/methods/test_join.py +++ b/pandas/tests/frame/methods/test_join.py @@ -6,7 +6,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Index, MultiIndex, date_range, period_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + date_range, + period_range, +) import pandas._testing as tm # TODO(ArrayManager) concat with reindexing @@ -221,7 +227,7 @@ def test_suppress_future_warning_with_sort_kw(sort_kw): if sort_kw is False: expected = expected.reindex(index=["c", "a", "b"]) - with tm.assert_produces_warning(None, check_stacklevel=False): + with tm.assert_produces_warning(None): result = a.join([b, c], how="outer", sort=sort_kw) tm.assert_frame_equal(result, expected) @@ -304,7 +310,7 @@ def test_join_multiindex_leftright(self): tm.assert_frame_equal(df1.join(df2, how="left"), exp) tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) - exp_idx = pd.MultiIndex.from_product( + exp_idx = MultiIndex.from_product( [["a", "b"], ["x", "y", "z"]], names=["first", "second"] ) exp = DataFrame( diff --git a/pandas/tests/frame/methods/test_matmul.py b/pandas/tests/frame/methods/test_matmul.py index c34bf991ffc4c..702ab3916d77a 100644 --- a/pandas/tests/frame/methods/test_matmul.py +++ 
b/pandas/tests/frame/methods/test_matmul.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import DataFrame, Index, Series +from pandas import ( + DataFrame, + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py index 56fb9ab0d8f00..8749218df59e1 100644 --- a/pandas/tests/frame/methods/test_pct_change.py +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_pipe.py b/pandas/tests/frame/methods/test_pipe.py index b865c191aebaf..26ea904260a65 100644 --- a/pandas/tests/frame/methods/test_pipe.py +++ b/pandas/tests/frame/methods/test_pipe.py @@ -1,6 +1,9 @@ import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py index 2926e29e61d56..a4f99b8287188 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 6d6016df52238..46d3e335539fb 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -4,7 +4,11 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series, Timestamp +from pandas import ( + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm pytestmark = td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/frame/methods/test_rank.py 
b/pandas/tests/frame/methods/test_rank.py index 5b66f58b8f069..ce46d1d8b1869 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -1,13 +1,22 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest from pandas._libs import iNaT -from pandas._libs.algos import Infinity, NegInfinity +from pandas._libs.algos import ( + Infinity, + NegInfinity, +) import pandas.util._test_decorators as td -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index e4e2656f4337c..8a3ac265db154 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import inspect from itertools import permutations @@ -21,10 +24,69 @@ import pandas.core.common as com +class TestReindexSetIndex: + # Tests that check both reindex and set_index + + def test_dti_set_index_reindex_datetimeindex(self): + # GH#6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") + idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + def test_dti_set_index_reindex_freq_with_tz(self): + # GH#11314 with tz + index = date_range( + datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" + ) + df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) + new_index = date_range( + datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" + ) + + result = df.set_index(new_index) + assert result.index.freq == index.freq + + def 
test_set_reset_index_intervalindex(self): + + df = DataFrame({"A": range(10)}) + ser = pd.cut(df.A, 5) + df["B"] = ser + df = df.set_index("B") + + df = df.reset_index() + + class TestDataFrameSelectReindex: # These are specific reindex-based tests; other indexing tests should go in # test_indexing + def test_reindex_date_fill_value(self): + # passing date to dt64 is deprecated + arr = date_range("2016-01-01", periods=6).values.reshape(3, 2) + df = DataFrame(arr, columns=["A", "B"], index=range(3)) + + ts = df.iloc[0, 0] + fv = ts.date() + + with tm.assert_produces_warning(FutureWarning): + res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv) + + expected = DataFrame( + {"A": df["A"].tolist() + [ts], "B": df["B"].tolist() + [ts], "C": [ts] * 4} + ) + tm.assert_frame_equal(res, expected) + + # same with a datetime-castable str + res = df.reindex( + index=range(4), columns=["A", "B", "C"], fill_value="2016-01-01" + ) + tm.assert_frame_equal(res, expected) + def test_reindex_with_multi_index(self): # https://github.com/pandas-dev/pandas/issues/29896 # tests for reindexing a multi-indexed DataFrame with a new MultiIndex @@ -582,6 +644,18 @@ def test_reindex_dups(self): with pytest.raises(ValueError, match=msg): df.reindex(index=list(range(len(df)))) + def test_reindex_with_duplicate_columns(self): + + # reindex is invalid! 
+ df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df.reindex(columns=["bar"]) + with pytest.raises(ValueError, match=msg): + df.reindex(columns=["bar", "foo"]) + def test_reindex_axis_style(self): # https://github.com/pandas-dev/pandas/issues/12392 df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 1080d97b30987..677d862dfe077 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -4,7 +4,15 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series +import pandas.util._test_decorators as td + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + merge, +) import pandas._testing as tm @@ -352,3 +360,45 @@ def test_rename_mapper_and_positional_arguments_raises(self): with pytest.raises(TypeError, match=msg): df.rename({}, columns={}, index={}) + + @td.skip_array_manager_not_yet_implemented + def test_rename_with_duplicate_columns(self): + # GH#4403 + df4 = DataFrame( + {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, + index=MultiIndex.from_tuples( + [(600809, 20130331)], names=["STK_ID", "RPT_Date"] + ), + ) + + df5 = DataFrame( + { + "RPT_Date": [20120930, 20121231, 20130331], + "STK_ID": [600809] * 3, + "STK_Name": ["饡驦", "饡驦", "饡驦"], + "TClose": [38.05, 41.66, 30.01], + }, + index=MultiIndex.from_tuples( + [(600809, 20120930), (600809, 20121231), (600809, 20130331)], + names=["STK_ID", "RPT_Date"], + ), + ) + # TODO: can we construct this without merge? 
+ k = merge(df4, df5, how="inner", left_index=True, right_index=True) + result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) + str(result) + result.dtypes + + expected = DataFrame( + [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], + columns=[ + "RT", + "TClose", + "TExg", + "RPT_Date", + "STK_ID", + "STK_Name", + "QT_Close", + ], + ).set_index(["STK_ID", "RPT_Date"], drop=False) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_rename_axis.py b/pandas/tests/frame/methods/test_rename_axis.py index 3339119841813..dd4a77c6509b8 100644 --- a/pandas/tests/frame/methods/test_rename_axis.py +++ b/pandas/tests/frame/methods/test_rename_axis.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_reorder_levels.py b/pandas/tests/frame/methods/test_reorder_levels.py index 451fc9a5cf717..0173de88b6d6d 100644 --- a/pandas/tests/frame/methods/test_reorder_levels.py +++ b/pandas/tests/frame/methods/test_reorder_levels.py @@ -3,7 +3,10 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, MultiIndex +from pandas import ( + DataFrame, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index febd113f309ae..9ae5bb151b685 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1,13 +1,23 @@ from datetime import datetime from io import StringIO import re -from typing import Dict, List, Union +from typing import ( + Dict, + List, + Union, +) import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Index, Series, Timestamp, date_range +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + date_range, +) import pandas._testing as tm 
diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 8644f56e4f253..bd66d54792fba 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -6,7 +6,10 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) import pandas as pd from pandas import ( @@ -423,7 +426,7 @@ def test_reset_index_multiindex_columns(self): def test_reset_index_datetime(self, tz_naive_fixture): # GH#3950 tz = tz_naive_fixture - idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") idx2 = Index(range(5), name="idx2", dtype="int64") idx = MultiIndex.from_arrays([idx1, idx2]) df = DataFrame( @@ -450,7 +453,7 @@ def test_reset_index_datetime(self, tz_naive_fixture): tm.assert_frame_equal(df.reset_index(), expected) - idx3 = pd.date_range( + idx3 = date_range( "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" ) idx = MultiIndex.from_arrays([idx1, idx2, idx3]) @@ -489,7 +492,7 @@ def test_reset_index_datetime(self, tz_naive_fixture): # GH#7793 idx = MultiIndex.from_product( - [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] + [["a", "b"], date_range("20130101", periods=3, tz=tz)] ) df = DataFrame( np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py index 5cf5aea8846c5..ebe33922be541 100644 --- a/pandas/tests/frame/methods/test_round.py +++ b/pandas/tests/frame/methods/test_round.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git 
a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index e17c14940746d..f11e13ca2574e 100644 --- a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -3,7 +3,10 @@ from pandas.compat import np_version_under1p17 -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm import pandas.core.common as com diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 2a94b18b806f8..7d3333e493136 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -4,7 +4,10 @@ from pandas.core.dtypes.dtypes import ExtensionDtype import pandas as pd -from pandas import DataFrame, Timestamp +from pandas import ( + DataFrame, + Timestamp, +) import pandas._testing as tm from pandas.core.arrays import ExtensionArray diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index a46a91811f40e..ee538e1d9d9ac 100644 --- a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index b66a95bae51c5..430abd9700a23 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -1,4 +1,11 @@ -from datetime import datetime, timedelta +""" +See also: test_reindex.py:TestReindexSetIndex +""" + +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py index aefc407d0c432..0474206aec06f 100644 --- 
a/pandas/tests/frame/methods/test_shift.py +++ b/pandas/tests/frame/methods/test_shift.py @@ -4,7 +4,14 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import CategoricalIndex, DataFrame, Index, Series, date_range, offsets +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + Series, + date_range, + offsets, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 053684ba08484..eaea22df3adfe 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -6,7 +6,13 @@ from pandas.errors import PerformanceWarning import pandas as pd -from pandas import Categorical, DataFrame, NaT, Timestamp, date_range +from pandas import ( + Categorical, + DataFrame, + NaT, + Timestamp, + date_range, +) import pandas._testing as tm @@ -839,7 +845,7 @@ def test_sort_column_level_and_index_label( if len(levels) > 1: # Accessing multi-level columns that are not lexsorted raises a # performance warning - with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + with tm.assert_produces_warning(PerformanceWarning): tm.assert_frame_equal(result, expected) else: tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 4cf0b1febf0af..aed784a6e4c3c 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -12,6 +12,7 @@ DataFrame, Index, MultiIndex, + NaT, Series, Timestamp, date_range, @@ -41,7 +42,7 @@ def read_csv(self, path, **kwargs): params = {"index_col": 0, "parse_dates": True} params.update(**kwargs) - return pd.read_csv(path, **params) + return read_csv(path, **params) def test_to_csv_from_csv1(self, float_frame, datetime_frame): @@ -123,7 +124,7 @@ def test_to_csv_from_csv3(self): df1.to_csv(path) df2.to_csv(path, mode="a", header=False) xp = 
pd.concat([df1, df2]) - rs = pd.read_csv(path, index_col=0) + rs = read_csv(path, index_col=0) rs.columns = [int(label) for label in rs.columns] xp.columns = [int(label) for label in xp.columns] tm.assert_frame_equal(xp, rs) @@ -139,7 +140,7 @@ def test_to_csv_from_csv4(self): ) df.to_csv(path) - result = pd.read_csv(path, index_col="dt_index") + result = read_csv(path, index_col="dt_index") result.index = pd.to_timedelta(result.index) # TODO: remove renaming when GH 10875 is solved result.index = result.index.rename("dt_index") @@ -153,7 +154,7 @@ def test_to_csv_from_csv5(self, timezone_frame): with tm.ensure_clean("__tmp_to_csv_from_csv5__") as path: timezone_frame.to_csv(path) - result = pd.read_csv(path, index_col=0, parse_dates=["A"]) + result = read_csv(path, index_col=0, parse_dates=["A"]) converter = ( lambda c: to_datetime(result[c]) @@ -166,8 +167,6 @@ def test_to_csv_from_csv5(self, timezone_frame): def test_to_csv_cols_reordering(self): # GH3454 - import pandas as pd - chunksize = 5 N = int(chunksize * 2.5) @@ -177,17 +176,15 @@ def test_to_csv_cols_reordering(self): with tm.ensure_clean() as path: df.to_csv(path, columns=cols, chunksize=chunksize) - rs_c = pd.read_csv(path, index_col=0) + rs_c = read_csv(path, index_col=0) tm.assert_frame_equal(df[cols], rs_c, check_names=False) def test_to_csv_new_dupe_cols(self): - import pandas as pd - def _check_df(df, cols=None): with tm.ensure_clean() as path: df.to_csv(path, columns=cols, chunksize=chunksize) - rs_c = pd.read_csv(path, index_col=0) + rs_c = read_csv(path, index_col=0) # we wrote them in a different order # so compare them in that order @@ -227,8 +224,6 @@ def _check_df(df, cols=None): @pytest.mark.slow def test_to_csv_dtnat(self): # GH3437 - from pandas import NaT - def make_dtnat_arr(n, nnat=None): if nnat is None: nnat = int(n * 0.1) # 10% @@ -999,7 +994,7 @@ def test_to_csv_path_is_none(self, float_frame): # Series.to_csv() csv_str = float_frame.to_csv(path_or_buf=None) assert 
isinstance(csv_str, str) - recons = pd.read_csv(StringIO(csv_str), index_col=0) + recons = read_csv(StringIO(csv_str), index_col=0) tm.assert_frame_equal(float_frame, recons) @pytest.mark.parametrize( @@ -1040,7 +1035,7 @@ def test_to_csv_compression(self, df, encoding, compression): df.to_csv(handles.handle, encoding=encoding) assert not handles.handle.closed - result = pd.read_csv( + result = read_csv( filename, compression=compression, encoding=encoding, @@ -1122,7 +1117,7 @@ def test_to_csv_with_dst_transitions(self): with tm.ensure_clean("csv_date_format_with_dst") as path: # make sure we are not failing on transitions - times = pd.date_range( + times = date_range( "2013-10-26 23:00", "2013-10-27 01:00", tz="Europe/London", @@ -1144,7 +1139,7 @@ def test_to_csv_with_dst_transitions(self): tm.assert_frame_equal(result, df) # GH11619 - idx = pd.date_range("2015-01-01", "2015-12-31", freq="H", tz="Europe/Paris") + idx = date_range("2015-01-01", "2015-12-31", freq="H", tz="Europe/Paris") idx = idx._with_freq(None) # freq does not round-trip idx._data._freq = None # otherwise there is trouble on unpickle df = DataFrame({"values": 1, "idx": idx}, index=idx) @@ -1250,7 +1245,7 @@ def test_to_csv_quoting(self): # presents with encoding? 
text_rows = ["a,b,c", '1,"test \r\n",3'] text = tm.convert_rows_list_to_csv_str(text_rows) - df = pd.read_csv(StringIO(text)) + df = read_csv(StringIO(text)) buf = StringIO() df.to_csv(buf, encoding="utf-8", index=False) @@ -1286,7 +1281,7 @@ def test_period_index_date_overflow(self): assert result == expected # Overflow with pd.NaT - dates = ["1990-01-01", pd.NaT, "3005-01-01"] + dates = ["1990-01-01", NaT, "3005-01-01"] index = pd.PeriodIndex(dates, freq="D") df = DataFrame([4, 5, 6], index=index) @@ -1298,7 +1293,7 @@ def test_period_index_date_overflow(self): def test_multi_index_header(self): # see gh-5539 - columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + columns = MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) df.columns = columns diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index db96543dc69b8..6d0d4e045e491 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -1,11 +1,18 @@ -from collections import OrderedDict, defaultdict +from collections import ( + OrderedDict, + defaultdict, +) from datetime import datetime import numpy as np import pytest import pytz -from pandas import DataFrame, Series, Timestamp +from pandas import ( + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm @@ -256,31 +263,44 @@ def test_to_dict_wide(self): expected = {f"A_{i:d}": i for i in range(256)} assert result == expected - def test_to_dict_orient_dtype(self): - # GH22620 & GH21256 - - df = DataFrame( - { - "bool": [True, True, False], - "datetime": [ + @pytest.mark.parametrize( + "data,dtype", + ( + ([True, True, False], bool), + [ + [ datetime(2018, 1, 1), datetime(2019, 2, 2), datetime(2020, 3, 3), ], - "float": [1.0, 2.0, 3.0], - "int": [1, 2, 3], - "str": ["X", "Y", "Z"], - } - ) + Timestamp, + ], + [[1.0, 2.0, 3.0], float], + [[1, 2, 3], int], + [["X", "Y", 
"Z"], str], + ), + ) + def test_to_dict_orient_dtype(self, data, dtype): + # GH22620 & GH21256 - expected = { - "int": int, - "float": float, - "str": str, - "datetime": Timestamp, - "bool": bool, - } + df = DataFrame({"a": data}) + d = df.to_dict(orient="records") + assert all(type(record["a"]) is dtype for record in d) + + @pytest.mark.parametrize( + "data,expected_dtype", + ( + [np.uint64(2), int], + [np.int64(-9), int], + [np.float64(1.1), float], + [np.bool_(True), bool], + [np.datetime64("2005-02-25"), Timestamp], + ), + ) + def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype): + # GH22620 & GH21256 - for df_dict in df.to_dict("records"): - result = {col: type(df_dict[col]) for col in list(df.columns)} - assert result == expected + df = DataFrame({"a": data}, index=[0]) + d = df.to_dict(orient="records") + result = type(d[0]["a"]) + assert result is expected_dtype diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 8de47cb17d7d3..8b5f45465cb3c 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -2,7 +2,10 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, MultiIndex +from pandas import ( + DataFrame, + MultiIndex, +) import pandas._testing as tm from pandas.core.arrays import PandasArray diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index 0682989294457..532f7c87557c8 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -2,7 +2,10 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Timestamp +from pandas import ( + DataFrame, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 548842e653a63..d6ab3268c8c37 100644 --- 
a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -3,7 +3,10 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, date_range +from pandas import ( + DataFrame, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py index c6d6637edc88c..210e86067566a 100644 --- a/pandas/tests/frame/methods/test_truncate.py +++ b/pandas/tests/frame/methods/test_truncate.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py index 9176d2300c39e..046f7a4f9e1c3 100644 --- a/pandas/tests/frame/methods/test_tz_convert.py +++ b/pandas/tests/frame/methods/test_tz_convert.py @@ -1,7 +1,13 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py index 9108bd52bdfa0..425ec4335455e 100644 --- a/pandas/tests/frame/methods/test_tz_localize.py +++ b/pandas/tests/frame/methods/test_tz_localize.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index d9de026dbf4e9..59e0605cc5a91 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, date_range +from 
pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index 5426e4368722e..38e58860959b8 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -3,7 +3,14 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, NaT, Series, Timestamp, date_range, period_range +from pandas import ( + DataFrame, + NaT, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm @@ -48,6 +55,12 @@ def test_values_duplicates(self): tm.assert_numpy_array_equal(result, expected) + def test_values_with_duplicate_columns(self): + df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) + result = df.values + expected = np.array([[1, 2.5], [3, 4.5]]) + assert (result == expected).all().all() + @pytest.mark.parametrize("constructor", [date_range, period_range]) def test_values_casts_datetimelike_to_object(self, constructor): series = Series(constructor("2000-01-01", periods=10, freq="D")) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 862f5b87785f5..c68171ab254c7 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,111 +1,13 @@ from datetime import datetime -import numpy as np -import pytest import pytz -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_interval_dtype, - is_object_dtype, -) - -from pandas import ( - DataFrame, - DatetimeIndex, - Index, - IntervalIndex, - Series, - Timestamp, - cut, - date_range, -) +from pandas import DataFrame import pandas._testing as tm class TestDataFrameAlterAxes: - @pytest.fixture - def idx_expected(self): - idx = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B").tz_localize( - "US/Pacific" - ) - - expected = Series( - np.array( - [ - Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), 
- Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), - ], - dtype="object", - ), - name="B", - ) - assert expected.dtype == idx.dtype - return idx, expected - - def test_to_series_keep_tz_deprecated_true(self, idx_expected): - # convert to series while keeping the timezone - idx, expected = idx_expected - - msg = "stop passing 'keep_tz'" - with tm.assert_produces_warning(FutureWarning) as m: - result = idx.to_series(keep_tz=True, index=[0, 1]) - assert msg in str(m[0].message) - - tm.assert_series_equal(result, expected) - - def test_to_series_keep_tz_deprecated_false(self, idx_expected): - idx, expected = idx_expected - - with tm.assert_produces_warning(FutureWarning) as m: - result = idx.to_series(keep_tz=False, index=[0, 1]) - tm.assert_series_equal(result, expected.dt.tz_convert(None)) - msg = "do 'idx.tz_convert(None)' before calling" - assert msg in str(m[0].message) - - def test_setitem_dt64series(self, idx_expected): - # convert to utc - idx, expected = idx_expected - df = DataFrame(np.random.randn(2, 1), columns=["A"]) - df["B"] = idx - - with tm.assert_produces_warning(FutureWarning) as m: - df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) - msg = "do 'idx.tz_convert(None)' before calling" - assert msg in str(m[0].message) - - result = df["B"] - comp = Series(idx.tz_convert("UTC").tz_localize(None), name="B") - tm.assert_series_equal(result, comp) - - def test_setitem_datetimeindex(self, idx_expected): - # setting a DataFrame column with a tzaware DTI retains the dtype - idx, expected = idx_expected - df = DataFrame(np.random.randn(2, 1), columns=["A"]) - - # assign to frame - df["B"] = idx - result = df["B"] - tm.assert_series_equal(result, expected) - - def test_setitem_object_array_of_tzaware_datetimes(self, idx_expected): - # setting a DataFrame column with a tzaware DTI retains the dtype - idx, expected = idx_expected - df = DataFrame(np.random.randn(2, 1), columns=["A"]) - - # object array of datetimes with a tz - df["B"] = 
idx.to_pydatetime() - result = df["B"] - tm.assert_series_equal(result, expected) - - def test_constructor_from_tzaware_datetimeindex(self, idx_expected): - # don't cast a DatetimeIndex WITH a tz, leave as object - # GH 6032 - idx, expected = idx_expected - - # convert index to series - result = Series(idx) - tm.assert_series_equal(result, expected) + # Tests for setting index/columns attributes directly (i.e. __setattr__) def test_set_axis_setattr_index(self): # GH 6785 @@ -117,31 +19,6 @@ def test_set_axis_setattr_index(self): df.pop("ts") tm.assert_frame_equal(df, expected) - def test_dti_set_index_reindex(self): - # GH 6631 - df = DataFrame(np.random.random(6)) - idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") - idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") - - df = df.set_index(idx1) - tm.assert_index_equal(df.index, idx1) - df = df.reindex(idx2) - tm.assert_index_equal(df.index, idx2) - - def test_dti_set_index_reindex_with_tz(self): - # GH 11314 - # with tz - index = date_range( - datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern" - ) - df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) - new_index = date_range( - datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" - ) - - result = df.set_index(new_index) - assert result.index.freq == index.freq - # Renaming def test_assign_columns(self, float_frame): @@ -151,52 +28,3 @@ def test_assign_columns(self, float_frame): df.columns = ["foo", "bar", "baz", "quux", "foo2"] tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False) tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False) - - -class TestIntervalIndex: - def test_setitem(self): - - df = DataFrame({"A": range(10)}) - ser = cut(df["A"], 5) - assert isinstance(ser.cat.categories, IntervalIndex) - - # B & D end up as Categoricals - # the remainer are converted to in-line objects - # contining an IntervalIndex.values - df["B"] 
= ser - df["C"] = np.array(ser) - df["D"] = ser.values - df["E"] = np.array(ser.values) - - assert is_categorical_dtype(df["B"].dtype) - assert is_interval_dtype(df["B"].cat.categories) - assert is_categorical_dtype(df["D"].dtype) - assert is_interval_dtype(df["D"].cat.categories) - - assert is_object_dtype(df["C"]) - assert is_object_dtype(df["E"]) - - # they compare equal as Index - # when converted to numpy objects - c = lambda x: Index(np.array(x)) - tm.assert_index_equal(c(df.B), c(df.B)) - tm.assert_index_equal(c(df.B), c(df.C), check_names=False) - tm.assert_index_equal(c(df.B), c(df.D), check_names=False) - tm.assert_index_equal(c(df.C), c(df.D), check_names=False) - - # B & D are the same Series - tm.assert_series_equal(df["B"], df["B"]) - tm.assert_series_equal(df["B"], df["D"], check_names=False) - - # C & E are the same Series - tm.assert_series_equal(df["C"], df["C"]) - tm.assert_series_equal(df["C"], df["E"], check_names=False) - - def test_set_reset_index(self): - - df = DataFrame({"A": range(10)}) - s = cut(df.A, 5) - df["B"] = s - df = df.set_index("B") - - df = df.reset_index() diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 6b8284908213a..2f2de9764219b 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -6,10 +6,18 @@ import pytest import pandas.util._test_decorators as td -from pandas.util._test_decorators import async_mark, skip_if_no +from pandas.util._test_decorators import ( + async_mark, + skip_if_no, +) import pandas as pd -from pandas import DataFrame, Series, date_range, timedelta_range +from pandas import ( + DataFrame, + Series, + date_range, + timedelta_range, +) import pandas._testing as tm @@ -65,7 +73,7 @@ def test_tab_completion(self): df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) for key in list("ABCD"): assert key in dir(df) - assert isinstance(df.__getitem__("A"), pd.Series) + assert isinstance(df.__getitem__("A"), Series) # DataFrame whose 
first-level columns are identifiers shall have # them in __dir__. @@ -77,7 +85,7 @@ def test_tab_completion(self): assert key in dir(df) for key in list("EFGH"): assert key not in dir(df) - assert isinstance(df.__getitem__("A"), pd.DataFrame) + assert isinstance(df.__getitem__("A"), DataFrame) def test_not_hashable(self): empty_frame = DataFrame() diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index afc25c48beb5f..44b6d44ee6275 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -8,11 +8,21 @@ import pytz import pandas as pd -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm import pandas.core.common as com -from pandas.core.computation.expressions import _MIN_ELEMENTS, NUMEXPR_INSTALLED -from pandas.tests.frame.common import _check_mixed_float, _check_mixed_int +from pandas.core.computation.expressions import ( + _MIN_ELEMENTS, + NUMEXPR_INSTALLED, +) +from pandas.tests.frame.common import ( + _check_mixed_float, + _check_mixed_int, +) class DummyElement: @@ -47,6 +57,21 @@ def any(self, axis=None): class TestFrameComparisons: # Specifically _not_ flex-comparisons + def test_comparison_with_categorical_dtype(self): + # GH#12564 + + df = DataFrame({"A": ["foo", "bar", "baz"]}) + exp = DataFrame({"A": [True, False, False]}) + + res = df == "foo" + tm.assert_frame_equal(res, exp) + + # casting to categorical shouldn't affect the result + df["A"] = df["A"].astype("category") + + res = df == "foo" + tm.assert_frame_equal(res, exp) + def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks df = DataFrame(np.random.randn(6, 4), columns=list("ABCD")) @@ -587,6 +612,26 @@ def test_flex_add_scalar_fill_value(self): res = df.add(2, fill_value=0) tm.assert_frame_equal(res, exp) + def test_sub_alignment_with_duplicate_index(self): + # GH#5185 dup aligning 
operations should work + df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) + df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) + expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3]) + result = df1.sub(df2) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"]) + def test_arithmetic_with_duplicate_columns(self, op): + # operations + df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) + expected = getattr(df, op)(df) + expected.columns = ["A", "A"] + df.columns = ["A", "A"] + result = getattr(df, op)(df) + tm.assert_frame_equal(result, expected) + str(result) + result.dtypes + class TestFrameArithmetic: def test_td64_op_nat_casting(self): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index b36f6fcf8b9f8..193f1617fbb55 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) from io import StringIO import itertools @@ -18,7 +21,10 @@ option_context, ) import pandas._testing as tm -from pandas.core.internals import NumericBlock, ObjectBlock +from pandas.core.internals import ( + NumericBlock, + ObjectBlock, +) # Segregated collection of methods that require the BlockManager internal data # structure diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 9ec745932514f..14adc8a992609 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1,5 +1,12 @@ -from collections import OrderedDict, abc -from datetime import date, datetime, timedelta +from collections import ( + OrderedDict, + abc, +) +from datetime import ( + date, + datetime, + timedelta, +) import functools import itertools import re @@ -13,13 +20,18 @@ from pandas.compat import np_version_under1p19 from 
pandas.core.dtypes.common import is_integer_dtype -from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) import pandas as pd from pandas import ( Categorical, CategoricalIndex, DataFrame, + DatetimeIndex, Index, Interval, MultiIndex, @@ -32,7 +44,11 @@ isna, ) import pandas._testing as tm -from pandas.arrays import IntervalArray, PeriodArray, SparseArray +from pandas.arrays import ( + IntervalArray, + PeriodArray, + SparseArray, +) MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] MIXED_INT_DTYPES = [ @@ -48,6 +64,19 @@ class TestDataFrameConstructors: + def test_constructor_from_tzaware_datetimeindex(self): + # don't cast a DatetimeIndex WITH a tz, leave as object + # GH#6032 + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + assert expected.dtype == idx.dtype + + # convert index to series + result = Series(idx) + tm.assert_series_equal(result, expected) + def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): # GH#39462 nat = np.datetime64("NaT", "ns") @@ -2179,7 +2208,7 @@ class DatetimeSubclass(datetime): def test_with_mismatched_index_length_raises(self): # GH#33437 - dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + dti = date_range("2016-01-01", periods=3, tz="US/Pacific") with pytest.raises(ValueError, match="Shape of passed values"): DataFrame(dti, index=range(4)) diff --git a/pandas/tests/frame/test_cumulative.py b/pandas/tests/frame/test_cumulative.py index 248f3500c41df..39714a4566494 100644 --- a/pandas/tests/frame/test_cumulative.py +++ b/pandas/tests/frame/test_cumulative.py @@ -8,7 +8,10 @@ import numpy as np -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git 
a/pandas/tests/frame/test_iteration.py b/pandas/tests/frame/test_iteration.py index d6268f90b2681..c6e5aa6f86d29 100644 --- a/pandas/tests/frame/test_iteration.py +++ b/pandas/tests/frame/test_iteration.py @@ -2,9 +2,17 @@ import numpy as np -from pandas.compat import IS64, is_platform_windows - -from pandas import Categorical, DataFrame, Series, date_range +from pandas.compat import ( + IS64, + is_platform_windows, +) + +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index dca12c632a418..f509ae52ad5a5 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -4,7 +4,13 @@ import numpy as np import pytest -from pandas import CategoricalIndex, DataFrame, Interval, Series, isnull +from pandas import ( + CategoricalIndex, + DataFrame, + Interval, + Series, + isnull, +) import pandas._testing as tm diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 8dcf6f2188058..c3812e109b938 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm @@ -14,7 +18,7 @@ def check(result, expected=None): class TestDataFrameNonuniqueIndexes: - def test_column_dups_operations(self): + def test_setattr_columns_vs_construct_with_columns(self): # assignment # GH 3687 @@ -25,6 +29,7 @@ def test_column_dups_operations(self): expected = DataFrame(arr, columns=idx) check(df, expected) + def test_setattr_columns_vs_construct_with_columns_datetimeindx(self): idx = date_range("20130101", periods=4, freq="Q-NOV") df = DataFrame( [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=["a", "a", "a", "a"] @@ 
-33,6 +38,7 @@ def test_column_dups_operations(self): expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx) check(df, expected) + def test_insert_with_duplicate_columns(self): # insert df = DataFrame( [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], @@ -119,6 +125,7 @@ def test_column_dups_operations(self): ) tm.assert_frame_equal(df, expected) + def test_dup_across_dtypes(self): # dup across dtypes df = DataFrame( [[1, 1, 1.0, 5], [1, 1, 2.0, 5], [2, 1, 3.0, 5]], @@ -155,83 +162,7 @@ def test_column_dups_operations(self): ) check(df, expected) - # values - df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) - result = df.values - expected = np.array([[1, 2.5], [3, 4.5]]) - assert (result == expected).all().all() - - # rename, GH 4403 - df4 = DataFrame( - {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, - index=MultiIndex.from_tuples( - [(600809, 20130331)], names=["STK_ID", "RPT_Date"] - ), - ) - - df5 = DataFrame( - { - "RPT_Date": [20120930, 20121231, 20130331], - "STK_ID": [600809] * 3, - "STK_Name": ["饡驦", "饡驦", "饡驦"], - "TClose": [38.05, 41.66, 30.01], - }, - index=MultiIndex.from_tuples( - [(600809, 20120930), (600809, 20121231), (600809, 20130331)], - names=["STK_ID", "RPT_Date"], - ), - ) - - k = pd.merge(df4, df5, how="inner", left_index=True, right_index=True) - result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) - str(result) - result.dtypes - - expected = DataFrame( - [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], - columns=[ - "RT", - "TClose", - "TExg", - "RPT_Date", - "STK_ID", - "STK_Name", - "QT_Close", - ], - ).set_index(["STK_ID", "RPT_Date"], drop=False) - tm.assert_frame_equal(result, expected) - - # reindex is invalid! 
- df = DataFrame( - [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] - ) - msg = "cannot reindex from a duplicate axis" - with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar"]) - with pytest.raises(ValueError, match=msg): - df.reindex(columns=["bar", "foo"]) - - # drop - df = DataFrame( - [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] - ) - result = df.drop(["a"], axis=1) - expected = DataFrame([[1], [1], [1]], columns=["bar"]) - check(result, expected) - result = df.drop("a", axis=1) - check(result, expected) - - # describe - df = DataFrame( - [[1, 1, 1], [2, 2, 2], [3, 3, 3]], - columns=["bar", "a", "a"], - dtype="float64", - ) - result = df.describe() - s = df.iloc[:, 0].describe() - expected = pd.concat([s, s, s], keys=df.columns, axis=1) - check(result, expected) - + def test_column_dups_indexes(self): # check column dups with index equal and not equal to df's index df = DataFrame( np.random.randn(5, 3), @@ -248,15 +179,7 @@ def test_column_dups_operations(self): this_df["A"] = index check(this_df, expected_df) - # operations - for op in ["__add__", "__mul__", "__sub__", "__truediv__"]: - df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)}) - expected = getattr(df, op)(df) - expected.columns = ["A", "A"] - df.columns = ["A", "A"] - result = getattr(df, op)(df) - check(result, expected) - + def test_changing_dtypes_with_duplicate_columns(self): # multiple assignments that change dtypes # the location indexer is a slice # GH 6120 @@ -272,7 +195,7 @@ def test_column_dups_operations(self): df["that"] = 1 check(df, expected) - def test_column_dups2(self): + def test_column_dups_drop(self): # drop buggy GH 6240 df = DataFrame( @@ -289,6 +212,7 @@ def test_column_dups2(self): result = df2.drop("C", axis=1) tm.assert_frame_equal(result, expected) + def test_column_dups_dropna(self): # dropna df = DataFrame( { @@ -310,53 +234,7 @@ def test_column_dups2(self): result = df.dropna(subset=["A", "C"], 
how="all") tm.assert_frame_equal(result, expected) - def test_getitem_boolean_series_with_duplicate_columns(self): - # boolean indexing - # GH 4879 - dups = ["A", "A", "C", "D"] - df = DataFrame( - np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" - ) - expected = df[df.C > 6] - expected.columns = dups - df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") - result = df[df.C > 6] - check(result, expected) - - def test_getitem_boolean_frame_with_duplicate_columns(self): - dups = ["A", "A", "C", "D"] - - # where - df = DataFrame( - np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" - ) - # `df > 6` is a DataFrame with the same shape+alignment as df - expected = df[df > 6] - expected.columns = dups - df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") - result = df[df > 6] - check(result, expected) - - def test_getitem_boolean_frame_unaligned_with_duplicate_columns(self): - # `df.A > 6` is a DataFrame with a different shape from df - dups = ["A", "A", "C", "D"] - - # boolean with the duplicate raises - df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") - msg = "cannot reindex from a duplicate axis" - with pytest.raises(ValueError, match=msg): - df[df.A > 6] - - def test_column_dups_indexing(self): - - # dup aligning operations should work - # GH 5185 - df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) - df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) - expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3]) - result = df1.sub(df2) - tm.assert_frame_equal(result, expected) - + def test_dup_columns_comparisons(self): # equality df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"]) @@ -374,6 +252,7 @@ def test_column_dups_indexing(self): ) tm.assert_frame_equal(result, expected) + def test_mixed_column_selection(self): # mixed column selection # GH 5639 
dfbool = DataFrame( @@ -387,6 +266,7 @@ def test_column_dups_indexing(self): result = dfbool[["one", "three", "one"]] check(result, expected) + def test_multi_axis_dups(self): # multi-axis dups # GH 6121 df = DataFrame( @@ -422,6 +302,7 @@ def test_columns_with_dups(self): expected = DataFrame([[1, 2, 3]], columns=["b", "a", "a.1"]) tm.assert_frame_equal(df, expected) + def test_columns_with_dup_index(self): # with a dup index df = DataFrame([[1, 2]], columns=["a", "a"]) df.columns = ["b", "b"] @@ -429,6 +310,7 @@ def test_columns_with_dups(self): expected = DataFrame([[1, 2]], columns=["b", "b"]) tm.assert_frame_equal(df, expected) + def test_multi_dtype(self): # multi-dtype df = DataFrame( [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], @@ -441,12 +323,14 @@ def test_columns_with_dups(self): ) tm.assert_frame_equal(df, expected) + def test_multi_dtype2(self): df = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a", "a", "a"]) df.columns = ["a", "a.1", "a.2", "a.3"] str(df) expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) tm.assert_frame_equal(df, expected) + def test_dups_across_blocks(self): # dups across blocks df_float = DataFrame(np.random.randn(10, 3), dtype="float64") df_int = DataFrame(np.random.randn(10, 3), dtype="int64") @@ -464,6 +348,7 @@ def test_columns_with_dups(self): for i in range(len(df.columns)): df.iloc[:, i] + def test_dup_columns_across_dtype(self): # dup columns across dtype GH 2079/2194 vals = [[1, -1, 2.0], [2, -2, 3.0]] rs = DataFrame(vals, columns=["A", "A", "B"]) @@ -486,36 +371,3 @@ def test_set_value_by_index(self): df.iloc[:, 0] = 3 tm.assert_series_equal(df.iloc[:, 1], expected) - - @pytest.mark.parametrize( - "data1,data2,expected_data", - ( - ( - [[1, 2], [3, 4]], - [[0.5, 6], [7, 8]], - [[np.nan, 3.0], [np.nan, 4.0], [np.nan, 7.0], [6.0, 8.0]], - ), - ( - [[1, 2], [3, 4]], - [[5, 6], [7, 8]], - [[np.nan, 3.0], [np.nan, 4.0], [5, 7], [6, 8]], - ), - ), - ) - def 
test_masking_duplicate_columns_mixed_dtypes( - self, - data1, - data2, - expected_data, - ): - # GH31954 - - df1 = DataFrame(np.array(data1)) - df2 = DataFrame(np.array(data2)) - df = pd.concat([df1, df2], axis=1) - - result = df[df > 2] - expected = DataFrame( - {i: np.array(col) for i, col in enumerate(expected_data)} - ).rename(columns={2: 0, 3: 1}) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index 1e37822798244..0b7699e46d720 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -3,7 +3,10 @@ """ import numpy as np -from pandas import Categorical, DataFrame +from pandas import ( + Categorical, + DataFrame, +) import pandas._testing as tm diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index af134db587306..fdbf8a93ddddf 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -7,7 +7,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.core.computation.check import NUMEXPR_INSTALLED @@ -713,7 +719,7 @@ def test_inf(self): def test_check_tz_aware_index_query(self, tz_aware_fixture): # https://github.com/pandas-dev/pandas/issues/29463 tz = tz_aware_fixture - df_index = pd.date_range( + df_index = date_range( start="2019-01-01", freq="1d", periods=10, tz=tz, name="time" ) expected = DataFrame(index=df_index) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 1c397d6a6a1b5..3f205bde31940 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -285,7 +285,10 @@ def test_stat_op_api(self, float_frame, float_string_frame): assert_stat_op_api("median", float_frame, 
float_string_frame) try: - from scipy.stats import kurtosis, skew # noqa:F401 + from scipy.stats import ( # noqa:F401 + kurtosis, + skew, + ) assert_stat_op_api("skew", float_frame, float_string_frame) assert_stat_op_api("kurt", float_frame, float_string_frame) @@ -368,7 +371,10 @@ def kurt(x): ) try: - from scipy import kurtosis, skew # noqa:F401 + from scipy import ( # noqa:F401 + kurtosis, + skew, + ) assert_stat_op_calc("skew", skewness, float_frame_with_na) assert_stat_op_calc("kurt", kurt, float_frame_with_na) @@ -663,7 +669,7 @@ def test_mode_sortwarning(self): df = DataFrame({"A": [np.nan, np.nan, "a", "a"]}) expected = DataFrame({"A": ["a", np.nan]}) - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + with tm.assert_produces_warning(UserWarning): result = df.mode(dropna=False) result = result.sort_values(by="A").reset_index(drop=True) @@ -825,7 +831,7 @@ def test_sum_nanops_timedelta(self): idx = ["a", "b", "c"] df = DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]}) - df2 = df.apply(pd.to_timedelta) + df2 = df.apply(to_timedelta) # 0 by default result = df2.sum() @@ -855,9 +861,9 @@ def test_sum_bool(self, float_frame): def test_sum_mixed_datetime(self): # GH#30886 - df = DataFrame( - {"A": pd.date_range("2000", periods=4), "B": [1, 2, 3, 4]} - ).reindex([2, 3, 4]) + df = DataFrame({"A": date_range("2000", periods=4), "B": [1, 2, 3, 4]}).reindex( + [2, 3, 4] + ) result = df.sum() expected = Series({"B": 7.0}) @@ -887,7 +893,7 @@ def test_mean_datetimelike(self): df = DataFrame( { "A": np.arange(3), - "B": pd.date_range("2016-01-01", periods=3), + "B": date_range("2016-01-01", periods=3), "C": pd.timedelta_range("1D", periods=3), "D": pd.period_range("2016", periods=3, freq="A"), } @@ -906,7 +912,7 @@ def test_mean_datetimelike_numeric_only_false(self): df = DataFrame( { "A": np.arange(3), - "B": pd.date_range("2016-01-01", periods=3), + "B": date_range("2016-01-01", periods=3), "C": pd.timedelta_range("1D", periods=3), 
} ) @@ -977,7 +983,7 @@ def test_idxmax(self, float_frame, int_frame): def test_idxmax_mixed_dtype(self): # don't cast to object, which would raise in nanops - dti = pd.date_range("2016-01-01", periods=3) + dti = date_range("2016-01-01", periods=3) df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti}) @@ -1267,8 +1273,8 @@ def test_min_max_dt64_api_consistency_with_NaT(self): # returned NaT for series. These tests check that the API is consistent in # min/max calls on empty Series/DataFrames. See GH:33704 for more # information - df = DataFrame({"x": pd.to_datetime([])}) - expected_dt_series = Series(pd.to_datetime([])) + df = DataFrame({"x": to_datetime([])}) + expected_dt_series = Series(to_datetime([])) # check axis 0 assert (df.min(axis=0).x is pd.NaT) == (expected_dt_series.min() is pd.NaT) assert (df.max(axis=0).x is pd.NaT) == (expected_dt_series.max() is pd.NaT) @@ -1296,7 +1302,7 @@ def test_min_max_dt64_api_consistency_empty_df(self): @pytest.mark.parametrize("method", ["min", "max"]) def test_preserve_timezone(self, initial: str, method): # GH 28552 - initial_dt = pd.to_datetime(initial) + initial_dt = to_datetime(initial) expected = Series([initial_dt]) df = DataFrame([expected]) result = getattr(df, method)(axis=1) @@ -1324,7 +1330,7 @@ def test_frame_any_with_timedelta(self): df = DataFrame( { "a": Series([0, 0]), - "t": Series([pd.to_timedelta(0, "s"), pd.to_timedelta(1, "ms")]), + "t": Series([to_timedelta(0, "s"), to_timedelta(1, "ms")]), } ) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 22b50310cedd6..c8131049b51d2 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) from io import StringIO import warnings diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 81e10d276e79c..9945b739f8a87 100644 --- 
a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -6,7 +6,15 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Period, + Series, + Timedelta, + date_range, +) import pandas._testing as tm @@ -150,7 +158,7 @@ def test_unstack_fill_frame(self): def test_unstack_fill_frame_datetime(self): # Test unstacking with date times - dv = pd.date_range("2012-01-01", periods=4).values + dv = date_range("2012-01-01", periods=4).values data = Series(dv) data.index = MultiIndex.from_tuples( [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] @@ -600,7 +608,7 @@ def test_unstack_dtypes(self): "A": ["a"] * 5, "C": c, "D": d, - "B": pd.date_range("2012-01-01", periods=5), + "B": date_range("2012-01-01", periods=5), } ) @@ -934,7 +942,7 @@ def verify(df): df = DataFrame( { "1st": [1, 2, 1, 2, 1, 2], - "2nd": pd.date_range("2014-02-01", periods=6, freq="D"), + "2nd": date_range("2014-02-01", periods=6, freq="D"), "jim": 100 + np.arange(6), "joe": (np.random.randn(6) * 10).round(2), } @@ -1163,9 +1171,7 @@ def test_unstack_timezone_aware_values(): def test_stack_timezone_aware_values(): # GH 19420 - ts = pd.date_range( - freq="D", start="20180101", end="20180103", tz="America/New_York" - ) + ts = date_range(freq="D", start="20180101", end="20180103", tz="America/New_York") df = DataFrame({"A": ts}, index=["a", "b", "c"]) result = df.stack() expected = Series( diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 2b462d5a10c51..784ca03fa9c03 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -4,7 +4,12 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm @@ -56,11 +61,11 
@@ def custom_frame_function(self): assert cdf_rows.custom_frame_function() == "OK" # Make sure sliced part of multi-index frame is custom class - mcol = pd.MultiIndex.from_tuples([("A", "A"), ("A", "B")]) + mcol = MultiIndex.from_tuples([("A", "A"), ("A", "B")]) cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) assert isinstance(cdf_multi["A"], CustomDataFrame) - mcol = pd.MultiIndex.from_tuples([("A", ""), ("B", "")]) + mcol = MultiIndex.from_tuples([("A", ""), ("B", "")]) cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) assert isinstance(cdf_multi2["A"], CustomSeries) @@ -700,7 +705,7 @@ def test_idxmax_preserves_subclass(self): def test_equals_subclass(self): # https://github.com/pandas-dev/pandas/pull/34402 # allow subclass in both directions - df1 = pd.DataFrame({"a": [1, 2, 3]}) + df1 = DataFrame({"a": [1, 2, 3]}) df2 = tm.SubclassedDataFrame({"a": [1, 2, 3]}) assert df1.equals(df2) assert df2.equals(df1) diff --git a/pandas/tests/frame/test_ufunc.py b/pandas/tests/frame/test_ufunc.py index 62b12f8a60307..0bcb62251fc5c 100644 --- a/pandas/tests/frame/test_ufunc.py +++ b/pandas/tests/frame/test_ufunc.py @@ -218,7 +218,10 @@ def test_alignment_deprecation_many_inputs(): # https://github.com/pandas-dev/pandas/issues/39184 # test that the deprecation also works with > 2 inputs -> using a numba # written ufunc for this because numpy itself doesn't have such ufuncs - from numba import float64, vectorize + from numba import ( + float64, + vectorize, + ) @vectorize([float64(float64, float64, float64)]) def my_ufunc(x, y, z): diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index 73a68e8508644..15c51e5f3e6e4 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -149,13 +149,15 @@ marks=not_implemented_mark, ), (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")), - pytest.param( - ( - pd.DataFrame, - {"A": [1], "B": [1]}, - 
operator.methodcaller("pivot_table", columns="A"), - ), - marks=not_implemented_mark, + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A"), + ), + ( + pd.DataFrame, + ({"A": [1], "B": [1]},), + operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]), ), (pd.DataFrame, frame_data, operator.methodcaller("stack")), pytest.param( @@ -740,6 +742,8 @@ def test_categorical_accessor(method): [ operator.methodcaller("sum"), lambda x: x.agg("sum"), + lambda x: x.agg("mean"), + lambda x: x.agg("median"), ], ) def test_groupby_finalize(obj, method): @@ -757,6 +761,12 @@ def test_groupby_finalize(obj, method): lambda x: x.agg(["sum", "count"]), lambda x: x.transform(lambda y: y), lambda x: x.apply(lambda y: y), + lambda x: x.agg("std"), + lambda x: x.agg("var"), + lambda x: x.agg("sem"), + lambda x: x.agg("size"), + lambda x: x.agg("ohlc"), + lambda x: x.agg("describe"), ], ) @not_implemented_mark diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py index 194b8bdd4715e..49a1dc8bbb21c 100644 --- a/pandas/tests/generic/test_frame.py +++ b/pandas/tests/generic/test_frame.py @@ -5,7 +5,12 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.tests.generic.test_generic import Generic diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 199c521cfc81b..8574589cb27bb 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -1,11 +1,17 @@ -from copy import copy, deepcopy +from copy import ( + copy, + deepcopy, +) import numpy as np import pytest from pandas.core.dtypes.common import is_scalar -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm # 
---------------------------------------------------------------------- diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 38ab8d333e880..823ce7435f229 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -4,7 +4,11 @@ import pytest import pandas as pd -from pandas import MultiIndex, Series, date_range +from pandas import ( + MultiIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.tests.generic.test_generic import Generic diff --git a/pandas/tests/generic/test_to_xarray.py b/pandas/tests/generic/test_to_xarray.py index a6aa45406305c..8e33465efcbf7 100644 --- a/pandas/tests/generic/test_to_xarray.py +++ b/pandas/tests/generic/test_to_xarray.py @@ -3,7 +3,13 @@ import pandas.util._test_decorators as td -from pandas import Categorical, DataFrame, MultiIndex, Series, date_range +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 48527de6b2047..92b7aefa6dd8c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -13,7 +13,13 @@ from pandas.core.dtypes.common import is_integer_dtype import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, concat +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, +) import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.core.groupby.grouper import Grouping diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 8799f6faa775c..f9b45f4d9f4cf 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -8,7 +8,15 @@ from pandas.core.dtypes.common import is_float_dtype import pandas as pd -from pandas 
import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range +from pandas import ( + DataFrame, + Index, + NaT, + Series, + Timedelta, + Timestamp, + bdate_range, +) import pandas._testing as tm from pandas.core.groupby.groupby import DataError diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index c4266996748c2..6de81d03ca418 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -4,7 +4,11 @@ from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td -from pandas import DataFrame, NamedAgg, option_context +from pandas import ( + DataFrame, + NamedAgg, + option_context, +) import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 04f17865b088a..f945f898603ac 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -627,7 +627,11 @@ def test_groupby_agg_err_catching(err_cls): # in _python_agg_general # Use a non-standard EA to make sure we don't go down ndarray paths - from pandas.tests.extension.decimal.array import DecimalArray, make_data, to_decimal + from pandas.tests.extension.decimal.array import ( + DecimalArray, + make_data, + to_decimal, + ) data = make_data()[:5] df = DataFrame( diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py index 0b9721968a881..b69a467f91659 100644 --- a/pandas/tests/groupby/conftest.py +++ b/pandas/tests/groupby/conftest.py @@ -1,9 +1,15 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex +from pandas import ( + DataFrame, + MultiIndex, +) import pandas._testing as tm -from pandas.core.groupby.base import reduction_kernels, transformation_kernels +from pandas.core.groupby.base import ( + reduction_kernels, + transformation_kernels, +) 
@pytest.fixture diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 57ccf6ebd24bd..de8335738791d 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -8,7 +8,13 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.core.groupby.base import ( groupby_other_methods, diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 975cebe16dc55..4bbdba9fedbff 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1,11 +1,20 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) from io import StringIO import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, bdate_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + bdate_range, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index aff9911961b25..f873c93d90683 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas._libs import lib, reduction as libreduction +from pandas._libs import ( + lib, + reduction as libreduction, +) import pandas as pd from pandas import Series @@ -10,11 +13,10 @@ def test_series_grouper(): obj = Series(np.random.randn(10)) - dummy = obj.iloc[:0] labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) - grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2, dummy) + grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2) result, counts = grouper.get_result() expected = np.array([obj[3:6].mean(), obj[6:].mean()]) @@ -31,16 +33,15 @@ def 
test_series_grouper_requires_nonempty_raises(): labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) with pytest.raises(ValueError, match="SeriesGrouper requires non-empty `series`"): - libreduction.SeriesGrouper(dummy, np.mean, labels, 2, dummy) + libreduction.SeriesGrouper(dummy, np.mean, labels, 2) def test_series_bin_grouper(): obj = Series(np.random.randn(10)) - dummy = obj[:0] bins = np.array([3, 6]) - grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins, dummy) + grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins) result, counts = grouper.get_result() expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 448e6c6e6f64a..995fd58a84cbd 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, Timestamp +from pandas import ( + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index f532e496ccca9..ecd9d16228939 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -7,7 +7,15 @@ from pandas.errors import UnsupportedFunctionCall import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + isna, +) import pandas._testing as tm import pandas.core.nanops as nanops from pandas.util import _test_decorators as td diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index 1410038274152..c6f3e7618e3f7 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -1,7 +1,13 @@ import numpy as np import pytest 
-from pandas import DataFrame, NaT, Series, Timedelta, Timestamp +from pandas import ( + DataFrame, + NaT, + Series, + Timedelta, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index d268d87708552..8008c6c98acc9 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 10e69ddcd5f80..a1d956a6fe096 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -167,7 +167,10 @@ def test_grouper_multilevel_freq(self): # GH 7885 # with level and freq specified in a pd.Grouper - from datetime import date, timedelta + from datetime import ( + date, + timedelta, + ) d0 = date.today() - timedelta(days=14) dates = date_range(d0, date.today()) diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index e2ca63d9ab922..e53518269408a 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, date_range +from pandas import ( + DataFrame, + Index, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 26b3af4234be1..1b74096cbfbdf 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -2,7 +2,14 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + isna, +) import pandas._testing as tm diff --git 
a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 22970eff28f19..6656fd565f79d 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -5,7 +5,14 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, NaT, Series, Timestamp, date_range +from pandas import ( + DataFrame, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 1acbc8cf5c0ad..3e43d13bb8b67 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -1,7 +1,10 @@ import numpy as np import pandas as pd -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index c8d6d09577c2b..9c9d1aa881890 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index f2046c5768668..6116703ebd174 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, concat +from pandas import ( + DataFrame, + Series, + concat, +) import pandas._testing as tm from pandas.core.base import DataError diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 412e3e8f732de..13147ca704b56 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -1,6 +1,10 @@ import pytest -from pandas import DataFrame, Index, Series +from pandas import ( + DataFrame, + Index, + Series, +) import 
pandas._testing as tm diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py index ba27e5a24ba00..f87e4117f57fd 100644 --- a/pandas/tests/groupby/test_size.py +++ b/pandas/tests/groupby/test_size.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import DataFrame, Index, PeriodIndex, Series +from pandas import ( + DataFrame, + Index, + PeriodIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 3a184bdd007c7..fbee2361b9b45 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -3,7 +3,10 @@ from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td -from pandas import DataFrame, option_context +from pandas import ( + DataFrame, + option_context, +) import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 3f04f0f1163e7..ae0f7545df8cf 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -4,7 +4,10 @@ import numpy as np import pytest -from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_timedelta64_dtype, +) import pandas as pd from pandas import ( @@ -769,6 +772,18 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request): comp_func(result, exp) +def test_transform_ffill(): + # GH 24211 + data = [["a", 0.0], ["a", float("nan")], ["b", 1.0], ["b", float("nan")]] + df = DataFrame(data, columns=["key", "values"]) + result = df.groupby("key").transform("ffill") + expected = DataFrame({"values": [0.0, 0.0, 1.0, 1.0]}) + tm.assert_frame_equal(result, expected) + result = df.groupby("key")["values"].transform("ffill") + expected = 
Series([0.0, 0.0, 1.0, 1.0], name="values") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("mix_groupings", [True, False]) @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize("val1,val2", [("foo", "bar"), (1, 2), (1.0, 2.0)]) diff --git a/pandas/tests/indexes/base_class/test_constructors.py b/pandas/tests/indexes/base_class/test_constructors.py index b3229b4eda030..0c4f9c6d759b9 100644 --- a/pandas/tests/indexes/base_class/test_constructors.py +++ b/pandas/tests/indexes/base_class/test_constructors.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index ddcb3c5b87ebc..2bc9b2cd1a1bd 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -4,7 +4,10 @@ import pytest import pandas as pd -from pandas import Index, Series +from pandas import ( + Index, + Series, +) import pandas._testing as tm from pandas.core.algorithms import safe_sort diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 48a90652a2c06..854ae8b62db30 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -3,7 +3,13 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalDtype, CategoricalIndex, Index, IntervalIndex +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, + IntervalIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 75b3a6ece0b21..4b2df268f5c1a 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -6,7 
+6,10 @@ import pandas as pd from pandas import Categorical import pandas._testing as tm -from pandas.core.indexes.api import CategoricalIndex, Index +from pandas.core.indexes.api import ( + CategoricalIndex, + Index, +) from pandas.tests.indexes.common import Base diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py index 255e6a11d945d..2acf79ee0bced 100644 --- a/pandas/tests/indexes/categorical/test_constructors.py +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalDtype, CategoricalIndex, Index +from pandas import ( + Categorical, + CategoricalDtype, + CategoricalIndex, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/categorical/test_equals.py b/pandas/tests/indexes/categorical/test_equals.py index 3f9a58c6a06cd..2648155c938b0 100644 --- a/pandas/tests/indexes/categorical/test_equals.py +++ b/pandas/tests/indexes/categorical/test_equals.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalIndex, Index +from pandas import ( + Categorical, + CategoricalIndex, + Index, +) class TestEquals: diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 7ec67d83e4b3b..490a68233367a 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -4,7 +4,12 @@ from pandas.errors import InvalidIndexError import pandas as pd -from pandas import CategoricalIndex, Index, IntervalIndex, Timestamp +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py index c15818bc87f7c..71ee82981721d 100644 --- a/pandas/tests/indexes/categorical/test_map.py +++ 
b/pandas/tests/indexes/categorical/test_map.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import CategoricalIndex, Index, Series +from pandas import ( + CategoricalIndex, + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py index 8228c5139ccdd..33139359cfe72 100644 --- a/pandas/tests/indexes/categorical/test_reindex.py +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -1,7 +1,13 @@ import numpy as np import pytest -from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 0354e362dc4ac..f8daf23fb08a3 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -243,11 +243,6 @@ def test_copy_name2(self, index): with pytest.raises(TypeError, match=msg): index.copy(name=[["mario"]]) - def test_copy_dtype_deprecated(self, index): - # GH35853 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - index.copy(dtype=object) - def test_ensure_copied_data(self, index): # Check the "copy" argument of each Index.__new__ is honoured # GH12309 @@ -504,10 +499,9 @@ def test_format_empty(self): assert empty_idx.format() == [] assert empty_idx.format(name=True) == [""] - def test_hasnans_isnans(self, index): + def test_hasnans_isnans(self, index_flat): # GH 11343, added tests for hasnans / isnans - if isinstance(index, MultiIndex): - return + index = index_flat # cases in indices doesn't include NaN idx = index.copy(deep=True) diff --git a/pandas/tests/indexes/datetimelike_/test_equals.py b/pandas/tests/indexes/datetimelike_/test_equals.py index 55a90f982a971..7221e560c1112 100644 --- a/pandas/tests/indexes/datetimelike_/test_equals.py +++ 
b/pandas/tests/indexes/datetimelike_/test_equals.py @@ -1,7 +1,10 @@ """ Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex """ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest diff --git a/pandas/tests/indexes/datetimelike_/test_indexing.py b/pandas/tests/indexes/datetimelike_/test_indexing.py index 51de446eea3e3..eb37c2c4ad2a3 100644 --- a/pandas/tests/indexes/datetimelike_/test_indexing.py +++ b/pandas/tests/indexes/datetimelike_/test_indexing.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import DatetimeIndex, Index +from pandas import ( + DatetimeIndex, + Index, +) import pandas._testing as tm dtlike_dtypes = [ diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index bed7cb9b54eba..8eb0e086ec3f7 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ -29,7 +29,7 @@ def test_astype(self): ) tm.assert_index_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = idx.astype(int) expected = Int64Index( [1463356800000000000] + [-9223372036854775808] * 3, @@ -39,7 +39,7 @@ def test_astype(self): tm.assert_index_equal(result, expected) rng = date_range("1/1/2000", periods=10, name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = rng.astype("i8") tm.assert_index_equal(result, Index(rng.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, rng.asi8) @@ -50,7 +50,7 @@ def test_astype_uint(self): np.array([946684800000000000, 946771200000000000], dtype="uint64"), name="idx", ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): 
tm.assert_index_equal(arr.astype("uint64"), expected) tm.assert_index_equal(arr.astype("uint32"), expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_factorize.py b/pandas/tests/indexes/datetimes/methods/test_factorize.py index 6e095e29e47cd..90ad65c46046f 100644 --- a/pandas/tests/indexes/datetimes/methods/test_factorize.py +++ b/pandas/tests/indexes/datetimes/methods/test_factorize.py @@ -1,6 +1,11 @@ import numpy as np -from pandas import DatetimeIndex, Index, date_range, factorize +from pandas import ( + DatetimeIndex, + Index, + date_range, + factorize, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_insert.py b/pandas/tests/indexes/datetimes/methods/test_insert.py similarity index 97% rename from pandas/tests/indexes/datetimes/test_insert.py rename to pandas/tests/indexes/datetimes/methods/test_insert.py index 684c6b813b48f..bf3d5bf88149e 100644 --- a/pandas/tests/indexes/datetimes/test_insert.py +++ b/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -4,7 +4,14 @@ import pytest import pytz -from pandas import NA, DatetimeIndex, Index, NaT, Timestamp, date_range +from pandas import ( + NA, + DatetimeIndex, + Index, + NaT, + Timestamp, + date_range, +) import pandas._testing as tm @@ -13,8 +20,12 @@ class TestInsert: @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) def test_insert_nat(self, tz, null): # GH#16537, GH#18295 (test missing) + idx = DatetimeIndex(["2017-01-01"], tz=tz) expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz) + if tz is not None and isinstance(null, np.datetime64): + expected = Index([null, idx[0]], dtype=object) + res = idx.insert(0, null) tm.assert_index_equal(res, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_shift.py b/pandas/tests/indexes/datetimes/methods/test_shift.py index 611df5d99cb9c..5a47b36a2a8d0 100644 --- a/pandas/tests/indexes/datetimes/methods/test_shift.py +++ b/pandas/tests/indexes/datetimes/methods/test_shift.py @@ -6,7 +6,11 @@ 
from pandas.errors import NullFrequencyError import pandas as pd -from pandas import DatetimeIndex, Series, date_range +from pandas import ( + DatetimeIndex, + Series, + date_range, +) import pandas._testing as tm START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) diff --git a/pandas/tests/indexes/datetimes/methods/test_snap.py b/pandas/tests/indexes/datetimes/methods/test_snap.py index 8baea9fe8341f..e591441c4f148 100644 --- a/pandas/tests/indexes/datetimes/methods/test_snap.py +++ b/pandas/tests/indexes/datetimes/methods/test_snap.py @@ -1,6 +1,9 @@ import pytest -from pandas import DatetimeIndex, date_range +from pandas import ( + DatetimeIndex, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/methods/test_to_frame.py b/pandas/tests/indexes/datetimes/methods/test_to_frame.py new file mode 100644 index 0000000000000..ec6254f52f4d5 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_to_frame.py @@ -0,0 +1,14 @@ +from pandas import ( + DataFrame, + date_range, +) +import pandas._testing as tm + + +class TestToFrame: + def test_to_frame_datetime_tz(self): + # GH#25809 + idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC") + result = idx.to_frame() + expected = DataFrame(idx, index=idx) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_series.py b/pandas/tests/indexes/datetimes/methods/test_to_series.py new file mode 100644 index 0000000000000..5a216d3c89899 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_to_series.py @@ -0,0 +1,40 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Series, +) +import pandas._testing as tm + + +class TestToSeries: + @pytest.fixture + def idx_expected(self): + naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") + idx = naive.tz_localize("US/Pacific") + + expected = Series(np.array(idx.tolist(), dtype="object"), name="B") + + assert 
expected.dtype == idx.dtype + return idx, expected + + def test_to_series_keep_tz_deprecated_true(self, idx_expected): + # convert to series while keeping the timezone + idx, expected = idx_expected + + msg = "stop passing 'keep_tz'" + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=True, index=[0, 1]) + assert msg in str(m[0].message) + + tm.assert_series_equal(result, expected) + + def test_to_series_keep_tz_deprecated_false(self, idx_expected): + idx, expected = idx_expected + + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=False, index=[0, 1]) + tm.assert_series_equal(result, expected.dt.tz_convert(None)) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) diff --git a/pandas/tests/indexes/datetimes/test_asof.py b/pandas/tests/indexes/datetimes/test_asof.py new file mode 100644 index 0000000000000..c794aefc6a48b --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_asof.py @@ -0,0 +1,14 @@ +from pandas import ( + Index, + Timestamp, + date_range, +) + + +class TestAsOf: + def test_asof_partial(self): + index = date_range("2010-01-01", periods=2, freq="m") + expected = Timestamp("2010-02-28") + result = index.asof("2010-02") + assert result == expected + assert not isinstance(result, Index) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 3f30a1a076eda..07cd89c23f1e0 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1,4 +1,8 @@ -from datetime import datetime, timedelta, timezone +from datetime import ( + datetime, + timedelta, + timezone, +) from functools import partial from operator import attrgetter @@ -7,12 +11,25 @@ import pytest import pytz -from pandas._libs.tslibs import OutOfBoundsDatetime, conversion +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + conversion, +) import pandas as 
pd -from pandas import DatetimeIndex, Index, Timestamp, date_range, offsets, to_datetime +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + date_range, + offsets, + to_datetime, +) import pandas._testing as tm -from pandas.core.arrays import DatetimeArray, period_array +from pandas.core.arrays import ( + DatetimeArray, + period_array, +) class TestDatetimeIndex: diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index b8def8072a3b7..9399945bf1913 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -2,7 +2,11 @@ test date_range, bdate_range construction from the convenience range functions """ -from datetime import datetime, time, timedelta +from datetime import ( + datetime, + time, + timedelta, +) import numpy as np import pytest @@ -10,12 +14,25 @@ from pytz import timezone from pandas._libs.tslibs import timezones -from pandas._libs.tslibs.offsets import BDay, CDay, DateOffset, MonthEnd, prefix_mapping +from pandas._libs.tslibs.offsets import ( + BDay, + CDay, + DateOffset, + MonthEnd, + prefix_mapping, +) from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td import pandas as pd -from pandas import DatetimeIndex, Timedelta, Timestamp, bdate_range, date_range, offsets +from pandas import ( + DatetimeIndex, + Timedelta, + Timestamp, + bdate_range, + date_range, + offsets, +) import pandas._testing as tm from pandas.core.arrays.datetimes import generate_range diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index 846bca2ecf2f6..6eef43fe496dd 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -5,7 +5,14 @@ import pytest import pandas as pd -from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets +from pandas import ( + DataFrame, + 
DatetimeIndex, + Index, + Timestamp, + date_range, + offsets, +) import pandas._testing as tm @@ -233,10 +240,3 @@ def test_asarray_tz_aware(self): result = np.asarray(idx, dtype=object) tm.assert_numpy_array_equal(result, expected) - - def test_to_frame_datetime_tz(self): - # GH 25809 - idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC") - result = idx.to_frame() - expected = DataFrame(idx, index=idx) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 0360b33a4a519..94303359958b3 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -1,7 +1,10 @@ """ generic tests from the Datetimelike class """ import pytest -from pandas import DatetimeIndex, date_range +from pandas import ( + DatetimeIndex, + date_range, +) import pandas._testing as tm from pandas.tests.indexes.datetimelike import DatetimeLike diff --git a/pandas/tests/indexes/datetimes/test_delete.py b/pandas/tests/indexes/datetimes/test_delete.py index 4fbb440bc89e5..e9de5a055a5c2 100644 --- a/pandas/tests/indexes/datetimes/test_delete.py +++ b/pandas/tests/indexes/datetimes/test_delete.py @@ -1,6 +1,10 @@ import pytest -from pandas import DatetimeIndex, Series, date_range +from pandas import ( + DatetimeIndex, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_formats.py b/pandas/tests/indexes/datetimes/test_formats.py index a98a96b436107..36046aaeacaae 100644 --- a/pandas/tests/indexes/datetimes/test_formats.py +++ b/pandas/tests/indexes/datetimes/test_formats.py @@ -6,7 +6,10 @@ import pytz import pandas as pd -from pandas import DatetimeIndex, Series +from pandas import ( + DatetimeIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 
3e935d0dfdd5f..819ec52e1a52f 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -1,4 +1,9 @@ -from datetime import date, datetime, time, timedelta +from datetime import ( + date, + datetime, + time, + timedelta, +) import numpy as np import pytest @@ -6,10 +11,20 @@ from pandas.errors import InvalidIndexError import pandas as pd -from pandas import DatetimeIndex, Index, Timestamp, bdate_range, date_range, notna +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + bdate_range, + date_range, + notna, +) import pandas._testing as tm -from pandas.tseries.offsets import BDay, CDay +from pandas.tseries.offsets import ( + BDay, + CDay, +) START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py index 9a9c94fa19e6d..8b633e8db8836 100644 --- a/pandas/tests/indexes/datetimes/test_join.py +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -3,10 +3,19 @@ import numpy as np import pytest -from pandas import DatetimeIndex, Index, Timestamp, date_range, to_datetime +from pandas import ( + DatetimeIndex, + Index, + Timestamp, + date_range, + to_datetime, +) import pandas._testing as tm -from pandas.tseries.offsets import BDay, BMonthEnd +from pandas.tseries.offsets import ( + BDay, + BMonthEnd, +) class TestJoin: diff --git a/pandas/tests/indexes/datetimes/test_map.py b/pandas/tests/indexes/datetimes/test_map.py index 2644ad7616b51..45698ef225151 100644 --- a/pandas/tests/indexes/datetimes/test_map.py +++ b/pandas/tests/indexes/datetimes/test_map.py @@ -1,6 +1,12 @@ import pytest -from pandas import DatetimeIndex, Index, MultiIndex, Period, date_range +from pandas import ( + DatetimeIndex, + Index, + MultiIndex, + Period, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 333a1ac169bb7..d230aa43e43d1 
100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -7,7 +7,14 @@ import pytest import pandas as pd -from pandas import DatetimeIndex, Index, Timedelta, Timestamp, date_range, offsets +from pandas import ( + DatetimeIndex, + Index, + Timedelta, + Timestamp, + date_range, + offsets, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 090e21be254e3..676c0ee99ef7c 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -18,7 +18,11 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import BDay, Day, Hour +from pandas.tseries.offsets import ( + BDay, + Day, + Hour, +) START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_pickle.py b/pandas/tests/indexes/datetimes/test_pickle.py index bb08d4c66cb3c..3905daa9688ac 100644 --- a/pandas/tests/indexes/datetimes/test_pickle.py +++ b/pandas/tests/indexes/datetimes/test_pickle.py @@ -1,6 +1,10 @@ import pytest -from pandas import NaT, date_range, to_datetime +from pandas import ( + NaT, + date_range, + to_datetime, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_reindex.py b/pandas/tests/indexes/datetimes/test_reindex.py index fa847ad072ada..e4911aa3c4a29 100644 --- a/pandas/tests/indexes/datetimes/test_reindex.py +++ b/pandas/tests/indexes/datetimes/test_reindex.py @@ -2,7 +2,10 @@ import numpy as np -from pandas import DatetimeIndex, date_range +from pandas import ( + DatetimeIndex, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index d6016b9e14743..40a03396cf98e 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -6,11 +6,18 @@ import numpy as 
np import pytest -from pandas._libs.tslibs import OutOfBoundsDatetime, to_offset +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + to_offset, +) from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG import pandas as pd -from pandas import DatetimeIndex, Timestamp, date_range +from pandas import ( + DatetimeIndex, + Timestamp, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index c7632c3c5c455..513a47d6be7ab 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -17,7 +17,11 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import BMonthEnd, Minute, MonthEnd +from pandas.tseries.offsets import ( + BMonthEnd, + Minute, + MonthEnd, +) START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index e448cf0b578ae..3ab58471cdbed 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1,15 +1,27 @@ """ Tests for DatetimeIndex timezone-related methods """ -from datetime import date, datetime, time, timedelta, tzinfo +from datetime import ( + date, + datetime, + time, + timedelta, + tzinfo, +) import dateutil -from dateutil.tz import gettz, tzlocal +from dateutil.tz import ( + gettz, + tzlocal, +) import numpy as np import pytest import pytz -from pandas._libs.tslibs import conversion, timezones +from pandas._libs.tslibs import ( + conversion, + timezones, +) import pandas.util._test_decorators as td import pandas as pd diff --git a/pandas/tests/indexes/datetimes/test_unique.py b/pandas/tests/indexes/datetimes/test_unique.py index f85cc87ee88a8..a6df9cb748294 100644 --- a/pandas/tests/indexes/datetimes/test_unique.py +++ b/pandas/tests/indexes/datetimes/test_unique.py @@ -1,8 +1,15 @@ -from datetime import 
datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import pytest -from pandas import DatetimeIndex, NaT, Timestamp +from pandas import ( + DatetimeIndex, + NaT, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index c269d6ff11896..f421a4695138c 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + IntervalDtype, +) from pandas import ( CategoricalIndex, diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index 738f0be2dbc86..8bf418a2fc731 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import IntervalIndex, Series, date_range +from pandas import ( + IntervalIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.tests.indexes.common import Base diff --git a/pandas/tests/indexes/interval/test_equals.py b/pandas/tests/indexes/interval/test_equals.py index e53a836366432..87e2348e5fdb3 100644 --- a/pandas/tests/indexes/interval/test_equals.py +++ b/pandas/tests/indexes/interval/test_equals.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import IntervalIndex, date_range +from pandas import ( + IntervalIndex, + date_range, +) class TestEquals: diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index e3b25ca4993c6..059b0b75f4190 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import Index, IntervalIndex, Timestamp, interval_range +from pandas import ( + 
Index, + IntervalIndex, + Timestamp, + interval_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py index ce477485bb21e..9d0a2fa81b53b 100644 --- a/pandas/tests/indexes/multi/conftest.py +++ b/pandas/tests/indexes/multi/conftest.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) # Note: identical the the "multi" entry in the top-level "index" fixture diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index e842fafda0327..83515d7fb82b1 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -4,7 +4,12 @@ from pandas.compat import np_version_under1p17 import pandas as pd -from pandas import Index, MultiIndex, date_range, period_range +from pandas import ( + Index, + MultiIndex, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 11687b535d2b7..c4b4562fe9e47 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -1,4 +1,7 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) import itertools import numpy as np @@ -9,7 +12,12 @@ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike import pandas as pd -from pandas import Index, MultiIndex, Series, date_range +from pandas import ( + Index, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index c80548783d148..2d504b8172ba8 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -2,7 +2,10 @@ import pytest import pandas as pd 
-from pandas import DataFrame, MultiIndex +from pandas import ( + DataFrame, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_copy.py b/pandas/tests/indexes/multi/test_copy.py index 5dd74819444cf..9a0e4bc0996be 100644 --- a/pandas/tests/indexes/multi/test_copy.py +++ b/pandas/tests/indexes/multi/test_copy.py @@ -1,4 +1,7 @@ -from copy import copy, deepcopy +from copy import ( + copy, + deepcopy, +) import pytest diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 76d704737688d..2dbc4185256de 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -6,7 +6,10 @@ from pandas.errors import PerformanceWarning import pandas as pd -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 9497ccb46da07..bc0b6e0b028a8 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -5,7 +5,10 @@ from pandas._libs import hashtable -from pandas import DatetimeIndex, MultiIndex +from pandas import ( + DatetimeIndex, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_equivalence.py b/pandas/tests/indexes/multi/test_equivalence.py index c44f7622c04dd..be27091618b0a 100644 --- a/pandas/tests/indexes/multi/test_equivalence.py +++ b/pandas/tests/indexes/multi/test_equivalence.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex, Series +from pandas import ( + Index, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_formats.py b/pandas/tests/indexes/multi/test_formats.py index 4e56b7c73c64f..17699aa32929e 100644 --- a/pandas/tests/indexes/multi/test_formats.py +++ 
b/pandas/tests/indexes/multi/test_formats.py @@ -4,7 +4,10 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_get_level_values.py b/pandas/tests/indexes/multi/test_get_level_values.py index f976515870259..25b4501a03adb 100644 --- a/pandas/tests/indexes/multi/test_get_level_values.py +++ b/pandas/tests/indexes/multi/test_get_level_values.py @@ -1,7 +1,13 @@ import numpy as np import pandas as pd -from pandas import CategoricalIndex, Index, MultiIndex, Timestamp, date_range +from pandas import ( + CategoricalIndex, + Index, + MultiIndex, + Timestamp, + date_range, +) import pandas._testing as tm @@ -94,7 +100,10 @@ def test_get_level_values_na(): def test_get_level_values_when_periods(): # GH33131. See also discussion in GH32669. # This test can probably be removed when PeriodIndex._engine is removed. - from pandas import Period, PeriodIndex + from pandas import ( + Period, + PeriodIndex, + ) idx = MultiIndex.from_arrays( [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")] diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index d43ee3330ef08..0c561395788ad 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -4,7 +4,10 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd -from pandas import CategoricalIndex, MultiIndex +from pandas import ( + CategoricalIndex, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index b6f131e5c1c9b..fba94960ddaad 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -3,10 +3,18 @@ import numpy as np import pytest -from pandas.errors import InvalidIndexError, PerformanceWarning +from pandas.errors import 
( + InvalidIndexError, + PerformanceWarning, +) import pandas as pd -from pandas import Categorical, Index, MultiIndex, date_range +from pandas import ( + Categorical, + Index, + MultiIndex, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 2fdf6d1913a0f..ff0c2a0d67885 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -6,7 +6,11 @@ from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike import pandas as pd -from pandas import IntervalIndex, MultiIndex, RangeIndex +from pandas import ( + IntervalIndex, + MultiIndex, + RangeIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 6b6b9346fe1fe..42a3c28e6797b 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py index 11bcd61383a7c..b31e50330d3cd 100644 --- a/pandas/tests/indexes/multi/test_monotonic.py +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) def test_is_monotonic_increasing(): diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py index 7dfe0b20a7478..286522f6b946d 100644 --- a/pandas/tests/indexes/multi/test_partial_indexing.py +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -1,6 +1,11 @@ import pytest -from pandas import DataFrame, IndexSlice, MultiIndex, date_range +from pandas import ( + DataFrame, + IndexSlice, + 
MultiIndex, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py index ceb14aa82a76c..446a52ef02581 100644 --- a/pandas/tests/indexes/multi/test_reindex.py +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 6d8a396119ef3..f8afc49b4b41c 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -5,7 +5,10 @@ import pytz import pandas as pd -from pandas import Index, MultiIndex +from pandas import ( + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index f872315374174..85866e7d97bcc 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex, Series +from pandas import ( + Index, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 5f957f54c05cd..63d3fe53f9db5 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -3,9 +3,18 @@ import numpy as np import pytest -from pandas.errors import PerformanceWarning, UnsortedIndexError +from pandas.errors import ( + PerformanceWarning, + UnsortedIndexError, +) -from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + RangeIndex, +) import pandas._testing as tm from pandas.core.indexes.frozen import FrozenList diff --git 
a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index 1771f4336df67..bda66856fb57a 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -5,7 +5,11 @@ from pandas.core.dtypes.common import pandas_dtype -from pandas import Float64Index, Index, Int64Index +from pandas import ( + Float64Index, + Index, + Int64Index, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/numeric/test_join.py b/pandas/tests/indexes/numeric/test_join.py index c8dffa411e5fd..43d731f8c3142 100644 --- a/pandas/tests/indexes/numeric/test_join.py +++ b/pandas/tests/indexes/numeric/test_join.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Index, Int64Index, UInt64Index +from pandas import ( + Index, + Int64Index, + UInt64Index, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py index 27e19468dddd2..5a7db9858dbad 100644 --- a/pandas/tests/indexes/numeric/test_setops.py +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -1,9 +1,18 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest -from pandas import Float64Index, Index, Int64Index, RangeIndex, UInt64Index +from pandas import ( + Float64Index, + Index, + Int64Index, + RangeIndex, + UInt64Index, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/methods/test_asfreq.py b/pandas/tests/indexes/period/methods/test_asfreq.py index 8c04ac1177676..23b88fb6ab0d3 100644 --- a/pandas/tests/indexes/period/methods/test_asfreq.py +++ b/pandas/tests/indexes/period/methods/test_asfreq.py @@ -1,6 +1,9 @@ import pytest -from pandas import PeriodIndex, period_range +from pandas import ( + PeriodIndex, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/methods/test_astype.py 
b/pandas/tests/indexes/period/methods/test_astype.py index 943b2605363c7..73439d349bebd 100644 --- a/pandas/tests/indexes/period/methods/test_astype.py +++ b/pandas/tests/indexes/period/methods/test_astype.py @@ -37,7 +37,7 @@ def test_astype_conversion(self): ) tm.assert_index_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = idx.astype(np.int64) expected = Int64Index( [16937] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" @@ -49,7 +49,7 @@ def test_astype_conversion(self): tm.assert_index_equal(result, expected) idx = period_range("1990", "2009", freq="A", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = idx.astype("i8") tm.assert_index_equal(result, Index(idx.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, idx.asi8) @@ -57,7 +57,7 @@ def test_astype_conversion(self): def test_astype_uint(self): arr = period_range("2000", periods=2, name="idx") expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): tm.assert_index_equal(arr.astype("uint64"), expected) tm.assert_index_equal(arr.astype("uint32"), expected) diff --git a/pandas/tests/indexes/period/methods/test_fillna.py b/pandas/tests/indexes/period/methods/test_fillna.py index 602e87333a6c1..12a07bac25a59 100644 --- a/pandas/tests/indexes/period/methods/test_fillna.py +++ b/pandas/tests/indexes/period/methods/test_fillna.py @@ -1,4 +1,9 @@ -from pandas import Index, NaT, Period, PeriodIndex +from pandas import ( + Index, + NaT, + Period, + PeriodIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/methods/test_insert.py b/pandas/tests/indexes/period/methods/test_insert.py new file mode 100644 index 
0000000000000..32bbe09d92567 --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_insert.py @@ -0,0 +1,18 @@ +import numpy as np +import pytest + +from pandas import ( + NaT, + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestInsert: + @pytest.mark.parametrize("na", [np.nan, NaT, None]) + def test_insert(self, na): + # GH#18295 (test missing) + expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") + result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/period/methods/test_shift.py b/pandas/tests/indexes/period/methods/test_shift.py index 278bb7f07c679..730172ca56938 100644 --- a/pandas/tests/indexes/period/methods/test_shift.py +++ b/pandas/tests/indexes/period/methods/test_shift.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import PeriodIndex, period_range +from pandas import ( + PeriodIndex, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py index c5018bd0e66d2..54e61b35eb70f 100644 --- a/pandas/tests/indexes/period/test_constructors.py +++ b/pandas/tests/indexes/period/test_constructors.py @@ -512,6 +512,27 @@ def test_map_with_string_constructor(self): tm.assert_index_equal(res, expected) +class TestShallowCopy: + def test_shallow_copy_empty(self): + # GH#13067 + idx = PeriodIndex([], freq="M") + result = idx._view() + expected = idx + + tm.assert_index_equal(result, expected) + + def test_shallow_copy_disallow_i8(self): + # GH#24391 + pi = period_range("2018-01-01", periods=3, freq="2D") + with pytest.raises(AssertionError, match="ndarray"): + pi._shallow_copy(pi.asi8) + + def test_shallow_copy_requires_disallow_period_index(self): + pi = period_range("2018-01-01", periods=3, freq="2D") + with pytest.raises(AssertionError, match="PeriodIndex"): + pi._shallow_copy(pi) + + class 
TestSeriesPeriod: def setup_method(self, method): self.series = Series(period_range("2000-01-01", periods=10, freq="D")) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index b60ae8819023f..7d054a7af4a4d 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import PeriodIndex, Series +from pandas import ( + PeriodIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 00babd2d56aeb..fcf01f850711b 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import re import numpy as np diff --git a/pandas/tests/indexes/period/test_join.py b/pandas/tests/indexes/period/test_join.py index e895bb81ea061..2f16daa36d1fd 100644 --- a/pandas/tests/indexes/period/test_join.py +++ b/pandas/tests/indexes/period/test_join.py @@ -3,7 +3,11 @@ from pandas._libs.tslibs import IncompatibleFrequency -from pandas import Index, PeriodIndex, period_range +from pandas import ( + Index, + PeriodIndex, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_monotonic.py b/pandas/tests/indexes/period/test_monotonic.py index e06e7da1773f5..15cb8f71cdcf3 100644 --- a/pandas/tests/indexes/period/test_monotonic.py +++ b/pandas/tests/indexes/period/test_monotonic.py @@ -1,4 +1,7 @@ -from pandas import Period, PeriodIndex +from pandas import ( + Period, + PeriodIndex, +) def test_is_monotonic_increasing(): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index fd0a77bf7930b..52f8de27cb6c6 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py 
@@ -2,7 +2,12 @@ import pytest import pandas as pd -from pandas import Index, NaT, PeriodIndex, Series +from pandas import ( + Index, + NaT, + PeriodIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index f354682bf6f70..b0e573250d02e 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range, period_range +from pandas import ( + DataFrame, + Series, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index af5ce1945a671..aabc837e25b4b 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -79,25 +79,6 @@ def test_make_time_series(self): series = Series(1, index=index) assert isinstance(series, Series) - def test_shallow_copy_empty(self): - # GH13067 - idx = PeriodIndex([], freq="M") - result = idx._view() - expected = idx - - tm.assert_index_equal(result, expected) - - def test_shallow_copy_disallow_i8(self): - # GH-24391 - pi = period_range("2018-01-01", periods=3, freq="2D") - with pytest.raises(AssertionError, match="ndarray"): - pi._shallow_copy(pi.asi8) - - def test_shallow_copy_requires_disallow_period_index(self): - pi = period_range("2018-01-01", periods=3, freq="2D") - with pytest.raises(AssertionError, match="PeriodIndex"): - pi._shallow_copy(pi) - def test_view_asi8(self): idx = PeriodIndex([], freq="M") @@ -411,7 +392,7 @@ def test_convert_array_of_periods(self): result = Index(periods) assert isinstance(result, PeriodIndex) - def test_append_concat(self): + def test_append_concat(self): # TODO: pd.concat test # #1815 d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", 
"12/31/2009", freq="A-DEC") @@ -442,13 +423,6 @@ def test_map(self): exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) - def test_insert(self): - # GH 18295 (test missing) - expected = PeriodIndex(["2017Q1", NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") - for na in (np.nan, NaT, None): - result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( "msg, key", [ diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py index 68b48a55957ff..a5be19731b54a 100644 --- a/pandas/tests/indexes/period/test_period_range.py +++ b/pandas/tests/indexes/period/test_period_range.py @@ -1,7 +1,13 @@ import numpy as np import pytest -from pandas import NaT, Period, PeriodIndex, date_range, period_range +from pandas import ( + NaT, + Period, + PeriodIndex, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py index e9d17e7e20778..a42b8496b0bcf 100644 --- a/pandas/tests/indexes/period/test_scalar_compat.py +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -1,6 +1,10 @@ """Tests for PeriodIndex behaving like a vectorized Period scalar""" -from pandas import Timedelta, date_range, period_range +from pandas import ( + Timedelta, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_searchsorted.py b/pandas/tests/indexes/period/test_searchsorted.py index 5e1a3b899755d..af243eeccc7a4 100644 --- a/pandas/tests/indexes/period/test_searchsorted.py +++ b/pandas/tests/indexes/period/test_searchsorted.py @@ -4,7 +4,13 @@ from pandas._libs.tslibs import IncompatibleFrequency from pandas.compat import np_version_under1p18 -from pandas import NaT, Period, PeriodIndex, Series, array +from pandas import ( + NaT, + Period, + PeriodIndex, + Series, + array, +) 
import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index 02c007c394ff5..fcd2c7d3422e1 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -1,7 +1,11 @@ import numpy as np import pandas as pd -from pandas import PeriodIndex, date_range, period_range +from pandas import ( + PeriodIndex, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 82c13240c6bf2..82a3721b0cbb9 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Period, PeriodIndex, period_range +from pandas import ( + Period, + PeriodIndex, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py index f83c885a7850b..599df3732a33b 100644 --- a/pandas/tests/indexes/ranges/test_constructors.py +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import Index, RangeIndex, Series +from pandas import ( + Index, + RangeIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py index 76013d2b7a387..6668a7c6a3d02 100644 --- a/pandas/tests/indexes/ranges/test_join.py +++ b/pandas/tests/indexes/ranges/test_join.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import Index, Int64Index, RangeIndex +from pandas import ( + Index, + Int64Index, + RangeIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 57df2a1e83418..fb670c508a8f1 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ 
b/pandas/tests/indexes/ranges/test_range.py @@ -4,7 +4,12 @@ from pandas.core.dtypes.common import ensure_platform_int import pandas as pd -from pandas import Float64Index, Index, Int64Index, RangeIndex +from pandas import ( + Float64Index, + Index, + Int64Index, + RangeIndex, +) import pandas._testing as tm from pandas.tests.indexes.test_numeric import Numeric diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 660269f2d02a4..ba938f82e9d89 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -1,9 +1,17 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest -from pandas import Index, Int64Index, RangeIndex, UInt64Index +from pandas import ( + Index, + Int64Index, + RangeIndex, + UInt64Index, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index c9c86f9eebde9..60fa8f1a0c083 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -33,7 +33,7 @@ def test_hash_error(index): def test_copy_dtype_deprecated(index): # GH#35853 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): index.copy(dtype=object) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5fd1a15416e23..0b43b6b1ead9c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1,15 +1,20 @@ from collections import defaultdict -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) from io import StringIO import math -import operator import re import numpy as np import pytest from pandas._libs.tslib import Timestamp -from pandas.compat import IS64, np_datetime64_compat +from pandas.compat import ( + IS64, + np_datetime64_compat, +) 
from pandas.util._test_decorators import async_mark import pandas as pd @@ -567,25 +572,19 @@ def test_asof_numeric_vs_bool_raises(self): with pytest.raises(TypeError, match=msg): right.asof(left) - def test_asof_datetime_partial(self): - index = date_range("2010-01-01", periods=2, freq="m") - expected = Timestamp("2010-02-28") - result = index.asof("2010-02") - assert result == expected - assert not isinstance(result, Index) - - def test_nanosecond_index_access(self): - s = Series([Timestamp("20130101")]).values.view("i8")[0] + # TODO: this tests Series.asof + def test_asof_nanosecond_index_access(self): + s = Timestamp("20130101").value r = DatetimeIndex([s + 50 + i for i in range(100)]) - x = Series(np.random.randn(100), index=r) + ser = Series(np.random.randn(100), index=r) - first_value = x.asof(x.index[0]) + first_value = ser.asof(ser.index[0]) # this does not yet work, as parsing strings is done via dateutil # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] expected_ts = np_datetime64_compat("2013-01-01 00:00:00.000000050+0000", "ns") - assert first_value == x[Timestamp(expected_ts)] + assert first_value == ser[Timestamp(expected_ts)] @pytest.mark.parametrize("index", ["string"], indirect=True) def test_booleanindex(self, index): @@ -629,110 +628,6 @@ def test_empty_fancy_raises(self, index): with pytest.raises(IndexError, match=msg): index[empty_farr] - @pytest.mark.parametrize("index", ["string"], indirect=True) - def test_intersection(self, index, sort): - first = index[:20] - second = index[:10] - intersect = first.intersection(second, sort=sort) - if sort is None: - tm.assert_index_equal(intersect, second.sort_values()) - assert tm.equalContents(intersect, second) - - # Corner cases - inter = first.intersection(first, sort=sort) - assert inter is first - - @pytest.mark.parametrize( - "index2,keeps_name", - [ - (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name - (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff 
names - (Index([3, 4, 5, 6, 7]), False), - ], - ) - def test_intersection_name_preservation(self, index2, keeps_name, sort): - index1 = Index([1, 2, 3, 4, 5], name="index") - expected = Index([3, 4, 5]) - result = index1.intersection(index2, sort) - - if keeps_name: - expected.name = "index" - - assert result.name == expected.name - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize("index", ["string"], indirect=True) - @pytest.mark.parametrize( - "first_name,second_name,expected_name", - [("A", "A", "A"), ("A", "B", None), (None, "B", None)], - ) - def test_intersection_name_preservation2( - self, index, first_name, second_name, expected_name, sort - ): - first = index[5:20] - second = index[:10] - first.name = first_name - second.name = second_name - intersect = first.intersection(second, sort=sort) - assert intersect.name == expected_name - - def test_chained_union(self, sort): - # Chained unions handles names correctly - i1 = Index([1, 2], name="i1") - i2 = Index([5, 6], name="i2") - i3 = Index([3, 4], name="i3") - union = i1.union(i2.union(i3, sort=sort), sort=sort) - expected = i1.union(i2, sort=sort).union(i3, sort=sort) - tm.assert_index_equal(union, expected) - - j1 = Index([1, 2], name="j1") - j2 = Index([], name="j2") - j3 = Index([], name="j3") - union = j1.union(j2.union(j3, sort=sort), sort=sort) - expected = j1.union(j2, sort=sort).union(j3, sort=sort) - tm.assert_index_equal(union, expected) - - @pytest.mark.parametrize("index", ["string"], indirect=True) - def test_union(self, index, sort): - first = index[5:20] - second = index[:10] - everything = index[:20] - - union = first.union(second, sort=sort) - if sort is None: - tm.assert_index_equal(union, everything.sort_values()) - assert tm.equalContents(union, everything) - - @pytest.mark.parametrize("klass", [np.array, Series, list]) - @pytest.mark.parametrize("index", ["string"], indirect=True) - def test_union_from_iterables(self, index, klass, sort): - # GH 10149 - first = 
index[5:20] - second = index[:10] - everything = index[:20] - - case = klass(second.values) - result = first.union(case, sort=sort) - if sort is None: - tm.assert_index_equal(result, everything.sort_values()) - assert tm.equalContents(result, everything) - - @pytest.mark.parametrize("index", ["string"], indirect=True) - def test_union_identity(self, index, sort): - first = index[5:20] - - union = first.union(first, sort=sort) - # i.e. identity is not preserved when sort is True - assert (union is first) is (not sort) - - # This should no longer be the same object, since [] is not consistent, - # both objects will be recast to dtype('O') - union = first.union([], sort=sort) - assert (union is first) is (not sort) - - union = Index([]).union(first, sort=sort) - assert (union is first) is (not sort) - def test_union_dt_as_obj(self, sort): # TODO: Replace with fixturesult index = self.create_index() @@ -859,123 +754,6 @@ def test_append_empty_preserve_name(self, name, expected): result = left.append(right) assert result.name == expected - @pytest.mark.parametrize("index", ["string"], indirect=True) - @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) - def test_difference_name_preservation(self, index, second_name, expected, sort): - first = index[5:20] - second = index[:10] - answer = index[10:20] - - first.name = "name" - second.name = second_name - result = first.difference(second, sort=sort) - - assert tm.equalContents(result, answer) - - if expected is None: - assert result.name is None - else: - assert result.name == expected - - def test_difference_empty_arg(self, index, sort): - first = index[5:20] - first.name = "name" - result = first.difference([], sort) - - tm.assert_index_equal(result, first) - - @pytest.mark.parametrize("index", ["string"], indirect=True) - def test_difference_identity(self, index, sort): - first = index[5:20] - first.name = "name" - result = first.difference(first, sort) - - assert len(result) == 0 - 
assert result.name == first.name - - @pytest.mark.parametrize("index", ["string"], indirect=True) - def test_difference_sort(self, index, sort): - first = index[5:20] - second = index[:10] - - result = first.difference(second, sort) - expected = index[10:20] - - if sort is None: - expected = expected.sort_values() - - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) - def test_difference_incomparable(self, opname): - a = Index([3, Timestamp("2000"), 1]) - b = Index([2, Timestamp("1999"), 1]) - op = operator.methodcaller(opname, b) - - with tm.assert_produces_warning(RuntimeWarning): - # sort=None, the default - result = op(a) - expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")]) - if opname == "difference": - expected = expected[:2] - tm.assert_index_equal(result, expected) - - # sort=False - op = operator.methodcaller(opname, b, sort=False) - result = op(a) - tm.assert_index_equal(result, expected) - - @pytest.mark.xfail(reason="Not implemented") - @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) - def test_difference_incomparable_true(self, opname): - # TODO decide on True behaviour - # # sort=True, raises - a = Index([3, Timestamp("2000"), 1]) - b = Index([2, Timestamp("1999"), 1]) - op = operator.methodcaller(opname, b, sort=True) - - with pytest.raises(TypeError, match="Cannot compare"): - op(a) - - def test_symmetric_difference_mi(self, sort): - index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) - index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) - result = index1.symmetric_difference(index2, sort=sort) - expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) - if sort is None: - expected = expected.sort_values() - tm.assert_index_equal(result, expected) - assert tm.equalContents(result, expected) - - @pytest.mark.parametrize( - "index2,expected", - [ - (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), - 
(Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), - ], - ) - def test_symmetric_difference_missing(self, index2, expected, sort): - # GH 13514 change: {nan} - {nan} == {} - # (GH 6444, sorting of nans, is no longer an issue) - index1 = Index([1, np.nan, 2, 3]) - - result = index1.symmetric_difference(index2, sort=sort) - if sort is None: - expected = expected.sort_values() - tm.assert_index_equal(result, expected) - - def test_symmetric_difference_non_index(self, sort): - index1 = Index([1, 2, 3, 4], name="index1") - index2 = np.array([2, 3, 4, 5]) - expected = Index([1, 5]) - result = index1.symmetric_difference(index2, sort=sort) - assert tm.equalContents(result, expected) - assert result.name == "index1" - - result = index1.symmetric_difference(index2, result_name="new_name", sort=sort) - assert tm.equalContents(result, expected) - assert result.name == "new_name" - def test_is_mixed_deprecated(self): # GH#32922 index = self.create_index() @@ -1029,7 +807,7 @@ def test_is_all_dates(self, index, expected): assert index.is_all_dates is expected def test_summary(self, index): - self._check_method_works(Index._summary, index) + index._summary() def test_summary_bug(self): # GH3869` @@ -1092,9 +870,6 @@ def test_logical_compat(self, op): index = self.create_index() assert getattr(index, op)() == getattr(index.values, op)() - def _check_method_works(self, method, index): - method(index) - @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) def test_drop_by_str_label(self, index): n = len(index) @@ -1770,11 +1545,6 @@ def test_dropna_invalid_how_raises(self): with pytest.raises(ValueError, match=msg): Index([1, 2, 3]).dropna(how="xxx") - def test_get_combined_index(self): - result = _get_combined_index([]) - expected = Index([]) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( "index", [ @@ -1807,16 +1577,6 @@ def test_str_to_bytes_raises(self): with pytest.raises(TypeError, match=msg): bytes(index) - def 
test_intersect_str_dates(self): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - index1 = Index(dt_dates, dtype=object) - index2 = Index(["aa"], dtype=object) - result = index2.intersection(index1) - - expected = Index([], dtype=object) - tm.assert_index_equal(result, expected) - @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") def test_index_with_tuple_bool(self): # GH34123 @@ -1862,6 +1622,11 @@ def test_ensure_index_mixed_closed_intervals(self): expected = Index(intervals, dtype=object) tm.assert_index_equal(result, expected) + def test_get_combined_index(self): + result = _get_combined_index([]) + expected = Index([]) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( "opname", diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index e5895c0b507e3..97fe35bb7f2c9 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -11,7 +11,10 @@ from pandas._libs.tslibs import iNaT from pandas.compat import IS64 -from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion +from pandas.core.dtypes.common import ( + is_period_dtype, + needs_i8_conversion, +) import pandas as pd from pandas import ( @@ -108,7 +111,10 @@ def test_set_name_methods(self, index_flat): assert index.names == [name] def test_copy_and_deepcopy(self, index_flat): - from copy import copy, deepcopy + from copy import ( + copy, + deepcopy, + ) index = index_flat diff --git a/pandas/tests/indexes/test_engines.py b/pandas/tests/indexes/test_engines.py index 9ea70a457e516..52af29d999fcc 100644 --- a/pandas/tests/indexes/test_engines.py +++ b/pandas/tests/indexes/test_engines.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas._libs import algos as libalgos, index as libindex +from pandas._libs import ( + algos as libalgos, + index as libindex, +) import pandas as pd import pandas._testing as tm diff --git 
a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index d6b92999305b2..74c961418176b 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -6,7 +6,14 @@ from pandas._libs.tslibs import Timestamp import pandas as pd -from pandas import Float64Index, Index, Int64Index, RangeIndex, Series, UInt64Index +from pandas import ( + Float64Index, + Index, + Int64Index, + RangeIndex, + Series, + UInt64Index, +) import pandas._testing as tm from pandas.tests.indexes.common import Base diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index f7f6456f736c0..1ee7c5547ecf9 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.compat import np_version_under1p17, np_version_under1p18 +from pandas.compat import ( + np_version_under1p17, + np_version_under1p18, +) from pandas import ( DatetimeIndex, diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 746b6d6fb6e2a..b2bab2e720146 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -2,25 +2,32 @@ The tests in this package are to ensure the proper resultant dtypes of set operations. 
""" +from datetime import datetime +import operator + import numpy as np import pytest from pandas.core.dtypes.common import is_dtype_equal -import pandas as pd from pandas import ( CategoricalIndex, DatetimeIndex, Float64Index, + Index, Int64Index, MultiIndex, RangeIndex, Series, TimedeltaIndex, + Timestamp, UInt64Index, ) import pandas._testing as tm -from pandas.api.types import is_datetime64tz_dtype, pandas_dtype +from pandas.api.types import ( + is_datetime64tz_dtype, + pandas_dtype, +) COMPATIBLE_INCONSISTENT_PAIRS = { (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex), @@ -110,8 +117,8 @@ def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): def test_union_dtypes(left, right, expected, names): left = pandas_dtype(left) right = pandas_dtype(right) - a = pd.Index([], dtype=left, name=names[0]) - b = pd.Index([], dtype=right, name=names[1]) + a = Index([], dtype=left, name=names[0]) + b = Index([], dtype=right, name=names[1]) result = a.union(b) assert result.dtype == expected assert result.name == names[2] @@ -138,10 +145,10 @@ def test_dunder_inplace_setops_deprecated(index): @pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]]) def test_intersection_duplicates(values): # GH#31326 - a = pd.Index(values) - b = pd.Index([3, 3]) + a = Index(values) + b = Index([3, 3]) result = a.intersection(b) - expected = pd.Index([3]) + expected = Index([3]) tm.assert_index_equal(result, expected) @@ -492,3 +499,238 @@ def check_intersection_commutative(left, right): check_intersection_commutative(idx, idx_non_unique) assert idx.intersection(idx_non_unique).is_unique + + +class TestSetOpsUnsorted: + # These may eventually belong in a dtype-specific test_setops, or + # parametrized over a more general fixture + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + index1 = Index(dt_dates, dtype=object) + index2 = Index(["aa"], dtype=object) + result = index2.intersection(index1) + + expected = Index([], 
dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_intersection(self, index, sort): + first = index[:20] + second = index[:10] + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name + (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names + (Index([3, 4, 5, 6, 7]), False), + ], + ) + def test_intersection_name_preservation(self, index2, keeps_name, sort): + index1 = Index([1, 2, 3, 4, 5], name="index") + expected = Index([3, 4, 5]) + result = index1.intersection(index2, sort) + + if keeps_name: + expected.name = "index" + + assert result.name == expected.name + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize( + "first_name,second_name,expected_name", + [("A", "A", "A"), ("A", "B", None), (None, "B", None)], + ) + def test_intersection_name_preservation2( + self, index, first_name, second_name, expected_name, sort + ): + first = index[5:20] + second = index[:10] + first.name = first_name + second.name = second_name + intersect = first.intersection(second, sort=sort) + assert intersect.name == expected_name + + def test_chained_union(self, sort): + # Chained unions handles names correctly + i1 = Index([1, 2], name="i1") + i2 = Index([5, 6], name="i2") + i3 = Index([3, 4], name="i3") + union = i1.union(i2.union(i3, sort=sort), sort=sort) + expected = i1.union(i2, sort=sort).union(i3, sort=sort) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name="j1") + j2 = Index([], name="j2") + j3 = Index([], name="j3") + union = j1.union(j2.union(j3, sort=sort), 
sort=sort) + expected = j1.union(j2, sort=sort).union(j3, sort=sort) + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union(self, index, sort): + first = index[5:20] + second = index[:10] + everything = index[:20] + + union = first.union(second, sort=sort) + if sort is None: + tm.assert_index_equal(union, everything.sort_values()) + assert tm.equalContents(union, everything) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union_from_iterables(self, index, klass, sort): + # GH#10149 + first = index[5:20] + second = index[:10] + everything = index[:20] + + case = klass(second.values) + result = first.union(case, sort=sort) + if sort is None: + tm.assert_index_equal(result, everything.sort_values()) + assert tm.equalContents(result, everything) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_union_identity(self, index, sort): + first = index[5:20] + + union = first.union(first, sort=sort) + # i.e. 
identity is not preserved when sort is True + assert (union is first) is (not sort) + + # This should no longer be the same object, since [] is not consistent, + # both objects will be recast to dtype('O') + union = first.union([], sort=sort) + assert (union is first) is (not sort) + + union = Index([]).union(first, sort=sort) + assert (union is first) is (not sort) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) + def test_difference_name_preservation(self, index, second_name, expected, sort): + first = index[5:20] + second = index[:10] + answer = index[10:20] + + first.name = "name" + second.name = second_name + result = first.difference(second, sort=sort) + + assert tm.equalContents(result, answer) + + if expected is None: + assert result.name is None + else: + assert result.name == expected + + def test_difference_empty_arg(self, index, sort): + first = index[5:20] + first.name = "name" + result = first.difference([], sort) + + tm.assert_index_equal(result, first) + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_difference_identity(self, index, sort): + first = index[5:20] + first.name = "name" + result = first.difference(first, sort) + + assert len(result) == 0 + assert result.name == first.name + + @pytest.mark.parametrize("index", ["string"], indirect=True) + def test_difference_sort(self, index, sort): + first = index[5:20] + second = index[:10] + + result = first.difference(second, sort) + expected = index[10:20] + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable(self, opname): + a = Index([3, Timestamp("2000"), 1]) + b = Index([2, Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b) + + with tm.assert_produces_warning(RuntimeWarning): + # sort=None, the 
default + result = op(a) + expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")]) + if opname == "difference": + expected = expected[:2] + tm.assert_index_equal(result, expected) + + # sort=False + op = operator.methodcaller(opname, b, sort=False) + result = op(a) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable_true(self, opname): + # TODO: decide on True behaviour + # # sort=True, raises + a = Index([3, Timestamp("2000"), 1]) + b = Index([2, Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b, sort=True) + + with pytest.raises(TypeError, match="Cannot compare"): + op(a) + + def test_symmetric_difference_mi(self, sort): + index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) + index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) + result = index1.symmetric_difference(index2, sort=sort) + expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + @pytest.mark.parametrize( + "index2,expected", + [ + (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), + (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), + ], + ) + def test_symmetric_difference_missing(self, index2, expected, sort): + # GH#13514 change: {nan} - {nan} == {} + # (GH#6444, sorting of nans, is no longer an issue) + index1 = Index([1, np.nan, 2, 3]) + + result = index1.symmetric_difference(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + def test_symmetric_difference_non_index(self, sort): + index1 = Index([1, 2, 3, 4], name="index1") + index2 = np.array([2, 3, 4, 5]) + expected = Index([1, 5]) + result = index1.symmetric_difference(index2, sort=sort) + assert tm.equalContents(result, expected) + 
assert result.name == "index1" + + result = index1.symmetric_difference(index2, result_name="new_name", sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "new_name" diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py index a849ffa98324c..c2c7a1f32ae6e 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_astype.py +++ b/pandas/tests/indexes/timedeltas/methods/test_astype.py @@ -55,7 +55,7 @@ def test_astype(self): ) tm.assert_index_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = idx.astype(int) expected = Int64Index( [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" @@ -67,7 +67,7 @@ def test_astype(self): tm.assert_index_equal(result, expected) rng = timedelta_range("1 days", periods=10) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): result = rng.astype("i8") tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(rng.asi8, result.values) @@ -77,7 +77,7 @@ def test_astype_uint(self): expected = pd.UInt64Index( np.array([3600000000000, 90000000000000], dtype="uint64") ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning): tm.assert_index_equal(arr.astype("uint64"), expected) tm.assert_index_equal(arr.astype("uint32"), expected) diff --git a/pandas/tests/indexes/timedeltas/methods/test_factorize.py b/pandas/tests/indexes/timedeltas/methods/test_factorize.py index dcf8cefba70fd..24ab3888412d0 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_factorize.py +++ b/pandas/tests/indexes/timedeltas/methods/test_factorize.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import TimedeltaIndex, factorize, timedelta_range +from pandas import ( + TimedeltaIndex, + 
factorize, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/methods/test_fillna.py b/pandas/tests/indexes/timedeltas/methods/test_fillna.py index 47b2f2ff597f4..40aa95d0a4605 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_fillna.py +++ b/pandas/tests/indexes/timedeltas/methods/test_fillna.py @@ -1,4 +1,9 @@ -from pandas import Index, NaT, Timedelta, TimedeltaIndex +from pandas import ( + Index, + NaT, + Timedelta, + TimedeltaIndex, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_insert.py b/pandas/tests/indexes/timedeltas/methods/test_insert.py similarity index 98% rename from pandas/tests/indexes/timedeltas/test_insert.py rename to pandas/tests/indexes/timedeltas/methods/test_insert.py index d501f81fd9dce..067031c694810 100644 --- a/pandas/tests/indexes/timedeltas/test_insert.py +++ b/pandas/tests/indexes/timedeltas/methods/test_insert.py @@ -6,7 +6,12 @@ from pandas._libs import lib import pandas as pd -from pandas import Index, Timedelta, TimedeltaIndex, timedelta_range +from pandas import ( + Index, + Timedelta, + TimedeltaIndex, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index ffc10faaf8150..20b8ffc062982 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -4,9 +4,17 @@ import pytest import pandas as pd -from pandas import Timedelta, TimedeltaIndex, timedelta_range, to_timedelta +from pandas import ( + Timedelta, + TimedeltaIndex, + timedelta_range, + to_timedelta, +) import pandas._testing as tm -from pandas.core.arrays.timedeltas import TimedeltaArray, sequence_to_td64ns +from pandas.core.arrays.timedeltas import ( + TimedeltaArray, + sequence_to_td64ns, +) class TestTimedeltaIndex: diff --git a/pandas/tests/indexes/timedeltas/test_delete.py 
b/pandas/tests/indexes/timedeltas/test_delete.py index 63f2b450aa818..6e6f54702ce1a 100644 --- a/pandas/tests/indexes/timedeltas/test_delete.py +++ b/pandas/tests/indexes/timedeltas/test_delete.py @@ -1,4 +1,7 @@ -from pandas import TimedeltaIndex, timedelta_range +from pandas import ( + TimedeltaIndex, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 8a8e2abd17165..751f9e4cc9eee 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -1,7 +1,10 @@ import pytest import pandas as pd -from pandas import Series, TimedeltaIndex +from pandas import ( + Series, + TimedeltaIndex, +) class TestTimedeltaIndexRendering: diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 34316e1470573..7acfb50fe944b 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,11 +1,20 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import re import numpy as np import pytest import pandas as pd -from pandas import Index, Timedelta, TimedeltaIndex, notna, timedelta_range +from pandas import ( + Index, + Timedelta, + TimedeltaIndex, + notna, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py index aaf4ef29e162b..2d8795b45f276 100644 --- a/pandas/tests/indexes/timedeltas/test_join.py +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import Index, Timedelta, timedelta_range +from pandas import ( + Index, + Timedelta, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 
83b8fcc1b15fe..4e6d69913900d 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -2,10 +2,18 @@ import pytest import pandas as pd -from pandas import Series, TimedeltaIndex, timedelta_range +from pandas import ( + Series, + TimedeltaIndex, + timedelta_range, +) import pandas._testing as tm -from pandas.tseries.offsets import DateOffset, Day, Hour +from pandas.tseries.offsets import ( + DateOffset, + Day, + Hour, +) class TestTimedeltaIndexOps: diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py index 6d53fe4563e41..cca211c1eb155 100644 --- a/pandas/tests/indexes/timedeltas/test_partial_slicing.py +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import Series, timedelta_range +from pandas import ( + Series, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py index 2f9e1a88a04a8..5e4b228ba2d32 100644 --- a/pandas/tests/indexes/timedeltas/test_scalar_compat.py +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -7,7 +7,13 @@ from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG -from pandas import Index, Series, Timedelta, TimedeltaIndex, timedelta_range +from pandas import ( + Index, + Series, + Timedelta, + TimedeltaIndex, + timedelta_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_searchsorted.py b/pandas/tests/indexes/timedeltas/test_searchsorted.py index e3b52058469f0..8a48da91ef31d 100644 --- a/pandas/tests/indexes/timedeltas/test_searchsorted.py +++ b/pandas/tests/indexes/timedeltas/test_searchsorted.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import Series, TimedeltaIndex, Timestamp, array +from pandas import ( + Series, + TimedeltaIndex, + Timestamp, + array, +) import 
pandas._testing as tm diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index 2e4e4bfde9202..907a3463971ca 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import Int64Index, TimedeltaIndex, timedelta_range +from pandas import ( + Int64Index, + TimedeltaIndex, + timedelta_range, +) import pandas._testing as tm from pandas.tseries.offsets import Hour diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index dc3df4427f351..7277595f1d631 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,10 +1,17 @@ import numpy as np import pytest -from pandas import Timedelta, timedelta_range, to_timedelta +from pandas import ( + Timedelta, + timedelta_range, + to_timedelta, +) import pandas._testing as tm -from pandas.tseries.offsets import Day, Second +from pandas.tseries.offsets import ( + Day, + Second, +) class TestTimedeltas: diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index fb6f4da2a482e..f70897147c867 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -3,7 +3,14 @@ import numpy as np -from pandas import DataFrame, Float64Index, MultiIndex, Series, UInt64Index, date_range +from pandas import ( + DataFrame, + Float64Index, + MultiIndex, + Series, + UInt64Index, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index f4e7296598d54..eaf597e6bf978 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -2,92 +2,92 @@ import pytest import pandas as pd -from pandas import DataFrame, IntervalIndex, Series 
+from pandas import ( + DataFrame, + IntervalIndex, + Series, +) import pandas._testing as tm class TestIntervalIndex: - def setup_method(self, method): - self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + @pytest.fixture + def series_with_interval_index(self): + return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) - def test_getitem_with_scalar(self): + def test_getitem_with_scalar(self, series_with_interval_index, indexer_sl): - s = self.s + ser = series_with_interval_index.copy() - expected = s.iloc[:3] - tm.assert_series_equal(expected, s[:3]) - tm.assert_series_equal(expected, s[:2.5]) - tm.assert_series_equal(expected, s[0.1:2.5]) + expected = ser.iloc[:3] + tm.assert_series_equal(expected, indexer_sl(ser)[:3]) + tm.assert_series_equal(expected, indexer_sl(ser)[:2.5]) + tm.assert_series_equal(expected, indexer_sl(ser)[0.1:2.5]) + if indexer_sl is tm.loc: + tm.assert_series_equal(expected, ser.loc[-1:3]) - expected = s.iloc[1:4] - tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]]) - tm.assert_series_equal(expected, s[[2, 3, 4]]) - tm.assert_series_equal(expected, s[[1.5, 3, 4]]) + expected = ser.iloc[1:4] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]]) - expected = s.iloc[2:5] - tm.assert_series_equal(expected, s[s >= 2]) + expected = ser.iloc[2:5] + tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) @pytest.mark.parametrize("direction", ["increasing", "decreasing"]) - def test_nonoverlapping_monotonic(self, direction, closed): + def test_getitem_nonoverlapping_monotonic(self, direction, closed, indexer_sl): tpls = [(0, 1), (2, 3), (4, 5)] if direction == "decreasing": tpls = tpls[::-1] idx = IntervalIndex.from_tuples(tpls, closed=closed) - s = Series(list("abc"), idx) + ser = Series(list("abc"), idx) - for key, expected in zip(idx.left, s): + for key, expected in 
zip(idx.left, ser): if idx.closed_left: - assert s[key] == expected - assert s.loc[key] == expected + assert indexer_sl(ser)[key] == expected else: with pytest.raises(KeyError, match=str(key)): - s[key] - with pytest.raises(KeyError, match=str(key)): - s.loc[key] + indexer_sl(ser)[key] - for key, expected in zip(idx.right, s): + for key, expected in zip(idx.right, ser): if idx.closed_right: - assert s[key] == expected - assert s.loc[key] == expected + assert indexer_sl(ser)[key] == expected else: with pytest.raises(KeyError, match=str(key)): - s[key] - with pytest.raises(KeyError, match=str(key)): - s.loc[key] + indexer_sl(ser)[key] - for key, expected in zip(idx.mid, s): - assert s[key] == expected - assert s.loc[key] == expected + for key, expected in zip(idx.mid, ser): + assert indexer_sl(ser)[key] == expected - def test_non_matching(self): - s = self.s + def test_getitem_non_matching(self, series_with_interval_index, indexer_sl): + ser = series_with_interval_index.copy() # this is a departure from our current # indexing scheme, but simpler with pytest.raises(KeyError, match=r"^\[-1\]$"): - s.loc[[-1, 3, 4, 5]] + indexer_sl(ser)[[-1, 3, 4, 5]] with pytest.raises(KeyError, match=r"^\[-1\]$"): - s.loc[[-1, 3]] + indexer_sl(ser)[[-1, 3]] @pytest.mark.arm_slow - def test_large_series(self): - s = Series( + def test_loc_getitem_large_series(self): + ser = Series( np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001)) ) - result1 = s.loc[:80000] - result2 = s.loc[0:80000] - result3 = s.loc[0:80000:1] + result1 = ser.loc[:80000] + result2 = ser.loc[0:80000] + result3 = ser.loc[0:80000:1] tm.assert_series_equal(result1, result2) tm.assert_series_equal(result1, result3) def test_loc_getitem_frame(self): # CategoricalIndex with IntervalIndex categories df = DataFrame({"A": range(10)}) - s = pd.cut(df.A, 5) - df["B"] = s + ser = pd.cut(df.A, 5) + df["B"] = ser df = df.set_index("B") result = df.loc[4] diff --git 
a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py index a9512bc97d9de..34dc5d604e90d 100644 --- a/pandas/tests/indexing/interval/test_interval_new.py +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -3,94 +3,74 @@ import numpy as np import pytest -from pandas import Interval, IntervalIndex, Series +from pandas import ( + Interval, + IntervalIndex, + Series, +) import pandas._testing as tm class TestIntervalIndex: - def setup_method(self, method): - self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + @pytest.fixture + def series_with_interval_index(self): + return Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) - def test_loc_with_interval(self): + def test_loc_with_interval(self, series_with_interval_index, indexer_sl): # loc with single label / list of labels: # - Intervals: only exact matches # - scalars: those that contain it - s = self.s + ser = series_with_interval_index.copy() expected = 0 - result = s.loc[Interval(0, 1)] - assert result == expected - result = s[Interval(0, 1)] + result = indexer_sl(ser)[Interval(0, 1)] assert result == expected - expected = s.iloc[3:5] - result = s.loc[[Interval(3, 4), Interval(4, 5)]] - tm.assert_series_equal(expected, result) - result = s[[Interval(3, 4), Interval(4, 5)]] + expected = ser.iloc[3:5] + result = indexer_sl(ser)[[Interval(3, 4), Interval(4, 5)]] tm.assert_series_equal(expected, result) # missing or not exact with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): - s.loc[Interval(3, 5, closed="left")] - - with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): - s[Interval(3, 5, closed="left")] - - with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): - s[Interval(3, 5)] + indexer_sl(ser)[Interval(3, 5, closed="left")] with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): - s.loc[Interval(3, 5)] - - with 
pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): - s[Interval(3, 5)] - - with pytest.raises( - KeyError, match=re.escape("Interval(-2, 0, closed='right')") - ): - s.loc[Interval(-2, 0)] + indexer_sl(ser)[Interval(3, 5)] with pytest.raises( KeyError, match=re.escape("Interval(-2, 0, closed='right')") ): - s[Interval(-2, 0)] - - with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): - s.loc[Interval(5, 6)] + indexer_sl(ser)[Interval(-2, 0)] with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): - s[Interval(5, 6)] + indexer_sl(ser)[Interval(5, 6)] - def test_loc_with_scalar(self): + def test_loc_with_scalar(self, series_with_interval_index, indexer_sl): # loc with single label / list of labels: # - Intervals: only exact matches # - scalars: those that contain it - s = self.s + ser = series_with_interval_index.copy() - assert s.loc[1] == 0 - assert s.loc[1.5] == 1 - assert s.loc[2] == 1 + assert indexer_sl(ser)[1] == 0 + assert indexer_sl(ser)[1.5] == 1 + assert indexer_sl(ser)[2] == 1 - assert s[1] == 0 - assert s[1.5] == 1 - assert s[2] == 1 + expected = ser.iloc[1:4] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[2, 3, 4]]) + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 3, 4]]) - expected = s.iloc[1:4] - tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]]) - tm.assert_series_equal(expected, s.loc[[2, 3, 4]]) - tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]]) + expected = ser.iloc[[1, 1, 2, 1]] + tm.assert_series_equal(expected, indexer_sl(ser)[[1.5, 2, 2.5, 1.5]]) - expected = s.iloc[[1, 1, 2, 1]] - tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]]) + expected = ser.iloc[2:5] + tm.assert_series_equal(expected, indexer_sl(ser)[ser >= 2]) - expected = s.iloc[2:5] - tm.assert_series_equal(expected, s.loc[s >= 2]) - - def test_loc_with_slices(self): + def test_loc_with_slices(self, 
series_with_interval_index, indexer_sl): # loc with slices: # - Interval objects: only works with exact matches @@ -99,178 +79,130 @@ def test_loc_with_slices(self): # contains them: # (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop)) - s = self.s + ser = series_with_interval_index.copy() # slice of interval - expected = s.iloc[:3] - result = s.loc[Interval(0, 1) : Interval(2, 3)] - tm.assert_series_equal(expected, result) - result = s[Interval(0, 1) : Interval(2, 3)] + expected = ser.iloc[:3] + result = indexer_sl(ser)[Interval(0, 1) : Interval(2, 3)] tm.assert_series_equal(expected, result) - expected = s.iloc[3:] - result = s.loc[Interval(3, 4) :] - tm.assert_series_equal(expected, result) - result = s[Interval(3, 4) :] + expected = ser.iloc[3:] + result = indexer_sl(ser)[Interval(3, 4) :] tm.assert_series_equal(expected, result) msg = "Interval objects are not currently supported" with pytest.raises(NotImplementedError, match=msg): - s.loc[Interval(3, 6) :] + indexer_sl(ser)[Interval(3, 6) :] with pytest.raises(NotImplementedError, match=msg): - s[Interval(3, 6) :] - - with pytest.raises(NotImplementedError, match=msg): - s.loc[Interval(3, 4, closed="left") :] - - with pytest.raises(NotImplementedError, match=msg): - s[Interval(3, 4, closed="left") :] - - # slice of scalar + indexer_sl(ser)[Interval(3, 4, closed="left") :] - expected = s.iloc[:3] - tm.assert_series_equal(expected, s.loc[:3]) - tm.assert_series_equal(expected, s.loc[:2.5]) - tm.assert_series_equal(expected, s.loc[0.1:2.5]) - tm.assert_series_equal(expected, s.loc[-1:3]) - - tm.assert_series_equal(expected, s[:3]) - tm.assert_series_equal(expected, s[:2.5]) - tm.assert_series_equal(expected, s[0.1:2.5]) - - def test_slice_step_ne1(self): + def test_slice_step_ne1(self, series_with_interval_index): # GH#31658 slice of scalar with step != 1 - s = self.s - expected = s.iloc[0:4:2] + ser = series_with_interval_index.copy() + expected = ser.iloc[0:4:2] - result = s[0:4:2] + result = 
ser[0:4:2] tm.assert_series_equal(result, expected) - result2 = s[0:4][::2] + result2 = ser[0:4][::2] tm.assert_series_equal(result2, expected) - def test_slice_float_start_stop(self): + def test_slice_float_start_stop(self, series_with_interval_index): # GH#31658 slicing with integers is positional, with floats is not # supported - ser = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + ser = series_with_interval_index.copy() msg = "label-based slicing with step!=1 is not supported for IntervalIndex" with pytest.raises(ValueError, match=msg): ser[1.5:9.5:2] - def test_slice_interval_step(self): + def test_slice_interval_step(self, series_with_interval_index): # GH#31658 allows for integer step!=1, not Interval step - s = self.s + ser = series_with_interval_index.copy() msg = "label-based slicing with step!=1 is not supported for IntervalIndex" with pytest.raises(ValueError, match=msg): - s[0 : 4 : Interval(0, 1)] + ser[0 : 4 : Interval(0, 1)] - def test_loc_with_overlap(self): + def test_loc_with_overlap(self, indexer_sl): idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) - s = Series(range(len(idx)), index=idx) + ser = Series(range(len(idx)), index=idx) # scalar - expected = s - result = s.loc[4] - tm.assert_series_equal(expected, result) - - result = s[4] - tm.assert_series_equal(expected, result) - - result = s.loc[[4]] + expected = ser + result = indexer_sl(ser)[4] tm.assert_series_equal(expected, result) - result = s[[4]] + result = indexer_sl(ser)[[4]] tm.assert_series_equal(expected, result) # interval expected = 0 - result = s.loc[Interval(1, 5)] + result = indexer_sl(ser)[Interval(1, 5)] result == expected - result = s[Interval(1, 5)] - result == expected - - expected = s - result = s.loc[[Interval(1, 5), Interval(3, 7)]] - tm.assert_series_equal(expected, result) - - result = s[[Interval(1, 5), Interval(3, 7)]] + expected = ser + result = indexer_sl(ser)[[Interval(1, 5), Interval(3, 7)]] tm.assert_series_equal(expected, result) with 
pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): - s.loc[Interval(3, 5)] + indexer_sl(ser)[Interval(3, 5)] with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): - s.loc[[Interval(3, 5)]] - - with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): - s[Interval(3, 5)] - - with pytest.raises(KeyError, match=r"^\[Interval\(3, 5, closed='right'\)\]$"): - s[[Interval(3, 5)]] + indexer_sl(ser)[[Interval(3, 5)]] # slices with interval (only exact matches) - expected = s - result = s.loc[Interval(1, 5) : Interval(3, 7)] - tm.assert_series_equal(expected, result) - - result = s[Interval(1, 5) : Interval(3, 7)] + expected = ser + result = indexer_sl(ser)[Interval(1, 5) : Interval(3, 7)] tm.assert_series_equal(expected, result) msg = "'can only get slices from an IntervalIndex if bounds are" " non-overlapping and all monotonic increasing or decreasing'" with pytest.raises(KeyError, match=msg): - s.loc[Interval(1, 6) : Interval(3, 8)] + indexer_sl(ser)[Interval(1, 6) : Interval(3, 8)] - with pytest.raises(KeyError, match=msg): - s[Interval(1, 6) : Interval(3, 8)] - - # slices with scalar raise for overlapping intervals - # TODO KeyError is the appropriate error? - with pytest.raises(KeyError, match=msg): - s.loc[1:4] + if indexer_sl is tm.loc: + # slices with scalar raise for overlapping intervals + # TODO KeyError is the appropriate error? 
+ with pytest.raises(KeyError, match=msg): + ser.loc[1:4] - def test_non_unique(self): + def test_non_unique(self, indexer_sl): idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) - s = Series(range(len(idx)), index=idx) + ser = Series(range(len(idx)), index=idx) - result = s.loc[Interval(1, 3)] + result = indexer_sl(ser)[Interval(1, 3)] assert result == 0 - result = s.loc[[Interval(1, 3)]] - expected = s.iloc[0:1] + result = indexer_sl(ser)[[Interval(1, 3)]] + expected = ser.iloc[0:1] tm.assert_series_equal(expected, result) - def test_non_unique_moar(self): + def test_non_unique_moar(self, indexer_sl): idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) - s = Series(range(len(idx)), index=idx) - - expected = s.iloc[[0, 1]] - result = s.loc[Interval(1, 3)] - tm.assert_series_equal(expected, result) + ser = Series(range(len(idx)), index=idx) - expected = s - result = s.loc[Interval(1, 3) :] + expected = ser.iloc[[0, 1]] + result = indexer_sl(ser)[Interval(1, 3)] tm.assert_series_equal(expected, result) - expected = s - result = s[Interval(1, 3) :] + expected = ser + result = indexer_sl(ser)[Interval(1, 3) :] tm.assert_series_equal(expected, result) - expected = s.iloc[[0, 1]] - result = s[[Interval(1, 3)]] + expected = ser.iloc[[0, 1]] + result = indexer_sl(ser)[[Interval(1, 3)]] tm.assert_series_equal(expected, result) - def test_missing_key_error_message(self, frame_or_series): + def test_loc_getitem_missing_key_error_message( + self, frame_or_series, series_with_interval_index + ): # GH#27365 - obj = frame_or_series( - np.arange(5), index=IntervalIndex.from_breaks(np.arange(6)) - ) + ser = series_with_interval_index.copy() + obj = frame_or_series(ser) with pytest.raises(KeyError, match=r"\[6\]"): obj.loc[[4, 5, 6]] diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 62c0171fe641f..f71b39d53d825 100644 --- 
a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm import pandas.core.common as com diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index d0ef95d2fa56c..f07bf3464b74c 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -57,26 +62,22 @@ def test_series_getitem_duplicates_multiindex(level0_value): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("indexer", [lambda s: s[2000, 3], lambda s: s.loc[2000, 3]]) -def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer): +def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl): s = multiindex_year_month_day_dataframe_random_data["A"] expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) - result = indexer(s) + result = indexer_sl(s)[2000, 3] tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - "indexer", [lambda s: s[2000, 3, 10], lambda s: s.loc[2000, 3, 10]] -) def test_series_getitem_returns_scalar( - multiindex_year_month_day_dataframe_random_data, indexer + multiindex_year_month_day_dataframe_random_data, indexer_sl ): s = multiindex_year_month_day_dataframe_random_data["A"] expected = s.iloc[49] - result = indexer(s) + result = indexer_sl(s)[2000, 3, 10] assert result == expected diff --git a/pandas/tests/indexing/multiindex/test_iloc.py 
b/pandas/tests/indexing/multiindex/test_iloc.py index 9859c7235c380..db91d5ad88252 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm @@ -13,14 +17,10 @@ def simple_multiindex_dataframe(): random data by default. """ - def _simple_multiindex_dataframe(data=None): - if data is None: - data = np.random.randn(3, 3) - return DataFrame( - data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] - ) - - return _simple_multiindex_dataframe + data = np.random.randn(3, 3) + return DataFrame( + data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]] + ) @pytest.mark.parametrize( @@ -41,23 +41,23 @@ def _simple_multiindex_dataframe(data=None): ], ) def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe): - arr = np.random.randn(3, 3) - df = simple_multiindex_dataframe(arr) + df = simple_multiindex_dataframe + arr = df.values result = indexer(df) expected = expected(arr) tm.assert_series_equal(result, expected) def test_iloc_returns_dataframe(simple_multiindex_dataframe): - df = simple_multiindex_dataframe() + df = simple_multiindex_dataframe result = df.iloc[[0, 1]] expected = df.xs(4, drop_level=False) tm.assert_frame_equal(result, expected) def test_iloc_returns_scalar(simple_multiindex_dataframe): - arr = np.random.randn(3, 3) - df = simple_multiindex_dataframe(arr) + df = simple_multiindex_dataframe + arr = df.values result = df.iloc[2, 2] expected = arr[2, 2] assert result == expected diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py index eaa7029b118b1..a38b5f6cc449a 100644 --- a/pandas/tests/indexing/multiindex/test_indexing_slow.py +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -4,7 
+4,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm m = 50 @@ -37,49 +40,49 @@ b = df.drop_duplicates(subset=cols[:-1]) -@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") -@pytest.mark.parametrize("lexsort_depth", list(range(5))) -@pytest.mark.parametrize("key", keys) -@pytest.mark.parametrize("frame", [a, b]) -def test_multiindex_get_loc(lexsort_depth, key, frame): - # GH7724, GH2646 +def validate(mi, df, key): + # check indexing into a multi-index before & past the lexsort depth - with warnings.catch_warnings(record=True): + mask = np.ones(len(df)).astype("bool") - # test indexing into a multi-index before & past the lexsort depth + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k - def validate(mi, df, key): - mask = np.ones(len(df)).astype("bool") + if not mask.any(): + assert key[: i + 1] not in mi.index + continue - # test for all partials of this key - for i, k in enumerate(key): - mask &= df.iloc[:, i] == k + assert key[: i + 1] in mi.index + right = df[mask].copy() - if not mask.any(): - assert key[: i + 1] not in mi.index - continue + if i + 1 != len(key): # partial key + return_value = right.drop(cols[: i + 1], axis=1, inplace=True) + assert return_value is None + return_value = right.set_index(cols[i + 1 : -1], inplace=True) + assert return_value is None + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) - assert key[: i + 1] in mi.index - right = df[mask].copy() + else: # full key + return_value = right.set_index(cols[:-1], inplace=True) + assert return_value is None + if len(right) == 1: # single hit + right = Series( + right["jolia"].values, name=right.index[0], index=["jolia"] + ) + tm.assert_series_equal(mi.loc[key[: i + 1]], right) + else: # multi hit + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) - if i + 1 != len(key): # partial key - return_value = right.drop(cols[: 
i + 1], axis=1, inplace=True) - assert return_value is None - return_value = right.set_index(cols[i + 1 : -1], inplace=True) - assert return_value is None - tm.assert_frame_equal(mi.loc[key[: i + 1]], right) - else: # full key - return_value = right.set_index(cols[:-1], inplace=True) - assert return_value is None - if len(right) == 1: # single hit - right = Series( - right["jolia"].values, name=right.index[0], index=["jolia"] - ) - tm.assert_series_equal(mi.loc[key[: i + 1]], right) - else: # multi hit - tm.assert_frame_equal(mi.loc[key[: i + 1]], right) +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +@pytest.mark.parametrize("lexsort_depth", list(range(5))) +@pytest.mark.parametrize("key", keys) +@pytest.mark.parametrize("frame", [a, b]) +def test_multiindex_get_loc(lexsort_depth, key, frame): + # GH7724, GH2646 + with warnings.catch_warnings(record=True): if lexsort_depth == 0: df = frame.copy() else: diff --git a/pandas/tests/indexing/multiindex/test_insert.py b/pandas/tests/indexing/multiindex/test_insert.py deleted file mode 100644 index 9f5ad90d36e03..0000000000000 --- a/pandas/tests/indexing/multiindex/test_insert.py +++ /dev/null @@ -1,31 +0,0 @@ -import numpy as np - -from pandas import DataFrame, MultiIndex, Series -import pandas._testing as tm - - -class TestMultiIndexInsertion: - def test_setitem_mixed_depth(self): - arrays = [ - ["a", "top", "top", "routine1", "routine1", "routine2"], - ["", "OD", "OD", "result1", "result2", "result1"], - ["", "wx", "wy", "", "", ""], - ] - - tuples = sorted(zip(*arrays)) - index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) - - result = df.copy() - expected = df.copy() - result["b"] = [1, 2, 3, 4] - expected["b", "", ""] = [1, 2, 3, 4] - tm.assert_frame_equal(result, expected) - - def test_dataframe_insert_column_all_na(self): - # GH #1534 - mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) - df = DataFrame([[1, 2], [3, 4], [5, 
6]], index=mix) - s = Series({(1, 1): 1, (1, 2): 2}) - df["new"] = s - assert df["new"].isna().all() diff --git a/pandas/tests/indexing/multiindex/test_ix.py b/pandas/tests/indexing/multiindex/test_ix.py deleted file mode 100644 index abf989324e4a5..0000000000000 --- a/pandas/tests/indexing/multiindex/test_ix.py +++ /dev/null @@ -1,64 +0,0 @@ -import numpy as np -import pytest - -from pandas.errors import PerformanceWarning - -from pandas import DataFrame, MultiIndex -import pandas._testing as tm - - -class TestMultiIndex: - def test_frame_setitem_loc(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - frame.loc[("bar", "two"), "B"] = 5 - assert frame.loc[("bar", "two"), "B"] == 5 - - # with integer labels - df = frame.copy() - df.columns = list(range(3)) - df.loc[("bar", "two"), 1] = 7 - assert df.loc[("bar", "two"), 1] == 7 - - def test_loc_general(self): - - # GH 2817 - data = { - "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, - } - df = DataFrame(data).set_index(keys=["col", "year"]) - key = 4.0, 2012 - - # emits a PerformanceWarning, ok - with tm.assert_produces_warning(PerformanceWarning): - tm.assert_frame_equal(df.loc[key], df.iloc[2:]) - - # this is ok - return_value = df.sort_index(inplace=True) - assert return_value is None - res = df.loc[key] - - # col has float dtype, result should be Float64Index - index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) - expected = DataFrame({"amount": [222, 333, 444]}, index=index) - tm.assert_frame_equal(res, expected) - - def test_loc_multiindex_missing_label_raises(self): - # GH 21593 - df = DataFrame( - np.random.randn(3, 3), - columns=[[2, 2, 4], [6, 8, 10]], - index=[[4, 4, 8], [8, 10, 12]], - ) - - with pytest.raises(KeyError, match=r"^2$"): - df.loc[2] - - def test_series_loc_getitem_fancy( - self, 
multiindex_year_month_day_dataframe_random_data - ): - s = multiindex_year_month_day_dataframe_random_data["A"] - expected = s.reindex(s.index[49:51]) - result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 4c9912f1591c7..07503b5b34176 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -1,8 +1,15 @@ import numpy as np import pytest +from pandas.errors import PerformanceWarning + import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -24,6 +31,61 @@ def frame_random_data_integer_multi_index(): class TestMultiIndexLoc: + def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + frame.loc[("bar", "two"), "B"] = 5 + assert frame.loc[("bar", "two"), "B"] == 5 + + # with integer labels + df = frame.copy() + df.columns = list(range(3)) + df.loc[("bar", "two"), 1] = 7 + assert df.loc[("bar", "two"), 1] == 7 + + def test_loc_getitem_general(self): + + # GH#2817 + data = { + "amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, + "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, + } + df = DataFrame(data).set_index(keys=["col", "year"]) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + return_value = df.sort_index(inplace=True) + assert return_value is None + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) + expected = DataFrame({"amount": [222, 
333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_loc_getitem_multiindex_missing_label_raises(self): + # GH#21593 + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + def test_loc_getitem_list_of_tuples_with_multiindex( + self, multiindex_year_month_day_dataframe_random_data + ): + ser = multiindex_year_month_day_dataframe_random_data["A"] + expected = ser.reindex(ser.index[49:51]) + result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) + def test_loc_getitem_series(self): # GH14730 # passing a series as a key with a MultiIndex diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 9a3039c28416c..e00b77003ebdc 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -4,7 +4,12 @@ from pandas.errors import PerformanceWarning import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index e5d114d5a9b18..9e85f9f65a3bc 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -2,119 +2,122 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna +from pandas import ( + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, +) import pandas._testing as tm import pandas.core.common as com +def assert_equal(a, b): + assert a == b + + class TestMultiIndexSetItem: + def check(self, target, indexers, value, compare_fn=assert_equal, expected=None): + target.loc[indexers] = 
value + result = target.loc[indexers] + if expected is None: + expected = value + compare_fn(result, expected) + def test_setitem_multiindex(self): - for index_fn in ("loc",): - - def assert_equal(a, b): - assert a == b - - def check(target, indexers, value, compare_fn, expected=None): - fn = getattr(target, index_fn) - fn.__setitem__(indexers, value) - result = fn.__getitem__(indexers) - if expected is None: - expected = value - compare_fn(result, expected) - - # GH7190 - index = MultiIndex.from_product( - [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] - ) - t, n = 0, 2 - df = DataFrame( - np.nan, - columns=["A", "w", "l", "a", "x", "X", "d", "profit"], - index=index, - ) - check(target=df, indexers=((t, n), "X"), value=0, compare_fn=assert_equal) - - df = DataFrame( - -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index - ) - check(target=df, indexers=((t, n), "X"), value=1, compare_fn=assert_equal) - - df = DataFrame( - columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index - ) - check(target=df, indexers=((t, n), "X"), value=2, compare_fn=assert_equal) - - # gh-7218: assigning with 0-dim arrays - df = DataFrame( - -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index - ) - check( - target=df, - indexers=((t, n), "X"), - value=np.array(3), - compare_fn=assert_equal, - expected=3, - ) - - # GH5206 - df = DataFrame( - np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float - ) - df["F"] = 99 - row_selection = df["A"] % 2 == 0 - col_selection = ["B", "C"] - df.loc[row_selection, col_selection] = df["F"] - output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) - tm.assert_frame_equal(df.loc[row_selection, col_selection], output) - check( - target=df, - indexers=(row_selection, col_selection), - value=df["F"], - compare_fn=tm.assert_frame_equal, - expected=output, - ) - - # GH11372 - idx = MultiIndex.from_product( - [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", 
freq="MS")] - ) - cols = MultiIndex.from_product( - [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] - ) - - df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) - - subidx = MultiIndex.from_tuples( - [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] - ) - subcols = MultiIndex.from_tuples( - [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] - ) - - vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) - check( - target=df, - indexers=(subidx, subcols), - value=vals, - compare_fn=tm.assert_frame_equal, - ) - # set all columns - vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) - check( - target=df, - indexers=(subidx, slice(None, None, None)), - value=vals, - compare_fn=tm.assert_frame_equal, - ) - # identity - copy = df.copy() - check( - target=df, - indexers=(df.index, df.columns), - value=df, - compare_fn=tm.assert_frame_equal, - expected=copy, - ) + # GH#7190 + cols = ["A", "w", "l", "a", "x", "X", "d", "profit"] + index = MultiIndex.from_product( + [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] + ) + t, n = 0, 2 + + df = DataFrame( + np.nan, + columns=cols, + index=index, + ) + self.check(target=df, indexers=((t, n), "X"), value=0) + + df = DataFrame(-999, columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=1) + + df = DataFrame(columns=cols, index=index) + self.check(target=df, indexers=((t, n), "X"), value=2) + + # gh-7218: assigning with 0-dim arrays + df = DataFrame(-999, columns=cols, index=index) + self.check( + target=df, + indexers=((t, n), "X"), + value=np.array(3), + expected=3, + ) + + def test_setitem_multiindex2(self): + # GH#5206 + df = DataFrame( + np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float + ) + df["F"] = 99 + row_selection = df["A"] % 2 == 0 + col_selection = ["B", "C"] + df.loc[row_selection, col_selection] = df["F"] + output = DataFrame(99.0, index=[0, 2, 4], 
columns=["B", "C"]) + tm.assert_frame_equal(df.loc[row_selection, col_selection], output) + self.check( + target=df, + indexers=(row_selection, col_selection), + value=df["F"], + compare_fn=tm.assert_frame_equal, + expected=output, + ) + + def test_setitem_multiindex3(self): + # GH#11372 + idx = MultiIndex.from_product( + [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] + ) + cols = MultiIndex.from_product( + [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] + ) + + df = DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + + subidx = MultiIndex.from_tuples( + [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] + ) + subcols = MultiIndex.from_tuples( + [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] + ) + + vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + self.check( + target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # set all columns + vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + self.check( + target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # identity + copy = df.copy() + self.check( + target=df, + indexers=(df.index, df.columns), + value=df, + compare_fn=tm.assert_frame_equal, + expected=copy, + ) def test_multiindex_setitem(self): @@ -140,6 +143,8 @@ def test_multiindex_setitem(self): with pytest.raises(TypeError, match=msg): df.loc["bar"] *= 2 + def test_multiindex_setitem2(self): + # from SO # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation df_orig = DataFrame.from_dict( @@ -231,17 +236,6 @@ def test_groupby_example(self): grp = df.groupby(level=index_cols[:4]) df["new_col"] = np.nan - f_index = np.arange(5) - - def f(name, df2): - return Series(np.arange(df2.shape[0]), name=df2.index.values[0]).reindex( - f_index - ) - - # FIXME: dont leave commented-out - # 
TODO(wesm): unused? - # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T - # we are actually operating on a copy here # but in this case, that's ok for name, df2 in grp: @@ -326,8 +320,10 @@ def test_frame_setitem_multi_column(self): cp["a"] = cp["b"].values tm.assert_frame_equal(cp["a"], cp["b"]) + def test_frame_setitem_multi_column2(self): + # --------------------------------------- - # #1803 + # GH#1803 columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]) df = DataFrame(index=[1, 3, 5], columns=columns) @@ -348,6 +344,7 @@ def test_frame_setitem_multi_column(self): assert sliced_a2.name == ("A", "2") assert sliced_b1.name == ("B", "1") + # TODO: no setitem here? def test_getitem_setitem_tuple_plus_columns( self, multiindex_year_month_day_dataframe_random_data ): @@ -359,29 +356,23 @@ def test_getitem_setitem_tuple_plus_columns( expected = df.loc[2000, 1, 6][["A", "B", "C"]] tm.assert_series_equal(result, expected) - def test_getitem_setitem_slice_integers(self): + def test_loc_getitem_setitem_slice_integers(self, frame_or_series): index = MultiIndex( levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]] ) - frame = DataFrame( + obj = DataFrame( np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"] ) - res = frame.loc[1:2] - exp = frame.reindex(frame.index[2:]) - tm.assert_frame_equal(res, exp) + if frame_or_series is not DataFrame: + obj = obj["a"] - frame.loc[1:2] = 7 - assert (frame.loc[1:2] == 7).values.all() + res = obj.loc[1:2] + exp = obj.reindex(obj.index[2:]) + tm.assert_equal(res, exp) - series = Series(np.random.randn(len(index)), index=index) - - res = series.loc[1:2] - exp = series.reindex(series.index[2:]) - tm.assert_series_equal(res, exp) - - series.loc[1:2] = 7 - assert (series.loc[1:2] == 7).values.all() + obj.loc[1:2] = 7 + assert (obj.loc[1:2] == 7).values.all() def test_setitem_change_dtype(self, multiindex_dataframe_random_data): frame = 
multiindex_dataframe_random_data @@ -411,9 +402,9 @@ def test_nonunique_assignment_1750(self): ) df = df.set_index(["A", "B"]) - ix = MultiIndex.from_tuples([(1, 1)]) + mi = MultiIndex.from_tuples([(1, 1)]) - df.loc[ix, "C"] = "_" + df.loc[mi, "C"] = "_" assert (df.xs((1, 1))["C"] == "_").all() @@ -439,6 +430,33 @@ def test_setitem_nonmonotonic(self): tm.assert_frame_equal(df, expected) +class TestSetitemWithExpansionMultiIndex: + def test_setitem_new_column_mixed_depth(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) + + result = df.copy() + expected = df.copy() + result["b"] = [1, 2, 3, 4] + expected["b", "", ""] = [1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + def test_setitem_new_column_all_na(self): + # GH#1534 + mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) + s = Series({(1, 1): 1, (1, 2): 2}) + df["new"] = s + assert df["new"].isna().all() + + def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 6c7d5f06ac355..4c3653b5cf64d 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -4,9 +4,14 @@ from pandas.errors import UnsortedIndexError import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) import pandas._testing as tm -from pandas.core.indexing import non_reducing_slice from pandas.tests.indexing.common import _mklbl @@ -763,59 +768,6 @@ def 
test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_dat expected = ymd.reindex(s.index[5:]) tm.assert_frame_equal(result, expected) - def test_non_reducing_slice_on_multiindex(self): - # GH 19861 - dic = { - ("a", "d"): [1, 4], - ("a", "c"): [2, 3], - ("b", "c"): [3, 2], - ("b", "d"): [4, 1], - } - df = DataFrame(dic, index=[0, 1]) - idx = pd.IndexSlice - slice_ = idx[:, idx["b", "d"]] - tslice_ = non_reducing_slice(slice_) - - result = df.loc[tslice_] - expected = DataFrame({("b", "d"): [4, 1]}) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "slice_", - [ - pd.IndexSlice[:, :], - # check cols - pd.IndexSlice[:, pd.IndexSlice[["a"]]], # inferred deeper need list - pd.IndexSlice[:, pd.IndexSlice[["a"], ["c"]]], # inferred deeper need list - pd.IndexSlice[:, pd.IndexSlice["a", "c", :]], - pd.IndexSlice[:, pd.IndexSlice["a", :, "e"]], - pd.IndexSlice[:, pd.IndexSlice[:, "c", "e"]], - pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d"], :]], # check list - pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d", "-"], :]], # allow missing - pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d", "-"], "e"]], # no slice - # check rows - pd.IndexSlice[pd.IndexSlice[["U"]], :], # inferred deeper need list - pd.IndexSlice[pd.IndexSlice[["U"], ["W"]], :], # inferred deeper need list - pd.IndexSlice[pd.IndexSlice["U", "W", :], :], - pd.IndexSlice[pd.IndexSlice["U", :, "Y"], :], - pd.IndexSlice[pd.IndexSlice[:, "W", "Y"], :], - pd.IndexSlice[pd.IndexSlice[:, "W", ["Y", "Z"]], :], # check list - pd.IndexSlice[pd.IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing - pd.IndexSlice[pd.IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice - # check simultaneous - pd.IndexSlice[pd.IndexSlice[:, "W", "Y"], pd.IndexSlice["a", "c", :]], - ], - ) - def test_non_reducing_multi_slice_on_multiindex(self, slice_): - # GH 33562 - cols = pd.MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]) - idxs = pd.MultiIndex.from_product([["U", "V"], ["W", "X"], 
["Y", "Z"]]) - df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs) - - expected = df.loc[slice_] - result = df.loc[non_reducing_slice(slice_)] - tm.assert_frame_equal(result, expected) - def test_loc_slice_negative_stepsize(self): # GH#38071 mi = MultiIndex.from_product([["a", "b"], [0, 1]]) diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py index b3e8c4a83b9fc..6ba083d65ac3f 100644 --- a/pandas/tests/indexing/multiindex/test_sorted.py +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index fbf33999386e6..f11f000bb79fd 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -1,9 +1,17 @@ -from datetime import datetime, timezone +from datetime import ( + datetime, + timezone, +) import numpy as np import pytest -from pandas import CategoricalDtype, DataFrame, Series, Timestamp +from pandas import ( + CategoricalDtype, + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 1b9b6452b2e33..68ae1a0dd6f3d 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -322,6 +322,7 @@ def test_loc_listlike_dtypes(self): with pytest.raises(KeyError, match=re.escape(msg)): df.loc[["a", "x"]] + def test_loc_listlike_dtypes_duplicated_categories_and_codes(self): # duplicated categories and codes index = CategoricalIndex(["a", "b", "a"]) df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) @@ -341,9 +342,11 @@ def test_loc_listlike_dtypes(self): ) tm.assert_frame_equal(res, exp, check_index_type=True) + msg = "The following labels 
were missing: Index(['x'], dtype='object')" with pytest.raises(KeyError, match=re.escape(msg)): df.loc[["a", "x"]] + def test_loc_listlike_dtypes_unused_category(self): # contains unused category index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) @@ -363,6 +366,7 @@ def test_loc_listlike_dtypes(self): ) tm.assert_frame_equal(res, exp, check_index_type=True) + msg = "The following labels were missing: Index(['x'], dtype='object')" with pytest.raises(KeyError, match=re.escape(msg)): df.loc[["a", "x"]] @@ -405,6 +409,8 @@ def test_ix_categorical_index(self): expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns) tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + def test_ix_categorical_index_non_unique(self): + # non-unique df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX")) cdf = df.copy() @@ -444,30 +450,16 @@ def test_loc_slice(self): def test_loc_and_at_with_categorical_index(self): # GH 20629 - s = Series([1, 2, 3], index=CategoricalIndex(["A", "B", "C"])) - assert s.loc["A"] == 1 - assert s.at["A"] == 1 df = DataFrame( [[1, 2], [3, 4], [5, 6]], index=CategoricalIndex(["A", "B", "C"]) ) - assert df.loc["B", 1] == 4 - assert df.at["B", 1] == 4 - - def test_indexing_with_category(self): - # https://github.com/pandas-dev/pandas/issues/12564 - # consistent result if comparing as Dataframe - - cat = DataFrame({"A": ["foo", "bar", "baz"]}) - exp = DataFrame({"A": [True, False, False]}) - - res = cat[["A"]] == "foo" - tm.assert_frame_equal(res, exp) - - cat["A"] = cat["A"].astype("category") + s = df[0] + assert s.loc["A"] == 1 + assert s.at["A"] == 1 - res = cat[["A"]] == "foo" - tm.assert_frame_equal(res, exp) + assert df.loc["B", 1] == 4 + assert df.at["B", 1] == 4 @pytest.mark.parametrize( "idx_values", @@ -495,7 +487,7 @@ def test_indexing_with_category(self): pd.timedelta_range(start="1d", periods=3).array, ], ) - def 
test_loc_with_non_string_categories(self, idx_values, ordered): + def test_loc_getitem_with_non_string_categories(self, idx_values, ordered): # GH-17569 cat_idx = CategoricalIndex(idx_values, ordered=ordered) df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 1ac2a16660f93..4a66073d4f7a5 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -4,7 +4,13 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, Timestamp, date_range, option_context +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + option_context, +) import pandas._testing as tm import pandas.core.common as com @@ -373,6 +379,7 @@ def test_setting_with_copy_bug(self): with pytest.raises(com.SettingWithCopyError, match=msg): df[["c"]][mask] = df[["b"]][mask] + def test_setting_with_copy_bug_no_warning(self): # invalid warning as we are returning a new object # GH 8730 df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index f6f04e935ac52..2c962df30f1ff 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -1,11 +1,17 @@ from datetime import timedelta import itertools -from typing import Dict, List +from typing import ( + Dict, + List, +) import numpy as np import pytest -from pandas.compat import IS64, is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) import pandas as pd import pandas._testing as tm diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 44a5e2ae6d9e9..28a1098c10d9f 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -2,7 +2,13 @@ import pytest import pandas as 
pd -from pandas import DataFrame, Index, Series, Timestamp, date_range +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + date_range, +) import pandas._testing as tm @@ -19,24 +25,20 @@ def test_indexing_with_datetime_tz(self): df.iloc[1, 1] = pd.NaT df.iloc[1, 2] = pd.NaT - # indexing - result = df.iloc[1] expected = Series( [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], index=list("ABC"), dtype="object", name=1, ) + + # indexing + result = df.iloc[1] tm.assert_series_equal(result, expected) result = df.loc[1] - expected = Series( - [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], - index=list("ABC"), - dtype="object", - name=1, - ) tm.assert_series_equal(result, expected) + def test_indexing_fast_xs(self): # indexing - fast_xs df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) result = df.iloc[5] @@ -53,6 +55,7 @@ def test_indexing_with_datetime_tz(self): expected = df.iloc[4:] tm.assert_frame_equal(result, expected) + def test_setitem_with_expansion(self): # indexing - setting an element df = DataFrame( data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), @@ -216,12 +219,14 @@ def test_nanosecond_getitem_setitem_with_tz(self): expected = DataFrame(-1, index=index, columns=["a"]) tm.assert_frame_equal(result, expected) - def test_loc_setitem_with_existing_dst(self): + def test_loc_setitem_with_expansion_and_existing_dst(self): # GH 18308 start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") idx = pd.date_range(start, end, closed="left", freq="H") + assert ts not in idx # i.e. 
result.loc setitem is with-expansion + result = DataFrame(index=idx, columns=["value"]) result.loc[ts, "value"] = 12 expected = DataFrame( @@ -234,21 +239,23 @@ def test_loc_setitem_with_existing_dst(self): def test_getitem_millisecond_resolution(self, frame_or_series): # GH#33589 + + keys = [ + "2017-10-25T16:25:04.151", + "2017-10-25T16:25:04.252", + "2017-10-25T16:50:05.237", + "2017-10-25T16:50:05.238", + ] obj = frame_or_series( [1, 2, 3, 4], - index=[ - Timestamp("2017-10-25T16:25:04.151"), - Timestamp("2017-10-25T16:25:04.252"), - Timestamp("2017-10-25T16:50:05.237"), - Timestamp("2017-10-25T16:50:05.238"), - ], + index=[Timestamp(x) for x in keys], ) - result = obj["2017-10-25T16:25:04.252":"2017-10-25T16:50:05.237"] + result = obj[keys[1] : keys[2]] expected = frame_or_series( [2, 3], index=[ - Timestamp("2017-10-25T16:25:04.252"), - Timestamp("2017-10-25T16:50:05.237"), + Timestamp(keys[1]), + Timestamp(keys[2]), ], ) tm.assert_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 5eb3d9e9ec00e..d1f1db741509f 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series +from pandas import ( + DataFrame, + Float64Index, + Index, + Int64Index, + RangeIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index 84bd1d63f6bbc..f1fe464ca0854 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import DataFrame, Series, period_range +from pandas import ( + DataFrame, + Series, + period_range, +) def test_iat(float_frame): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 1668123e782ff..d5e5cabc93b66 100644 --- 
a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -2,7 +2,10 @@ from datetime import datetime import re -from warnings import catch_warnings, simplefilter +from warnings import ( + catch_warnings, + simplefilter, +) import numpy as np import pytest @@ -918,7 +921,7 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): with pytest.raises(ValueError, match=msg): obj.iloc[nd3] = 0 - @pytest.mark.parametrize("indexer", [lambda x: x.loc, lambda x: x.iloc]) + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) def test_iloc_getitem_read_only_values(self, indexer): # GH#10043 this is fundamentally a test for iloc, but test loc while # we're here diff --git a/pandas/tests/indexing/test_indexers.py b/pandas/tests/indexing/test_indexers.py index 14b2b494d65fb..45dcaf95ffdd0 100644 --- a/pandas/tests/indexing/test_indexers.py +++ b/pandas/tests/indexing/test_indexers.py @@ -2,7 +2,11 @@ import numpy as np import pytest -from pandas.core.indexers import is_scalar_indexer, length_of_indexer, validate_indices +from pandas.core.indexers import ( + is_scalar_indexer, + length_of_indexer, + validate_indices, +) def test_length_of_indexer(): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index dcd073681cecf..aec12f4cedcea 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -7,12 +7,22 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype +from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, +) import pandas as pd -from pandas import DataFrame, Index, NaT, Series, date_range, offsets, timedelta_range +from pandas import ( + DataFrame, + Index, + NaT, + Series, + date_range, + offsets, + timedelta_range, +) import pandas._testing as tm -from pandas.core.indexing import maybe_numeric_slice, non_reducing_slice from pandas.tests.indexing.common import _mklbl from 
pandas.tests.indexing.test_floats import gen_obj @@ -45,6 +55,9 @@ def test_setitem_ndarray_1d(self): ) tm.assert_series_equal(result, expected) + def test_setitem_ndarray_1d_2(self): + # GH5508 + # dtype getting changed? df = DataFrame(index=Index(np.arange(1, 11))) df["foo"] = np.zeros(10, dtype=np.float64) @@ -79,7 +92,7 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli): potential_errors = (IndexError, ValueError, NotImplementedError) with pytest.raises(potential_errors, match=msg): - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + with tm.assert_produces_warning(DeprecationWarning): idxr[nd3] def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli): @@ -125,6 +138,7 @@ def test_inf_upcast(self): expected = pd.Float64Index([1, 2, np.inf]) tm.assert_index_equal(result, expected) + def test_loc_setitem_with_expasnion_inf_upcast_empty(self): # Test with np.inf in columns df = DataFrame() df.loc[0, 0] = 1 @@ -148,6 +162,9 @@ def test_setitem_dtype_upcast(self): ) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("val", [3.14, "wxyz"]) + def test_setitem_dtype_upcast2(self, val): + # GH10280 df = DataFrame( np.arange(6, dtype="int64").reshape(2, 3), @@ -155,19 +172,19 @@ def test_setitem_dtype_upcast(self): columns=["foo", "bar", "baz"], ) - for val in [3.14, "wxyz"]: - left = df.copy() - left.loc["a", "bar"] = val - right = DataFrame( - [[0, val, 2], [3, 4, 5]], - index=list("ab"), - columns=["foo", "bar", "baz"], - ) + left = df.copy() + left.loc["a", "bar"] = val + right = DataFrame( + [[0, val, 2], [3, 4, 5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) - tm.assert_frame_equal(left, right) - assert is_integer_dtype(left["foo"]) - assert is_integer_dtype(left["baz"]) + tm.assert_frame_equal(left, right) + assert is_integer_dtype(left["foo"]) + assert is_integer_dtype(left["baz"]) + def test_setitem_dtype_upcast3(self): left = DataFrame( np.arange(6, 
dtype="int64").reshape(2, 3) / 10.0, index=list("ab"), @@ -195,6 +212,8 @@ def test_dups_fancy_indexing(self): expected = Index(["b", "a", "a"]) tm.assert_index_equal(result, expected) + def test_dups_fancy_indexing_across_dtypes(self): + # across dtypes df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) df.head() @@ -208,6 +227,7 @@ def test_dups_fancy_indexing(self): tm.assert_frame_equal(df, result) + def test_dups_fancy_indexing_not_in_order(self): # GH 3561, dups not in selected order df = DataFrame( {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")}, @@ -232,6 +252,8 @@ def test_dups_fancy_indexing(self): with pytest.raises(KeyError, match="with any missing labels"): df.loc[rows] + def test_dups_fancy_indexing_only_missing_label(self): + # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) with pytest.raises( @@ -244,6 +266,8 @@ def test_dups_fancy_indexing(self): # ToDo: check_index_type can be True after GH 11497 + def test_dups_fancy_indexing_missing_label(self): + # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with pytest.raises(KeyError, match="with any missing labels"): @@ -253,6 +277,8 @@ def test_dups_fancy_indexing(self): with pytest.raises(KeyError, match="with any missing labels"): df.loc[[0, 8, 0]] + def test_dups_fancy_indexing_non_unique(self): + # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) with pytest.raises(KeyError, match="with any missing labels"): @@ -266,6 +292,8 @@ def test_dups_fancy_indexing2(self): with pytest.raises(KeyError, match="with any missing labels"): df.loc[:, ["A", "B", "C"]] + def test_dups_fancy_indexing3(self): + # GH 6504, multi-axis indexing df = DataFrame( np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] @@ -447,6 +475,7 @@ def test_multi_assign(self): df2.loc[mask, cols] = dft.loc[mask, cols].values 
tm.assert_frame_equal(df2, expected) + def test_multi_assign_broadcasting_rhs(self): # broadcasting on the rhs is required df = DataFrame( { @@ -478,6 +507,7 @@ def test_setitem_list(self): tm.assert_frame_equal(result, df) + def test_iloc_setitem_custom_object(self): # iloc with an object class TO: def __init__(self, value): @@ -523,6 +553,9 @@ def test_string_slice(self): with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] + def test_string_slice_empty(self): + # GH 14424 + df = DataFrame() assert not df.index._is_all_dates with pytest.raises(KeyError, match="'2011'"): @@ -567,6 +600,7 @@ def test_astype_assignment(self): ) tm.assert_frame_equal(df, expected) + def test_astype_assignment_full_replacements(self): # full replacements / no nans df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) df.iloc[:, 0] = df["A"].astype(np.int64) @@ -630,9 +664,9 @@ class TestMisc: def test_float_index_to_mixed(self): df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df["a"] = 10 - tm.assert_frame_equal( - DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}), df - ) + + expected = DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}) + tm.assert_frame_equal(expected, df) def test_float_index_non_scalar_assignment(self): df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) @@ -717,12 +751,10 @@ def assert_slices_equivalent(l_slc, i_slc): assert_slices_equivalent(SLC[idx[13] : idx[9] : -1], SLC[13:8:-1]) assert_slices_equivalent(SLC[idx[9] : idx[13] : -1], SLC[:0]) - def test_slice_with_zero_step_raises(self): - s = Series(np.arange(20), index=_mklbl("A", 20)) + def test_slice_with_zero_step_raises(self, indexer_sl): + ser = Series(np.arange(20), index=_mklbl("A", 20)) with pytest.raises(ValueError, match="slice step cannot be zero"): - s[::0] - with pytest.raises(ValueError, match="slice step cannot be zero"): - s.loc[::0] + indexer_sl(ser)[::0] def test_indexing_assignment_dict_already_exists(self): index = Index([-5, 0, 5], 
name="z") @@ -758,51 +790,6 @@ def test_range_in_series_indexing(self, size): s.loc[range(2)] = 43 tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) - @pytest.mark.parametrize( - "slc", - [ - pd.IndexSlice[:, :], - pd.IndexSlice[:, 1], - pd.IndexSlice[1, :], - pd.IndexSlice[[1], [1]], - pd.IndexSlice[1, [1]], - pd.IndexSlice[[1], 1], - pd.IndexSlice[1], - pd.IndexSlice[1, 1], - slice(None, None, None), - [0, 1], - np.array([0, 1]), - Series([0, 1]), - ], - ) - def test_non_reducing_slice(self, slc): - df = DataFrame([[0, 1], [2, 3]]) - - tslice_ = non_reducing_slice(slc) - assert isinstance(df.loc[tslice_], DataFrame) - - def test_list_slice(self): - # like dataframe getitem - slices = [["A"], Series(["A"]), np.array(["A"])] - df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) - expected = pd.IndexSlice[:, ["A"]] - for subset in slices: - result = non_reducing_slice(subset) - tm.assert_frame_equal(df.loc[result], df.loc[expected]) - - def test_maybe_numeric_slice(self): - df = DataFrame({"A": [1, 2], "B": ["c", "d"], "C": [True, False]}) - result = maybe_numeric_slice(df, slice_=None) - expected = pd.IndexSlice[:, ["A"]] - assert result == expected - - result = maybe_numeric_slice(df, None, include_bool=True) - expected = pd.IndexSlice[:, ["A", "C"]] - assert all(result[1] == expected[1]) - result = maybe_numeric_slice(df, [1]) - expected = [1] - assert result == expected - def test_partial_boolean_frame_indexing(self): # GH 17170 df = DataFrame( @@ -905,7 +892,7 @@ def test_none_coercion_mixed_dtypes(self): class TestDatetimelikeCoercion: def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): - # dispatching _can_hold_element to underling DatetimeArray + # dispatching _can_hold_element to underlying DatetimeArray tz = tz_naive_fixture dti = date_range("2016-01-01", periods=3, tz=tz) @@ -1026,52 +1013,3 @@ def test_extension_array_cross_section_converts(): result = df.iloc[0] tm.assert_series_equal(result, expected) - 
- -def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): - # GH 30567 - ser = Series([None] * 10) - mask = [False] * 3 + [True] * 5 + [False] * 2 - ser[mask] = range(5) - result = ser - expected = Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") - tm.assert_series_equal(result, expected) - - -def test_missing_labels_inside_loc_matched_in_error_message(): - # GH34272 - s = Series({"a": 1, "b": 2, "c": 3}) - error_message_regex = "missing_0.*missing_1.*missing_2" - with pytest.raises(KeyError, match=error_message_regex): - s.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] - - -def test_many_missing_labels_inside_loc_error_message_limited(): - # GH34272 - n = 10000 - missing_labels = [f"missing_{label}" for label in range(n)] - s = Series({"a": 1, "b": 2, "c": 3}) - # regex checks labels between 4 and 9995 are replaced with ellipses - error_message_regex = "missing_4.*\\.\\.\\..*missing_9995" - with pytest.raises(KeyError, match=error_message_regex): - s.loc[["a", "c"] + missing_labels] - - -def test_long_text_missing_labels_inside_loc_error_message_limited(): - # GH34272 - s = Series({"a": 1, "b": 2, "c": 3}) - missing_labels = [f"long_missing_label_text_{i}" * 5 for i in range(3)] - # regex checks for very long labels there are new lines between each - error_message_regex = "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1" - with pytest.raises(KeyError, match=error_message_regex): - s.loc[["a", "c"] + missing_labels] - - -def test_setitem_categorical(): - # https://github.com/pandas-dev/pandas/issues/35369 - df = DataFrame({"h": Series(list("mn")).astype("category")}) - df.h = df.h.cat.reorder_categories(["n", "m"]) - expected = DataFrame( - {"h": pd.Categorical(["m", "n"]).reorder_categories(["n", "m"])} - ) - tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1cd352e4e0899..c98666b38b8b8 100644 --- 
a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1,5 +1,9 @@ """ test label based indexing with loc """ -from datetime import datetime, time, timedelta +from datetime import ( + datetime, + time, + timedelta, +) from io import StringIO import re @@ -1804,25 +1808,51 @@ def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box): with pytest.raises(KeyError, match=msg): ser2.to_frame().loc[box(ci2)] + def test_loc_getitem_many_missing_labels_inside_error_message_limited(self): + # GH#34272 + n = 10000 + missing_labels = [f"missing_{label}" for label in range(n)] + ser = Series({"a": 1, "b": 2, "c": 3}) + # regex checks labels between 4 and 9995 are replaced with ellipses + error_message_regex = "missing_4.*\\.\\.\\..*missing_9995" + with pytest.raises(KeyError, match=error_message_regex): + ser.loc[["a", "c"] + missing_labels] + + def test_loc_getitem_missing_labels_inside_matched_in_error_message(self): + # GH#34272 + ser = Series({"a": 1, "b": 2, "c": 3}) + error_message_regex = "missing_0.*missing_1.*missing_2" + with pytest.raises(KeyError, match=error_message_regex): + ser.loc[["a", "b", "missing_0", "c", "missing_1", "missing_2"]] + + def test_loc_getitem_long_text_missing_labels_inside_error_message_limited(self): + # GH#34272 + ser = Series({"a": 1, "b": 2, "c": 3}) + missing_labels = [f"long_missing_label_text_{i}" * 5 for i in range(3)] + # regex checks for very long labels there are new lines between each + error_message_regex = ( + "long_missing_label_text_0.*\\\\n.*long_missing_label_text_1" + ) + with pytest.raises(KeyError, match=error_message_regex): + ser.loc[["a", "c"] + missing_labels] + + def test_loc_getitem_series_label_list_missing_values(self): + # gh-11428 + key = np.array( + ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" + ) + ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) + with pytest.raises(KeyError, match="with any missing labels"): + 
ser.loc[key] -def test_series_loc_getitem_label_list_missing_values(): - # gh-11428 - key = np.array( - ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" - ) - s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) - with pytest.raises(KeyError, match="with any missing labels"): - s.loc[key] - - -def test_series_getitem_label_list_missing_integer_values(): - # GH: 25927 - s = Series( - index=np.array([9730701000001104, 10049011000001109]), - data=np.array([999000011000001104, 999000011000001104]), - ) - with pytest.raises(KeyError, match="with any missing labels"): - s.loc[np.array([9730701000001104, 10047311000001102])] + def test_loc_getitem_series_label_list_missing_integer_values(self): + # GH: 25927 + ser = Series( + index=np.array([9730701000001104, 10049011000001109]), + data=np.array([999000011000001104, 999000011000001104]), + ) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[np.array([9730701000001104, 10047311000001102])] @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index ad2d7250d9d6c..468e4cad742df 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -8,7 +8,15 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range +from pandas import ( + DataFrame, + Index, + Period, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index ce48fd1e5c905..39611bce2b4fa 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -1,10 +1,19 @@ """ test scalar indexing, including at and iat """ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest -from pandas import DataFrame, Series, 
Timedelta, Timestamp, date_range +from pandas import ( + DataFrame, + Series, + Timedelta, + Timestamp, + date_range, +) import pandas._testing as tm from pandas.tests.indexing.common import Base diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0a50ef2831534..a24c711df7b55 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1,4 +1,7 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) import itertools import re @@ -23,8 +26,16 @@ ) import pandas._testing as tm import pandas.core.algorithms as algos -from pandas.core.arrays import DatetimeArray, SparseArray, TimedeltaArray -from pandas.core.internals import BlockManager, SingleBlockManager, make_block +from pandas.core.arrays import ( + DatetimeArray, + SparseArray, + TimedeltaArray, +) +from pandas.core.internals import ( + BlockManager, + SingleBlockManager, + make_block, +) @pytest.fixture @@ -1233,8 +1244,8 @@ def check_frame_setitem(self, elem, index: Index, inplace: bool): if inplace: # assertion here implies setting was done inplace - # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" - # has no attribute "blocks" [union-attr] + # error: Item "ArrayManager" of "Union[ArrayManager, BlockManager]" has no + # attribute "blocks" assert df._mgr.blocks[0].values is arr # type:ignore[union-attr] else: assert df.dtypes[0] == object diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py index 333455875904a..4ca6e8b6598aa 100644 --- a/pandas/tests/internals/test_managers.py +++ b/pandas/tests/internals/test_managers.py @@ -5,7 +5,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.internals import ArrayManager, BlockManager +from pandas.core.internals import ( + ArrayManager, + BlockManager, +) def test_dataframe_creation(): diff --git a/pandas/tests/io/excel/__init__.py 
b/pandas/tests/io/excel/__init__.py index b1038f0314f41..e1de03e1f306c 100644 --- a/pandas/tests/io/excel/__init__.py +++ b/pandas/tests/io/excel/__init__.py @@ -2,7 +2,10 @@ import pytest -from pandas.compat._optional import get_version, import_optional_dependency +from pandas.compat._optional import ( + get_version, + import_optional_dependency, +) pytestmark = [ pytest.mark.filterwarnings( diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py index 0962b719efd4d..9010f978d268d 100644 --- a/pandas/tests/io/excel/test_openpyxl.py +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -1,16 +1,16 @@ -from distutils.version import LooseVersion from pathlib import Path import numpy as np import pytest -from pandas.compat._optional import get_version - import pandas as pd from pandas import DataFrame import pandas._testing as tm -from pandas.io.excel import ExcelWriter, _OpenpyxlWriter +from pandas.io.excel import ( + ExcelWriter, + _OpenpyxlWriter, +) openpyxl = pytest.importorskip("openpyxl") @@ -157,10 +157,6 @@ def test_read_with_bad_dimension( datapath, ext, header, expected_data, filename, read_only, request ): # GH 38956, 39001 - no/incorrect dimension information - version = LooseVersion(get_version(openpyxl)) - if (read_only or read_only is None) and version < "3.0.0": - msg = "openpyxl read-only sheet is incorrect when dimension data is wrong" - request.node.add_marker(pytest.mark.xfail(reason=msg)) path = datapath("io", "data", "excel", f"{filename}{ext}") if read_only is None: result = pd.read_excel(path, header=header) @@ -195,10 +191,6 @@ def test_append_mode_file(ext): @pytest.mark.parametrize("read_only", [True, False, None]) def test_read_with_empty_trailing_rows(datapath, ext, read_only, request): # GH 39181 - version = LooseVersion(get_version(openpyxl)) - if (read_only or read_only is None) and version < "3.0.0": - msg = "openpyxl read-only sheet is incorrect when dimension data is wrong" - 
request.node.add_marker(pytest.mark.xfail(reason=msg)) path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}") if read_only is None: result = pd.read_excel(path) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index a594718bd62d9..1c71666e88651 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1,4 +1,7 @@ -from datetime import datetime, time +from datetime import ( + datetime, + time, +) from functools import partial import os from urllib.error import URLError @@ -10,7 +13,12 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.tests.io.excel import xlrd_version @@ -117,6 +125,30 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "read_excel", func) + def test_engine_used(self, read_ext, engine, monkeypatch): + # GH 38884 + def parser(self, *args, **kwargs): + return self.engine + + monkeypatch.setattr(pd.ExcelFile, "parse", parser) + + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": "odf", + } + + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + def test_usecols_int(self, read_ext, df_ref): df_ref = df_ref.reindex(columns=["A", "B", "C"]) @@ -1164,6 +1196,24 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "ExcelFile", func) + def test_engine_used(self, read_ext, engine, monkeypatch): + expected_defaults = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xlsb": "pyxlsb", + "xls": "xlrd", + "ods": 
"odf", + } + + with pd.ExcelFile("test1" + read_ext) as excel: + result = excel.engine + + if engine is not None: + expected = engine + else: + expected = expected_defaults[read_ext[1:]] + assert result == expected + def test_excel_passes_na(self, read_ext): with pd.ExcelFile("test4" + read_ext) as excel: parsed = pd.read_excel( diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 0c61a8a18e153..c650f59a7da95 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1,4 +1,8 @@ -from datetime import date, datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) from functools import partial from io import BytesIO import os @@ -9,7 +13,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Index, MultiIndex, get_option, set_option +from pandas import ( + DataFrame, + Index, + MultiIndex, + get_option, + set_option, +) import pandas._testing as tm from pandas.io.excel import ( @@ -1305,6 +1315,15 @@ def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): with pytest.raises(ValueError, match="Excel does not support"): df.to_excel(path) + def test_excel_duplicate_columns_with_names(self, path): + # GH#39695 + df = DataFrame({"A": [0, 1], "B": [10, 11]}) + df.to_excel(path, columns=["A", "B", "A"], index=False) + + result = pd.read_excel(path) + expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"]) + tm.assert_frame_equal(result, expected) + class TestExcelWriterEngineTests: @pytest.mark.parametrize( diff --git a/pandas/tests/io/excel/test_xlwt.py b/pandas/tests/io/excel/test_xlwt.py index ac53a7d5aee69..7e1787d8c55d4 100644 --- a/pandas/tests/io/excel/test_xlwt.py +++ b/pandas/tests/io/excel/test_xlwt.py @@ -1,10 +1,17 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, options +from pandas import ( + DataFrame, + MultiIndex, + options, +) import 
pandas._testing as tm -from pandas.io.excel import ExcelWriter, _XlwtWriter +from pandas.io.excel import ( + ExcelWriter, + _XlwtWriter, +) xlwt = pytest.importorskip("xlwt") diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py index 785904fafd31a..8465d116805c7 100644 --- a/pandas/tests/io/formats/test_css.py +++ b/pandas/tests/io/formats/test_css.py @@ -2,7 +2,10 @@ import pandas._testing as tm -from pandas.io.formats.css import CSSResolver, CSSWarning +from pandas.io.formats.css import ( + CSSResolver, + CSSWarning, +) def assert_resolves(css, props, inherited=None): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c0e4f01d3e5a5..41efb594fd8e4 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -18,7 +18,10 @@ import pytest import pytz -from pandas.compat import IS64, is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) import pandas.util._test_decorators as td import pandas as pd @@ -2445,7 +2448,10 @@ def test_datetimeindex_highprecision(self, start_date): def test_timedelta64(self): - from datetime import datetime, timedelta + from datetime import ( + datetime, + timedelta, + ) Series(np.array([1100, 20], dtype="timedelta64[ns]")).to_string() diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 4990a14f302a6..5522631d222e1 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas.compat import IS64, PYPY +from pandas.compat import ( + IS64, + PYPY, +) from pandas import ( CategoricalIndex, diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index a154e51f68dba..66dc1cd4adddd 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -16,9 +16,23 @@ Styler, _get_level_lengths, 
_maybe_convert_css_to_tuples, + _non_reducing_slice, ) +def bar_grad(a=None, b=None, c=None, d=None): + """Used in multiple tests to simplify formatting of expected result""" + ret = [("width", "10em"), ("height", "80%")] + if all(x is None for x in [a, b, c, d]): + return ret + return ret + [ + ( + "background", + f"linear-gradient(90deg,{','.join(x for x in [a, b, c, d] if x)})", + ) + ] + + class TestStyler: def setup_method(self, method): np.random.seed(24) @@ -61,24 +75,15 @@ def test_repr_html_mathjax(self): def test_update_ctx(self): self.styler._update_ctx(self.attrs) - expected = {(0, 0): ["color: red"], (1, 0): ["color: blue"]} + expected = {(0, 0): [("color", "red")], (1, 0): [("color", "blue")]} assert self.styler.ctx == expected - def test_update_ctx_flatten_multi(self): - attrs = DataFrame({"A": ["color: red; foo: bar", "color: blue; foo: baz"]}) + def test_update_ctx_flatten_multi_and_trailing_semi(self): + attrs = DataFrame({"A": ["color: red; foo: bar", "color:blue ; foo: baz;"]}) self.styler._update_ctx(attrs) expected = { - (0, 0): ["color: red", " foo: bar"], - (1, 0): ["color: blue", " foo: baz"], - } - assert self.styler.ctx == expected - - def test_update_ctx_flatten_multi_traliing_semi(self): - attrs = DataFrame({"A": ["color: red; foo: bar;", "color: blue; foo: baz;"]}) - self.styler._update_ctx(attrs) - expected = { - (0, 0): ["color: red", " foo: bar"], - (1, 0): ["color: blue", " foo: baz"], + (0, 0): [("color", "red"), ("foo", "bar")], + (1, 0): [("color", "blue"), ("foo", "baz")], } assert self.styler.ctx == expected @@ -141,7 +146,7 @@ def test_multiple_render(self): s.render() # do 2 renders to ensure css styles not duplicated assert ( '" in s.render() + " color: red;\n}\n" in s.render() ) def test_render_empty_dfs(self): @@ -167,7 +172,7 @@ def test_set_properties(self): df = DataFrame({"A": [0, 1]}) result = df.style.set_properties(color="white", size="10px")._compute().ctx # order is deterministic - v = ["color: white", "size: 
10px"] + v = [("color", "white"), ("size", "10px")] expected = {(0, 0): v, (1, 0): v} assert result.keys() == expected.keys() for v1, v2 in zip(result.values(), expected.values()): @@ -180,7 +185,7 @@ def test_set_properties_subset(self): ._compute() .ctx ) - expected = {(0, 0): ["color: white"]} + expected = {(0, 0): [("color", "white")]} assert result == expected def test_empty_index_name_doesnt_display(self): @@ -254,8 +259,8 @@ def test_index_name(self): ], [ {"class": "index_name level0", "type": "th", "value": "A"}, - {"class": "blank", "type": "th", "value": ""}, - {"class": "blank", "type": "th", "value": ""}, + {"class": "blank col0", "type": "th", "value": ""}, + {"class": "blank col1", "type": "th", "value": ""}, ], ] @@ -293,7 +298,7 @@ def test_multiindex_name(self): [ {"class": "index_name level0", "type": "th", "value": "A"}, {"class": "index_name level1", "type": "th", "value": "B"}, - {"class": "blank", "type": "th", "value": ""}, + {"class": "blank col0", "type": "th", "value": ""}, ], ] @@ -313,19 +318,19 @@ def test_apply_axis(self): assert len(result.ctx) == 0 result._compute() expected = { - (0, 0): ["val: 1"], - (0, 1): ["val: 1"], - (1, 0): ["val: 1"], - (1, 1): ["val: 1"], + (0, 0): [("val", "1")], + (0, 1): [("val", "1")], + (1, 0): [("val", "1")], + (1, 1): [("val", "1")], } assert result.ctx == expected result = df.style.apply(f, axis=0) expected = { - (0, 0): ["val: 0"], - (0, 1): ["val: 1"], - (1, 0): ["val: 0"], - (1, 1): ["val: 1"], + (0, 0): [("val", "0")], + (0, 1): [("val", "1")], + (1, 0): [("val", "0")], + (1, 1): [("val", "1")], } result._compute() assert result.ctx == expected @@ -333,53 +338,52 @@ def test_apply_axis(self): result._compute() assert result.ctx == expected - def test_apply_subset(self): - axes = [0, 1] - slices = [ + @pytest.mark.parametrize( + "slice_", + [ pd.IndexSlice[:], pd.IndexSlice[:, ["A"]], pd.IndexSlice[[1], :], pd.IndexSlice[[1], ["A"]], pd.IndexSlice[:2, ["A", "B"]], - ] - for ax in axes: - for 
slice_ in slices: - result = ( - self.df.style.apply(self.h, axis=ax, subset=slice_, foo="baz") - ._compute() - .ctx - ) - expected = { - (r, c): ["color: baz"] - for r, row in enumerate(self.df.index) - for c, col in enumerate(self.df.columns) - if row in self.df.loc[slice_].index - and col in self.df.loc[slice_].columns - } - assert result == expected - - def test_applymap_subset(self): - def f(x): - return "foo: bar" + ], + ) + @pytest.mark.parametrize("axis", [0, 1]) + def test_apply_subset(self, slice_, axis): + result = ( + self.df.style.apply(self.h, axis=axis, subset=slice_, foo="baz") + ._compute() + .ctx + ) + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns + } + assert result == expected - slices = [ + @pytest.mark.parametrize( + "slice_", + [ pd.IndexSlice[:], pd.IndexSlice[:, ["A"]], pd.IndexSlice[[1], :], pd.IndexSlice[[1], ["A"]], pd.IndexSlice[:2, ["A", "B"]], - ] - - for slice_ in slices: - result = self.df.style.applymap(f, subset=slice_)._compute().ctx - expected = { - (r, c): ["foo: bar"] - for r, row in enumerate(self.df.index) - for c, col in enumerate(self.df.columns) - if row in self.df.loc[slice_].index - and col in self.df.loc[slice_].columns - } - assert result == expected + ], + ) + def test_applymap_subset(self, slice_): + result = ( + self.df.style.applymap(lambda x: "color:baz;", subset=slice_)._compute().ctx + ) + expected = { + (r, c): [("color", "baz")] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns + } + assert result == expected @pytest.mark.parametrize( "slice_", @@ -430,14 +434,24 @@ def f(x): result = self.df.style.where(f, style1)._compute().ctx expected = { - (r, c): [style1] + (r, c): [("foo", "bar")] for r, row in enumerate(self.df.index) for c, col in 
enumerate(self.df.columns) if f(self.df.loc[row, col]) } assert result == expected - def test_where_subset(self): + @pytest.mark.parametrize( + "slice_", + [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ], + ) + def test_where_subset(self, slice_): # GH 17474 def f(x): return x > 0.5 @@ -445,26 +459,14 @@ def f(x): style1 = "foo: bar" style2 = "baz: foo" - slices = [ - pd.IndexSlice[:], - pd.IndexSlice[:, ["A"]], - pd.IndexSlice[[1], :], - pd.IndexSlice[[1], ["A"]], - pd.IndexSlice[:2, ["A", "B"]], - ] - - for slice_ in slices: - result = ( - self.df.style.where(f, style1, style2, subset=slice_)._compute().ctx - ) - expected = { - (r, c): [style1 if f(self.df.loc[row, col]) else style2] - for r, row in enumerate(self.df.index) - for c, col in enumerate(self.df.columns) - if row in self.df.loc[slice_].index - and col in self.df.loc[slice_].columns - } - assert result == expected + result = self.df.style.where(f, style1, style2, subset=slice_)._compute().ctx + expected = { + (r, c): [("foo", "bar") if f(self.df.loc[row, col]) else ("baz", "foo")] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and col in self.df.loc[slice_].columns + } + assert result == expected def test_where_subset_compare_with_applymap(self): # GH 17474 @@ -495,11 +497,11 @@ def g(x): def test_empty(self): df = DataFrame({"A": [1, 0]}) s = df.style - s.ctx = {(0, 0): ["color: red"], (1, 0): [""]} + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("", "")]} result = s._translate()["cellstyle"] expected = [ - {"props": [("color", " red")], "selectors": ["row0_col0"]}, + {"props": [("color", "red")], "selectors": ["row0_col0"]}, {"props": [("", "")], "selectors": ["row1_col0"]}, ] assert result == expected @@ -507,11 +509,11 @@ def test_empty(self): def test_duplicate(self): df = DataFrame({"A": [1, 0]}) s = df.style - s.ctx = {(0, 0): 
["color: red"], (1, 0): ["color: red"]} + s.ctx = {(0, 0): [("color", "red")], (1, 0): [("color", "red")]} result = s._translate()["cellstyle"] expected = [ - {"props": [("color", " red")], "selectors": ["row0_col0", "row1_col0"]} + {"props": [("color", "red")], "selectors": ["row0_col0", "row1_col0"]} ] assert result == expected @@ -519,35 +521,17 @@ def test_bar_align_left(self): df = DataFrame({"A": [0, 1, 2]}) result = df.style.bar()._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(" - "90deg,#d65f5f 50.0%, transparent 50.0%)", - ], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(" - "90deg,#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (2, 0): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), } assert result == expected result = df.style.bar(color="red", width=50)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,red 25.0%, transparent 25.0%)", - ], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,red 50.0%, transparent 50.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad("red 25.0%", " transparent 25.0%"), + (2, 0): bar_grad("red 50.0%", " transparent 50.0%"), } assert result == expected @@ -562,118 +546,54 @@ def test_bar_align_left_0points(self): df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) result = df.style.bar()._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (0, 1): ["width: 10em", " height: 80%"], - (0, 2): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 
50.0%)", - ], - (1, 2): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", - ], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 100.0%" - ", transparent 100.0%)", - ], - (2, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 100.0%" - ", transparent 100.0%)", - ], - (2, 2): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 100.0%" - ", transparent 100.0%)", - ], + (0, 0): bar_grad(), + (0, 1): bar_grad(), + (0, 2): bar_grad(), + (1, 0): bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (1, 1): bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (1, 2): bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (2, 0): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), + (2, 1): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), + (2, 2): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), } assert result == expected result = df.style.bar(axis=1)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", - ], - (0, 2): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 100.0%" - ", transparent 100.0%)", - ], - (1, 0): ["width: 10em", " height: 80%"], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 50.0%" - ", transparent 50.0%)", - ], - (1, 2): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 100.0%" - ", transparent 100.0%)", - ], - (2, 0): ["width: 10em", " height: 80%"], - (2, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 50.0%" - ", transparent 50.0%)", - ], - (2, 2): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg,#d65f5f 100.0%" - ", transparent 100.0%)", - ], + (0, 0): bar_grad(), + (0, 1): 
bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (0, 2): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), + (1, 0): bar_grad(), + (1, 1): bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (1, 2): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), + (2, 0): bar_grad(), + (2, 1): bar_grad("#d65f5f 50.0%", " transparent 50.0%"), + (2, 2): bar_grad("#d65f5f 100.0%", " transparent 100.0%"), } assert result == expected def test_bar_align_mid_pos_and_neg(self): df = DataFrame({"A": [-10, 0, 20, 90]}) - result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx - expected = { - (0, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 10.0%, transparent 10.0%)", - ], - (1, 0): ["width: 10em", " height: 80%"], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 10.0%, #5fba7d 10.0%" - ", #5fba7d 30.0%, transparent 30.0%)", - ], - (3, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 10.0%, " - "#5fba7d 10.0%, #5fba7d 100.0%, " - "transparent 100.0%)", - ], + (0, 0): bar_grad( + "#d65f5f 10.0%", + " transparent 10.0%", + ), + (1, 0): bar_grad(), + (2, 0): bar_grad( + " transparent 10.0%", + " #5fba7d 10.0%", + " #5fba7d 30.0%", + " transparent 30.0%", + ), + (3, 0): bar_grad( + " transparent 10.0%", + " #5fba7d 10.0%", + " #5fba7d 100.0%", + " transparent 100.0%", + ), } - assert result == expected def test_bar_align_mid_all_pos(self): @@ -682,30 +602,22 @@ def test_bar_align_mid_all_pos(self): result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx expected = { - (0, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#5fba7d 10.0%, transparent 10.0%)", - ], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#5fba7d 20.0%, transparent 20.0%)", - ], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: 
linear-gradient(90deg," - "#5fba7d 50.0%, transparent 50.0%)", - ], - (3, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#5fba7d 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad( + "#5fba7d 10.0%", + " transparent 10.0%", + ), + (1, 0): bar_grad( + "#5fba7d 20.0%", + " transparent 20.0%", + ), + (2, 0): bar_grad( + "#5fba7d 50.0%", + " transparent 50.0%", + ), + (3, 0): bar_grad( + "#5fba7d 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -716,36 +628,28 @@ def test_bar_align_mid_all_neg(self): result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx expected = { - (0, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 100.0%, transparent 100.0%)", - ], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 40.0%, " - "#d65f5f 40.0%, #d65f5f 100.0%, " - "transparent 100.0%)", - ], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 70.0%, " - "#d65f5f 70.0%, #d65f5f 100.0%, " - "transparent 100.0%)", - ], - (3, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 80.0%, " - "#d65f5f 80.0%, #d65f5f 100.0%, " - "transparent 100.0%)", - ], + (0, 0): bar_grad( + "#d65f5f 100.0%", + " transparent 100.0%", + ), + (1, 0): bar_grad( + " transparent 40.0%", + " #d65f5f 40.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), + (2, 0): bar_grad( + " transparent 70.0%", + " #d65f5f 70.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), + (3, 0): bar_grad( + " transparent 80.0%", + " #d65f5f 80.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -759,28 +663,25 @@ def test_bar_align_zero_pos_and_neg(self): .ctx ) expected = { - (0, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 40.0%, #d65f5f 40.0%, " - "#d65f5f 45.0%, 
transparent 45.0%)", - ], - (1, 0): ["width: 10em", " height: 80%"], - (2, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 45.0%, #5fba7d 45.0%, " - "#5fba7d 55.0%, transparent 55.0%)", - ], - (3, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 45.0%, #5fba7d 45.0%, " - "#5fba7d 90.0%, transparent 90.0%)", - ], + (0, 0): bar_grad( + " transparent 40.0%", + " #d65f5f 40.0%", + " #d65f5f 45.0%", + " transparent 45.0%", + ), + (1, 0): bar_grad(), + (2, 0): bar_grad( + " transparent 45.0%", + " #5fba7d 45.0%", + " #5fba7d 55.0%", + " transparent 55.0%", + ), + (3, 0): bar_grad( + " transparent 45.0%", + " #5fba7d 45.0%", + " #5fba7d 90.0%", + " transparent 90.0%", + ), } assert result == expected @@ -788,25 +689,19 @@ def test_bar_align_left_axis_none(self): df = DataFrame({"A": [0, 1], "B": [2, 4]}) result = df.style.bar(axis=None)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 25.0%, transparent 25.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 50.0%, transparent 50.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + "#d65f5f 25.0%", + " transparent 25.0%", + ), + (0, 1): bar_grad( + "#d65f5f 50.0%", + " transparent 50.0%", + ), + (1, 1): bar_grad( + "#d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -814,28 +709,25 @@ def test_bar_align_zero_axis_none(self): df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="zero", axis=None)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 50.0%, #d65f5f 
50.0%, " - "#d65f5f 62.5%, transparent 62.5%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 25.0%, #d65f5f 25.0%, " - "#d65f5f 50.0%, transparent 50.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 50.0%, #d65f5f 50.0%, " - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + " transparent 50.0%", + " #d65f5f 50.0%", + " #d65f5f 62.5%", + " transparent 62.5%", + ), + (0, 1): bar_grad( + " transparent 25.0%", + " #d65f5f 25.0%", + " #d65f5f 50.0%", + " transparent 50.0%", + ), + (1, 1): bar_grad( + " transparent 50.0%", + " #d65f5f 50.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -843,27 +735,23 @@ def test_bar_align_mid_axis_none(self): df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 33.3%, #d65f5f 33.3%, " - "#d65f5f 50.0%, transparent 50.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 33.3%, transparent 33.3%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 33.3%, #d65f5f 33.3%, " - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + " transparent 33.3%", + " #d65f5f 33.3%", + " #d65f5f 50.0%", + " transparent 50.0%", + ), + (0, 1): bar_grad( + "#d65f5f 33.3%", + " transparent 33.3%", + ), + (1, 1): bar_grad( + " transparent 33.3%", + " #d65f5f 33.3%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -871,28 +759,25 @@ def test_bar_align_mid_vmin(self): df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmin=-6)._compute().ctx 
expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 60.0%, #d65f5f 60.0%, " - "#d65f5f 70.0%, transparent 70.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 40.0%, #d65f5f 40.0%, " - "#d65f5f 60.0%, transparent 60.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 60.0%, #d65f5f 60.0%, " - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + " transparent 60.0%", + " #d65f5f 60.0%", + " #d65f5f 70.0%", + " transparent 70.0%", + ), + (0, 1): bar_grad( + " transparent 40.0%", + " #d65f5f 40.0%", + " #d65f5f 60.0%", + " transparent 60.0%", + ), + (1, 1): bar_grad( + " transparent 60.0%", + " #d65f5f 60.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -900,27 +785,23 @@ def test_bar_align_mid_vmax(self): df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmax=8)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 20.0%, #d65f5f 20.0%, " - "#d65f5f 30.0%, transparent 30.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 20.0%, transparent 20.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 20.0%, #d65f5f 20.0%, " - "#d65f5f 60.0%, transparent 60.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + " transparent 20.0%", + " #d65f5f 20.0%", + " #d65f5f 30.0%", + " transparent 30.0%", + ), + (0, 1): bar_grad( + "#d65f5f 20.0%", + " transparent 20.0%", + ), + (1, 1): bar_grad( + " transparent 20.0%", + " #d65f5f 20.0%", + " #d65f5f 60.0%", + " transparent 60.0%", + ), } assert result == expected 
@@ -928,28 +809,25 @@ def test_bar_align_mid_vmin_vmax_wide(self): df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmin=-3, vmax=7)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 30.0%, #d65f5f 30.0%, " - "#d65f5f 40.0%, transparent 40.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 10.0%, #d65f5f 10.0%, " - "#d65f5f 30.0%, transparent 30.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 30.0%, #d65f5f 30.0%, " - "#d65f5f 70.0%, transparent 70.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + " transparent 30.0%", + " #d65f5f 30.0%", + " #d65f5f 40.0%", + " transparent 40.0%", + ), + (0, 1): bar_grad( + " transparent 10.0%", + " #d65f5f 10.0%", + " #d65f5f 30.0%", + " transparent 30.0%", + ), + (1, 1): bar_grad( + " transparent 30.0%", + " #d65f5f 30.0%", + " #d65f5f 70.0%", + " transparent 70.0%", + ), } assert result == expected @@ -957,27 +835,20 @@ def test_bar_align_mid_vmin_vmax_clipping(self): df = DataFrame({"A": [0, 1], "B": [-2, 4]}) result = df.style.bar(align="mid", axis=None, vmin=-1, vmax=3)._compute().ctx expected = { - (0, 0): ["width: 10em", " height: 80%"], - (1, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 25.0%, #d65f5f 25.0%, " - "#d65f5f 50.0%, transparent 50.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 25.0%, transparent 25.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 25.0%, #d65f5f 25.0%, " - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad(), + (1, 0): bar_grad( + " transparent 25.0%", + " #d65f5f 25.0%", + " #d65f5f 50.0%", + " transparent 50.0%", + ), 
+ (0, 1): bar_grad("#d65f5f 25.0%", " transparent 25.0%"), + (1, 1): bar_grad( + " transparent 25.0%", + " #d65f5f 25.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -985,26 +856,19 @@ def test_bar_align_mid_nans(self): df = DataFrame({"A": [1, None], "B": [-1, 3]}) result = df.style.bar(align="mid", axis=None)._compute().ctx expected = { - (0, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 25.0%, #d65f5f 25.0%, " - "#d65f5f 50.0%, transparent 50.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg," - "#d65f5f 25.0%, transparent 25.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 25.0%, #d65f5f 25.0%, " - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad( + " transparent 25.0%", + " #d65f5f 25.0%", + " #d65f5f 50.0%", + " transparent 50.0%", + ), + (0, 1): bar_grad("#d65f5f 25.0%", " transparent 25.0%"), + (1, 1): bar_grad( + " transparent 25.0%", + " #d65f5f 25.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -1012,27 +876,24 @@ def test_bar_align_zero_nans(self): df = DataFrame({"A": [1, None], "B": [-1, 2]}) result = df.style.bar(align="zero", axis=None)._compute().ctx expected = { - (0, 0): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 50.0%, #d65f5f 50.0%, " - "#d65f5f 75.0%, transparent 75.0%)", - ], - (0, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 25.0%, #d65f5f 25.0%, " - "#d65f5f 50.0%, transparent 50.0%)", - ], - (1, 1): [ - "width: 10em", - " height: 80%", - "background: linear-gradient(90deg, " - "transparent 50.0%, #d65f5f 50.0%, " - "#d65f5f 100.0%, transparent 100.0%)", - ], + (0, 0): bar_grad( + " transparent 50.0%", + " #d65f5f 50.0%", + " #d65f5f 75.0%", + " transparent 75.0%", + ), + (0, 1): bar_grad( + " 
transparent 25.0%", + " #d65f5f 25.0%", + " #d65f5f 50.0%", + " transparent 50.0%", + ), + (1, 1): bar_grad( + " transparent 50.0%", + " #d65f5f 50.0%", + " #d65f5f 100.0%", + " transparent 100.0%", + ), } assert result == expected @@ -1115,7 +976,7 @@ def test_format_with_bad_na_rep(self): def test_highlight_null(self, null_color="red"): df = DataFrame({"A": [0, np.nan]}) result = df.style.highlight_null()._compute().ctx - expected = {(1, 0): ["background-color: red"]} + expected = {(1, 0): [("background-color", "red")]} assert result == expected def test_highlight_null_subset(self): @@ -1128,8 +989,8 @@ def test_highlight_null_subset(self): .ctx ) expected = { - (1, 0): ["background-color: red"], - (1, 1): ["background-color: green"], + (1, 0): [("background-color", "red")], + (1, 1): [("background-color", "green")], } assert result == expected @@ -1228,7 +1089,7 @@ def f(x): ) result = DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx - assert result[(1, 1)] == ["color: red"] + assert result[(1, 1)] == [("color", "red")] def test_trim(self): result = self.df.style.render() # trim=True @@ -1239,6 +1100,7 @@ def test_trim(self): def test_highlight_max(self): df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + css_seq = [("background-color", "yellow")] # max(df) = min(-df) for max_ in [True, False]: if max_: @@ -1247,35 +1109,35 @@ def test_highlight_max(self): df = -df attr = "highlight_min" result = getattr(df.style, attr)()._compute().ctx - assert result[(1, 1)] == ["background-color: yellow"] + assert result[(1, 1)] == css_seq result = getattr(df.style, attr)(color="green")._compute().ctx - assert result[(1, 1)] == ["background-color: green"] + assert result[(1, 1)] == [("background-color", "green")] result = getattr(df.style, attr)(subset="A")._compute().ctx - assert result[(1, 0)] == ["background-color: yellow"] + assert result[(1, 0)] == css_seq result = getattr(df.style, attr)(axis=0)._compute().ctx expected = { - (1, 0): 
["background-color: yellow"], - (1, 1): ["background-color: yellow"], + (1, 0): css_seq, + (1, 1): css_seq, } assert result == expected result = getattr(df.style, attr)(axis=1)._compute().ctx expected = { - (0, 1): ["background-color: yellow"], - (1, 1): ["background-color: yellow"], + (0, 1): css_seq, + (1, 1): css_seq, } assert result == expected # separate since we can't negate the strs df["C"] = ["a", "b"] result = df.style.highlight_max()._compute().ctx - expected = {(1, 1): ["background-color: yellow"]} + expected = {(1, 1): css_seq} result = df.style.highlight_min()._compute().ctx - expected = {(0, 0): ["background-color: yellow"]} + expected = {(0, 0): css_seq} def test_export(self): f = lambda x: "color: red" if x > 0 else "color: blue" @@ -1461,7 +1323,7 @@ def test_mi_sparse(self): "display_value": "a", "is_visible": True, "type": "th", - "attributes": ['rowspan="2"'], + "attributes": 'rowspan="2"', "class": "row_heading level0 row0", "id": "level0_row0", } @@ -1537,7 +1399,7 @@ def test_mi_sparse_index_names(self): expected = [ {"class": "index_name level0", "value": "idx_level_0", "type": "th"}, {"class": "index_name level1", "value": "idx_level_1", "type": "th"}, - {"class": "blank", "value": "", "type": "th"}, + {"class": "blank col0", "value": "", "type": "th"}, ] assert head == expected @@ -1975,6 +1837,93 @@ def test_w3_html_format(self): """ assert expected == s.render() + @pytest.mark.parametrize( + "slc", + [ + pd.IndexSlice[:, :], + pd.IndexSlice[:, 1], + pd.IndexSlice[1, :], + pd.IndexSlice[[1], [1]], + pd.IndexSlice[1, [1]], + pd.IndexSlice[[1], 1], + pd.IndexSlice[1], + pd.IndexSlice[1, 1], + slice(None, None, None), + [0, 1], + np.array([0, 1]), + pd.Series([0, 1]), + ], + ) + def test_non_reducing_slice(self, slc): + df = DataFrame([[0, 1], [2, 3]]) + + tslice_ = _non_reducing_slice(slc) + assert isinstance(df.loc[tslice_], DataFrame) + + @pytest.mark.parametrize("box", [list, pd.Series, np.array]) + def test_list_slice(self, box): + # 
like dataframe getitem + subset = box(["A"]) + + df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) + expected = pd.IndexSlice[:, ["A"]] + + result = _non_reducing_slice(subset) + tm.assert_frame_equal(df.loc[result], df.loc[expected]) + + def test_non_reducing_slice_on_multiindex(self): + # GH 19861 + dic = { + ("a", "d"): [1, 4], + ("a", "c"): [2, 3], + ("b", "c"): [3, 2], + ("b", "d"): [4, 1], + } + df = DataFrame(dic, index=[0, 1]) + idx = pd.IndexSlice + slice_ = idx[:, idx["b", "d"]] + tslice_ = _non_reducing_slice(slice_) + + result = df.loc[tslice_] + expected = DataFrame({("b", "d"): [4, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "slice_", + [ + pd.IndexSlice[:, :], + # check cols + pd.IndexSlice[:, pd.IndexSlice[["a"]]], # inferred deeper need list + pd.IndexSlice[:, pd.IndexSlice[["a"], ["c"]]], # inferred deeper need list + pd.IndexSlice[:, pd.IndexSlice["a", "c", :]], + pd.IndexSlice[:, pd.IndexSlice["a", :, "e"]], + pd.IndexSlice[:, pd.IndexSlice[:, "c", "e"]], + pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d"], :]], # check list + pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d", "-"], :]], # allow missing + pd.IndexSlice[:, pd.IndexSlice["a", ["c", "d", "-"], "e"]], # no slice + # check rows + pd.IndexSlice[pd.IndexSlice[["U"]], :], # inferred deeper need list + pd.IndexSlice[pd.IndexSlice[["U"], ["W"]], :], # inferred deeper need list + pd.IndexSlice[pd.IndexSlice["U", "W", :], :], + pd.IndexSlice[pd.IndexSlice["U", :, "Y"], :], + pd.IndexSlice[pd.IndexSlice[:, "W", "Y"], :], + pd.IndexSlice[pd.IndexSlice[:, "W", ["Y", "Z"]], :], # check list + pd.IndexSlice[pd.IndexSlice[:, "W", ["Y", "Z", "-"]], :], # allow missing + pd.IndexSlice[pd.IndexSlice["U", "W", ["Y", "Z", "-"]], :], # no slice + # check simultaneous + pd.IndexSlice[pd.IndexSlice[:, "W", "Y"], pd.IndexSlice["a", "c", :]], + ], + ) + def test_non_reducing_multi_slice_on_multiindex(self, slice_): + # GH 33562 + cols = 
pd.MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]) + idxs = pd.MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]]) + df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs) + + expected = df.loc[slice_] + result = df.loc[_non_reducing_slice(slice_)] + tm.assert_frame_equal(result, expected) + @td.skip_if_no_mpl class TestStylerMatplotlibDep: @@ -1983,7 +1932,7 @@ def test_background_gradient(self): for c_map in [None, "YlOrRd"]: result = df.style.background_gradient(cmap=c_map)._compute().ctx - assert all("#" in x[0] for x in result.values()) + assert all("#" in x[0][1] for x in result.values()) assert result[(0, 0)] == result[(0, 1)] assert result[(1, 0)] == result[(1, 1)] @@ -1991,31 +1940,39 @@ def test_background_gradient(self): df.style.background_gradient(subset=pd.IndexSlice[1, "A"])._compute().ctx ) - assert result[(1, 0)] == ["background-color: #fff7fb", "color: #000000"] + assert result[(1, 0)] == [("background-color", "#fff7fb"), ("color", "#000000")] @pytest.mark.parametrize( - "c_map,expected", + "cmap, expected", [ ( - None, + "PuBu", { - (0, 0): ["background-color: #440154", "color: #f1f1f1"], - (1, 0): ["background-color: #fde725", "color: #000000"], + (4, 5): [("background-color", "#86b0d3"), ("color", "#000000")], + (4, 6): [("background-color", "#83afd3"), ("color", "#f1f1f1")], }, ), ( "YlOrRd", { - (0, 0): ["background-color: #ffffcc", "color: #000000"], - (1, 0): ["background-color: #800026", "color: #f1f1f1"], + (4, 8): [("background-color", "#fd913e"), ("color", "#000000")], + (4, 9): [("background-color", "#fd8f3d"), ("color", "#f1f1f1")], + }, + ), + ( + None, + { + (7, 0): [("background-color", "#48c16e"), ("color", "#f1f1f1")], + (7, 1): [("background-color", "#4cc26c"), ("color", "#000000")], }, ), ], ) - def test_text_color_threshold(self, c_map, expected): - df = DataFrame([1, 2], columns=["A"]) - result = df.style.background_gradient(cmap=c_map)._compute().ctx - assert result == expected + def 
test_text_color_threshold(self, cmap, expected): + df = DataFrame(np.arange(100).reshape(10, 10)) + result = df.style.background_gradient(cmap=cmap, axis=None)._compute().ctx + for k in expected.keys(): + assert result[k] == expected[k] @pytest.mark.parametrize("text_color_threshold", [1.1, "1", -1, [2, 2]]) def test_text_color_threshold_raises(self, text_color_threshold): @@ -2030,9 +1987,9 @@ def test_text_color_threshold_raises(self, text_color_threshold): def test_background_gradient_axis(self): df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) - low = ["background-color: #f7fbff", "color: #000000"] - high = ["background-color: #08306b", "color: #f1f1f1"] - mid = ["background-color: #abd0e6", "color: #000000"] + low = [("background-color", "#f7fbff"), ("color", "#000000")] + high = [("background-color", "#08306b"), ("color", "#f1f1f1")] + mid = [("background-color", "#abd0e6"), ("color", "#000000")] result = df.style.background_gradient(cmap="Blues", axis=0)._compute().ctx assert result[(0, 0)] == low assert result[(0, 1)] == low diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index ef4de5961a696..8c634509bdc84 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -6,7 +6,10 @@ import pytest import pandas as pd -from pandas import DataFrame, compat +from pandas import ( + DataFrame, + compat, +) import pandas._testing as tm diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index a88dec84bd693..347e1fda3c79d 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -6,7 +6,12 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, option_context +from pandas import ( + DataFrame, + Index, + MultiIndex, + option_context, +) import pandas._testing as tm import pandas.io.formats.format as fmt diff --git a/pandas/tests/io/formats/test_to_latex.py 
b/pandas/tests/io/formats/test_to_latex.py index ba6d7c010613b..53d6dc3cf0b17 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -5,7 +5,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm from pandas.io.formats.format import DataFrameFormatter diff --git a/pandas/tests/io/formats/test_to_string.py b/pandas/tests/io/formats/test_to_string.py index 5d7b4b417006a..551734f343dfa 100644 --- a/pandas/tests/io/formats/test_to_string.py +++ b/pandas/tests/io/formats/test_to_string.py @@ -5,7 +5,12 @@ import numpy as np import pytest -from pandas import DataFrame, Series, option_context, to_datetime +from pandas import ( + DataFrame, + Series, + option_context, + to_datetime, +) def test_repr_embedded_ndarray(): diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index e25964f556e4e..9d955545aede3 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -8,7 +8,11 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.dtypes import CategoricalDtype, DatetimeTZDtype, PeriodDtype +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + PeriodDtype, +) import pandas as pd from pandas import DataFrame @@ -749,6 +753,9 @@ def test_read_json_table_timezones_orient(self, idx, vals, recwarn): result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) + @pytest.mark.filterwarnings( + "ignore:an integer is required (got type float)*:DeprecationWarning" + ) def test_comprehensive(self): df = DataFrame( { @@ -759,8 +766,7 @@ def test_comprehensive(self): "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])), "F": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)), "G": [1.1, 2.2, 3.3, 4.4], - # 'H': pd.date_range('2016-01-01', freq='d', periods=4, - # 
tz='US/Central'), + "H": pd.date_range("2016-01-01", freq="d", periods=4, tz="US/Central"), "I": [True, False, False, True], }, index=pd.Index(range(4), name="idx"), diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index d7fc1257d8396..c01660ab5febe 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -5,7 +5,12 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Index, Series, json_normalize +from pandas import ( + DataFrame, + Index, + Series, + json_normalize, +) import pandas._testing as tm from pandas.io.json._normalize import nested_to_record diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index c3ada52eba5aa..bfce694637579 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -8,11 +8,22 @@ import numpy as np import pytest -from pandas.compat import IS64, PY38, is_platform_windows +from pandas.compat import ( + IS64, + PY38, + is_platform_windows, +) import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, DatetimeIndex, Series, Timestamp, compat, read_json +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + Timestamp, + compat, + read_json, +) import pandas._testing as tm pytestmark = td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 2484c12f42600..a8cf94421dbde 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -6,7 +6,10 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, read_json +from pandas import ( + DataFrame, + read_json, +) import pandas._testing as tm from pandas.io.json._json import JsonReader diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index dff506809ee4f..bfe8c7d6a4124 100644 --- 
a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -15,10 +15,21 @@ import pandas._libs.json as ujson from pandas._libs.tslib import Timestamp -from pandas.compat import IS64, is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) import pandas.util._test_decorators as td -from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, Timedelta, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + NaT, + Series, + Timedelta, + date_range, +) import pandas._testing as tm pytestmark = td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index 8c1475025b442..3d9d780a1e878 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -9,7 +9,10 @@ from pandas.errors import DtypeWarning -from pandas import DataFrame, concat +from pandas import ( + DataFrame, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index 9fd6e48cf8689..6730d8cc46603 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -11,9 +11,17 @@ import pytest from pandas._libs.tslib import Timestamp -from pandas.errors import EmptyDataError, ParserError +from pandas.errors import ( + EmptyDataError, + ParserError, +) -from pandas import DataFrame, Index, Series, compat +from pandas import ( + DataFrame, + Index, + Series, + compat, +) import pandas._testing as tm from pandas.io.parsers import TextFileReader diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 018f93f1eb06b..f09a1f7bcc492 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -2,14 
+2,20 @@ Tests that work on both the Python and C engines but do not have a specific classification into the other test modules. """ -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import os import platform from urllib.error import URLError import pytest -from pandas.errors import EmptyDataError, ParserError +from pandas.errors import ( + EmptyDataError, + ParserError, +) import pandas.util._test_decorators as td from pandas import DataFrame diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index a133e1be49946..6e7022cd87875 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -8,7 +8,11 @@ import pytest -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index fca4aaaba6675..52fbdedd138fb 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas import DataFrame, option_context +from pandas import ( + DataFrame, + option_context, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index a8f5c43ea15c7..febeef695aafb 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index 3cc30b0ab4029..5ae1d80589df9 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -6,7 +6,11 @@ import 
pytest -from pandas import DataFrame, Series, concat +from pandas import ( + DataFrame, + Series, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 57defb400b842..91b5e26efafa1 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -12,7 +12,10 @@ import numpy as np import pytest -from pandas.errors import EmptyDataError, ParserError +from pandas.errors import ( + EmptyDataError, + ParserError, +) import pandas.util._test_decorators as td from pandas import DataFrame diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 321678c36943a..1eb52ab78e1a0 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -1,9 +1,15 @@ import os -from typing import List, Optional +from typing import ( + List, + Optional, +) import pytest -from pandas import read_csv, read_table +from pandas import ( + read_csv, + read_table, +) class BaseParser: diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index 2f569424a82f5..f956403197cf5 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -11,7 +11,11 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, DataFrame, Timestamp +from pandas import ( + Categorical, + DataFrame, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 5ffd909d316bf..e452159189d4a 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -10,7 +10,10 @@ from pandas.errors import ParserWarning import pandas as pd -from pandas import DataFrame, Timestamp 
+from pandas import ( + DataFrame, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index 57d729fb4b7fc..200d1b50bfced 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -7,7 +7,14 @@ import numpy as np import pytest -from pandas import Categorical, DataFrame, Index, MultiIndex, Series, concat +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index b23dfa6ef1548..f8aff3ad3696a 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -5,7 +5,11 @@ further arguments when parsing. """ -from io import BytesIO, StringIO, TextIOWrapper +from io import ( + BytesIO, + StringIO, + TextIOWrapper, +) import mmap import os import tarfile @@ -17,7 +21,10 @@ from pandas.errors import ParserError import pandas.util._test_decorators as td -from pandas import DataFrame, concat +from pandas import ( + DataFrame, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py index 1d2fb7fddc9dd..ffa6c8259a59e 100644 --- a/pandas/tests/io/parser/test_converters.py +++ b/pandas/tests/io/parser/test_converters.py @@ -9,7 +9,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 10386cf87b9c2..ba6bfe9d88a03 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -10,7 +10,10 @@ import numpy as np import pytest -from pandas import DataFrame, read_csv +from pandas import ( + DataFrame, + 
read_csv, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 0b8dc1900ebb4..f15fc16fbce38 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -11,7 +11,11 @@ from pandas.errors import ParserError -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index a409751e261d6..2f876a28c56cd 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -8,7 +8,11 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 4237a774261ca..fecba8bd81404 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -9,7 +9,11 @@ from pandas._libs.parsers import STR_NA_VALUES -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index 657793450df5b..d412f0f313ead 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -2,7 +2,10 @@ Tests parsers ability to read and parse non-local files and hence require a network connection to be read. 
""" -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import logging import numpy as np diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index a510286d5412e..9f94f3f8f8a8b 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -3,11 +3,18 @@ parsers defined in parsers.py """ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) from io import StringIO from dateutil.parser import parse as du_parse -from hypothesis import given, settings, strategies as st +from hypothesis import ( + given, + settings, + strategies as st, +) import numpy as np import pytest import pytz @@ -15,10 +22,19 @@ from pandas._libs.tslib import Timestamp from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import parse_datetime_string -from pandas.compat import is_platform_windows, np_array_datetime64_compat +from pandas.compat import ( + is_platform_windows, + np_array_datetime64_compat, +) import pandas as pd -from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index d55a6361fc8d2..cf6866946ab76 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -6,13 +6,20 @@ """ import csv -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import pytest from pandas.errors import ParserError -from pandas import DataFrame, Index, MultiIndex +from pandas import ( + DataFrame, + Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 
5322c19a3ae50..5586b4915b6ea 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -5,7 +5,10 @@ """ from datetime import datetime -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) from pathlib import Path import numpy as np @@ -14,10 +17,16 @@ from pandas.errors import EmptyDataError import pandas as pd -from pandas import DataFrame, DatetimeIndex +from pandas import ( + DataFrame, + DatetimeIndex, +) import pandas._testing as tm -from pandas.io.parsers import read_csv, read_fwf +from pandas.io.parsers import ( + read_csv, + read_fwf, +) def test_basic(): diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 35b155705ccee..0735f60fabbf6 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -11,7 +11,10 @@ from pandas.errors import EmptyDataError -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 1af69785c7584..104cf56419bfd 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -2,7 +2,10 @@ Tests the TextReader class in parsers.pyx, which is integral to the C engine in parsers.py """ -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import os import numpy as np @@ -14,7 +17,10 @@ from pandas import DataFrame import pandas._testing as tm -from pandas.io.parsers import TextFileReader, read_csv +from pandas.io.parsers import ( + TextFileReader, + read_csv, +) class TestTextReader: diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index c6b700c0adfff..7f813b8733061 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -8,7 
+8,10 @@ from pandas._libs.tslib import Timestamp -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm _msg_validate_usecols_arg = ( diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 7d81a88e09012..27bad29550d82 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas import DataFrame, Index +from pandas import ( + DataFrame, + Index, +) import pandas._testing as tm _msg_validate_usecols_arg = ( diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index b873811de616c..858e38e40f017 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from pandas import Categorical, DataFrame, Series, _testing as tm, concat, read_hdf +from pandas import ( + Categorical, + DataFrame, + Series, + _testing as tm, + concat, + read_hdf, +) from pandas.tests.io.pytables.common import ( _maybe_remove, ensure_clean_path, diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py index 72e8b4aea5ede..8e1dee5873512 100644 --- a/pandas/tests/io/pytables/test_complex.py +++ b/pandas/tests/io/pytables/test_complex.py @@ -6,9 +6,15 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm -from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store +from pandas.tests.io.pytables.common import ( + ensure_clean_path, + ensure_clean_store, +) from pandas.io.pytables import read_hdf diff --git a/pandas/tests/io/pytables/test_errors.py b/pandas/tests/io/pytables/test_errors.py index 
24bd573341dc4..11ee5e3564634 100644 --- a/pandas/tests/io/pytables/test_errors.py +++ b/pandas/tests/io/pytables/test_errors.py @@ -16,9 +16,15 @@ date_range, read_hdf, ) -from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store +from pandas.tests.io.pytables.common import ( + ensure_clean_path, + ensure_clean_store, +) -from pandas.io.pytables import Term, _maybe_adjust_name +from pandas.io.pytables import ( + Term, + _maybe_adjust_name, +) pytestmark = pytest.mark.single diff --git a/pandas/tests/io/pytables/test_file_handling.py b/pandas/tests/io/pytables/test_file_handling.py index e0e995e03064f..6340311b234f1 100644 --- a/pandas/tests/io/pytables/test_file_handling.py +++ b/pandas/tests/io/pytables/test_file_handling.py @@ -6,7 +6,13 @@ from pandas.compat import is_platform_little_endian import pandas as pd -from pandas import DataFrame, HDFStore, Series, _testing as tm, read_hdf +from pandas import ( + DataFrame, + HDFStore, + Series, + _testing as tm, + read_hdf, +) from pandas.tests.io.pytables.common import ( _maybe_remove, ensure_clean_path, @@ -15,7 +21,11 @@ ) from pandas.io import pytables as pytables -from pandas.io.pytables import ClosedFileError, PossibleDataLossError, Term +from pandas.io.pytables import ( + ClosedFileError, + PossibleDataLossError, + Term, +) pytestmark = pytest.mark.single diff --git a/pandas/tests/io/pytables/test_keys.py b/pandas/tests/io/pytables/test_keys.py index 4f939adeb4138..02b79bd0fdbc1 100644 --- a/pandas/tests/io/pytables/test_keys.py +++ b/pandas/tests/io/pytables/test_keys.py @@ -1,6 +1,10 @@ import pytest -from pandas import DataFrame, HDFStore, _testing as tm +from pandas import ( + DataFrame, + HDFStore, + _testing as tm, +) from pandas.tests.io.pytables.common import ( ensure_clean_path, ensure_clean_store, diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 5f6a39d46df97..4f8c7c84a9fcc 100644 --- a/pandas/tests/io/pytables/test_put.py +++ 
b/pandas/tests/io/pytables/test_put.py @@ -1,6 +1,9 @@ import datetime import re -from warnings import catch_warnings, simplefilter +from warnings import ( + catch_warnings, + simplefilter, +) import numpy as np import pytest diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index 5ca8960ae5604..f8d302a0190f8 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -8,7 +8,14 @@ from pandas.compat import is_platform_windows import pandas as pd -from pandas import DataFrame, HDFStore, Index, Series, _testing as tm, read_hdf +from pandas import ( + DataFrame, + HDFStore, + Index, + Series, + _testing as tm, + read_hdf, +) from pandas.tests.io.pytables.common import ( _maybe_remove, ensure_clean_path, diff --git a/pandas/tests/io/pytables/test_retain_attributes.py b/pandas/tests/io/pytables/test_retain_attributes.py index d301835632431..16772d03c6d26 100644 --- a/pandas/tests/io/pytables/test_retain_attributes.py +++ b/pandas/tests/io/pytables/test_retain_attributes.py @@ -4,7 +4,13 @@ from pandas._libs.tslibs import Timestamp -from pandas import DataFrame, Series, _testing as tm, date_range, read_hdf +from pandas import ( + DataFrame, + Series, + _testing as tm, + date_range, + read_hdf, +) from pandas.tests.io.pytables.common import ( _maybe_remove, ensure_clean_path, diff --git a/pandas/tests/io/pytables/test_round_trip.py b/pandas/tests/io/pytables/test_round_trip.py index 403c3766fe6ed..03d3d838a936c 100644 --- a/pandas/tests/io/pytables/test_round_trip.py +++ b/pandas/tests/io/pytables/test_round_trip.py @@ -1,6 +1,9 @@ import datetime import re -from warnings import catch_warnings, simplefilter +from warnings import ( + catch_warnings, + simplefilter, +) import numpy as np import pytest diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index cb281e7cafd1f..ef75c86190a25 100644 --- a/pandas/tests/io/pytables/test_store.py +++ 
b/pandas/tests/io/pytables/test_store.py @@ -2,7 +2,10 @@ import hashlib import os import time -from warnings import catch_warnings, simplefilter +from warnings import ( + catch_warnings, + simplefilter, +) import numpy as np import pytest @@ -38,7 +41,10 @@ "ignore:object name:tables.exceptions.NaturalNameWarning" ) -from pandas.io.pytables import HDFStore, read_hdf +from pandas.io.pytables import ( + HDFStore, + read_hdf, +) pytestmark = pytest.mark.single diff --git a/pandas/tests/io/pytables/test_subclass.py b/pandas/tests/io/pytables/test_subclass.py index 196f729cd6eb2..75b04f332e054 100644 --- a/pandas/tests/io/pytables/test_subclass.py +++ b/pandas/tests/io/pytables/test_subclass.py @@ -1,10 +1,16 @@ import numpy as np -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm from pandas.tests.io.pytables.common import ensure_clean_path -from pandas.io.pytables import HDFStore, read_hdf +from pandas.io.pytables import ( + HDFStore, + read_hdf, +) class TestHDFStoreSubclass: diff --git a/pandas/tests/io/pytables/test_time_series.py b/pandas/tests/io/pytables/test_time_series.py index d98ae7c599c52..5e42dbde4b9f1 100644 --- a/pandas/tests/io/pytables/test_time_series.py +++ b/pandas/tests/io/pytables/test_time_series.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import DataFrame, Series, _testing as tm +from pandas import ( + DataFrame, + Series, + _testing as tm, +) from pandas.tests.io.pytables.common import ensure_clean_store pytestmark = pytest.mark.single diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py index a106a579d7e52..f67efb4cc60be 100644 --- a/pandas/tests/io/pytables/test_timezones.py +++ b/pandas/tests/io/pytables/test_timezones.py @@ -1,12 +1,22 @@ -import datetime +from datetime import ( + date, + timedelta, +) import numpy as np import pytest +from pandas._libs.tslibs.timezones import maybe_get_tz import 
pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + Timestamp, + date_range, +) import pandas._testing as tm from pandas.tests.io.pytables.common import ( _maybe_remove, @@ -30,200 +40,109 @@ def _compare_with_tz(a, b): raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]") -def test_append_with_timezones_dateutil(setup_path): +# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows +# filename issues. +gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x) +gettz_pytz = lambda x: x - from datetime import timedelta - # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows - # filename issues. - from pandas._libs.tslibs.timezones import maybe_get_tz +@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) +def test_append_with_timezones(setup_path, gettz): + # as columns - gettz = lambda x: maybe_get_tz("dateutil/" + x) + # Single-tzinfo, no DST transition + df_est = DataFrame( + { + "A": [ + Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")) + + timedelta(hours=1) * i + for i in range(5) + ] + } + ) + + # frame with all columns having same tzinfo, but different sides + # of DST transition + df_crosses_dst = DataFrame( + { + "A": Timestamp("20130102", tz=gettz("US/Eastern")), + "B": Timestamp("20130603", tz=gettz("US/Eastern")), + }, + index=range(5), + ) + + df_mixed_tz = DataFrame( + { + "A": Timestamp("20130102", tz=gettz("US/Eastern")), + "B": Timestamp("20130102", tz=gettz("EET")), + }, + index=range(5), + ) + + df_different_tz = DataFrame( + { + "A": Timestamp("20130102", tz=gettz("US/Eastern")), + "B": Timestamp("20130102", tz=gettz("CET")), + }, + index=range(5), + ) - # as columns with ensure_clean_store(setup_path) as store: _maybe_remove(store, "df_tz") - df = DataFrame( - { - "A": [ - Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")) - + timedelta(hours=1) * i - 
for i in range(5) - ] - } - ) - - store.append("df_tz", df, data_columns=["A"]) + store.append("df_tz", df_est, data_columns=["A"]) result = store["df_tz"] - _compare_with_tz(result, df) - tm.assert_frame_equal(result, df) + _compare_with_tz(result, df_est) + tm.assert_frame_equal(result, df_est) # select with tz aware - expected = df[df.A >= df.A[3]] - result = store.select("df_tz", where="A>=df.A[3]") + expected = df_est[df_est.A >= df_est.A[3]] + result = store.select("df_tz", where="A>=df_est.A[3]") _compare_with_tz(result, expected) # ensure we include dates in DST and STD time here. _maybe_remove(store, "df_tz") - df = DataFrame( - { - "A": Timestamp("20130102", tz=gettz("US/Eastern")), - "B": Timestamp("20130603", tz=gettz("US/Eastern")), - }, - index=range(5), - ) - store.append("df_tz", df) + store.append("df_tz", df_crosses_dst) result = store["df_tz"] - _compare_with_tz(result, df) - tm.assert_frame_equal(result, df) - - df = DataFrame( - { - "A": Timestamp("20130102", tz=gettz("US/Eastern")), - "B": Timestamp("20130102", tz=gettz("EET")), - }, - index=range(5), - ) + _compare_with_tz(result, df_crosses_dst) + tm.assert_frame_equal(result, df_crosses_dst) msg = ( r"invalid info for \[values_block_1\] for \[tz\], " - r"existing_value \[dateutil/.*US/Eastern\] " - r"conflicts with new value \[dateutil/.*EET\]" + r"existing_value \[(dateutil/.*)?US/Eastern\] " + r"conflicts with new value \[(dateutil/.*)?EET\]" ) with pytest.raises(ValueError, match=msg): - store.append("df_tz", df) + store.append("df_tz", df_mixed_tz) # this is ok _maybe_remove(store, "df_tz") - store.append("df_tz", df, data_columns=["A", "B"]) + store.append("df_tz", df_mixed_tz, data_columns=["A", "B"]) result = store["df_tz"] - _compare_with_tz(result, df) - tm.assert_frame_equal(result, df) + _compare_with_tz(result, df_mixed_tz) + tm.assert_frame_equal(result, df_mixed_tz) # can't append with diff timezone - df = DataFrame( - { - "A": Timestamp("20130102", tz=gettz("US/Eastern")), - 
"B": Timestamp("20130102", tz=gettz("CET")), - }, - index=range(5), - ) - msg = ( r"invalid info for \[B\] for \[tz\], " - r"existing_value \[dateutil/.*EET\] " - r"conflicts with new value \[dateutil/.*CET\]" + r"existing_value \[(dateutil/.*)?EET\] " + r"conflicts with new value \[(dateutil/.*)?CET\]" ) with pytest.raises(ValueError, match=msg): - store.append("df_tz", df) - - # as index - with ensure_clean_store(setup_path) as store: - - dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern")) - dti = dti._with_freq(None) # freq doesnt round-trip - - # GH 4098 example - df = DataFrame({"A": Series(range(3), index=dti)}) - - _maybe_remove(store, "df") - store.put("df", df) - result = store.select("df") - tm.assert_frame_equal(result, df) - - _maybe_remove(store, "df") - store.append("df", df) - result = store.select("df") - tm.assert_frame_equal(result, df) - - -def test_append_with_timezones_pytz(setup_path): - - from datetime import timedelta + store.append("df_tz", df_different_tz) - # as columns - with ensure_clean_store(setup_path) as store: - _maybe_remove(store, "df_tz") - df = DataFrame( - { - "A": [ - Timestamp("20130102 2:00:00", tz="US/Eastern") - + timedelta(hours=1) * i - for i in range(5) - ] - } - ) - store.append("df_tz", df, data_columns=["A"]) - result = store["df_tz"] - _compare_with_tz(result, df) - tm.assert_frame_equal(result, df) - - # select with tz aware - _compare_with_tz(store.select("df_tz", where="A>=df.A[3]"), df[df.A >= df.A[3]]) - - _maybe_remove(store, "df_tz") - # ensure we include dates in DST and STD time here. 
- df = DataFrame( - { - "A": Timestamp("20130102", tz="US/Eastern"), - "B": Timestamp("20130603", tz="US/Eastern"), - }, - index=range(5), - ) - store.append("df_tz", df) - result = store["df_tz"] - _compare_with_tz(result, df) - tm.assert_frame_equal(result, df) +@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz]) +def test_append_with_timezones_as_index(setup_path, gettz): + # GH#4098 example - df = DataFrame( - { - "A": Timestamp("20130102", tz="US/Eastern"), - "B": Timestamp("20130102", tz="EET"), - }, - index=range(5), - ) + dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern")) + dti = dti._with_freq(None) # freq doesnt round-trip - msg = ( - r"invalid info for \[values_block_1\] for \[tz\], " - r"existing_value \[US/Eastern\] conflicts with new value \[EET\]" - ) - with pytest.raises(ValueError, match=msg): - store.append("df_tz", df) + df = DataFrame({"A": Series(range(3), index=dti)}) - # this is ok - _maybe_remove(store, "df_tz") - store.append("df_tz", df, data_columns=["A", "B"]) - result = store["df_tz"] - _compare_with_tz(result, df) - tm.assert_frame_equal(result, df) - - # can't append with diff timezone - df = DataFrame( - { - "A": Timestamp("20130102", tz="US/Eastern"), - "B": Timestamp("20130102", tz="CET"), - }, - index=range(5), - ) - - msg = ( - r"invalid info for \[B\] for \[tz\], " - r"existing_value \[EET\] conflicts with new value \[CET\]" - ) - with pytest.raises(ValueError, match=msg): - store.append("df_tz", df) - - # as index with ensure_clean_store(setup_path) as store: - dti = date_range("2000-1-1", periods=3, freq="H", tz="US/Eastern") - dti = dti._with_freq(None) # freq doesnt round-trip - - # GH 4098 example - df = DataFrame({"A": Series(range(3), index=dti)}) - _maybe_remove(store, "df") store.put("df", df) result = store.select("df") @@ -321,17 +240,19 @@ def test_timezones_fixed_format_frame_non_empty(setup_path): tm.assert_frame_equal(result, df) -def 
test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture): +def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series): # GH 20594 dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) + obj = Series(dtype=dtype, name="A") + if frame_or_series is DataFrame: + obj = obj.to_frame() + with ensure_clean_store(setup_path) as store: - s = Series(dtype=dtype) - df = DataFrame({"A": s}) - store["df"] = df - result = store["df"] - tm.assert_frame_equal(result, df) + store["obj"] = obj + result = store["obj"] + tm.assert_equal(result, obj) def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): @@ -346,18 +267,6 @@ def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture): tm.assert_series_equal(result, s) -def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture): - # GH 20594 - - dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture) - - with ensure_clean_store(setup_path) as store: - s = Series(dtype=dtype) - store["s"] = s - result = store["s"] - tm.assert_series_equal(result, s) - - def test_fixed_offset_tz(setup_path): rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") frame = DataFrame(np.random.randn(len(rng), 4), index=rng) @@ -378,7 +287,7 @@ def test_store_timezone(setup_path): # original method with ensure_clean_store(setup_path) as store: - today = datetime.date(2013, 9, 10) + today = date(2013, 9, 10) df = DataFrame([1, 2, 3], index=[today, today, today]) store["obj1"] = df result = store["obj1"] @@ -388,7 +297,7 @@ def test_store_timezone(setup_path): with ensure_clean_store(setup_path) as store: with tm.set_timezone("EST5EDT"): - today = datetime.date(2013, 9, 10) + today = date(2013, 9, 10) df = DataFrame([1, 2, 3], index=[today, today, today]) store["obj1"] = df diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index b23959a7d87a2..5d847f7b6f2bd 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ 
b/pandas/tests/io/sas/test_sas7bdat.py @@ -7,7 +7,10 @@ import numpy as np import pytest -from pandas.errors import EmptyDataError, PerformanceWarning +from pandas.errors import ( + EmptyDataError, + PerformanceWarning, +) import pandas.util._test_decorators as td import pandas as pd diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 440c370857eef..e60807db55f97 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -4,10 +4,17 @@ import pytest import pandas as pd -from pandas import DataFrame, get_option, read_clipboard +from pandas import ( + DataFrame, + get_option, + read_clipboard, +) import pandas._testing as tm -from pandas.io.clipboard import clipboard_get, clipboard_set +from pandas.io.clipboard import ( + clipboard_get, + clipboard_set, +) def build_kwargs(sep, excel): diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 69a1427cec34f..db742fb69dd10 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -2,7 +2,10 @@ Tests for the pandas.io.common functionalities """ import codecs -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import mmap import os from pathlib import Path diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 76bc188afdd1f..66c238bbd0962 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -112,7 +112,7 @@ def test_compression_warning(compression_only): ) with tm.ensure_clean() as path: with icom.get_handle(path, "w", compression=compression_only) as handles: - with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): + with tm.assert_produces_warning(RuntimeWarning): df.to_csv(handles.handle, compression=compression_only) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index ed6f3b7d21d43..bb2a042f9168b 100644 --- a/pandas/tests/io/test_gcs.py +++ 
b/pandas/tests/io/test_gcs.py @@ -4,7 +4,14 @@ import numpy as np import pytest -from pandas import DataFrame, date_range, read_csv, read_excel, read_json, read_parquet +from pandas import ( + DataFrame, + date_range, + read_csv, + read_excel, + read_json, + read_parquet, +) import pandas._testing as tm from pandas.util import _test_decorators as td @@ -12,7 +19,10 @@ @pytest.fixture def gcs_buffer(monkeypatch): """Emulate GCS using a binary buffer.""" - from fsspec import AbstractFileSystem, registry + from fsspec import ( + AbstractFileSystem, + registry, + ) registry.target.clear() # remove state @@ -131,7 +141,10 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding) @td.skip_if_no("gcsfs") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" - from fsspec import AbstractFileSystem, registry + from fsspec import ( + AbstractFileSystem, + registry, + ) registry.target.clear() # remove state df1 = DataFrame( diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 7b762e4891c14..9f1df201ee0e6 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1,6 +1,9 @@ from functools import partial from importlib import reload -from io import BytesIO, StringIO +from io import ( + BytesIO, + StringIO, +) import os from pathlib import Path import re diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index c72363b088a5c..d5567f1208c8c 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -9,7 +9,10 @@ import numpy as np import pytest -from pandas.compat import PY38, is_platform_windows +from pandas.compat import ( + PY38, + is_platform_windows, +) import pandas.util._test_decorators as td import pandas as pd diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 135f243eb867e..63dfbd59acd94 100644 --- a/pandas/tests/io/test_pickle.py +++ 
b/pandas/tests/io/test_pickle.py @@ -21,20 +21,35 @@ from pathlib import Path import pickle import shutil -from warnings import catch_warnings, simplefilter +from warnings import ( + catch_warnings, + simplefilter, +) import zipfile import numpy as np import pytest -from pandas.compat import PY38, get_lzma_file, import_lzma, is_platform_little_endian +from pandas.compat import ( + PY38, + get_lzma_file, + import_lzma, + is_platform_little_endian, +) import pandas.util._test_decorators as td import pandas as pd -from pandas import Index, Series, period_range +from pandas import ( + Index, + Series, + period_range, +) import pandas._testing as tm -from pandas.tseries.offsets import Day, MonthEnd +from pandas.tseries.offsets import ( + Day, + MonthEnd, +) lzma = import_lzma() diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index b70bc3c598702..0be26ab285079 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,7 +18,11 @@ """ import csv -from datetime import date, datetime, time +from datetime import ( + date, + datetime, + time, +) from io import StringIO import sqlite3 import warnings @@ -26,7 +30,10 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_datetime64_dtype, is_datetime64tz_dtype +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, +) import pandas as pd from pandas import ( @@ -44,7 +51,10 @@ import pandas._testing as tm import pandas.io.sql as sql -from pandas.io.sql import read_sql_query, read_sql_table +from pandas.io.sql import ( + read_sql_query, + read_sql_table, +) try: import sqlalchemy diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 058dc7659fc95..de1f3cf1e6338 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -18,7 +18,10 @@ import pandas as pd import pandas._testing as tm -from pandas.core.frame import DataFrame, Series +from pandas.core.frame import ( + DataFrame, + 
Series, +) from pandas.io.parsers import read_csv from pandas.io.stata import ( diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py index f3f09d7a42204..0bdb7b0e71e2d 100644 --- a/pandas/tests/libs/test_join.py +++ b/pandas/tests/libs/test_join.py @@ -2,7 +2,10 @@ import pytest from pandas._libs import join as libjoin -from pandas._libs.join import inner_join, left_outer_join +from pandas._libs.join import ( + inner_join, + left_outer_join, +) import pandas._testing as tm diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index da3e18c8d9634..60c42878497c2 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas._libs import Timestamp, lib, writers as libwriters +from pandas._libs import ( + Timestamp, + lib, + writers as libwriters, +) from pandas import Index import pandas._testing as tm diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 29c02916ec6e9..fa0a09a84a8f0 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -8,7 +8,11 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Sequence, Union +from typing import ( + TYPE_CHECKING, + Sequence, + Union, +) import warnings import numpy as np @@ -19,7 +23,11 @@ from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, Series, to_datetime +from pandas import ( + DataFrame, + Series, + to_datetime, +) import pandas._testing as tm if TYPE_CHECKING: @@ -228,7 +236,11 @@ def _check_colors( Series used for color grouping key used for andrew_curves, parallel_coordinates, radviz test """ - from matplotlib.collections import Collection, LineCollection, PolyCollection + from matplotlib.collections import ( + Collection, + LineCollection, + PolyCollection, + ) from matplotlib.lines import Line2D conv = self.colorconverter diff --git 
a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index 41df9fb2e5af0..920350477bb20 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -1,5 +1,8 @@ """ Test cases for DataFrame.plot """ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) import itertools import re import string @@ -13,9 +16,19 @@ from pandas.core.dtypes.api import is_list_like import pandas as pd -from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +from pandas import ( + DataFrame, + MultiIndex, + PeriodIndex, + Series, + bdate_range, + date_range, +) import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) from pandas.io.formats.printing import pprint_thing import pandas.plotting as plotting diff --git a/pandas/tests/plotting/frame/test_frame_color.py b/pandas/tests/plotting/frame/test_frame_color.py index 7eb12d3193b09..6844124d15f9d 100644 --- a/pandas/tests/plotting/frame/test_frame_color.py +++ b/pandas/tests/plotting/frame/test_frame_color.py @@ -10,7 +10,10 @@ import pandas as pd from pandas import DataFrame import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) pytestmark = pytest.mark.slow diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 049f357a4647f..0e25fb5f4c01f 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -9,7 +9,11 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm 
from pandas.tests.plotting.common import TestPlotBase diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index b7dab714961db..448679d562a4a 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -8,9 +8,18 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, MultiIndex, Series, date_range, timedelta_range +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, + timedelta_range, +) import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) import pandas.plotting as plotting diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 4f332bfbac397..75f2dcacf244d 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -1,4 +1,7 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) import subprocess import sys @@ -7,17 +10,31 @@ import pandas._config.config as cf -from pandas.compat import is_platform_windows, np_datetime64_compat +from pandas.compat import ( + is_platform_windows, + np_datetime64_compat, +) import pandas.util._test_decorators as td -from pandas import Index, Period, Series, Timestamp, date_range +from pandas import ( + Index, + Period, + Series, + Timestamp, + date_range, +) import pandas._testing as tm from pandas.plotting import ( deregister_matplotlib_converters, register_matplotlib_converters, ) -from pandas.tseries.offsets import Day, Micro, Milli, Second +from pandas.tseries.offsets import ( + Day, + Micro, + Milli, + Second, +) try: from pandas.plotting._matplotlib import converter diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index e3fd404ec1906..6e71b56e8182b 100644 --- 
a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1,18 +1,41 @@ """ Test cases for time series specific (freq conversion, etc) """ -from datetime import date, datetime, time, timedelta +from datetime import ( + date, + datetime, + time, + timedelta, +) import pickle import sys import numpy as np import pytest -from pandas._libs.tslibs import BaseOffset, to_offset +from pandas._libs.tslibs import ( + BaseOffset, + to_offset, +) import pandas.util._test_decorators as td -from pandas import DataFrame, Index, NaT, Series, isna, to_datetime +from pandas import ( + DataFrame, + Index, + NaT, + Series, + isna, + to_datetime, +) import pandas._testing as tm -from pandas.core.indexes.datetimes import DatetimeIndex, bdate_range, date_range -from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.indexes.datetimes import ( + DatetimeIndex, + bdate_range, + date_range, +) +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) from pandas.core.indexes.timedeltas import timedelta_range from pandas.tests.plotting.common import TestPlotBase diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py index f73ceee577a18..76320767a6b01 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -7,7 +7,11 @@ from pandas.compat import is_platform_windows import pandas.util._test_decorators as td -from pandas import DataFrame, Index, Series +from pandas import ( + DataFrame, + Index, + Series, +) import pandas._testing as tm from pandas.tests.plotting.common import TestPlotBase diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index d4901ba5576c9..a6e3ba71e94ab 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -6,9 +6,17 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Index, 
Series, to_datetime +from pandas import ( + DataFrame, + Index, + Series, + to_datetime, +) import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) pytestmark = pytest.mark.slow @@ -103,7 +111,10 @@ def test_hist_layout_with_by(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) def test_hist_no_overlap(self): - from matplotlib.pyplot import gcf, subplot + from matplotlib.pyplot import ( + gcf, + subplot, + ) x = Series(np.random.randn(2)) y = Series(np.random.randn(2)) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 803c56b3b8eb3..7f0d1802580b9 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -5,9 +5,15 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) import pandas.plotting as plotting diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index b4848f80e9a2c..59b0cc99d94fb 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -10,9 +10,16 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm -from pandas.tests.plotting.common import TestPlotBase, _check_plot_works +from pandas.tests.plotting.common import ( + TestPlotBase, + _check_plot_works, +) import pandas.plotting as plotting diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index cb64b2423696f..decff32baa970 100644 --- 
a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest @@ -1417,7 +1420,7 @@ def test_mode_sortwarning(self): expected = Series(["foo", np.nan]) s = Series([1, "foo", "foo", np.nan, np.nan]) - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + with tm.assert_produces_warning(UserWarning): result = s.mode(dropna=False) result = result.sort_values().reset_index(drop=True) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py index ab13893901104..88f69d00447b1 100644 --- a/pandas/tests/reductions/test_stat_reductions.py +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -9,9 +9,16 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm -from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) class TestDatetimeLikeStatReductions: diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py index cb62263b885aa..420c3028382fc 100644 --- a/pandas/tests/resample/conftest.py +++ b/pandas/tests/resample/conftest.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) from pandas.core.indexes.datetimes import date_range from pandas.core.indexes.period import period_range diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 1154bc3316ae8..2244f6eba9479 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -3,7 +3,12 @@ import numpy as np import pytest -from pandas import DataFrame, NaT, PeriodIndex, Series +from 
pandas import ( + DataFrame, + NaT, + PeriodIndex, + Series, +) import pandas._testing as tm from pandas.core.groupby.groupby import DataError from pandas.core.groupby.grouper import Grouper diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index c23a22448fbb0..cbf69696d5801 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -10,12 +10,25 @@ from pandas.errors import UnsupportedFunctionCall import pandas as pd -from pandas import DataFrame, Series, Timedelta, Timestamp, isna, notna +from pandas import ( + DataFrame, + Series, + Timedelta, + Timestamp, + isna, + notna, +) import pandas._testing as tm from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import Period, period_range -from pandas.core.resample import DatetimeIndex, _get_timestamp_range_edges +from pandas.core.indexes.period import ( + Period, + period_range, +) +from pandas.core.resample import ( + DatetimeIndex, + _get_timestamp_range_edges, +) import pandas.tseries.offsets as offsets from pandas.tseries.offsets import Minute diff --git a/pandas/tests/resample/test_deprecated.py b/pandas/tests/resample/test_deprecated.py index b3d568678afb5..d3672b3d32be1 100644 --- a/pandas/tests/resample/test_deprecated.py +++ b/pandas/tests/resample/test_deprecated.py @@ -1,16 +1,28 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import PeriodIndex, period_range +from pandas.core.indexes.period import ( + PeriodIndex, + period_range, +) from pandas.core.indexes.timedeltas import timedelta_range -from pandas.tseries.offsets 
import BDay, Minute +from pandas.tseries.offsets import ( + BDay, + Minute, +) DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 2fe3fb91768e6..5dab0cbafbc59 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -5,15 +5,26 @@ import pytest import pytz -from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.ccalendar import ( + DAYS, + MONTHS, +) from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.errors import InvalidIndexError import pandas as pd -from pandas import DataFrame, Series, Timestamp +from pandas import ( + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.indexes.period import ( + Period, + PeriodIndex, + period_range, +) from pandas.core.resample import _get_period_range_edges import pandas.tseries.offsets as offsets diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index d217957cbe08a..48c068be843a9 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -4,7 +4,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 39d4533ca08dc..a17ed44c4011a 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -7,7 +7,11 @@ from pandas.util._test_decorators import 
async_mark import pandas as pd -from pandas import DataFrame, Series, Timestamp +from pandas import ( + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm from pandas.core.indexes.datetimes import date_range diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index c669cf39c9a61..e62c907032938 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -5,7 +5,11 @@ import pytest import pandas as pd -from pandas import DataFrame, Series, Timestamp +from pandas import ( + DataFrame, + Series, + Timestamp, +) import pandas._testing as tm from pandas.core.groupby.grouper import Grouper from pandas.core.indexes.datetimes import date_range diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 1c440b889b146..ee08dac09695b 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -4,7 +4,10 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm from pandas.core.indexes.timedeltas import timedelta_range diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 81d5526f5bd15..7b9f8d1c2879e 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -7,7 +7,14 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, Timestamp, concat, isna +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, + concat, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 8b7fb69f7ee05..9bd098a9e4e72 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -2,7 +2,12 @@ import pytest import 
pandas as pd -from pandas import Categorical, DataFrame, Index, Series +from pandas import ( + Categorical, + DataFrame, + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/concat/test_categorical.py b/pandas/tests/reshape/concat/test_categorical.py index 357274b66332f..a81085e083199 100644 --- a/pandas/tests/reshape/concat/test_categorical.py +++ b/pandas/tests/reshape/concat/test_categorical.py @@ -3,7 +3,11 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, DataFrame, Series +from pandas import ( + Categorical, + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 575903de8f946..8f18f87f2decc 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -1,4 +1,7 @@ -from collections import abc, deque +from collections import ( + abc, + deque, +) from decimal import Decimal from warnings import catch_warnings @@ -6,7 +9,14 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + date_range, +) import pandas._testing as tm from pandas.core.arrays import SparseArray from pandas.core.construction import create_series_with_explicit_dtype diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 295846ee1b264..f5eb0ab8c9a17 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -2,7 +2,12 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, concat +from pandas import ( + DataFrame, + Index, + Series, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 
a97e9265b4f99..0e86cb0ae48c0 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -2,7 +2,13 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, concat, date_range +from pandas import ( + DataFrame, + Index, + Series, + concat, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index 3fc886893b55a..c822dab9b8cfc 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -2,7 +2,13 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, concat +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/concat/test_invalid.py b/pandas/tests/reshape/concat/test_invalid.py index cc9f09c16fb43..95a81ce61c785 100644 --- a/pandas/tests/reshape/concat/test_invalid.py +++ b/pandas/tests/reshape/concat/test_invalid.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import DataFrame, concat, read_csv +from pandas import ( + DataFrame, + concat, + read_csv, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 500d7000817af..e5499c44be7d7 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -13,7 +13,11 @@ merge, ) import pandas._testing as tm -from pandas.tests.reshape.merge.test_merge import NGROUPS, N, get_test_data +from pandas.tests.reshape.merge.test_merge import ( + NGROUPS, + N, + get_test_data, +) a_ = np.array diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index da3ac81c4aa17..d9af59382ae79 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1,11 +1,18 @@ -from datetime import date, 
datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) import random import re import numpy as np import pytest -from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_object_dtype, +) from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -27,7 +34,10 @@ import pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT from pandas.core.reshape.concat import concat -from pandas.core.reshape.merge import MergeError, merge +from pandas.core.reshape.merge import ( + MergeError, + merge, +) N = 50 NGROUPS = 8 diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index ecff63b495fbb..5fa08904e3fcf 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -5,7 +5,12 @@ import pytz import pandas as pd -from pandas import Timedelta, merge_asof, read_csv, to_datetime +from pandas import ( + Timedelta, + merge_asof, + read_csv, + to_datetime, +) import pandas._testing as tm from pandas.core.reshape.merge import MergeError diff --git a/pandas/tests/reshape/merge/test_merge_cross.py b/pandas/tests/reshape/merge/test_merge_cross.py index d6c29ea129027..7e14b515836cf 100644 --- a/pandas/tests/reshape/merge/test_merge_cross.py +++ b/pandas/tests/reshape/merge/test_merge_cross.py @@ -2,7 +2,10 @@ from pandas import DataFrame import pandas._testing as tm -from pandas.core.reshape.merge import MergeError, merge +from pandas.core.reshape.merge import ( + MergeError, + merge, +) @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 4a70719df5c57..4a4af789d540b 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from 
pandas import DataFrame, merge_ordered +from pandas import ( + DataFrame, + merge_ordered, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index f47f4e1577277..56ea3c9718a41 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -2,7 +2,13 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, +) import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 6faf64789c687..86cde3eee874d 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -3,7 +3,14 @@ from pandas.core.dtypes.common import is_categorical_dtype -from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, Series, crosstab +from pandas import ( + CategoricalIndex, + DataFrame, + Index, + MultiIndex, + Series, + crosstab, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py index 42907b3b4e23f..8af49ac20987a 100644 --- a/pandas/tests/reshape/test_get_dummies.py +++ b/pandas/tests/reshape/test_get_dummies.py @@ -6,9 +6,18 @@ from pandas.core.dtypes.common import is_integer_dtype import pandas as pd -from pandas import Categorical, CategoricalIndex, DataFrame, Series, get_dummies +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Series, + get_dummies, +) import pandas._testing as tm -from pandas.core.arrays.sparse import SparseArray, SparseDtype +from pandas.core.arrays.sparse import ( + SparseArray, + SparseDtype, +) class TestGetDummies: diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 
1f39302845ae9..53244569d0432 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -2,7 +2,12 @@ import pytest import pandas as pd -from pandas import DataFrame, lreshape, melt, wide_to_long +from pandas import ( + DataFrame, + lreshape, + melt, + wide_to_long, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f9b2a02920841..19eba4305fdf6 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1,4 +1,8 @@ -from datetime import date, datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) from itertools import product import numpy as np diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index f59a469c05d15..df2ae0d52c660 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import Index, Int64Index, MultiIndex +from pandas import ( + Index, + Int64Index, + MultiIndex, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index e7a04bafed8e3..d48fde35f8561 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -23,7 +23,10 @@ from pandas.api.types import CategoricalDtype as CDT from pandas.core.algorithms import quantile -from pandas.tseries.offsets import Day, Nano +from pandas.tseries.offsets import ( + Day, + Nano, +) def test_qcut(): diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index 8c0c0a1f22760..f39b5de2478b0 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -4,7 +4,11 @@ from pandas.core.dtypes.concat import union_categoricals import pandas as pd -from pandas import Categorical, 
CategoricalIndex, Series +from pandas import ( + Categorical, + CategoricalIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 0acadc54cec0c..890562712f3b7 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Index, date_range +from pandas import ( + Index, + date_range, +) import pandas._testing as tm from pandas.core.reshape.util import cartesian_product diff --git a/pandas/tests/scalar/interval/test_arithmetic.py b/pandas/tests/scalar/interval/test_arithmetic.py index b4c2b448e252a..987f7d53afacc 100644 --- a/pandas/tests/scalar/interval/test_arithmetic.py +++ b/pandas/tests/scalar/interval/test_arithmetic.py @@ -3,7 +3,11 @@ import numpy as np import pytest -from pandas import Interval, Timedelta, Timestamp +from pandas import ( + Interval, + Timedelta, + Timestamp, +) @pytest.mark.parametrize("method", ["__add__", "__sub__"]) diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py index 5071c5cdec6c8..1f76a7df1e996 100644 --- a/pandas/tests/scalar/interval/test_interval.py +++ b/pandas/tests/scalar/interval/test_interval.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import Interval, Period, Timedelta, Timestamp +from pandas import ( + Interval, + Period, + Timedelta, + Timestamp, +) import pandas._testing as tm import pandas.core.common as com diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py index 2d9f0954af5a8..9fe40c208d880 100644 --- a/pandas/tests/scalar/interval/test_ops.py +++ b/pandas/tests/scalar/interval/test_ops.py @@ -1,7 +1,11 @@ """Tests for Interval-Interval operations, such as overlaps, contains, etc.""" import pytest -from pandas import Interval, Timedelta, Timestamp +from pandas import ( + Interval, + Timedelta, + Timestamp, +) 
@pytest.fixture( diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 56281521deb90..9110352d33c26 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -4,7 +4,11 @@ from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG from pandas.errors import OutOfBoundsDatetime -from pandas import Period, Timestamp, offsets +from pandas import ( + Period, + Timestamp, + offsets, +) class TestFreqConversion: diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 285bf37176af6..7dcd4dc979eb2 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1,19 +1,41 @@ -from datetime import date, datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) import numpy as np import pytest import pytz -from pandas._libs.tslibs import iNaT, period as libperiod -from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs import ( + iNaT, + period as libperiod, +) +from pandas._libs.tslibs.ccalendar import ( + DAYS, + MONTHS, +) from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.parsing import DateParseError -from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG, IncompatibleFrequency -from pandas._libs.tslibs.timezones import dateutil_gettz, maybe_get_tz +from pandas._libs.tslibs.period import ( + INVALID_FREQ_ERR_MSG, + IncompatibleFrequency, +) +from pandas._libs.tslibs.timezones import ( + dateutil_gettz, + maybe_get_tz, +) from pandas.compat import np_datetime64_compat import pandas as pd -from pandas import NaT, Period, Timedelta, Timestamp, offsets +from pandas import ( + NaT, + Period, + Timedelta, + Timestamp, + offsets, +) import pandas._testing as tm diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index ea867d8607d2e..9ccdd0261de0e 100644 --- 
a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import operator import numpy as np @@ -24,7 +27,11 @@ offsets, ) import pandas._testing as tm -from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray +from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + TimedeltaArray, +) from pandas.core.ops import roperator diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 41671343c2800..b9594a9c876c6 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -1,7 +1,10 @@ """ Tests for scalar Timedelta arithmetic ops """ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import operator import numpy as np @@ -10,7 +13,13 @@ from pandas.compat import is_numpy_dev import pandas as pd -from pandas import NaT, Timedelta, Timestamp, compat, offsets +from pandas import ( + NaT, + Timedelta, + Timestamp, + compat, + offsets, +) import pandas._testing as tm from pandas.core import ops diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 9cbe165434369..47b09280854de 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -6,7 +6,11 @@ from pandas._libs.tslibs import OutOfBoundsTimedelta -from pandas import Timedelta, offsets, to_timedelta +from pandas import ( + Timedelta, + offsets, + to_timedelta, +) def test_construction(): diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index 906ed038c4840..8b42bca8b8a0c 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -4,10 +4,18 @@ import numpy as np 
import pytest -from pandas._libs.tslibs import NaT, iNaT +from pandas._libs.tslibs import ( + NaT, + iNaT, +) import pandas as pd -from pandas import Timedelta, TimedeltaIndex, offsets, to_timedelta +from pandas import ( + Timedelta, + TimedeltaIndex, + offsets, + to_timedelta, +) import pandas._testing as tm diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 1e980b6e4559c..e178b31b9ae93 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index 9ee7cb1840e5b..555067f2aba1a 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import operator import numpy as np diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 654c7d502610e..663892cefb5e6 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -1,5 +1,8 @@ import calendar -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import dateutil.tz from dateutil.tz import tzutc @@ -9,7 +12,12 @@ from pandas.errors import OutOfBoundsDatetime -from pandas import Period, Timedelta, Timestamp, compat +from pandas import ( + Period, + Timedelta, + Timestamp, + compat, +) from pandas.tseries import offsets diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index c94c16d1d603a..54f3f21dc9f6f 100644 --- 
a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -1,7 +1,10 @@ """ test the scalar Timestamp """ import calendar -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import locale import unicodedata @@ -9,13 +12,23 @@ import numpy as np import pytest import pytz -from pytz import timezone, utc +from pytz import ( + timezone, + utc, +) -from pandas._libs.tslibs.timezones import dateutil_gettz as gettz, get_timezone +from pandas._libs.tslibs.timezones import ( + dateutil_gettz as gettz, + get_timezone, +) from pandas.compat import np_datetime64_compat import pandas.util._test_decorators as td -from pandas import NaT, Timedelta, Timestamp +from pandas import ( + NaT, + Timedelta, + Timestamp, +) import pandas._testing as tm from pandas.tseries import offsets @@ -494,7 +507,7 @@ def test_to_pydatetime_nonzero_nano(self): ts = Timestamp("2011-01-01 9:00:00.123456789") # Warn the user of data loss (nanoseconds). - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + with tm.assert_produces_warning(UserWarning): expected = datetime(2011, 1, 1, 9, 0, 0, 123456) result = ts.to_pydatetime() assert result == expected @@ -528,13 +541,13 @@ def test_to_datetime_bijective(self): # Ensure that converting to datetime and back only loses precision # by going from nanoseconds to microseconds. 
exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + with tm.assert_produces_warning(exp_warning): pydt_max = Timestamp.max.to_pydatetime() assert Timestamp(pydt_max).value / 1000 == Timestamp.max.value / 1000 exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + with tm.assert_produces_warning(exp_warning): pydt_min = Timestamp.min.to_pydatetime() # The next assertion can be enabled once GH#39221 is merged diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index f05f2054b2483..9ba4a2c1f77cd 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -1,19 +1,32 @@ """ Tests for Timestamp timezone-related methods """ -from datetime import date, datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) import dateutil -from dateutil.tz import gettz, tzoffset +from dateutil.tz import ( + gettz, + tzoffset, +) import pytest import pytz -from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError +from pytz.exceptions import ( + AmbiguousTimeError, + NonExistentTimeError, +) from pandas._libs.tslibs import timezones from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td -from pandas import NaT, Timestamp +from pandas import ( + NaT, + Timestamp, +) class TestTimestampTZOperations: diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 4278d185ea7dd..aab0b2e6d31ef 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -6,7 +6,13 @@ import pytz from pytz import utc -from pandas._libs.tslibs import NaT, Timedelta, Timestamp, conversion, to_offset +from pandas._libs.tslibs import ( + NaT, + 
Timedelta, + Timestamp, + conversion, + to_offset, +) from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG import pandas.util._test_decorators as td diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 7a84f642aebc2..6199e77e10166 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -1,5 +1,9 @@ import calendar -from datetime import date, datetime, time +from datetime import ( + date, + datetime, + time, +) import locale import unicodedata @@ -9,7 +13,10 @@ from pandas._libs.tslibs.timezones import maybe_get_tz -from pandas.core.dtypes.common import is_integer_dtype, is_list_like +from pandas.core.dtypes.common import ( + is_integer_dtype, + is_list_like, +) import pandas as pd from pandas import ( @@ -573,7 +580,10 @@ def test_strftime_nat(self, data): def test_valid_dt_with_missing_values(self): - from datetime import date, time + from datetime import ( + date, + time, + ) # GH 8689 s = Series(date_range("20130101", periods=5, freq="D")) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 6c595e1f0f19f..347aa8d66405c 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -1,10 +1,16 @@ """ Also test support for datetime64[ns] in Series / DataFrame """ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import re -from dateutil.tz import gettz, tzutc +from dateutil.tz import ( + gettz, + tzutc, +) import numpy as np import pytest import pytz @@ -12,7 +18,13 @@ from pandas._libs import index as libindex import pandas as pd -from pandas import DataFrame, Series, Timestamp, date_range, period_range +from pandas import ( + DataFrame, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm diff --git 
a/pandas/tests/series/indexing/test_delitem.py b/pandas/tests/series/indexing/test_delitem.py index 6c7e3f2b06983..019cb92d780ef 100644 --- a/pandas/tests/series/indexing/test_delitem.py +++ b/pandas/tests/series/indexing/test_delitem.py @@ -1,6 +1,9 @@ import pytest -from pandas import Index, Series +from pandas import ( + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 5a5e285bf719f..64d763f410666 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -1,12 +1,21 @@ """ Series.__getitem__ test classes are organized by the type of key passed. """ -from datetime import date, datetime, time +from datetime import ( + date, + datetime, + time, +) import numpy as np import pytest -from pandas._libs.tslibs import conversion, timezones +from pandas._libs.tslibs import ( + conversion, + timezones, +) + +from pandas.core.dtypes.common import is_scalar import pandas as pd from pandas import ( @@ -526,3 +535,63 @@ def test_getitem_preserve_name(datetime_series): result = datetime_series[5:10] assert result.name == datetime_series.name + + +def test_getitem_with_integer_labels(): + # integer indexes, be careful + ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 2, 5, 7, 8] + arr_inds = np.array([0, 2, 5, 7, 8]) + with pytest.raises(KeyError, match="with any missing labels"): + ser[inds] + + with pytest.raises(KeyError, match="with any missing labels"): + ser[arr_inds] + + +def test_getitem_missing(datetime_series): + # missing + d = datetime_series.index[0] - BDay() + msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" + with pytest.raises(KeyError, match=msg): + datetime_series[d] + + +def test_getitem_fancy(string_series, object_series): + slice1 = string_series[[1, 2, 3]] + slice2 = object_series[[1, 2, 3]] + assert string_series.index[2] == slice1.index[1] + assert 
object_series.index[2] == slice2.index[1] + assert string_series[2] == slice1[1] + assert object_series[2] == slice2[1] + + +def test_getitem_box_float64(datetime_series): + value = datetime_series[5] + assert isinstance(value, np.float64) + + +def test_getitem_unordered_dup(): + obj = Series(range(5), index=["c", "a", "a", "b", "b"]) + assert is_scalar(obj["c"]) + assert obj["c"] == 0 + + +def test_getitem_dups(): + ser = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64) + expected = Series([3, 4], index=["C", "C"], dtype=np.int64) + result = ser["C"] + tm.assert_series_equal(result, expected) + + +def test_getitem_categorical_str(): + # GH#31765 + ser = Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) + result = ser["a"] + expected = ser.iloc[[0, 3]] + tm.assert_series_equal(result, expected) + + # Check the intermediate steps work as expected + with tm.assert_produces_warning(FutureWarning): + result = ser.index.get_value(ser, "a") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index d97410562083c..49264c5b669d7 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_scalar - import pandas as pd from pandas import ( Categorical, @@ -22,8 +20,6 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import BDay - def test_basic_indexing(): s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"]) @@ -58,18 +54,6 @@ def test_basic_getitem_with_labels(datetime_series): tm.assert_series_equal(result, expected) -def test_basic_getitem_with_integer_labels(): - # integer indexes, be careful - ser = Series(np.random.randn(10), index=list(range(0, 20, 2))) - inds = [0, 2, 5, 7, 8] - arr_inds = np.array([0, 2, 5, 7, 8]) - with pytest.raises(KeyError, match="with any missing labels"): - 
ser[inds] - - with pytest.raises(KeyError, match="with any missing labels"): - ser[arr_inds] - - def test_basic_getitem_dt64tz_values(): # GH12089 @@ -98,24 +82,7 @@ def test_getitem_setitem_ellipsis(): assert (result == 5).all() -def test_getitem_missing(datetime_series): - # missing - d = datetime_series.index[0] - BDay() - msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" - with pytest.raises(KeyError, match=msg): - datetime_series[d] - - -def test_getitem_fancy(string_series, object_series): - slice1 = string_series[[1, 2, 3]] - slice2 = object_series[[1, 2, 3]] - assert string_series.index[2] == slice1.index[1] - assert object_series.index[2] == slice2.index[1] - assert string_series[2] == slice1[1] - assert object_series[2] == slice2[1] - - -def test_type_promotion(): +def test_setitem_with_expansion_type_promotion(): # GH12599 s = Series(dtype=object) s["a"] = Timestamp("2016-01-01") @@ -157,11 +124,6 @@ def test_getitem_setitem_integers(): tm.assert_almost_equal(s["a"], 5) -def test_getitem_box_float64(datetime_series): - value = datetime_series[5] - assert isinstance(value, np.float64) - - def test_series_box_timestamp(): rng = pd.date_range("20090415", "20090519", freq="B") ser = Series(rng) @@ -189,49 +151,26 @@ def test_series_box_timedelta(): assert isinstance(ser.iloc[4], Timedelta) -def test_getitem_ambiguous_keyerror(): - s = Series(range(10), index=list(range(0, 20, 2))) - with pytest.raises(KeyError, match=r"^1$"): - s[1] +def test_getitem_ambiguous_keyerror(indexer_sl): + ser = Series(range(10), index=list(range(0, 20, 2))) with pytest.raises(KeyError, match=r"^1$"): - s.loc[1] + indexer_sl(ser)[1] -def test_getitem_unordered_dup(): - obj = Series(range(5), index=["c", "a", "a", "b", "b"]) - assert is_scalar(obj["c"]) - assert obj["c"] == 0 - - -def test_getitem_dups_with_missing(): +def test_getitem_dups_with_missing(indexer_sl): # breaks reindex, so need to use .loc internally # GH 4246 - s = Series([1, 2, 3, 4], ["foo", "bar", "foo", 
"bah"]) + ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) with pytest.raises(KeyError, match="with any missing labels"): - s.loc[["foo", "bar", "bah", "bam"]] + indexer_sl(ser)[["foo", "bar", "bah", "bam"]] - with pytest.raises(KeyError, match="with any missing labels"): - s[["foo", "bar", "bah", "bam"]] - -def test_getitem_dups(): - s = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64) - expected = Series([3, 4], index=["C", "C"], dtype=np.int64) - result = s["C"] - tm.assert_series_equal(result, expected) - - -def test_setitem_ambiguous_keyerror(): +def test_setitem_ambiguous_keyerror(indexer_sl): s = Series(range(10), index=list(range(0, 20, 2))) # equivalent of an append s2 = s.copy() - s2[1] = 5 - expected = s.append(Series([5], index=[1])) - tm.assert_series_equal(s2, expected) - - s2 = s.copy() - s2.loc[1] = 5 + indexer_sl(s2)[1] = 5 expected = s.append(Series([5], index=[1])) tm.assert_series_equal(s2, expected) @@ -314,13 +253,10 @@ def test_basic_getitem_setitem_corner(datetime_series): @pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"]) -def test_setitem_with_tz(tz): +def test_setitem_with_tz(tz, indexer_sli): orig = Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz)) assert orig.dtype == f"datetime64[ns, {tz}]" - # scalar - s = orig.copy() - s[1] = Timestamp("2011-01-01", tz=tz) exp = Series( [ Timestamp("2016-01-01 00:00", tz=tz), @@ -328,15 +264,11 @@ def test_setitem_with_tz(tz): Timestamp("2016-01-01 02:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) - s = orig.copy() - s.iloc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) # vector vals = Series( @@ -345,7 +277,6 @@ def test_setitem_with_tz(tz): ) assert vals.dtype == f"datetime64[ns, {tz}]" - s[[1, 2]] = vals 
exp = Series( [ Timestamp("2016-01-01 00:00", tz=tz), @@ -353,26 +284,18 @@ def test_setitem_with_tz(tz): Timestamp("2012-01-01 00:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - s = orig.copy() - s.iloc[[1, 2]] = vals - tm.assert_series_equal(s, exp) + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) -def test_setitem_with_tz_dst(): +def test_setitem_with_tz_dst(indexer_sli): # GH XXX TODO: fill in GH ref tz = "US/Eastern" orig = Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz)) assert orig.dtype == f"datetime64[ns, {tz}]" - # scalar - s = orig.copy() - s[1] = Timestamp("2011-01-01", tz=tz) exp = Series( [ Timestamp("2016-11-06 00:00-04:00", tz=tz), @@ -380,15 +303,11 @@ def test_setitem_with_tz_dst(): Timestamp("2016-11-06 01:00-05:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - s = orig.copy() - s.loc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.iloc[1] = Timestamp("2011-01-01", tz=tz) - tm.assert_series_equal(s, exp) + # scalar + ser = orig.copy() + indexer_sli(ser)[1] = Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(ser, exp) # vector vals = Series( @@ -397,7 +316,6 @@ def test_setitem_with_tz_dst(): ) assert vals.dtype == f"datetime64[ns, {tz}]" - s[[1, 2]] = vals exp = Series( [ Timestamp("2016-11-06 00:00", tz=tz), @@ -405,15 +323,10 @@ def test_setitem_with_tz_dst(): Timestamp("2012-01-01 00:00", tz=tz), ] ) - tm.assert_series_equal(s, exp) - - s = orig.copy() - s.loc[[1, 2]] = vals - tm.assert_series_equal(s, exp) - s = orig.copy() - s.iloc[[1, 2]] = vals - tm.assert_series_equal(s, exp) + ser = orig.copy() + indexer_sli(ser)[[1, 2]] = vals + tm.assert_series_equal(ser, exp) def test_categorical_assigning_ops(): @@ -453,19 +366,6 @@ def test_setitem_nan_into_categorical(): tm.assert_series_equal(ser, exp) -def test_getitem_categorical_str(): - # GH#31765 - ser = 
Series(range(5), index=Categorical(["a", "b", "c", "a", "b"])) - result = ser["a"] - expected = ser.iloc[[0, 3]] - tm.assert_series_equal(result, expected) - - # Check the intermediate steps work as expected - with tm.assert_produces_warning(FutureWarning): - result = ser.index.get_value(ser, "a") - tm.assert_series_equal(result, expected) - - def test_slice(string_series, object_series): numSlice = string_series[10:20] numSliceEnd = string_series[-10:] @@ -542,56 +442,6 @@ def test_setitem_td64_non_nano(): tm.assert_series_equal(ser, expected) -@pytest.mark.parametrize( - "nat_val", - [ - pd.NaT, - np.timedelta64("NaT", "ns"), - np.datetime64("NaT", "ns"), - ], -) -@pytest.mark.parametrize("tz", [None, "UTC"]) -def test_dt64_series_assign_nat(nat_val, tz, indexer_sli): - # some nat-like values should be cast to datetime64 when inserting - # into a datetime64 series. Others should coerce to object - # and retain their dtypes. - dti = pd.date_range("2016-01-01", periods=3, tz=tz) - base = Series(dti) - expected = Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype) - - should_cast = nat_val is pd.NaT or base.dtype.kind == nat_val.dtype.kind - if not should_cast: - expected = expected.astype(object) - - ser = base.copy(deep=True) - indexer_sli(ser)[0] = nat_val - tm.assert_series_equal(ser, expected) - - -@pytest.mark.parametrize( - "nat_val", - [ - pd.NaT, - np.timedelta64("NaT", "ns"), - np.datetime64("NaT", "ns"), - ], -) -def test_td64_series_assign_nat(nat_val, indexer_sli): - # some nat-like values should be cast to timedelta64 when inserting - # into a timedelta64 series. Others should coerce to object - # and retain their dtypes. 
- base = Series([0, 1, 2], dtype="m8[ns]") - expected = Series([pd.NaT, 1, 2], dtype="m8[ns]") - - should_cast = nat_val is pd.NaT or base.dtype == nat_val.dtype - if not should_cast: - expected = expected.astype(object) - - ser = base.copy(deep=True) - indexer_sli(ser)[0] = nat_val - tm.assert_series_equal(ser, expected) - - def test_underlying_data_conversion(): # GH 4080 df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]}) diff --git a/pandas/tests/series/indexing/test_set_value.py b/pandas/tests/series/indexing/test_set_value.py index 61b01720d1e40..cbe1a8bf296c8 100644 --- a/pandas/tests/series/indexing/test_set_value.py +++ b/pandas/tests/series/indexing/test_set_value.py @@ -2,7 +2,10 @@ import numpy as np -from pandas import DatetimeIndex, Series +from pandas import ( + DatetimeIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 3a9ec0948b29a..ba9593067a412 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1,4 +1,7 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) import numpy as np import pytest @@ -167,15 +170,6 @@ def test_setitem_boolean_python_list(self, func): expected = Series(["a", "b", "c"]) tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize("value", [None, NaT, np.nan]) - def test_setitem_boolean_td64_values_cast_na(self, value): - # GH#18586 - series = Series([0, 1, 2], dtype="timedelta64[ns]") - mask = series == series[0] - series[mask] = value - expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") - tm.assert_series_equal(series, expected) - def test_setitem_boolean_nullable_int_types(self, any_nullable_numeric_dtype): # GH: 26468 ser = Series([5, 6, 7, 8], dtype=any_nullable_numeric_dtype) @@ -192,6 +186,15 @@ def test_setitem_boolean_nullable_int_types(self, any_nullable_numeric_dtype): ser.loc[ser > 6] = 
loc_ser.loc[loc_ser > 1] tm.assert_series_equal(ser, expected) + def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(self): + # GH#30567 + ser = Series([None] * 10) + mask = [False] * 3 + [True] * 5 + [False] * 2 + ser[mask] = range(5) + result = ser + expected = Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") + tm.assert_series_equal(result, expected) + class TestSetitemViewCopySemantics: def test_setitem_invalidates_datetime_index_freq(self): @@ -248,6 +251,88 @@ def test_setitem_callable_other(self): tm.assert_series_equal(ser, expected) +class TestSetitemWithExpansion: + def test_setitem_empty_series(self): + # GH#10193 + key = Timestamp("2012-01-01") + series = Series(dtype=object) + series[key] = 47 + expected = Series(47, [key]) + tm.assert_series_equal(series, expected) + + def test_setitem_empty_series_datetimeindex_preserves_freq(self): + # GH#33573 our index should retain its freq + series = Series([], DatetimeIndex([], freq="D"), dtype=object) + key = Timestamp("2012-01-01") + series[key] = 47 + expected = Series(47, DatetimeIndex([key], freq="D")) + tm.assert_series_equal(series, expected) + assert series.index.freq == expected.index.freq + + def test_setitem_empty_series_timestamp_preserves_dtype(self): + # GH 21881 + timestamp = Timestamp(1412526600000000000) + series = Series([timestamp], index=["timestamp"], dtype=object) + expected = series["timestamp"] + + series = Series([], dtype=object) + series["anything"] = 300.0 + series["timestamp"] = timestamp + result = series["timestamp"] + assert result == expected + + @pytest.mark.parametrize( + "td", + [ + Timedelta("9 days"), + Timedelta("9 days").to_timedelta64(), + Timedelta("9 days").to_pytimedelta(), + ], + ) + def test_append_timedelta_does_not_cast(self, td): + # GH#22717 inserting a Timedelta should _not_ cast to int64 + expected = Series(["x", td], index=[0, "td"], dtype=object) + + ser = Series(["x"]) + ser["td"] = td + tm.assert_series_equal(ser, 
expected) + assert isinstance(ser["td"], Timedelta) + + ser = Series(["x"]) + ser.loc["td"] = Timedelta("9 days") + tm.assert_series_equal(ser, expected) + assert isinstance(ser["td"], Timedelta) + + +def test_setitem_scalar_into_readonly_backing_data(): + # GH#14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + for n in range(len(series)): + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[n] = 1 + + assert array[n] == 0 + + +def test_setitem_slice_into_readonly_backing_data(): + # GH#14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[1:3] = 1 + + assert not array.any() + + class TestSetitemCasting: @pytest.mark.parametrize("unique", [True, False]) @pytest.mark.parametrize("val", [3, 3.0, "3"], ids=type) @@ -451,88 +536,6 @@ def val(self, request): return request.param -class TestSetitemWithExpansion: - def test_setitem_empty_series(self): - # GH#10193 - key = Timestamp("2012-01-01") - series = Series(dtype=object) - series[key] = 47 - expected = Series(47, [key]) - tm.assert_series_equal(series, expected) - - def test_setitem_empty_series_datetimeindex_preserves_freq(self): - # GH#33573 our index should retain its freq - series = Series([], DatetimeIndex([], freq="D"), dtype=object) - key = Timestamp("2012-01-01") - series[key] = 47 - expected = Series(47, DatetimeIndex([key], freq="D")) - tm.assert_series_equal(series, expected) - assert series.index.freq == expected.index.freq - - def test_setitem_empty_series_timestamp_preserves_dtype(self): - # GH 21881 - timestamp = Timestamp(1412526600000000000) - series = Series([timestamp], index=["timestamp"], dtype=object) - expected = 
series["timestamp"] - - series = Series([], dtype=object) - series["anything"] = 300.0 - series["timestamp"] = timestamp - result = series["timestamp"] - assert result == expected - - @pytest.mark.parametrize( - "td", - [ - Timedelta("9 days"), - Timedelta("9 days").to_timedelta64(), - Timedelta("9 days").to_pytimedelta(), - ], - ) - def test_append_timedelta_does_not_cast(self, td): - # GH#22717 inserting a Timedelta should _not_ cast to int64 - expected = Series(["x", td], index=[0, "td"], dtype=object) - - ser = Series(["x"]) - ser["td"] = td - tm.assert_series_equal(ser, expected) - assert isinstance(ser["td"], Timedelta) - - ser = Series(["x"]) - ser.loc["td"] = Timedelta("9 days") - tm.assert_series_equal(ser, expected) - assert isinstance(ser["td"], Timedelta) - - -def test_setitem_scalar_into_readonly_backing_data(): - # GH#14359: test that you cannot mutate a read only buffer - - array = np.zeros(5) - array.flags.writeable = False # make the array immutable - series = Series(array) - - for n in range(len(series)): - msg = "assignment destination is read-only" - with pytest.raises(ValueError, match=msg): - series[n] = 1 - - assert array[n] == 0 - - -def test_setitem_slice_into_readonly_backing_data(): - # GH#14359: test that you cannot mutate a read only buffer - - array = np.zeros(5) - array.flags.writeable = False # make the array immutable - series = Series(array) - - msg = "assignment destination is read-only" - with pytest.raises(ValueError, match=msg): - series[1:3] = 1 - - assert not array.any() - - class TestSetitemTimedelta64IntoNumeric(SetitemCastingEquivalents): # timedelta64 should not be treated as integers when setting into # numeric Series @@ -640,25 +643,43 @@ def is_inplace(self): return True -class TestSetitemNATimedelta64Dtype(SetitemCastingEquivalents): - # some nat-like values should be cast to timedelta64 when inserting - # into a timedelta64 series. Others should coerce to object - # and retain their dtypes. 
+class TestSetitemNADatetimeLikeDtype(SetitemCastingEquivalents): + # some nat-like values should be cast to datetime64/timedelta64 when + # inserting into a datetime64/timedelta64 series. Others should coerce + # to object and retain their dtypes. + # GH#18586 for td64 and boolean mask case + + @pytest.fixture( + params=["m8[ns]", "M8[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Central]"] + ) + def dtype(self, request): + return request.param @pytest.fixture - def obj(self): - return Series([0, 1, 2], dtype="m8[ns]") + def obj(self, dtype): + i8vals = date_range("2016-01-01", periods=3).asi8 + idx = Index(i8vals, dtype=dtype) + assert idx.dtype == dtype + return Series(idx) @pytest.fixture( - params=[NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")] + params=[ + None, + np.nan, + NaT, + np.timedelta64("NaT", "ns"), + np.datetime64("NaT", "ns"), + ] ) def val(self, request): return request.param @pytest.fixture - def is_inplace(self, val): - # cast to object iff val is datetime64("NaT") - return val is NaT or val.dtype.kind == "m" + def is_inplace(self, val, obj): + # td64 -> cast to object iff val is datetime64("NaT") + # dt64 -> cast to object iff val is timedelta64("NaT") + # dt64tz -> cast to object with anything _but_ NaT + return val is NaT or val is None or val is np.nan or obj.dtype == val.dtype @pytest.fixture def expected(self, obj, val, is_inplace): diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py index edcec386cd8ba..1e50fef55b4ec 100644 --- a/pandas/tests/series/indexing/test_where.py +++ b/pandas/tests/series/indexing/test_where.py @@ -4,7 +4,12 @@ from pandas.core.dtypes.common import is_integer import pandas as pd -from pandas import Series, Timestamp, date_range, isna +from pandas import ( + Series, + Timestamp, + date_range, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index 
83cc6d4670423..b6351e970222f 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import MultiIndex, Series, date_range +from pandas import ( + MultiIndex, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index ef2b07d592b95..8769ab048a136 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -3,7 +3,11 @@ import pytz import pandas as pd -from pandas import Series, date_range, period_range +from pandas import ( + Series, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py index 069557cc65455..2081e244b4e6c 100644 --- a/pandas/tests/series/methods/test_append.py +++ b/pandas/tests/series/methods/test_append.py @@ -2,7 +2,14 @@ import pytest import pandas as pd -from pandas import DataFrame, DatetimeIndex, Index, Series, Timestamp, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timestamp, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index ec9ba468c996c..7a545378ef402 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Series, Timestamp, isna +from pandas import ( + Series, + Timestamp, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_asfreq.py b/pandas/tests/series/methods/test_asfreq.py index cd61c510c75f5..9a7f2343984d6 100644 --- a/pandas/tests/series/methods/test_asfreq.py +++ b/pandas/tests/series/methods/test_asfreq.py @@ -3,10 +3,19 @@ import numpy as np import pytest -from pandas import DataFrame, 
DatetimeIndex, Series, date_range, period_range +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, + period_range, +) import pandas._testing as tm -from pandas.tseries.offsets import BDay, BMonthEnd +from pandas.tseries.offsets import ( + BDay, + BMonthEnd, +) class TestAsFreq: diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 43d40d53dcd21..7a3f68fd3d990 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -3,7 +3,14 @@ from pandas._libs.tslibs import IncompatibleFrequency -from pandas import Series, Timestamp, date_range, isna, notna, offsets +from pandas import ( + Series, + Timestamp, + date_range, + isna, + notna, + offsets, +) import pandas._testing as tm @@ -90,7 +97,10 @@ def test_with_nan(self): tm.assert_series_equal(result, expected) def test_periodindex(self): - from pandas import PeriodIndex, period_range + from pandas import ( + PeriodIndex, + period_range, + ) # array or list or dates N = 50 diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index d683503f22f28..a3785518c860d 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) from importlib import reload import string import sys @@ -88,7 +91,7 @@ def test_astype_empty_constructor_equality(self, dtype): "m", # Generic timestamps raise a ValueError. Already tested. 
): init_empty = Series([], dtype=dtype) - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + with tm.assert_produces_warning(DeprecationWarning): as_type_empty = Series([]).astype(dtype) tm.assert_series_equal(init_empty, as_type_empty) diff --git a/pandas/tests/series/methods/test_between.py b/pandas/tests/series/methods/test_between.py index 350a3fe6ff009..381c733619c6b 100644 --- a/pandas/tests/series/methods/test_between.py +++ b/pandas/tests/series/methods/test_between.py @@ -1,6 +1,11 @@ import numpy as np -from pandas import Series, bdate_range, date_range, period_range +from pandas import ( + Series, + bdate_range, + date_range, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 5a5a397222b87..528e95f65c8f4 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -2,7 +2,12 @@ import pytest import pandas as pd -from pandas import Series, Timestamp, isna, notna +from pandas import ( + Series, + Timestamp, + isna, + notna, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_combine_first.py b/pandas/tests/series/methods/test_combine_first.py index 94aa6b8d84cad..4c254c6db2a70 100644 --- a/pandas/tests/series/methods/test_combine_first.py +++ b/pandas/tests/series/methods/test_combine_first.py @@ -3,7 +3,13 @@ import numpy as np import pandas as pd -from pandas import Period, Series, date_range, period_range, to_datetime +from pandas import ( + Period, + Series, + date_range, + period_range, + to_datetime, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_convert.py b/pandas/tests/series/methods/test_convert.py index f052f4423d32a..b658929dfd0d5 100644 --- a/pandas/tests/series/methods/test_convert.py +++ b/pandas/tests/series/methods/test_convert.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas import Series, 
Timestamp +from pandas import ( + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_copy.py b/pandas/tests/series/methods/test_copy.py index 6201c0f5f7c29..8aa5c14812dc0 100644 --- a/pandas/tests/series/methods/test_copy.py +++ b/pandas/tests/series/methods/test_copy.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Series, Timestamp +from pandas import ( + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py index 7fff87c7b55f4..937bb383dd35c 100644 --- a/pandas/tests/series/methods/test_count.py +++ b/pandas/tests/series/methods/test_count.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import Categorical, MultiIndex, Series +from pandas import ( + Categorical, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py index f01ed73c0165f..58a332ace244f 100644 --- a/pandas/tests/series/methods/test_cov_corr.py +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -6,7 +6,10 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import Series, isna +from pandas import ( + Series, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py index e479e5c1416db..1113efc972e76 100644 --- a/pandas/tests/series/methods/test_describe.py +++ b/pandas/tests/series/methods/test_describe.py @@ -2,7 +2,13 @@ import pandas.util._test_decorators as td -from pandas import Period, Series, Timedelta, Timestamp, date_range +from pandas import ( + Period, + Series, + Timedelta, + Timestamp, + date_range, +) import pandas._testing as tm # TODO(ArrayManager) quantile is needed for describe() diff --git a/pandas/tests/series/methods/test_diff.py b/pandas/tests/series/methods/test_diff.py index 
033f75e95f11b..1fbce249af6d2 100644 --- a/pandas/tests/series/methods/test_diff.py +++ b/pandas/tests/series/methods/test_diff.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Series, TimedeltaIndex, date_range +from pandas import ( + Series, + TimedeltaIndex, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py index fe4bcb44d5e61..dae1bbcd86e81 100644 --- a/pandas/tests/series/methods/test_drop_duplicates.py +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Categorical, Series +from pandas import ( + Categorical, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_dropna.py b/pandas/tests/series/methods/test_dropna.py index f56230daea190..1c7c52d228cfa 100644 --- a/pandas/tests/series/methods/test_dropna.py +++ b/pandas/tests/series/methods/test_dropna.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from pandas import DatetimeIndex, IntervalIndex, NaT, Period, Series, Timestamp +from pandas import ( + DatetimeIndex, + IntervalIndex, + NaT, + Period, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_equals.py b/pandas/tests/series/methods/test_equals.py index 85d74196e59b4..0b3689afac764 100644 --- a/pandas/tests/series/methods/test_equals.py +++ b/pandas/tests/series/methods/test_equals.py @@ -8,7 +8,11 @@ from pandas.core.dtypes.common import is_float -from pandas import Index, MultiIndex, Series +from pandas import ( + Index, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 0bcb37d4880a6..5b3a6c13af467 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -1,4 +1,8 @@ -from datetime import 
datetime, timedelta, timezone +from datetime import ( + datetime, + timedelta, + timezone, +) import numpy as np import pytest @@ -204,8 +208,9 @@ def test_timedelta_fillna(self, frame_or_series): expected = frame_or_series(expected) tm.assert_equal(result, expected) - # interpreted as seconds, deprecated - with pytest.raises(TypeError, match="Passing integers to fillna"): + # interpreted as seconds, no longer supported + msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'" + with pytest.raises(TypeError, match=msg): obj.fillna(1) result = obj.fillna(Timedelta(seconds=1)) diff --git a/pandas/tests/series/methods/test_get_numeric_data.py b/pandas/tests/series/methods/test_get_numeric_data.py index dc0becf46a24c..e386f4b5b1dec 100644 --- a/pandas/tests/series/methods/test_get_numeric_data.py +++ b/pandas/tests/series/methods/test_get_numeric_data.py @@ -1,4 +1,8 @@ -from pandas import Index, Series, date_range +from pandas import ( + Index, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index 8740a309eec13..cad5476d4861c 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -4,7 +4,13 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import Index, MultiIndex, Series, date_range, isna +from pandas import ( + Index, + MultiIndex, + Series, + date_range, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_is_monotonic.py b/pandas/tests/series/methods/test_is_monotonic.py index b242b293cb59e..f02939374cc5b 100644 --- a/pandas/tests/series/methods/test_is_monotonic.py +++ b/pandas/tests/series/methods/test_is_monotonic.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import Series, date_range +from pandas import ( + Series, + date_range, +) class TestIsMonotonic: diff --git 
a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 76a84aac786c8..320179c0a8b0a 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import Series, date_range +from pandas import ( + Series, + date_range, +) import pandas._testing as tm from pandas.core.arrays import PeriodArray diff --git a/pandas/tests/series/methods/test_isna.py b/pandas/tests/series/methods/test_isna.py index 1760b0b9726e0..7e324aa86a052 100644 --- a/pandas/tests/series/methods/test_isna.py +++ b/pandas/tests/series/methods/test_isna.py @@ -3,7 +3,10 @@ """ import numpy as np -from pandas import Period, Series +from pandas import ( + Period, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py index 90e8f6d39c5cc..2bdeb4da5f70f 100644 --- a/pandas/tests/series/methods/test_item.py +++ b/pandas/tests/series/methods/test_item.py @@ -4,7 +4,12 @@ """ import pytest -from pandas import Series, Timedelta, Timestamp, date_range +from pandas import ( + Series, + Timedelta, + Timestamp, + date_range, +) class TestItem: diff --git a/pandas/tests/series/methods/test_matmul.py b/pandas/tests/series/methods/test_matmul.py index c311f1fd880a3..b944395bff29f 100644 --- a/pandas/tests/series/methods/test_matmul.py +++ b/pandas/tests/series/methods/test_matmul.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_nunique.py b/pandas/tests/series/methods/test_nunique.py index d2d94183aa21b..50d3b9331b2b2 100644 --- a/pandas/tests/series/methods/test_nunique.py +++ b/pandas/tests/series/methods/test_nunique.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import Categorical, Series +from pandas import ( + Categorical, + 
Series, +) def test_nunique(): diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py index 1efb57894f986..017fef5fdb31f 100644 --- a/pandas/tests/series/methods/test_pct_change.py +++ b/pandas/tests/series/methods/test_pct_change.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import Series, date_range +from pandas import ( + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_quantile.py b/pandas/tests/series/methods/test_quantile.py index 5771d8e2b8a47..9001f95fe4299 100644 --- a/pandas/tests/series/methods/test_quantile.py +++ b/pandas/tests/series/methods/test_quantile.py @@ -6,7 +6,10 @@ from pandas.core.dtypes.common import is_integer import pandas as pd -from pandas import Index, Series +from pandas import ( + Index, + Series, +) import pandas._testing as tm from pandas.core.indexes.datetimes import Timestamp diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py index 9d052e2236aae..088e10b0ba070 100644 --- a/pandas/tests/series/methods/test_rank.py +++ b/pandas/tests/series/methods/test_rank.py @@ -1,12 +1,23 @@ -from itertools import chain, product +from itertools import ( + chain, + product, +) import numpy as np import pytest -from pandas._libs.algos import Infinity, NegInfinity +from pandas._libs.algos import ( + Infinity, + NegInfinity, +) import pandas.util._test_decorators as td -from pandas import NaT, Series, Timestamp, date_range +from pandas import ( + NaT, + Series, + Timestamp, + date_range, +) import pandas._testing as tm from pandas.api.types import CategoricalDtype diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py index ac07fed7c951a..eacafa9310384 100644 --- a/pandas/tests/series/methods/test_rename.py +++ b/pandas/tests/series/methods/test_rename.py @@ -2,7 +2,10 @@ import numpy as np -from pandas import Index, Series +from 
pandas import ( + Index, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_rename_axis.py b/pandas/tests/series/methods/test_rename_axis.py index b519dd1144493..58c095d697ede 100644 --- a/pandas/tests/series/methods/test_rename_axis.py +++ b/pandas/tests/series/methods/test_rename_axis.py @@ -1,6 +1,10 @@ import pytest -from pandas import Index, MultiIndex, Series +from pandas import ( + Index, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_repeat.py b/pandas/tests/series/methods/test_repeat.py index 32f7384d34ebd..e63317f685556 100644 --- a/pandas/tests/series/methods/test_repeat.py +++ b/pandas/tests/series/methods/test_repeat.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import MultiIndex, Series +from pandas import ( + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 3f3a3af658969..69dd7d083119f 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -266,7 +266,7 @@ def test_replace_with_empty_dictlike(self): s = pd.Series(list("abcd")) tm.assert_series_equal(s, s.replace({})) - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + with tm.assert_produces_warning(DeprecationWarning): empty_series = pd.Series([]) tm.assert_series_equal(s, s.replace(empty_series)) @@ -457,6 +457,6 @@ def test_str_replace_regex_default_raises_warning(self, pattern): msg = r"The default value of regex will change from True to False" if len(pattern) == 1: msg += r".*single character regular expressions.*not.*literal strings" - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False) as w: + with tm.assert_produces_warning(FutureWarning) as w: s.str.replace(pattern, "") assert re.match(msg, str(w[0].message)) diff --git a/pandas/tests/series/methods/test_reset_index.py 
b/pandas/tests/series/methods/test_reset_index.py index 40e567a8c33ca..70b9c9c9dc7d7 100644 --- a/pandas/tests/series/methods/test_reset_index.py +++ b/pandas/tests/series/methods/test_reset_index.py @@ -4,7 +4,14 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + RangeIndex, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_searchsorted.py b/pandas/tests/series/methods/test_searchsorted.py index 5a6ec0039c7cd..5a7eb3f8cfc97 100644 --- a/pandas/tests/series/methods/test_searchsorted.py +++ b/pandas/tests/series/methods/test_searchsorted.py @@ -1,6 +1,10 @@ import numpy as np -from pandas import Series, Timestamp, date_range +from pandas import ( + Series, + Timestamp, + date_range, +) import pandas._testing as tm from pandas.api.types import is_scalar diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py index ccaa8a797e312..d70abe2311acd 100644 --- a/pandas/tests/series/methods/test_sort_index.py +++ b/pandas/tests/series/methods/test_sort_index.py @@ -3,7 +3,12 @@ import numpy as np import pytest -from pandas import DatetimeIndex, IntervalIndex, MultiIndex, Series +from pandas import ( + DatetimeIndex, + IntervalIndex, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index b49e39d4592ea..fe2046401f657 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import Categorical, DataFrame, Series +from pandas import ( + Categorical, + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_to_dict.py b/pandas/tests/series/methods/test_to_dict.py index 
47badb0a1bb52..4c3d9592eebe3 100644 --- a/pandas/tests/series/methods/test_to_dict.py +++ b/pandas/tests/series/methods/test_to_dict.py @@ -1,5 +1,6 @@ import collections +import numpy as np import pytest from pandas import Series @@ -20,3 +21,18 @@ def test_to_dict(self, mapping, datetime_series): from_method = Series(datetime_series.to_dict(collections.Counter)) from_constructor = Series(collections.Counter(datetime_series.items())) tm.assert_series_equal(from_method, from_constructor) + + @pytest.mark.parametrize( + "input", + ( + {"a": np.int64(64), "b": 10}, + {"a": np.int64(64), "b": 10, "c": "ABC"}, + {"a": np.uint64(64), "b": 10, "c": "ABC"}, + ), + ) + def test_to_dict_return_types(self, input): + # GH25969: to_dict must return Python ints, not numpy integer scalars + + d = Series(input).to_dict() + assert isinstance(d["a"], int) + assert isinstance(d["b"], int) diff --git a/pandas/tests/series/methods/test_to_frame.py b/pandas/tests/series/methods/test_to_frame.py index 6d52ab9da3f1b..66e44f1a0caf0 100644 --- a/pandas/tests/series/methods/test_to_frame.py +++ b/pandas/tests/series/methods/test_to_frame.py @@ -1,4 +1,7 @@ -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py index 21de593c0e2af..672faf1e0d541 100644 --- a/pandas/tests/series/methods/test_truncate.py +++ b/pandas/tests/series/methods/test_truncate.py @@ -1,7 +1,10 @@ from datetime import datetime import pandas as pd -from pandas import Series, date_range +from pandas import ( + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_tz_convert.py b/pandas/tests/series/methods/test_tz_convert.py index 82ee5c8d756b1..d826dde646cfb 100644 --- a/pandas/tests/series/methods/test_tz_convert.py +++ b/pandas/tests/series/methods/test_tz_convert.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import DatetimeIndex, Series +from pandas import ( +
DatetimeIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_tz_localize.py b/pandas/tests/series/methods/test_tz_localize.py index 836dee3aa047d..4d7f26076e060 100644 --- a/pandas/tests/series/methods/test_tz_localize.py +++ b/pandas/tests/series/methods/test_tz_localize.py @@ -3,7 +3,13 @@ from pandas._libs.tslibs import timezones -from pandas import DatetimeIndex, NaT, Series, Timestamp, date_range +from pandas import ( + DatetimeIndex, + NaT, + Series, + Timestamp, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_unique.py b/pandas/tests/series/methods/test_unique.py index b777d9ba1676a..856fe6e7c4f04 100644 --- a/pandas/tests/series/methods/test_unique.py +++ b/pandas/tests/series/methods/test_unique.py @@ -1,6 +1,9 @@ import numpy as np -from pandas import Categorical, Series +from pandas import ( + Categorical, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index ded4500ba478a..6f8f6d638dd56 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py index 51760c451ebca..4f585a6ea029a 100644 --- a/pandas/tests/series/methods/test_update.py +++ b/pandas/tests/series/methods/test_update.py @@ -1,7 +1,13 @@ import numpy as np import pytest -from pandas import CategoricalDtype, DataFrame, NaT, Series, Timestamp +from pandas import ( + CategoricalDtype, + DataFrame, + NaT, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index 
505b879660ff1..e707c3f4023df 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import Categorical, CategoricalIndex, Series +from pandas import ( + Categorical, + CategoricalIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_values.py b/pandas/tests/series/methods/test_values.py index 2982dcd52991d..479c7033a3fb5 100644 --- a/pandas/tests/series/methods/test_values.py +++ b/pandas/tests/series/methods/test_values.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import IntervalIndex, Series, period_range +from pandas import ( + IntervalIndex, + Series, + period_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/methods/test_view.py b/pandas/tests/series/methods/test_view.py index ccf3aa0d90e6f..f0069cdb9b79c 100644 --- a/pandas/tests/series/methods/test_view.py +++ b/pandas/tests/series/methods/test_view.py @@ -1,4 +1,12 @@ -from pandas import Series, date_range +import numpy as np +import pytest + +from pandas import ( + Index, + Series, + array as pd_array, + date_range, +) import pandas._testing as tm @@ -16,3 +24,23 @@ def test_view_tz(self): ] ) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"] + ) + @pytest.mark.parametrize( + "second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"] + ) + @pytest.mark.parametrize("box", [Series, Index, pd_array]) + def test_view_between_datetimelike(self, first, second, box): + # viewing as any datetimelike dtype must keep the underlying i8 values + dti = date_range("2016-01-01", periods=3) + + orig = box(dti) + obj = orig.view(first) + assert obj.dtype == first + tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8) + + res = obj.view(second) + assert res.dtype == second + tm.assert_numpy_array_equal(np.asarray(res.view("i8")), dti.asi8) diff --git
a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index c09df52fb5df5..eddf57c1e88f3 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -7,7 +7,12 @@ from pandas.util._test_decorators import skip_if_no import pandas as pd -from pandas import DataFrame, Index, Series, date_range +from pandas import ( + DataFrame, + Index, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 1593cbc987a12..0b6939a0997a4 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -7,7 +7,10 @@ from pandas._libs.tslibs import IncompatibleFrequency -from pandas.core.dtypes.common import is_datetime64_dtype, is_datetime64tz_dtype +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, +) import pandas as pd from pandas import ( @@ -21,7 +24,10 @@ isna, ) import pandas._testing as tm -from pandas.core import nanops, ops +from pandas.core import ( + nanops, + ops, +) def _permute(obj): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 780fd276cdceb..6cd2a1dd180c1 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1,14 +1,23 @@ from collections import OrderedDict -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) from dateutil.tz import tzoffset import numpy as np import numpy.ma as ma import pytest -from pandas._libs import iNaT, lib +from pandas._libs import ( + iNaT, + lib, +) -from pandas.core.dtypes.common import is_categorical_dtype, is_datetime64tz_dtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64tz_dtype, +) from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -31,7 +40,10 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.arrays 
import IntervalArray, period_array +from pandas.core.arrays import ( + IntervalArray, + period_array, +) from pandas.core.internals.blocks import NumericBlock @@ -1306,7 +1318,7 @@ def test_constructor_dtype_timedelta64(self): td.astype("int64") # invalid casting - msg = r"cannot astype a timedelta from \[timedelta64\[ns\]\] to \[int32\]" + msg = r"cannot astype a datetimelike from \[timedelta64\[ns\]\] to \[int32\]" with pytest.raises(TypeError, match=msg): td.astype("int32") diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index d455e434f38be..9121a5a5b6b82 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -4,7 +4,11 @@ from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd -from pandas import Categorical, DataFrame, Series +from pandas import ( + Categorical, + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 23aa11bc9358a..87a86687fb9a0 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -4,7 +4,12 @@ import numpy as np import pytest -from pandas import DataFrame, Index, Series, bdate_range +from pandas import ( + DataFrame, + Index, + Series, + bdate_range, +) import pandas._testing as tm from pandas.core import ops diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 6fefeaa818a77..87a0e5cb680c8 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -5,7 +5,13 @@ from pandas._libs import iNaT import pandas as pd -from pandas import Categorical, Index, NaT, Series, isna +from pandas import ( + Categorical, + Index, + NaT, + Series, + isna, +) import pandas._testing as tm diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index c3c58f29fcbf6..12671bbf5ba98 100644 --- 
a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -2,7 +2,10 @@ import pytest import pandas as pd -from pandas import MultiIndex, Series +from pandas import ( + MultiIndex, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 026f6bd2d453d..a91908f7fba52 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index 86330b7cc6993..da5faeab49a8d 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -35,7 +35,7 @@ def test_subclass_unstack(self): tm.assert_frame_equal(res, exp) def test_subclass_empty_repr(self): - with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + with tm.assert_produces_warning(DeprecationWarning): sub_series = tm.SubclassedSeries() assert "SubclassedSeries" in repr(sub_series) diff --git a/pandas/tests/strings/test_api.py b/pandas/tests/strings/test_api.py index 4e9ccbc4892e3..7f864a503486e 100644 --- a/pandas/tests/strings/test_api.py +++ b/pandas/tests/strings/test_api.py @@ -1,6 +1,12 @@ import pytest -from pandas import DataFrame, Index, MultiIndex, Series, _testing as tm +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, +) from pandas.core import strings as strings diff --git a/pandas/tests/strings/test_case_justify.py b/pandas/tests/strings/test_case_justify.py index 8aacf3d6d1d4b..b46f50e430b54 100644 --- a/pandas/tests/strings/test_case_justify.py +++ b/pandas/tests/strings/test_case_justify.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas import Series, _testing as tm +from pandas import ( + Series, + _testing as tm, +) def test_title(): diff --git 
a/pandas/tests/strings/test_cat.py b/pandas/tests/strings/test_cat.py index 49091b1dd3858..cdaccf0dad8e6 100644 --- a/pandas/tests/strings/test_cat.py +++ b/pandas/tests/strings/test_cat.py @@ -2,7 +2,14 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, _testing as tm, concat +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, + concat, +) from pandas.tests.strings.test_strings import assert_series_or_index_equal diff --git a/pandas/tests/strings/test_extract.py b/pandas/tests/strings/test_extract.py index f1d6049b1ac08..c1564a5c256a1 100644 --- a/pandas/tests/strings/test_extract.py +++ b/pandas/tests/strings/test_extract.py @@ -4,7 +4,13 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, _testing as tm +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, +) def test_extract_expand_None(): diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 32ce89e64ef4b..ef27d582b4e0f 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -5,7 +5,11 @@ import pytest import pandas as pd -from pandas import Index, Series, _testing as tm +from pandas import ( + Index, + Series, + _testing as tm, +) def test_contains(): diff --git a/pandas/tests/strings/test_split_partition.py b/pandas/tests/strings/test_split_partition.py index 3bea778587d82..6df8fa805955d 100644 --- a/pandas/tests/strings/test_split_partition.py +++ b/pandas/tests/strings/test_split_partition.py @@ -4,7 +4,13 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, _testing as tm +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + _testing as tm, +) def test_split(): diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py index 4cf3c3d165e79..b51132caf7573 100644 
--- a/pandas/tests/strings/test_string_array.py +++ b/pandas/tests/strings/test_string_array.py @@ -4,7 +4,11 @@ from pandas._libs import lib import pandas as pd -from pandas import DataFrame, Series, _testing as tm +from pandas import ( + DataFrame, + Series, + _testing as tm, +) def test_string_array(any_string_method): diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 92e7bf258d2d7..95ac237597bc4 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -1,9 +1,19 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, isna, notna +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + isna, + notna, +) import pandas._testing as tm diff --git a/pandas/tests/test_aggregation.py b/pandas/tests/test_aggregation.py index 74ccebc8e2275..4534b8eaac03b 100644 --- a/pandas/tests/test_aggregation.py +++ b/pandas/tests/test_aggregation.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas.core.aggregation import _make_unique_kwarg_list, maybe_mangle_lambdas +from pandas.core.aggregation import ( + _make_unique_kwarg_list, + maybe_mangle_lambdas, +) def test_maybe_mangle_lambdas_passthrough(): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 88757b96085aa..27201055dfa5d 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -5,7 +5,10 @@ import numpy as np import pytest -from pandas._libs import algos as libalgos, hashtable as ht +from pandas._libs import ( + algos as libalgos, + hashtable as ht, +) from pandas.compat import np_array_datetime64_compat import pandas.util._test_decorators as td diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 83016a08de90b..911f1c7ebe31c 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ 
-89,7 +89,10 @@ def test_statsmodels(): def test_scikit_learn(df): sklearn = import_module("sklearn") # noqa - from sklearn import datasets, svm + from sklearn import ( + datasets, + svm, + ) digits = datasets.load_digits() clf = svm.SVC(gamma=0.001, C=100.0) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2d862fda013d5..30f88ba5e76f6 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -5,7 +5,11 @@ import pytest import pandas._testing as tm -from pandas.core.api import DataFrame, Index, Series +from pandas.core.api import ( + DataFrame, + Index, + Series, +) from pandas.core.computation import expressions as expr _frame = DataFrame(np.random.randn(10000, 4), columns=list("ABCD"), dtype="float64") diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 88fecc7635475..8e6a636a8f602 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2,7 +2,11 @@ import pytest import pandas as pd -from pandas import DataFrame, MultiIndex, Series +from pandas import ( + DataFrame, + MultiIndex, + Series, +) import pandas._testing as tm AGG_FUNCTIONS = [ diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 359a7eecf6f7b..7f8b941a9f115 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -10,7 +10,10 @@ from pandas.core.dtypes.common import is_integer_dtype import pandas as pd -from pandas import Series, isna +from pandas import ( + Series, + isna, +) import pandas._testing as tm from pandas.core.arrays import DatetimeArray import pandas.core.nanops as nanops diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py index b9cab2428c0d1..f75ee0d0ddd95 100644 --- a/pandas/tests/test_optional_dependency.py +++ b/pandas/tests/test_optional_dependency.py @@ -3,7 +3,10 @@ import pytest -from pandas.compat._optional import VERSIONS, import_optional_dependency +from 
pandas.compat._optional import ( + VERSIONS, + import_optional_dependency, +) import pandas._testing as tm diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index da1c91a1ad218..2fa3acf939c5b 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -5,7 +5,14 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series, array, concat, merge +from pandas import ( + DataFrame, + MultiIndex, + Series, + array, + concat, + merge, +) import pandas._testing as tm from pandas.core.algorithms import safe_sort import pandas.core.common as com diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 278a315a479bd..695aa4ca129d8 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2,7 +2,10 @@ import calendar from collections import deque -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import locale from dateutil.parser import parse @@ -12,7 +15,10 @@ import pytz from pandas._libs import tslib -from pandas._libs.tslibs import iNaT, parsing +from pandas._libs.tslibs import ( + iNaT, + parsing, +) from pandas.errors import OutOfBoundsDatetime import pandas.util._test_decorators as td diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index d5b4bda35ca2b..15ee296be0908 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -5,7 +5,12 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, Series, to_numeric +from pandas import ( + DataFrame, + Index, + Series, + to_numeric, +) import pandas._testing as tm diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index de3ff6e80ad66..6ff14087e6259 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -1,10 +1,18 @@ -from datetime import time, 
timedelta +from datetime import ( + time, + timedelta, +) import numpy as np import pytest import pandas as pd -from pandas import Series, TimedeltaIndex, isna, to_timedelta +from pandas import ( + Series, + TimedeltaIndex, + isna, + to_timedelta, +) import pandas._testing as tm diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index 20cadde45e7a0..92795245103d0 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -1,6 +1,10 @@ import pytest -from pandas._libs.tslibs import Period, Resolution, to_offset +from pandas._libs.tslibs import ( + Period, + Resolution, + to_offset, +) from pandas._libs.tslibs.dtypes import _attrname_to_abbrevs diff --git a/pandas/tests/tseries/frequencies/test_frequencies.py b/pandas/tests/tseries/frequencies/test_frequencies.py index 0479de8e8e7c3..f0af290b2fb69 100644 --- a/pandas/tests/tseries/frequencies/test_frequencies.py +++ b/pandas/tests/tseries/frequencies/test_frequencies.py @@ -2,7 +2,10 @@ from pandas._libs.tslibs import offsets -from pandas.tseries.frequencies import is_subperiod, is_superperiod +from pandas.tseries.frequencies import ( + is_subperiod, + is_superperiod, +) @pytest.mark.parametrize( diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py index 95edd038dab9b..a764ab8f03d9e 100644 --- a/pandas/tests/tseries/frequencies/test_inference.py +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -1,13 +1,26 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest -from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.ccalendar import ( + DAYS, + MONTHS, +) from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG from pandas.compat import is_platform_windows -from pandas import DatetimeIndex, Index, Series, 
Timestamp, date_range, period_range +from pandas import ( + DatetimeIndex, + Index, + Series, + Timestamp, + date_range, + period_range, +) import pandas._testing as tm from pandas.core.tools.datetimes import to_datetime diff --git a/pandas/tests/tseries/holiday/test_calendar.py b/pandas/tests/tseries/holiday/test_calendar.py index cd3b1aab33a2a..d9f54d9d80b2e 100644 --- a/pandas/tests/tseries/holiday/test_calendar.py +++ b/pandas/tests/tseries/holiday/test_calendar.py @@ -2,7 +2,11 @@ import pytest -from pandas import DatetimeIndex, offsets, to_datetime +from pandas import ( + DatetimeIndex, + offsets, + to_datetime, +) import pandas._testing as tm from pandas.tseries.holiday import ( diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py index 5edef896be537..db63785988977 100644 --- a/pandas/tests/tseries/offsets/common.py +++ b/pandas/tests/tseries/offsets/common.py @@ -2,12 +2,18 @@ Assertion helpers and base class for offsets tests """ from datetime import datetime -from typing import Optional, Type +from typing import ( + Optional, + Type, +) from dateutil.tz.tz import tzlocal import pytest -from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp +from pandas._libs.tslibs import ( + OutOfBoundsDatetime, + Timestamp, +) from pandas._libs.tslibs.offsets import ( FY5253, BusinessHour, diff --git a/pandas/tests/tseries/offsets/test_business_day.py b/pandas/tests/tseries/offsets/test_business_day.py index 9b3ded9844e24..26df051ef928f 100644 --- a/pandas/tests/tseries/offsets/test_business_day.py +++ b/pandas/tests/tseries/offsets/test_business_day.py @@ -1,15 +1,28 @@ """ Tests for offsets.BDay """ -from datetime import date, datetime, timedelta +from datetime import ( + date, + datetime, + timedelta, +) import numpy as np import pytest -from pandas._libs.tslibs.offsets import ApplyTypeError, BDay, BMonthEnd, CDay +from pandas._libs.tslibs.offsets import ( + ApplyTypeError, + BDay, + BMonthEnd, + CDay, +) from 
pandas.compat import np_datetime64_compat -from pandas import DatetimeIndex, _testing as tm, read_pickle +from pandas import ( + DatetimeIndex, + _testing as tm, + read_pickle, +) from pandas.tests.tseries.offsets.common import ( Base, assert_is_on_offset, diff --git a/pandas/tests/tseries/offsets/test_business_hour.py b/pandas/tests/tseries/offsets/test_business_hour.py index 5f387b2edeb0b..72b939b79c321 100644 --- a/pandas/tests/tseries/offsets/test_business_hour.py +++ b/pandas/tests/tseries/offsets/test_business_hour.py @@ -1,15 +1,32 @@ """ Tests for offsets.BusinessHour """ -from datetime import datetime, time as dt_time +from datetime import ( + datetime, + time as dt_time, +) import pytest -from pandas._libs.tslibs import Timedelta, Timestamp -from pandas._libs.tslibs.offsets import BDay, BusinessHour, Nano - -from pandas import DatetimeIndex, _testing as tm, date_range -from pandas.tests.tseries.offsets.common import Base, assert_offset_equal +from pandas._libs.tslibs import ( + Timedelta, + Timestamp, +) +from pandas._libs.tslibs.offsets import ( + BDay, + BusinessHour, + Nano, +) + +from pandas import ( + DatetimeIndex, + _testing as tm, + date_range, +) +from pandas.tests.tseries.offsets.common import ( + Base, + assert_offset_equal, +) class TestBusinessHour(Base): diff --git a/pandas/tests/tseries/offsets/test_custom_business_hour.py b/pandas/tests/tseries/offsets/test_custom_business_hour.py index f05b286616572..07270008adbd2 100644 --- a/pandas/tests/tseries/offsets/test_custom_business_hour.py +++ b/pandas/tests/tseries/offsets/test_custom_business_hour.py @@ -7,10 +7,17 @@ import pytest from pandas._libs.tslibs import Timestamp -from pandas._libs.tslibs.offsets import BusinessHour, CustomBusinessHour, Nano +from pandas._libs.tslibs.offsets import ( + BusinessHour, + CustomBusinessHour, + Nano, +) import pandas._testing as tm -from pandas.tests.tseries.offsets.common import Base, assert_offset_equal +from pandas.tests.tseries.offsets.common import 
( + Base, + assert_offset_equal, +) class TestCustomBusinessHour(Base): diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py index 14728314b8e20..1eee9e611e0f1 100644 --- a/pandas/tests/tseries/offsets/test_fiscal.py +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -18,7 +18,10 @@ ) from pandas.tseries.frequencies import get_offset -from pandas.tseries.offsets import FY5253, FY5253Quarter +from pandas.tseries.offsets import ( + FY5253, + FY5253Quarter, +) def makeFY5253LastOfMonthQuarter(*args, **kwds): diff --git a/pandas/tests/tseries/offsets/test_month.py b/pandas/tests/tseries/offsets/test_month.py index 578af79084e09..b9c0cfe75fe7e 100644 --- a/pandas/tests/tseries/offsets/test_month.py +++ b/pandas/tests/tseries/offsets/test_month.py @@ -1,7 +1,10 @@ """ Tests for CBMonthEnd CBMonthBegin, SemiMonthEnd, and SemiMonthBegin in offsets """ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) import numpy as np import pytest @@ -15,7 +18,12 @@ SemiMonthEnd, ) -from pandas import DatetimeIndex, Series, _testing as tm, date_range +from pandas import ( + DatetimeIndex, + Series, + _testing as tm, + date_range, +) from pandas.tests.tseries.offsets.common import ( Base, assert_is_on_offset, diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 8d718d055f02d..d36bea72908a3 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,22 +1,41 @@ """ Tests of pandas.tseries.offsets """ -from datetime import datetime, timedelta -from typing import Dict, List, Tuple +from datetime import ( + datetime, + timedelta, +) +from typing import ( + Dict, + List, + Tuple, +) import numpy as np import pytest -from pandas._libs.tslibs import NaT, Timestamp, conversion, timezones +from pandas._libs.tslibs import ( + NaT, + Timestamp, + conversion, + timezones, +) import 
pandas._libs.tslibs.offsets as liboffsets -from pandas._libs.tslibs.offsets import _get_offset, _offset_map +from pandas._libs.tslibs.offsets import ( + _get_offset, + _offset_map, +) from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG from pandas.compat import np_datetime64_compat from pandas.errors import PerformanceWarning from pandas import DatetimeIndex import pandas._testing as tm -from pandas.tests.tseries.offsets.common import Base, WeekDay, assert_offset_equal +from pandas.tests.tseries.offsets.common import ( + Base, + WeekDay, + assert_offset_equal, +) import pandas.tseries.offsets as offsets from pandas.tseries.offsets import ( @@ -175,7 +194,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=Fals exp_warning = UserWarning # test nanosecond is preserved - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + with tm.assert_produces_warning(exp_warning): result = func(ts) assert isinstance(result, Timestamp) if normalize is False: @@ -212,7 +231,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=Fals exp_warning = UserWarning # test nanosecond is preserved - with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + with tm.assert_produces_warning(exp_warning): result = func(ts) assert isinstance(result, Timestamp) if normalize is False: diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index edb0f8c7dd662..8e0ace7775868 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -9,7 +9,11 @@ """ import warnings -from hypothesis import assume, given, strategies as st +from hypothesis import ( + assume, + given, + strategies as st, +) from hypothesis.errors import Flaky from hypothesis.extra.dateutil import timezones as dateutil_timezones from hypothesis.extra.pytz import timezones as pytz_timezones diff 
--git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 5f7f1b898877c..52a2f3aeee850 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -1,20 +1,39 @@ """ Tests for offsets.Tick and subclasses """ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) -from hypothesis import assume, example, given, settings, strategies as st +from hypothesis import ( + assume, + example, + given, + settings, + strategies as st, +) import numpy as np import pytest from pandas._libs.tslibs.offsets import delta_to_tick -from pandas import Timedelta, Timestamp +from pandas import ( + Timedelta, + Timestamp, +) import pandas._testing as tm from pandas.tests.tseries.offsets.common import assert_offset_equal from pandas.tseries import offsets -from pandas.tseries.offsets import Hour, Micro, Milli, Minute, Nano, Second +from pandas.tseries.offsets import ( + Hour, + Micro, + Milli, + Minute, + Nano, + Second, +) # --------------------------------------------------------------------- # Test Helpers diff --git a/pandas/tests/tseries/offsets/test_week.py b/pandas/tests/tseries/offsets/test_week.py index 54751a70b151d..b46a36e00f2da 100644 --- a/pandas/tests/tseries/offsets/test_week.py +++ b/pandas/tests/tseries/offsets/test_week.py @@ -1,12 +1,19 @@ """ Tests for offset.Week, offset.WeekofMonth and offset.LastWeekofMonth """ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import pytest from pandas._libs.tslibs import Timestamp -from pandas._libs.tslibs.offsets import LastWeekOfMonth, Week, WeekOfMonth +from pandas._libs.tslibs.offsets import ( + LastWeekOfMonth, + Week, + WeekOfMonth, +) from pandas.tests.tseries.offsets.common import ( Base, diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 24fdb3840bf52..8c2f0b09c461e 100644 --- 
a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -1,11 +1,17 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) from dateutil.tz.tz import tzoffset import numpy as np import pytest import pytz -from pandas._libs import iNaT, tslib +from pandas._libs import ( + iNaT, + tslib, +) from pandas.compat import np_array_datetime64_compat from pandas import Timestamp diff --git a/pandas/tests/tslibs/test_ccalendar.py b/pandas/tests/tslibs/test_ccalendar.py index 1ff700fdc23a3..bba833abd3ad0 100644 --- a/pandas/tests/tslibs/test_ccalendar.py +++ b/pandas/tests/tslibs/test_ccalendar.py @@ -1,6 +1,12 @@ -from datetime import date, datetime +from datetime import ( + date, + datetime, +) -from hypothesis import given, strategies as st +from hypothesis import ( + given, + strategies as st, +) import numpy as np import pytest diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 87cd97f853f4d..41eb7ae85d032 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -12,7 +12,10 @@ tzconversion, ) -from pandas import Timestamp, date_range +from pandas import ( + Timestamp, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/tslibs/test_liboffsets.py b/pandas/tests/tslibs/test_liboffsets.py index 6a514d2cc8713..c189a431146a7 100644 --- a/pandas/tests/tslibs/test_liboffsets.py +++ b/pandas/tests/tslibs/test_liboffsets.py @@ -5,7 +5,10 @@ import pytest -from pandas._libs.tslibs.ccalendar import get_firstbday, get_lastbday +from pandas._libs.tslibs.ccalendar import ( + get_firstbday, + get_lastbday, +) import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import roll_qtrday diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 63298b657e341..2592fdbb2d361 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ 
b/pandas/tests/tslibs/test_period_asfreq.py @@ -1,7 +1,10 @@ import pytest from pandas._libs.tslibs import to_offset -from pandas._libs.tslibs.period import period_asfreq, period_ordinal +from pandas._libs.tslibs.period import ( + period_asfreq, + period_ordinal, +) def get_freq_code(freqstr: str) -> int: diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py index c87752ccf151e..25450bd64a298 100644 --- a/pandas/tests/tslibs/test_timedeltas.py +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -3,7 +3,10 @@ from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds -from pandas import Timedelta, offsets +from pandas import ( + Timedelta, + offsets, +) @pytest.mark.parametrize( diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py index 33f83c3579c43..fbda5e8fda9dd 100644 --- a/pandas/tests/tslibs/test_timezones.py +++ b/pandas/tests/tslibs/test_timezones.py @@ -1,10 +1,17 @@ -from datetime import datetime, timedelta, timezone +from datetime import ( + datetime, + timedelta, + timezone, +) import dateutil.tz import pytest import pytz -from pandas._libs.tslibs import conversion, timezones +from pandas._libs.tslibs import ( + conversion, + timezones, +) from pandas import Timestamp diff --git a/pandas/tests/tslibs/test_to_offset.py b/pandas/tests/tslibs/test_to_offset.py index 5b1134ee85e2c..27ddbb82f49a9 100644 --- a/pandas/tests/tslibs/test_to_offset.py +++ b/pandas/tests/tslibs/test_to_offset.py @@ -2,7 +2,11 @@ import pytest -from pandas._libs.tslibs import Timedelta, offsets, to_offset +from pandas._libs.tslibs import ( + Timedelta, + offsets, + to_offset, +) @pytest.mark.parametrize( diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py index ec8cb29c6dead..ab53707771be6 100644 --- a/pandas/tests/util/test_assert_almost_equal.py +++ b/pandas/tests/util/test_assert_almost_equal.py @@ -1,7 +1,12 @@ import numpy as np import pytest 
-from pandas import DataFrame, Index, Series, Timestamp +from pandas import ( + DataFrame, + Index, + Series, + Timestamp, +) import pandas._testing as tm diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 42c6db3d0b684..82a3a223b442b 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -1,7 +1,12 @@ import numpy as np import pytest -from pandas import Categorical, Index, MultiIndex, NaT +from pandas import ( + Categorical, + Index, + MultiIndex, + NaT, +) import pandas._testing as tm diff --git a/pandas/tests/util/test_assert_produces_warning.py b/pandas/tests/util/test_assert_produces_warning.py index 296fa3b6cf537..45699fa1294d3 100644 --- a/pandas/tests/util/test_assert_produces_warning.py +++ b/pandas/tests/util/test_assert_produces_warning.py @@ -5,7 +5,10 @@ import pytest -from pandas.errors import DtypeWarning, PerformanceWarning +from pandas.errors import ( + DtypeWarning, + PerformanceWarning, +) import pandas._testing as tm diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index df1853ffd26ae..e3384ce3caa06 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -1,7 +1,11 @@ import pytest import pandas as pd -from pandas import Categorical, DataFrame, Series +from pandas import ( + Categorical, + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 779d93eb14f24..94786292adb51 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -2,10 +2,18 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.util.hashing import hash_tuples -from pandas.util 
import hash_array, hash_pandas_object +from pandas.util import ( + hash_array, + hash_pandas_object, +) @pytest.fixture( diff --git a/pandas/tests/util/test_validate_kwargs.py b/pandas/tests/util/test_validate_kwargs.py index c357affb6203d..0e271ef42ca93 100644 --- a/pandas/tests/util/test_validate_kwargs.py +++ b/pandas/tests/util/test_validate_kwargs.py @@ -1,6 +1,9 @@ import pytest -from pandas.util._validators import validate_bool_kwarg, validate_kwargs +from pandas.util._validators import ( + validate_bool_kwarg, + validate_kwargs, +) _fname = "func" diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index 7ac033244fae7..d394a4b2be548 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -1,11 +1,19 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) import numpy as np import pytest import pandas.util._test_decorators as td -from pandas import DataFrame, Series, bdate_range, notna +from pandas import ( + DataFrame, + Series, + bdate_range, + notna, +) @pytest.fixture(params=[True, False]) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index 57665b47dea7f..a36091ab8934e 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -1,7 +1,11 @@ import numpy as np import pytest -from pandas import DataFrame, Series, concat +from pandas import ( + DataFrame, + Series, + concat, +) import pandas._testing as tm diff --git a/pandas/tests/window/moments/test_moments_consistency_expanding.py b/pandas/tests/window/moments/test_moments_consistency_expanding.py index 17f76bf824a5d..df3e79fb79eca 100644 --- a/pandas/tests/window/moments/test_moments_consistency_expanding.py +++ b/pandas/tests/window/moments/test_moments_consistency_expanding.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from 
pandas import DataFrame, Index, MultiIndex, Series, isna, notna +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + isna, + notna, +) import pandas._testing as tm diff --git a/pandas/tests/window/moments/test_moments_consistency_rolling.py b/pandas/tests/window/moments/test_moments_consistency_rolling.py index 53e5354340dcc..28fd5633de02e 100644 --- a/pandas/tests/window/moments/test_moments_consistency_rolling.py +++ b/pandas/tests/window/moments/test_moments_consistency_rolling.py @@ -5,7 +5,13 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, +) import pandas._testing as tm from pandas.core.window.common import flex_binary_moment diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index a365321101e81..a7b1d3fbca3fb 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py index ac6dd0bad619a..b2e53a676b039 100644 --- a/pandas/tests/window/moments/test_moments_rolling.py +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -3,7 +3,11 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/window/moments/test_moments_rolling_apply.py b/pandas/tests/window/moments/test_moments_rolling_apply.py index e48d88b365d8d..d7ce1c92bcd83 100644 --- a/pandas/tests/window/moments/test_moments_rolling_apply.py +++ 
b/pandas/tests/window/moments/test_moments_rolling_apply.py @@ -3,7 +3,13 @@ import numpy as np import pytest -from pandas import DataFrame, Series, concat, isna, notna +from pandas import ( + DataFrame, + Series, + concat, + isna, + notna, +) import pandas._testing as tm import pandas.tseries.offsets as offsets diff --git a/pandas/tests/window/moments/test_moments_rolling_functions.py b/pandas/tests/window/moments/test_moments_rolling_functions.py index abe75c7289ed4..b25b3c3b17637 100644 --- a/pandas/tests/window/moments/test_moments_rolling_functions.py +++ b/pandas/tests/window/moments/test_moments_rolling_functions.py @@ -1,7 +1,13 @@ import numpy as np import pytest -from pandas import DataFrame, Series, concat, isna, notna +from pandas import ( + DataFrame, + Series, + concat, + isna, + notna, +) import pandas._testing as tm import pandas.tseries.offsets as offsets diff --git a/pandas/tests/window/moments/test_moments_rolling_quantile.py b/pandas/tests/window/moments/test_moments_rolling_quantile.py index e06a5faabe310..56681c2aaa57e 100644 --- a/pandas/tests/window/moments/test_moments_rolling_quantile.py +++ b/pandas/tests/window/moments/test_moments_rolling_quantile.py @@ -3,7 +3,13 @@ import numpy as np import pytest -from pandas import DataFrame, Series, concat, isna, notna +from pandas import ( + DataFrame, + Series, + concat, + isna, + notna, +) import pandas._testing as tm import pandas.tseries.offsets as offsets diff --git a/pandas/tests/window/moments/test_moments_rolling_skew_kurt.py b/pandas/tests/window/moments/test_moments_rolling_skew_kurt.py index cc67e602be12e..3cd4b115c90c7 100644 --- a/pandas/tests/window/moments/test_moments_rolling_skew_kurt.py +++ b/pandas/tests/window/moments/test_moments_rolling_skew_kurt.py @@ -5,7 +5,13 @@ import pandas.util._test_decorators as td -from pandas import DataFrame, Series, concat, isna, notna +from pandas import ( + DataFrame, + Series, + concat, + isna, + notna, +) import pandas._testing as tm import 
pandas.tseries.offsets as offsets diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index b47cd71beb6a8..baab562b4d177 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, +) import pandas._testing as tm diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py index fd4dfa7b7ed2b..06867e80ee711 100644 --- a/pandas/tests/window/test_base_indexer.py +++ b/pandas/tests/window/test_base_indexer.py @@ -1,10 +1,20 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range +from pandas import ( + DataFrame, + Series, + date_range, +) import pandas._testing as tm -from pandas.api.indexers import BaseIndexer, FixedForwardWindowIndexer -from pandas.core.window.indexers import ExpandingIndexer, VariableOffsetWindowIndexer +from pandas.api.indexers import ( + BaseIndexer, + FixedForwardWindowIndexer, +) +from pandas.core.window.indexers import ( + ExpandingIndexer, + VariableOffsetWindowIndexer, +) from pandas.tseries.offsets import BusinessDay diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py index fc7a51834780f..7cd2bf4f1ca19 100644 --- a/pandas/tests/window/test_dtypes.py +++ b/pandas/tests/window/test_dtypes.py @@ -1,7 +1,10 @@ import numpy as np import pytest -from pandas import DataFrame, Series +from pandas import ( + DataFrame, + Series, +) import pandas._testing as tm from pandas.core.base import DataError diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 9c1d23fe6e7a6..fbd7a36a75bf0 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -3,7 +3,12 @@ from pandas.errors import UnsupportedFunctionCall -from pandas 
import DataFrame, DatetimeIndex, Series, date_range +from pandas import ( + DataFrame, + DatetimeIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.core.window import ExponentialMovingWindow diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 01804faad5a5e..c272544e6af9e 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -3,7 +3,11 @@ from pandas.errors import UnsupportedFunctionCall -from pandas import DataFrame, DatetimeIndex, Series +from pandas import ( + DataFrame, + DatetimeIndex, + Series, +) import pandas._testing as tm from pandas.core.window import Expanding diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 173e39ef42908..f64d242a4e820 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -4,7 +4,11 @@ from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td -from pandas import DataFrame, Series, option_context +from pandas import ( + DataFrame, + Series, + option_context, +) import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index b39d052a702c0..a0d24a061fc4a 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -3,7 +3,12 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex, Series, date_range +from pandas import ( + DataFrame, + MultiIndex, + Series, + date_range, +) import pandas._testing as tm from pandas.core.algorithms import safe_sort diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b275b64ff706b..4989e23ed7ba5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1,4 +1,7 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) 
import numpy as np import pytest diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 1cfba6f020018..a1f388b1eb5d9 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -4,7 +4,13 @@ from pandas.errors import UnsupportedFunctionCall import pandas.util._test_decorators as td -from pandas import DataFrame, Series, Timedelta, concat, date_range +from pandas import ( + DataFrame, + Series, + Timedelta, + concat, + date_range, +) import pandas._testing as tm from pandas.api.indexers import BaseIndexer diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 0d5598fcaf890..b2e2ccfada2c3 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -4,7 +4,10 @@ import numpy as np from pandas._libs.algos import unique_deltas -from pandas._libs.tslibs import Timestamp, tzconversion +from pandas._libs.tslibs import ( + Timestamp, + tzconversion, +) from pandas._libs.tslibs.ccalendar import ( DAYS, MONTH_ALIASES, @@ -12,7 +15,10 @@ MONTHS, int_to_weekday, ) -from pandas._libs.tslibs.fields import build_field_sarray, month_position_check +from pandas._libs.tslibs.fields import ( + build_field_sarray, + month_position_check, +) from pandas._libs.tslibs.offsets import ( # noqa:F401 DateOffset, Day, diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index d8a3040919e7b..ce303928dc8ee 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -1,15 +1,36 @@ -from datetime import datetime, timedelta +from datetime import ( + datetime, + timedelta, +) from typing import List import warnings -from dateutil.relativedelta import FR, MO, SA, SU, TH, TU, WE # noqa +from dateutil.relativedelta import ( # noqa + FR, + MO, + SA, + SU, + TH, + TU, + WE, +) import numpy as np from pandas.errors import PerformanceWarning -from pandas import DateOffset, DatetimeIndex, Series, Timestamp, concat, date_range +from pandas import ( + 
DateOffset, + DatetimeIndex, + Series, + Timestamp, + concat, + date_range, +) -from pandas.tseries.offsets import Day, Easter +from pandas.tseries.offsets import ( + Day, + Easter, +) def next_monday(dt: datetime) -> datetime: diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index 9f2bf156b7e37..35a88a802003e 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -1,6 +1,13 @@ -from pandas.util._decorators import Appender, Substitution, cache_readonly # noqa +from pandas.util._decorators import ( # noqa + Appender, + Substitution, + cache_readonly, +) -from pandas.core.util.hashing import hash_array, hash_pandas_object # noqa +from pandas.core.util.hashing import ( # noqa + hash_array, + hash_pandas_object, +) def __getattr__(name): diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index a13fb1ce57f6c..ffc0255ca9de7 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -1,7 +1,17 @@ from functools import wraps import inspect from textwrap import dedent -from typing import Any, Callable, List, Mapping, Optional, Tuple, Type, Union, cast +from typing import ( + Any, + Callable, + List, + Mapping, + Optional, + Tuple, + Type, + Union, + cast, +) import warnings from pandas._libs.properties import cache_readonly # noqa @@ -78,8 +88,8 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: {dedent(doc)}""" ) - # error: Incompatible return value type (got "Callable[[VarArg(Any), - # KwArg(Any)], Callable[...,Any]]", expected "Callable[[F], F]") + # error: Incompatible return value type (got "Callable[[VarArg(Any), KwArg(Any)], + # Callable[...,Any]]", expected "Callable[[F], F]") return wrapper # type: ignore[return-value] @@ -362,10 +372,10 @@ def decorator(decorated: F) -> F: for docstring in docstrings: if hasattr(docstring, "_docstring_components"): - # error: Item "str" of "Union[str, Callable[..., Any]]" has no - # attribute "_docstring_components" [union-attr] - # error: Item "function" of 
"Union[str, Callable[..., Any]]" - # has no attribute "_docstring_components" [union-attr] + # error: Item "str" of "Union[str, Callable[..., Any]]" has no attribute + # "_docstring_components" + # error: Item "function" of "Union[str, Callable[..., Any]]" has no + # attribute "_docstring_components" docstring_components.extend( docstring._docstring_components # type: ignore[union-attr] ) diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py index 256346d482248..d6689fcb8cd01 100644 --- a/pandas/util/_doctools.py +++ b/pandas/util/_doctools.py @@ -1,4 +1,7 @@ -from typing import Optional, Tuple +from typing import ( + Optional, + Tuple, +) import numpy as np diff --git a/pandas/util/_print_versions.py b/pandas/util/_print_versions.py index ae3c8c98f8dc1..b81ec70c34396 100644 --- a/pandas/util/_print_versions.py +++ b/pandas/util/_print_versions.py @@ -5,10 +5,18 @@ import platform import struct import sys -from typing import Dict, Optional, Union +from typing import ( + Dict, + Optional, + Union, +) from pandas._typing import JSONSerializable -from pandas.compat._optional import VERSIONS, get_version, import_optional_dependency +from pandas.compat._optional import ( + VERSIONS, + get_version, + import_optional_dependency, +) def _get_commit_hash() -> Optional[str]: diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 95ef2f6c00fe8..fd8f62331dc38 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -26,16 +26,25 @@ def test_foo(): from contextlib import contextmanager from distutils.version import LooseVersion import locale -from typing import Callable, Optional +from typing import ( + Callable, + Optional, +) import warnings import numpy as np import pytest -from pandas.compat import IS64, is_platform_windows +from pandas.compat import ( + IS64, + is_platform_windows, +) from pandas.compat._optional import import_optional_dependency -from pandas.core.computation.expressions import 
NUMEXPR_INSTALLED, USE_NUMEXPR +from pandas.core.computation.expressions import ( + NUMEXPR_INSTALLED, + USE_NUMEXPR, +) def safe_import(mod_name: str, min_version: Optional[str] = None): diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index fa7201a5188a5..60a81ed63b005 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -2,7 +2,10 @@ Module that contains many useful utilities for validating data or function arguments """ -from typing import Iterable, Union +from typing import ( + Iterable, + Union, +) import warnings import numpy as np diff --git a/pyproject.toml b/pyproject.toml index 2b78147e9294d..9f11475234566 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,16 +1,17 @@ [build-system] # Minimum requirements for the build system to execute. -# See https://github.com/scipy/scipy/pull/10431 for the AIX issue. +# See https://github.com/scipy/scipy/pull/12940 for the AIX issue. requires = [ - "setuptools", + "setuptools>=38.6.0", "wheel", "Cython>=0.29.21,<3", # Note: sync with setup.py - "numpy==1.16.5; python_version=='3.7' and platform_system!='AIX'", - "numpy==1.17.3; python_version=='3.8' and platform_system!='AIX'", - "numpy==1.16.5; python_version=='3.7' and platform_system=='AIX'", - "numpy==1.17.3; python_version=='3.8' and platform_system=='AIX'", + "numpy==1.16.5; python_version=='3.7'", + "numpy==1.17.3; python_version=='3.8'", "numpy; python_version>='3.9'", ] +# uncomment to enable pep517 after versioneer problem is fixed. 
+# https://github.com/python-versioneer/python-versioneer/issues/193 +# build-backend = "setuptools.build_meta" [tool.black] target-version = ['py37', 'py38'] diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index b213d931e7f07..87070e819b4a0 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -7,52 +7,115 @@ This is meant to be run as a pre-commit hook - to run it manually, you can do: pre-commit run inconsistent-namespace-usage --all-files + +To automatically fixup a given file, you can pass `--replace`, e.g. + + python scripts/check_for_inconsistent_pandas_namespace.py test_me.py --replace + +though note that you may need to manually fixup some imports and that you will also +need the additional dependency `tokenize-rt` (which is left out from the pre-commit +hook so that it uses the same virtualenv as the other local ones). """ import argparse -from pathlib import Path -import re -from typing import Optional, Sequence - -PATTERN = r""" - ( - (? 
None: + self.pandas_namespace: MutableMapping[Offset, str] = {} + self.no_namespace: Set[str] = set() + + def visit_Attribute(self, node: ast.Attribute) -> None: + if ( + isinstance(node.value, ast.Name) + and node.value.id == "pd" + and node.attr not in EXCLUDE + ): + self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr + self.generic_visit(node) + + def visit_Name(self, node: ast.Name) -> None: + if node.id not in EXCLUDE: + self.no_namespace.add(node.id) + self.generic_visit(node) + + +def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str: + from tokenize_rt import ( + reversed_enumerate, + src_to_tokens, + tokens_to_src, ) - """ -ERROR_MESSAGE = "Found both `pd.{class_name}` and `{class_name}` in {path}" + + tokens = src_to_tokens(content) + for n, i in reversed_enumerate(tokens): + if ( + i.offset in visitor.pandas_namespace + and visitor.pandas_namespace[i.offset] in visitor.no_namespace + ): + # Replace `pd` + tokens[n] = i._replace(src="") + # Replace `.` + tokens[n + 1] = tokens[n + 1]._replace(src="") + + new_src: str = tokens_to_src(tokens) + return new_src + + +def check_for_inconsistent_pandas_namespace( + content: str, path: str, *, replace: bool +) -> Optional[str]: + tree = ast.parse(content) + + visitor = Visitor() + visitor.visit(tree) + + inconsistencies = visitor.no_namespace.intersection( + visitor.pandas_namespace.values() + ) + if not inconsistencies: + # No inconsistent namespace usage, nothing to replace. 
+ return content + + if not replace: + msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path) + raise RuntimeError(msg) + + return replace_inconsistent_pandas_namespace(visitor, content) def main(argv: Optional[Sequence[str]] = None) -> None: parser = argparse.ArgumentParser() - parser.add_argument("paths", nargs="*", type=Path) + parser.add_argument("paths", nargs="*") + parser.add_argument("--replace", action="store_true") args = parser.parse_args(argv) - pattern = re.compile( - PATTERN.encode(), - flags=re.MULTILINE | re.DOTALL | re.VERBOSE, - ) for path in args.paths: - contents = path.read_bytes() - match = pattern.search(contents) - if match is None: + with open(path, encoding="utf-8") as fd: + content = fd.read() + new_content = check_for_inconsistent_pandas_namespace( + content, path, replace=args.replace + ) + if not args.replace or new_content is None: continue - if match.group(2) is not None: - raise AssertionError( - ERROR_MESSAGE.format(class_name=match.group(2).decode(), path=str(path)) - ) - if match.group(4) is not None: - raise AssertionError( - ERROR_MESSAGE.format(class_name=match.group(4).decode(), path=str(path)) - ) + with open(path, "w", encoding="utf-8") as fd: + fd.write(new_content) if __name__ == "__main__": diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 37e6d288d9341..cc3509af5b138 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -1,28 +1,38 @@ -from pathlib import Path - import pytest -from scripts.check_for_inconsistent_pandas_namespace import main +from scripts.check_for_inconsistent_pandas_namespace import ( + check_for_inconsistent_pandas_namespace, +) BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()" GOOD_FILE_1 = "cat_0 = 
pd.Categorical()\ncat_1 = pd.Categorical()" +PATH = "t.py" + + +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) +def test_inconsistent_usage(content): + msg = r"Found both `pd\.Categorical` and `Categorical` in t\.py" + with pytest.raises(RuntimeError, match=msg): + check_for_inconsistent_pandas_namespace(content, PATH, replace=False) + + +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) +def test_consistent_usage(content): + # should not raise + check_for_inconsistent_pandas_namespace(content, PATH, replace=False) @pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) -def test_inconsistent_usage(tmpdir, content): - tmpfile = Path(tmpdir / "tmpfile.py") - tmpfile.touch() - tmpfile.write_text(content) - msg = fr"Found both `pd\.Categorical` and `Categorical` in {str(tmpfile)}" - with pytest.raises(AssertionError, match=msg): - main((str(tmpfile),)) +def test_inconsistent_usage_with_replace(content): + result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True) + expected = "cat_0 = Categorical()\ncat_1 = Categorical()" + assert result == expected @pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) -def test_consistent_usage(tmpdir, content): - tmpfile = Path(tmpdir / "tmpfile.py") - tmpfile.touch() - tmpfile.write_text(content) - main((str(tmpfile),)) # Should not raise. 
+def test_consistent_usage_with_replace(content): + result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True) + expected = content + assert result == expected diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 8b15358834066..c6b998e3dbddf 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -21,7 +21,10 @@ import os import sys import tempfile -from typing import List, Optional +from typing import ( + List, + Optional, +) import flake8.main.application diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index d521f2ee421be..aa17afc4c33ea 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -1,20 +1,24 @@ -#!/usr/bin/env python3 """ Validate that the titles in the rst files follow the proper capitalization convention. Print the titles that do not follow the convention. Usage:: -./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst -./scripts/validate_rst_title_capitalization.py doc/source/ +As pre-commit hook (recommended): + pre-commit run title-capitalization --all-files + +From the command-line: + python scripts/validate_rst_title_capitalization.py <rst file> """ import argparse -import glob -import os import re import sys -from typing import Iterable, List, Tuple +from typing import ( + Iterable, + List, + Tuple, +) CAPITALIZATION_EXCEPTIONS = { "pandas", @@ -233,36 +237,7 @@ def find_titles(rst_file: str) -> Iterable[Tuple[str, int]]: previous_line = line -def find_rst_files(source_paths: List[str]) -> Iterable[str]: - """ - Given the command line arguments of directory paths, this method - yields the strings of the .rst file directories that these paths contain. - - Parameters - ---------- - source_paths : str - List of directories to validate, provided through command line arguments.
- - Yields - ------- - str - Directory address of a .rst files found in command line argument directories. - """ - - for directory_address in source_paths: - if not os.path.exists(directory_address): - raise ValueError( - "Please enter a valid path, pointing to a valid file/directory." - ) - elif directory_address.endswith(".rst"): - yield directory_address - else: - yield from glob.glob( - pathname=f"{directory_address}/**/*.rst", recursive=True - ) - - -def main(source_paths: List[str], output_format: str) -> int: +def main(source_paths: List[str]) -> int: """ The main method to print all headings with incorrect capitalization. @@ -270,8 +245,6 @@ def main(source_paths: List[str], output_format: str) -> int: ---------- source_paths : str List of directories to validate, provided through command line arguments. - output_format : str - Output format of the script. Returns ------- @@ -281,7 +254,7 @@ def main(source_paths: List[str], output_format: str) -> int: number_of_errors: int = 0 - for filename in find_rst_files(source_paths): + for filename in source_paths: for title, line_number in find_titles(filename): if title != correct_title_capitalization(title): print( @@ -297,16 +270,9 @@ def main(source_paths: List[str], output_format: str) -> int: parser = argparse.ArgumentParser(description="Validate heading capitalization") parser.add_argument( - "paths", nargs="+", default=".", help="Source paths of file/directory to check." - ) - - parser.add_argument( - "--format", - "-f", - default="{source_path}:{line_number}:{msg}:{heading}:{correct_heading}", - help="Output format of incorrectly capitalized titles", + "paths", nargs="*", help="Source paths of file/directory to check." 
) args = parser.parse_args() - sys.exit(main(args.paths, args.format)) + sys.exit(main(args.paths)) diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 8f48d518a737b..b6b038ae9dd17 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -15,7 +15,14 @@ import sys import token import tokenize -from typing import IO, Callable, Iterable, List, Set, Tuple +from typing import ( + IO, + Callable, + Iterable, + List, + Set, + Tuple, +) PRIVATE_IMPORTS_TO_IGNORE: Set[str] = { "_extension_array_shared_docs", diff --git a/setup.cfg b/setup.cfg index a6d636704664e..ce055f550a868 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,11 +1,65 @@ +[metadata] +name = pandas +description = Powerful data structures for data analysis, time series, and statistics +long_description = file: README.md +long_description_content_type = text/markdown +url = https://pandas.pydata.org +author = The Pandas Development Team +author_email = pandas-dev@python.org +license = BSD-3-Clause +license_file = LICENSE +platforms = any +classifiers = + Development Status :: 5 - Production/Stable + Environment :: Console + Intended Audience :: Science/Research + License :: OSI Approved :: BSD License + Operating System :: OS Independent + Programming Language :: Cython + Programming Language :: Python + Programming Language :: Python :: 3 + Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.7 + Programming Language :: Python :: 3.8 + Programming Language :: Python :: 3.9 + Topic :: Scientific/Engineering +project_urls = + Bug Tracker = https://github.com/pandas-dev/pandas/issues + Documentation = https://pandas.pydata.org/pandas-docs/stable + Source Code = https://github.com/pandas-dev/pandas + +[options] +packages = find: +install_requires = + numpy>=1.16.5 + python-dateutil>=2.7.3 + pytz>=2017.3 +python_requires = >=3.7.1 +include_package_data = True +zip_safe = False + +[options.entry_points]
+pandas_plotting_backends = + matplotlib = pandas:plotting._matplotlib + +[options.extras_require] +test = + hypothesis>=3.58 + pytest>=5.0.1 + pytest-xdist + +[options.package_data] +* = templates/*, _libs/**/*.dll [build_ext] -inplace = 1 +inplace = True + +[options.packages.find] +include = pandas, pandas.* # See the docstring in versioneer.py for instructions. Note that you must # re-run 'versioneer.py setup' after changing this section, and commit the # resulting files. - [versioneer] VCS = git style = pep440 @@ -38,16 +92,16 @@ bootstrap = import pandas as pd np # avoiding error when importing again numpy or pandas pd # (in some cases we want to do it to show users) -ignore = E203, # space before : (needed for how black formats slicing) - E402, # module level import not at top of file - W503, # line break before binary operator - # Classes/functions in different blocks can generate those errors - E302, # expected 2 blank lines, found 0 - E305, # expected 2 blank lines after class or function definition, found 0 - # We use semicolon at the end to avoid displaying plot objects - E703, # statement ends with a semicolon - E711, # comparison to none should be 'if cond is none:' - +ignore = + E203, # space before : (needed for how black formats slicing) + E402, # module level import not at top of file + W503, # line break before binary operator + # Classes/functions in different blocks can generate those errors + E302, # expected 2 blank lines, found 0 + E305, # expected 2 blank lines after class or function definition, found 0 + # We use semicolon at the end to avoid displaying plot objects + E703, # statement ends with a semicolon + E711, # comparison to none should be 'if cond is none:' exclude = doc/source/development/contributing_docstring.rst, # work around issue of undefined variable warnings @@ -64,18 +118,18 @@ xfail_strict = True filterwarnings = error:Sparse:FutureWarning error:The SparseArray:FutureWarning -junit_family=xunit2 +junit_family = xunit2 
[codespell] -ignore-words-list=ba,blocs,coo,hist,nd,ser -ignore-regex=https://(\w+\.)+ +ignore-words-list = ba,blocs,coo,hist,nd,ser +ignore-regex = https://(\w+\.)+ [coverage:run] branch = False omit = - */tests/* - pandas/_typing.py - pandas/_version.py + */tests/* + pandas/_typing.py + pandas/_version.py plugins = Cython.Coverage [coverage:report] @@ -115,6 +169,7 @@ sections = FUTURE,STDLIB,THIRDPARTY,PRE_LIBS,PRE_CORE,DTYPES,FIRSTPARTY,POST_COR profile = black combine_as_imports = True line_length = 88 +force_grid_wrap = True force_sort_within_sections = True skip_glob = env, skip = pandas/__init__.py @@ -130,10 +185,10 @@ warn_unused_ignores = True show_error_codes = True [mypy-pandas.tests.*] -check_untyped_defs=False +check_untyped_defs = False [mypy-pandas._version] -check_untyped_defs=False +check_untyped_defs = False [mypy-pandas.io.clipboard] -check_untyped_defs=False +check_untyped_defs = False diff --git a/setup.py b/setup.py index f9c4a1158fee0..45548fed68322 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,11 @@ import sys import numpy -from setuptools import Command, Extension, find_packages, setup +from setuptools import ( + Command, + Extension, + setup, +) from setuptools.command.build_ext import build_ext as _build_ext import versioneer @@ -34,11 +38,13 @@ def is_platform_mac(): return sys.platform == "darwin" -min_numpy_ver = "1.16.5" min_cython_ver = "0.29.21" # note: sync with pyproject.toml try: - from Cython import Tempita, __version__ as _CYTHON_VERSION + from Cython import ( + Tempita, + __version__ as _CYTHON_VERSION, + ) from Cython.Build import cythonize _CYTHON_INSTALLED = _CYTHON_VERSION >= LooseVersion(min_cython_ver) @@ -99,96 +105,6 @@ def build_extensions(self): super().build_extensions() -DESCRIPTION = "Powerful data structures for data analysis, time series, and statistics" -LONG_DESCRIPTION = """ -**pandas** is a Python package that provides fast, flexible, and expressive data -structures designed to make working with 
structured (tabular, multidimensional, -potentially heterogeneous) and time series data both easy and intuitive. It -aims to be the fundamental high-level building block for doing practical, -**real world** data analysis in Python. Additionally, it has the broader goal -of becoming **the most powerful and flexible open source data analysis / -manipulation tool available in any language**. It is already well on its way -toward this goal. - -pandas is well suited for many different kinds of data: - - - Tabular data with heterogeneously-typed columns, as in an SQL table or - Excel spreadsheet - - Ordered and unordered (not necessarily fixed-frequency) time series data. - - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and - column labels - - Any other form of observational / statistical data sets. The data actually - need not be labeled at all to be placed into a pandas data structure - -The two primary data structures of pandas, Series (1-dimensional) and DataFrame -(2-dimensional), handle the vast majority of typical use cases in finance, -statistics, social science, and many areas of engineering. For R users, -DataFrame provides everything that R's ``data.frame`` provides and much -more. pandas is built on top of `NumPy `__ and is -intended to integrate well within a scientific computing environment with many -other 3rd party libraries. - -Here are just a few of the things that pandas does well: - - - Easy handling of **missing data** (represented as NaN) in floating point as - well as non-floating point data - - Size mutability: columns can be **inserted and deleted** from DataFrame and - higher dimensional objects - - Automatic and explicit **data alignment**: objects can be explicitly - aligned to a set of labels, or the user can simply ignore the labels and - let `Series`, `DataFrame`, etc. 
automatically align the data for you in - computations - - Powerful, flexible **group by** functionality to perform - split-apply-combine operations on data sets, for both aggregating and - transforming data - - Make it **easy to convert** ragged, differently-indexed data in other - Python and NumPy data structures into DataFrame objects - - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** - of large data sets - - Intuitive **merging** and **joining** data sets - - Flexible **reshaping** and pivoting of data sets - - **Hierarchical** labeling of axes (possible to have multiple labels per - tick) - - Robust IO tools for loading data from **flat files** (CSV and delimited), - Excel files, databases, and saving / loading data from the ultrafast **HDF5 - format** - - **Time series**-specific functionality: date range generation and frequency - conversion, moving window statistics, date shifting and lagging. - -Many of these principles are here to address the shortcomings frequently -experienced using other languages / scientific research environments. For data -scientists, working with data is typically divided into multiple stages: -munging and cleaning data, analyzing / modeling it, then organizing the results -of the analysis into a form suitable for plotting or tabular display. pandas is -the ideal tool for all of these tasks. 
-""" - -DISTNAME = "pandas" -LICENSE = "BSD" -AUTHOR = "The PyData Development Team" -EMAIL = "pydata@googlegroups.com" -URL = "https://pandas.pydata.org" -DOWNLOAD_URL = "" -PROJECT_URLS = { - "Bug Tracker": "https://github.com/pandas-dev/pandas/issues", - "Documentation": "https://pandas.pydata.org/pandas-docs/stable/", - "Source Code": "https://github.com/pandas-dev/pandas", -} -CLASSIFIERS = [ - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "Operating System :: OS Independent", - "Intended Audience :: Science/Research", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Cython", - "Topic :: Scientific/Engineering", -] - - class CleanCommand(Command): """Custom distutils command to clean the .so and .pyc files.""" @@ -711,51 +627,11 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): # ---------------------------------------------------------------------- -def setup_package(): - setuptools_kwargs = { - "install_requires": [ - "python-dateutil >= 2.7.3", - "pytz >= 2017.3", - f"numpy >= {min_numpy_ver}", - ], - "setup_requires": [f"numpy >= {min_numpy_ver}"], - "zip_safe": False, - } - +if __name__ == "__main__": + # Freeze to support parallel compilation when using spawn instead of fork + multiprocessing.freeze_support() setup( - name=DISTNAME, - maintainer=AUTHOR, version=versioneer.get_version(), - packages=find_packages(include=["pandas", "pandas.*"]), - package_data={"": ["templates/*", "_libs/**/*.dll"]}, ext_modules=maybe_cythonize(extensions, compiler_directives=directives), - maintainer_email=EMAIL, - description=DESCRIPTION, - license=LICENSE, cmdclass=cmdclass, - url=URL, - download_url=DOWNLOAD_URL, - project_urls=PROJECT_URLS, - long_description=LONG_DESCRIPTION, - classifiers=CLASSIFIERS, - platforms="any", - python_requires=">=3.7.1", 
- extras_require={ - "test": [ - # sync with setup.cfg minversion & install.rst - "pytest>=5.0.1", - "pytest-xdist", - "hypothesis>=3.58", - ] - }, - entry_points={ - "pandas_plotting_backends": ["matplotlib = pandas:plotting._matplotlib"] - }, - **setuptools_kwargs, ) - - -if __name__ == "__main__": - # Freeze to support parallel compilation when using spawn instead of fork - multiprocessing.freeze_support() - setup_package()