From 4b839cb8e1d94584c710d1c7b2d3f856aa8637d6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 8 Jan 2022 14:35:34 -0800 Subject: [PATCH] TST: Split / parameterize reshaping tests --- pandas/tests/reshape/test_melt.py | 44 +++++----- pandas/tests/reshape/test_pivot.py | 81 ++++++++++--------- .../tests/reshape/test_union_categoricals.py | 43 ++++++---- pandas/tests/reshape/test_util.py | 17 ++-- 4 files changed, 96 insertions(+), 89 deletions(-) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 8a83cdcbdefb0..ff8e5d56cdc93 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -75,7 +75,8 @@ def test_value_vars(self): ) tm.assert_frame_equal(result4, expected4) - def test_value_vars_types(self): + @pytest.mark.parametrize("type_", (tuple, list, np.array)) + def test_value_vars_types(self, type_): # GH 15348 expected = DataFrame( { @@ -86,10 +87,8 @@ def test_value_vars_types(self): }, columns=["id1", "id2", "variable", "value"], ) - - for type_ in (tuple, list, np.array): - result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B"))) - tm.assert_frame_equal(result, expected) + result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B"))) + tm.assert_frame_equal(result, expected) def test_vars_work_with_multiindex(self): expected = DataFrame( @@ -140,23 +139,21 @@ def test_single_vars_work_with_multiindex( result = self.df1.melt(id_vars, value_vars, col_level=col_level) tm.assert_frame_equal(result, expected) - def test_tuple_vars_fail_with_multiindex(self): + @pytest.mark.parametrize( + "id_vars, value_vars", + [ + [("A", "a"), [("B", "b")]], + [[("A", "a")], ("B", "b")], + [("A", "a"), ("B", "b")], + ], + ) + def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars): # melt should fail with an informative error message if # the columns have a MultiIndex and a tuple is passed # for id_vars or value_vars. - tuple_a = ("A", "a") - list_a = [tuple_a] - tuple_b = ("B", "b") - list_b = [tuple_b] - msg = r"(id|value)_vars must be a list of tuples when columns are a MultiIndex" - for id_vars, value_vars in ( - (tuple_a, list_b), - (list_a, tuple_b), - (tuple_a, tuple_b), - ): - with pytest.raises(ValueError, match=msg): - self.df1.melt(id_vars=id_vars, value_vars=value_vars) + with pytest.raises(ValueError, match=msg): + self.df1.melt(id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self): result5 = self.df.melt(var_name=self.var_name) @@ -261,11 +258,10 @@ def test_custom_var_and_value_name(self): result20 = df20.melt() assert result20.columns.tolist() == ["foo", "value"] - def test_col_level(self): - res1 = self.df1.melt(col_level=0) - res2 = self.df1.melt(col_level="CAP") - assert res1.columns.tolist() == ["CAP", "value"] - assert res2.columns.tolist() == ["CAP", "value"] + @pytest.mark.parametrize("col_level", [0, "CAP"]) + def test_col_level(self, col_level): + res = self.df1.melt(col_level=col_level) + assert res.columns.tolist() == ["CAP", "value"] def test_multiindex(self): res = self.df1.melt() @@ -633,7 +629,7 @@ def test_pairs(self): tm.assert_frame_equal(result, exp) with tm.assert_produces_warning(FutureWarning): - result = lreshape(df, spec, dropna=False, label="foo") + lreshape(df, spec, dropna=False, label="foo") spec = { "visitdt": [f"visitdt{i:d}" for i in range(1, 3)], diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index a023adfb509a0..a38cf8b067b27 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -260,6 +260,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) + def test_pivot_with_non_observable_dropna_multi_cat(self, dropna): # gh-21378 df = DataFrame( { @@ -493,6 +494,8 @@ def test_pivot_index_with_nan(self, method): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(df.pivot("b", "a", "c"), expected.T) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_index_with_nan_dates(self, method): # GH9491 df = DataFrame( { @@ -501,8 +504,8 @@ def test_pivot_index_with_nan(self, method): } ) df["b"] = df["a"] - pd.Timestamp("2014-02-02") - df.loc[1, "a"] = df.loc[3, "a"] = nan - df.loc[1, "b"] = df.loc[4, "b"] = nan + df.loc[1, "a"] = df.loc[3, "a"] = np.nan + df.loc[1, "b"] = df.loc[4, "b"] = np.nan if method: pv = df.pivot("a", "b", "c") @@ -851,33 +854,31 @@ def test_pivot_with_tuple_of_values(self, method): else: pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz")) - def test_margins(self): - def _check_output( - result, values_col, index=["A", "B"], columns=["C"], margins_col="All" - ): - col_margins = result.loc[result.index[:-1], margins_col] - expected_col_margins = self.data.groupby(index)[values_col].mean() - tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) - assert col_margins.name == margins_col - - result = result.sort_index() - index_margins = result.loc[(margins_col, "")].iloc[:-1] - - expected_ix_margins = self.data.groupby(columns)[values_col].mean() - tm.assert_series_equal( - index_margins, expected_ix_margins, check_names=False - ) - assert index_margins.name == (margins_col, "") + def _check_output( + self, result, values_col, index=["A", "B"], columns=["C"], margins_col="All" + ): + col_margins = result.loc[result.index[:-1], margins_col] + expected_col_margins = self.data.groupby(index)[values_col].mean() + tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) + assert col_margins.name == margins_col - grand_total_margins = result.loc[(margins_col, ""), margins_col] - expected_total_margins = self.data[values_col].mean() - assert grand_total_margins == expected_total_margins + result = result.sort_index() + index_margins = result.loc[(margins_col, "")].iloc[:-1] + expected_ix_margins = self.data.groupby(columns)[values_col].mean() + tm.assert_series_equal(index_margins, expected_ix_margins, check_names=False) + assert index_margins.name == (margins_col, "") + + grand_total_margins = result.loc[(margins_col, ""), margins_col] + expected_total_margins = self.data[values_col].mean() + assert grand_total_margins == expected_total_margins + + def test_margins(self): # column specified result = self.data.pivot_table( values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean ) - _check_output(result, "D") + self._check_output(result, "D") # Set a different margins_name (not 'All') result = self.data.pivot_table( @@ -888,15 +889,16 @@ def _check_output( aggfunc=np.mean, margins_name="Totals", ) - _check_output(result, "D", margins_col="Totals") + self._check_output(result, "D", margins_col="Totals") # no column specified table = self.data.pivot_table( index=["A", "B"], columns="C", margins=True, aggfunc=np.mean ) for value_col in table.columns.levels[0]: - _check_output(table[value_col], value_col) + self._check_output(table[value_col], value_col) + def test_no_col(self): # no col # to help with a buglet @@ -1353,6 +1355,7 @@ def test_pivot_timegrouper(self, using_array_manager): aggfunc=np.sum, ) + def test_pivot_timegrouper_double(self): # double grouper df = DataFrame( { @@ -1633,7 +1636,8 @@ def test_pivot_dtaccessor(self): ) tm.assert_frame_equal(result, expected) - def test_daily(self): + @pytest.mark.parametrize("i", range(1, 367)) + def test_daily(self, i): rng = date_range("1/1/2000", "12/31/2004", freq="D") ts = Series(np.random.randn(len(rng)), index=rng) @@ -1644,15 +1648,15 @@ def test_daily(self): doy = np.asarray(ts.index.dayofyear) - for i in range(1, 367): - subset = ts[doy == i] - subset.index = subset.index.year + subset = ts[doy == i] + subset.index = subset.index.year - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - assert result.name == i + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i - def test_monthly(self): + @pytest.mark.parametrize("i", range(1, 13)) + def test_monthly(self, i): rng = date_range("1/1/2000", "12/31/2004", freq="M") ts = Series(np.random.randn(len(rng)), index=rng) @@ -1660,12 +1664,11 @@ def test_monthly(self): annual.columns = annual.columns.droplevel(0) month = ts.index.month - for i in range(1, 13): - subset = ts[month == i] - subset.index = subset.index.year - result = annual[i].dropna() - tm.assert_series_equal(result, subset, check_names=False) - assert result.name == i + subset = ts[month == i] + subset.index = subset.index.year + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i def test_pivot_table_with_iterator_values(self): # GH 12017 diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py index f39b5de2478b0..7505d69aee134 100644 --- a/pandas/tests/reshape/test_union_categoricals.py +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -13,9 +13,9 @@ class TestUnionCategoricals: - def test_union_categorical(self): - # GH 13361 - data = [ + @pytest.mark.parametrize( + "a, b, combined", + [ (list("abc"), list("abd"), list("abcabd")), ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), @@ -39,14 +39,16 @@ def test_union_categorical(self): pd.period_range("2014-01-06", "2014-01-07"), pd.period_range("2014-01-01", "2014-01-07"), ), - ] - - for a, b, combined in data: - for box in [Categorical, CategoricalIndex, Series]: - result = union_categoricals([box(Categorical(a)), box(Categorical(b))]) - expected = Categorical(combined) - tm.assert_categorical_equal(result, expected) + ], + ) + @pytest.mark.parametrize("box", [Categorical, CategoricalIndex, Series]) + def test_union_categorical(self, a, b, combined, box): + # GH 13361 + result = union_categoricals([box(Categorical(a)), box(Categorical(b))]) + expected = Categorical(combined) + tm.assert_categorical_equal(result, expected) + def test_union_categorical_ordered_appearance(self): # new categories ordered by appearance s = Categorical(["x", "y", "z"]) s2 = Categorical(["a", "b", "c"]) @@ -56,12 +58,14 @@ def test_union_categorical(self): ) tm.assert_categorical_equal(result, expected) + def test_union_categorical_ordered_true(self): s = Categorical([0, 1.2, 2], ordered=True) s2 = Categorical([0, 1.2, 2], ordered=True) result = union_categoricals([s, s2]) expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) tm.assert_categorical_equal(result, expected) + def test_union_categorical_match_types(self): # must exactly match types s = Categorical([0, 1.2, 2]) s2 = Categorical([2, 3, 4]) @@ -69,6 +73,7 @@ def test_union_categorical(self): with pytest.raises(TypeError, match=msg): union_categoricals([s, s2]) + def test_union_categorical_empty(self): msg = "No Categoricals to union" with pytest.raises(ValueError, match=msg): union_categoricals([]) @@ -117,14 +122,11 @@ def test_union_categoricals_nan(self): exp = Categorical([np.nan, np.nan, np.nan, np.nan]) tm.assert_categorical_equal(res, exp) - def test_union_categoricals_empty(self): + @pytest.mark.parametrize("val", [[], ["1"]]) + def test_union_categoricals_empty(self, val): # GH 13759 - res = union_categoricals([Categorical([]), Categorical([])]) - exp = Categorical([]) - tm.assert_categorical_equal(res, exp) - - res = union_categoricals([Categorical([]), Categorical(["1"])]) - exp = Categorical(["1"]) + res = union_categoricals([Categorical([]), Categorical(val)]) + exp = Categorical(val) tm.assert_categorical_equal(res, exp) def test_union_categorical_same_category(self): @@ -135,6 +137,7 @@ def test_union_categorical_same_category(self): exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], categories=[1, 2, 3, 4]) tm.assert_categorical_equal(res, exp) + def test_union_categorical_same_category_str(self): c1 = Categorical(["z", "z", "z"], categories=["x", "y", "z"]) c2 = Categorical(["x", "x", "x"], categories=["x", "y", "z"]) res = union_categoricals([c1, c2]) @@ -293,6 +296,7 @@ def test_union_categoricals_sort_false(self): ) tm.assert_categorical_equal(result, expected) + def test_union_categoricals_sort_false_fastpath(self): # fastpath c1 = Categorical(["a", "b"], categories=["b", "a", "c"]) c2 = Categorical(["b", "c"], categories=["b", "a", "c"]) @@ -300,6 +304,7 @@ def test_union_categoricals_sort_false(self): expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"]) tm.assert_categorical_equal(result, expected) + def test_union_categoricals_sort_false_skipresort(self): # fastpath - skip resort c1 = Categorical(["a", "b"], categories=["a", "b", "c"]) c2 = Categorical(["b", "c"], categories=["a", "b", "c"]) @@ -307,24 +312,28 @@ def test_union_categoricals_sort_false(self): expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) tm.assert_categorical_equal(result, expected) + def test_union_categoricals_sort_false_one_nan(self): c1 = Categorical(["x", np.nan]) c2 = Categorical([np.nan, "b"]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"]) tm.assert_categorical_equal(result, expected) + def test_union_categoricals_sort_false_only_nan(self): c1 = Categorical([np.nan]) c2 = Categorical([np.nan]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical([np.nan, np.nan]) tm.assert_categorical_equal(result, expected) + def test_union_categoricals_sort_false_empty(self): c1 = Categorical([]) c2 = Categorical([]) result = union_categoricals([c1, c2], sort_categories=False) expected = Categorical([]) tm.assert_categorical_equal(result, expected) + def test_union_categoricals_sort_false_ordered_true(self): c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True) c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True) result = union_categoricals([c1, c2], sort_categories=False) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 1ebe96a8b5a8d..3a9ddaebf2934 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -44,17 +44,16 @@ def test_tzaware_retained_categorical(self): expected = x.repeat(2) tm.assert_index_equal(result1, expected) - def test_empty(self): + @pytest.mark.parametrize("x, y", [[[], []], [[0, 1], []], [[], ["a", "b", "c"]]]) + def test_empty(self, x, y): # product of empty factors - X = [[], [0, 1], []] - Y = [[], [], ["a", "b", "c"]] - for x, y in zip(X, Y): - expected1 = np.array([], dtype=np.asarray(x).dtype) - expected2 = np.array([], dtype=np.asarray(y).dtype) - result1, result2 = cartesian_product([x, y]) - tm.assert_numpy_array_equal(result1, expected1) - tm.assert_numpy_array_equal(result2, expected2) + expected1 = np.array([], dtype=np.asarray(x).dtype) + expected2 = np.array([], dtype=np.asarray(y).dtype) + result1, result2 = cartesian_product([x, y]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + def test_empty_input(self): # empty product (empty input): result = cartesian_product([]) expected = []