
Commit

rework testing
brandon-b-miller committed Apr 21, 2022
1 parent 78450ef commit 7b27cc5
Showing 1 changed file with 105 additions and 42 deletions.
147 changes: 105 additions & 42 deletions python/cudf/cudf/tests/test_groupby.py
@@ -291,12 +291,38 @@ def foo(df):
    got = got_grpby.apply(foo)
    assert_groupby_results_equal(expect, got)

    def foo_args(df, k):

def create_test_groupby_apply_args_params():
    def f1(df, k):
        df["out"] = df["val1"] + df["val2"] + k
        return df

    expect = expect_grpby.apply(foo_args, 2)
    got = got_grpby.apply(foo_args, 2)
    def f2(df, k, L):
        df["out"] = df["val1"] - df["val2"] + (k / L)
        return df

    def f3(df, k, L, m):
        df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m
        return df

    return [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))]


@pytest.mark.parametrize("func,args", create_test_groupby_apply_args_params())
def test_groupby_apply_args(func, args):
    np.random.seed(0)
    df = DataFrame()
    nelem = 20
    df["key1"] = np.random.randint(0, 3, nelem)
    df["key2"] = np.random.randint(0, 2, nelem)
    df["val1"] = np.random.random(nelem)
    df["val2"] = np.random.random(nelem)

    expect_grpby = df.to_pandas().groupby(["key1", "key2"], as_index=False)
    got_grpby = df.groupby(["key1", "key2"])

    expect = expect_grpby.apply(func, *args)
    got = got_grpby.apply(func, *args)
    assert_groupby_results_equal(expect, got)


@@ -1603,7 +1629,38 @@ def test_groupby_pipe():
    assert_groupby_results_equal(expected, actual)


def test_groupby_apply_return_scalars():
def create_test_groupby_apply_return_scalars_params():
    def f0(x):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / 10
        return full

    def f1(x, k):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / k
        return full

    def f2(x, k, L):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = L * (ticker / k)
        return full

    def f3(x, k, L, m):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = L * (ticker / k) % m
        return full

    return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))]


@pytest.mark.parametrize(
"func,args", create_test_groupby_apply_return_scalars_params()
)
def test_groupby_apply_return_scalars(func, args):
    pdf = pd.DataFrame(
        {
            "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
@@ -1623,56 +1680,52 @@ def test_groupby_apply_return_scalars():
    )
    gdf = cudf.from_pandas(pdf)

    def custom_map_func(x):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / 10
        return full

    expected = pdf.groupby("A").apply(lambda x: custom_map_func(x))
    actual = gdf.groupby("A").apply(lambda x: custom_map_func(x))
    expected = pdf.groupby("A").apply(func, *args)
    actual = gdf.groupby("A").apply(func, *args)

    assert_groupby_results_equal(expected, actual)

    def custom_map_func_args(x, k):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / 10 + k
        return full + 1.8 / k

    expected = pdf.groupby("A").apply(custom_map_func_args, 2)
    actual = gdf.groupby("A").apply(custom_map_func_args, 2)
def create_test_groupby_apply_return_series_dataframe_params():
    def f0(x):
        return x - x.max()

    assert_groupby_results_equal(expected, actual)
    def f1(x):
        return x.min() - x.max()

    def f2(x):
        return x.min()

@pytest.mark.parametrize(
"cust_func",
[lambda x: x - x.max(), lambda x: x.min() - x.max(), lambda x: x.min()],
)
def test_groupby_apply_return_series_dataframe(cust_func):
    pdf = pd.DataFrame(
        {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]}
    )
    gdf = cudf.from_pandas(pdf)
    def f3(x, k):
        return x - x.max() + k

expected = pdf.groupby(["key"]).apply(cust_func)
actual = gdf.groupby(["key"]).apply(cust_func)
def f4(x, k, L):
return x.min() - x.max() + (k / L)

assert_groupby_results_equal(expected, actual)
def f5(x, k, L, m):
return m * x.min() + (k / L)

    return [
        (f0, ()),
        (f1, ()),
        (f2, ()),
        (f3, (42,)),
        (f4, (42, 119)),
        (f5, (41, 119, 212.1)),
    ]


def test_groupby_apply_return_series_dataframe_args():
@pytest.mark.parametrize(
"func,args", create_test_groupby_apply_return_series_dataframe_params()
)
def test_groupby_apply_return_series_dataframe(func, args):
    pdf = pd.DataFrame(
        {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]}
    )
    gdf = cudf.from_pandas(pdf)

    def cust_func(x, k):
        return x - x.min() + k

    expected = pdf.groupby(["key"]).apply(cust_func, 2)
    actual = gdf.groupby(["key"]).apply(cust_func, 2)
    expected = pdf.groupby(["key"]).apply(func, *args)
    actual = gdf.groupby(["key"]).apply(func, *args)

    assert_groupby_results_equal(expected, actual)

@@ -2246,11 +2299,21 @@ def foo(x):

    assert_groupby_results_equal(expect, got)

    def foo_args(x, k):
        return x.sum() + k

    got = make_frame(DataFrame, 100).groupby("x").y.apply(foo_args, 2)
    expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(foo_args, 2)
@pytest.mark.parametrize(
"func,args",
[
(lambda x, k: x + k, (42,)),
(lambda x, k, L: x + k - L, (42, 191)),
(lambda x, k, L, m: (x + k) / (L * m), (42, 191, 99.9)),
],
)
def test_groupby_apply_series_args(func, args):

    got = make_frame(DataFrame, 100).groupby("x").y.apply(func, *args)
    expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(func, *args)

    assert_groupby_results_equal(expect, got)


@pytest.mark.parametrize("label", [None, "left", "right"])

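For reference, a minimal standalone sketch of the factory-based pytest parametrization pattern these tests adopt: a helper builds (callable, positional-args) pairs and a single parametrized test forwards the args through GroupBy.apply. It is written against plain pandas so it runs without cudf; the names create_example_apply_params and test_example_apply are illustrative only, not part of the cudf test suite.

import pandas as pd
import pytest


def create_example_apply_params():
    # Factory returning (callable, positional-args) pairs, in the spirit of
    # the create_test_*_params helpers above (names here are made up).
    def f1(df, k):
        return df["val"] + k

    def f2(df, k, L):
        return df["val"] * k - L

    return [(f1, (42,)), (f2, (42, 119))]


@pytest.mark.parametrize("func,args", create_example_apply_params())
def test_example_apply(func, args):
    df = pd.DataFrame({"key": [0, 0, 1], "val": [1.0, 2.0, 3.0]})
    # GroupBy.apply forwards extra positional arguments to ``func``.
    result = df.groupby("key").apply(func, *args)
    assert len(result) > 0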