
Commit

rework testing
brandon-b-miller committed Apr 21, 2022
1 parent 78450ef commit 7b27cc5
Showing 1 changed file with 105 additions and 42 deletions.
147 changes: 105 additions & 42 deletions python/cudf/cudf/tests/test_groupby.py
@@ -291,12 +291,38 @@ def foo(df):
    got = got_grpby.apply(foo)
    assert_groupby_results_equal(expect, got)

    def foo_args(df, k):

def create_test_groupby_apply_args_params():
    def f1(df, k):
        df["out"] = df["val1"] + df["val2"] + k
        return df

    expect = expect_grpby.apply(foo_args, 2)
    got = got_grpby.apply(foo_args, 2)
    def f2(df, k, L):
        df["out"] = df["val1"] - df["val2"] + (k / L)
        return df

    def f3(df, k, L, m):
        df["out"] = ((k * df["val1"]) + (L * df["val2"])) / m
        return df

    return [(f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))]


@pytest.mark.parametrize("func,args", create_test_groupby_apply_args_params())
def test_groupby_apply_args(func, args):
    np.random.seed(0)
    df = DataFrame()
    nelem = 20
    df["key1"] = np.random.randint(0, 3, nelem)
    df["key2"] = np.random.randint(0, 2, nelem)
    df["val1"] = np.random.random(nelem)
    df["val2"] = np.random.random(nelem)

    expect_grpby = df.to_pandas().groupby(["key1", "key2"], as_index=False)
    got_grpby = df.groupby(["key1", "key2"])

    expect = expect_grpby.apply(func, *args)
    got = got_grpby.apply(func, *args)
    assert_groupby_results_equal(expect, got)


@@ -1603,7 +1629,38 @@ def test_groupby_pipe():
    assert_groupby_results_equal(expected, actual)


def test_groupby_apply_return_scalars():
def create_test_groupby_apply_return_scalars_params():
    def f0(x):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / 10
        return full

    def f1(x, k):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / k
        return full

    def f2(x, k, L):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = L * (ticker / k)
        return full

    def f3(x, k, L, m):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = L * (ticker / k) % m
        return full

    return [(f0, ()), (f1, (42,)), (f2, (42, 119)), (f3, (42, 119, 212.1))]


@pytest.mark.parametrize(
"func,args", create_test_groupby_apply_return_scalars_params()
)
def test_groupby_apply_return_scalars(func, args):
    pdf = pd.DataFrame(
        {
            "A": [1, 1, 2, 2, 3, 3, 4, 4, 5, 5],
@@ -1623,56 +1680,52 @@ def test_groupby_apply_return_scalars():
    )
    gdf = cudf.from_pandas(pdf)

    def custom_map_func(x):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / 10
        return full

    expected = pdf.groupby("A").apply(lambda x: custom_map_func(x))
    actual = gdf.groupby("A").apply(lambda x: custom_map_func(x))
    expected = pdf.groupby("A").apply(func, *args)
    actual = gdf.groupby("A").apply(func, *args)

    assert_groupby_results_equal(expected, actual)

    def custom_map_func_args(x, k):
        x = x[~x["B"].isna()]
        ticker = x.shape[0]
        full = ticker / 10 + k
        return full + 1.8 / k

    expected = pdf.groupby("A").apply(custom_map_func_args, 2)
    actual = gdf.groupby("A").apply(custom_map_func_args, 2)
def create_test_groupby_apply_return_series_dataframe_params():
    def f0(x):
        return x - x.max()

    assert_groupby_results_equal(expected, actual)
    def f1(x):
        return x.min() - x.max()

    def f2(x):
        return x.min()

@pytest.mark.parametrize(
"cust_func",
[lambda x: x - x.max(), lambda x: x.min() - x.max(), lambda x: x.min()],
)
def test_groupby_apply_return_series_dataframe(cust_func):
    pdf = pd.DataFrame(
        {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]}
    )
    gdf = cudf.from_pandas(pdf)
    def f3(x, k):
        return x - x.max() + k

expected = pdf.groupby(["key"]).apply(cust_func)
actual = gdf.groupby(["key"]).apply(cust_func)
def f4(x, k, L):
return x.min() - x.max() + (k / L)

assert_groupby_results_equal(expected, actual)
def f5(x, k, L, m):
return m * x.min() + (k / L)

    return [
        (f0, ()),
        (f1, ()),
        (f2, ()),
        (f3, (42,)),
        (f4, (42, 119)),
        (f5, (41, 119, 212.1)),
    ]


def test_groupby_apply_return_series_dataframe_args():
@pytest.mark.parametrize(
"func,args", create_test_groupby_apply_return_series_dataframe_params()
)
def test_groupby_apply_return_series_dataframe(func, args):
    pdf = pd.DataFrame(
        {"key": [0, 0, 1, 1, 2, 2, 2], "val": [0, 1, 2, 3, 4, 5, 6]}
    )
    gdf = cudf.from_pandas(pdf)

    def cust_func(x, k):
        return x - x.min() + k

    expected = pdf.groupby(["key"]).apply(cust_func, 2)
    actual = gdf.groupby(["key"]).apply(cust_func, 2)
    expected = pdf.groupby(["key"]).apply(func, *args)
    actual = gdf.groupby(["key"]).apply(func, *args)

    assert_groupby_results_equal(expected, actual)

@@ -2246,11 +2299,21 @@ def foo(x):

    assert_groupby_results_equal(expect, got)

    def foo_args(x, k):
        return x.sum() + k

    got = make_frame(DataFrame, 100).groupby("x").y.apply(foo_args, 2)
    expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(foo_args, 2)
@pytest.mark.parametrize(
"func,args",
[
(lambda x, k: x + k, (42,)),
(lambda x, k, L: x + k - L, (42, 191)),
(lambda x, k, L, m: (x + k) / (L * m), (42, 191, 99.9)),
],
)
def test_groupby_apply_series_args(func, args):

    got = make_frame(DataFrame, 100).groupby("x").y.apply(func, *args)
    expect = make_frame(pd.DataFrame, 100).groupby("x").y.apply(func, *args)

    assert_groupby_results_equal(expect, got)


@pytest.mark.parametrize("label", [None, "left", "right"])

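For reference, a minimal standalone sketch of the factory-based pytest parametrization pattern these tests adopt: a helper builds (callable, positional-args) pairs and a single parametrized test forwards the args through GroupBy.apply. It is written against plain pandas so it runs without cudf; the names create_example_apply_params and test_example_apply are illustrative only, not part of the cudf test suite.

import pandas as pd
import pytest


def create_example_apply_params():
    # Factory returning (callable, positional-args) pairs, in the spirit of
    # the create_test_*_params helpers above (names here are made up).
    def f1(df, k):
        return df["val"] + k

    def f2(df, k, L):
        return df["val"] * k - L

    return [(f1, (42,)), (f2, (42, 119))]


@pytest.mark.parametrize("func,args", create_example_apply_params())
def test_example_apply(func, args):
    df = pd.DataFrame({"key": [0, 0, 1], "val": [1.0, 2.0, 3.0]})
    # GroupBy.apply forwards extra positional arguments to ``func``.
    result = df.groupby("key").apply(func, *args)
    assert len(result) > 0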