Skip to content

Commit

Permalink
Added NaN handlers for aggregate line and bar charts
Browse files (browse the repository at this point in the history)
  • Loading branch information
AjayThorve committed Feb 5, 2020
1 parent 23edfc7 commit cd00a8a
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
12 changes: 8 additions & 4 deletions python/cuxfilter/assets/numba_kernels/gpu_histogram.py
Expand Up @@ -205,15 +205,18 @@ def calc_groupby(chart: Type[BaseChart], data, agg=None):

if agg is None:
temp_df = cudf.DataFrame()

temp_df.index = data.dropna(subset=[chart.x]).index
temp_df.add_column(
chart.x,
get_binwise_reduced_column(
data[chart.x].copy().to_gpu_array(), chart.stride, a_x_range
data.dropna(subset=[chart.x])[chart.x].copy().to_gpu_array(),
chart.stride,
a_x_range,
),
)
temp_df.add_column(chart.y, data[chart.y].copy().to_gpu_array())

temp_df.add_column(
chart.y, data.dropna(subset=[chart.x])[chart.y].copy()
)
groupby_res = (
temp_df.groupby(by=[chart.x], as_index=False)
.agg({chart.y: chart.aggregate_fn})
Expand All @@ -225,6 +228,7 @@ def calc_groupby(chart: Type[BaseChart], data, agg=None):
groupby_res = (
data.groupby(by=[chart.x], as_index=False).agg(agg).to_pandas()
)
print(data.groupby(by=[chart.x], as_index=False).agg(agg).to_pandas())
return groupby_res.to_numpy().transpose()


Expand Down
24 changes: 24 additions & 0 deletions python/cuxfilter/tests/assets/test_gpu_histogram.py
Expand Up @@ -332,6 +332,30 @@ def test_calc_groupby(aggregate_fn, result):
assert np.array_equal(gpu_histogram.calc_groupby(bc, df), result)


@pytest.mark.parametrize(
    "x, y, aggregate_fn, result",
    [
        # NaN sits in the aggregated (y) column: both x bins are expected,
        # with NaN as the aggregate of the first bin.
        ("key", "val", "mean", np.array([[0.0, 1.0], [np.nan, 3.0]])),
        # NaN sits in the grouping (x) column: only one bin is expected.
        ("val", "key", "mean", np.array([[0.0], [2.0]])),
    ],
)
def test_calc_groupby_for_nulls(x, y, aggregate_fn, result):
    """Check ``calc_groupby`` output when one input column contains a NaN.

    The same two-row frame is used for both parametrized cases; only the
    roles of the ``key`` and ``val`` columns (x vs. y) are swapped, so the
    NaN lands once in the aggregated column and once in the grouping column.
    """
    df = cudf.DataFrame({"key": [1, 2], "val": [np.nan, 3]})

    bc = BaseChart()
    bc.x = x
    bc.y = y
    bc.stride = 1.0
    bc.min_value = df[x].min()
    bc.max_value = df[x].max()
    bc.aggregate_fn = aggregate_fn

    # Run the groupby once and compare; equal_nan=True lets the NaN in the
    # expected array match a NaN in the computed result.
    actual = gpu_histogram.calc_groupby(bc, df)
    assert np.allclose(actual, result, equal_nan=True)


def test_aggregated_column_unique():
df = cudf.DataFrame(
{
Expand Down

0 comments on commit cd00a8a

Please sign in to comment.