Skip to content

Commit

Permalink
raise informative error on mismatched columns, return_type
Browse files Browse the repository at this point in the history
  • Loading branch information
mrocklin committed Aug 6, 2015
1 parent bbf7985 commit 763ddbc
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
6 changes: 6 additions & 0 deletions dask/dataframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,12 @@ def map_partitions(func, column_info, *args, **kwargs):
else:
return_type = type(args[0])

if (return_type == DataFrame and isinstance(column_info, (str, unicode)) or
return_type == Series and isinstance(column_info, (tuple, list, pd.Index))):
raise ValueError("Arguments to map_partitions are not consistent.\n"
"Received columns=%s and return_type=%s" %
(str(column_info), str(return_type)))

if kwargs:
raise ValueError("Keyword arguments not yet supported in map_partitions")

Expand Down
13 changes: 13 additions & 0 deletions dask/dataframe/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,19 @@ def test_map_partitions_method_names():
assert b.name == 'x'


def test_map_partitions_return_type_and_names_agree():
    """Check that map_partitions rejects inconsistent columns/return_type.

    Two mismatched combinations are exercised:

    * a single string for ``columns`` together with
      ``return_type=dd.DataFrame``
    * a list for ``columns`` together with ``return_type=dd.Series``

    Each call must raise ``ValueError`` with a message that mentions the
    offending column name.  If no exception is raised the test fails
    explicitly, rather than passing vacuously.
    """
    df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]})
    a = dd.from_pandas(df, npartitions=2)

    # String column spec combined with a DataFrame return type.
    try:
        a.map_partitions(lambda x: x, columns='zzzz', return_type=dd.DataFrame)
    except ValueError as e:
        assert 'zzzz' in str(e)
    else:
        # Without this branch a missing check would go unnoticed.
        raise AssertionError(
            "map_partitions accepted columns='zzzz' with "
            "return_type=dd.DataFrame; expected ValueError")

    # List column spec combined with a Series return type.
    try:
        a.map_partitions(lambda x: x, columns=['zzzz'], return_type=dd.Series)
    except ValueError as e:
        assert 'zzzz' in str(e)
    else:
        raise AssertionError(
            "map_partitions accepted columns=['zzzz'] with "
            "return_type=dd.Series; expected ValueError")


def test_drop_duplicates():
assert eq(d.a.drop_duplicates(), full.a.drop_duplicates())
assert eq(d.drop_duplicates(), full.drop_duplicates())
Expand Down

0 comments on commit 763ddbc

Please sign in to comment.