# Dask DataFrame smoke test

In [1]:
import dask.dataframe as dd
import pandas as pd

# Smoke test: build small Dask DataFrames from pandas, then check that a
# groupby-sum and a left merge return the expected values after .compute().

# Create a small Dask DataFrame split across two partitions
pdf = pd.DataFrame({'id': [1, 1, 2, 3], 'value': [10, 20, 30, 40]})
ddf = dd.from_pandas(pdf, npartitions=2)

# Groupby aggregation and compute; sort so the comparison does not depend on
# the order in which groups come back.
agg = ddf.groupby('id').value.sum().compute().sort_index()
# The expected Series is cast to whatever dtype the result has, so the
# comparison below effectively checks values, index, and names only
# (check_dtype=True cannot fail after the cast).
expected = pd.Series(
    [30, 30, 40],
    index=pd.Index([1, 2, 3], name='id'),
    name='value',
).astype(agg.dtype)
pd.testing.assert_series_equal(agg, expected, check_dtype=True)
print('Groupby sum ok:\n', agg)

# Join correctness
left = dd.from_pandas(pd.DataFrame({'k': [1, 2], 'v_left': ['a', 'b']}), npartitions=1)
right = dd.from_pandas(pd.DataFrame({'k': [2, 3], 'v_right': ['x', 'y']}), npartitions=1)
merged = (
    left.merge(right, on='k', how='left')
    .compute()
    .sort_values('k')
    .reset_index(drop=True)
)
# Verify left-join semantics in full: every left key is kept exactly once,
# the key with no match on the right (k == 1) has a missing v_right, and the
# matched key (k == 2) carries the right-hand value. Checking only the matched
# row would also pass for an inner join.
assert merged['k'].tolist() == [1, 2]
assert merged['v_left'].tolist() == ['a', 'b']
assert merged['v_right'].isna().tolist() == [True, False]
assert merged.loc[merged.k == 2, 'v_right'].iloc[0] == 'x'
print('Merge ok:\n', merged)


Groupby sum ok:
 id
1    30
2    30
3    40
Name: value, dtype: int64
Merge ok:
    k v_left v_right
0  1      a    <NA>
1  2      b       x
