In [1]:
import pandas as pd
import dask.dataframe as dd
# Small in-memory pandas frame: an integer column x and a float column y.
df = pd.DataFrame(
    {
        'x': [1, 2, 3, 4, 5],
        'y': [1., 2., 3., 4., 5.],
    }
)

# Wrap the pandas frame as a dask DataFrame split into two partitions.
ddf = dd.from_pandas(df, npartitions=2)

In [2]:
# The row count is reported as a lazy Delayed object; only the column
# count (2) is available as a plain int without computing.
ddf.shape

(Delayed('int-f8dfa3c1-aa89-499c-8b25-340fc87cd6c3'), 2)

In [3]:
# Evaluate the dask DataFrame so its full contents can be displayed.
ddf.compute()

Unnamed: 0,x,y
0,1,1.0
1,2,2.0
2,3,3.0
3,4,4.0
4,5,5.0


In [4]:
def myadd(df, a, b=1):
    """Return the element-wise sum of the ``x`` and ``y`` columns of ``df``.

    Parameters
    ----------
    df : object exposing ``x`` and ``y`` attributes (e.g. a DataFrame partition)
    a : accepted but not used in the result (the ``+ a + b`` term is disabled)
    b : accepted but not used in the result; defaults to 1

    Returns
    -------
    The result of ``df.x + df.y``.
    """
    x_values = df.x
    y_values = df.y
    # a and b are deliberately not added here, so the result is just x + y.
    return x_values + y_values

### Use `map_partitions` to apply a function to each partition

In [5]:
# The arguments after the function (1 and b=2) are passed on to myadd as
# a and b; no meta is supplied in this call.
res = ddf.map_partitions(myadd, 1, b=2)

In [6]:
# Even without an explicit meta, the result dtype is reported as float64.
res.dtype

dtype('float64')

In [65]:
# Evaluate the lazy result: the row-wise sum x + y.
res.compute()

0     2.0
1     4.0
2     6.0
3     8.0
4    10.0
dtype: float64

In [19]:
# Same call as before, but the output is described up front through meta
# as a (name, dtype) tuple: an unnamed Series of dtype 'f8' (float64).
res = ddf.map_partitions(myadd, 1, b=2, meta=(None, 'f8'))

In [20]:
# Check what kind of collection map_partitions returned (a dask Series here).
type(res)

dask.dataframe.core.Series

In [21]:
# Evaluate the lazy Series; the values match the call made without meta.
res.compute()

0     2.0
1     4.0
2     6.0
3     8.0
4    10.0
dtype: float64

### Here we map a function that takes a DataFrame and returns a DataFrame with a new column, using `map_partitions`:

In [10]:
# Add a column z = x + y to every partition. The lambda argument is named
# `part` so that it does not shadow the global pandas frame `df`.
res = ddf.map_partitions(lambda part: part.assign(z=part.x + part.y))
# Show the column dtypes of the result (no meta was passed above).
res.dtypes

x      int64
y    float64
z    float64
dtype: object

In [11]:
# Evaluate the result to show the original columns plus the new z column.
res.compute()

Unnamed: 0,x,y,z
0,1,1.0,2.0
1,2,2.0,4.0
2,3,3.0,6.0
3,4,4.0,8.0
4,5,5.0,10.0


In [12]:
# Add a column z = x * y to every partition, this time declaring the output
# schema explicitly through meta as a {column: dtype} mapping.
res = ddf.map_partitions(
    lambda part: part.assign(z=part.x * part.y),
    meta={'x': 'i8', 'y': 'f8', 'z': 'f8'},
)

In [13]:
# The reported dtypes correspond to the meta mapping given above
# ('i8' -> int64, 'f8' -> float64).
res.dtypes

x      int64
y    float64
z    float64
dtype: object

In [14]:
# Evaluate the result; z holds the product x * y for each row.
res.compute()

Unnamed: 0,x,y,z
0,1,1.0,1.0
1,2,2.0,4.0
2,3,3.0,9.0
3,4,4.0,16.0
4,5,5.0,25.0


In [15]:
# Take the head of every partition. The global pandas frame `df` is passed
# as meta to describe the output; the lambda argument is named `part` so it
# is not confused with that global.
res = ddf.map_partitions(lambda part: part.head(), meta=df)

In [16]:
# The result has the same columns and dtypes as the frame passed as meta.
res.dtypes

x      int64
y    float64
dtype: object

In [17]:
# Evaluate the result; with this small input all five rows come back.
res.compute()

Unnamed: 0,x,y
0,1,1.0
1,2,2.0
2,3,3.0
3,4,4.0
4,5,5.0


In [23]:
# Display the original pandas frame for comparison with the dask results.
df

Unnamed: 0,x,y
0,1,1.0
1,2,2.0
2,3,3.0
3,4,4.0
4,5,5.0
