---

<a href="https://github.com/rraadd88/roux/blob/master/examples/roux_lib_df_apply.ipynb"><img align="right" style="float:right;" src="https://img.shields.io/badge/-source-cccccc?style=flat-square"></a>

## ⌗ Apply on dataframes.

In [1]:
# Lower the root logger's threshold to INFO so that log messages are shown
# in the cell outputs.
import logging

logging.root.setLevel(level=logging.INFO)

**Optional requirements**

In [2]:
# None

**Demo data**

In [3]:
import pandas as pd

# Small demo table: two integer columns, `a` and `b`, holding the same values.
data = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3]})
data

Unnamed: 0,a,b
0,1,1
1,2,2
2,3,3


## Import `r`oux-`d`ataframe attributes

In [4]:
import roux.lib.df_apply as rd #noqa

## Asynchronously for faster processing

### No chunking

In [5]:
# Row-wise sum of columns `a` and `b` through the `rd` accessor,
# requesting 2 CPUs.
res = data.rd.apply(
    lambda row: row['a'] + row['b'],
    axis=1,
    cpus=2,
)
res

INFO:root:using 2/2 cpus/threads ..


0    2
1    4
2    6
dtype: int64

In [6]:
## test: the `rd.apply` result must have no differences from plain `DataFrame.apply`
assert res.compare(
    data.apply(
        lambda row: row['a'] + row['b'],
        axis=1,
    )
).empty, res

#### `groupby`

In [7]:
# Group-wise apply: rows are grouped by column `a` and each group is summed
# (the function returns a Series per group).
data.rd.apply(
    lambda group: group.sum(),
    cpus=2,
    by='a',
)

INFO:root:using 2/2 cpus/threads ..


a   
3  a    3
   b    3
2  a    2
   b    2
1  a    1
   b    1
dtype: int64

In [8]:
# Group-wise apply where the function returns a one-row dataframe per group:
# the column sums are turned into a frame and transposed.
data.rd.apply(
    lambda group: group.sum().to_frame().T,
    cpus=2,
    by='a',
)

INFO:root:using 2/2 cpus/threads ..


Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3,0,3,3
2,0,2,2
1,0,1,1


### Chunking

In [9]:
# Check that the index is the default 0..n-1 range.
list(range(len(data))) == data.index.tolist()

True

In [10]:
# Chunked apply: the same row-wise sum, with `chunk_size=1` passed through
# `kws_chunks` and verbose logging switched on.
# NOTE(review): the index is reset to 0..n-1 before applying — presumably the
# chunked path relies on a default index; confirm.
res = (
    data
    .reset_index(drop=True)
    .rd.apply(
        lambda row: row['a'] + row['b'],
        verbose=True,
        cpus=2,
        kws_chunks={'chunk_size': 1},
    )
)
res

INFO:root:using 2/2 cpus/threads ..
INFO:root:chunking ..
INFO:root:shape = (3, 4) nunique: chunk = 3 
INFO:root:temp. dir path: /tmp/tmp00w7pbv8
  0%|          | 0/3 [00:00<?, ?it/s]INFO:root:processsing /tmp/tmp00w7pbv8/00.pqt ..
  table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
INFO:root:processsing /tmp/tmp00w7pbv8/01.pqt ..
  table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
INFO:root:processsing /tmp/tmp00w7pbv8/02.pqt ..
  table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
100%|██████████| 3/3 [00:00<00:00, 83.62it/s]
INFO:root:collecting processed chunks ..
INFO:root:0


0    2
1    4
2    6
Name: 0, dtype: int64

In [11]:
## test: the chunked result must have no differences from plain `DataFrame.apply`
assert res.compare(
    data.apply(
        lambda row: row['a'] + row['b'],
        axis=1,
    )
).empty, res

### Output as a dataframe (`out_df=True`)

In [12]:
# Chunked apply with `out_df=True` added to `kws_chunks`: in the recorded
# output the result comes back as a dataframe holding the input columns
# alongside the computed column.
(
    data
    .reset_index(drop=True)
    .rd.apply(
        lambda row: row['a'] + row['b'],
        verbose=True,
        cpus=2,
        kws_chunks={'chunk_size': 1, 'out_df': True},
    )
)

INFO:root:using 2/2 cpus/threads ..
INFO:root:chunking ..
INFO:root:shape = (3, 4) nunique: chunk = 3 
INFO:root:temp. dir path: /tmp/tmphzuaurm7
  0%|          | 0/3 [00:00<?, ?it/s]INFO:root:processsing /tmp/tmphzuaurm7/00.pqt ..
  table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
INFO:root:processsing /tmp/tmphzuaurm7/01.pqt ..
  table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
INFO:root:processsing /tmp/tmphzuaurm7/02.pqt ..
  table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
100%|██████████| 3/3 [00:00<00:00, 231.44it/s]
INFO:root:collecting processed chunks ..
INFO:root:0
INFO:root:merge: shape changed: (3, 5)->(3, 6), length constant


Unnamed: 0,a,b,_chunk_path,0
0,1,1,/tmp/tmphzuaurm7/00.pqt,2
1,2,2,/tmp/tmphzuaurm7/01.pqt,4
2,3,3,/tmp/tmphzuaurm7/02.pqt,6


## Documentation
[`roux.lib.df_apply`](https://github.com/rraadd88/roux#module-rouxlibdfapply)