In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import time
from pandarallel import pandarallel
import numpy as np

⚠️ **WARNING** ⚠️

On Windows, because multiprocessing uses the `spawn` start method, the function you send to pandarallel must be **self-contained** and must not depend on external resources.

Example:

❌ **Forbidden:**

```Python
import math

def func(x):
    # Here, `math` is defined outside `func`. `func` is not self contained.
    return math.sin(x.a**2) + math.sin(x.b**2)
```

✅ **Valid:**

```Python
def func(x):
    # Here, `math` is defined inside `func`. `func` is self contained.
    import math
    return math.sin(x.a**2) + math.sin(x.b**2)
```

# Initialize pandarallel

In [None]:
# Set up pandarallel with its default options (no arguments are passed).
# The `parallel_*` calls in the cells below rely on this having been run.
pandarallel.initialize()

# DataFrame.apply

In [None]:
# Benchmark input for the row-wise apply: 5 million rows.
#   a: random integers drawn from [1, 8)
#   b: uniform random floats drawn from [0, 1)
df_size = 5_000_000
df = pd.DataFrame(
    {
        "a": np.random.randint(1, 8, df_size),
        "b": np.random.rand(df_size),
    }
)

In [None]:
def func(x):
    """Return sin(a**2) + sin(b**2) for one row-like object.

    `x` must expose numeric attributes `a` and `b`. The `math` import lives
    inside the body so the function stays self-contained.
    """
    from math import sin

    term_a = sin(x.a**2)
    term_b = sin(x.b**2)
    return term_a + term_b

In [None]:
%%time
# Baseline: plain pandas applies `func` to every row (axis=1).
res = df.apply(func, axis=1)

In [None]:
%%time
# Same row-wise computation, dispatched through pandarallel's `parallel_apply`.
res_parallel = df.parallel_apply(func, axis=1)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# DataFrame.map

In [None]:
# Benchmark input for the element-wise map: 10 million rows.
#   a: random integers drawn from [1, 8)
#   b: uniform random floats drawn from [0, 1)
df_size = 10_000_000
df = pd.DataFrame(
    {
        "a": np.random.randint(1, 8, df_size),
        "b": np.random.rand(df_size),
    }
)

In [None]:
def func(x):
    """Return sin(x**2) - cos(x**2) for a single numeric value.

    The `math` import lives inside the body so the function stays
    self-contained.
    """
    import math

    squared = x**2
    return math.sin(squared) - math.cos(squared)

In [None]:
%%time
# Baseline: plain pandas applies `func` to every element of the frame.
# NOTE(review): `DataFrame.map` requires pandas >= 2.1; older releases call it `applymap`.
res = df.map(func)

In [None]:
%%time
# Same element-wise computation through pandarallel, which names it `parallel_applymap`.
res_parallel = df.parallel_applymap(func)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# DataFrame.groupby.apply

In [None]:
# Benchmark input for groupby.apply: 30 million rows.
#   a: group key, random integers drawn from [1, 1000)
#   b: uniform random floats drawn from [0, 1)
df_size = 30_000_000
df = pd.DataFrame(
    {
        "a": np.random.randint(1, 1000, df_size),
        "b": np.random.rand(df_size),
    }
)

In [None]:
def func(df):
    """Return the mean of log10(sqrt(exp(v**2))) over the values in `df.b`.

    `df` is one group; only its `b` attribute is read. The value is
    accumulated in a plain Python loop, one element at a time. The `math`
    names are imported inside the body so the function stays self-contained.
    """
    from math import exp, log10, sqrt

    values = df.b
    total = 0
    for value in values:
        total += log10(sqrt(exp(value**2)))

    return total / len(values)

In [None]:
%%time
# Baseline: plain pandas calls `func` once per group of column `a`.
res = df.groupby("a").apply(func)

In [None]:
%%time
# Same per-group computation, dispatched through pandarallel's `parallel_apply`.
res_parallel = df.groupby("a").parallel_apply(func)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# DataFrame.groupby.rolling.apply

In [None]:
# Benchmark input for groupby.rolling.apply: 1 million rows.
#   a: group key, random integers drawn from [1, 300)
#   b: uniform random floats drawn from [0, 1)
df_size = 1_000_000
df = pd.DataFrame(
    {
        "a": np.random.randint(1, 300, df_size),
        "b": np.random.rand(df_size),
    }
)

In [None]:
def func(x):
    """Combine the first four values of a window: x0 + x1**2 + x2**3 + x3**4.

    `x` must support positional access through `.iloc` (e.g. a pandas Series)
    and hold at least four values.
    """
    first = x.iloc[0]
    squared = x.iloc[1] ** 2
    cubed = x.iloc[2] ** 3
    fourth_power = x.iloc[3] ** 4
    return first + squared + cubed + fourth_power

In [None]:
%%time
# Baseline: within each group of `a`, apply `func` to every 4-row rolling window of `b`.
# raw=False passes each window as a Series, which `func` needs for `.iloc`.
res = df.groupby('a').b.rolling(4).apply(func, raw=False)

In [None]:
%%time
# Same grouped rolling computation, dispatched through pandarallel's `parallel_apply`.
res_parallel = df.groupby('a').b.rolling(4).parallel_apply(func, raw=False)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# DataFrame.groupby.expanding.apply

In [None]:
# Benchmark input for groupby.expanding.apply: 1 million rows.
#   a: group key, random integers drawn from [1, 300)
#   b: uniform random floats drawn from [0, 1)
df_size = 1_000_000
df = pd.DataFrame(
    {
        "a": np.random.randint(1, 300, df_size),
        "b": np.random.rand(df_size),
    }
)

In [None]:
def func(x):
    """Return x0 + x1**2 + x2**3 + x3**4 from the first four values of `x`.

    `x` must support positional access through `.iloc` (e.g. a pandas Series).
    Only positions 0 to 3 are read; any later values are ignored.
    """
    at = x.iloc
    return at[0] + at[1] ** 2 + at[2] ** 3 + at[3] ** 4

In [None]:
%%time
# Baseline: within each group of `a`, apply `func` to every expanding window of `b`.
# expanding(4) sets min_periods=4, so `func` only sees windows holding at least four
# values; raw=False passes each window as a Series, which `func` needs for `.iloc`.
res = df.groupby('a').b.expanding(4).apply(func, raw=False)

In [None]:
%%time
# Same grouped expanding computation, dispatched through pandarallel's `parallel_apply`.
res_parallel = df.groupby('a').b.expanding(4).parallel_apply(func, raw=False)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# Series.map

In [None]:
# Benchmark input for Series.map: 50 million rows.
#   a: uniform random floats shifted by one, i.e. drawn from [1, 2)
df_size = 50_000_000
df = pd.DataFrame({"a": np.random.rand(df_size) + 1})

In [None]:
def func(x):
    """Return log10(sqrt(exp(x**2))) for a single numeric value.

    The `math` names are imported inside the body so the function stays
    self-contained.
    """
    from math import exp, log10, sqrt

    exponential = exp(x**2)
    return log10(sqrt(exponential))

In [None]:
%%time
# Baseline: plain pandas maps `func` over every value of column `a`.
res = df.a.map(func)

In [None]:
%%time
# Same element-wise computation, dispatched through pandarallel's `parallel_map`.
res_parallel = df.a.parallel_map(func)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# Series.apply

In [None]:
# Benchmark input for Series.apply: 35 million rows.
#   a: uniform random floats shifted by one, i.e. drawn from [1, 2)
df_size = 35_000_000
df = pd.DataFrame({"a": np.random.rand(df_size) + 1})

In [None]:
def func(x, power, bias=0):
    """Return log10(sqrt(exp(x**power))) + bias for a single numeric value.

    Parameters:
        x: the input value.
        power: exponent applied to `x` before the exponential.
        bias: constant added to the result (defaults to 0).

    The `math` names are imported inside the body so the function stays
    self-contained.
    """
    from math import exp, log10, sqrt

    unbiased = log10(sqrt(exp(x**power)))
    return unbiased + bias

In [None]:
%%time
# Baseline: plain pandas applies `func` to every value of column `a`,
# forwarding power=2 positionally (via `args`) and bias=3 as a keyword.
res = df.a.apply(func, args=(2,), bias=3)

In [None]:
%%time
# Same computation with the same extra arguments, dispatched through pandarallel's `parallel_apply`.
res_parallel = df.a.parallel_apply(func, args=(2,), bias=3)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)

# Series.rolling.apply

In [None]:
# Benchmark input for Series.rolling.apply: 1 million rows.
#   a: random integers drawn from [1, 8)
#   b: the consecutive integers 0, 1, ..., df_size - 1
df_size = 1_000_000
df = pd.DataFrame(
    {
        "a": np.random.randint(1, 8, df_size),
        "b": list(range(df_size)),
    }
)

In [None]:
def func(x):
    """Return x0 + x1**2 + x2**3 + x3**4 for a four-value window `x`.

    `x` must support positional access through `.iloc` (e.g. a pandas Series)
    and hold at least four values.
    """
    linear = x.iloc[0]
    quadratic = x.iloc[1] ** 2
    cubic = x.iloc[2] ** 3
    quartic = x.iloc[3] ** 4
    return linear + quadratic + cubic + quartic

In [None]:
%%time
# Baseline: plain pandas applies `func` to every 4-row rolling window of column `b`.
# raw=False passes each window as a Series, which `func` needs for `.iloc`.
res = df.b.rolling(4).apply(func, raw=False)

In [None]:
%%time
# Same rolling computation, dispatched through pandarallel's `parallel_apply`.
res_parallel = df.b.rolling(4).parallel_apply(func, raw=False)

In [None]:
# Displays True when the parallel result matches the pandas baseline.
res.equals(res_parallel)