In [5]:
%run nb_helpers.py

from datar.datasets import iris
from datar.all import *

nb_header(across, if_any, if_all, c_across)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ across</div>

##### Apply the same transformation to multiple columns

The original API:  
https://dplyr.tidyverse.org/reference/across.html  

##### Args:
&emsp;&emsp;`_data`: The dataframe.  
&emsp;&emsp;`*args`: If given, the first 2 elements should be columns and functions  
&emsp;&emsp;&emsp;&emsp;apply to each of the selected columns. The rest of them will be  
&emsp;&emsp;&emsp;&emsp;the arguments for the functions.  

&emsp;&emsp;`_names`: A glue specification that describes how to name  
&emsp;&emsp;&emsp;&emsp;the output columns. This can use `{_col}` to stand for the  
&emsp;&emsp;&emsp;&emsp;selected column name, and `{_fn}` to stand for the name of  
&emsp;&emsp;&emsp;&emsp;the function being applied.  
&emsp;&emsp;&emsp;&emsp;The default (None) is equivalent to `{_col}` for the  
&emsp;&emsp;&emsp;&emsp;single function case and `{_col}_{_fn}` for the case where  
&emsp;&emsp;&emsp;&emsp;a list is used for _fns. In such a case, `{_fn}` is 0-based.  
&emsp;&emsp;&emsp;&emsp;To use 1-based index, use `{_fn1}`  

&emsp;&emsp;`_fn_context`: Defines the context to evaluate the arguments for functions  
&emsp;&emsp;&emsp;&emsp;if they are plain functions.  
&emsp;&emsp;&emsp;&emsp;Note that registered functions will use its own context  

&emsp;&emsp;`**kwargs`: Keyword arguments for the functions  

##### Returns:
&emsp;&emsp;A dataframe with one column for each column and each function.  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ if_any</div>

##### Apply the same predicate function to a selection of columns and combine
the results True if any element is True.  

See Also:  
&emsp;&emsp;[`across()`](datar.dplyr.across.across)  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ if_all</div>

##### Apply the same predicate function to a selection of columns and combine
the results True if all elements are True.  

See Also:  
&emsp;&emsp;[`across()`](datar.dplyr.across.across)  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ c_across</div>

##### Apply the same transformation to multiple columns rowwisely

##### Args:
&emsp;&emsp;`_data`: The dataframe  
&emsp;&emsp;`_cols`: The columns  

##### Returns:
&emsp;&emsp;A rowwise tibble  


In [2]:
# round not changing dtypes (Series.round)
iris >> mutate(across(c(f.Sepal_Length, f.Sepal_Width), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [5]:
iris >> mutate(across(c(0, 1), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [6]:
# use slice with column names
iris >> mutate(across(f[:f.Sepal_Width], round)) 

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,3.5,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.2,1.3,0.2,setosa
3,5.0,3.1,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,3.6,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.5,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [7]:
# to include stop of slice
iris >> mutate(across(f[:f.Sepal_Width:1], round)) 

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [8]:
iris >> mutate(across(where(is_double) & ~c(f.Petal_Length, f.Petal_Width), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [9]:
iris = iris >> mutate(Species=as_categorical(f.Species))
iris

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<category>
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,3.6,1.4,0.2,setosa
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica


In [10]:
iris = iris >> mutate(across(where(is_categorical), as_character))
iris

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,3.6,1.4,0.2,setosa
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica


In [11]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), mean, na_rm=True)
)

Unnamed: 0,Species,Sepal_Length,Sepal_Width
,<object>,<float64>,<float64>
0.0,setosa,5.006,3.428
1.0,versicolor,5.936,2.770
2.0,virginica,6.588,2.974


In [12]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), dict(mean=mean, sd=sd))
)

Unnamed: 0,Species,Sepal_Length_mean,Sepal_Length_sd,Sepal_Width_mean,Sepal_Width_sd
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [13]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), mean, _names = "mean_{_col}")
)

Unnamed: 0,Species,mean_Sepal_Length,mean_Sepal_Width
,<object>,<float64>,<float64>
0.0,setosa,5.006,3.428
1.0,versicolor,5.936,2.770
2.0,virginica,6.588,2.974


In [14]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), dict(mean=mean, sd=sd), _names = "{_col}.{_fn}")
)

Unnamed: 0,Species,Sepal_Length.mean,Sepal_Length.sd,Sepal_Width.mean,Sepal_Width.sd
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [15]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), [mean, sd], _names = "{_col}.fn{_fn}")
)

Unnamed: 0,Species,Sepal_Length.fn0,Sepal_Length.fn1,Sepal_Width.fn0,Sepal_Width.fn1
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [17]:
iris >> group_by(f.Species) >> summarise(
    across(
        starts_with("Sepal"), 
        [mean, sd], 
        _names="{_col}.fn{_fn}", 
    )
)
# or use _fn0

# iris >> group_by(f.Species) >> summarise(
#     across(
#         starts_with("Sepal"), 
#         [mean, sd], 
#         _names="{_col}.fn{_fn1}", # _fn1 for 1-based
#     )
# )


Unnamed: 0,Species,Sepal_Length.fn0,Sepal_Length.fn1,Sepal_Width.fn0,Sepal_Width.fn1
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [18]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), [mean, sd], _names = "{_col}.fn{_fn1}")
)

Unnamed: 0,Species,Sepal_Length.fn1,Sepal_Length.fn2,Sepal_Width.fn1,Sepal_Width.fn2
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [19]:
iris >> filter(if_any(ends_with("Width"), lambda x: x > 4))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
15.0,5.7,4.4,1.5,0.4,setosa
32.0,5.2,4.1,1.5,0.1,setosa
33.0,5.5,4.2,1.4,0.2,setosa


In [20]:
iris >> filter(if_all(ends_with("Width"), lambda x: x > 2))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
100.0,6.3,3.3,6.0,2.5,virginica
102.0,7.1,3.0,5.9,2.1,virginica
104.0,6.5,3.0,5.8,2.2,virginica
105.0,7.6,3.0,6.6,2.1,virginica
109.0,7.2,3.6,6.1,2.5,virginica
112.0,6.8,3.0,5.5,2.1,virginica
114.0,5.8,2.8,5.1,2.4,virginica
115.0,6.4,3.2,5.3,2.3,virginica
117.0,7.7,3.8,6.7,2.2,virginica


In [21]:
df = tibble(
    id=[1, 2, 3, 4],
    w=runif(4), 
    x=runif(4), 
    y=runif(4), 
    z=runif(4)
)
df >> rowwise() >> mutate(
    sum = sum(c_across(f[f.w:f.z])),
    sd = sd(c_across(f[f.w:f.z]))
)

Unnamed: 0,id,w,x,y,z,sum,sd
,<int64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
0.0,1,0.569851,0.930258,0.854710,0.046722,2.354819,0.190064
1.0,2,0.775352,0.424834,0.306213,0.082870,1.506398,0.243935
2.0,3,0.704210,0.828292,0.468007,0.640442,2.000509,0.183027
3.0,4,0.368568,0.197822,0.748319,0.123777,1.314709,0.281783
