In [1]:
from datar.datasets import iris
from datar.all import *

# Execute the shared helper script in this kernel.
# NOTE(review): presumably this is what defines `nb_header` -- verify in nb_helpers.py.
%run nb_helpers.py
# Render the API documentation of the functions demonstrated in this notebook.
nb_header(across, if_any, if_all, c_across)

### # across  

##### Apply the same transformation to multiple columns

The original API:  
https://dplyr.tidyverse.org/reference/across.html  

##### Args:
&emsp;&emsp;`_data`: The dataframe.  
&emsp;&emsp;`*args`: If given, the first 2 elements should be the columns and the functions  
&emsp;&emsp;&emsp;&emsp;to apply to each of the selected columns. The rest of them will be  
&emsp;&emsp;&emsp;&emsp;the arguments for the functions.  

&emsp;&emsp;`_names`: A glue specification that describes how to name  
&emsp;&emsp;&emsp;&emsp;the output columns. This can use `{_col}` to stand for the  
&emsp;&emsp;&emsp;&emsp;selected column name, and `{_fn}` to stand for the name of  
&emsp;&emsp;&emsp;&emsp;the function being applied.  
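&emsp;&emsp;&emsp;&emsp;The default (None) is equivalent to `{_col}` for the  
&emsp;&emsp;&emsp;&emsp;single function case and `{_col}_{_fn}` for the case where  
&emsp;&emsp;&emsp;&emsp;a list or dict of functions is used. With a dict, `{_fn}` is the key;  
&emsp;&emsp;&emsp;&emsp;with a list, `{_fn}` is the index of the function, which follows  
&emsp;&emsp;&emsp;&emsp;`_base0` (1-based by default). To request a 0-based or 1-based index  
&emsp;&emsp;&emsp;&emsp;explicitly, use `{_fn0}` or `{_fn1}`.  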

&emsp;&emsp;`_base0`: Whether the columns are 0-based when they are selected  
&emsp;&emsp;&emsp;&emsp;by index. If not provided, the value of  
&emsp;&emsp;&emsp;&emsp;`datar.base.get_option('index.base.0')` is used.  

&emsp;&emsp;`**kwargs`: Keyword arguments for the functions  

##### Returns:
&emsp;&emsp;A dataframe with one column for each column and each function.  


### # if_any  

##### Apply the same predicate function to a selection of columns and combine the results: True if any element is True.  

See Also:  
&emsp;&emsp;[`across()`](datar.dplyr.across.across)  


### # if_all  

##### Apply the same predicate function to a selection of columns and combine the results: True if all elements are True.  

See Also:  
&emsp;&emsp;[`across()`](datar.dplyr.across.across)  


### # c_across  

##### Apply the same transformation to multiple columns row-wise

##### Args:
&emsp;&emsp;`_data`: The dataframe  
&emsp;&emsp;`_cols`: The columns  
&emsp;&emsp;`_base0`: Whether the columns are 0-based when they are selected  
&emsp;&emsp;&emsp;&emsp;by index. If not provided, the value of  
&emsp;&emsp;&emsp;&emsp;`datar.base.get_option('index.base.0')` is used.  

##### Returns:
&emsp;&emsp;A series  


In [2]:
# Round the two Sepal columns, selected by name. Rounding does not change the
# dtypes (Series.round): the values stay floats (5.0, 4.0, ...) in the output.
iris >> mutate(across(c(f.Sepal_Length, f.Sepal_Width), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
4,5.0,4.0,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.0,5.4,2.3,virginica


In [3]:
# Columns can also be selected by position. Positions are 1-based by default,
# so c(1,2) selects Sepal_Length and Sepal_Width (same result as selecting by name).
iris >> mutate(across(c(1,2), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
4,5.0,4.0,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.0,5.4,2.3,virginica


In [4]:
# Use 0-based positions for this call by passing _base0=True:
# c(0,1) then selects the same two Sepal columns.
iris >> mutate(across(c(0,1), round, _base0=True))



Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
4,5.0,4.0,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.0,5.4,2.3,virginica


In [5]:
# Change the option temporarily: inside the context, positions are 0-based,
# so c(0,1) selects the first two columns.
# The pipeline is evaluated inside the `with` block (while the option is
# active) and bound to a name, because IPython only echoes top-level
# expressions; an expression nested in a `with` body is never displayed.
with options_context(index_base_0=True):
    iris_rounded_base0 = iris >> mutate(across(c(0,1), round))

# To change it for the rest of the session: options(index_base_0=True)

# Echo the result as the cell's last top-level expression.
iris_rounded_base0

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
4,5.0,4.0,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.0,5.4,2.3,virginica


In [6]:
# Select a range of columns with a slice of column references. The start is
# left open and the stop column (Sepal_Width) is included: both Sepal columns
# are rounded in the output.
iris >> mutate(across(f[:f.Sepal_Width], round)) 

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
4,5.0,4.0,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.0,5.4,2.3,virginica


In [7]:
# A step of 0 in the slice excludes the stop column: only Sepal_Length is
# rounded, while Sepal_Width keeps its original values (3.5, 3.0, 3.2, ...).
iris >> mutate(across(f[:f.Sepal_Width:0], round)) 

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,3.5,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.2,1.3,0.2,setosa
3,5.0,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.5,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.4,5.4,2.3,virginica


In [8]:
# Selections can be combined: start from the columns matching where(is_double)
# and remove the two Petal columns with `& ~c(...)`. Only the Sepal columns
# are rounded in the output; the Petal columns are left untouched.
iris >> mutate(across(where(is_double) & ~c(f.Petal_Length, f.Petal_Width), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
4,5.0,4.0,1.4,0.2,setosa
...,...,...,...,...,...
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica
148,6.0,3.0,5.4,2.3,virginica


In [9]:
# Convert Species to a categorical column, then inspect the dtypes
# (Species is reported as `category`).
# NOTE(review): this rebinds the imported `iris`, so later cells see the modified frame.
iris = iris >> mutate(Species=as_categorical(f.Species))
iris.dtypes

Sepal_Length     float64
Sepal_Width      float64
Petal_Length     float64
Petal_Width      float64
Species         category
dtype: object

In [10]:
# Apply as_character to every column selected by where(is_categorical);
# Species is reported as `object` afterwards.
# NOTE(review): `iris` is rebound again here; the cells below use this version.
iris = iris >> mutate(across(where(is_categorical), as_character))
iris.dtypes

Sepal_Length    float64
Sepal_Width     float64
Petal_Length    float64
Petal_Width     float64
Species          object
dtype: object

In [11]:
# Per-species mean of the columns whose names start with "Sepal".
# Extra keyword arguments (here na_rm=True) are passed on to the function.
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), mean, na_rm=True)
)

Unnamed: 0,Species,Sepal_Length,Sepal_Width
0,setosa,5.006,3.428
1,versicolor,5.936,2.77
2,virginica,6.588,2.974


In [12]:
# A dict of functions yields one output column per (column, function) pair.
# The default names are `{_col}_{_fn}`, with the dict keys used as `{_fn}`
# (e.g. Sepal_Length_mean, Sepal_Length_sd).
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), dict(mean=mean, sd=sd))
)

Unnamed: 0,Species,Sepal_Length_mean,Sepal_Length_sd,Sepal_Width_mean,Sepal_Width_sd
0,setosa,5.006,0.35249,3.428,0.379064
1,versicolor,5.936,0.516171,2.77,0.313798
2,virginica,6.588,0.63588,2.974,0.322497


In [13]:
# `_names` controls the output column names; `{_col}` is replaced by the
# selected column name (mean_Sepal_Length, mean_Sepal_Width).
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), mean, _names = "mean_{_col}")
)

Unnamed: 0,Species,mean_Sepal_Length,mean_Sepal_Width
0,setosa,5.006,3.428
1,versicolor,5.936,2.77
2,virginica,6.588,2.974


In [14]:
# With a dict of functions, `{_fn}` in `_names` is replaced by the dict key
# (Sepal_Length.mean, Sepal_Length.sd, ...).
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), dict(mean=mean, sd=sd), _names = "{_col}.{_fn}")
)

Unnamed: 0,Species,Sepal_Length.mean,Sepal_Length.sd,Sepal_Width.mean,Sepal_Width.sd
0,setosa,5.006,0.35249,3.428,0.379064
1,versicolor,5.936,0.516171,2.77,0.313798
2,virginica,6.588,0.63588,2.974,0.322497


In [15]:
# With a list of functions, `{_fn}` is the position of the function in the
# list. It is 1-based by default: the output columns are named fn1 and fn2.
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), [mean, sd], _names = "{_col}.fn{_fn}")
)

Unnamed: 0,Species,Sepal_Length.fn1,Sepal_Length.fn2,Sepal_Width.fn1,Sepal_Width.fn2
0,setosa,5.006,0.35249,3.428,0.379064
1,versicolor,5.936,0.516171,2.77,0.313798
2,virginica,6.588,0.63588,2.974,0.322497


In [16]:
# `_base0=True` also applies to `{_fn}`: the function positions in the output
# names become 0-based (fn0, fn1).
iris >> group_by(f.Species) >> summarise(
    across(
        starts_with("Sepal"), 
        [mean, sd], 
        _names="{_col}.fn{_fn}", 
        _base0=True
    )
)
# Alternative (kept commented out): use `{_fn0}` in `_names` instead of
# passing `_base0=True`.

# iris >> group_by(f.Species) >> summarise(
#     across(
#         starts_with("Sepal"), 
#         [mean, sd], 
#         _names="{_col}.fn{_fn0}", # _fn1 for 1-based
#     )
# )


Unnamed: 0,Species,Sepal_Length.fn0,Sepal_Length.fn1,Sepal_Width.fn0,Sepal_Width.fn1
0,setosa,5.006,0.35249,3.428,0.379064
1,versicolor,5.936,0.516171,2.77,0.313798
2,virginica,6.588,0.63588,2.974,0.322497


In [17]:
# Same call as the earlier list-of-functions example, without `_base0`:
# `{_fn}` is 1-based here (fn1, fn2 in the output).
# NOTE(review): this cell repeats an earlier cell verbatim -- confirm it is intentional.
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), [mean, sd], _names = "{_col}.fn{_fn}")
)

Unnamed: 0,Species,Sepal_Length.fn1,Sepal_Length.fn2,Sepal_Width.fn1,Sepal_Width.fn2
0,setosa,5.006,0.35249,3.428,0.379064
1,versicolor,5.936,0.516171,2.77,0.313798
2,virginica,6.588,0.63588,2.974,0.322497


In [18]:
# Keep the rows where ANY of the columns ending in "Width" is greater than 4.
iris >> filter(if_any(ends_with("Width"), lambda x: x > 4))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,5.7,4.4,1.5,0.4,setosa
1,5.2,4.1,1.5,0.1,setosa
2,5.5,4.2,1.4,0.2,setosa


In [19]:
# Keep the rows where ALL of the columns ending in "Width" are greater than 2.
iris >> filter(if_all(ends_with("Width"), lambda x: x > 2))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
0,6.3,3.3,6.0,2.5,virginica
1,7.1,3.0,5.9,2.1,virginica
2,6.5,3.0,5.8,2.2,virginica
3,7.6,3.0,6.6,2.1,virginica
4,7.2,3.6,6.1,2.5,virginica
5,6.8,3.0,5.5,2.1,virginica
6,5.8,2.8,5.1,2.4,virginica
7,6.4,3.2,5.3,2.3,virginica
8,7.7,3.8,6.7,2.2,virginica
9,7.7,2.6,6.9,2.3,virginica


In [20]:
# Build a small frame with an id column and four columns filled by runif(4).
# NOTE(review): no seed is set anywhere in this notebook, so these values
# presumably differ on every run -- consider seeding for reproducible output.
df = tibble(
    id=[1, 2, 3, 4],
    w=runif(4), 
    x=runif(4), 
    y=runif(4), 
    z=runif(4)
)
# Row-wise aggregation: c_across() gathers the columns w..z of each row so
# that sum() and sd() are computed per row rather than per column.
df >> rowwise() >> mutate(
    sum = sum(c_across(f[f.w:f.z])),
    sd = sd(c_across(f[f.w:f.z]))
)

Unnamed: 0,id,w,x,y,z,sum,sd
0,1,0.198557,0.232099,0.253651,0.20646,0.890767,0.025119
1,2,0.035271,0.02935,0.352935,0.747349,1.164905,0.339582
2,3,0.42907,0.24598,0.270517,0.81368,1.759248,0.262122
3,4,0.11655,0.982369,0.415331,0.994767,2.509017,0.434704
