In [1]:
%run nb_helpers.py

from datar.data import iris
from datar.all import *

nb_header(across, if_any, if_all, c_across)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ across</div>

Apply the same transformation to multiple columns

    The original API:
    https://dplyr.tidyverse.org/reference/across.html

    Examples:
        #
        >>> iris >> mutate(across(c(f.Sepal_Length, f.Sepal_Width), round))
            Sepal_Length  Sepal_Width  Petal_Length  Petal_Width    Species
               <float64>    <float64>     <float64>    <float64>   <object>
        0            5.0          4.0           1.4          0.2     setosa
        1            5.0          3.0           1.4          0.2     setosa
        ..           ...          ...           ...          ...        ...

        >>> iris >> group_by(f.Species) >> summarise(
        >>>     across(starts_with("Sepal"), mean)
        >>> )
              Species  Sepal_Length  Sepal_Width
             <object>     <float64>    <float64>
        0      setosa         5.006        3.428
        1  versicolor         5.936        2.770
        2   virginica         6.588        2.974

    Args:
        _data: The dataframe.
        *args: If given, the first 2 elements should be columns and functions
            apply to each of the selected columns. The rest of them will be
            the arguments for the functions.
        _names: A glue specification that describes how to name
            the output columns. This can use `{_col}` to stand for the
            selected column name, and `{_fn}` to stand for the name of
            the function being applied.
            The default (None) is equivalent to `{_col}` for the
            single function case and `{_col}_{_fn}` for the case where
            a list is used for _fns. In such a case, `{_fn}` is 0-based.
            To use 1-based index, use `{_fn1}`
        _fn_context: Defines the context to evaluate the arguments for functions
            if they are plain functions.
            Note that registered functions will use its own context
        **kwargs: Keyword arguments for the functions

    Returns:
        A dataframe with one column for each column and each function.
    

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ if_any</div>

##### Apply the same predicate function to a selection of columns and combine
the results True if any element is True.  

See Also:  
&emsp;&emsp;[`across()`](datar.dplyr.across.across)  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ if_all</div>

##### Apply the same predicate function to a selection of columns and combine
the results True if all elements are True.  

See Also:  
&emsp;&emsp;[`across()`](datar.dplyr.across.across)  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ c_across</div>

##### Apply the same transformation to multiple columns rowwisely

##### Args:
&emsp;&emsp;`_data`: The dataframe  
&emsp;&emsp;`_cols`: The columns  

##### Returns:
&emsp;&emsp;A rowwise tibble  


In [2]:
# round not changing dtypes (Series.round)
iris >> mutate(across(c(f.Sepal_Length, f.Sepal_Width), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [3]:
iris >> mutate(across(c(0, 1), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [4]:
# use slice with column names
iris >> mutate(across(c[:f.Sepal_Width], round)) 

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,3.5,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.2,1.3,0.2,setosa
3,5.0,3.1,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,3.6,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.5,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [5]:
# to include stop of slice
iris >> mutate(across(c[:f.Sepal_Width:1], round)) 

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [6]:
iris >> mutate(across(where(is_double) & ~c(f.Petal_Length, f.Petal_Width), round))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.0,4.0,1.4,0.2,setosa
1,5.0,3.0,1.4,0.2,setosa
2,5.0,3.0,1.3,0.2,setosa
3,5.0,3.0,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,4.0,1.4,0.2,setosa
145,7.0,3.0,5.2,2.3,virginica
146,6.0,2.0,5.0,1.9,virginica
147,6.0,3.0,5.2,2.0,virginica


In [7]:
iris = iris >> mutate(Species=as_factor(f.Species))
iris

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<category>
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,3.6,1.4,0.2,setosa
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica


In [8]:
iris = iris >> mutate(across(where(is_factor), as_character))
iris

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
...,...,...,...,...,...
4,5.0,3.6,1.4,0.2,setosa
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica


In [10]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), mean)
)

Unnamed: 0,Species,Sepal_Length,Sepal_Width
,<object>,<float64>,<float64>
0.0,setosa,5.006,3.428
1.0,versicolor,5.936,2.770
2.0,virginica,6.588,2.974


In [11]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), dict(mean=mean, sd=sd))
)

Unnamed: 0,Species,Sepal_Length_mean,Sepal_Length_sd,Sepal_Width_mean,Sepal_Width_sd
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [12]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), mean, _names = "mean_{_col}")
)

Unnamed: 0,Species,mean_Sepal_Length,mean_Sepal_Width
,<object>,<float64>,<float64>
0.0,setosa,5.006,3.428
1.0,versicolor,5.936,2.770
2.0,virginica,6.588,2.974


In [13]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), dict(mean=mean, sd=sd), _names = "{_col}.{_fn}")
)

Unnamed: 0,Species,Sepal_Length.mean,Sepal_Length.sd,Sepal_Width.mean,Sepal_Width.sd
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [14]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), [mean, sd], _names = "{_col}.fn{_fn}")
)

Unnamed: 0,Species,Sepal_Length.fn0,Sepal_Length.fn1,Sepal_Width.fn0,Sepal_Width.fn1
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [15]:
iris >> group_by(f.Species) >> summarise(
    across(
        starts_with("Sepal"), 
        [mean, sd], 
        _names="{_col}.fn{_fn}", 
    )
)
# or use _fn0

# iris >> group_by(f.Species) >> summarise(
#     across(
#         starts_with("Sepal"), 
#         [mean, sd], 
#         _names="{_col}.fn{_fn1}", # _fn1 for 1-based
#     )
# )


Unnamed: 0,Species,Sepal_Length.fn0,Sepal_Length.fn1,Sepal_Width.fn0,Sepal_Width.fn1
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [16]:
iris >> group_by(f.Species) >> summarise(
    across(starts_with("Sepal"), [mean, sd], _names = "{_col}.fn{_fn1}")
)

Unnamed: 0,Species,Sepal_Length.fn1,Sepal_Length.fn2,Sepal_Width.fn1,Sepal_Width.fn2
,<object>,<float64>,<float64>,<float64>,<float64>
0.0,setosa,5.006,0.352490,3.428,0.379064
1.0,versicolor,5.936,0.516171,2.770,0.313798
2.0,virginica,6.588,0.635880,2.974,0.322497


In [17]:
iris >> filter(if_any(ends_with("Width"), lambda x: x > 4))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
15.0,5.7,4.4,1.5,0.4,setosa
32.0,5.2,4.1,1.5,0.1,setosa
33.0,5.5,4.2,1.4,0.2,setosa


In [18]:
iris >> filter(if_all(ends_with("Width"), lambda x: x > 2))

Unnamed: 0,Sepal_Length,Sepal_Width,Petal_Length,Petal_Width,Species
,<float64>,<float64>,<float64>,<float64>,<object>
100.0,6.3,3.3,6.0,2.5,virginica
102.0,7.1,3.0,5.9,2.1,virginica
104.0,6.5,3.0,5.8,2.2,virginica
105.0,7.6,3.0,6.6,2.1,virginica
109.0,7.2,3.6,6.1,2.5,virginica
112.0,6.8,3.0,5.5,2.1,virginica
114.0,5.8,2.8,5.1,2.4,virginica
115.0,6.4,3.2,5.3,2.3,virginica
117.0,7.7,3.8,6.7,2.2,virginica


In [19]:
df = tibble(
    id=[1, 2, 3, 4],
    w=runif(4), 
    x=runif(4), 
    y=runif(4), 
    z=runif(4)
)
df >> rowwise() >> mutate(
    sum = sum(c_across(c[f.w:f.z])),
    sd = sd(c_across(c[f.w:f.z]))
)

Unnamed: 0,id,w,x,y,z,sum,sd
,<int64>,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>
0.0,1,0.909293,0.880456,0.174213,0.382593,1.963962,0.416324
1.0,2,0.102912,0.952811,0.632536,0.845920,1.688258,0.429225
2.0,3,0.425592,0.320275,0.803515,0.831533,1.549382,0.254112
3.0,4,0.218472,0.849190,0.637853,0.887980,1.705514,0.321026
