In [1]:
# Notebook counterpart of dplyr's "context" reference page:
# https://dplyr.tidyverse.org/reference/context.html
# This is the only import in view; the datar names used below (tibble, f, the verbs,
# the cur_* helpers) presumably all come from it.
from datar.all import *

# Run the helper script inside this notebook's namespace; nb_header is presumably defined there — verify.
%run nb_helpers.py
# Presumably renders the documentation of the listed functions (the sections shown right after this cell).
# NOTE(review): cur_group() is demonstrated further down but is not listed here — confirm whether that is intentional.
nb_header(cur_group_id, cur_group_rows, cur_data, cur_data_all, cur_column, book='context')



### # cur_group_id  

##### Gives a unique numeric identifier for the current group.


### # cur_group_rows  

##### Gives the row indices for the current group.

##### Args:
&emsp;&emsp;`_data`: The data frame.  

##### Returns:
&emsp;&emsp;The `_rows` from group data or row indexes  


### # cur_data  

##### Gives the current data for the current group
(excluding grouping variables).  


### # cur_data_all  

##### Gives the current data for the current group
(including grouping variables).  


### # cur_column  

##### Used in the functions of across. So we don't have to register it.


In [2]:
# Toy frame shared by every example below: a grouping column `g` whose groups
# have 1, 2 and 3 rows, plus two random numeric columns.
# NOTE(review): no seed is set before the runif() calls, so x and y presumably
# differ on every run — consider seeding for reproducible outputs.
df = tibble(
    g=list('abbccc'),
    x=runif(6),
    y=runif(6),
)

# Grouped view of the same data; most of the following cells start from it.
gf = df >> group_by(f.g)

# Sanity check of the group sizes.
gf >> summarise(n=n())

Unnamed: 0,g,n
,<object>,<int64>
0.0,a,1
1.0,b,2
2.0,c,3


In [3]:
# Display the grouped frame so the values used in the following examples are visible.
gf

Unnamed: 0,g,x,y
,<object>,<float64>,<float64>
0.0,a,0.006538,0.421710
1.0,b,0.480888,0.103287
2.0,b,0.763029,0.543031
3.0,c,0.201200,0.207406
4.0,c,0.508855,0.315907
5.0,c,0.610592,0.650160


In [4]:
# cur_group_id(): add a column holding the numeric identifier of each row's group.
gf >> mutate(id=cur_group_id()) 

Unnamed: 0,g,x,y,id
,<object>,<float64>,<float64>,<int64>
0.0,a,0.006538,0.421710,0
1.0,b,0.480888,0.103287,1
2.0,b,0.763029,0.543031,1
3.0,c,0.201200,0.207406,2
4.0,c,0.508855,0.315907,2
5.0,c,0.610592,0.650160,2


In [5]:
# cur_group_rows(): one summary row per group, holding the row indices that belong to it.
gf >> summarise(row=cur_group_rows()) 

Unnamed: 0,g,row
,<object>,<object>
0.0,a,[0]
1.0,b,"[1, 2]"
2.0,c,"[3, 4, 5]"


In [9]:
# cur_group(): stored as a nested frame per group (the output shows one 1x1 frame each).
# NOTE(review): cur_group() has no section in the rendered docs at the top of this notebook;
# presumably it returns the group keys, as in dplyr — confirm.
gf_group = gf >> summarise(data=cur_group())
gf_group 

Unnamed: 0,g,data
,<object>,<object>
0.0,a,<DF 1x1>
1.0,b,<DF 1x1>
2.0,c,<DF 1x1>


In [10]:
# Extract just the nested `data` column; per the output it comes back as a Series of frames.
gf_group >> pull(f.data)

0    <DF 1x1>
1    <DF 1x1>
2    <DF 1x1>
Name: data, dtype: object

In [11]:
# cur_data(): the current group's rows without the grouping column (hence 2 columns per nested frame).
gf_data = gf >> summarise(data=cur_data())
gf_data

Unnamed: 0,g,data
,<object>,<object>
0.0,a,<DF 1x2>
1.0,b,<DF 2x2>
2.0,c,<DF 3x2>


In [None]:
# Extract the nested per-group frames as a plain Python list so their contents are printed.
# NOTE(review): this cell has no execution count and the x/y values in its saved output do not
# match the values of `df` displayed earlier, so the output looks stale — re-run the notebook
# from a fresh kernel.
gf_data >> pull(f.data, to='list')

[          x         y
   <float64> <float64>
 0  0.355023  0.000239,
           x         y
   <float64> <float64>
 0  0.880160  0.685917
 1  0.346984  0.925864,
           x         y
   <float64> <float64>
 0  0.235711  0.701329
 1  0.320924  0.903383
 2  0.517224  0.223750]

In [12]:
# cur_data_all(): like cur_data(), but the grouping column is kept (hence 3 columns per nested frame).
gf_data_all = gf >> summarise(data=cur_data_all())
gf_data_all

Unnamed: 0,g,data
,<object>,<object>
0.0,a,<DF 1x3>
1.0,b,<DF 2x3>
2.0,c,<DF 3x3>


In [13]:
# Print the nested frames as a list; each one includes the `g` column for its group.
gf_data_all >> pull(f.data, to='list')

[         g         x         y
   <object> <float64> <float64>
 0        a  0.006538   0.42171,
          g         x         y
   <object> <float64> <float64>
 1        b  0.480888  0.103287
 2        b  0.763029  0.543031,
          g         x         y
   <object> <float64> <float64>
 3        c  0.201200  0.207406
 4        c  0.508855  0.315907
 5        c  0.610592  0.650160]

In [14]:
# cur_column(): prefix every squared value with the name of the column it came from.
# The column name reaches the lambda through the extra `cc` keyword given to across().
# A plain string concatenated with a string Series broadcasts element-wise, so no
# per-row list of prefixes is needed.
df >> select(f.x, f.y) >> mutate(
    across(
        everything(),
        lambda x, cc: cc + ' ' + (x ** 2).astype(str),
        cc=cur_column(),
    )
)

Unnamed: 0,x,y
,<object>,<object>
0.0,x 4.275181839861608e-05,y 0.17783898820060154
1.0,x 0.2312531724845601,y 0.01066819823473873
2.0,x 0.5822139970696143,y 0.2948830834486957
3.0,x 0.04048149826262316,y 0.04301741953459272
4.0,x 0.25893300858833146,y 0.09979740403309767
5.0,x 0.37282240541087586,y 0.4227077348376767


In [15]:
# Alternative without cur_column(): each x handed to the function is a Series,
# so the column name is available as x.name. Columns that are not selected
# (here `g`) are left as they are.
df >> mutate(
    across(
        [f.x, f.y],
        lambda x: x.name + ' ' + (x ** 2).astype(str),
    )
)

Unnamed: 0,g,x,y
,<object>,<object>,<object>
0.0,a,x 4.275181839861608e-05,y 0.17783898820060154
1.0,b,x 0.2312531724845601,y 0.01066819823473873
2.0,b,x 0.5822139970696143,y 0.2948830834486957
3.0,c,x 0.04048149826262316,y 0.04301741953459272
4.0,c,x 0.25893300858833146,y 0.09979740403309767
5.0,c,x 0.37282240541087586,y 0.4227077348376767
