In [1]:
# https://dplyr.tidyverse.org/reference/group_by.html
%run nb_helpers.py

from datar.data import mtcars
from datar.all import *

# Show the API documentation for the verbs demonstrated in this notebook.
# NOTE(review): nb_header is presumably defined in nb_helpers.py (loaded by
# the %run above) -- confirm; the rendered output below documents both verbs.
nb_header(group_by, ungroup)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ group_by</div>

##### Create a grouped frame

The original API:  
https://dplyr.tidyverse.org/reference/group_by.html  

##### Args:
&emsp;&emsp;`_data`: A data frame  
&emsp;&emsp;`*args`: A variable or function of variables to group by.  
&emsp;&emsp;`_add`: If `True`, add the new grouping variables to the existing groups instead of overriding them.  
&emsp;&emsp;`_drop`: If `True`, drop groups formed by factor (categorical) levels that do not appear in the data.  

##### Returns:
&emsp;&emsp;A grouped frame  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ ungroup</div>

##### Remove grouping variables

The original API:  
https://dplyr.tidyverse.org/reference/ungroup.html  

##### Args:
&emsp;&emsp;`_data`: A grouped frame  
&emsp;&emsp;`*cols`: Variables to remove from the grouping. If none are given, all grouping is removed.  

##### Returns:
&emsp;&emsp;A data frame  


In [2]:
# Group the cars by number of cylinders; `by_cyl` is reused by later cells.
by_cyl = mtcars >> group_by(f.cyl) 
# A bare name as the last expression displays the grouped frame.
by_cyl

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
,<float64>,<int64>,<float64>,<int64>,<float64>,<float64>,<float64>,<int64>,<int64>,<int64>,<int64>
0.0,21.0,6,160.0,110,3.90,2.620,16.46,0,1,4,4
1.0,21.0,6,160.0,110,3.90,2.875,17.02,0,1,4,4
2.0,22.8,4,108.0,93,3.85,2.320,18.61,1,1,4,1
3.0,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4.0,18.7,8,360.0,175,3.15,3.440,17.02,0,0,3,2
5.0,18.1,6,225.0,105,2.76,3.460,20.22,1,0,3,1
6.0,14.3,8,360.0,245,3.21,3.570,15.84,0,0,3,4
7.0,24.4,4,146.7,62,3.69,3.190,20.00,1,0,4,2
8.0,22.8,4,140.8,95,3.92,3.150,22.90,1,0,4,2


In [3]:
# List the names of the variables the frame is currently grouped by.
by_cyl >> group_vars()

['cyl']

In [4]:
# Average displacement and horsepower within each `cyl` group,
# using the function form `mean(column)`.
(
    by_cyl
    >> summarise(
        disp=mean(f.disp),
        hp=mean(f.hp),
    )
)

Unnamed: 0,cyl,disp,hp
,<int64>,<float64>,<float64>
0.0,6,183.314286,122.285714
1.0,4,105.136364,82.636364
2.0,8,353.100000,209.214286


In [5]:
# Same per-`cyl` averages, this time calling `.mean()` as a method on the
# column expressions instead of passing them to a function.
(
    by_cyl
    >> summarise(
        disp=f.disp.mean(),
        hp=f.hp.mean(),
    )
)

Unnamed: 0,cyl,disp,hp
,<int64>,<float64>,<float64>
0.0,6,183.314286,122.285714
1.0,4,105.136364,82.636364
2.0,8,353.100000,209.214286


In [6]:
# On a grouped frame the condition is evaluated per group, so this keeps the
# row(s) with the largest `disp` within each `cyl` group.
by_cyl >> filter(f.disp == max(f.disp))

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
,<float64>,<int64>,<float64>,<int64>,<float64>,<float64>,<float64>,<int64>,<int64>,<int64>,<int64>
0.0,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
1.0,24.4,4,146.7,62,3.69,3.190,20.00,1,0,4,2
2.0,10.4,8,472.0,205,2.93,5.250,17.98,0,0,3,4


In [7]:
# Group by two variables at once.
by_vs_am = mtcars >> group_by(f.vs, f.am)
# Count rows per (vs, am) combination. The result is still grouped by `vs`
# (see the INFO message in the output; `_groups` overrides this).
by_vs = by_vs_am >> summarise(n=n())
by_vs

[2022-12-02 14:03:28][datar][   INFO] `summarise()` has grouped output by ['vs'] (override with `_groups` argument)


Unnamed: 0,vs,am,n
,<int64>,<int64>,<int64>
0.0,0,1,6
1.0,1,1,7
2.0,1,0,7
3.0,0,0,12


In [8]:
# `by_vs` is still grouped by `vs`, so the counts are added up per `vs` level.
by_vs >> summarise(n=sum(f.n))

Unnamed: 0,vs,n
,<int64>,<int64>
0.0,0,18
1.0,1,14


In [9]:
# Remove the remaining grouping first, so the counts collapse into a single
# overall total instead of one row per `vs` level.
(
    by_vs
    >> ungroup()
    >> summarise(n=sum(f.n))
)

Unnamed: 0,n
,<int64>
0.0,32


In [10]:
# Group by an expression: the keyword name (`vsam`) is added as a new column
# holding `vs + am` (visible in the output below).
# NOTE(review): per the dplyr docs this is shorthand for mutate() followed by
# group_by() -- confirm datar matches.
mtcars_vsam = mtcars >> group_by(vsam=f.vs + f.am) 
mtcars_vsam 

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb,vsam
,<float64>,<int64>,<float64>,<int64>,<float64>,<float64>,<float64>,<int64>,<int64>,<int64>,<int64>,<int64>
0.0,21.0,6,160.0,110,3.90,2.620,16.46,0,1,4,4,1
1.0,21.0,6,160.0,110,3.90,2.875,17.02,0,1,4,4,1
2.0,22.8,4,108.0,93,3.85,2.320,18.61,1,1,4,1,2
3.0,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1,1
4.0,18.7,8,360.0,175,3.15,3.440,17.02,0,0,3,2,0
5.0,18.1,6,225.0,105,2.76,3.460,20.22,1,0,3,1,1
6.0,14.3,8,360.0,245,3.21,3.570,15.84,0,0,3,4,0
7.0,24.4,4,146.7,62,3.69,3.190,20.00,1,0,4,2,1
8.0,22.8,4,140.8,95,3.92,3.150,22.90,1,0,4,2,1


In [11]:
# Grouping an already-grouped frame replaces the existing grouping by default:
# `cyl` is gone and only the new variables remain.
(
    by_cyl
    >> group_by(f.vs, f.am)
    >> group_vars()
)

['vs', 'am']

In [12]:
# With `_add=True` the new variables are appended to the existing grouping
# (`cyl`) instead of replacing it.
(
    by_cyl
    >> group_by(f.vs, f.am, _add=True)
    >> group_vars()
)

['cyl', 'vs', 'am']