In [1]:
# Python counterpart of the dplyr reference page:
# https://dplyr.tidyverse.org/reference/summarise.html
# Run the shared helper script; presumably it defines `nb_header` used
# below -- TODO confirm.
%run nb_helpers.py

# Example datasets used by the cells of this notebook.
from datar.datasets import mtcars, starwars
# Star import; presumably the source of the verbs and helpers used below
# (summarise, group_by, f, n, ...) -- verify against datar.all.
from datar.all import *

# Render the documentation header for `summarise` (the markdown output shown
# right after this cell).
nb_header(summarise)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ summarise</div>

##### Summarise each group to fewer rows

See https://dplyr.tidyverse.org/reference/summarise.html  

Both input and the summarised data can be recycled, but separately.  

Aliases - `summarize`  

##### Args:
&emsp;&emsp;`_groups`: Grouping structure of the result.  
&emsp;&emsp;&emsp;&emsp;- "drop_last": dropping the last level of grouping.

&emsp;&emsp;&emsp;&emsp;- "drop": All levels of grouping are dropped.

&emsp;&emsp;&emsp;&emsp;- "keep": Same grouping structure as _data.

&emsp;&emsp;&emsp;&emsp;- "rowwise": Each row is its own group.

&emsp;&emsp;`*args`: and  
&emsp;&emsp;`**kwargs`: Name-value pairs, where value is the summarized  
&emsp;&emsp;&emsp;&emsp;data for each group  

##### Returns:
&emsp;&emsp;The summary dataframe.  


In [2]:
# Whole-table summary: without grouping the result is a single row.
(
    mtcars
    >> summarise(
        mean=mean(f.disp),
        n=n(),
    )
)

Unnamed: 0,mean,n
,<float64>,<int64>
0.0,230.721875,32


In [3]:
# Per-group summary: one output row for each distinct `cyl` value.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(mean=mean(f.disp), n=n())
)

Unnamed: 0,cyl,mean,n
,<int64>,<float64>,<int64>
0.0,6,183.314286,7
1.0,4,105.136364,11
2.0,8,353.100000,14


In [4]:
# A summary may return several values per group; each group then contributes
# several rows (here the 25% and 75% quantiles of `disp`).
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(
        qs=quantile(f.disp, c(0.25, 0.75)),
        prob=c(0.25, 0.75),
    )
)


[2022-03-18 17:33:44][datar][   INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)


Unnamed: 0,cyl,qs,prob
,<int64>,<float64>,<float64>
0.0,6,160.00,0.25
1.0,6,196.30,0.75
2.0,4,78.85,0.25
3.0,4,120.65,0.75
4.0,8,301.75,0.25
5.0,8,390.00,0.75


In [5]:
# Same summary as the previous cell, with the "has grouped output" INFO
# message switched off for the duration of the context.
with options_context(dplyr_summarise_inform=False):
    # Bind the result instead of leaving a bare expression here: an
    # expression nested inside a `with` block is only echoed when the shell
    # runs with non-default interactivity settings.
    quantiles_by_cyl = (
        mtcars
        >> group_by(f.cyl)
        >> summarise(
            qs=quantile(f.disp, c(0.25, 0.75)),
            prob=c(0.25, 0.75),
        )
    )

# Last top-level expression of the cell: displayed under any shell
# configuration.
quantiles_by_cyl


Unnamed: 0,cyl,qs,prob
,<int64>,<float64>,<float64>
0.0,6,160.00,0.25
1.0,6,196.30,0.75
2.0,4,78.85,0.25
3.0,4,120.65,0.75
4.0,8,301.75,0.25
5.0,8,390.00,0.75


In [6]:
# With two grouping columns the summarised result stays grouped by the first
# one only (see the INFO message), so group_vars() reports just `cyl`.
(
    mtcars
    >> group_by(f.cyl, f.vs)
    >> summarise(cyl_n=n())
    >> group_vars()
)

[2022-03-18 17:33:45][datar][   INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)


['cyl']

In [7]:
# BEWARE: reusing a column name can give unexpected results. In the output
# of this cell `sd` is missing for every group -- presumably because `f.disp`
# inside `sd()` refers to the freshly summarised `disp` (one value per group)
# rather than the original column, as in dplyr. TODO confirm against the
# installed datar version.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(disp=mean(f.disp), sd=sd(f.disp))
)

Unnamed: 0,cyl,disp,sd
,<int64>,<float64>,<float64>
0.0,6,183.314286,
1.0,4,105.136364,
2.0,8,353.100000,


In [8]:
# Temporary variable: `_disp_m2` (leading underscore) is available to the
# arguments that follow it but does not appear in the resulting columns.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(
        _disp_m2=mean(f.disp),
        disp_m2=f._disp_m2 * 2,
    )
)

Unnamed: 0,cyl,disp_m2
,<int64>,<float64>
0.0,6,366.628571
1.0,4,210.272727
2.0,8,706.200000


In [9]:
# Select the column through a name held in a string: f[<name>] rather than
# f.<name>.
# NOTE(review): `var` may shadow a same-named helper brought in by the star
# import -- confirm before reusing that helper in later cells.
var = "mass"
(
    starwars
    >> summarise(avg=mean(f[var], na_rm=TRUE))
)

Unnamed: 0,avg
,<float64>
0.0,97.311864
