In [1]:
# https://dplyr.tidyverse.org/reference/summarise.html

# Example datasets used by the cells below.
from datar.datasets import mtcars, starwars
# Star import: supplies the verbs and helpers used below
# (summarise, group_by, f, n, mean, ...).
from datar.all import *

# Notebook-local helper; the call renders the documentation of `summarise`
# as the header shown in this cell's output.
from nb_helpers import nb_header
nb_header(summarise)

### # summarise  

##### Summarise each group to fewer rows

See https://dplyr.tidyverse.org/reference/summarise.html  

##### Args:
&emsp;&emsp;`_groups`: Grouping structure of the result.  
&emsp;&emsp;&emsp;&emsp;- "drop_last": dropping the last level of grouping.  

&emsp;&emsp;&emsp;&emsp;- "drop": All levels of grouping are dropped.  

&emsp;&emsp;&emsp;&emsp;- "keep": Same grouping structure as _data.  

&emsp;&emsp;&emsp;&emsp;- "rowwise": Each row is its own group.  

&emsp;&emsp;`*dfs`, `**kwargs`: Name-value pairs, where value is the summarized  
&emsp;&emsp;&emsp;&emsp;data for each group  

##### Returns:
&emsp;&emsp;The summary dataframe.  


In [2]:
# Ungrouped input: the whole frame is collapsed into a single summary row.
mtcars >> summarise(mean=mean(f.disp), n=n())

Unnamed: 0,mean,n
0,230.721875,1


In [3]:
# Grouped input: one summary row per `cyl` group.
# NOTE(review): the recorded output reports n == 1 for every group, yet the
# per-group quantiles shown further down differ, so each group holds more
# than one row -- confirm how `n()` behaves in this datar version.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(mean=mean(f.disp), n=n())
    >> display()
)

[2021-04-08 16:19:26][datar][   INFO] `summarise()` ungrouping output (override with `_groups` argument)


Unnamed: 0,cyl,mean,n
0,4,105.136364,1
1,6,183.314286,1
2,8,353.1,1


In [4]:
# Switch off the informational message summarise() logs about the
# grouping of its result.
summarise.inform = False
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(mean=mean(f.disp), n=n())
    >> display()
)

Unnamed: 0,cyl,mean,n
0,4,105.136364,1
1,6,183.314286,1
2,8,353.1,1


In [5]:
# Re-enable the informational message about the result's grouping.
summarise.inform = True

# A summary expression may yield several values per group: two quantiles
# per `cyl` group give two rows per group.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(qs=quantile(f.disp, c(0.25, 0.75)), prob=c(0.25, 0.75))
    >> display()
)

[2021-04-08 16:19:26][datar][   INFO] `summarise()` has grouped output by ['cyl']. You can override using the `_groups` argument.
[2021-04-08 16:19:26][datar][   INFO] # [DataFrameGroupBy] Groups: ['cyl'] (3)


Unnamed: 0,cyl,qs,prob
0,4,78.85,0.25
1,4,120.65,0.75
2,6,160.0,0.25
3,6,196.3,0.75
4,8,301.75,0.25
5,8,390.0,0.75


In [6]:
# `_` is IPython's "last output" variable, so this re-displays an earlier result.
# NOTE(review): this depends on interactive kernel state; consider binding the
# result of the previous cell to a name and piping that instead.
_ >> display()

Unnamed: 0,cyl,qs,prob
0,4,78.85,0.25
1,4,120.65,0.75
2,6,160.0,0.25
3,6,196.3,0.75
4,8,301.75,0.25
5,8,390.0,0.75


In [7]:
from pipda import register_func, Context

def my_quantile(x, probs):
  """Return a tibble pairing the requested quantiles of `x` with their probabilities.

  Column `x` holds `quantile(x, probs)`; column `probs` holds `probs` itself.
  """
  quantiles = quantile(x, probs)
  return tibble(x=quantiles, probs=probs)

# Register the helper with pipda so it can be called with `f.*` expressions
# (presumably these are evaluated against the data first -- see Context.EVAL).
my_quantile = register_func(None, context=Context.EVAL, func=my_quantile)

# An unnamed data-frame result contributes all of its columns (`x`, `probs`)
# to the summary.
(
    mtcars
    >> group_by(f.cyl)
    >> summarise(my_quantile(f.disp, c(0.25, 0.75)))
    >> display()
)


[2021-04-08 16:19:26][datar][   INFO] `summarise()` has grouped output by ['cyl']. You can override using the `_groups` argument.
[2021-04-08 16:19:26][datar][   INFO] # [DataFrameGroupBy] Groups: ['cyl'] (3)


Unnamed: 0,cyl,x,probs
0,4,78.85,0.25
1,4,120.65,0.75
2,6,160.0,0.25
3,6,196.3,0.75
4,8,301.75,0.25
5,8,390.0,0.75


In [8]:
# With two grouping columns, summarise() drops the last one (`vs`) by default,
# leaving the result grouped by `cyl` only.
(
    mtcars
    >> group_by(f.cyl, f.vs)
    >> summarise(cyl_n=n())
    >> group_vars()
)

[2021-04-08 16:19:26][datar][   INFO] `summarise()` has grouped output by ['cyl'] (override with `_groups` argument)


['cyl']

In [9]:
# Unlike dplyr's summarise, f.disp can be reused.
# NOTE(review): the recorded output below shows `sd` empty for every group,
# which is what taking the sd of the freshly summarised, one-value `disp`
# would produce -- confirm that the claim above holds for this datar version.
mtcars >> \
  group_by(f.cyl) >> \
  summarise(disp=mean(f.disp), sd=sd(f.disp)) >> \
  display()

[2021-04-08 16:19:26][datar][   INFO] `summarise()` ungrouping output (override with `_groups` argument)


Unnamed: 0,cyl,disp,sd
0,4,105.136364,
1,6,183.314286,
2,8,353.1,


In [10]:
# Refer to a column whose name is held in a string variable by subscripting `f`.
var = "mass"
starwars >> summarise(avg = mean(f[var], na_rm = TRUE))

Unnamed: 0,avg
0,97.311864
