In [1]:
# https://dplyr.tidyverse.org/reference/count.html
from pandas import DataFrame
from datar.datasets import starwars
from datar.all import *

# Execute the local helper script in this kernel's namespace
# (presumably this is where nb_header comes from -- confirm in nb_helpers.py),
# then render the reference docs for the four verbs demonstrated below.
%run nb_helpers.py
nb_header(count, tally, add_count, add_tally)

### # count  

##### Count observations by group

See https://dplyr.tidyverse.org/reference/count.html  

##### Args:
&emsp;&emsp;`x`: The dataframe  
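&emsp;&emsp;`*columns`: Variables to group by  
&emsp;&emsp;`**mutates`: New variables to create and group by, given as `name=expression` (e.g. `birth_decade=round(f.birth_year, -1)`)  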
&emsp;&emsp;`wt`: Frequency weights. Can be None or a variable:  
&emsp;&emsp;&emsp;&emsp;If None (the default), counts the number of rows in each group.  
&emsp;&emsp;&emsp;&emsp;If a variable, computes sum(wt) for each group.  

&emsp;&emsp;`sort`: If `True`, will show the largest groups at the top.  
&emsp;&emsp;`name`: The name of the new column in the output.  

##### Returns:
&emsp;&emsp;DataFrame object with the count column  


### # tally  

##### A lower-level function for count() that assumes you've already done the grouping

See count()  


### # add_count  

##### Equivalent to count() but uses mutate() instead of summarise()

See count().  


### # add_tally  

##### Equivalent to tally() but uses mutate() instead of summarise()

See count().  


In [2]:
# Count the rows for each species; the count lands in column `n`.
starwars >> count(f.species)

Unnamed: 0,species,n
0,Aleena,1
1,Besalisk,1
2,Cerean,1
3,Chagrian,1
4,Clawdite,1
5,Droid,6
6,Dug,1
7,Ewok,1
8,Geonosian,1
9,Gungan,3


In [3]:
# Same count, but sort=True puts the largest groups at the top.
starwars >> count(f.species, sort=True)

Unnamed: 0,species,n
0,Human,35
1,Droid,6
2,,4
3,Gungan,3
4,Wookiee,2
5,Zabrak,2
6,Twi'lek,2
7,Mirialan,2
8,Kaminoan,2
9,Skakoan,1


In [4]:
# Count each (sex, gender) combination, largest groups first.
starwars >> count(f.sex, f.gender, sort=True)

Unnamed: 0,sex,gender,n
0,male,masculine,60
1,female,feminine,16
2,none,masculine,5
3,,,4
4,hermaphroditic,masculine,1
5,none,feminine,1


In [5]:
# A keyword argument creates the grouping variable on the fly:
# birth_year is rounded to the nearest 10 and counted as `birth_decade`.
# NOTE(review): `round` is applied to an `f.` expression, so it is presumably
# the version brought in by `from datar.all import *`, not the builtin -- confirm.
starwars >> count(birth_decade=round(f.birth_year, -1))

Unnamed: 0,birth_decade,n
0,10.0,1
1,20.0,6
2,30.0,4
3,40.0,6
4,50.0,8
5,60.0,4
6,70.0,4
7,80.0,2
8,90.0,3
9,100.0,1


In [6]:
# Build a small example frame row by row: the first line of arguments names
# the columns (name, gender, runs); each following line is one row.
df = tribble(
  f.name,    f.gender,   f.runs,
  "Max",    "male",       10,
  "Sandra", "female",      1,
  "Susan",  "female",      4
)
# counts rows:
df >> count(f.gender)

Unnamed: 0,gender,n
0,female,2
1,male,1


In [7]:
# With wt given, `n` is the sum of `runs` within each gender
# rather than the number of rows.
df >> count(f.gender, wt=f.runs)

Unnamed: 0,gender,n
0,female,5
1,male,10


In [8]:
# tally() on an ungrouped frame returns a single row: the total row count.
starwars >> tally()

Unnamed: 0,n
0,87


In [9]:
# Group explicitly first, then tally(); the displayed rows match the
# count(f.species) output shown earlier in this notebook.
starwars >> group_by(f.species) >> tally() 

Unnamed: 0,species,n
0,Aleena,1
1,Besalisk,1
2,Cerean,1
3,Chagrian,1
4,Clawdite,1
5,Droid,6
6,Dug,1
7,Ewok,1
8,Geonosian,1
9,Gungan,3


In [10]:
# add_count() keeps every original row and appends the per-gender
# weighted total (sum of `runs`) as column `n`.
df >> add_count(f.gender, wt=f.runs)

Unnamed: 0,name,gender,runs,n
0,Max,male,10,10
1,Sandra,female,1,5
2,Susan,female,4,5


In [11]:
# Show df again: it has no `n` column, so the previous add_count() call
# did not modify it in place.
df

Unnamed: 0,name,gender,runs
0,Max,male,10
1,Sandra,female,1
2,Susan,female,4


In [12]:
# add_tally() on the ungrouped frame appends the overall weighted total
# (sum of `runs` across all rows) to every row as column `n`.
df >> add_tally(wt=f.runs)

Unnamed: 0,name,gender,runs,n
0,Max,male,10,15
1,Sandra,female,1,15
2,Susan,female,4,15
