In [1]:
# https://dplyr.tidyverse.org/reference/count.html
%run nb_helpers.py

from datar.datasets import starwars
from datar.all import *

# nb_header is not defined in this notebook; it comes from nb_helpers.py,
# loaded by the %run line above.
# NOTE(review): judging by the rendered output, it displays the documentation
# of each verb passed in -- confirm against nb_helpers.py.
nb_header(count, tally, add_count, add_tally)

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ count</div>

##### Count observations by group

See https://dplyr.tidyverse.org/reference/count.html  

##### Args:
&emsp;&emsp;`x`: The dataframe  
&emsp;&emsp;`*columns`: Variables to group by  
&emsp;&emsp;`**mutates`: New variables (`name=expression`) to create and group by  
&emsp;&emsp;`wt`: Frequency weights. Can be None or a variable:  
&emsp;&emsp;&emsp;&emsp;If None (the default), counts the number of rows in each group.  
&emsp;&emsp;&emsp;&emsp;If a variable, computes sum(wt) for each group.  

&emsp;&emsp;`sort`: If True, will show the largest groups at the top.  
&emsp;&emsp;`name`: The name of the new column in the output.  

##### Returns:
&emsp;&emsp;DataFrame object with the count column  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ tally</div>

##### A lower-level function for count that assumes you've done the grouping

See count()  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ add_count</div>

##### Equivalent to count() but uses mutate() instead of summarise()

See count().  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ add_tally</div>

##### Equivalent to tally() but uses mutate() instead of summarise()

See count().  


In [2]:
# Count the rows of `starwars` per species; the result has one row per
# species with the count in column `n`.
starwars >> count(f.species)

Unnamed: 0,species,n
,<object>,<int64>
0.0,Human,35
1.0,Droid,6
2.0,Wookiee,2
3.0,Rodian,1
4.0,Hutt,1
5.0,Yoda's species,1
6.0,Trandoshan,1
7.0,Mon Calamari,1
8.0,Ewok,1


In [3]:
# Same count, but sort=True shows the largest groups at the top.
starwars >> count(f.species, sort=True)

Unnamed: 0,species,n
,<object>,<int64>
0.0,Human,35
1.0,Droid,6
2.0,,4
3.0,Gungan,3
4.0,Twi'lek,2
5.0,Kaminoan,2
6.0,Zabrak,2
7.0,Wookiee,2
8.0,Mirialan,2


In [4]:
# Count by the combination of two columns (sex, gender), largest groups first.
starwars >> count(f.sex, f.gender, sort=True)

Unnamed: 0,sex,gender,n
,<object>,<object>,<int64>
0.0,male,masculine,60
1.0,female,feminine,16
2.0,none,masculine,5
3.0,,,4
4.0,hermaphroditic,masculine,1
5.0,none,feminine,1


In [5]:
# A keyword argument creates the grouping column on the fly: rows are counted
# by `birth_decade`, i.e. birth_year rounded to the nearest ten.
# NOTE(review): `round` is applied to a deferred `f.` expression, so presumably
# the star import from datar.all supplies a datar-aware `round` -- confirm.
starwars >> count(birth_decade=round(f.birth_year, -1))

Unnamed: 0,birth_decade,n
,<float64>,<int64>
0.0,20.0,6
1.0,110.0,1
2.0,30.0,4
3.0,40.0,6
4.0,50.0,8
5.0,60.0,4
6.0,200.0,1
7.0,600.0,1
8.0,900.0,1


In [6]:
# Small example frame written row by row: the first row of `f.` names gives
# the column names, each following row is one record.
df = tribble(
  f.name,    f.gender,   f.runs,
  "Max",    "male",       10,
  "Sandra", "female",      1,
  "Susan",  "female",      4
)
# Without `wt`, count() counts the rows in each group:
df >> count(f.gender)

Unnamed: 0,gender,n
,<object>,<int64>
0.0,male,1
1.0,female,2


In [7]:
# With wt=f.runs, `n` is the sum of `runs` within each gender instead of the
# number of rows.
df >> count(f.gender, wt=f.runs)

Unnamed: 0,gender,n
,<object>,<int64>
0.0,male,10
1.0,female,5


In [8]:
# tally() on an ungrouped frame yields a single row holding the total row count.
starwars >> tally()

Unnamed: 0,n
,<int64>
0.0,87


In [9]:
# tally() assumes the grouping has already been done: group first, then tally.
# The displayed result matches `starwars >> count(f.species)`.
starwars >> group_by(f.species) >> tally() 

Unnamed: 0,species,n
,<object>,<int64>
0.0,Human,35
1.0,Droid,6
2.0,Wookiee,2
3.0,Rodian,1
4.0,Hutt,1
5.0,Yoda's species,1
6.0,Trandoshan,1
7.0,Mon Calamari,1
8.0,Ewok,1


In [10]:
# add_count() keeps every original row and appends the per-gender sum of
# `runs` as a new column `n`.
df >> add_count(f.gender, wt=f.runs)

Unnamed: 0,name,gender,runs,n
,<object>,<object>,<int64>,<int64>
0.0,Max,male,10,10
1.0,Sandra,female,1,5
2.0,Susan,female,4,5


In [11]:
# Display `df` again: it has no `n` column, so the add_count() call did not
# modify the frame in place.
df

Unnamed: 0,name,gender,runs
,<object>,<object>,<int64>
0.0,Max,male,10
1.0,Sandra,female,1
2.0,Susan,female,4


In [12]:
# add_tally() on an ungrouped frame appends the overall sum of `runs`
# to every row as column `n`.
df >> add_tally(wt=f.runs)

Unnamed: 0,name,gender,runs,n
,<object>,<object>,<int64>,<int64>
0.0,Max,male,10,15
1.0,Sandra,female,1,15
2.0,Susan,female,4,15
