# How to Get a Frequency Table (Count) of a Categorical Variable in R

In [14]:
head(mtcars)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1


From `?mtcars`: column 9, `am`, is the Transmission (0 = automatic, 1 = manual)

In [16]:
mtcars$am

## USING BASE R

In [46]:
?table

In [2]:
?mtcars

In [19]:
# Count how many cars fall under each transmission code in `am`.
am_object <- table(mtcars$am)
am_object


 0  1 
19 13 

In [20]:
class(am_object)

In [22]:
# Convert the table to a data frame and replace the default column names
# (Var1, Freq) with descriptive ones.
am_df <- as.data.frame(am_object)
names(am_df) <- c("am", "count")
am_df

am,count
0,19
1,13


Note: If the vector contains missing values, use the syntax below to include the count of missing values (NA) as well.

In [50]:
# useNA = "always" adds an <NA> entry to the table even when its count is 0.
table(mtcars$am, useNA = "always")


   0    1 <NA> 
  19   13    0 

## USING dplyr package

In [51]:
library(dplyr)

In [24]:
# Tally rows per transmission code; the counts land in column `n`.
mtcars %>%
  count(am)

am,n
0,19
1,13


In [25]:
head(mtcars)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1


In [26]:
mtcars %>% count(gear)

gear,n
3,15
4,12
5,5


In [27]:
# Tally rows for every (am, gear) combination that occurs in the data.
mtcars %>%
  count(am, gear)

am,gear,n
0,3,15
0,4,4
1,4,8
1,5,5


### Using group_by in dplyr

In [33]:
# Same tally as count(am), spelled out: group, then count the rows per group.
mtcars %>%
  group_by(am) %>%
  summarize(count = n())

am,count
0,19
1,13


In [34]:
# Count cars per (am, gear) pair. With two grouping columns, summarize()
# would otherwise return a result still grouped by `am`; `.groups = "drop"`
# (dplyr >= 1.0.0) returns an ungrouped result so later verbs are not
# silently applied per `am` group.
mtcars %>%
  group_by(am, gear) %>%
  summarize(count = n(), .groups = "drop")

am,gear,count
0,3,15
0,4,4
1,4,8
1,5,5


## Using data.table package

In [52]:
library(data.table)

In [31]:
# .N is the row count of each `by` group. Groups are listed in the order they
# first appear in the data (am = 1 comes first here), not sorted.
as.data.table(mtcars)[, .N, by="am"]

am,N
1,13
0,19


In [30]:
class(mtcars)

In [32]:
# Passing a character vector to `by` groups on several columns at once.
as.data.table(mtcars)[, .N, by=c("am", "gear")]

am,gear,N
1,4,8
0,3,15
0,4,4
1,5,5


Summary: three ways to count the values of a categorical variable
    - data.frame(table(mtcars$am)), using Base R
    - mtcars %>% count(am), using dplyr
    - as.data.table(mtcars)[, .N, by="am"], using data.table

## diamonds dataset in ggplot2 package

In [3]:
library(ggplot2)

In [2]:
head(diamonds)

carat,cut,color,clarity,depth,table,price,x,y,z
0.23,Ideal,E,SI2,61.5,55,326,3.95,3.98,2.43
0.21,Premium,E,SI1,59.8,61,326,3.89,3.84,2.31
0.23,Good,E,VS1,56.9,65,327,4.05,4.07,2.31
0.29,Premium,I,VS2,62.4,58,334,4.2,4.23,2.63
0.31,Good,J,SI2,63.3,58,335,4.34,4.35,2.75
0.24,Very Good,J,VVS2,62.8,57,336,3.94,3.96,2.48


In [4]:
?diamonds

In [37]:
dim(diamonds)

In [39]:
# Using Base R: without renaming, the columns come back as Var1 and Freq.
data.frame(table(diamonds$cut))

Var1,Freq
Fair,1610
Good,4906
Very Good,12082
Premium,13791
Ideal,21551


In [40]:
# dplyr: one row per cut level, with the tally in column `n`.
diamonds %>%
  count(cut)

cut,n
Fair,1610
Good,4906
Very Good,12082
Premium,13791
Ideal,21551


In [41]:
# dplyr, long form: group by cut, then count the rows in each group as `N`.
diamonds %>%
  group_by(cut) %>%
  summarize(N = n())

cut,N
Fair,1610
Good,4906
Very Good,12082
Premium,13791
Ideal,21551


In [42]:
# summarise() is the British-spelling synonym of summarize(); the result is
# the same as with summarize().
diamonds %>% group_by(cut) %>% summarise(N=n())

cut,N
Fair,1610
Good,4906
Very Good,12082
Premium,13791
Ideal,21551


In [43]:
# Using data.table package
class(diamonds)

In [45]:
# Convert to a data.table first, then count the rows (.N) for each cut level.
as.data.table(diamonds)[, .N, by="cut"]

cut,N
Ideal,21551
Premium,13791
Good,4906
Very Good,12082
Fair,1610


# Frequency Table or count of Categorical Variable

In [48]:
# Save the diamonds data as diamonds.csv (relative path), without a
# row-name column.
write.csv(diamonds, file = "diamonds.csv", row.names = FALSE)