# How to get a Frequency Table (count) of a Categorical Variable using pandas?

In [7]:
import numpy as np
import pandas as pd

### Scenario
[Link to the Sample Store Dataset](https://data.world/annjackson/2019-superstore)

In [8]:
# Read the Superstore CSV, then narrow it to the three categorical columns
# that the frequency tables below are built from.
req_columns = ["Category", "Sub-Category","Segment"]
df = pd.read_csv("D:/Github/datasets/Sample - Superstore.csv").loc[:, req_columns]
print("Shape of Dataset :", df.shape)
df.head(n=7)

Shape of Dataset : (9994, 3)


Unnamed: 0,Category,Sub-Category,Segment
0,Furniture,Bookcases,Consumer
1,Furniture,Chairs,Consumer
2,Office Supplies,Labels,Corporate
3,Furniture,Tables,Consumer
4,Office Supplies,Storage,Consumer
5,Furniture,Furnishings,Consumer
6,Office Supplies,Art,Consumer


In [38]:
# Expected Output - Frequency Table of Category column

Office Supplies    6026
Furniture          2121
Technology         1847
Name: Category, dtype: int64

## Frequency Table of `Category` column

In [17]:
# Method 1: value_counts() tallies every distinct value of the column and
# lists the most frequent value first.
df.loc[:, "Category"].value_counts()

Office Supplies    6026
Furniture          2121
Technology         1847
Name: Category, dtype: int64

In [15]:
# Method 2: group the rows by Category, count the rows in each group, and
# turn the result into a two-column DataFrame (Category, count).
df.groupby(by="Category").size().rename("count").reset_index()

Unnamed: 0,Category,count
0,Furniture,2121
1,Office Supplies,6026
2,Technology,1847


## Frequency Table of `Segment` column

In [18]:
# Method 1: value_counts() on the Segment column, most frequent value first.
df["Segment"].value_counts()

Consumer       5191
Corporate      3020
Home Office    1783
Name: Segment, dtype: int64

In [19]:
# Method 2: group the rows by Segment and count the rows in each group.
# The result is a Series indexed by Segment.
df.groupby(by="Segment").size()

Segment
Consumer       5191
Corporate      3020
Home Office    1783
dtype: int64

## Frequency Table of `Category` and `Segment` columns

In [21]:
# Show the first five rows again as a reminder of the available columns.
df.head(n=5)

Unnamed: 0,Category,Sub-Category,Segment
0,Furniture,Bookcases,Consumer
1,Furniture,Chairs,Consumer
2,Office Supplies,Labels,Corporate
3,Furniture,Tables,Consumer
4,Office Supplies,Storage,Consumer


In [30]:
# Count the rows for every (Category, Segment) combination and return the
# result as a flat DataFrame with a "count" column.
(
    df
    .groupby(by=["Category", "Segment"])
    .size()
    .reset_index(name="count")
)

Unnamed: 0,Category,Segment,count
0,Furniture,Consumer,1113
1,Furniture,Corporate,646
2,Furniture,Home Office,362
3,Office Supplies,Consumer,3127
4,Office Supplies,Corporate,1820
5,Office Supplies,Home Office,1079
6,Technology,Consumer,951
7,Technology,Corporate,554
8,Technology,Home Office,342


## Summary
 - Frequency Table of a `single` categorical column
  - df["column_name"].value_counts()
  - df.groupby(["column_name"]).size()
 - Frequency Table of `two or more` categorical columns
  - df.groupby(["column_name1", "column_name2"]).size()
  - df.groupby(["column_name1", "column_name2"]).size().reset_index(name="count")
  

# Thank You :)