<a href="https://colab.research.google.com/github/siglimumuni/Published-Articles/blob/main/A_Complete_Guide_to_the_value_counts()_Method_in_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [122]:
import numpy as np
import pandas as pd

#Toy class roster, one entry per student. Two students have no grade
#recorded (np.nan) so that missing-value handling can be demonstrated.
my_dict = {
    "Name": [
        "James", "Dan", "Jonathan", "Eric", "Joanna",
        "Mary", "Sue", "Jim", "Lionel", "Tim",
    ],
    "Sex": [
        "Male", "Male", "Male", "Male", "Female",
        "Female", "Female", "Male", "Male", "Male",
    ],
    "Score": [80, 91, 75, 88, 89, 90, 92, 91, 82, 80],
    "Grade": ["B", "A", "C", "B", "B", "A", "A", np.nan, "B", np.nan],
}

#Each dictionary key becomes one column of the frame
df = pd.DataFrame(data=my_dict)

print(df)

       Name     Sex  Score Grade
0     James    Male     80     B
1       Dan    Male     91     A
2  Jonathan    Male     75     C
3      Eric    Male     88     B
4    Joanna  Female     89     B
5      Mary  Female     90     A
6       Sue  Female     92     A
7       Jim    Male     91   NaN
8    Lionel    Male     82     B
9       Tim    Male     80   NaN


In [22]:
#Count how many times each grade appears (NaN grades are left out by default)
(
    df["Grade"]
    .value_counts()
)

B    4
A    3
C    1
Name: Grade, dtype: int64

In [7]:
#Keep missing grades (NaN) as their own row in the tally
(
    df["Grade"]
    .value_counts(dropna=False)
)

B      4
A      3
NaN    2
C      1
Name: Grade, dtype: int64

In [25]:
#Order the tally from the least frequent value to the most frequent
df["Grade"].value_counts(
    dropna=False,
    ascending=True,
)

C      1
NaN    2
A      3
B      4
Name: Grade, dtype: int64

In [26]:
#Leave the tally unsorted instead of ordering it by frequency
df["Grade"].value_counts(
    dropna=False,
    sort=False,
)

B      4
A      3
C      1
NaN    2
Name: Grade, dtype: int64

In [27]:
#Report each value's share of all rows rather than its raw count
df["Grade"].value_counts(
    dropna=False,
    normalize=True,
)

B      0.4
A      0.3
NaN    0.2
C      0.1
Name: Grade, dtype: float64

In [32]:
#Group the numeric scores into three intervals and count the rows in each
df["Score"].value_counts(
    bins=3,
    sort=False,
)

(74.982, 80.667]    3
(80.667, 86.333]    1
(86.333, 92.0]      6
Name: Score, dtype: int64

In [31]:
#Count the scores falling between hand-picked bin edges (70, 80, 90, 100)
df["Score"].value_counts(
    bins=[70, 80, 90, 100],
    sort=False,
)

(69.999, 80.0]    3
(80.0, 90.0]      4
(90.0, 100.0]     3
Name: Score, dtype: int64

In [34]:
#Order the tally alphabetically by grade label (A-Z) rather than by count
(
    df["Grade"]
    .value_counts()
    .sort_index()
)

A    3
B    4
C    1
Name: Grade, dtype: int64

In [35]:
#Order the tally by grade label in reverse alphabetical order (Z-A)
(
    df["Grade"]
    .value_counts()
    .sort_index(ascending=False)
)

C    1
B    4
A    3
Name: Grade, dtype: int64

In [40]:
#Tally the grades separately within each sex
(
    df
    .groupby("Sex")["Grade"]
    .value_counts()
)

Sex     Grade
Female  A        2
        B        1
Male    B        3
        A        1
        C        1
Name: Grade, dtype: int64

In [115]:
#Convert the counts Series to a DataFrame with explicit column names.
#rename_axis() labels the index and reset_index(name=...) labels the counts,
#so the columns come out as "Grade"/"Count" no matter which default names
#value_counts() assigns (the old rename map only matched the pandas 1.x
#defaults and mislabelled the columns on pandas 2.x).
grade_count = (
    df["Grade"]
    .value_counts()
    .rename_axis("Grade")
    .reset_index(name="Count")
)

print(grade_count)
print(type(grade_count))

  Grade  Count
0     B      4
1     A      3
2     C      1
<class 'pandas.core.frame.DataFrame'>


In [127]:
#Keep only the grades that occur more than twice
(
    df["Grade"]
    .value_counts()
    .loc[lambda counts: counts > 2]
)

B    4
A    3
Name: Grade, dtype: int64