# How to Get Top 10 Values in Pandas

In [1]:
import pandas as pd

# Columns of interest; every other field in the CSV is skipped at parse time.
cols = ['Date', 'Time', 'Depth', 'Magnitude Type', 'Type', 'Magnitude']

# `usecols` keeps pandas from parsing columns we never use. It ignores the
# order of the names it is given, so the trailing `[cols]` re-selects the
# columns in the order we want them displayed.
df = pd.read_csv(
    '../data/earthquakes_1965_2016_database.csv.zip',
    usecols=cols,
)[cols]

df

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
0,01/02/1965,13:44:18,131.60,MW,Earthquake,6.0
1,01/04/1965,11:29:49,80.00,MW,Earthquake,5.8
2,01/05/1965,18:05:58,20.00,MW,Earthquake,6.2
3,01/08/1965,18:49:43,15.00,MW,Earthquake,5.8
4,01/09/1965,13:32:50,15.00,MW,Earthquake,5.8
...,...,...,...,...,...,...
23407,12/28/2016,08:22:12,12.30,ML,Earthquake,5.6
23408,12/28/2016,09:13:47,8.80,ML,Earthquake,5.5
23409,12/28/2016,12:38:51,10.00,MWW,Earthquake,5.9
23410,12/29/2016,22:30:19,79.00,MWW,Earthquake,6.3


## Step 2: Get the Most Frequent Value of a Column in Pandas

In [3]:
# Most frequent magnitude(s); the result is a Series, not a scalar.
df.loc[:, 'Magnitude'].mode()

0    5.5
dtype: float64

In [4]:
# Most frequent time(s) of day; one row is returned per tied value.
time_of_day = df['Time']
time_of_day.mode()

0    02:56:58
1    14:09:03
dtype: object

## Step 3: Get the Most Frequent Value for All Columns in Pandas

In [5]:
# One single-column frame per column, holding that column's mode(s).
dfs = [pd.DataFrame({col: df[col].mode()}) for col in df.columns]

# Concatenating side by side aligns on the row index, so columns with fewer
# tied modes are padded with NaN. `df.mode()` is the built-in shortcut that
# produces the same kind of table in a single call.
pd.concat(dfs, axis=1)

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
0,03/11/2011,02:56:58,10.0,MW,Earthquake,5.5
1,,14:09:03,,,,


In [6]:
from pandas.api.types import is_numeric_dtype

# One single-column frame of mode(s) per numeric column; non-numeric columns
# are skipped by the `is_numeric_dtype` filter.
dfs = [
    pd.DataFrame({col: df[col].mode()})
    for col in df.columns
    if is_numeric_dtype(df[col])
]

# `pd.concat` raises ValueError on an empty list, so fall back to an empty
# frame when the DataFrame has no numeric columns at all.
pd.concat(dfs, axis=1) if dfs else pd.DataFrame()

Unnamed: 0,Depth,Magnitude
0,10.0,5.5


## Step 4: Get N most frequent values in a column

In [7]:
# Occurrence count of every distinct magnitude, most frequent first.
magnitude = df['Magnitude']
magnitude.value_counts()

5.50    4685
5.60    3967
5.70    3079
5.80    2346
5.90    1947
        ... 
5.51       1
5.55       1
6.31       1
6.47       1
5.73       1
Name: Magnitude, Length: 64, dtype: int64

In [8]:
n = 5
# The n most frequent magnitudes themselves: the labels of the counts
# Series, sliced before being converted to a plain list.
df['Magnitude'].value_counts().index[:n].tolist()

[5.5, 5.6, 5.7, 5.8, 5.9]

In [9]:
n = 5
# How many times each of the n most frequent magnitudes occurs.
df['Magnitude'].value_counts().iloc[:n].tolist()

[4685, 3967, 3079, 2346, 1947]

In [10]:
# Values and counts together for the five most frequent magnitudes.
df['Magnitude'].value_counts().iloc[:5]

5.5    4685
5.6    3967
5.7    3079
5.8    2346
5.9    1947
Name: Magnitude, dtype: int64

In [11]:
# Largest occurrence count among all magnitudes.
magnitude_counts = df['Magnitude'].value_counts()
magnitude_counts.max()

4685

In [12]:
# The magnitude that occurs most often (index label of the largest count).
magnitude_counts = df['Magnitude'].value_counts()
magnitude_counts.idxmax()

5.5

## Step 5: Get the Top N Most Frequent Values for Every Column in a DataFrame

In [13]:
from pandas.api.types import is_categorical_dtype

# For every column, print a small header and show its most frequent values.
for col in df.columns:
    print(col, end=' - \n')
    print('_' * 50)
    # Inspect the column's data, not its label: given the bare name string,
    # is_categorical_dtype only tests whether that string names a dtype.
    if col == 'Magnitude' or is_categorical_dtype(df[col]):
        # Count these as text labels and show the top 5.
        values, top_n = df[col].astype('str'), 5
    else:
        # Everything else is counted as-is and limited to the top 3.
        values, top_n = df[col], 3
    display(pd.DataFrame(values.value_counts().sort_values(ascending=False).head(top_n)))

Date - 
__________________________________________________


Unnamed: 0,Date
03/11/2011,128
12/26/2004,51
02/27/2010,39


Time - 
__________________________________________________


Unnamed: 0,Time
02:56:58,5
14:09:03,5
16:25:34,4


Depth - 
__________________________________________________


Unnamed: 0,Depth
10.0,3911
33.0,3694
35.0,592


Magnitude Type - 
__________________________________________________


Unnamed: 0,Magnitude Type
MW,7722
MWC,5669
MB,3761


Type - 
__________________________________________________


Unnamed: 0,Type
Earthquake,23232
Nuclear Explosion,175
Explosion,4


Magnitude - 
__________________________________________________


Unnamed: 0,Magnitude
5.5,4685
5.6,3967
5.7,3079
5.8,2346
5.9,1947
