# How to Get Top 10 Values in Pandas

In [1]:
import pandas as pd

# Columns used by the top-N examples in this notebook.
cols = ['Date', 'Time', 'Depth', 'Magnitude Type', 'Type', 'Magnitude']

# `usecols` avoids parsing the columns that are not needed. It ignores the
# order of the list (columns come back in file order), so re-index with
# `cols` afterwards to keep the order given above.
df = pd.read_csv('../data/earthquakes_1965_2016_database.csv.zip', usecols=cols)[cols]

df

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
0,01/02/1965,13:44:18,131.60,MW,Earthquake,6.0
1,01/04/1965,11:29:49,80.00,MW,Earthquake,5.8
2,01/05/1965,18:05:58,20.00,MW,Earthquake,6.2
3,01/08/1965,18:49:43,15.00,MW,Earthquake,5.8
4,01/09/1965,13:32:50,15.00,MW,Earthquake,5.8
...,...,...,...,...,...,...
23407,12/28/2016,08:22:12,12.30,ML,Earthquake,5.6
23408,12/28/2016,09:13:47,8.80,ML,Earthquake,5.5
23409,12/28/2016,12:38:51,10.00,MWW,Earthquake,5.9
23410,12/29/2016,22:30:19,79.00,MWW,Earthquake,6.3


## Step 2: Get the top 10 largest/smallest values for a single column

In [2]:
# Ten highest magnitudes, largest first; the index keeps the original row labels.
df['Magnitude'].nlargest(10)

17083    9.1
20501    9.1
19928    8.8
16       8.7
17329    8.6
21219    8.6
15440    8.4
18615    8.4
12119    8.3
16446    8.3
Name: Magnitude, dtype: float64

In [3]:
# Same call with a smaller n: only the five highest magnitudes.
df['Magnitude'].nlargest(5)

17083    9.1
20501    9.1
19928    8.8
16       8.7
17329    8.6
Name: Magnitude, dtype: float64

## Step 3: Get the top N largest/smallest values - handling duplicates (ties)

In [4]:
# Five shallowest rows by Depth. With the default keep='first', ties on the
# cut-off value are resolved in favour of the rows that appear first.
df.nsmallest(5, 'Depth')

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
10978,06/28/1992,12:00:45,-1.1,ML,Earthquake,5.77
10977,06/28/1992,11:57:34,-0.097,MW,Earthquake,7.3
8099,07/21/1986,22:07:16,-0.076,ML,Earthquake,5.6
2614,02/16/1973,05:02:58,0.0,MB,Explosion,5.6
2797,07/23/1973,01:22:58,0.0,MB,Nuclear Explosion,6.3


In [5]:
# keep='all' retains every row tied with the cut-off value, so the result can
# contain more than the requested five rows.
df.nsmallest(5, 'Depth', keep='all')

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
10978,06/28/1992,12:00:45,-1.100,ML,Earthquake,5.77
10977,06/28/1992,11:57:34,-0.097,MW,Earthquake,7.30
8099,07/21/1986,22:07:16,-0.076,ML,Earthquake,5.60
2614,02/16/1973,05:02:58,0.000,MB,Explosion,5.60
2797,07/23/1973,01:22:58,0.000,MB,Nuclear Explosion,6.30
...,...,...,...,...,...,...
12613,08/17/1995,00:59:58,0.000,MB,Nuclear Explosion,6.00
13054,06/08/1996,02:55:58,0.000,MB,Nuclear Explosion,5.90
14841,05/26/2000,01:28:23,0.000,MWC,Earthquake,5.60
16668,02/24/2004,02:27:46,0.000,MWB,Earthquake,6.40


In [6]:
# keep='last' resolves ties on the cut-off value in favour of the rows that
# appear last in the frame.
df.nsmallest(5, 'Depth', keep='last')

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
10978,06/28/1992,12:00:45,-1.1,ML,Earthquake,5.77
10977,06/28/1992,11:57:34,-0.097,MW,Earthquake,7.3
8099,07/21/1986,22:07:16,-0.076,ML,Earthquake,5.6
22363,09/01/2014,11:41:10,0.0,MWW,Earthquake,5.5
16668,02/24/2004,02:27:46,0.0,MWB,Earthquake,6.4


## Step 4: Get Top N values in multiple columns

In [7]:
# Rank by Magnitude first; Depth is only used to order rows whose Magnitude ties.
df.nlargest(5, ['Magnitude', 'Depth'])

Unnamed: 0,Date,Time,Depth,Magnitude Type,Type,Magnitude
17083,12/26/2004,00:58:53,30.0,MW,Earthquake,9.1
20501,03/11/2011,05:46:24,29.0,MWW,Earthquake,9.1
19928,02/27/2010,06:34:12,22.9,MWW,Earthquake,8.8
16,02/04/1965,05:01:22,30.3,MW,Earthquake,8.7
17329,03/28/2005,16:09:37,30.0,MWW,Earthquake,8.6


## Step 5: How do `nsmallest` and `nlargest` work?

In [8]:
# 'Magnitude Type' has dtype object, which nlargest rejects with a TypeError.
# The error is the point of this cell, so catch it and print it instead of
# letting it abort a "Restart & Run All" of the notebook.
try:
    df.nlargest(n=5, columns=['Magnitude Type', 'Depth'])
except TypeError as err:
    print(f'{type(err).__name__}: {err}')

TypeError: Column 'Magnitude Type' has dtype object, cannot use method 'nlargest' with this dtype

## Step 6: Display Top values for all numeric columns in DataFrame

In [9]:
import numpy as np

# Number of top values to show for each numeric column.
TOP_N = 7

# select_dtypes(include='number') picks up every numeric column (e.g. int32 or
# float32 as well), not only those stored as float64/int64.
numeric_df = df.select_dtypes(include='number')

# One single-column frame per numeric column. The original row labels are
# dropped so that, side by side, row 0 is each column's largest value, row 1
# the second largest, and so on.
dfs = [
    numeric_df[col].nlargest(n=TOP_N).reset_index(drop=True).to_frame()
    for col in numeric_df.columns
]
pd.concat(dfs, axis=1)

Unnamed: 0,Depth,Magnitude
0,700.0,9.1
1,691.6,9.1
2,690.0,8.8
3,688.0,8.7
4,687.6,8.6
5,682.2,8.6
6,678.9,8.4
