# Sorting & Ranking

Common patterns for ordering data and ranking values in pandas.


In [2]:
import pandas as pd
import numpy as np

# Sample data shared by every example below: one row per (city, year)
# with that year's sales and profit figures.
records = [
    ('Delhi', 2022, 120, 25),
    ('Mumbai', 2021, 150, 35),
    ('Chennai', 2023, 90, 10),
    ('Delhi', 2021, 100, 20),
    ('Mumbai', 2023, 160, 40),
]
df = pd.DataFrame(records, columns=['city', 'year', 'sales', 'profit'])

df

Unnamed: 0,city,year,sales,profit
0,Delhi,2022,120,25
1,Mumbai,2021,150,35
2,Chennai,2023,90,10
3,Delhi,2021,100,20
4,Mumbai,2023,160,40


## sort_values

In [3]:
# Order rows by 'sales', smallest first (ascending is the default).
# sort_values returns a new frame; df itself keeps its original row order.
df.sort_values(by='sales')

Unnamed: 0,city,year,sales,profit
2,Chennai,2023,90,10
3,Delhi,2021,100,20
0,Delhi,2022,120,25
1,Mumbai,2021,150,35
4,Mumbai,2023,160,40


In [4]:
# Same sort key, but largest 'sales' first.
df.sort_values(by='sales', ascending=False)

Unnamed: 0,city,year,sales,profit
4,Mumbai,2023,160,40
1,Mumbai,2021,150,35
0,Delhi,2022,120,25
3,Delhi,2021,100,20
2,Chennai,2023,90,10


In [5]:
# Sort by 'city' first; rows sharing a city are then ordered by 'sales'.
df.sort_values(by=['city', 'sales'])

Unnamed: 0,city,year,sales,profit
2,Chennai,2023,90,10
3,Delhi,2021,100,20
0,Delhi,2022,120,25
1,Mumbai,2021,150,35
4,Mumbai,2023,160,40


In [6]:
# One direction per sort key: cities A-Z, and within each city the
# highest 'sales' first.
df.sort_values(
    by=['city', 'sales'],
    ascending=[True, False],
)

Unnamed: 0,city,year,sales,profit
2,Chennai,2023,90,10
0,Delhi,2022,120,25
3,Delhi,2021,100,20
4,Mumbai,2023,160,40
1,Mumbai,2021,150,35


## sort_index

In [7]:
# Use 'city' as the row index instead of a regular column.
# set_index returns a new frame, so df still has its 'city' column.
# Note the index is not unique: Delhi and Mumbai each appear twice.
df_indexed = df.set_index(keys='city')
df_indexed

Unnamed: 0_level_0,year,sales,profit
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Delhi,2022,120,25
Mumbai,2021,150,35
Chennai,2023,90,10
Delhi,2021,100,20
Mumbai,2023,160,40


In [8]:
# Sort rows by index label (city), A-Z.
# The index holds duplicate labels (Delhi, Mumbai), and the default
# kind='quicksort' does not guarantee the relative order of tied rows.
# Asking for a stable sort keeps ties in their original order, so the
# displayed result is reproducible.
df_indexed.sort_index(kind='stable')

Unnamed: 0_level_0,year,sales,profit
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chennai,2023,90,10
Delhi,2022,120,25
Delhi,2021,100,20
Mumbai,2021,150,35
Mumbai,2023,160,40


In [9]:
# Sort rows by index label (city), Z-A.
# The index holds duplicate labels (Delhi, Mumbai), and the default
# kind='quicksort' does not guarantee the relative order of tied rows.
# A stable sort keeps rows with the same city in their original order.
df_indexed.sort_index(ascending=False, kind='stable')

Unnamed: 0_level_0,year,sales,profit
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mumbai,2021,150,35
Mumbai,2023,160,40
Delhi,2022,120,25
Delhi,2021,100,20
Chennai,2023,90,10


## Ranking data

In [10]:
# Rank 'sales' from smallest (rank 1) upward and store it as a new column.
# method='average' is the default tie-breaking rule, spelled out here for
# clarity; ranks come back as floats because ties would share an average.
# This adds the column to df in place, so later cells will show it too.
df['sales_rank'] = df['sales'].rank(method='average')
df

Unnamed: 0,city,year,sales,profit,sales_rank
0,Delhi,2022,120,25,3.0
1,Mumbai,2021,150,35,4.0
2,Chennai,2023,90,10,1.0
3,Delhi,2021,100,20,2.0
4,Mumbai,2023,160,40,5.0


In [11]:
# Dense ranking: tied values share a rank and the next rank follows
# without a gap. Every 'sales' value in this sample is distinct, so the
# result is identical to the default ranking in 'sales_rank'.
df['sales_rank_dense'] = df['sales'].rank(method='dense', ascending=True)
df

Unnamed: 0,city,year,sales,profit,sales_rank,sales_rank_dense
0,Delhi,2022,120,25,3.0,3.0
1,Mumbai,2021,150,35,4.0,4.0
2,Chennai,2023,90,10,1.0,1.0
3,Delhi,2021,100,20,2.0,2.0
4,Mumbai,2023,160,40,5.0,5.0


In [12]:
# Descending rank: the highest 'profit' gets rank 1.
# method='average' is the default tie-breaking rule, written out explicitly.
df['profit_rank_desc'] = df['profit'].rank(method='average', ascending=False)
df

Unnamed: 0,city,year,sales,profit,sales_rank,sales_rank_dense,profit_rank_desc
0,Delhi,2022,120,25,3.0,3.0,3.0
1,Mumbai,2021,150,35,4.0,4.0,2.0
2,Chennai,2023,90,10,1.0,1.0,5.0
3,Delhi,2021,100,20,2.0,2.0,4.0
4,Mumbai,2023,160,40,5.0,5.0,1.0


## nlargest / nsmallest

In [13]:
# Top 3 rows by 'sales', returned in descending order of 'sales'.
df.nlargest(n=3, columns='sales')

Unnamed: 0,city,year,sales,profit,sales_rank,sales_rank_dense,profit_rank_desc
4,Mumbai,2023,160,40,5.0,5.0,1.0
1,Mumbai,2021,150,35,4.0,4.0,2.0
0,Delhi,2022,120,25,3.0,3.0,3.0


In [14]:
# Bottom 2 rows by 'profit', returned in ascending order of 'profit'.
df.nsmallest(n=2, columns='profit')

Unnamed: 0,city,year,sales,profit,sales_rank,sales_rank_dense,profit_rank_desc
2,Chennai,2023,90,10,1.0,1.0,5.0
3,Delhi,2021,100,20,2.0,2.0,4.0


In [15]:
# Per-city top 2 'sales' values. The result is a Series indexed by
# (city, original row label); Chennai has only one row, so it yields one.
df.groupby(by='city')['sales'].nlargest(n=2)

city      
Chennai  2     90
Delhi    0    120
         3    100
Mumbai   4    160
         1    150
Name: sales, dtype: int64