https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rank.html

In [1]:
import pandas as pd
import numpy as np

# Toy data set: five animals and their leg counts. The snake's count is
# deliberately missing (NaN) so the ranking examples below show NaN handling.
df = pd.DataFrame(
    {
        'Animal': ['cat', 'penguin', 'dog', 'spider', 'snake'],
        'Number_legs': [4, 2, 4, 8, np.nan],
    }
)
df

Unnamed: 0,Animal,Number_legs
0,cat,4.0
1,penguin,2.0
2,dog,4.0
3,spider,8.0
4,snake,


In [2]:
# Print the built-in documentation of DataFrame.rank (signature and parameters).
help(df.rank)

Help on method rank in module pandas.core.generic:

rank(axis=0, method: 'str' = 'average', numeric_only: 'bool_t | None | lib.NoDefault' = <no_default>, na_option: 'str' = 'keep', ascending: 'bool_t' = True, pct: 'bool_t' = False) -> 'NDFrameT' method of pandas.core.frame.DataFrame instance
    Compute numerical data ranks (1 through n) along axis.
    
    By default, equal values are assigned a rank that is the average of the
    ranks of those values.
    
    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Index to direct ranking.
    method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
        How to rank the group of records that have the same value (i.e. ties):
    
        * average: average rank of the group
        * min: lowest rank in the group
        * max: highest rank in the group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between

In [3]:
# default_rank: rank() called with no arguments. Tied values share the
# average of the positions they occupy, and NaN stays unranked.
df = df.assign(
    default_rank=df['Number_legs'].rank(),
)
df

Unnamed: 0,Animal,Number_legs,default_rank
0,cat,4.0,2.5
1,penguin,2.0,1.0
2,dog,4.0,2.5
3,spider,8.0,4.0
4,snake,,


In [4]:
# max_rank: with method='max', tied records all receive the highest rank of
# their group (e.g. 'cat' and 'dog' occupy positions 2 and 3, so both get 3).
df = df.assign(
    max_rank=df['Number_legs'].rank(method='max'),
)
df

Unnamed: 0,Animal,Number_legs,default_rank,max_rank
0,cat,4.0,2.5,3.0
1,penguin,2.0,1.0,1.0
2,dog,4.0,2.5,3.0
3,spider,8.0,4.0,4.0
4,snake,,,


In [5]:
# NA_bottom: with na_option='bottom', records whose value is NaN are ranked
# last instead of being left without a rank.
df = df.assign(
    NA_bottom=df['Number_legs'].rank(na_option='bottom'),
)
df

Unnamed: 0,Animal,Number_legs,default_rank,max_rank,NA_bottom
0,cat,4.0,2.5,3.0,2.5
1,penguin,2.0,1.0,1.0,1.0
2,dog,4.0,2.5,3.0,2.5
3,spider,8.0,4.0,4.0,4.0
4,snake,,,,5.0


In [6]:
# pct_rank: with pct=True the rank is reported as a percentile, i.e. the
# rank divided by the number of ranked (non-NaN) records.
df = df.assign(
    pct_rank=df['Number_legs'].rank(pct=True),
)
df

Unnamed: 0,Animal,Number_legs,default_rank,max_rank,NA_bottom,pct_rank
0,cat,4.0,2.5,3.0,2.5,0.625
1,penguin,2.0,1.0,1.0,1.0,0.25
2,dog,4.0,2.5,3.0,2.5,0.625
3,spider,8.0,4.0,4.0,4.0,1.0
4,snake,,,,5.0,


In [7]:
# Fresh copy of the same toy data, used to compare rank() methods with the
# SQL window functions row_number(), rank() and dense_rank().
df1 = pd.DataFrame(
    {
        'Animal': ['cat', 'penguin', 'dog', 'spider', 'snake'],
        'Number_legs': [4, 2, 4, 8, np.nan],
    }
)
df1

Unnamed: 0,Animal,Number_legs
0,cat,4.0
1,penguin,2.0
2,dog,4.0
3,spider,8.0
4,snake,


In [8]:
# Equivalent of SQL row_number(): method='first' breaks ties by order of
# appearance, so every ranked row gets a distinct rank.
df1 = df1.assign(
    first_rank=df1['Number_legs'].rank(method='first'),
)
df1

Unnamed: 0,Animal,Number_legs,first_rank
0,cat,4.0,2.0
1,penguin,2.0,1.0
2,dog,4.0,3.0
3,spider,8.0,4.0
4,snake,,


In [9]:
# Equivalent of SQL rank(): method='min' gives tied rows the lowest rank of
# their group and leaves a gap after the tie.
df1 = df1.assign(
    min_rank=df1['Number_legs'].rank(method='min'),
)
df1

Unnamed: 0,Animal,Number_legs,first_rank,min_rank
0,cat,4.0,2.0,2.0
1,penguin,2.0,1.0,1.0
2,dog,4.0,3.0,2.0
3,spider,8.0,4.0,4.0
4,snake,,,


In [10]:
# Equivalent of SQL dense_rank(): method='dense' gives tied rows the same
# rank with no gaps between groups. Note that this example ranks in
# descending order (ascending=False), so the largest value gets rank 1.
df1 = df1.assign(
    dense_rank=df1['Number_legs'].rank(method='dense', ascending=False),
)
df1

Unnamed: 0,Animal,Number_legs,first_rank,min_rank,dense_rank
0,cat,4.0,2.0,2.0,2.0
1,penguin,2.0,1.0,1.0,3.0
2,dog,4.0,3.0,2.0,2.0
3,spider,8.0,4.0,4.0,1.0
4,snake,,,,
