In [1]:
import numpy as np
import pandas as pd

# 1.排序(Sorting)：

### 1.1 sort_index()方法

#### Series类型

In [2]:
# Integer Series whose string labels are deliberately not in alphabetical order.
obj = pd.Series(data=np.arange(4), index=list("dabc"))
obj

d    0
a    1
b    2
c    3
dtype: int32

In [3]:
# Reorder the Series by its index labels (ascending is the default, spelled out here).
obj.sort_index(ascending=True)

a    1
b    2
c    3
d    0
dtype: int32

#### DataFrame类型

In [4]:
# 2x4 frame whose row labels and column labels are both out of order.
df = pd.DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=[3, 1],
    columns=list("dabc"),
)
df

Unnamed: 0,d,a,b,c
3,0,1,2,3
1,4,5,6,7


In [5]:
# With no axis given, sort_index orders the rows by their index labels.
df.sort_index(axis="index")

Unnamed: 0,d,a,b,c
1,4,5,6,7
3,0,1,2,3


In [6]:
# Sort by the labels along a chosen axis: here the columns are reordered by name.
df.sort_index(axis="columns")

Unnamed: 0,a,b,c,d
3,1,2,3,0
1,5,6,7,4


In [7]:
# Sorting is ascending by default; pass ascending=False to sort in descending order.
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,d,c,b,a
3,0,3,2,1
1,4,7,6,5


### 1.2 sort_values()方法

#### Series类型

In [8]:
# Unsorted integer Series with the default 0..n-1 index.
obj = pd.Series(data=[4, 7, -3, 2])
obj

0    4
1    7
2   -3
3    2
dtype: int64

In [9]:
# Order the Series by its values (ascending, the default); each value keeps its index label.
obj.sort_values(ascending=True)

2   -3
3    2
0    4
1    7
dtype: int64

In [10]:
# Series containing missing values: by default, sort_values() places any NaN
# entries at the end of the result.
obj = pd.Series(data=[4, np.nan, 7, np.nan, -3, 2])
obj

0    4.0
1    NaN
2    7.0
3    NaN
4   -3.0
5    2.0
dtype: float64

In [11]:
# Default NaN placement made explicit: missing values go after all sorted values.
obj.sort_values(na_position="last")

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [12]:
# The na_position option can move the missing values to the front instead.
obj.sort_values(ascending=True, na_position="first")

1    NaN
3    NaN
4   -3.0
5    2.0
0    4.0
2    7.0
dtype: float64

#### DataFrame类型

In [13]:
# Two-column frame: "b" holds the values to sort, "a" is a 0/1 grouping key.
df = pd.DataFrame(data=dict(b=[4, 7, -3, 2], a=[0, 1, 0, 1]))
df

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [14]:
# Order the rows by the values of a single column.
df.sort_values(by="b", ascending=True)

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [15]:
# Multi-key sort: rows are ordered by "a" first, with ties broken by "b".
df.sort_values(by=["a", "b"], ascending=True)

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


# 2.排名(Ranking)：

### 2.1 Series类型

In [16]:
# Series with repeated values (7 and 4 each appear twice) to show how ties are ranked.
obj = pd.Series(data=[7, -5, 7, 4, 2, 0, 4])
obj

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [17]:
# Default tie-breaking: tied values share the average of the ranks they span.
obj.rank(method="average")

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

#### 默认情况下按升序排名（最小值的排名为 1）；通过设置 ascending=False 可以按降序排名。

In [18]:
# Descending ranking: the largest value receives rank 1; ties still share the average rank.
obj.rank(method="average", ascending=False)

0    1.5
1    7.0
2    1.5
3    3.5
4    5.0
5    6.0
6    3.5
dtype: float64

#### 也可以根据值在数据中出现的先后顺序来打破并列排名（method="first"）；method="max" / "min" 则让并列的值统一取该组的最大 / 最小排名

In [19]:
# Break ties by order of appearance: the earlier of two equal values gets the lower rank.
obj.rank(method="first", ascending=True)

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [20]:
# Every member of a tied group receives the highest rank in that group.
obj.rank(method="max", ascending=True)

0    7.0
1    1.0
2    7.0
3    5.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [21]:
# Every member of a tied group receives the lowest rank in that group.
obj.rank(method="min", ascending=True)

0    6.0
1    1.0
2    6.0
3    4.0
4    3.0
5    2.0
6    4.0
dtype: float64

### 2.2 DataFrame类型

In [22]:
# Three numeric columns; "a" contains ties so column-wise ranks can be fractional.
df = pd.DataFrame(
    data=dict(
        b=[4.3, 7, -3, 2],
        a=[0, 1, 0, 1],
        c=[-2, 5, 8, -2.5],
    )
)
df

Unnamed: 0,b,a,c
0,4.3,0,-2.0
1,7.0,1,5.0
2,-3.0,0,8.0
3,2.0,1,-2.5


In [23]:
# Rank down the rows: each column is ranked independently of the others.
df.rank(axis="index", method="average")

Unnamed: 0,b,a,c
0,3.0,1.5,2.0
1,4.0,3.5,3.0
2,1.0,1.5,4.0
3,2.0,3.5,1.0


In [24]:
# Rank across the columns: the values within each row are ranked against one another.
df.rank(axis="columns")

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0


In [25]:
# Row-wise ranking in descending order: the largest value in each row gets rank 1.
df.rank(axis="columns", ascending=False)

Unnamed: 0,b,a,c
0,1.0,2.0,3.0
1,1.0,3.0,2.0
2,3.0,2.0,1.0
3,1.0,2.0,3.0
