In [1]:
import pandas as pd
import numpy as np

# 按索引排序
## Series按索引排序
Series使用`.sort_index()`方法可以按索引排序*（索引为字符串时按字典序排序，即逐字符比较Unicode码点，对纯ASCII字符而言等同于按ASCII码排序）*

In [2]:
# Four standard-normal draws labelled with a deliberately unsorted string
# index, so that sorting by index produces a visible reordering.
a = pd.Series(data=np.random.randn(4), index=list("acbd"))
a

a    0.469360
c   -1.168672
b    0.356986
d    1.048451
dtype: float64

In [3]:
# Display a copy of the Series ordered by index label (ascending is the default).
a.sort_index(ascending=True)

a    0.469360
b    0.356986
c   -1.168672
d    1.048451
dtype: float64

In [4]:
# Same index sort, but from the largest label down to the smallest.
a.sort_index(axis=0, ascending=False)

d    1.048451
c   -1.168672
b    0.356986
a    0.469360
dtype: float64

## DataFrame按索引排序
DataFrame同样使用`sort_index()`排序。注意：`axis=0`（默认）表示按行索引排序，`axis=1`表示按列标签（列头）排序

In [5]:
# 3x3 frame of standard-normal draws; both the row labels and the column
# labels are out of order so sorting along either axis is observable.
a = pd.DataFrame(
    data=np.random.randn(3, 3),
    index=list("cba"),
    columns=list("143"),
)
a

Unnamed: 0,1,4,3
c,0.269903,0.917099,0.452932
b,-0.039466,2.241592,-1.046896
a,-1.471238,0.708927,0.569439


In [6]:
# Reorder the rows by their index labels (axis=0 is the default axis).
a.sort_index(axis=0)

Unnamed: 0,1,4,3
a,-1.471238,0.708927,0.569439
b,-0.039466,2.241592,-1.046896
c,0.269903,0.917099,0.452932


In [7]:
# Reorder the columns by their labels, largest label first.
a.sort_index(axis="columns", ascending=False)

Unnamed: 0,4,3,1
c,0.917099,0.452932,0.269903
b,2.241592,-1.046896,-0.039466
a,0.708927,0.569439,-1.471238


# 按值排序
## Series按值排序
Series按值排序使用`sort_values()`方法

In [8]:
# Five standard-normal draws with the default 0..4 integer index.
b = pd.Series(data=np.random.randn(5))
b

0    0.302439
1   -0.075233
2   -1.220369
3   -1.062012
4    2.157151
dtype: float64

In [9]:
# Display the values from smallest to largest; each value keeps its original index label.
b.sort_values(ascending=True)

2   -1.220369
3   -1.062012
1   -0.075233
0    0.302439
4    2.157151
dtype: float64

In [10]:
# Same value sort, but from largest to smallest.
b.sort_values(axis=0, ascending=False)

4    2.157151
0    0.302439
1   -0.075233
3   -1.062012
2   -1.220369
dtype: float64

## DataFrame按值排序
DataFrame同样使用`sort_values()`方法排序。可以通过传入`by=`参数确定按哪一列或哪一行的值排序；`axis`参数决定`by`指的是列标签（`axis=0`，默认，此时对行重新排序）还是行索引标签（`axis=1`，此时对列重新排序）

In [11]:
# 4x4 frame of standard-normal draws with default integer row and column labels.
b = pd.DataFrame(data=np.random.randn(4, 4))
b

Unnamed: 0,0,1,2,3
0,-0.386207,-1.267659,1.639546,0.089644
1,-0.539182,0.350437,0.433781,0.613236
2,-1.119834,-1.331724,0.108438,-0.449766
3,-0.253233,-0.068464,-1.019283,-0.181039


In [12]:
# Reorder the rows so that the values in column 1 are ascending.
b.sort_values(by=1, axis=0)

Unnamed: 0,0,1,2,3
2,-1.119834,-1.331724,0.108438,-0.449766
0,-0.386207,-1.267659,1.639546,0.089644
3,-0.253233,-0.068464,-1.019283,-0.181039
1,-0.539182,0.350437,0.433781,0.613236


In [13]:
# Reorder the columns so that the values in the row labelled 3 are ascending.
b.sort_values(by=3, axis="columns")

Unnamed: 0,2,0,3,1
0,1.639546,-0.386207,0.089644,-1.267659
1,0.433781,-0.539182,0.613236,0.350437
2,0.108438,-1.119834,-0.449766,-1.331724
3,-1.019283,-0.253233,-0.181039,-0.068464


# rank排序
## Series 的rank排序
`rank`方法不会修改原有的数据，而是返回一个新的Series，其中每个位置上的值被替换为该值的排名（默认升序，最小值排名为1，并列的值取平均排名）

In [14]:
# Six standard-normal draws with the default 0..5 integer index.
c = pd.Series(data=np.random.randn(6))
c

0   -0.345732
1   -0.986282
2    0.036093
3    0.842136
4    1.780281
5    0.425187
dtype: float64

In [15]:
# Display each value's rank (1 = smallest); ties would share their average
# rank, which is why the result is float-typed.
c.rank(method="average")

0    2.0
1    1.0
2    3.0
3    5.0
4    6.0
5    4.0
dtype: float64

## DataFrame的rank排序
DataFrame的rank方法与Series相似，同样返回新对象而不修改原有数据，其中每个值被替换为它在所在列（`axis=0`，默认）或所在行（`axis=1`）中的排名

In [16]:
# 4x4 frame of standard-normal draws with default integer row and column labels.
c = pd.DataFrame(data=np.random.randn(4, 4))
c

Unnamed: 0,0,1,2,3
0,0.254753,-1.992049,-1.361444,-1.141423
1,-0.279746,0.247671,0.263105,0.034353
2,1.934963,-1.021818,0.700599,0.468822
3,0.661806,1.067422,-0.11394,1.858247


In [17]:
# Rank every value within its own column (axis=0 is the default).
c.rank(axis=0)

Unnamed: 0,0,1,2,3
0,2.0,1.0,1.0,1.0
1,1.0,3.0,3.0,2.0
2,4.0,2.0,4.0,3.0
3,3.0,4.0,2.0,4.0


In [18]:
# Rank every value within its own row instead of its column.
c.rank(axis="columns")

Unnamed: 0,0,1,2,3
0,4.0,1.0,2.0,3.0
1,1.0,3.0,4.0,2.0
2,4.0,1.0,3.0,2.0
3,2.0,3.0,1.0,4.0


# 按出现频率排序
使用`Series.value_counts()`方法可以计算Series中每个值出现的次数，并默认按次数降序排列（顶层函数`pandas.value_counts()`功能相同，但自pandas 2.1起已被弃用）

In [20]:
# Small string Series containing a repeated value ("a" appears twice) to count.
d = pd.Series(data=list("aabc"))
d

0    a
1    a
2    b
3    c
dtype: object

In [21]:
# Count how many times each distinct value occurs, most frequent first.
# The Series method is used because the top-level pd.value_counts() function
# is deprecated (FutureWarning since pandas 2.1).
d.value_counts()

a    2
b    1
c    1
dtype: int64