In [1]:
import pandas as pd
import numpy as np

# 层次化索引
在同一个轴上拥有多个（两个及以上）索引级别，称为层次化索引（MultiIndex）。
## Series的层次化索引

In [4]:
# Two parallel label lists, one per index level: passing them together as the
# index gives the Series a two-level (hierarchical) index.
outer_labels = ["a", "a", "b", "b"]
inner_labels = [1, 2, 1, 2]
a = pd.Series(np.random.randn(4), index=[outer_labels, inner_labels])
print(a)
# Bare trailing expression so the notebook displays the index object itself.
a.index

a  1    0.494095
   2    0.860942
b  1   -0.888777
   2    0.089773
dtype: float64


MultiIndex(levels=[['a', 'b'], [1, 2]],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

In [8]:
# Selecting on the outer label alone returns the sub-Series keyed by the inner
# level; indexing that sub-Series with an inner label returns a single value.
outer_a = a["a"]
outer_a_inner_1 = outer_a[1]
print(outer_a_inner_1, type(outer_a_inner_1))
print(outer_a, type(outer_a))

0.494094724769 <class 'numpy.float64'>
1    0.494095
2    0.860942
dtype: float64 <class 'pandas.core.series.Series'>


In [9]:
# Take every outer label (the `:`) but only the entries whose inner label is 1.
inner_1_across_outer = a[:, 1]
print(inner_1_across_outer)

a    0.494095
b   -0.888777
dtype: float64


## DataFrame的层次化索引

In [14]:
# Hierarchical labels on both axes: each axis is given a pair of parallel
# label lists (outer level first, inner level second).
row_levels = [[1, 1, 2, 2], [1, 2, 1, 2]]
column_levels = [["a", "a", "b", "b"], [1, 2, 1, 2]]
b = pd.DataFrame(
    np.random.randn(4, 4),
    index=row_levels,
    columns=column_levels,
)
print(b)

            a                   b          
            1         2         1         2
1 1 -0.136009 -0.046841  1.111815 -0.816332
  2 -0.681994 -1.295141 -1.141866 -0.784326
2 1  0.385674  0.520862 -0.439898  0.866908
  2 -0.778311 -0.903412 -0.621226  0.366785


## DataFrame与层次化Series相互转化

In [16]:
# Round trip between a DataFrame and a hierarchically indexed Series:
# stack() folds the column labels into an inner index level, and unstack()
# pivots that inner level back out into columns.
square_frame = pd.DataFrame(np.random.randn(3, 3))
a = square_frame.stack()
print(a)
print(a.unstack())

0  0   -1.284332
   1    1.247881
   2   -0.055405
1  0    0.761875
   1   -0.126662
   2   -0.911591
2  0   -0.679783
   1   -0.201970
   2   -0.165232
dtype: float64
          0         1         2
0 -1.284332  1.247881 -0.055405
1  0.761875 -0.126662 -0.911591
2 -0.679783 -0.201970 -0.165232


# 多层索引操作
## 排列索引顺序
使用`.swaplevel()`可以交换两个索引级别的顺序（只交换级别，数据行本身不会重新排序）

In [17]:
# Print the frame as-is, then again with its two row-index levels exchanged.
levels_swapped = b.swaplevel(1, 0)
for frame in (b, levels_swapped):
    print(frame)

            a                   b          
            1         2         1         2
1 1 -0.136009 -0.046841  1.111815 -0.816332
  2 -0.681994 -1.295141 -1.141866 -0.784326
2 1  0.385674  0.520862 -0.439898  0.866908
  2 -0.778311 -0.903412 -0.621226  0.366785
            a                   b          
            1         2         1         2
1 1 -0.136009 -0.046841  1.111815 -0.816332
2 1 -0.681994 -1.295141 -1.141866 -0.784326
1 2  0.385674  0.520862 -0.439898  0.866908
2 2 -0.778311 -0.903412 -0.621226  0.366785


使用`.sort_index(level=<a>)`可以按第 a 级索引（级别编号从 0 开始）对数据排序

In [19]:
# Exchange the two row-index levels, then sort the rows on the new outer level.
swapped_rows = b.swaplevel(1, 0)
print(swapped_rows.sort_index(level=0))

            a                   b          
            1         2         1         2
1 1 -0.136009 -0.046841  1.111815 -0.816332
  2  0.385674  0.520862 -0.439898  0.866908
2 1 -0.681994 -1.295141 -1.141866 -0.784326
  2 -0.778311 -0.903412 -0.621226  0.366785


## 根据级别统计
可以根据某一层索引进行汇总统计：旧版 pandas 中可直接向汇总统计函数传入`level=`参数；该参数自 pandas 1.3 起弃用、2.0 起移除，新版应先使用`.groupby(level=...)`按该层索引分组，再调用汇总函数

In [24]:
# `DataFrame.sum(level=...)` was deprecated in pandas 1.3 and removed in 2.0;
# grouping on the index level and then aggregating is the supported equivalent.

# Sum the rows that share the same outer (level-0) row label.
print(b.groupby(level=0).sum())

# Sum the columns that share the same inner (level-1) column label.
# Transposing first keeps this a plain row-wise groupby (groupby(axis=1) is
# itself deprecated in pandas 2.1+); the final .T restores the orientation.
print(b.T.groupby(level=1).sum().T)

          a                   b          
          1         2         1         2
1 -0.818003 -1.341981 -0.030051 -1.600659
2 -0.392637 -0.382550 -1.061124  1.233693
            1         2
1 1  0.975806 -0.863173
  2 -1.823860 -2.079467
2 1 -0.054225  1.387771
  2 -1.399537 -0.536627


## 多层索引与列互换
使用`.set_index()`可以将一列或多列转为索引（传入多列时得到层次化索引）；`drop=`参数决定是否丢弃被转为索引的列，默认`drop=True`即丢弃，传入`drop=False`则同时保留原来的列

In [28]:
# Rebind `b` to a plain 4x4 frame with default integer labels on both axes;
# the set_index / reset_index cells below operate on this frame.
random_values = np.random.randn(4, 4)
b = pd.DataFrame(random_values)
print(b)

          0         1         2         3
0  0.026205 -0.881022  0.817465  0.004099
1  0.735007  1.530284 -0.539946  0.661112
2  0.690341 -0.690820  0.017885  0.297844
3  0.692861 -0.315491 -0.518512  0.735319


In [33]:
# Promote column 1 to the row index; with the default `drop`, the column is
# removed from the data.
indexed_by_col_1 = b.set_index(1)
print(indexed_by_col_1)

# Build a two-level row index from columns 1 and 2, and keep both columns in
# the data as well by passing drop=False.
indexed_by_cols_1_2 = b.set_index([1, 2], drop=False)
print(indexed_by_cols_1_2)

                  0         2         3
1                                      
-0.881022  0.026205  0.817465  0.004099
 1.530284  0.735007 -0.539946  0.661112
-0.690820  0.690341  0.017885  0.297844
-0.315491  0.692861 -0.518512  0.735319
                            0         1         2         3
1         2                                                
-0.881022  0.817465  0.026205 -0.881022  0.817465  0.004099
 1.530284 -0.539946  0.735007  1.530284 -0.539946  0.661112
-0.690820  0.017885  0.690341 -0.690820  0.017885  0.297844
-0.315491 -0.518512  0.692861 -0.315491 -0.518512  0.735319


使用`.reset_index()`可以将索引（包括层次化索引的各个级别）还原为普通的列，行索引则恢复为默认的整数索引

In [37]:
# Move column 1 into the index, then undo it: reset_index() turns the index
# back into an ordinary column and restores the default integer row labels.
d = b.set_index(1)
print(d)
index_restored = d.reset_index()
print(index_restored)

                  0         2         3
1                                      
-0.881022  0.026205  0.817465  0.004099
 1.530284  0.735007 -0.539946  0.661112
-0.690820  0.690341  0.017885  0.297844
-0.315491  0.692861 -0.518512  0.735319
          1         0         2         3
0 -0.881022  0.026205  0.817465  0.004099
1  1.530284  0.735007 -0.539946  0.661112
2 -0.690820  0.690341  0.017885  0.297844
3 -0.315491  0.692861 -0.518512  0.735319
