In [1]:
# 多层索引
import pandas as pd
import numpy as np

In [2]:
# Part 1: build a DataFrame from a dict of labelled columns, as the starting
# point for a multi-level index. Column labels encode two levels as "<outer>_<inner>".
df1 = pd.DataFrame({
    'row': [0, 1, 2],
    'One_X': [1.1] * 3,
    'One_Y': [1.2] * 3,
    'Two_X': [1.11] * 3,
    'Two_Y': [1.22] * 3,
})
print(df1)

   row  One_X  One_Y  Two_X  Two_Y
0    0    1.1    1.2   1.11   1.22
1    1    1.1    1.2   1.11   1.22
2    2    1.1    1.2   1.11   1.22


In [3]:
# Move the 'row' column into the index so that only the value columns remain.
# NOTE: df1 is rebound here, so this cell cannot simply be re-run on its own
# (after the first run 'row' is no longer a column).
df1 = df1.set_index('row')
print(df1)

     One_X  One_Y  Two_X  Two_Y
row                            
0      1.1    1.2   1.11   1.22
1      1.1    1.2   1.11   1.22
2      1.1    1.2   1.11   1.22


In [4]:
# Turn the flat "<outer>_<inner>" labels into a two-level column MultiIndex:
# each label is split on '_' into an (outer, inner) tuple, e.g. 'One_X' -> ('One', 'X').
df1.columns = pd.MultiIndex.from_tuples([tuple(c.split('_')) for c in df1.columns])
print(df1)

     One        Two      
       X    Y     X     Y
row                      
0    1.1  1.2  1.11  1.22
1    1.1  1.2  1.11  1.22
2    1.1  1.2  1.11  1.22


In [5]:
# First stack, then reset the index:
# stack(0) moves column level 0 (the outer 'One'/'Two' labels) into the row index,
# and reset_index(1) turns that stacked index level back into an ordinary column.
# The level has no name, so the new column is auto-named 'level_1'.
df1 = df1.stack(0).reset_index(1)
print(df1)

    level_1     X     Y
row                    
0       One  1.10  1.20
0       Two  1.11  1.22
1       One  1.10  1.20
1       Two  1.11  1.22
2       One  1.10  1.20
2       Two  1.11  1.22


In [6]:
# Tidy the labels: rename the three columns produced by the previous step.
# The assignment is positional, so the order must match the existing columns.
df1.columns = ['Sample','All_X','All_Y']
print(df1)

    Sample  All_X  All_Y
row                     
0      One   1.10   1.20
0      Two   1.11   1.22
1      One   1.10   1.20
1      Two   1.11   1.22
2      One   1.10   1.20
2      Two   1.11   1.22


In [7]:
# Part 2: arithmetic -- operations on a multi-level index rely on level-wise broadcasting.
# The columns are the full cross product of outer labels A/B/C and inner labels O/I.
# NOTE: no random seed is set in the visible cells, so the values differ on every run.
cols = pd.MultiIndex.from_product([['A', 'B', 'C'], ['O', 'I']])
df2 = pd.DataFrame(
    data=np.random.randn(2, 6),
    index=['n', 'm'],
    columns=cols,
)
print(df2)

          A                   B                  C          
          O         I         O        I         O         I
n -0.775432  1.951352 -0.351463  0.27430 -1.243607 -1.399602
m -0.398901  0.927159 -0.339113 -0.76981 -0.812962 -0.485457


In [8]:
# Sum across columns, grouped by the inner column level (level 1: 'O' / 'I').
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0, so the
# transposed frame is grouped by that level instead and transposed back.
# sort=False keeps the labels in their order of appearance (O, I).
print(df2.T.groupby(level=1, sort=False).sum().T)

          O         I
n -2.370503  0.826050
m -1.550976 -0.328109


In [9]:
# Sum across columns, grouped by the outer column level (level 0: 'A' / 'B' / 'C').
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0, so the
# transposed frame is grouped by that level instead and transposed back.
# sort=False keeps the labels in their order of appearance.
print(df2.T.groupby(level=0, sort=False).sum().T)

          A         B         C
n  1.175920 -0.077164 -2.643209
m  0.528258 -1.108923 -1.298419


In [10]:
# Element-wise division with broadcasting over a column level.
# df2['C'] (columns O / I) is matched against column level 1 of df2, so A/O and B/O
# are divided by C/O, and A/I and B/I by C/I. This is NOT a division by the constant 10.
# As a consequence every value in the 'C' group becomes 1.0.
# NOTE: df2 is overwritten with the resulting ratios.
df2 = df2.div(df2['C'],level=1)
print(df2)

          A                   B              C     
          O         I         O         I    O    I
n  0.623535 -1.394219  0.282616 -0.195984  1.0  1.0
m  0.490676 -1.909867  0.417133  1.585743  1.0  1.0


In [11]:
# Part 3: slicing -- taking cross-sections of a multi-level index with xs.
# Row labels are (outer, inner) pairs; the single column holds 11, 22, ..., 55.
cords = [
    ('AA', 'one'), ('AA', 'six'),
    ('BB', 'one'), ('BB', 'two'), ('BB', 'six'),
]
c = pd.MultiIndex.from_tuples(cords)
df3 = pd.DataFrame(data=range(11, 56, 11), index=c, columns=['MyData'])
print(df3)

        MyData
AA one      11
   six      22
BB one      33
   two      44
   six      55


In [12]:
# Cross-section on the row axis: select every row whose outer label is 'BB'.
# axis and level are both optional: axis defaults to 0 (rows); when level is
# omitted the key is matched starting from the outermost level.
# level=0 is the OUTER level ('AA' / 'BB'), not the inner one; the selected
# level is dropped, so the result is indexed by the inner labels only.
a = df3.xs('BB',level=0,axis=0)
print(a)

     MyData
one      33
two      44
six      55


In [13]:
# Same axis (rows), but now the second level: level=1 is the INNER level
# ('one' / 'two' / 'six'), not the outer one. Rows labelled 'six' are kept and
# the result is indexed by the remaining outer labels ('AA', 'BB').
b = df3.xs('six',level=1,axis=0)
print(b)

    MyData
AA      22
BB      55


In [14]:
# Part 4: slicing a multi-level index -- method 2: .loc with slice objects (an alternative to xs)
import itertools

In [15]:
# Label tuples for the two axes, generated as Cartesian products
# (the first list varies slowest).
a = list(itertools.product(
    ['Ada', 'Quinn', 'Violet'],   # outer row labels
    ['Comp', 'Math', 'Sci'],      # inner row labels
))
b = list(itertools.product(
    ['Exams', 'Labs'],            # outer column labels
    ['I', 'II'],                  # inner column labels
))

In [17]:
# Row index: built from the tuples in `a`, with the levels named 'Student' and 'Course'.
# Column index: built from the tuples in `b`, levels left unnamed.
# NOTE: `cols` rebinds the name that earlier held df2's column index.
indx = pd.MultiIndex.from_tuples(a,names=['Student','Course'])
cols = pd.MultiIndex.from_tuples(b)

In [18]:
# Synthetic integer grid with 9 rows and 4 columns: a base of 70 plus the row and
# column positions, with a small (row * col) % 3 perturbation.
data1 = [
    [70 + col + row + (col * row) % 3 for col in range(4)]
    for row in range(9)
]
print(data1)

[[70, 71, 72, 73], [71, 73, 75, 74], [72, 75, 75, 75], [73, 74, 75, 76], [74, 76, 78, 77], [75, 78, 78, 78], [76, 77, 78, 79], [77, 79, 81, 80], [78, 81, 81, 81]]


In [19]:
# Build a DataFrame with a multi-level index on both axes:
# rows are labelled by indx, columns by cols, and the values come from data1.
df4 = pd.DataFrame(data1,index=indx,columns=cols)
print(df4)

               Exams     Labs    
                   I  II    I  II
Student Course                   
Ada     Comp      70  71   72  73
        Math      71  73   75  74
        Sci       72  75   75  75
Quinn   Comp      73  74   75  76
        Math      74  76   78  77
        Sci       75  78   78  78
Violet  Comp      76  77   78  79
        Math      77  79   81  80
        Sci       78  81   81  81


In [20]:
# Select all rows whose outer row label (Student) is 'Violet';
# the Student level is dropped, leaving Course as the row index.
print(df4.loc['Violet'])

       Exams     Labs    
           I  II    I  II
Course                   
Comp      76  77   78  79
Math      77  79   81  80
Sci       78  81   81  81


In [21]:
# Slicing experiments with .loc on both axes.
# `All` (slice(None)) stands for "everything on this level / axis".
# Rows: every Student with Course == 'Math'; columns: all of them.
All = slice(None)
print(df4.loc[(All,'Math'),All])

               Exams     Labs    
                   I  II    I  II
Student Course                   
Ada     Math      71  73   75  74
Quinn   Math      74  76   78  77
Violet  Math      77  79   81  80


In [22]:
# Rows: Students from 'Ada' through 'Quinn' (the label slice includes both ends)
# with Course == 'Math'; columns: all of them.
print(df4.loc[(slice('Ada','Quinn'),'Math'),All])

               Exams     Labs    
                   I  II    I  II
Student Course                   
Ada     Math      71  73   75  74
Quinn   Math      74  76   78  77


In [23]:
# Rows: Course == 'Math' for every Student; columns: only the 'Exams' group
# (the outer column level is dropped in the result).
print(df4.loc[(All,'Math'),'Exams'])

                 I  II
Student Course        
Ada     Math    71  73
Quinn   Math    74  76
Violet  Math    77  79


In [24]:
# Rows: Course == 'Math' for every Student; columns: the inner label 'II'
# under every outer group (both column levels are kept).
print(df4.loc[(All,'Math'),(All,'II')])

               Exams Labs
                  II   II
Student Course           
Ada     Math      73   74
Quinn   Math      76   77
Violet  Math      79   80
