In [1]:
#階層式索引
#分類欄或列


import numpy as np
import pandas as pd

# Series with a two-level hierarchical index (MultiIndex): letters on the
# outer level, integers on the inner level.
# The values come from a seeded Generator so that Restart & Run All always
# yields the same numbers; saved outputs produced by an earlier unseeded run
# will be refreshed the next time the notebook is executed.
data = pd.Series(np.random.default_rng(42).standard_normal(9),
                 index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],
                        [1, 2, 3, 1, 2, 3, 1, 2, 3]])
data

a  1   -1.102558
   2    0.903652
   3   -0.317243
b  1   -0.412698
   2   -0.016791
c  3   -0.412937
   1   -0.558813
d  2   -0.623319
   3    1.349742
dtype: float64

In [2]:
# Partial indexing: pick every entry whose outer-level label is 'b'.
data.loc['b']

1   -0.412698
2   -0.016791
dtype: float64

In [4]:
# Label slice on the outer level; both endpoints ('b' and 'c') are included.
data.loc['b':'c']

b  1   -0.412698
   2   -0.016791
c  3   -0.412937
   1   -0.558813
dtype: float64

In [7]:
# A list of outer-level labels works as a selector too.
data.loc[['b', 'd']]

b  1   -0.412698
   2   -0.016791
d  2   -0.623319
   3    1.349742
dtype: float64

In [8]:
# Hierarchical indexes are useful for reshaping data and for group-wise
# operations, e.g. building a pivot-table-like view.
# unstack() pivots the innermost index level into columns, turning the
# Series into a DataFrame.

data.unstack(level=-1)

Unnamed: 0,1,2,3
a,-1.102558,0.903652,-0.317243
b,-0.412698,-0.016791,
c,-0.558813,,-0.412937
d,,-0.623319,1.349742


In [10]:
# Round trip: pivot the inner level out to columns, then stack the columns
# back into the inner index level.
data.unstack(level=-1).stack(level=-1)

a  1   -1.102558
   2    0.903652
   3   -0.317243
b  1   -0.412698
   2   -0.016791
c  1   -0.558813
   3   -0.412937
d  2   -0.623319
   3    1.349742
dtype: float64

In [15]:
# DataFrame whose rows and columns both carry a two-level index.
frame = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['台北', '台北', '台中'], ['Green', 'Red', 'Green']],
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,台北,台北,台中
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [18]:
# Name the two row levels and the two column levels in place, then select
# the '台北' column group (its outer column level is dropped in the result).
frame.index.set_names(['key1', 'key2'], inplace=True)
frame.columns.set_names(['縣市', '顏色'], inplace=True)
frame.loc[:, '台北']

Unnamed: 0_level_0,顏色,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [19]:
# Operations demonstrated on this frame in the cells that follow:
#   swaplevel()  - exchange two index levels
#   sort_index() - sort by a chosen index level
#   level-wise group sums
frame

Unnamed: 0_level_0,縣市,台北,台北,台中
Unnamed: 0_level_1,顏色,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [20]:
# Exchange the 'key1' and 'key2' row levels; a new frame is returned and the
# rows keep their existing order.
frame.swaplevel(i='key1', j='key2')

Unnamed: 0_level_0,縣市,台北,台北,台中
Unnamed: 0_level_1,顏色,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [22]:
# Sort the rows by the outermost row-index level (position 0).
frame.sort_index(axis=0, level=0)

Unnamed: 0_level_0,縣市,台北,台北,台中
Unnamed: 0_level_1,顏色,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [24]:
# Sort the rows by the 'key2' level, referenced by name instead of position.
frame.sort_index(axis=0, level='key2')

Unnamed: 0_level_0,縣市,台北,台北,台中
Unnamed: 0_level_1,顏色,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [25]:
# Show frame again: the swaplevel/sort_index results above were never
# assigned back, so frame itself still has its original row order.
frame

Unnamed: 0_level_0,縣市,台北,台北,台中
Unnamed: 0_level_1,顏色,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [27]:
# Sum the rows within each 'key1' group of the row MultiIndex.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so the aggregation is expressed with groupby(level=...) instead.
# sort=False keeps the groups in order of first appearance.
frame.groupby(level='key1', sort=False).sum()

縣市,台北,台北,台中
顏色,Green,Red,Green
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,3,5,7
b,15,17,19


In [29]:
# Sum across the columns that share the same '顏色' (colour) label.
# DataFrame.sum(level=..., axis=1) was removed in pandas 2.0 and
# groupby(axis=1) is deprecated, so group the transposed frame by the column
# level and transpose the result back to the original orientation.
frame.T.groupby(level='顏色', sort=False).sum().T

Unnamed: 0_level_0,顏色,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
b,1,14,7
b,2,20,10


In [30]:
# Moving DataFrame columns into the index and back again:
#   set_index()   - columns -> index levels
#   reset_index() - index levels -> columns
frame = pd.DataFrame(dict(
    a=range(7),
    b=range(7, 0, -1),
    c=['one'] * 3 + ['two'] * 4,
    d=[*range(3), *range(4)],
))
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [34]:
# Build a new frame indexed by the 'c' and 'd' columns; frame is left as is.
frame2 = frame.set_index(keys=['c', 'd'])

In [35]:
# drop=False keeps 'c' and 'd' as ordinary columns in addition to using
# them as the index levels.
frame.set_index(keys=['c', 'd'], drop=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


In [36]:
# frame2 was built by set_index without drop=False, so 'c' and 'd' appear
# only as index levels and no longer as columns.
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [39]:
# Turn the 'c'/'d' index levels back into ordinary columns, then display the
# columns in a, b, c, d order.
frame3 = frame2.reset_index()
frame3[['a', 'b', 'c', 'd']]


Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3
