In [1]:
import numpy as np
import pandas as pd

# 1.分层索引：

### 1.1 Series：

In [2]:
data = pd.Series(np.random.randn(9),index=[['android','android','android','ios','ios','wp','wp','symbian','symbian'],['htc','google','huawei','iphone6','iphone7','htc','Nokia','Nokia','LG']])
data

android  htc        1.409699
         google    -1.902702
         huawei    -0.189435
ios      iphone6   -0.081863
         iphone7    0.537393
wp       htc       -0.474064
         Nokia     -1.046248
symbian  Nokia      1.309650
         LG        -0.215271
dtype: float64

#### MultiIndex 

In [3]:
data.index

MultiIndex([('android',     'htc'),
            ('android',  'google'),
            ('android',  'huawei'),
            (    'ios', 'iphone6'),
            (    'ios', 'iphone7'),
            (     'wp',     'htc'),
            (     'wp',   'Nokia'),
            ('symbian',   'Nokia'),
            ('symbian',      'LG')],
           )

#### 选择出数据的子集

In [4]:
data["android"]

htc       1.409699
google   -1.902702
huawei   -0.189435
dtype: float64

In [5]:
# 如果试图索引一些没有完全 lexsorted 的索引会报错，需要先进行第二部分提到的重排序操作
data["android":"symbian"]

UnsortedIndexError: 'Key length (1) was greater than MultiIndex lexsort depth (0)'

In [6]:
data.loc[["android","symbian"]]

android  htc       1.409699
         google   -1.902702
         huawei   -0.189435
symbian  Nokia     1.309650
         LG       -0.215271
dtype: float64

#### 在内部层级中进行选择

In [7]:
data.loc[: , "huawei"]

android   -0.189435
dtype: float64

#### unstack

In [8]:
data.unstack()

Unnamed: 0,LG,Nokia,google,htc,huawei,iphone6,iphone7
android,,,-1.902702,1.409699,-0.189435,,
ios,,,,,,-0.081863,0.537393
symbian,-0.215271,1.30965,,,,,
wp,,-1.046248,,-0.474064,,,


In [9]:
data.unstack().stack()

android  google    -1.902702
         htc        1.409699
         huawei    -0.189435
ios      iphone6   -0.081863
         iphone7    0.537393
symbian  LG        -0.215271
         Nokia      1.309650
wp       Nokia     -1.046248
         htc       -0.474064
dtype: float64

### 1.2 DataFrame：

#### 在DataFrame中，每个轴都可以拥有分层索引且可以有名称

In [10]:
data = pd.DataFrame(np.arange(12).reshape((4, 3)),
                     index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
                     columns=[["android", "android", "ios"],["HTC", "Google", "iphone"]])
data

Unnamed: 0_level_0,Unnamed: 1_level_0,android,android,ios
Unnamed: 0_level_1,Unnamed: 1_level_1,HTC,Google,iphone
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [11]:
data.index.names = ["key1" ,"key2"]

In [12]:
data.columns.names = ["platform","device"]

In [13]:
data

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


#### 访问其 nlevels 属性来查看索引有多少级别

In [14]:
data.index.nlevels

2

#### 在DataFrame中也可以通过列索引选择列中的数据组

In [15]:
data["android"]

Unnamed: 0_level_0,device,HTC,Google
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


#### 一个MultiIndex可以自己创建，然后复用；前面 DataFrame 中具有级别名称的列也可以这样创建

In [16]:
pd.MultiIndex.from_arrays([["android","android","ios"],["HTC","Google","iphone"]],
                        names=["platform","device"])

MultiIndex([('android',    'HTC'),
            ('android', 'Google'),
            (    'ios', 'iphone')],
           names=['platform', 'device'])

# 2.重排序和层级排序

#### swaplevel 

In [17]:
data

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [18]:
data.swaplevel('key1','key2')

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [19]:
data.swaplevel(0,1)

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


#### sort_index 

In [20]:
data.sort_index(level=1)

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [21]:
data.swaplevel(0,1).sort_index(level=0)

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11


# 3.按层级进行汇总统计

In [22]:
data

Unnamed: 0_level_0,platform,android,android,ios
Unnamed: 0_level_1,device,HTC,Google,iphone
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [23]:
data.sum(level="key1")

  data.sum(level="key1")


platform,android,android,ios
device,HTC,Google,iphone
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,3,5,7
b,15,17,19


In [24]:
data.groupby("key1").sum()

platform,android,android,ios
device,HTC,Google,iphone
key1,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
a,3,5,7
b,15,17,19


In [25]:
data.sum(level="platform",axis=1)

  data.sum(level="platform",axis=1)


Unnamed: 0_level_0,platform,android,ios
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,2
a,2,7,5
b,1,13,8
b,2,19,11


In [26]:
data.groupby(level="platform",axis=1).sum()

Unnamed: 0_level_0,platform,android,ios
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,2
a,2,7,5
b,1,13,8
b,2,19,11


# 4.使用 DataFrame 的列进行索引

In [27]:
df = pd.DataFrame({"key1":["a", "b", "b", "a", "b", "a"],
                    "key2":range(6),
                    "key3":["one", "one", "two", "one", "one", "two"],
                    "key4":[11, 22, 33, 22, 22, 33]})
df

Unnamed: 0,key1,key2,key3,key4
0,a,0,one,11
1,b,1,one,22
2,b,2,two,33
3,a,3,one,22
4,b,4,one,22
5,a,5,two,33


#### set_index 

In [28]:
df2 = df.set_index(["key1","key3"])
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,key2,key4
key1,key3,Unnamed: 2_level_1,Unnamed: 3_level_1
a,one,0,11
b,one,1,22
b,two,2,33
a,one,3,22
b,one,4,22
a,two,5,33


#### drop=False

In [29]:
df.set_index(["key1","key3"],drop=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,key1,key2,key3,key4
key1,key3,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
a,one,a,0,one,11
b,one,b,1,one,22
b,two,b,2,two,33
a,one,a,3,one,22
b,one,b,4,one,22
a,two,a,5,two,33


#### reset_index

In [30]:
df2.reset_index()

Unnamed: 0,key1,key3,key2,key4
0,a,one,0,11
1,b,one,1,22
2,b,two,2,33
3,a,one,3,22
4,b,one,4,22
5,a,two,5,33
