### 行的多层索引
- Series的多层索引
- DataFrame的多层索引
- 创建方式
    - 隐式方式， 使用二维数组，如 [[一级索引名称,], [二级索引名称,]]
    - 显式方式， MultiIndex
        - MultiIndex.from_arrays([[一级索引名称,], [二级索引名称,]])
        - MultiIndex.from_tuples([(一级索引名， 二级索引名), ])
        - MultiIndex.from_product([[一级索引名称], [二级索引名称]])
        
<font color=red>**说明： from_arrays() 需要逐个写出每一行的完整标签，因此同一个索引名称会重复出现多次；而 from_product() 中每个索引名称只需写一次，由各层标签的笛卡尔积自动生成全部组合。**</font>

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame, MultiIndex

In [2]:
# January sales of four phone brands (Oppo, 华为, 小米, iPhone) in five
# cities (西安, 上海, 北京, 广州, 深圳).
# Implicit MultiIndex: the index is a list of two parallel label arrays, so
# every city label is repeated once per brand.
cosumers_1 = Series(
    data=np.random.randint(1000, 2000, size=20),
    index=[
        ['西安'] * 4 + ['上海'] * 4 + ['北京'] * 4 + ['广州'] * 4 + ['深圳'] * 4,
        ['Oppo', '华为', '小米', 'iPhone'] * 5,
    ],
)
cosumers_1

西安  Oppo      1577
    华为        1780
    小米        1037
    iPhone    1262
上海  Oppo      1448
    华为        1326
    小米        1127
    iPhone    1367
北京  Oppo      1671
    华为        1087
    小米        1294
    iPhone    1064
广州  Oppo      1942
    华为        1851
    小米        1042
    iPhone    1990
深圳  Oppo      1232
    华为        1677
    小米        1747
    iPhone    1747
dtype: int64

In [3]:
# February sales of two brands (Oppo, iPhone) in two cities (西安, 上海).
# Explicit MultiIndex built from (city, brand) tuples.
cosumers_2 = Series(
    data=[1500, 600, 2412, 50],
    index=MultiIndex.from_tuples([
        ('西安', 'Oppo'), ('西安', 'iPhone'),
        ('上海', 'Oppo'), ('上海', 'iPhone'),
    ]),
)

cosumers_2

西安  Oppo      1500
    iPhone     600
上海  Oppo      2412
    iPhone      50
dtype: int64

In [4]:
# Build the 5-city x 4-brand index with MultiIndex.from_product(): each
# label is listed once and the Cartesian product supplies all 20 pairs.
c1 = Series(
    data=np.random.randint(100, 2000, size=20),
    index=MultiIndex.from_product([
        ['西安', '上海', '北京', '广州', '深圳'],
        ['Oppo', '华为', '小米', 'iPhone'],
    ]),
)
c1

西安  Oppo       205
    华为        1702
    小米        1925
    iPhone    1623
上海  Oppo      1441
    华为        1801
    小米         856
    iPhone     323
北京  Oppo      1379
    华为        1741
    小米        1613
    iPhone    1581
广州  Oppo      1758
    华为        1431
    小米         465
    iPhone     243
深圳  Oppo      1560
    华为        1996
    小米        1928
    iPhone    1960
dtype: int64

In [5]:
# Monthly sales table for months 1-12: one row per (city, brand) pair,
# one column per month number.
index = MultiIndex.from_product([
    ['西安', '上海', '北京', '广州', '深圳'],
    ['Oppo', '华为', '小米', 'iPhone'],
])
sales = DataFrame(
    data=np.random.randint(50, 2000, size=(20, 12)),
    index=index,
    columns=np.arange(1, 13),
)
sales

Unnamed: 0,Unnamed: 1,1,2,3,4,5,6,7,8,9,10,11,12
西安,Oppo,225,1498,1017,1719,1166,1277,1433,521,1839,654,764,794
西安,华为,428,247,1505,568,90,518,161,1328,782,1049,1742,1294
西安,小米,705,1468,901,1746,193,1442,938,1607,1794,1207,1578,262
西安,iPhone,874,1339,1307,1716,772,478,358,1703,600,441,810,1277
上海,Oppo,869,882,1902,613,1166,1762,393,513,899,316,1459,1873
上海,华为,206,1299,1832,1988,1496,334,657,773,1094,1493,1811,1836
上海,小米,279,937,933,264,1415,1165,300,1367,977,969,1610,1178
上海,iPhone,625,1509,1822,555,526,90,1686,1823,1098,1955,232,698
北京,Oppo,192,1890,1396,910,496,804,1152,536,1809,1685,1799,1396
北京,华为,966,789,1275,1129,1198,1943,952,1824,1990,1866,696,1095


### 多层列索引
- 同多层行索引

In [7]:
# Quarterly sales of the four brands in the five cities for 2017 and 2018:
# rows are (city, brand) pairs, columns are (year, quarter) pairs.
datas = np.random.randint(50, 5000, size=(20, 8))
index = MultiIndex.from_product([
    ['西安', '上海', '北京', '广州', '深圳'],
    ['Oppo', '华为', '小米', 'iPhone'],
])
columns = MultiIndex.from_product([
    ['2017', '2018'],
    ['1', '2', '3', '4'],
])

df = DataFrame(data=datas, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2017,2017,2017,2017,2018,2018,2018,2018
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,3,4,1,2,3,4
西安,Oppo,156,2932,4701,3929,129,1754,343,4777
西安,华为,3980,4247,1809,2632,3192,1649,1796,603
西安,小米,469,3078,790,2165,3447,4734,3327,4517
西安,iPhone,1168,797,2954,3233,1926,4612,57,3404
上海,Oppo,702,1149,1100,246,2580,415,4830,963
上海,华为,3443,3029,178,3321,3212,4767,2981,3608
上海,小米,4029,2362,2138,4118,3264,408,958,1619
上海,iPhone,1607,4438,677,3610,2248,3354,394,2503
北京,Oppo,1031,2543,3684,1273,4840,2832,3388,2122
北京,华为,1193,3418,3624,3090,2306,3103,524,799


### 多层索引选择和切片操作

In [8]:
# Sales of every brand in 西安 for 2017.
# Selecting by first-level labels (row label, then column label) works the
# same way as label selection on a DataFrame with ordinary indexes.
df.loc['西安', '2017']

Unnamed: 0,1,2,3,4
Oppo,156,2932,4701,3929
华为,3980,4247,1809,2632
小米,469,3078,790,2165
iPhone,1168,797,2954,3233


In [11]:
# Second-quarter sales of every brand in 北京, for each year.
# First-level row label + full column keys: each tuple holds the
# (first-level, second-level) column labels.
df.loc['北京', [('2017', '2'), ('2018', '2')]]

Unnamed: 0_level_0,2017,2018
Unnamed: 0_level_1,2,2
Oppo,2543,2832
华为,3418,3103
小米,569,4875
iPhone,238,2055


In [14]:
# 2018 sales of 小米 in every city.
# First-level column label + full (city, brand) row keys, one per city.
df.loc[[(city, '小米') for city in ('西安', '上海', '北京', '广州', '深圳')],
       '2018']

Unnamed: 0,Unnamed: 1,1,2,3,4
西安,小米,3447,4734,3327,4517
上海,小米,3264,408,958,1619
北京,小米,2382,4875,4224,2992
广州,小米,2084,1122,1484,2234
深圳,小米,3188,3855,240,3879


In [16]:
# 2018 sales of 小米 in every city, selected by position with .iloc slices:
# every 4th row starting at row 2, and all columns from position 4 onward.
df.iloc[2::4, 4:]

Unnamed: 0_level_0,Unnamed: 1_level_0,2018,2018,2018,2018
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,3,4
西安,小米,3447,4734,3327,4517
上海,小米,3264,408,958,1619
北京,小米,2382,4875,4224,2992
广州,小米,2084,1122,1484,2234
深圳,小米,3188,3855,240,3879


In [18]:
# Third-quarter sales of iPhone in every city, for each year.
# Second-level row label + second-level column label, picked by position:
# every 4th row starting at row 3, every 4th column starting at column 2.
df.iloc[3::4, 2::4]

Unnamed: 0_level_0,Unnamed: 1_level_0,2017,2018
Unnamed: 0_level_1,Unnamed: 1_level_1,3,3
西安,iPhone,2954,57
上海,iPhone,677,394
北京,iPhone,2807,3996
广州,iPhone,392,122
深圳,iPhone,4271,4250


In [19]:
# Subtract 1000 from the 2017 second-quarter sales of 小米 in 西安.
# A full (city, brand) row key plus a full (year, quarter) column key
# addresses exactly one cell.
df.loc[('西安', '小米'), ('2017', '2')] -= 1000
df

Unnamed: 0_level_0,Unnamed: 1_level_0,2017,2017,2017,2017,2018,2018,2018,2018
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,3,4,1,2,3,4
西安,Oppo,156,2932,4701,3929,129,1754,343,4777
西安,华为,3980,4247,1809,2632,3192,1649,1796,603
西安,小米,469,2078,790,2165,3447,4734,3327,4517
西安,iPhone,1168,797,2954,3233,1926,4612,57,3404
上海,Oppo,702,1149,1100,246,2580,415,4830,963
上海,华为,3443,3029,178,3321,3212,4767,2981,3608
上海,小米,4029,2362,2138,4118,3264,408,958,1619
上海,iPhone,1607,4438,677,3610,2248,3354,394,2503
北京,Oppo,1031,2543,3684,1273,4840,2832,3388,2122
北京,华为,1193,3418,3624,3090,2306,3103,524,799


### 扩展： python中的对象，哪些可以作为dict的key

In [20]:
class ChildError(Exception):
    """Minimal custom exception that adds nothing to ``Exception``."""

In [25]:
# An instance of the custom exception, kept as a sample object to hash.
e = ChildError('不能创建分身')

In [26]:
# Objects usable as dict keys: str, int, float, bool (True, False), None,
# tuple, plain objects, functions and classes.
# A dict key must be hashable; the built-in hash() can be called on an
# object to try to obtain its hash value.
# Each of these kinds appears here as a *value*.
disen = {
    'name': 'disen',
    'age': 20,
    'salary': 1000.5,
    'action': lambda x: x**2,
    'is_alive': True,
    'level': None,
    'city': ('陕西', '西安'),
    'error': ChildError,
    'error_obj': e
}

In [32]:
# Fetch the lambda stored under 'action' and call it.
action = disen.get('action')
action(100)

10000

In [27]:
# Invert the mapping: every value of disen becomes a key, and its former key
# becomes the value. This requires every value in disen to be hashable.
disen_ct = {item: label for label, item in disen.items()}

In [28]:
# Display the inverted dict.
disen_ct

{'disen': 'name',
 20: 'age',
 1000.5: 'salary',
 <function __main__.<lambda>(x)>: 'action',
 True: 'is_alive',
 None: 'level',
 ('陕西', '西安'): 'city',
 __main__.ChildError: 'error',
 __main__.ChildError('不能创建分身'): 'error_obj'}

In [29]:
# hash() returns a value for the exception instance, i.e. it is hashable.
hash(e)

-9223372036570724620

### 索引的堆操作
- stack()  将列索引转成行索引（标签）
- unstack() 将行索引转成列索引（标签）

两个函数都有一个 level 参数，用于指定要转换哪一级别的标签：0 表示一级，1 表示二级，-1 表示最内层级别（默认值）。

In [34]:
# Move the second-level (innermost) column labels into the rows, where they
# become the third row level.
df.stack(level=-1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,2017,2018
西安,Oppo,1,156,129
西安,Oppo,2,2932,1754
西安,Oppo,3,4701,343
西安,Oppo,4,3929,4777
西安,华为,1,3980,3192
西安,华为,2,4247,1649
西安,华为,3,1809,1796
西安,华为,4,2632,603
西安,小米,1,469,3447
西安,小米,2,2078,4734


In [35]:
# After stacking, one value is addressed by the 3-level row key
# (city, brand, quarter) plus the year column label.
df.stack(level=-1).loc[('西安', 'Oppo', '3'), '2017']

4701

In [36]:
# Inspect the 3-level row MultiIndex of the stacked frame.
df.stack(level=-1).index

MultiIndex(levels=[['上海', '北京', '广州', '深圳', '西安'], ['Oppo', 'iPhone', '华为', '小米'], ['1', '2', '3', '4']],
           labels=[[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]])

In [37]:
# Move the first-level column labels into the rows, where they become the
# third row level.
df.stack(level=0)

Unnamed: 0,Unnamed: 1,Unnamed: 2,1,2,3,4
西安,Oppo,2017,156,2932,4701,3929
西安,Oppo,2018,129,1754,343,4777
西安,华为,2017,3980,4247,1809,2632
西安,华为,2018,3192,1649,1796,603
西安,小米,2017,469,2078,790,2165
西安,小米,2018,3447,4734,3327,4517
西安,iPhone,2017,1168,797,2954,3233
西安,iPhone,2018,1926,4612,57,3404
上海,Oppo,2017,702,1149,1100,246
上海,Oppo,2018,2580,415,4830,963


In [38]:
# Move the first-level row labels into the columns, where they become the
# third column level.
df.unstack(level=0)

Unnamed: 0_level_0,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2017,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018,2018
Unnamed: 0_level_1,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4,4
Unnamed: 0_level_2,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安,上海,北京,广州,深圳,西安
Oppo,702,1031,1520,104,156,1149,2543,3785,778,2932,1100,3684,976,2040,4701,246,1273,1592,1025,3929,2580,4840,3969,2696,129,415,2832,1028,1877,1754,4830,3388,3601,4793,343,963,2122,97,540,4777
iPhone,1607,4627,2176,1293,1168,4438,238,3043,3299,797,677,2807,392,4271,2954,3610,3512,3849,126,3233,2248,2030,1011,2238,1926,3354,2055,2657,3777,4612,394,3996,122,4250,57,2503,2555,1419,2410,3404
华为,3443,1193,4231,139,3980,3029,3418,1892,2179,4247,178,3624,1584,715,1809,3321,3090,4817,4885,2632,3212,2306,2118,4662,3192,4767,3103,4011,2929,1649,2981,524,3252,676,1796,3608,799,1864,3617,603
小米,4029,75,4681,465,469,2362,569,4870,356,2078,2138,3027,1108,4585,790,4118,4007,254,3089,2165,3264,2382,2084,3188,3447,408,4875,1122,3855,4734,958,4224,1484,240,3327,1619,2992,2234,3879,4517


In [45]:
# Make the city label the first-level column index (columns end up as
# city / year / quarter, rows keep only the brand):
#   unstack(level=0): city moves from the rows to the innermost column level
#   stack(level=0):   year moves from the columns into the rows
#   stack(level=0):   quarter moves into the rows; city is the only column level
#   unstack(level=1): year moves back to the columns, below city
#   unstack():        quarter (innermost row level) moves back, below year
df.unstack(level=0).stack(level=0).stack(level=0).unstack(level=1).unstack()

Unnamed: 0_level_0,上海,上海,上海,上海,上海,上海,上海,上海,北京,北京,北京,北京,北京,北京,北京,北京,广州,广州,广州,广州,广州,广州,广州,广州,深圳,深圳,深圳,深圳,深圳,深圳,深圳,深圳,西安,西安,西安,西安,西安,西安,西安,西安
Unnamed: 0_level_1,2017,2017,2017,2017,2018,2018,2018,2018,2017,2017,2017,2017,2018,2018,2018,2018,2017,2017,2017,2017,2018,2018,2018,2018,2017,2017,2017,2017,2018,2018,2018,2018,2017,2017,2017,2017,2018,2018,2018,2018
Unnamed: 0_level_2,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4
Oppo,702,1149,1100,246,2580,415,4830,963,1031,2543,3684,1273,4840,2832,3388,2122,1520,3785,976,1592,3969,1028,3601,97,104,778,2040,1025,2696,1877,4793,540,156,2932,4701,3929,129,1754,343,4777
iPhone,1607,4438,677,3610,2248,3354,394,2503,4627,238,2807,3512,2030,2055,3996,2555,2176,3043,392,3849,1011,2657,122,1419,1293,3299,4271,126,2238,3777,4250,2410,1168,797,2954,3233,1926,4612,57,3404
华为,3443,3029,178,3321,3212,4767,2981,3608,1193,3418,3624,3090,2306,3103,524,799,4231,1892,1584,4817,2118,4011,3252,1864,139,2179,715,4885,4662,2929,676,3617,3980,4247,1809,2632,3192,1649,1796,603
小米,4029,2362,2138,4118,3264,408,958,1619,75,569,3027,4007,2382,4875,4224,2992,4681,4870,1108,254,2084,1122,1484,2234,465,356,4585,3089,3188,3855,240,3879,469,2078,790,2165,3447,4734,3327,4517


### 多层索引的聚合操作
- max(level=0)  
- min()
- mean()

指定了level之后，按level层级进行数据的聚合操作。注意：聚合函数的 level 参数自 pandas 1.3 起已弃用，并在 2.0 中移除；新版本请改用 `df.groupby(level=...)` 分组后再聚合。

In [47]:
# Yearly sales total for every (city, brand) row: sum the quarter columns
# within each first-level column label (the year).
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so group by level instead. The frame is transposed so the grouping runs
# over the row axis; sort=False keeps the labels in their original order.
df.T.groupby(level=0, sort=False).sum().T

Unnamed: 0,Unnamed: 1,2017,2018
西安,Oppo,11718,7003
西安,华为,12668,7240
西安,小米,5502,16025
西安,iPhone,8152,9999
上海,Oppo,3197,8788
上海,华为,9971,14568
上海,小米,12647,6249
上海,iPhone,10332,8499
北京,Oppo,8531,13182
北京,华为,11325,6732


In [49]:
# Sales of all brands combined, per city, for every (year, quarter) column:
# sum the rows within each first-level row label (the city).
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so group by level instead; sort=False keeps the cities in their original
# order.
df.groupby(level=0, sort=False).sum()

Unnamed: 0_level_0,2017,2017,2017,2017,2018,2018,2018,2018
Unnamed: 0_level_1,1,2,3,4,1,2,3,4
西安,5773,10054,10254,11959,8694,12749,5523,13301
上海,9781,10978,4093,11295,11304,8944,9163,8693
北京,6926,6768,13142,11882,11558,12865,12132,8468
广州,12608,13590,4060,10512,9182,8818,8459,5614
深圳,2001,6612,11611,9125,12784,12438,9959,10446


In [51]:
# Sales of each brand across all cities, for every (year, quarter) column:
# sum the rows within each second-level row label (the brand).
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so group by level instead; sort=False keeps the brands in their original
# order.
df.groupby(level=1, sort=False).sum()

Unnamed: 0_level_0,2017,2017,2017,2017,2018,2018,2018,2018
Unnamed: 0_level_1,1,2,3,4,1,2,3,4
Oppo,3513,11187,12501,8065,14214,7906,16955,8499
华为,12986,14765,7910,18745,15490,16459,9229,10491
小米,9719,10235,11648,13633,14365,14994,10233,15241
iPhone,10871,11815,11101,14330,9453,16455,8819,12291


In [52]:
# Yearly sales total of each brand across all cities.
# Step 1: sum the rows within each brand (second-level row label).
# Step 2: sum the quarter columns within each year (first-level column
#         label); the frame is transposed so the grouping runs over rows.
# DataFrame.sum(level=...) was deprecated in pandas 1.3 and removed in 2.0,
# so both steps group by level instead; sort=False keeps the original order.
df.groupby(level=1, sort=False).sum().T.groupby(level=0, sort=False).sum().T

Unnamed: 0,2017,2018
Oppo,35266,47574
华为,54406,51669
小米,45235,54833
iPhone,48117,47018


练习11：
计算各个科目期中期末平均成绩
计算各科目张三李四的最高分