# pandas层次化索引

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

## 1. 创建多层行索引

### 1) 隐式构造

最常见的方法是给DataFrame构造函数的index参数传递两个或更多的数组

In [2]:
# Implicit construction: handing DataFrame a list of parallel arrays for
# `index` / `columns` makes pandas build a two-level MultiIndex on that axis.
index = [
    ['一班'] * 3 + ['二班'] * 3,                      # outer row level
    ['张三', '李四', '王五', '赵六', '田七', '孙八'],  # inner row level
]
columns = [
    ['期中'] * 3 + ['期末'] * 3,   # outer column level
    ['语文', '数学', '英语'] * 2,  # inner column level
]
data = np.random.randint(0, 150, size=(6, 6))  # random integers in [0, 150)
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,69,136,129,27,15,127
一班,李四,143,128,25,52,147,5
一班,王五,19,46,140,18,98,72
二班,赵六,37,62,62,134,139,44
二班,田七,70,93,56,74,43,69
二班,孙八,70,44,27,12,29,85


- Series也可以创建多层索引

In [3]:
# A Series accepts the same list-of-arrays form to get a two-level index.
index = [
    ['一班', '一班', '一班', '二班', '二班', '二班'],  # outer level
    ['张三', '李四', '王五', '赵六', '田七', '孙八'],  # inner level
]
data = np.random.randint(0, 150, size=6)  # six random integers in [0, 150)
s = pd.Series(data, index=index)
s

一班  张三     17
    李四     34
    王五    130
二班  赵六     45
    田七     93
    孙八     96
dtype: int32

### 2) 显式构造pd.MultiIndex

- 使用数组

In [4]:
# Explicit construction: the row index is built up front with
# MultiIndex.from_arrays (one array per level); the columns still use the
# implicit list-of-arrays form.
index = pd.MultiIndex.from_arrays(
    [
        ['一班', '一班', '一班', '二班', '二班', '二班'],
        ['张三', '李四', '王五', '赵六', '田七', '孙八'],
    ]
)
columns = [
    ['期中', '期中', '期中', '期末', '期末', '期末'],
    ['语文', '数学', '英语', '语文', '数学', '英语'],
]
data = np.random.randint(low=0, high=150, size=(6, 6))
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,126,114,147,104,86,84
一班,李四,74,36,133,108,102,18
一班,王五,31,18,137,124,10,127
二班,赵六,34,9,66,13,57,118
二班,田七,101,44,19,138,127,107
二班,孙八,116,110,144,133,141,48


- 使用tuple

In [5]:
# Explicit construction from tuples: each tuple is one complete
# (outer, inner) row label.
index = pd.MultiIndex.from_tuples(
    [
        ('一班', '张三'),
        ('一班', '李四'),
        ('一班', '王五'),
        ('二班', '赵六'),
        ('二班', '田七'),
        ('二班', '孙八'),
    ]
)
columns = [
    ['期中', '期中', '期中', '期末', '期末', '期末'],
    ['语文', '数学', '英语', '语文', '数学', '英语'],
]
data = np.random.randint(0, 150, size=(6, 6))
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,56,121,13,69,8,103
一班,李四,44,31,147,6,53,145
一班,王五,122,1,21,34,116,9
二班,赵六,82,126,42,120,79,10
二班,田七,107,129,33,80,32,37
二班,孙八,79,83,134,102,83,56


- 使用product

    最简单，推荐使用

In [6]:
# Rows: explicit MultiIndex from parallel arrays.
index = pd.MultiIndex.from_arrays(
    [
        ['一班', '一班', '一班', '二班', '二班', '二班'],
        ['张三', '李四', '王五', '赵六', '田七', '孙八'],
    ]
)
# Columns: from_product builds every (outer, inner) combination of the two lists.
columns = pd.MultiIndex.from_product(
    [
        ['期中', '期末'],
        ['语文', '数学', '英语'],
    ]
)
data = np.random.randint(0, 150, size=(6, 6))
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


============================================

练习8：

1. 创建一个DataFrame，表示出张三李四期中期末各科成绩

============================================

## 2. 多层列索引

除了行索引index，列索引columns也能用同样的方法创建多层索引

## 3. 多层索引对象的索引与切片操作

### 1）Series的操作

【重要】对于Series来说，直接中括号[]与使用.loc[]完全一样，推荐使用中括号索引和切片。

In [7]:
s

一班  张三     17
    李四     34
    王五    130
二班  赵六     45
    田七     93
    孙八     96
dtype: int32

(1) 索引

In [8]:
# A bare inner-level label is not a valid key on this two-level index and
# raises KeyError. The error is the point of the demo, so it is caught and
# printed; an uncaught exception would stop "Restart & Run All" at this cell.
try:
    s['张三']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: '张三'

In [9]:
# .loc cannot look up an inner-level label directly either; it raises KeyError.
# The error is caught and printed so the notebook keeps running past this demo.
try:
    s.loc['张三']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: '张三'

In [11]:
# Two-step lookup: select the outer label first, then the inner label on the
# resulting (single-level) Series.
s.loc['一班'].loc['张三']

17

In [12]:
s.loc['一班', '张三'] # recommended form: outer and inner label in a single .loc call

17

In [14]:
# Positional indexing ignores the labels; passing a list of positions keeps the
# result a Series that still carries the two-level index.
s.iloc[[0]]

一班  张三    17
dtype: int32

(2) 切片

In [15]:
s.loc['张三': '王五'] # no error, but the result is empty: do not slice directly on inner-level labels

Series([], dtype: int32)

In [16]:
# Label slice on the outer level: everything from '一班' onward, which here is
# the whole Series.
s.loc['一班':]

一班  张三     17
    李四     34
    王五    130
二班  赵六     45
    田七     93
    孙八     96
dtype: int32

In [17]:
# Selecting a single outer label returns its entries indexed by the inner level only.
s.loc['一班']

张三     17
李四     34
王五    130
dtype: int32

In [19]:
# Positional slice: the first four entries, regardless of their labels.
s.iloc[0:4]

一班  张三     17
    李四     34
    王五    130
二班  赵六     45
dtype: int32

### 2）DataFrame的操作

In [20]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


(1) 可以直接使用列名称来进行列索引

In [21]:
# An outer column label selects its whole group of columns; the result keeps
# only the inner column level.
df['期中']

Unnamed: 0,Unnamed: 1,语文,数学,英语
一班,张三,83,145,19
一班,李四,29,94,78
一班,王五,119,72,54
二班,赵六,92,74,20
二班,田七,25,38,111
二班,孙八,60,95,2


In [22]:
# A DataFrame cannot be indexed by an inner-level column label directly either;
# it raises KeyError. The error is caught and printed so the notebook keeps
# running past this demo.
try:
    df['语文']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: '语文'

In [24]:
# Two-step column lookup: outer label first, then the inner label on the
# sub-frame. The resulting Series is named by the inner label only.
df['期中']['语文']

一班  张三     83
    李四     29
    王五    119
二班  赵六     92
    田七     25
    孙八     60
Name: 语文, dtype: int32

In [25]:
# An (outer, inner) pair selects a single column in one step; the resulting
# Series is named by the full tuple.
df['期中', '语文']

一班  张三     83
    李四     29
    王五    119
二班  赵六     92
    田七     25
    孙八     60
Name: (期中, 语文), dtype: int32

行多级索引的索引和切片操作

In [26]:
# Selecting an outer row label returns that group's rows, indexed by the inner
# row level only.
df.loc['一班']

Unnamed: 0_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,语文,数学,英语,语文,数学,英语
张三,83,145,19,87,3,2
李四,29,94,78,44,98,44
王五,119,72,54,17,80,6


In [27]:
# An inner-level row label cannot be looked up on its own; .loc raises KeyError.
# The error is caught and printed so the notebook keeps running past this demo.
try:
    df.loc['张三']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: '张三'

In [29]:
# Two-step row lookup: outer label first, then the inner label on the sub-frame.
# The result is that row as a Series over the two-level columns.
df.loc['一班'].loc['张三']

期中  语文     83
    数学    145
    英语     19
期末  语文     87
    数学      3
    英语      2
Name: 张三, dtype: int32

In [30]:
# Both row levels in a single .loc call; the resulting Series is named by the
# full (outer, inner) tuple.
df.loc['一班', '张三']

期中  语文     83
    数学    145
    英语     19
期末  语文     87
    数学      3
    英语      2
Name: (一班, 张三), dtype: int32

行多级索引的切片操作

In [31]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


In [32]:
# A label slice inside plain [] slices ROWS, not columns: the output contains
# every row from '一班' through '二班' with all columns kept.
df['一班': '二班']

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


In [33]:
# Positional row slice: the first four rows, regardless of their labels.
df.iloc[0:4]

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39


(2) 使用行索引需要用loc[]函数

【极其重要】推荐使用loc[]函数

注意在对行索引的时候，若一级行索引还有多个，对二级行索引会遇到问题！也就是说，无法直接对二级索引进行索引，必须让二级索引变成一级索引后才能对其进行索引！

============================================

练习9：

1. 分析比较Series和DataFrame各种索引的方式，熟练掌握.loc[]的用法

2. 假设张三再一次在期中考试的时候因为特殊原因放弃英语考试，如何实现？

============================================

In [34]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


In [38]:
# NOTE(review): chained indexing. The value is assigned to the intermediate
# Series returned by the first .loc, and the df displayed in the next cell is
# unchanged. A single call, df.loc[('一班', '张三'), ('期中', '英语')] = ...,
# targets df itself.
df.loc['一班', '张三'].loc['期中', '英语'] = None

In [39]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


In [40]:
# NOTE(review): same chained-indexing pattern with np.nan. The df displayed in
# the next cell is again unchanged, because the write lands on the intermediate
# Series rather than on df.
df.loc['一班', '张三'].loc['期中', '英语'] = np.nan

In [41]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83,145,19,87,3,2
一班,李四,29,94,78,44,98,44
一班,王五,119,72,54,17,80,6
二班,赵六,92,74,20,99,98,39
二班,田七,25,38,111,92,137,46
二班,孙八,60,95,2,42,110,132


In [43]:
df.dtypes

期中  语文    int32
    数学    int32
    英语    int32
期末  语文    int32
    数学    int32
    英语    int32
dtype: object

In [45]:
# Cast every column to float64 so the frame is able to hold NaN.
df = df.astype('float64')

In [46]:
df.dtypes

期中  语文    float64
    数学    float64
    英语    float64
期末  语文    float64
    数学    float64
    英语    float64
dtype: object

In [47]:
# Set the cell through ONE .loc call, giving the full row label and the full
# column label as tuples, so the write targets df itself. The chained form
# df.loc[...].loc[...] = value writes into an intermediate Series and only
# reaches df when that Series happens to be a view of it.
df.loc[('一班', '张三'), ('期中', '英语')] = np.nan

In [48]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


## 4. 索引的堆（stack）

- ``stack()``
- ``unstack()``

In [49]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


In [None]:
# 把水平的列索引转化为行索引就叫做stack
# 反之叫做unstack

In [50]:
df.stack() # default level=-1: operate on the innermost column level

Unnamed: 0,Unnamed: 1,Unnamed: 2,期中,期末
一班,张三,数学,145.0,3.0
一班,张三,英语,,2.0
一班,张三,语文,83.0,87.0
一班,李四,数学,94.0,98.0
一班,李四,英语,78.0,44.0
一班,李四,语文,29.0,44.0
一班,王五,数学,72.0,80.0
一班,王五,英语,54.0,6.0
一班,王五,语文,119.0,17.0
二班,赵六,数学,74.0,98.0


In [51]:
# level=0 moves the OUTER column level into the rows; the inner column level
# stays as the columns.
df.stack(level=0)

Unnamed: 0,Unnamed: 1,Unnamed: 2,数学,英语,语文
一班,张三,期中,145.0,,83.0
一班,张三,期末,3.0,2.0,87.0
一班,李四,期中,94.0,78.0,29.0
一班,李四,期末,98.0,44.0,44.0
一班,王五,期中,72.0,54.0,119.0
一班,王五,期末,80.0,6.0,17.0
二班,赵六,期中,74.0,20.0,92.0
二班,赵六,期末,98.0,39.0,99.0
二班,田七,期中,38.0,111.0,25.0
二班,田七,期末,137.0,46.0,92.0


In [52]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


In [54]:
# No level given: the innermost row level moves to the columns. fill_value=0
# fills the (outer, inner) row combinations that do not exist in df; the NaN
# that df already contains is left as NaN.
df.unstack(fill_value=0)

Unnamed: 0_level_0,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末
Unnamed: 0_level_1,语文,语文,语文,语文,语文,语文,数学,数学,数学,数学,数学,数学,英语,英语,英语,英语,英语,英语,语文,语文,语文,语文,语文,语文,数学,数学,数学,数学,数学,数学,英语,英语,英语,英语,英语,英语
Unnamed: 0_level_2,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六
一班,0.0,83.0,29.0,119.0,0.0,0.0,0.0,145.0,94.0,72.0,0.0,0.0,0.0,,78.0,54.0,0.0,0.0,0.0,87.0,44.0,17.0,0.0,0.0,0.0,3.0,98.0,80.0,0.0,0.0,0.0,2.0,44.0,6.0,0.0,0.0
二班,60.0,0.0,0.0,0.0,25.0,92.0,95.0,0.0,0.0,0.0,38.0,74.0,2.0,0.0,0.0,0.0,111.0,20.0,42.0,0.0,0.0,0.0,92.0,99.0,110.0,0.0,0.0,0.0,137.0,98.0,132.0,0.0,0.0,0.0,46.0,39.0


In [57]:
# level=0 moves the OUTER row level to the columns, leaving the inner row level
# as the index. fill_value=0 fills the combinations that do not exist in df.
df.unstack(level=0, fill_value=0)

Unnamed: 0_level_0,期中,期中,期中,期中,期中,期中,期末,期末,期末,期末,期末,期末
Unnamed: 0_level_1,语文,语文,数学,数学,英语,英语,语文,语文,数学,数学,英语,英语
Unnamed: 0_level_2,一班,二班,一班,二班,一班,二班,一班,二班,一班,二班,一班,二班
孙八,0.0,60.0,0.0,95.0,0.0,2.0,0.0,42.0,0.0,110.0,0.0,132.0
张三,83.0,0.0,145.0,0.0,,0.0,87.0,0.0,3.0,0.0,2.0,0.0
李四,29.0,0.0,94.0,0.0,78.0,0.0,44.0,0.0,98.0,0.0,44.0,0.0
王五,119.0,0.0,72.0,0.0,54.0,0.0,17.0,0.0,80.0,0.0,6.0,0.0
田七,0.0,25.0,0.0,38.0,0.0,111.0,0.0,92.0,0.0,137.0,0.0,46.0
赵六,0.0,92.0,0.0,74.0,0.0,20.0,0.0,99.0,0.0,98.0,0.0,39.0


【小技巧】使用stack()的时候，level等于哪一个，哪一个就消失，出现在行里。

【小技巧】使用unstack()的时候，level等于哪一个，哪一个就消失，出现在列里。

============================================

练习10：

1. 使用unstack()将df变为两行，分别为期中期末

2. 使用unstack()将df变为三行，分别为三个科目

============================================

In [58]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


In [61]:
# Move the outer column level into the rows, then push both original row levels
# out to the columns one after the other, leaving one row per outer column label.
(
    df.stack(level=0)
      .unstack(level=0, fill_value=0)
      .unstack(level=0)
)

Unnamed: 0_level_0,数学,数学,数学,数学,数学,数学,数学,数学,数学,数学,数学,数学,英语,英语,英语,英语,英语,英语,英语,英语,英语,英语,英语,英语,语文,语文,语文,语文,语文,语文,语文,语文,语文,语文,语文,语文
Unnamed: 0_level_1,一班,一班,一班,一班,一班,一班,二班,二班,二班,二班,二班,二班,一班,一班,一班,一班,一班,一班,二班,二班,二班,二班,二班,二班,一班,一班,一班,一班,一班,一班,二班,二班,二班,二班,二班,二班
Unnamed: 0_level_2,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六
期中,0.0,145.0,94.0,72.0,0.0,0.0,95.0,0.0,0.0,0.0,38.0,74.0,0.0,,78.0,54.0,0.0,0.0,2.0,0.0,0.0,0.0,111.0,20.0,0.0,83.0,29.0,119.0,0.0,0.0,60.0,0.0,0.0,0.0,25.0,92.0
期末,0.0,3.0,98.0,80.0,0.0,0.0,110.0,0.0,0.0,0.0,137.0,98.0,0.0,2.0,44.0,6.0,0.0,0.0,132.0,0.0,0.0,0.0,46.0,39.0,0.0,87.0,44.0,17.0,0.0,0.0,42.0,0.0,0.0,0.0,92.0,99.0


In [62]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


In [64]:
# Move the inner column level into the rows, then push both original row levels
# out to the columns one after the other, leaving one row per inner column label.
(
    df.stack()
      .unstack(level=0, fill_value=0)
      .unstack(level=0)
)

Unnamed: 0_level_0,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期中,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末,期末
Unnamed: 0_level_1,一班,一班,一班,一班,一班,一班,二班,二班,二班,二班,二班,二班,一班,一班,一班,一班,一班,一班,二班,二班,二班,二班,二班,二班
Unnamed: 0_level_2,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六,孙八,张三,李四,王五,田七,赵六
数学,0.0,145.0,94.0,72.0,0.0,0.0,95.0,0.0,0.0,0.0,38.0,74.0,0.0,3.0,98.0,80.0,0.0,0.0,110.0,0.0,0.0,0.0,137.0,98.0
英语,0.0,,78.0,54.0,0.0,0.0,2.0,0.0,0.0,0.0,111.0,20.0,0.0,2.0,44.0,6.0,0.0,0.0,132.0,0.0,0.0,0.0,46.0,39.0
语文,0.0,83.0,29.0,119.0,0.0,0.0,60.0,0.0,0.0,0.0,25.0,92.0,0.0,87.0,44.0,17.0,0.0,0.0,42.0,0.0,0.0,0.0,92.0,99.0


## 5. 聚合操作

【注意】

- 需要指定axis

- 【小技巧】和unstack()相反，聚合的时候，level等于哪一层，哪一层就保留，其余各层被聚合掉；axis指定的是聚合的方向（axis=0沿行方向聚合，axis=1沿列方向聚合）。

In [None]:
# 常用聚合函数：sum、mean、max、min、std、var

In [65]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


In [67]:
# Sum across the columns: one total per row. The NaN in the first row is
# skipped rather than propagated.
df.sum(axis=1)

一班  张三    320.0
    李四    387.0
    王五    348.0
二班  赵六    422.0
    田七    449.0
    孙八    441.0
dtype: float64

In [68]:
# Sum the rows within each outer row label: the level named here is kept and
# the other row level is aggregated away. The `level=` argument of the
# reductions was removed in pandas 2.0, so the grouping is spelled out with
# groupby; sort=False keeps the labels in their original order.
df.groupby(level=0, sort=False).sum()

Unnamed: 0_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,语文,数学,英语,语文,数学,英语
一班,231.0,311.0,132.0,148.0,181.0,52.0
二班,177.0,207.0,133.0,233.0,345.0,217.0


In [71]:
# Sum the columns within each outer column label. The `level=` argument of the
# reductions was removed in pandas 2.0; grouping columns is done by transposing,
# grouping on the (now row) level, and transposing back. sort=False keeps the
# labels in their original order.
df.T.groupby(level=0, sort=False).sum().T

Unnamed: 0,Unnamed: 1,期中,期末
一班,张三,228.0,92.0
一班,李四,201.0,186.0
一班,王五,245.0,103.0
二班,赵六,186.0,236.0
二班,田七,174.0,275.0
二班,孙八,157.0,284.0


In [72]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,期中,期中,期中,期末,期末,期末
Unnamed: 0_level_1,Unnamed: 1_level_1,语文,数学,英语,语文,数学,英语
一班,张三,83.0,145.0,,87.0,3.0,2.0
一班,李四,29.0,94.0,78.0,44.0,98.0,44.0
一班,王五,119.0,72.0,54.0,17.0,80.0,6.0
二班,赵六,92.0,74.0,20.0,99.0,98.0,39.0
二班,田七,25.0,38.0,111.0,92.0,137.0,46.0
二班,孙八,60.0,95.0,2.0,42.0,110.0,132.0


In [73]:
# Highest score per row within each outer column label (midterm / final).
# In general axis=0 refers to rows and axis=1 to columns; the removed
# `max(axis=1, level=0)` form is expressed here as a column-wise groupby via
# transpose, with sort=False keeping the labels in their original order.
df.T.groupby(level=0, sort=False).max().T


Unnamed: 0,Unnamed: 1,期中,期末
一班,张三,145.0,87.0
一班,李四,94.0,98.0
一班,王五,119.0,80.0
二班,赵六,92.0,99.0
二班,田七,111.0,137.0
二班,孙八,95.0,132.0


所谓的聚合操作：平均数，方差，最大值，最小值……

============================================

练习11：

1. 计算各个科目期中期末平均成绩

2. 计算各科目张三李四的最高分

============================================

In [75]:
# Mean per row within each inner column label, i.e. averaged over the outer
# column level. `mean(axis=1, level=1)` was removed in pandas 2.0; transpose,
# group on level 1, and transpose back. sort=False keeps the labels in their
# original order.
df.T.groupby(level=1, sort=False).mean().T

Unnamed: 0,Unnamed: 1,语文,数学,英语
一班,张三,85.0,74.0,2.0
一班,李四,36.5,96.0,61.0
一班,王五,68.0,76.0,30.0
二班,赵六,95.5,86.0,29.5
二班,田七,58.5,87.5,78.5
二班,孙八,51.0,102.5,67.0


In [76]:
# Maximum per row within each inner column label, taken over the outer column
# level. `max(level=1, axis=1)` was removed in pandas 2.0; transpose, group on
# level 1, and transpose back. sort=False keeps the labels in their original order.
df.T.groupby(level=1, sort=False).max().T

Unnamed: 0,Unnamed: 1,语文,数学,英语
一班,张三,87.0,145.0,2.0
一班,李四,44.0,98.0,78.0
一班,王五,119.0,80.0,54.0
二班,赵六,99.0,98.0,39.0
二班,田七,92.0,137.0,111.0
二班,孙八,60.0,110.0,132.0
