In [1]:
import numpy as np
import pandas as pd

# 1.通过多层索引进行重塑：

In [2]:
# Build a 2x3 integer frame whose row index ("style") and column index
# ("number") are both named, so the levels can be referred to by name later.
df = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=pd.Index(data=['ios', 'andiord'], name='style'),
    columns=pd.Index(data=['one', 'two', 'three'], name='number'),
)
df

number,one,two,three
style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ios,0,1,2
andiord,3,4,5


In [3]:
# Pivot the column labels into the innermost row-index level (the default
# level, spelled out here), giving a Series indexed by (style, number).
df_stack = df.stack(level=-1)
df_stack

style    number
ios      one       0
         two       1
         three     2
andiord  one       3
         two       4
         three     5
dtype: int32

In [4]:
# Inverse of stack: move the innermost index level (the default) back to columns.
df_stack.unstack(level=-1)

number,one,two,three
style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ios,0,1,2
andiord,3,4,5


#### 默认情况下，unstack()拆堆最内层，可以传入一个层级序号或名称来拆分不同的层级

In [5]:
# Select the level to unstack by position: level 1 is "number".
df_stack.unstack(level=1)

number,one,two,three
style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ios,0,1,2
andiord,3,4,5


In [6]:
# Unstack the outer level (position 0, "style") so its labels become the columns.
df_stack.unstack(level=0)

style,ios,andiord
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [7]:
# Same operation as unstacking level 0, but selecting the level by its name.
df_stack.unstack(level="style")

style,ios,andiord
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


#### 如果某个层级中的值并未出现在每个子分组中，拆堆时会引入缺失值

In [8]:
# First sub-group: labels a-d, stored with the nullable Int64 dtype so that
# missing entries introduced later show up as <NA> rather than forcing floats.
s1 = pd.Series({"a": 0, "b": 1, "c": 2, "d": 3}, dtype="Int64")
s1

a    0
b    1
c    2
d    3
dtype: Int64

In [9]:
# Second sub-group: labels c-e, which only partially overlap with s1's labels.
s2 = pd.Series({"c": 4, "d": 5, "e": 6}, dtype="Int64")
s2

c    4
d    5
e    6
dtype: Int64

In [10]:
# Concatenate the two Series under the outer keys "one" and "two"; passing a
# mapping makes its keys the outer level of the resulting MultiIndex.
data = pd.concat({"one": s1, "two": s2})
data

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: Int64

In [11]:
# Unstack the inner level (the default); labels that are absent from one of
# the sub-groups appear as missing values in the resulting frame.
data.unstack(level=-1)

Unnamed: 0,a,b,c,d,e
one,0.0,1.0,2,3,
two,,,4,5,6.0


#### 默认情况下，堆叠会过滤掉缺失值，因此堆叠与拆堆的操作是可逆的

In [12]:
# Round trip: unstack the inner level, then stack the columns back again.
data.unstack(level=-1).stack()

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: Int64

In [13]:
# Pass dropna=False so the missing entries are kept when stacking back.
# NOTE(review): `dropna` belongs to the legacy stack implementation, which newer
# pandas releases (>= 2.1) deprecate in favour of `future_stack=True` — confirm
# the pandas version this notebook targets.
data.unstack(level=-1).stack(dropna=False)

one  a       0
     b       1
     c       2
     d       3
     e    <NA>
two  a    <NA>
     b    <NA>
     c       4
     d       5
     e       6
dtype: Int64

#### 当在DataFrame中拆堆时，被拆堆的层级会变为结果中最低的层级

In [14]:
# Pair the stacked Series with a copy shifted by 5, one column per "side".
# Note: this rebinds `df`, replacing the 2x3 frame built earlier in the notebook;
# re-running the earlier `df.stack()` cell after this one would stack this frame.
df = pd.DataFrame(
    data={"left": df_stack, "right": df_stack + 5},
    columns=pd.Index(["left", "right"], name="side"),
)
df

Unnamed: 0_level_0,side,left,right
style,number,Unnamed: 2_level_1,Unnamed: 3_level_1
ios,one,0,5
ios,two,1,6
ios,three,2,7
andiord,one,3,8
andiord,two,4,9
andiord,three,5,10


In [15]:
# Unstack the "style" row level; it becomes the lowest level of the column index.
df.unstack("style")

side,left,left,right,right
style,ios,andiord,ios,andiord
number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,0,3,5,8
two,1,4,6,9
three,2,5,7,10


#### 调用 stack 时可以指定要堆叠的轴的名称

In [16]:
# Unstack "style" into the columns, then stack the named column level "side"
# back into the row index.
df.unstack("style").stack("side")

Unnamed: 0_level_0,style,andiord,ios
number,side,Unnamed: 2_level_1,Unnamed: 3_level_1
one,left,3,0
one,right,8,5
two,left,4,1
two,right,9,6
three,left,5,2
three,right,10,7
