In [1]:
import numpy as np
import pandas as pd

# 1.将“长”转换为“宽”格式：

In [2]:
# Long-format sample: every (Style, variable) pair appears exactly once,
# so the frame can be pivoted without key collisions.
df = pd.DataFrame(
    {
        "Style": ["one", "two", "three"] * 3,
        "variable": list("AAABBBCCC"),
        "value": list(range(1, 10)),
    }
)
df

Unnamed: 0,Style,variable,value
0,one,A,1
1,two,A,2
2,three,A,3
3,one,B,4
4,two,B,5
5,three,B,6
6,one,C,7
7,two,C,8
8,three,C,9


#### pivot

In [3]:
# Long -> wide: one row per Style, one column per distinct `variable`,
# each cell filled from `value`.
df_pivot = df.pivot(
    index="Style",
    columns="variable",
    values="value",
)
df_pivot

variable,A,B,C
Style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,4,7
three,3,6,9
two,2,5,8


In [4]:
# Row labels of the pivoted frame: the distinct values of the former Style column.
df_pivot.index

Index(['one', 'three', 'two'], dtype='object', name='Style')

In [5]:
# Column labels of the pivoted frame: the distinct values of the former `variable` column.
df_pivot.columns

Index(['A', 'B', 'C'], dtype='object', name='variable')

#### 如果想在重塑时保留多个数据列，省略 values 参数即可

In [6]:
# Add a second data column (10..18) so the following cells can pivot
# more than one value column at once.
df["value2"] = list(range(10, 19))
df

Unnamed: 0,Style,variable,value,value2
0,one,A,1,10
1,two,A,2,11
2,three,A,3,12
3,one,B,4,13
4,two,B,5,14
5,three,B,6,15
6,one,C,7,16
7,two,C,8,17
8,three,C,9,18


In [7]:
# With `values` omitted, every remaining column (value, value2) is pivoted,
# producing two-level column labels.
df.pivot(
    index="Style",
    columns="variable",
)

Unnamed: 0_level_0,value,value,value,value2,value2,value2
variable,A,B,C,A,B,C
Style,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,4,7,10,13,16
three,3,6,9,12,15,18
two,2,5,8,11,14,17


#### pivot()方法等价于使用set_index()方法创建分层索引，然后再调用unstack()方法

In [8]:
# Step 1 of the pivot equivalence: use (Style, variable) as a two-level row index.
df.set_index(keys=["Style", "variable"])

Unnamed: 0_level_0,Unnamed: 1_level_0,value,value2
Style,variable,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,1,10
two,A,2,11
three,A,3,12
one,B,4,13
two,B,5,14
three,B,6,15
one,C,7,16
two,C,8,17
three,C,9,18


In [9]:
# Step 2: move the inner index level ("variable") into the columns.
# The result has the same layout as df.pivot(index="Style", columns="variable").
(
    df
    .set_index(["Style", "variable"])
    .unstack("variable")
)

Unnamed: 0_level_0,value,value,value,value2,value2,value2
variable,A,B,C,A,B,C
Style,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,4,7,10,13,16
three,3,6,9,12,15,18
two,2,5,8,11,14,17


### 如果指定的 index + columns 构成的数据里面存在重复的情况，将会报错

In [10]:
# Re-display df (value and value2 columns) as the baseline before a duplicate key is introduced.
df

Unnamed: 0,Style,variable,value,value2
0,one,A,1,10
1,two,A,2,11
2,three,A,3,12
3,one,B,4,13
4,two,B,5,14
5,three,B,6,15
6,one,C,7,16
7,two,C,8,17
8,three,C,9,18


In [11]:
# Overwrite Style in the second row (label 1) with "one", so the pair
# ("one", "A") now occurs twice in the frame.
df.loc[1, "Style"] = "one"
df

Unnamed: 0,Style,variable,value,value2
0,one,A,1,10
1,one,A,2,11
2,three,A,3,12
3,one,B,4,13
4,two,B,5,14
5,three,B,6,15
6,one,C,7,16
7,two,C,8,17
8,three,C,9,18


In [12]:
# pivot() needs each (index, columns) pair to be unique. The frame now holds
# ("one", "A") twice, so the call raises ValueError. Catch the error and print
# its message so the failure is shown without halting a Restart & Run All.
try:
    df.pivot(index="Style", columns="variable")
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Index contains duplicate entries, cannot reshape

# 2.将“宽”转换为“长”格式：

#### melt()

In [13]:
# Wide-format sample for the melt examples: one row per Style,
# one column per measured variable (A, B, C).
df = pd.DataFrame(
    [
        ["one", 1, 4, 7],
        ["two", 2, 5, 8],
        ["three", 3, 6, 9],
    ],
    columns=["Style", "A", "B", "C"],
)
df

Unnamed: 0,Style,A,B,C
0,one,1,4,7
1,two,2,5,8
2,three,3,6,9


In [14]:
# Wide -> long: keep Style as the identifier column; every other column is
# unpivoted into (variable, value) pairs.
df_melted = df.melt(id_vars=["Style"])
df_melted

Unnamed: 0,Style,variable,value
0,one,A,1
1,two,A,2
2,three,A,3
3,one,B,4
4,two,B,5
5,three,B,6
6,one,C,7
7,two,C,8
8,three,C,9


#### 使用pivot()方法，可以将数据重塑回原来的布局

In [15]:
# Pivot the melted frame back to the wide layout; Style becomes the row index.
df_re = df_melted.pivot(
    index="Style",
    columns="variable",
    values="value",
)
df_re

variable,A,B,C
Style,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,4,7
three,3,6,9
two,2,5,8


#### 由于pivot()之后的结果会把作为行标签的列（Style）变成索引，此时需要使用reset_index()方法将该索引还原为普通的数据列

In [16]:
# Turn the Style index back into an ordinary column with a default integer index.
# The result is only displayed here; df_re itself is not reassigned.
df_re.reset_index()

variable,Style,A,B,C
0,one,1,4,7
1,three,3,6,9
2,two,2,5,8


#### 指定列的子集作为值列

In [17]:
# Re-display the wide frame (Style, A, B, C) used as input for the melt calls below.
df

Unnamed: 0,Style,A,B,C
0,one,1,4,7
1,two,2,5,8
2,three,3,6,9


In [18]:
# Unpivot only columns A and B; column C is left out of the result.
df.melt(
    id_vars=["Style"],
    value_vars=["A", "B"],
)

Unnamed: 0,Style,variable,value
0,one,A,1
1,two,A,2
2,three,A,3
3,one,B,4
4,two,B,5
5,three,B,6


In [19]:
# Unpivot a single value column (A) while keeping Style as the identifier.
df.melt(
    id_vars=["Style"],
    value_vars=["A"],
)

Unnamed: 0,Style,variable,value
0,one,A,1
1,two,A,2
2,three,A,3


#### 也可以无须任何分组指标

In [20]:
# Re-display the wide frame before melting it without any identifier column.
df

Unnamed: 0,Style,A,B,C
0,one,1,4,7
1,two,2,5,8
2,three,3,6,9


In [21]:
# No id_vars: only the (variable, value) pairs for A and B are returned.
df.melt(value_vars=["A", "B"])

Unnamed: 0,variable,value
0,A,1
1,A,2
2,A,3
3,B,4
4,B,5
5,B,6


In [22]:
# Style is treated as just another value column here, so the resulting
# `value` column mixes strings and integers.
df.melt(value_vars=["Style", "A", "B"])

Unnamed: 0,variable,value
0,Style,one
1,Style,two
2,Style,three
3,A,1
4,A,2
5,A,3
6,B,4
7,B,5
8,B,6
