In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### 透過list建立一個Series

In [2]:
# Build a Series from a plain list; np.nan marks the one missing entry.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

### 透過一個numpy array 建立DataFrame

In [4]:
# Six consecutive daily timestamps starting at 2013-01-01.
dates = pd.date_range(start="20130101", periods=6)

In [5]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# 6x4 frame of standard-normal draws, rows labelled by `dates`, columns A-D.
# No seed is set in the visible cells, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)

In [7]:
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.859503,-0.232251,-0.273836,-1.621659
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794
2013-01-04,2.412223,0.815166,0.305732,1.078677
2013-01-05,0.42603,-0.319924,1.112984,-0.099434
2013-01-06,-1.054557,1.029751,-1.1213,0.130143


### 通過傳遞一個能夠被轉換成類似序列結構的字典對象來建立一個DataFrame

In [8]:
# Build a frame from a dict of column specs. Scalar entries ("A", "B", "F")
# are repeated for every row; the array-like entries have 4 elements each.
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [9]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


### 查看不同列的數據類型

In [10]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### 查看DataFrame的頭部和尾部的行

In [11]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-0.859503,-0.232251,-0.273836,-1.621659
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794
2013-01-04,2.412223,0.815166,0.305732,1.078677
2013-01-05,0.42603,-0.319924,1.112984,-0.099434


In [12]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,2.412223,0.815166,0.305732,1.078677
2013-01-05,0.42603,-0.319924,1.112984,-0.099434
2013-01-06,-1.054557,1.029751,-1.1213,0.130143


### 顯示索引、列和底層的numpy數據

In [13]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [14]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [15]:
df.values

array([[-0.85950259, -0.23225113, -0.27383601, -1.62165862],
       [ 0.95269092, -1.24081713,  1.43066822,  0.47630264],
       [-0.41481578, -1.35425593, -0.21620537, -0.74679435],
       [ 2.41222293,  0.81516638,  0.3057317 ,  1.07867671],
       [ 0.42602966, -0.31992406,  1.11298351, -0.09943356],
       [-1.05455707,  1.02975098, -1.12130035,  0.13014268]])

### describe() 函數對於數據的快速統計匯總

In [16]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.243678,-0.217055,0.20634,-0.130461
std,1.309548,0.99721,0.948958,0.948956
min,-1.054557,-1.354256,-1.1213,-1.621659
25%,-0.748331,-1.010594,-0.259428,-0.584954
50%,0.005607,-0.276088,0.044763,0.015355
75%,0.821026,0.553312,0.911171,0.389763
max,2.412223,1.029751,1.430668,1.078677


### 對數據的轉置

In [17]:
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-0.859503,0.952691,-0.414816,2.412223,0.42603,-1.054557
B,-0.232251,-1.240817,-1.354256,0.815166,-0.319924,1.029751
C,-0.273836,1.430668,-0.216205,0.305732,1.112984,-1.1213
D,-1.621659,0.476303,-0.746794,1.078677,-0.099434,0.130143


### 按軸進行排序

In [18]:
df.sort_index(axis = 1, ascending = False)

Unnamed: 0,D,C,B,A
2013-01-01,-1.621659,-0.273836,-0.232251,-0.859503
2013-01-02,0.476303,1.430668,-1.240817,0.952691
2013-01-03,-0.746794,-0.216205,-1.354256,-0.414816
2013-01-04,1.078677,0.305732,0.815166,2.412223
2013-01-05,-0.099434,1.112984,-0.319924,0.42603
2013-01-06,0.130143,-1.1213,1.029751,-1.054557


### 按值進行排序

In [19]:
df.sort_values(by = "B")

Unnamed: 0,A,B,C,D
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-05,0.42603,-0.319924,1.112984,-0.099434
2013-01-01,-0.859503,-0.232251,-0.273836,-1.621659
2013-01-04,2.412223,0.815166,0.305732,1.078677
2013-01-06,-1.054557,1.029751,-1.1213,0.130143


# 選擇

### 獲取
### 選擇一個單獨的列，將會返回Series，等同於df.A

In [20]:
df["A"]

2013-01-01   -0.859503
2013-01-02    0.952691
2013-01-03   -0.414816
2013-01-04    2.412223
2013-01-05    0.426030
2013-01-06   -1.054557
Freq: D, Name: A, dtype: float64

In [21]:
df.A

2013-01-01   -0.859503
2013-01-02    0.952691
2013-01-03   -0.414816
2013-01-04    2.412223
2013-01-05    0.426030
2013-01-06   -1.054557
Freq: D, Name: A, dtype: float64

### 通過[ ] 進行選擇，將會對行進行切片

In [22]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-0.859503,-0.232251,-0.273836,-1.621659
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794


In [23]:
df["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794
2013-01-04,2.412223,0.815166,0.305732,1.078677


# 通過標籤選擇

### 使用標籤來獲取一個交叉的區域

In [24]:
df.loc[dates[0]]

A   -0.859503
B   -0.232251
C   -0.273836
D   -1.621659
Name: 2013-01-01 00:00:00, dtype: float64

### 通過標籤來在多個軸上進行選擇

In [25]:
df.loc[:,["A","B"]]

Unnamed: 0,A,B
2013-01-01,-0.859503,-0.232251
2013-01-02,0.952691,-1.240817
2013-01-03,-0.414816,-1.354256
2013-01-04,2.412223,0.815166
2013-01-05,0.42603,-0.319924
2013-01-06,-1.054557,1.029751


### 標籤切片

In [26]:
df.loc["20130102":"20130104",["A","B"]]

Unnamed: 0,A,B
2013-01-02,0.952691,-1.240817
2013-01-03,-0.414816,-1.354256
2013-01-04,2.412223,0.815166


### 對於返回的對象進行維度縮減

In [27]:
df.loc["20130102", ["A", "B"]]

A    0.952691
B   -1.240817
Name: 2013-01-02 00:00:00, dtype: float64

## 獲取一個標量

In [28]:
df.loc[dates[0], "A"]

-0.85950259205952406

### 快速訪問一個標量 等同上

In [29]:
df.at[dates[0], "A"]

-0.85950259205952406

# 通過位置選擇

### 通過傳遞數值進行位置選擇(選擇的是行)

In [30]:
df.iloc[3]

A    2.412223
B    0.815166
C    0.305732
D    1.078677
Name: 2013-01-04 00:00:00, dtype: float64

### 通過數值進行切片

In [31]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,2.412223,0.815166
2013-01-05,0.42603,-0.319924


### 通過指定一個位置的列表

In [32]:
df.iloc[[1, 2, 4],[0, 2]]

Unnamed: 0,A,C
2013-01-02,0.952691,1.430668
2013-01-03,-0.414816,-0.216205
2013-01-05,0.42603,1.112984


### 對行進行切片

In [33]:
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794


### 對列進行切片

In [34]:
df.iloc[:,1:3]

Unnamed: 0,B,C
2013-01-01,-0.232251,-0.273836
2013-01-02,-1.240817,1.430668
2013-01-03,-1.354256,-0.216205
2013-01-04,0.815166,0.305732
2013-01-05,-0.319924,1.112984
2013-01-06,1.029751,-1.1213


### 獲取特定的值

In [35]:
df.iloc[1, 1]

-1.2408171261745065

In [36]:
df.iat[1, 1]

-1.2408171261745065

# Boolean索引

### 使用一個單獨列來選擇數據

In [37]:
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-02,0.952691,-1.240817,1.430668,0.476303
2013-01-04,2.412223,0.815166,0.305732,1.078677
2013-01-05,0.42603,-0.319924,1.112984,-0.099434


### 使用 where操作來選擇數據

In [38]:
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,,,,
2013-01-02,0.952691,,1.430668,0.476303
2013-01-03,,,,
2013-01-04,2.412223,0.815166,0.305732,1.078677
2013-01-05,0.42603,,1.112984,
2013-01-06,,1.029751,,0.130143


### 使用isin() 方法來過濾

In [39]:
# Work on an independent copy so the column added next does not touch df.
df2 = df.copy(deep=True)

In [40]:
# Add a string column "E" with one label per row; used by the isin() filter below.
df2["E"] = ["one", "one", "two", "three", "four", "three"]

In [41]:
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,-0.859503,-0.232251,-0.273836,-1.621659,one
2013-01-02,0.952691,-1.240817,1.430668,0.476303,one
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794,two
2013-01-04,2.412223,0.815166,0.305732,1.078677,three
2013-01-05,0.42603,-0.319924,1.112984,-0.099434,four
2013-01-06,-1.054557,1.029751,-1.1213,0.130143,three


In [42]:
df2[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-0.414816,-1.354256,-0.216205,-0.746794,two
2013-01-05,0.42603,-0.319924,1.112984,-0.099434,four


# 設置

### 設置一個新的列

In [43]:
# Integer Series 1..6 indexed by six daily dates starting 2013-01-02
# (one day later than the frame's first row).
s1 = pd.Series(
    list(range(1, 7)),
    index=pd.date_range(start="20130102", periods=6),
)

In [44]:
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [45]:
# Add column "F" from s1. Values are aligned on the date index, so 2013-01-01
# (absent from s1) becomes NaN and s1's extra 2013-01-07 entry is not added.
df["F"] = s1

### 通過標籤設置新的值

In [46]:
# Label-based scalar assignment: set column "A" at the first date in `dates` to 0.
df.at[dates[0], "A"] = 0

### 通過位置設置新的值

In [47]:
# Position-based scalar assignment: set row 0, column 1 (column "B") to 0.
df.iat[0, 1] = 0

### 通過一個numpy數組設置一組新值

In [48]:
# Overwrite every row of column "D" with 5, passing the new values as a NumPy array.
# NOTE(review): recent pandas versions write through .loc[:, col] in place and may keep
# the column's float dtype (showing 5.0 rather than 5) — confirm on the target version.
df.loc[:, "D"] = np.array([5] * len(df))

In [49]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.273836,5,
2013-01-02,0.952691,-1.240817,1.430668,5,1.0
2013-01-03,-0.414816,-1.354256,-0.216205,5,2.0
2013-01-04,2.412223,0.815166,0.305732,5,3.0
2013-01-05,0.42603,-0.319924,1.112984,5,4.0
2013-01-06,-1.054557,1.029751,-1.1213,5,5.0


### 通過where操作來設置新的值

In [50]:
# Fresh independent copy of df for the masked assignment that follows.
df2 = df.copy(deep=True)

In [51]:
# Wherever a value is positive, replace it with its negation; zeros, negative
# values and NaN are left as they are.
df2[df2 > 0] = -df2

In [52]:
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.273836,-5,
2013-01-02,-0.952691,-1.240817,-1.430668,-5,-1.0
2013-01-03,-0.414816,-1.354256,-0.216205,-5,-2.0
2013-01-04,-2.412223,-0.815166,-0.305732,-5,-3.0
2013-01-05,-0.42603,-0.319924,-1.112984,-5,-4.0
2013-01-06,-1.054557,-1.029751,-1.1213,-5,-5.0


# 缺失值處理

### reindex()方法可以對指定軸上的索引進行改變/增加/刪除操作，這將返回原始數據的一個拷貝

In [53]:
# Keep only the first four dates and add an extra column "E"; reindex returns
# a new frame, and the new column starts out entirely NaN.
df1 = df.reindex(index=dates[:4], columns=[*df.columns, "E"])

In [54]:
# Label slices are inclusive at both ends: this sets "E" to 1 on the first two dates.
df1.loc[dates[0]:dates[1], "E"] = 1

In [55]:
df1

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,-0.273836,5,,1.0
2013-01-02,0.952691,-1.240817,1.430668,5,1.0,1.0
2013-01-03,-0.414816,-1.354256,-0.216205,5,2.0,
2013-01-04,2.412223,0.815166,0.305732,5,3.0,


### 去掉包含缺失值的行

In [56]:
df1.dropna(how="any")

Unnamed: 0,A,B,C,D,F,E
2013-01-02,0.952691,-1.240817,1.430668,5,1.0,1.0


### 對缺失值進行填充

In [57]:
df1.fillna(value=5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,-0.273836,5,5.0,1.0
2013-01-02,0.952691,-1.240817,1.430668,5,1.0,1.0
2013-01-03,-0.414816,-1.354256,-0.216205,5,2.0,5.0
2013-01-04,2.412223,0.815166,0.305732,5,3.0,5.0


### 取得標示缺失值位置的Boolean遮罩（缺失處為True）

In [58]:
pd.isnull(df1)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,False,False,False,False,True,False
2013-01-02,False,False,False,False,False,False
2013-01-03,False,False,False,False,False,True
2013-01-04,False,False,False,False,False,True


# 統計 

### 執行描述性統計 

In [59]:
df.mean()

A    0.386928
B   -0.178347
C    0.206340
D    5.000000
F    3.000000
dtype: float64

### 在其他軸上進行相同的操作 

In [60]:
df.mean(1)

2013-01-01    1.181541
2013-01-02    1.428508
2013-01-03    1.002945
2013-01-04    2.306624
2013-01-05    2.043818
2013-01-06    1.770779
Freq: D, dtype: float64

### 對於擁有不同維度，需要對齊的對象進行操作。Pandas會自動沿著指定的維度進行廣播

In [61]:
# Date-indexed Series whose values are moved two rows down; the first two
# rows become NaN and the last two original values fall off the end.
s = pd.Series(
    [1, 3, 5, np.nan, 6, 8],
    index=dates,
).shift(periods=2)

In [62]:
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [63]:
df.sub(s, axis="index")

Unnamed: 0,A,B,C,D,F
2013-01-01,,,,,
2013-01-02,,,,,
2013-01-03,-1.414816,-2.354256,-1.216205,4.0,1.0
2013-01-04,-0.587777,-2.184834,-2.694268,2.0,0.0
2013-01-05,-4.57397,-5.319924,-3.887016,0.0,-1.0
2013-01-06,,,,,


# Apply 

###  對數據應用函數

In [64]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.273836,5,
2013-01-02,0.952691,-1.240817,1.430668,5,1.0
2013-01-03,-0.414816,-1.354256,-0.216205,5,2.0
2013-01-04,2.412223,0.815166,0.305732,5,3.0
2013-01-05,0.42603,-0.319924,1.112984,5,4.0
2013-01-06,-1.054557,1.029751,-1.1213,5,5.0


In [65]:
df.apply(np.cumsum)

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.273836,5,
2013-01-02,0.952691,-1.240817,1.156832,10,1.0
2013-01-03,0.537875,-2.595073,0.940627,15,3.0
2013-01-04,2.950098,-1.779907,1.246359,20,6.0
2013-01-05,3.376128,-2.099831,2.359342,25,10.0
2013-01-06,2.321571,-1.07008,1.238042,30,15.0


In [66]:
df.apply(lambda x: x.max() - x.min())

A    3.466780
B    2.384007
C    2.551969
D    0.000000
F    4.000000
dtype: float64

# 直方圖

In [67]:
# Ten random integers drawn from 0..6 (the upper bound 7 is exclusive).
s = pd.Series(np.random.randint(low=0, high=7, size=10))

In [68]:
s

0    3
1    3
2    4
3    5
4    5
5    0
6    2
7    1
8    2
9    2
dtype: int32

In [69]:
s.value_counts()

2    3
5    2
3    2
4    1
1    1
0    1
dtype: int64

# 字符串方法

### Series對象在其str屬性中配備了一組字符串處理方法，可以很容易的應用到數組中的每個元素

In [70]:
# Mixed-case strings with one missing value, used to demonstrate the .str accessor.
s = pd.Series(
    data=["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"],
)

In [71]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

# 合併 

### Concat

In [72]:
# 10x4 frame of standard-normal draws with default integer row/column labels.
df = pd.DataFrame(data=np.random.randn(10, 4))

In [73]:
df

Unnamed: 0,0,1,2,3
0,-0.273677,0.934328,-0.943298,0.373893
1,-1.30799,-0.454414,-0.768333,1.12717
2,-0.812616,0.76422,-1.064131,1.087965
3,0.49908,-0.937675,-0.035694,0.367425
4,-0.139149,-1.526742,0.182001,-0.545076
5,0.63093,-1.198623,0.77436,1.18426
6,-0.297158,-0.728836,1.160922,-1.143075
7,-0.780346,-1.097553,0.149643,0.355301
8,-0.610371,-0.755877,-0.008445,0.078169
9,-0.231953,-0.587271,-0.123664,0.481784


In [74]:
# Split the frame into three consecutive row blocks (rows 0-2, 3-6, 7-9).
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]

In [75]:
pieces

[          0         1         2         3
 0 -0.273677  0.934328 -0.943298  0.373893
 1 -1.307990 -0.454414 -0.768333  1.127170
 2 -0.812616  0.764220 -1.064131  1.087965,
           0         1         2         3
 3  0.499080 -0.937675 -0.035694  0.367425
 4 -0.139149 -1.526742  0.182001 -0.545076
 5  0.630930 -1.198623  0.774360  1.184260
 6 -0.297158 -0.728836  1.160922 -1.143075,
           0         1         2         3
 7 -0.780346 -1.097553  0.149643  0.355301
 8 -0.610371 -0.755877 -0.008445  0.078169
 9 -0.231953 -0.587271 -0.123664  0.481784]

In [76]:
pd.concat(pieces)

Unnamed: 0,0,1,2,3
0,-0.273677,0.934328,-0.943298,0.373893
1,-1.30799,-0.454414,-0.768333,1.12717
2,-0.812616,0.76422,-1.064131,1.087965
3,0.49908,-0.937675,-0.035694,0.367425
4,-0.139149,-1.526742,0.182001,-0.545076
5,0.63093,-1.198623,0.77436,1.18426
6,-0.297158,-0.728836,1.160922,-1.143075
7,-0.780346,-1.097553,0.149643,0.355301
8,-0.610371,-0.755877,-0.008445,0.078169
9,-0.231953,-0.587271,-0.123664,0.481784


### JOIN

In [77]:
# Left table: both rows share the key "foo".
left = pd.DataFrame(dict(key=["foo", "foo"], lval=[1, 2]))

In [78]:
# Right table: same duplicated key "foo"; its value column is also named
# "lval", which is why the merge output carries _x/_y suffixes.
right = pd.DataFrame(dict(key=["foo", "foo"], lval=[4, 5]))

In [79]:
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [80]:
right

Unnamed: 0,key,lval
0,foo,4
1,foo,5


In [81]:
pd.merge(left, right, on = "key")

Unnamed: 0,key,lval_x,lval_y
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [82]:
# Left table with two distinct keys.
left = pd.DataFrame(dict(key=["foo", "bar"], lval=[1, 2]))

In [83]:
# Right table with the same two distinct keys.
right = pd.DataFrame(dict(key=["foo", "bar"], lval=[4, 5]))

In [84]:
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [85]:
right

Unnamed: 0,key,lval
0,foo,4
1,bar,5


In [86]:
pd.merge(left, right, on="key")

Unnamed: 0,key,lval_x,lval_y
0,foo,1,4
1,bar,2,5


### Append

In [87]:
# 8x4 frame of standard-normal draws with columns A-D and default integer rows.
df = pd.DataFrame(data=np.random.randn(8, 4), columns=list("ABCD"))

In [88]:
df

Unnamed: 0,A,B,C,D
0,-2.111872,-2.009283,1.469674,0.756885
1,0.00524,0.512799,0.069325,0.427115
2,-1.937192,0.336167,0.238927,-0.110881
3,1.042766,0.526413,-1.563548,1.026296
4,0.757712,1.578434,-0.864005,0.172537
5,0.492545,-0.522844,0.029071,0.651833
6,-0.670444,-0.511517,-2.941343,-0.772887
7,-0.152936,-0.907855,0.0881,-1.498185


In [89]:
# Take the row at position 3 as a Series (indexed by the frame's column labels).
s = df.iloc[3]

In [90]:
# Append the row held in `s` as a new last row. DataFrame.append was deprecated
# in pandas 1.4 and removed in 2.0, so turn the Series into a one-row frame and
# concatenate instead; ignore_index=True renumbers the result 0..n-1.
pd.concat([df, s.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,-2.111872,-2.009283,1.469674,0.756885
1,0.00524,0.512799,0.069325,0.427115
2,-1.937192,0.336167,0.238927,-0.110881
3,1.042766,0.526413,-1.563548,1.026296
4,0.757712,1.578434,-0.864005,0.172537
5,0.492545,-0.522844,0.029071,0.651833
6,-0.670444,-0.511517,-2.941343,-0.772887
7,-0.152936,-0.907855,0.0881,-1.498185
8,1.042766,0.526413,-1.563548,1.026296


# 分組

### 對於"group by"操作，我們通常是指以下一個或多個操作步驟:

### (Splitting) 按照一些規則將數據分為不同的組
### (Applying)對於每組數據分別執行一個函數
### (Combining) 將結果組合到一個數據結構中

In [91]:
# Demo frame for group-by: two string key columns (A, B) and two columns of
# standard-normal draws (C, D).
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)

In [92]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-2.055286,-0.514698
1,bar,one,1.361488,0.634475
2,foo,two,-0.668711,0.89452
3,bar,three,0.012464,-0.488017
4,foo,two,0.662114,0.081509
5,bar,two,-0.221005,-0.182525
6,foo,one,0.131121,1.513819
7,foo,three,0.729517,-1.330081


### 分組並對每個分組執行sum函數

In [93]:
# Sum the numeric columns within each group of "A". numeric_only=True keeps the
# string column "B" out of the aggregation: pandas >= 2.0 would otherwise
# concatenate its strings per group, whereas older versions silently dropped it.
df.groupby("A").sum(numeric_only=True)

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,1.152948,-0.036067
foo,-1.201246,0.645067


# 改變形狀 

### Stack 

In [94]:
# Pair each first-level label with its second-level label, position by position.
tuples = list(
    zip(
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    )
)

In [95]:
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [96]:
# Build a two-level row index from the (first, second) label pairs and name the levels.
index = pd.MultiIndex.from_tuples(tuples, names = ["first", "second"])

In [97]:
# 8x2 frame of standard-normal draws, rows labelled by the two-level `index`.
df = pd.DataFrame(
    data=np.random.randn(8, 2),
    index=index,
    columns=list("AB"),
)

In [98]:
# First four rows only (the "bar" and "baz" groups).
df2 = df.iloc[:4]

In [99]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.41319,-0.003869
bar,two,0.409949,-1.011612
baz,one,-0.015958,0.898391
baz,two,-0.017886,-0.035681


In [100]:
# Move the column labels (A, B) into a new innermost index level, yielding a Series.
stacked = df2.stack()

In [101]:
stacked

first  second   
bar    one     A   -0.413190
               B   -0.003869
       two     A    0.409949
               B   -1.011612
baz    one     A   -0.015958
               B    0.898391
       two     A   -0.017886
               B   -0.035681
dtype: float64

In [102]:
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.41319,-0.003869
bar,two,0.409949,-1.011612
baz,one,-0.015958,0.898391
baz,two,-0.017886,-0.035681


In [103]:
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,-0.41319,0.409949
bar,B,-0.003869,-1.011612
baz,A,-0.015958,-0.017886
baz,B,0.898391,-0.035681


In [104]:
stacked.unstack(0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-0.41319,-0.015958
one,B,-0.003869,0.898391
two,A,0.409949,-0.017886
two,B,-1.011612,-0.035681


# 數據透視表 

In [105]:
# Demo frame for pivoting: three repeating label columns (A, B, C) built by
# list repetition to 12 rows each, plus two columns of standard-normal draws.
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)

In [106]:
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,1.535145,1.032611
1,one,B,foo,-0.182504,-0.44073
2,two,C,foo,0.02403,1.418855
3,three,A,bar,0.19561,-0.405001
4,one,B,bar,-0.322174,0.305489
5,one,C,bar,-0.0224,-0.414173
6,two,A,foo,-0.305514,0.132336
7,three,B,foo,-0.759472,-1.114597
8,one,C,foo,-0.399446,0.739176
9,one,A,bar,-1.070946,-1.622333


In [107]:
pd.pivot_table(df, values="D", index=["A", "B"], columns=["C"])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,-1.070946,1.535145
one,B,-0.322174,-0.182504
one,C,-0.0224,-0.399446
three,A,0.19561,
three,B,,-0.759472
three,C,-1.376473,
two,A,,-0.305514
two,B,0.080011,
two,C,,0.02403
