In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### 透過list建立一個Series

In [2]:
# Build a Series from a plain Python list; the np.nan entry is a missing value.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

### 透過一個numpy array 建立DataFrame

In [4]:
# Six consecutive calendar days starting 2013-01-01.
dates = pd.date_range(start='20130101', periods=6)

In [5]:
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Seed NumPy's global RNG at the first random draw so the random values are
# the same on every "Restart & Run All"; the seed value itself is arbitrary.
np.random.seed(0)
# 6x4 frame of standard-normal draws, indexed by `dates`, with columns A-D.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))

In [7]:
df

Unnamed: 0,A,B,C,D
2013-01-01,0.315967,2.483645,-1.324061,1.227715
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-03,-1.915223,0.737836,1.608039,-1.921122
2013-01-04,0.712223,1.058712,-0.603693,-1.731746
2013-01-05,-0.29567,0.450864,1.209508,1.083905
2013-01-06,0.771848,0.04524,-0.869707,0.533179


### 通過傳遞一個能夠被轉換成類似序列結構的字典對象來建立一個DataFrame

In [8]:
# Build a frame from a dict of column specs. The scalar entries (A, B, F) are
# repeated for every row of the array-like entries (4 rows here).
df2 = pd.DataFrame(
    {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
)

In [9]:
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


### 查看不同列的數據類型

In [10]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### 查看DataFrame的頭部和尾部的行

In [11]:
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.315967,2.483645,-1.324061,1.227715
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-03,-1.915223,0.737836,1.608039,-1.921122
2013-01-04,0.712223,1.058712,-0.603693,-1.731746
2013-01-05,-0.29567,0.450864,1.209508,1.083905


In [12]:
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,0.712223,1.058712,-0.603693,-1.731746
2013-01-05,-0.29567,0.450864,1.209508,1.083905
2013-01-06,0.771848,0.04524,-0.869707,0.533179


### 顯示索引、列和底層的numpy數據

In [13]:
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [14]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [15]:
# Underlying data as a NumPy array. `DataFrame.to_numpy()` is the accessor the
# pandas documentation recommends over the older `.values` attribute.
df.to_numpy()

array([[ 0.31596733,  2.48364475, -1.32406065,  1.22771543],
       [ 0.460523  ,  1.39819511,  0.85889557, -1.39092283],
       [-1.91522278,  0.73783571,  1.60803917, -1.92112182],
       [ 0.7122227 ,  1.05871227, -0.60369335, -1.73174643],
       [-0.29566967,  0.45086422,  1.20950821,  1.0839045 ],
       [ 0.77184809,  0.04523984, -0.86970724,  0.53317923]])

### describe() 函數對於數據的快速統計匯總

In [16]:
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.008278,1.029082,0.146497,-0.366499
std,1.016859,0.853227,1.227323,1.46866
min,-1.915223,0.04524,-1.324061,-1.921122
25%,-0.14276,0.522607,-0.803204,-1.646541
50%,0.388245,0.898274,0.127601,-0.428872
75%,0.649298,1.313324,1.121855,0.946223
max,0.771848,2.483645,1.608039,1.227715


### 對數據的轉置

In [17]:
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.315967,0.460523,-1.915223,0.712223,-0.29567,0.771848
B,2.483645,1.398195,0.737836,1.058712,0.450864,0.04524
C,-1.324061,0.858896,1.608039,-0.603693,1.209508,-0.869707
D,1.227715,-1.390923,-1.921122,-1.731746,1.083905,0.533179


### 按軸進行排序

In [18]:
# Order the columns by label, descending (D, C, B, A).
df.sort_index(axis="columns", ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,1.227715,-1.324061,2.483645,0.315967
2013-01-02,-1.390923,0.858896,1.398195,0.460523
2013-01-03,-1.921122,1.608039,0.737836,-1.915223
2013-01-04,-1.731746,-0.603693,1.058712,0.712223
2013-01-05,1.083905,1.209508,0.450864,-0.29567
2013-01-06,0.533179,-0.869707,0.04524,0.771848


### 按值進行排序

In [19]:
# Order the rows by the values in column B, smallest first.
df.sort_values("B", ascending=True)

Unnamed: 0,A,B,C,D
2013-01-06,0.771848,0.04524,-0.869707,0.533179
2013-01-05,-0.29567,0.450864,1.209508,1.083905
2013-01-03,-1.915223,0.737836,1.608039,-1.921122
2013-01-04,0.712223,1.058712,-0.603693,-1.731746
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-01,0.315967,2.483645,-1.324061,1.227715


# 選擇

### 獲取
### 選擇一個單獨的列，將會返回Series，等同於df.A

In [20]:
df["A"]

2013-01-01    0.315967
2013-01-02    0.460523
2013-01-03   -1.915223
2013-01-04    0.712223
2013-01-05   -0.295670
2013-01-06    0.771848
Freq: D, Name: A, dtype: float64

In [21]:
df.A

2013-01-01    0.315967
2013-01-02    0.460523
2013-01-03   -1.915223
2013-01-04    0.712223
2013-01-05   -0.295670
2013-01-06    0.771848
Freq: D, Name: A, dtype: float64

### 通過[ ] 進行選擇，將會對行進行切片

In [22]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.315967,2.483645,-1.324061,1.227715
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-03,-1.915223,0.737836,1.608039,-1.921122


In [23]:
df["20130102":"20130104"]

Unnamed: 0,A,B,C,D
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-03,-1.915223,0.737836,1.608039,-1.921122
2013-01-04,0.712223,1.058712,-0.603693,-1.731746


# 通過標籤選擇

### 使用標籤來獲取一個交叉的區域

In [24]:
df.loc[dates[0]]

A    0.315967
B    2.483645
C   -1.324061
D    1.227715
Name: 2013-01-01 00:00:00, dtype: float64

### 通過標籤來在多個軸上進行選擇

In [25]:
df.loc[:,["A","B"]]

Unnamed: 0,A,B
2013-01-01,0.315967,2.483645
2013-01-02,0.460523,1.398195
2013-01-03,-1.915223,0.737836
2013-01-04,0.712223,1.058712
2013-01-05,-0.29567,0.450864
2013-01-06,0.771848,0.04524


### 標籤切片

In [26]:
df.loc["20130102":"20130104",["A","B"]]

Unnamed: 0,A,B
2013-01-02,0.460523,1.398195
2013-01-03,-1.915223,0.737836
2013-01-04,0.712223,1.058712


### 對於返回的對象進行維度縮減

In [27]:
df.loc["20130102", ["A", "B"]]

A    0.460523
B    1.398195
Name: 2013-01-02 00:00:00, dtype: float64

### 獲取一個標量

In [28]:
df.loc[dates[0], "A"]

0.31596733160911089

### 快速訪問一個標量（結果與上一個方法相同）

In [29]:
df.at[dates[0], "A"]

0.31596733160911089

# 通過位置選擇

### 通過傳遞數值進行位置選擇(選擇的是行)

In [30]:
df.iloc[3]

A    0.712223
B    1.058712
C   -0.603693
D   -1.731746
Name: 2013-01-04 00:00:00, dtype: float64

### 通過數值進行切片

In [31]:
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,0.712223,1.058712
2013-01-05,-0.29567,0.450864


### 通過指定一個位置的列表

In [32]:
df.iloc[[1, 2, 4],[0, 2]]

Unnamed: 0,A,C
2013-01-02,0.460523,0.858896
2013-01-03,-1.915223,1.608039
2013-01-05,-0.29567,1.209508


### 對行進行切片

In [33]:
df.iloc[1:3,:]

Unnamed: 0,A,B,C,D
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-03,-1.915223,0.737836,1.608039,-1.921122


### 對列進行切片

In [34]:
df.iloc[:,1:3]

Unnamed: 0,B,C
2013-01-01,2.483645,-1.324061
2013-01-02,1.398195,0.858896
2013-01-03,0.737836,1.608039
2013-01-04,1.058712,-0.603693
2013-01-05,0.450864,1.209508
2013-01-06,0.04524,-0.869707


### 獲取特定的值

In [35]:
df.iloc[1, 1]

1.398195110153829

In [36]:
df.iat[1, 1]

1.398195110153829

# Boolean索引

### 使用一個單獨列來選擇數據

In [37]:
# Keep only the rows whose value in column A is positive.
df[df["A"] > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.315967,2.483645,-1.324061,1.227715
2013-01-02,0.460523,1.398195,0.858896,-1.390923
2013-01-04,0.712223,1.058712,-0.603693,-1.731746
2013-01-06,0.771848,0.04524,-0.869707,0.533179


### 使用 where操作來選擇數據

In [38]:
df[df > 0]

Unnamed: 0,A,B,C,D
2013-01-01,0.315967,2.483645,,1.227715
2013-01-02,0.460523,1.398195,0.858896,
2013-01-03,,0.737836,1.608039,
2013-01-04,0.712223,1.058712,,
2013-01-05,,0.450864,1.209508,1.083905
2013-01-06,0.771848,0.04524,,0.533179


### 使用isin() 方法來過濾

In [39]:
df2 = df.copy()

In [40]:
# Add a string column "E" with one label per row.
df2["E"] = [
    "one", "one", "two",
    "three", "four", "three",
]

In [41]:
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,0.315967,2.483645,-1.324061,1.227715,one
2013-01-02,0.460523,1.398195,0.858896,-1.390923,one
2013-01-03,-1.915223,0.737836,1.608039,-1.921122,two
2013-01-04,0.712223,1.058712,-0.603693,-1.731746,three
2013-01-05,-0.29567,0.450864,1.209508,1.083905,four
2013-01-06,0.771848,0.04524,-0.869707,0.533179,three


In [42]:
# Rows whose "E" label is one of the listed values.
df2.loc[df2["E"].isin(["two", "four"])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-1.915223,0.737836,1.608039,-1.921122,two
2013-01-05,-0.29567,0.450864,1.209508,1.083905,four


# 設置

### 設置一個新的列

In [43]:
# Integer Series indexed by six daily dates starting 2013-01-02.
s1 = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=pd.date_range(start="20130102", periods=6),
)

In [44]:
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [45]:
# Assigning a Series aligns it on the index: 2013-01-01 has no entry in s1 and
# shows up as NaN in F, while s1's 2013-01-07 value does not appear in df.
df["F"] = s1

### 通過標籤設置新的值

In [46]:
df.at[dates[0], "A"] = 0

### 通過位置設置新的值

In [47]:
df.iat[0, 1] = 0

### 通過一個numpy數組設置一組新值

In [48]:
# Overwrite every row of column D with the constant 5, supplied as a NumPy
# array with one element per row of df.
df.loc[:, "D"] = np.array([5] * len(df))

In [49]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-1.324061,5,
2013-01-02,0.460523,1.398195,0.858896,5,1.0
2013-01-03,-1.915223,0.737836,1.608039,5,2.0
2013-01-04,0.712223,1.058712,-0.603693,5,3.0
2013-01-05,-0.29567,0.450864,1.209508,5,4.0
2013-01-06,0.771848,0.04524,-0.869707,5,5.0


### 通過where操作來設置新的值

In [50]:
df2 =df.copy()

In [51]:
# Boolean-mask assignment: every positive cell is replaced by its negation, so
# no value in df2 is positive afterwards. NaN cells fail the `> 0` test and
# stay NaN.
df2[df2 > 0] = -df2

In [52]:
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-1.324061,-5,
2013-01-02,-0.460523,-1.398195,-0.858896,-5,-1.0
2013-01-03,-1.915223,-0.737836,-1.608039,-5,-2.0
2013-01-04,-0.712223,-1.058712,-0.603693,-5,-3.0
2013-01-05,-0.29567,-0.450864,-1.209508,-5,-4.0
2013-01-06,-0.771848,-0.04524,-0.869707,-5,-5.0


# 缺失值處理

### reindex()方法可以對指定軸上的索引進行改變/增加/刪除操作，這將返回原始數據的一個拷貝

In [53]:
# New frame holding the first four dates of df plus an extra column "E",
# which has no values yet.
df1 = df.reindex(index=dates[:4], columns=[*df.columns, "E"])

In [54]:
# Label-based slices include both endpoints, so E is set for the first two
# dates only; the remaining rows keep NaN.
df1.loc[dates[0]:dates[1], "E"] = 1

In [55]:
df1

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,-1.324061,5,,1.0
2013-01-02,0.460523,1.398195,0.858896,5,1.0,1.0
2013-01-03,-1.915223,0.737836,1.608039,5,2.0,
2013-01-04,0.712223,1.058712,-0.603693,5,3.0,


### 去掉包含缺失值的行

In [56]:
# Drop every row that contains at least one missing value (returns a new frame).
df1.dropna(axis=0, how="any")

Unnamed: 0,A,B,C,D,F,E
2013-01-02,0.460523,1.398195,0.858896,5,1.0,1.0


### 對缺失值進行填充

In [57]:
# Replace every missing value with 5 (returns a new frame).
df1.fillna(5)

Unnamed: 0,A,B,C,D,F,E
2013-01-01,0.0,0.0,-1.324061,5,5.0,1.0
2013-01-02,0.460523,1.398195,0.858896,5,1.0,1.0
2013-01-03,-1.915223,0.737836,1.608039,5,2.0,5.0
2013-01-04,0.712223,1.058712,-0.603693,5,3.0,5.0


### 取得標示缺失值（NaN）位置的Boolean遮罩

In [58]:
# Boolean mask with the same shape as df1: True where a value is missing.
df1.isna()

Unnamed: 0,A,B,C,D,F,E
2013-01-01,False,False,False,False,True,False
2013-01-02,False,False,False,False,False,False
2013-01-03,False,False,False,False,False,True
2013-01-04,False,False,False,False,False,True


# 統計 

### 執行描述性統計 

In [59]:
df.mean()

A   -0.044383
B    0.615141
C    0.146497
D    5.000000
F    3.000000
dtype: float64

### 在其他軸上進行相同的操作 

In [60]:
# Mean across the columns, i.e. one value per row (date).
df.mean(axis=1)

2013-01-01    0.918985
2013-01-02    1.743523
2013-01-03    1.486130
2013-01-04    1.833448
2013-01-05    2.072941
2013-01-06    1.989476
Freq: D, dtype: float64

### 對於擁有不同維度，需要對齊的對象進行操作。Pandas會自動沿著指定的維度進行廣播

In [61]:
# Series indexed by `dates`, shifted down two rows; the shift leaves NaN in
# the first two positions.
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=2)

In [62]:
s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [63]:
# Subtract s from every column, matching on the row index; rows where s is
# NaN come out as NaN.
df.sub(s, axis=0)

Unnamed: 0,A,B,C,D,F
2013-01-01,,,,,
2013-01-02,,,,,
2013-01-03,-2.915223,-0.262164,0.608039,4.0,1.0
2013-01-04,-2.287777,-1.941288,-3.603693,2.0,0.0
2013-01-05,-5.29567,-4.549136,-3.790492,0.0,-1.0
2013-01-06,,,,,


# Apply 

###  對數據應用函數

In [64]:
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-1.324061,5,
2013-01-02,0.460523,1.398195,0.858896,5,1.0
2013-01-03,-1.915223,0.737836,1.608039,5,2.0
2013-01-04,0.712223,1.058712,-0.603693,5,3.0
2013-01-05,-0.29567,0.450864,1.209508,5,4.0
2013-01-06,0.771848,0.04524,-0.869707,5,5.0


In [65]:
# Apply np.cumsum to each column: running total down the rows.
df.apply(np.cumsum, axis=0)

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-1.324061,5,
2013-01-02,0.460523,1.398195,-0.465165,10,1.0
2013-01-03,-1.4547,2.136031,1.142874,15,3.0
2013-01-04,-0.742477,3.194743,0.539181,20,6.0
2013-01-05,-1.038147,3.645607,1.748689,25,10.0
2013-01-06,-0.266299,3.690847,0.878982,30,15.0


In [66]:
# Spread (max minus min) of each column.
df.apply(lambda col: col.max() - col.min())

A    2.687071
B    1.398195
C    2.932100
D    0.000000
F    4.000000
dtype: float64

# 直方圖

In [67]:
# Ten random integers drawn from 0..6 (the upper bound 7 is exclusive).
s = pd.Series(np.random.randint(low=0, high=7, size=10))

In [68]:
s

0    4
1    5
2    1
3    3
4    6
5    4
6    4
7    3
8    1
9    4
dtype: int32

In [69]:
s.value_counts()

4    4
3    2
1    2
6    1
5    1
dtype: int64

# 字符串方法

### Series對象在其str屬性中配備了一組字符串處理方法，可以很容易地應用到數組中的每個元素

In [70]:
# Mixed-case strings plus one missing value, used to demonstrate the .str accessor.
s = pd.Series(
    ["A", "B", "C", "Aaba", "Baca", np.nan, "CABA", "dog", "cat"]
)

In [71]:
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

# 合併 

### Concat

In [72]:
# 10x4 frame of standard-normal draws with default integer row/column labels.
df = pd.DataFrame(data=np.random.randn(10, 4))

In [73]:
df

Unnamed: 0,0,1,2,3
0,0.150195,-0.453101,0.759285,-0.588875
1,-1.126013,-0.677584,0.305154,1.755979
2,-0.112683,-0.35062,-0.553661,0.244416
3,-0.664614,1.526593,0.763833,-2.207836
4,0.476591,-0.692365,1.619382,-0.010158
5,-0.350016,-0.002509,-0.8837,1.036144
6,-0.48669,1.148199,0.654207,0.350677
7,-0.049228,0.908544,-0.202062,1.59594
8,0.615008,-0.160732,0.01966,-0.99449
9,0.09155,0.468875,0.586842,-1.731453


In [74]:
# Split the frame into three consecutive row chunks: rows 0-2, 3-6 and 7-9.
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]

In [75]:
pieces

[          0         1         2         3
 0  0.150195 -0.453101  0.759285 -0.588875
 1 -1.126013 -0.677584  0.305154  1.755979
 2 -0.112683 -0.350620 -0.553661  0.244416,
           0         1         2         3
 3 -0.664614  1.526593  0.763833 -2.207836
 4  0.476591 -0.692365  1.619382 -0.010158
 5 -0.350016 -0.002509 -0.883700  1.036144
 6 -0.486690  1.148199  0.654207  0.350677,
           0         1         2         3
 7 -0.049228  0.908544 -0.202062  1.595940
 8  0.615008 -0.160732  0.019660 -0.994490
 9  0.091550  0.468875  0.586842 -1.731453]

In [76]:
# Stack the chunks back together row-wise, which rebuilds the original frame.
pd.concat(pieces, axis=0)

Unnamed: 0,0,1,2,3
0,0.150195,-0.453101,0.759285,-0.588875
1,-1.126013,-0.677584,0.305154,1.755979
2,-0.112683,-0.35062,-0.553661,0.244416
3,-0.664614,1.526593,0.763833,-2.207836
4,0.476591,-0.692365,1.619382,-0.010158
5,-0.350016,-0.002509,-0.8837,1.036144
6,-0.48669,1.148199,0.654207,0.350677
7,-0.049228,0.908544,-0.202062,1.59594
8,0.615008,-0.160732,0.01966,-0.99449
9,0.09155,0.468875,0.586842,-1.731453


### JOIN

In [77]:
# Left-hand frame for the merge demo; both rows share the same key "foo".
left = pd.DataFrame(
    {
        "key": ["foo", "foo"],
        "lval": [1, 2],
    }
)

In [78]:
# Right-hand frame for the merge demo; both rows share the same key "foo".
# NOTE(review): the value column is also named "lval", which is why the merged
# result shows lval_x / lval_y -- confirm whether "rval" was intended.
right = pd.DataFrame(
    {
        "key": ["foo", "foo"],
        "lval": [4, 5],
    }
)

In [79]:
left

Unnamed: 0,key,lval
0,foo,1
1,foo,2


In [80]:
right

Unnamed: 0,key,lval
0,foo,4
1,foo,5


In [81]:
# Join on "key". The key value repeats on both sides, so each left row pairs
# with each right row (2 x 2 = 4 result rows).
left.merge(right, on="key")

Unnamed: 0,key,lval_x,lval_y
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [82]:
# Left-hand frame with two distinct keys.
left = pd.DataFrame(
    {
        "key": ["foo", "bar"],
        "lval": [1, 2],
    }
)

In [83]:
# Right-hand frame with the same two distinct keys.
# NOTE(review): the value column is also named "lval", which is why the merged
# result shows lval_x / lval_y -- confirm whether "rval" was intended.
right = pd.DataFrame(
    {
        "key": ["foo", "bar"],
        "lval": [4, 5],
    }
)

In [84]:
left

Unnamed: 0,key,lval
0,foo,1
1,bar,2


In [85]:
right

Unnamed: 0,key,lval
0,foo,4
1,bar,5


In [86]:
# Join on "key". Each key appears once per side, so the result has one row per key.
left.merge(right, on="key")

Unnamed: 0,key,lval_x,lval_y
0,foo,1,4
1,bar,2,5


### Append

In [88]:
# 8x4 frame of standard-normal draws with columns A-D.
df = pd.DataFrame(np.random.randn(8, 4), columns=list("ABCD"))

In [89]:
df

Unnamed: 0,A,B,C,D
0,-1.416237,-1.06453,-0.876406,0.406315
1,-0.269181,0.519478,0.036476,-0.995094
2,1.008594,0.180147,0.434685,-0.862238
3,0.405314,0.195701,-0.072603,-0.039881
4,-0.19099,-1.08427,0.381104,-0.079369
5,0.530969,-0.465917,-0.947098,-1.061109
6,0.155443,-0.976369,-0.985825,-0.688135
7,0.134602,-1.394541,0.193101,-0.935044


In [90]:
# Row at position 3, returned as a Series whose index is the column labels.
s = df.iloc[3, :]

In [92]:
# Add row `s` to the bottom of df and renumber the index from 0.
# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so the
# Series is turned into a one-row frame and concatenated instead.
pd.concat([df, s.to_frame().T], ignore_index=True)

Unnamed: 0,A,B,C,D
0,-1.416237,-1.06453,-0.876406,0.406315
1,-0.269181,0.519478,0.036476,-0.995094
2,1.008594,0.180147,0.434685,-0.862238
3,0.405314,0.195701,-0.072603,-0.039881
4,-0.19099,-1.08427,0.381104,-0.079369
5,0.530969,-0.465917,-0.947098,-1.061109
6,0.155443,-0.976369,-0.985825,-0.688135
7,0.134602,-1.394541,0.193101,-0.935044
8,0.405314,0.195701,-0.072603,-0.039881


# 分組

### 對於"group by"操作，我們通常是指以下一個或多個操作步驟:

### (Splitting) 按照一些規則將數據分為不同的組
### (Applying)對於每組數據分別執行一個函數
### (Combining) 將結果組合到一個數據結構中

In [93]:
# Two label columns (A, B) to group on and two random numeric columns (C, D).
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)

In [94]:
df

Unnamed: 0,A,B,C,D
0,foo,one,-1.208052,0.049242
1,bar,one,-0.284158,-1.0433
2,foo,two,-0.033696,0.047499
3,bar,three,0.776081,-0.965938
4,foo,two,-1.632151,-0.563372
5,bar,two,-0.000664,0.172101
6,foo,one,1.274684,0.065878
7,foo,three,0.342365,0.762773


### 分組並對每個分組執行sum函數

In [95]:
# Sum the numeric columns within each group of "A". The columns are selected
# explicitly because pandas >= 2.0 no longer drops non-numeric columns
# automatically and would otherwise concatenate the strings in "B".
df.groupby("A")[["C", "D"]].sum()

Unnamed: 0_level_0,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.491258,-1.837137
foo,-1.25685,0.36202


# 改變形狀 

### Stack 

In [100]:
# Pair each first-level label with its second-level label, position by position.
tuples = list(
    zip(
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    )
)

In [101]:
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [102]:
# Two-level row index built from the (first, second) label pairs.
index = pd.MultiIndex.from_tuples(
    tuples,
    names=["first", "second"],
)

In [103]:
# 8x2 frame of standard-normal draws on the two-level index, columns A and B.
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=list("AB"))

In [105]:
# First four rows (the "bar" and "baz" groups), taken by position.
df2 = df.iloc[:4]

In [106]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.798238,-2.426456
bar,two,-1.525664,-0.941517
baz,one,1.454301,-1.784411
baz,two,0.211689,0.870214


In [108]:
# Move the column labels (A, B) into a new innermost index level; the result
# is a Series with a three-level index.
stacked = df2.stack(level=-1)

In [109]:
stacked

first  second   
bar    one     A    0.798238
               B   -2.426456
       two     A   -1.525664
               B   -0.941517
baz    one     A    1.454301
               B   -1.784411
       two     A    0.211689
               B    0.870214
dtype: float64

In [110]:
# Pivot the innermost index level (A/B) back into columns, undoing stack().
stacked.unstack(level=-1)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.798238,-2.426456
bar,two,-1.525664,-0.941517
baz,one,1.454301,-1.784411
baz,two,0.211689,0.870214


In [115]:
# Pivot index level 1 ("second": one/two) into columns.
stacked.unstack(level=1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,0.798238,-1.525664
bar,B,-2.426456,-0.941517
baz,A,1.454301,0.211689
baz,B,-1.784411,0.870214


In [116]:
# Pivot index level 0 ("first": bar/baz) into columns.
stacked.unstack(level=0)

Unnamed: 0_level_0,first,bar,baz
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.798238,1.454301
one,B,-2.426456,-1.784411
two,A,-1.525664,0.211689
two,B,-0.941517,0.870214


# 數據透視表 

In [117]:
# Twelve rows: three repeating label columns (A, B, C) and two random numeric
# columns (D, E).
df = pd.DataFrame(
    {
        "A": ["one", "one", "two", "three"] * 3,
        "B": ["A", "B", "C"] * 4,
        "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)

In [118]:
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.211097,0.837472
1,one,B,foo,-0.418677,0.749903
2,two,C,foo,1.309082,1.121827
3,three,A,bar,-1.368175,0.657734
4,one,B,bar,-1.516053,-1.96491
5,one,C,bar,0.899981,1.684062
6,two,A,foo,-0.183885,-0.932758
7,three,B,foo,0.930717,0.442162
8,one,C,foo,-0.270676,0.000779
9,one,A,bar,0.589357,0.009521


In [119]:
# Pivot D into a table with (A, B) as rows and the values of C as columns;
# (A, B, C) combinations that do not occur in df show as NaN.
df.pivot_table(values="D", index=["A", "B"], columns=["C"])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.589357,-0.211097
one,B,-1.516053,-0.418677
one,C,0.899981,-0.270676
three,A,-1.368175,
three,B,,0.930717
three,C,-0.099173,
two,A,,-0.183885
two,B,-0.644778,
two,C,,1.309082
