# 10 Minutes to pandas
请参阅[官方文档](https://pandas.pydata.org/docs/user_guide/10min.html)

In [2]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Package imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 数据整形

In [3]:
# Two-level row index: each first-level label is paired with 'one' and 'two'.
first_level = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
second_level = ['one', 'two'] * 4
tuples = list(zip(first_level, second_level))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

# 8 x 2 block of standard-normal samples; values differ on every run
# because no random seed is set.
values = np.random.randn(8, 2)
df = pd.DataFrame(values, index=index, columns=['A', 'B'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.011175,0.967244
bar,two,-1.306996,0.785107
baz,one,-0.651833,-1.030319
baz,two,0.145674,0.656003
foo,one,-1.148914,0.341089
foo,two,1.632073,-0.942795
qux,one,-0.051695,2.113905
qux,two,0.132565,-0.148505


In [4]:
# Select every row whose first-level label is 'bar'; the result is indexed by 'second' only.
df.loc['bar']

Unnamed: 0_level_0,A,B
second,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0.011175,0.967244
two,-1.306996,0.785107


In [5]:
# Drill down one index level at a time: first-level 'bar', then second-level 'one'.
# The result is a Series over the columns A and B.
df.loc['bar'].loc['one']

A    0.011175
B    0.967244
Name: one, dtype: float64

In [6]:
# stack() moves the column labels (A, B) into a new innermost index level,
# turning the frame into a Series with a three-level index.
stacked = df.stack()
stacked

first  second   
bar    one     A    0.011175
               B    0.967244
       two     A   -1.306996
               B    0.785107
baz    one     A   -0.651833
               B   -1.030319
       two     A    0.145674
               B    0.656003
foo    one     A   -1.148914
               B    0.341089
       two     A    1.632073
               B   -0.942795
qux    one     A   -0.051695
               B    2.113905
       two     A    0.132565
               B   -0.148505
dtype: float64

In [7]:
# Walk all three index levels in turn (first -> second -> column label) to reach a single scalar.
stacked.loc['bar'].loc['one'].loc['A']

0.011175381253084274

In [8]:
# unstack() with no argument moves the innermost index level (the A/B labels) back into columns.
stacked.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.011175,0.967244
bar,two,-1.306996,0.785107
baz,one,-0.651833,-1.030319
baz,two,0.145674,0.656003
foo,one,-1.148914,0.341089
foo,two,1.632073,-0.942795
qux,one,-0.051695,2.113905
qux,two,0.132565,-0.148505


In [9]:
# Unstacking a second time also moves 'second' into the columns, leaving 'first' as the only row index level.
stacked.unstack().unstack()

Unnamed: 0_level_0,A,A,B,B
second,one,two,one,two
first,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
bar,0.011175,-1.306996,0.967244,0.785107
baz,-0.651833,0.145674,-1.030319,0.656003
foo,-1.148914,1.632073,0.341089,-0.942795
qux,-0.051695,0.132565,2.113905,-0.148505


In [10]:
# unstack(1) moves index level 1 ('second') into the columns instead of the innermost level.
stacked.unstack(1)

Unnamed: 0_level_0,second,one,two
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,A,0.011175,-1.306996
bar,B,0.967244,0.785107
baz,A,-0.651833,0.145674
baz,B,-1.030319,0.656003
foo,A,-1.148914,1.632073
foo,B,0.341089,-0.942795
qux,A,-0.051695,0.132565
qux,B,2.113905,-0.148505


### 数据透视表

pivot table/轴向旋转表

In [11]:
# Twelve-row demo frame: three string key columns built by repeating short
# patterns, plus two columns of standard-normal samples.
key_a = ['one', 'one', 'two', 'three'] * 3
key_b = ['A', 'B', 'C'] * 4
key_c = (['foo'] * 3 + ['bar'] * 3) * 2

# D is drawn before E so the random stream is consumed in the same order as the dict literal form.
noise_d = np.random.randn(12)
noise_e = np.random.randn(12)

df = pd.DataFrame({
    'A': key_a,
    'B': key_b,
    'C': key_c,
    'D': noise_d,
    'E': noise_e,
})
df

Unnamed: 0,A,B,C,D,E
0,one,A,foo,-0.697575,0.408313
1,one,B,foo,-0.689773,1.33726
2,two,C,foo,0.223507,0.137397
3,three,A,bar,0.234732,-1.348073
4,one,B,bar,0.457273,0.553412
5,one,C,bar,0.517181,0.097894
6,two,A,foo,-0.520072,-0.514069
7,three,B,foo,-0.591167,0.287131
8,one,C,foo,1.211665,-1.443944
9,one,A,bar,0.905675,1.808546


In [12]:
# Pivot D with (A, B) as the row index and the values of C as columns;
# (A, B, C) combinations that do not occur in df show up as missing values.
pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])

Unnamed: 0_level_0,C,bar,foo
A,B,Unnamed: 2_level_1,Unnamed: 3_level_1
one,A,0.905675,-0.697575
one,B,0.457273,-0.689773
one,C,0.517181,1.211665
three,A,0.234732,
three,B,,-0.591167
three,C,1.487466,
two,A,,-0.520072
two,B,0.586686,
two,C,,0.223507


In [22]:
# Pivot E by A (rows) and C (columns); passing values as a list keeps 'E' as an outer column level.
pd.pivot_table(df, values=['E'], index=['A'], columns=['C'])

Unnamed: 0_level_0,E,E
C,bar,foo
A,Unnamed: 1_level_2,Unnamed: 2_level_2
one,0.819951,0.100543
three,-0.369825,0.287131
two,0.734968,-0.188336


In [23]:
# Cross-check the pivot table above: mean of D and E per value of C, restricted to rows where A == 'one'.
# The numeric columns are selected explicitly because A and B hold strings:
# pandas >= 2.0 raises TypeError when asked to average them instead of silently dropping them.
df[df.A == 'one'].groupby('C')[['D', 'E']].mean()

Unnamed: 0_level_0,D,E
C,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,0.62671,0.819951
foo,-0.058561,0.100543
