# pandas入门

## pandas的数据结构介绍

### Series

In [3]:
from pandas import DataFrame, Series
import pandas as pd

In [3]:
obj = Series([4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [2]:
import numpy as np

In [5]:
arr = np.arange(3)
arr[2]

2

In [6]:
obj2 = Series([4,7,-5,3], index=['a', 'b', 'c', 'd'])
obj2

a    4
b    7
c   -5
d    3
dtype: int64

In [10]:
obj2[['a', 'b', 'd']]

a    4
b    7
d    3
dtype: int64

In [11]:
sdata = {'ohio':35000, 'texas': 71000, 'oregon': 16000, 'Utah': 5000}

In [12]:
obj3 = Series(sdata)
obj3

ohio      35000
texas     71000
oregon    16000
Utah       5000
dtype: int64

In [13]:
states = ['california', 'ohio', 'oregon', 'texas']
obj4 = Series(sdata, index = states)
obj4

california        NaN
ohio          35000.0
oregon        16000.0
texas         71000.0
dtype: float64

In [14]:
obj4.isnull()

california     True
ohio          False
oregon        False
texas         False
dtype: bool

In [15]:
obj3 + obj4

Utah               NaN
california         NaN
ohio           70000.0
oregon         32000.0
texas         142000.0
dtype: float64

In [16]:
obj4.name = 'population'
obj4.index.name = 'state'
obj4

state
california        NaN
ohio          35000.0
oregon        16000.0
texas         71000.0
Name: population, dtype: float64

### Dataframe

In [17]:
data = {'state' : ['ohio', 'ohio', 'ohio', 'nevada', 'nevada'],
       'year' : [2000, 2001, 2002, 2001, 2002],
       'pop' : [1.5, 1.7, 3.6, 2.4, 2.9]}
data

{'state': ['ohio', 'ohio', 'ohio', 'nevada', 'nevada'],
 'year': [2000, 2001, 2002, 2001, 2002],
 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}

In [18]:
frame = DataFrame(data)
frame

Unnamed: 0,state,year,pop
0,ohio,2000,1.5
1,ohio,2001,1.7
2,ohio,2002,3.6
3,nevada,2001,2.4
4,nevada,2002,2.9


In [19]:
frame2 = DataFrame(data, columns=['year', 'state', 'pop', 'debt'],
                  index=['one', 'two', 'three', 'four', 'five'])
frame2

Unnamed: 0,year,state,pop,debt
one,2000,ohio,1.5,
two,2001,ohio,1.7,
three,2002,ohio,3.6,
four,2001,nevada,2.4,
five,2002,nevada,2.9,


In [20]:
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [21]:
frame2['state']

one        ohio
two        ohio
three      ohio
four     nevada
five     nevada
Name: state, dtype: object

In [22]:
frame2.loc['three']

year     2002
state    ohio
pop       3.6
debt      NaN
Name: three, dtype: object

In [23]:
frame2.debt = 16.5
frame2

Unnamed: 0,year,state,pop,debt
one,2000,ohio,1.5,16.5
two,2001,ohio,1.7,16.5
three,2002,ohio,3.6,16.5
four,2001,nevada,2.4,16.5
five,2002,nevada,2.9,16.5


In [24]:
frame2.debt = np.arange(5)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,ohio,1.5,0
two,2001,ohio,1.7,1
three,2002,ohio,3.6,2
four,2001,nevada,2.4,3
five,2002,nevada,2.9,4


In [25]:
frame2['eastern'] = frame2.state == 'ohio'
frame2

Unnamed: 0,year,state,pop,debt,eastern
one,2000,ohio,1.5,0,True
two,2001,ohio,1.7,1,True
three,2002,ohio,3.6,2,True
four,2001,nevada,2.4,3,False
five,2002,nevada,2.9,4,False


In [26]:
del frame2['eastern']

In [27]:
frame2

Unnamed: 0,year,state,pop,debt
one,2000,ohio,1.5,0
two,2001,ohio,1.7,1
three,2002,ohio,3.6,2
four,2001,nevada,2.4,3
five,2002,nevada,2.9,4


In [28]:
pop = {'nevada': {2001:2.4, 2002: 2.9},
      'ohio':{2000:1.5, 2001:1.7, 2002 :3.6}}
pop

{'nevada': {2001: 2.4, 2002: 2.9}, 'ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

In [29]:
frame3 = DataFrame(pop)
frame3

Unnamed: 0,nevada,ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [30]:
frame3.T

Unnamed: 0,2000,2001,2002
nevada,,2.4,2.9
ohio,1.5,1.7,3.6


In [31]:
frame3.index.name = 'year'
frame3.columns.name = 'state'

In [32]:
frame3

state,nevada,ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [33]:
frame3.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])

### 索引对象

In [34]:
obj = Series(range(3), index=['a', 'b', 'c'])
index = obj.index
index

Index(['a', 'b', 'c'], dtype='object')

In [35]:
frame3

state,nevada,ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [36]:
'ohio' in frame3.columns

True

In [37]:
2003 in frame3.columns

False

## 基本功能

### 重新索引

In [38]:
obj = Series([4.5, 7.2, -5.3, 3.6], index = ['d', 'b','a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [39]:
obj2 = obj.reindex(['a','b','c','d','e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [40]:
obj.reindex(['a','b','c','d','e'],fill_value=0)

a   -5.3
b    7.2
c    3.6
d    4.5
e    0.0
dtype: float64

In [41]:
obj3 = Series(['blue', 'purple', 'yellow'], index=[0,2,4])
obj3

0      blue
2    purple
4    yellow
dtype: object

In [42]:
obj3.reindex(range(6),method = 'ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [43]:
frame = DataFrame(np.arange(9).reshape((3,3)), index = ['a', 'c', 'd'],
                 columns=['ohio', 'texas', 'california'])
frame

Unnamed: 0,ohio,texas,california
a,0,1,2
c,3,4,5
d,6,7,8


In [44]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2

Unnamed: 0,ohio,texas,california
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [45]:
states = ['texas', 'utah', 'california']
frame.reindex(columns = states)

Unnamed: 0,texas,utah,california
a,1,,2
c,4,,5
d,7,,8


### 丢弃指定轴上的项

In [47]:
obj = Series(np.arange(5.), index = ['a','b','c','d','e'])
new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [48]:
obj.drop(['d','c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [49]:
data = DataFrame(np.arange(16).reshape((4,4)),
                index = ['ohio','colorado','utha','new york'],
                columns = ['one','two','three','four'])
data

Unnamed: 0,one,two,three,four
ohio,0,1,2,3
colorado,4,5,6,7
utha,8,9,10,11
new york,12,13,14,15


In [50]:
data.drop(['colorado','ohio'])

Unnamed: 0,one,two,three,four
utha,8,9,10,11
new york,12,13,14,15


In [52]:
data.drop('two',axis=1)

Unnamed: 0,one,three,four
ohio,0,2,3
colorado,4,6,7
utha,8,10,11
new york,12,14,15


### 索引、选取和过滤

In [53]:
obj = Series(np.arange(4.), index=['a','b','c','d'])
obj

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [58]:
obj[[1,3]]

b    1.0
d    3.0
dtype: float64

In [62]:
data = DataFrame(np.arange(16).reshape((4,4)),
                index = ['ohio','colorado','utha','new york'],
                columns = ['one','two','three','four'])
data

Unnamed: 0,one,two,three,four
ohio,0,1,2,3
colorado,4,5,6,7
utha,8,9,10,11
new york,12,13,14,15


In [91]:
data[['one','two']]

Unnamed: 0,one,two
ohio,0,1
colorado,4,5
utha,8,9
new york,12,13


In [93]:
data.loc[['ohio','colorado']]

Unnamed: 0,one,two,three,four
ohio,0,1,2,3
colorado,4,5,6,7


In [85]:
data.iloc[[1,2]]

Unnamed: 0,one,two,three,four
colorado,4,5,6,7
utha,8,9,10,11


In [87]:
data.iloc[:,[1,3]]

Unnamed: 0,two,four
ohio,1,3
colorado,5,7
utha,9,11
new york,13,15


### 算术运算和数据对齐

In [4]:
s1 = Series([7.3, -2.5, 3.4, 1.5], index=['a','c','d','e'])
s2 = Series([-2.1, 3.6, -1.5, 4, 3.1],index=['a','c','e','f','g'])

In [5]:
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [6]:
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [7]:
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [10]:
list('abc')

['a', 'b', 'c']

In [9]:
df1 = DataFrame(np.arange(9.).reshape((3,3)),columns = list('bcd'),
               index = ['ohio','texas','colorado'])
df1

Unnamed: 0,b,c,d
ohio,0.0,1.0,2.0
texas,3.0,4.0,5.0
colorado,6.0,7.0,8.0


In [11]:
df2 = DataFrame(np.arange(12.).reshape((4,3)),columns = list('bde'),
               index = ['utah','ohio','texas','oregon'])
df2

Unnamed: 0,b,d,e
utah,0.0,1.0,2.0
ohio,3.0,4.0,5.0
texas,6.0,7.0,8.0
oregon,9.0,10.0,11.0


In [12]:
df1 + df2

Unnamed: 0,b,c,d,e
colorado,,,,
ohio,3.0,,6.0,
oregon,,,,
texas,9.0,,12.0,
utah,,,,


#### 在算数方法中填充值

In [14]:
df1.add(df2, fill_value=0)

Unnamed: 0,b,c,d,e
colorado,6.0,7.0,8.0,
ohio,3.0,1.0,6.0,5.0
oregon,9.0,,10.0,11.0
texas,9.0,4.0,12.0,8.0
utah,0.0,,1.0,2.0


### Dataframe和series之间的运算

In [16]:
arr = np.arange(12.).reshape((3,4))
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [18]:
arr[0]

array([0., 1., 2., 3.])

In [19]:
arr - arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [22]:
frame = DataFrame(np.arange(12.).reshape(4,3), columns=list('bde'),
                 index=['utah','ohio','texas','oregon'])
frame

Unnamed: 0,b,d,e
utah,0.0,1.0,2.0
ohio,3.0,4.0,5.0
texas,6.0,7.0,8.0
oregon,9.0,10.0,11.0


In [24]:
series = frame.iloc[0]
series

b    0.0
d    1.0
e    2.0
Name: utah, dtype: float64

In [25]:
frame - series

Unnamed: 0,b,d,e
utah,0.0,0.0,0.0
ohio,3.0,3.0,3.0
texas,6.0,6.0,6.0
oregon,9.0,9.0,9.0


In [27]:
series2 = Series(range(3),index=list('bef'))
series2

b    0
e    1
f    2
dtype: int64

In [28]:
frame + series2

Unnamed: 0,b,d,e,f
utah,0.0,,3.0,
ohio,3.0,,6.0,
texas,6.0,,9.0,
oregon,9.0,,12.0,


In [32]:
frame.add(series2, axis=1)

Unnamed: 0,b,d,e,f
utah,0.0,,3.0,
ohio,3.0,,6.0,
texas,6.0,,9.0,
oregon,9.0,,12.0,


### 函数的应用和映射

In [33]:
frame = DataFrame(np.random.randn(4,3),columns=list('bde'),
                 index=['utah','ohio','texas','oregon'])
frame

Unnamed: 0,b,d,e
utah,-0.100367,1.19795,-0.48977
ohio,0.989146,1.5519,-1.1531
texas,-0.39346,0.856983,-1.317878
oregon,-0.512977,-1.650947,-1.026261


In [34]:
np.abs(frame)

Unnamed: 0,b,d,e
utah,0.100367,1.19795,0.48977
ohio,0.989146,1.5519,1.1531
texas,0.39346,0.856983,1.317878
oregon,0.512977,1.650947,1.026261


In [35]:
f = lambda x : x.max() - x.min()
frame.apply(f)

b    1.502123
d    3.202848
e    0.828108
dtype: float64

In [37]:
frame.apply(f,axis=1)

utah      1.687720
ohio      2.705001
texas     2.174861
oregon    1.137971
dtype: float64

### 序列和排名

In [38]:
obj = Series(range(4), index = ['d','a','b','c'])
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [39]:
frame = DataFrame(np.arange(8).reshape((2,4)), index=['three','one'],
                 columns=['d','a','b','c'])
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [40]:
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [42]:
frame.sort_index(axis=1, ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [43]:
obj = Series([4,7,-3,2])
obj

0    4
1    7
2   -3
3    2
dtype: int64

In [45]:
obj.sort_values()

2   -3
3    2
0    4
1    7
dtype: int64

In [51]:
frame = DataFrame({'b':[4,7,-3,2],'a':[0,1,0,1]})
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


### 带有重复值的轴索引

In [52]:
obj = Series(range(5), index=['a','a','b','b','c'])
obj

a    0
a    1
b    2
b    3
c    4
dtype: int64

In [53]:
obj.index.is_unique

False

### 汇总和计算描述统计

In [54]:
df = DataFrame([[1.4,np.nan],[7.1, -4.5],
               [np.nan,np.nan],[0.75, -1.3]],
              index=['a','b','c','d'],
              columns=['one','two'])
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [55]:
df.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [56]:
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


### 唯一值、值计数以及成员资格

In [8]:
obj = Series(['c','a','d','a','a','b','b','c','c'])
obj.unique()

array(['c', 'a', 'd', 'b'], dtype=object)

In [9]:
obj.value_counts()

c    3
a    3
b    2
d    1
dtype: int64

In [10]:
obj.isin(['b','c'])

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool

## 处理缺失值

In [11]:
string_data = Series(['aardvark','artichoke',np.nan,'avocado'])
string_data

0     aardvark
1    artichoke
2          NaN
3      avocado
dtype: object

In [12]:
string_data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [13]:
string_data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [14]:
from numpy import nan as NA

In [15]:
data = Series([1,NA, 3.5, NA, 7])
data

0    1.0
1    NaN
2    3.5
3    NaN
4    7.0
dtype: float64

In [16]:
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

In [17]:
data = DataFrame([[1, 6.5, 3],[1, NA, NA],
                 [NA, NA, NA],[NA, 6.5, 3]])
data

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [18]:
data.dropna()

Unnamed: 0,0,1,2
0,1.0,6.5,3.0


In [19]:
data.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
3,,6.5,3.0


In [20]:
df = DataFrame(np.random.randn(7,3))
df

Unnamed: 0,0,1,2
0,1.000029,-0.077407,0.108075
1,-0.664426,0.576428,0.80403
2,0.491499,0.275917,0.888743
3,0.463826,-1.683653,-1.247256
4,1.063204,0.862045,-0.969239
5,1.028642,1.883957,0.172917
6,-0.144527,0.207049,0.880532


In [21]:
df.iloc[:4,1] =NA
df.iloc[:2,2]=NA
df

Unnamed: 0,0,1,2
0,1.000029,,
1,-0.664426,,
2,0.491499,,0.888743
3,0.463826,,-1.247256
4,1.063204,0.862045,-0.969239
5,1.028642,1.883957,0.172917
6,-0.144527,0.207049,0.880532


In [30]:
df.dropna(thresh=0,axis=1)

Unnamed: 0,0,1,2
0,1.000029,,
1,-0.664426,,
2,0.491499,,0.888743
3,0.463826,,-1.247256
4,1.063204,0.862045,-0.969239
5,1.028642,1.883957,0.172917
6,-0.144527,0.207049,0.880532


### 填补缺失值

In [32]:
df.fillna(0)

Unnamed: 0,0,1,2
0,1.000029,0.0,0.0
1,-0.664426,0.0,0.0
2,0.491499,0.0,0.888743
3,0.463826,0.0,-1.247256
4,1.063204,0.862045,-0.969239
5,1.028642,1.883957,0.172917
6,-0.144527,0.207049,0.880532


## 层次化索引

In [4]:
data = Series(np.random.randn(10),index = [['a','a','a','b','b','b',
                                           'c','c','d','d'],
                                          [1,2,3,1,2,3,1,2,2,3]])
data

a  1    0.309081
   2   -0.366852
   3    0.727288
b  1   -1.107989
   2   -0.709528
   3   -1.142970
c  1    0.556823
   2    1.986777
d  2   -1.380068
   3   -1.267169
dtype: float64

In [5]:
data.index

MultiIndex(levels=[['a', 'b', 'c', 'd'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])

In [6]:
data['b']

1   -1.107989
2   -0.709528
3   -1.142970
dtype: float64

In [8]:
data[:,2]

a   -0.366852
b   -0.709528
c    1.986777
d   -1.380068
dtype: float64

In [9]:
data.unstack()

Unnamed: 0,1,2,3
a,0.309081,-0.366852,0.727288
b,-1.107989,-0.709528,-1.14297
c,0.556823,1.986777,
d,,-1.380068,-1.267169


In [12]:
frame =  DataFrame(np.arange(12).reshape((4,3)),
                  index=[['a','a','b','b'],[1,2,1,2]],
                  columns=[['ohio','ohio','colorado'],
                          ['green','red','green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,ohio,ohio,colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,green,red,green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [13]:
frame.index.names=['key1','key2']
frame.columns.names=['state','color']
frame

Unnamed: 0_level_0,state,ohio,ohio,colorado
Unnamed: 0_level_1,color,green,red,green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [15]:
frame['ohio']

Unnamed: 0_level_0,color,green,red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [16]:
MultiIndex.from_arrays([['ohio','ohio','colorado'],['green','red','green']],
                      names=['state','color'])

NameError: name 'MultiIndex' is not defined

## 重排分级顺序 

In [17]:
frame

Unnamed: 0_level_0,state,ohio,ohio,colorado
Unnamed: 0_level_1,color,green,red,green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [18]:
frame.swaplevel('key1','key2')

Unnamed: 0_level_0,state,ohio,ohio,colorado
Unnamed: 0_level_1,color,green,red,green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [20]:
frame.sort_index(level = 1)

Unnamed: 0_level_0,state,ohio,ohio,colorado
Unnamed: 0_level_1,color,green,red,green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [21]:
frame.swaplevel(0,1).sort_index(level = 1)

Unnamed: 0_level_0,state,ohio,ohio,colorado
Unnamed: 0_level_1,color,green,red,green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


## 根据级别汇总统计 

In [22]:
frame.sum(level='key2')

state,ohio,ohio,colorado
color,green,red,green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16


In [23]:
frame.sum(level='color',axis=1)

Unnamed: 0_level_0,color,green,red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,2,1
a,2,8,4
b,1,14,7
b,2,20,10


## 使用DataFrame的列 

In [24]:
frame = DataFrame({'a':range(7),'b':range(7,0,-1),
                  'c':['one','one','one','two','two','two','two'],
                  'd':[0,1,2,0,1,2,3]})
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [25]:
frame2 = frame.set_index(['c','d'])
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [26]:
frame2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1


## 其他关于pandas的话题 

In [28]:
ser = Series(np.arange(3.))
ser[-1]

KeyError: -1

In [29]:
ser.iloc[-1]

2.0

In [30]:
frame = DataFrame(np.arange(6).reshape(3,2), index=[2,0,1])
frame

Unnamed: 0,0,1
2,0,1
0,2,3
1,4,5


In [32]:
frame[0]

2    0
0    2
1    4
Name: 0, dtype: int64

In [33]:
frame.iloc[0]

0    0
1    1
Name: 2, dtype: int64

## 面板数据 