In [54]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [2]:
# Series basics: built from a plain list, a Series gets a default 0..n-1 integer index.
obj = Series(data=[1, 2, 3, 4])

In [3]:
# Echo the Series: the repr lists each index label beside its value.
obj

0    1
1    2
2    3
3    4
dtype: int64

In [4]:
# Inspect the two parts of a Series: its index and the array of values.
obj.index, obj.values

(RangeIndex(start=0, stop=4, step=1), array([1, 2, 3, 4]))

In [6]:
# A Series with explicit string labels instead of the default integer index.
obj2 = Series(data=[1, 2, 3, 4], index=list('abcd'))

In [7]:
# The index now holds the string labels; the values array is unchanged.
obj2.index, obj2.values

(Index([u'a', u'b', u'c', u'd'], dtype='object'), array([1, 2, 3, 4]))

In [8]:
# Label-based lookup returns the scalar stored under 'a'.
obj2['a']

1

In [16]:
# Passing a list of labels selects several entries at once and returns a Series.
obj2[['a', 'b']]

a    1
b    2
dtype: int64

In [17]:
# Boolean-mask filtering: keep the entries whose value is positive (all four here).
obj2[obj2 > 0]

a    1
b    2
c    3
d    4
dtype: int64

In [18]:
# Arithmetic with a scalar is applied element-wise and keeps the index.
obj * 2

0    2
1    4
2    6
3    8
dtype: int64

In [19]:
# Element-wise power; the string labels are preserved in the result.
obj2 ** 2

a     1
b     4
c     9
d    16
dtype: int64

In [20]:
# `in` tests membership against the index labels, like a dict's keys.
'a' in obj2, 'e' in obj2

(True, False)

In [22]:
# Plain label -> value mapping used to build Series below.
sdata = dict(a=1, b=2, c=3, d=4)

In [23]:
# A dict converts directly to a Series: keys become the index, values the data.
obj3 = Series(sdata)
obj3

a    1
b    2
c    3
d    4
dtype: int64

In [24]:
# Passing an index alongside a dict selects by label: 'd' is left out because it
# is not requested, and 'e' has no entry in sdata so it comes out as NaN.
header = list('abce')
obj4 = Series(data=sdata, index=header)

In [25]:
# Note the float64 dtype: the missing value for 'e' is represented as NaN.
obj4

a    1.0
b    2.0
c    3.0
e    NaN
dtype: float64

In [26]:
# Detect missing values: True where the entry is NaN.
pd.isnull(obj4)

a    False
b    False
c    False
e     True
dtype: bool

In [27]:
# Inverse check: True where a value is present.
pd.notnull(obj4)

a     True
b     True
c     True
e    False
dtype: bool

In [29]:
# Use the not-null mask as a filter to keep only the entries that have a value.
obj4[obj4.notnull()]

a    1.0
b    2.0
c    3.0
dtype: float64

In [30]:
# Method form of the missing-value check; same result as pd.isnull(obj4).
obj4.isnull()

a    False
b    False
c    False
e     True
dtype: bool

In [31]:
# Series addition aligns on index labels: labels missing from either operand
# ('d' in obj4, 'e' in obj3) produce NaN in the result.
obj3 + obj4

a    2.0
b    4.0
c    6.0
d    NaN
e    NaN
dtype: float64

In [33]:
# Names: the index of a Series can carry its own name.
obj4.index.name = 'index4'

In [34]:
# The Series itself can be named as well.
obj4.name = 'obj4'

In [35]:
# The repr now shows the index name above the labels and the Series name in the footer.
obj4

index4
a    1.0
b    2.0
c    3.0
e    NaN
Name: obj4, dtype: float64

In [36]:
# Relabel the Series in place by assigning a new index with one label per value.
obj.index = list('cdef')

In [37]:
# Same values as before, now labelled 'c'..'f'.
obj

c    1
d    2
e    3
f    4
dtype: int64

In [40]:
# DataFrame: build one from a dict of equal-length lists (one key per column).
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9],
)
frame = DataFrame(data)
frame

Unnamed: 0,pop,state,year
0,1.5,Ohio,2000
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002


In [41]:
# `columns` fixes the column order explicitly.
DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [48]:
# A requested column that is absent from the data ('debt') is created and filled
# with NaN; `index` replaces the default integer row labels.
frame2 = DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['a', 'b', 'c', 'd', 'e'],
)
frame2

Unnamed: 0,year,state,pop,debt
a,2000,Ohio,1.5,
b,2001,Ohio,1.7,
c,2002,Ohio,3.6,
d,2001,Nevada,2.4,
e,2002,Nevada,2.9,


In [49]:
# Selecting a column by key returns a Series that keeps the frame's row labels
# and uses the column name as its name.
frame2['state'], frame2['year']

(a      Ohio
 b      Ohio
 c      Ohio
 d    Nevada
 e    Nevada
 Name: state, dtype: object, a    2000
 b    2001
 c    2002
 d    2001
 e    2002
 Name: year, dtype: int64)

In [51]:
# Select a single row by its index label; the result is a Series keyed by column.
# `.ix` was deprecated in pandas 0.20 and removed in 1.0, so the label-based
# `.loc` indexer is used instead; it returns the same row for a label lookup.
frame2.loc['c']

year     2002
state    Ohio
pop       3.6
debt      NaN
Name: c, dtype: object

In [52]:
# Assigning a scalar broadcasts it to every row of the column.
frame2['debt'] = 14.5

In [53]:
# Every row now carries debt = 14.5.
frame2

Unnamed: 0,year,state,pop,debt
a,2000,Ohio,1.5,14.5
b,2001,Ohio,1.7,14.5
c,2002,Ohio,3.6,14.5
d,2001,Nevada,2.4,14.5
e,2002,Nevada,2.9,14.5


In [55]:
# Assigning an array sets the column element-wise; its length (5) matches the number of rows.
frame2['debt'] = np.arange(5, dtype=float)

In [56]:
# debt now holds 0.0 .. 4.0, one value per row.
frame2

Unnamed: 0,year,state,pop,debt
a,2000,Ohio,1.5,0.0
b,2001,Ohio,1.7,1.0
c,2002,Ohio,3.6,2.0
d,2001,Nevada,2.4,3.0
e,2002,Nevada,2.9,4.0


In [58]:
# Assigning a Series aligns on index labels: rows 'a', 'c' and 'e' receive values,
# while rows missing from the Series ('b', 'd') become NaN.
val = Series(data=[-1.2, -1.5, -1.7], index=list('ace'))
frame2['debt'] = val

In [59]:
# debt holds the aligned values; rows 'b' and 'd' show as missing.
frame2

Unnamed: 0,year,state,pop,debt
a,2000,Ohio,1.5,-1.2
b,2001,Ohio,1.7,
c,2002,Ohio,3.6,-1.5
d,2001,Nevada,2.4,
e,2002,Nevada,2.9,-1.7


In [60]:
# Assigning to a key that does not exist yet creates the column; here a boolean flag for Ohio rows.
frame2['eastern'] = frame2['state'] == 'Ohio'

In [61]:
# The new boolean 'eastern' column appears as the last column.
frame2

Unnamed: 0,year,state,pop,debt,eastern
a,2000,Ohio,1.5,-1.2,True
b,2001,Ohio,1.7,,True
c,2002,Ohio,3.6,-1.5,True
d,2001,Nevada,2.4,,False
e,2002,Nevada,2.9,-1.7,False


In [63]:
# Author's note (translated): assignment is not a copy.
# NOTE(review): presumably the caveat that a selected column is a view on the
# frame's data rather than a copy — confirm.
# `del` removes the 'eastern' column from frame2.
del frame2['eastern']

In [64]:
# 'eastern' is gone; the remaining columns are unchanged.
frame2

Unnamed: 0,year,state,pop,debt
a,2000,Ohio,1.5,-1.2
b,2001,Ohio,1.7,
c,2002,Ohio,3.6,-1.5
d,2001,Nevada,2.4,
e,2002,Nevada,2.9,-1.7


In [65]:
# Nested dicts: when passed to DataFrame, the outer keys become the columns
# and the inner keys become the row index.
pop = dict(
    Nevada={2001: 2.4, 2002: 2.9},
    Ohio={2000: 1.5, 2001: 1.6, 2002: 1.9},
)

In [68]:
# The union of the inner keys forms the row index; Nevada has no 2000 entry,
# so that cell is NaN.
frame3 = DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.6
2002,2.9,1.9


In [69]:
# Transpose: rows and columns swap places.
frame3.T

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.6,1.9


In [70]:
# An explicit index picks the rows: 2000 is left out, and 2003 (absent from
# every inner dict) is all-NaN.
DataFrame(pop, index=[2001, 2002, 2003])

Unnamed: 0,Nevada,Ohio
2001,2.4,1.6
2002,2.9,1.9
2003,,


In [71]:
# A dict of Series behaves like a nested dict: each Series' labels are aligned
# into the row index. The slices are positional (all but the last Ohio row,
# the first two Nevada rows).
pdata = {
    'Ohio': frame3['Ohio'].iloc[:-1],
    'Nevada': frame3['Nevada'].iloc[:2],
}
DataFrame(pdata)

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.6


In [72]:
# frame3 itself is unchanged by building new frames from its columns.
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.6
2002,2.9,1.9


In [73]:
# Give both axes a name; the names are shown in the header of the frame's repr.
frame3.index.name = 'Year'
frame3.columns.name = 'State'

In [74]:
# The axis names 'State' and 'Year' now appear in the header.
frame3

State,Nevada,Ohio
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.6
2002,2.9,1.9


In [75]:
# .values returns the frame's data as a 2-D NumPy array (floats here).
frame3.values

array([[ nan,  1.5],
       [ 2.4,  1.6],
       [ 2.9,  1.9]])

In [76]:
# With columns of different types the resulting array has dtype=object.
frame2.values

array([[2000, 'Ohio', 1.5, -1.2],
       [2001, 'Ohio', 1.7, nan],
       [2002, 'Ohio', 3.6, -1.5],
       [2001, 'Nevada', 2.4, nan],
       [2002, 'Nevada', 2.9, -1.7]], dtype=object)

In [None]:
# Index objects