In [3]:
import pandas as pd
import numpy as np

In [4]:
from pandas import Series, DataFrame

# Series使用

In [16]:
# Build a Series from a plain list; with no index given, pandas labels the rows 0..n-1.
test = pd.Series(data=[1, 3, 2, 1])
test

0    1
1    3
2    2
3    1
dtype: int64

In [17]:
# .values returns the Series data as a NumPy array (the index labels are not included).
test.values

array([1, 3, 2, 1])

In [10]:
# No index was supplied when `test` was built, so it carries the default RangeIndex.
test.index

RangeIndex(start=0, stop=4, step=1)

In [18]:
# Supply explicit string labels so each value can be looked up by name instead of position.
test2 = pd.Series(data=[1, 4, 2, 3], index=list('abcd'))
test2

a    1
b    4
c    2
d    3
dtype: int64

In [19]:
# The custom labels are stored in an Index object.
test2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [20]:
# The data itself is still exposed as a plain NumPy array, without the labels.
test2.values

array([1, 4, 2, 3])

In [21]:
# Boolean-mask selection: keep only the entries greater than 1 (their labels are preserved).
test2.loc[test2 > 1]

b    4
c    2
d    3
dtype: int64

## numpy操作

In [22]:
# NumPy ufuncs operate element-wise on a Series and keep its index labels.
np.square(test2)

a     1
b    16
c     4
d     9
dtype: int64

In [23]:
# `in` tests membership against the index labels (like dict keys), not the values.
'c' in test2

True

In [24]:
# 'q' is not one of the index labels, so the membership test is False.
'q' in test2

False

## 字典操作

In [31]:
# A dict converts directly into a Series: keys become the index, values become the data.
dict0 = dict(w=1, o=2, r=3, l=4, d=5)
test3 = pd.Series(data=dict0)
test3

w    1
o    2
r    3
l    4
d    5
dtype: int64

## NaN标记缺失值

In [32]:
# Re-index the dict data against an explicit label list.
# 'h' and 'e' are not keys of dict0, so those rows come out as NaN,
# and the result is displayed with a float64 dtype.
keys = list('heworld')
test4 = pd.Series(dict0, index=keys)
test4

h    NaN
e    NaN
w    1.0
o    2.0
r    3.0
l    4.0
d    5.0
dtype: float64

## pd.isnull / pd.notnull 检查缺失值

In [34]:
# Element-wise missing-value check: True where the entry is NaN.
pd.isnull(test4)


h     True
e     True
w    False
o    False
r    False
l    False
d    False
dtype: bool

In [35]:
# The complement of pd.isnull: True where a value is present.
pd.notnull(test4)

h    False
e    False
w     True
o     True
r     True
l     True
d     True
dtype: bool

## name 属性

In [36]:
# Both the index and the Series itself can carry a name; each shows up in the printed output.
test4.index.name = 'key'
test4.name = 'primary'
test4

key
h    NaN
e    NaN
w    1.0
o    2.0
r    3.0
l    4.0
d    5.0
Name: primary, dtype: float64

## 改变索引

In [37]:
# Current state of `test`: still labelled with the default integer index.
test

0    1
1    3
2    2
3    1
dtype: int64

In [38]:
# Assigning to .index relabels the existing Series in place; the values are untouched.
test.index = list('abcd')

In [39]:
# Same values as before, now addressed by the labels 'a'..'d'.
test

a    1
b    3
c    2
d    1
dtype: int64

# DataFrame

In [40]:
# A dict of equal-length lists becomes a DataFrame: one column per key,
# with the rows numbered by a default integer index.
data = {
    'key': ['h', 'e', 'l', 'l', 'o', 'w'],
    'num': [1, 2, 3, 4, 5, 6],
    '?': ['!', '@', '#', '$', '%', '^'],
}
frame = pd.DataFrame(data=data)
frame

Unnamed: 0,key,num,?
0,h,1,!
1,e,2,@
2,l,3,#
3,l,4,$
4,o,5,%
5,w,6,^


## head方法选出前五行

In [43]:
# head() shows the first five rows when called without an argument; the count is spelled out here.
frame.head(n=5)

Unnamed: 0,key,num,?
0,h,1,!
1,e,2,@
2,l,3,#
3,l,4,$
4,o,5,%


## 按照指定顺序排列column

In [44]:
# `columns=` picks which dict keys to use and fixes their left-to-right order.
pd.DataFrame(
    data=data,
    columns=['num', 'key', '?'],
)

Unnamed: 0,num,key,?
0,1,h,!
1,2,e,@
2,3,l,#
3,4,l,$
4,5,o,%
5,6,w,^


## 缺失值

In [55]:
# 'will_be_NaN' is not a key of `data`, so that column is created empty (all missing).
# `index=` replaces the default 0..5 row labels with custom ones.
frame2 = pd.DataFrame(
    data=data,
    columns=['num', '?', 'key', 'will_be_NaN'],
    index=['row1', 'row2', 'row3', 'row4', 'row5', 'row6'],
)
frame2

Unnamed: 0,num,?,key,will_be_NaN
row1,1,!,h,
row2,2,@,e,
row3,3,#,l,
row4,4,$,l,
row5,5,%,o,
row6,6,^,w,


## 检索

In [51]:
# Bracket lookup by column name returns that column as a Series, indexed by the row labels.
frame2['num']

row1    1
row2    2
row3    3
row4    4
row5    5
row6    6
Name: num, dtype: int64

In [52]:
# Attribute-style access is shorthand for frame2['key']; it cannot reach a column
# such as '?' whose name is not a valid Python identifier.
frame2.key

row1    h
row2    e
row3    l
row4    l
row5    o
row6    w
Name: key, dtype: object

## 修改列

In [59]:
# Overwrite every row of the existing 'will_be_NaN' column with 0..5.
# Bracket assignment is used instead of `frame2.will_be_NaN = ...`: attribute
# assignment only updates a column that already exists; with a new or misspelled
# name it would set a plain Python attribute on the object instead of a column.
frame2['will_be_NaN'] = np.arange(6)
frame2

Unnamed: 0,num,?,key,will_be_NaN
row1,1,!,h,0
row2,2,@,e,1
row3,3,#,l,2
row4,4,$,l,3
row5,5,%,o,4
row6,6,^,w,5


In [60]:
# Bracket assignment with a 6-element array replaces the whole column (here with 4..9).
frame2['will_be_NaN'] = np.arange(4, 10)
frame2

Unnamed: 0,num,?,key,will_be_NaN
row1,1,!,h,4
row2,2,@,e,5
row3,3,#,l,6
row4,4,$,l,7
row5,5,%,o,8
row6,6,^,w,9


## 增加col与del删除col

In [63]:
# Assigning to a column name that does not exist yet appends a new column.
# The comparison is a string comparison against 'l', evaluated row by row on 'key'.
frame2['boolean'] = frame2['key'] >= 'l'
frame2

Unnamed: 0,num,?,key,will_be_NaN,boolean
row1,1,!,h,4,False
row2,2,@,e,5,False
row3,3,#,l,6,True
row4,4,$,l,7,True
row5,5,%,o,8,True
row6,6,^,w,9,True


In [65]:
# `del` removes the '?' column from frame2 in place.
del frame2['?']
frame2

Unnamed: 0,num,key,will_be_NaN,boolean
row1,1,h,4,False
row2,2,e,5,False
row3,3,l,6,True
row4,4,l,7,True
row5,5,o,8,True
row6,6,w,9,True


## 字典操作:由嵌套字典创建DataFrame

In [69]:
# Nested dict: the outer keys become the columns, the inner keys become the row index.
dict02 = {
    'hello': {2017: 0.8, 2018: 12, 2019: 0},
    'world': {2017: 1, 2018: 2, 2019: 3},
}
frame3 = pd.DataFrame(data=dict02)
frame3

Unnamed: 0,hello,world
2017,0.8,1
2018,12.0,2
2019,0.0,3


## Numpy语法:.T 转置

In [70]:
# .T transposes the frame: the row labels and the column labels swap places.
frame3.T

Unnamed: 0,2017,2018,2019
hello,0.8,12.0,0.0
world,1.0,2.0,3.0


## name属性

In [73]:
# Name the row index and the column index; both names appear in the header of the display.
frame3.index.name = 'salaries'
frame3.columns.name = 'states'
frame3

states,hello,world
salaries,Unnamed: 1_level_1,Unnamed: 2_level_1
2017,0.8,1
2018,12.0,2
2019,0.0,3


## .values属性返回ndarray形式

In [75]:
# .values returns the data as a 2-D NumPy array; the int and float columns
# come back together as floats in a single array.
frame3.values

array([[ 0.8,  1. ],
       [12. ,  2. ],
       [ 0. ,  3. ]])

## 索引对象