In [4]:
import numpy as np
import pandas as pd
import datetime

# series

### series创建

In [51]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
pd.Series({11: 111, 22: 222})

11    111
22    222
dtype: int64

In [52]:
# An explicit index selects and orders the dict entries by label. Label 22
# has no entry in the dict, so it shows up as NaN and the result is float64.
pd.Series({11: 111, 33: 333}, index=[11, 22, 33])

11    111.0
22      NaN
33    333.0
dtype: float64

In [14]:
# One value per character of 'abcde', with explicitly chosen (unsorted)
# integer labels as the index.
sr1 = pd.Series(
    ['a', 'b', 'c', 'd', 'e'],
    index=[2, 4, 6, 3, 5],
)
sr1

2    a
4    b
6    c
3    d
5    e
dtype: object

In [None]:
"""The index name is empty by default when a Series is created."""
sr1.index.name

In [17]:
# Give the index of sr1 the name 'hi'.
sr1.index.name = 'hi'

In [19]:
"""The Series name is empty by default when a Series is created."""
sr1.name

In [60]:
# Give the Series itself the name 'ww'.
sr1.name = 'ww'

### series切片

In [70]:
# String values labelled with the integers 2..5; used by the selection
# examples that follow.
sr1 = pd.Series(['a', 'b', 'c', 'd'], index=range(2, 6))
sr1

2    a
3    b
4    c
5    d
dtype: object

In [71]:
# Integer values 0..3 labelled with the strings 'x', 'y', 'm', 'n'
# (deliberately not in alphabetical order).
sr2 = pd.Series(range(4), index=['x', 'y', 'm', 'n'])
sr2

x    0
y    1
m    2
n    3
dtype: int64

In [72]:
# Lookups that return a single element.
# When the index itself is int (or float), a scalar key is matched against
# the index labels, not against positions -- .loc makes that explicit.
print(sr1.loc[3])
print(sr2.loc['m'])

b
2


In [74]:
# Selections that return a Series instead of a scalar.
# A list containing labels that are absent from the index must go through
# reindex(): plain sr[[...]] raises KeyError for missing labels on
# pandas >= 1.0. reindex() fills the missing labels with NaN.
print(sr1.reindex([3, 1]))
print(sr1[[3]])
print(sr2.reindex(['a', 'm']))
print(sr2[['m']])
# Integer slices are positional and exclude the stop position, so 1:1 is empty.
print(sr1.iloc[1:2])
print(sr1.iloc[1:1])
# A colon slice on string labels follows the order of the labels in the
# index (not alphabetical order) and includes the stop label.
print(sr2.loc['y':'n'])

3      b
1    NaN
dtype: object
3    b
dtype: object
a    NaN
m    2.0
dtype: float64
m    2
dtype: int64
3    b
dtype: object
Series([], dtype: object)
y    1
m    2
n    3
dtype: int64


In [101]:
# Boolean selection always returns a Series, just like a colon slice does.
sr3 = pd.Series(list('abcd'), index=range(4))
sr4 = pd.Series(list('abcd'), index=range(2, 6))
# The same mask in two forms: a boolean Series (carries sr3's index) and a
# plain Python list of bools (no index).
bseries = sr3 == 'b'
blist = list(bseries)
print(sr3)
print(sr4)
print(bseries)
print(blist)
# A boolean Series used as a mask must have an index that matches the target
# exactly. sr4[bseries] would raise, because the indexes of sr4 and bseries
# do not match.
print(sr3[bseries])
# A plain list of bools has no index, so it works on both sr3 and sr4.
print(sr3[blist])
print(sr4[blist])

0    a
1    b
2    c
3    d
dtype: object
2    a
3    b
4    c
5    d
dtype: object
0    False
1     True
2    False
3    False
dtype: bool
[False, True, False, False]
1    b
dtype: object
1    b
dtype: object
3    b
dtype: object


### series一些操作

In [31]:
# .values exposes the underlying data of the Series as a NumPy array.
sr1 = pd.Series(range(4), index=['a', 'b', 'c', 'd'])
sr1.values

array([0, 1, 2, 3], dtype=int64)

In [32]:
# .index exposes the labels of the Series as an Index object.
sr1.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [33]:
# Assigning to .index replaces all labels at once; here the string labels
# are swapped for the integers 0..3.
sr1.index = range(4)
sr1.index

RangeIndex(start=0, stop=4, step=1)

### series复制

In [92]:
# Fresh Series used as the starting point for the copy examples below.
sr1 = pd.Series(
    ['a', 'b', 'c', 'd', 'e'],
    index=[2, 4, 6, 3, 5],
)
sr1

2    a
4    b
6    c
3    d
5    e
dtype: object

In [93]:
"""Plain assignment does not copy: sr1_a and sr1 refer to the same object, so a change made through one name is visible through the other."""
sr1_a = sr1
# Re-labelling through the alias also changes what sr1 shows.
sr1_a.index = [21,22,23,24,25]
sr1

21    a
22    b
23    c
24    d
25    e
dtype: object

In [94]:
"""copy() creates an independent Series: modifying sr1_b does not affect sr1."""
sr1_b = sr1.copy()
# Only the copy is re-labelled; sr1 keeps its labels.
sr1_b.index = [111,222,333,444,555]
sr1

21    a
22    b
23    c
24    d
25    e
dtype: object

In [161]:
# A Series obtained by slicing shares its values with the original, but has
# an index of its own.
# NOTE(review): the sharing described in this cell matches the recorded
# output; with pandas Copy-on-Write enabled the write through ssr1 would
# presumably no longer reach sr1 -- confirm against the pandas version in use.
sr1 = pd.Series(list('abcde'), index=range(5))
ssr1 = sr1[1:4]
print(ssr1)

# Re-labelling ssr1 this way does not touch the index of sr1.
ssr1.index = [11, 22, 33]
print(ssr1)
print(sr1)

# Assigning through labels that already exist in ssr1 changes the values of
# sr1 as well: the indexes are separate, but the values live in the same
# memory.
ssr1[[11, 22]] = ['bb', 'cc']
print(ssr1)
print(sr1)

# Adding a new label to ssr1 ends the sharing. From here on, changing the
# index or the values of ssr1 no longer changes sr1.
ssr1[44] = 'xx'
print(ssr1)
print(sr1)

# Changing a value of ssr1 now leaves sr1 alone; each Series has its own
# values.
ssr1[33] = 'dd'
print(ssr1)
print(sr1)

# Likewise, rebinding the name (e.g. ssr1 = ['x', 'xx', 'xxx']) is a fresh
# assignment, so sr1 and ssr1 stop sharing values and can be modified
# independently.
print('byby')

1    b
2    c
3    d
dtype: object
11    b
22    c
33    d
dtype: object
0    a
1    b
2    c
3    d
4    e
dtype: object
11    bb
22    cc
33     d
dtype: object
0     a
1    bb
2    cc
3     d
4     e
dtype: object
11    bb
22    cc
33     d
44    xx
dtype: object
0     a
1    bb
2    cc
3     d
4     e
dtype: object
11    bb
22    cc
33    dd
44    xx
dtype: object
0     a
1    bb
2    cc
3     d
4     e
dtype: object
byby


### series增删改

In [73]:
# Build the Series this example needs inside the cell, so it also works after
# "Restart & Run All": by this point sr1 has been rebound to a Series with
# labels 0..4 and no name, so sr1.drop(21) would raise KeyError.
sr_named = pd.Series(list('abcde'), index=[21, 22, 23, 24, 25], name='ww')
# drop() returns a new Series without the given label.
sr_named.drop(21)

22    b
23    c
24    d
25    e
Name: ww, dtype: object

In [75]:
# Build the Series this example needs inside the cell, so it also works after
# "Restart & Run All": by this point sr1 has been rebound to a Series with
# labels 0..4 and no name, so dropping labels 21/23/25 would raise KeyError.
sr_named = pd.Series(list('abcde'), index=[21, 22, 23, 24, 25], name='ww')
# drop() also accepts a list of labels and removes all of them.
sr_named.drop([21, 23, 25])

22    b
24    d
Name: ww, dtype: object

### series运算

In [7]:
# Two integer Series whose indexes overlap only on the labels 1 and 2;
# used by the arithmetic examples that follow.
sr1 = pd.Series(range(3), index=range(0, 3))
sr2 = pd.Series(range(3), index=range(1, 4))
print(sr1, '\n', sr2)

0    0
1    1
2    2
dtype: int64 
 1    0
2    1
3    2
dtype: int64


In [9]:
"""Adding two Series aligns them on their index labels."""
# Labels present in only one operand (0 and 3 here) come out as NaN.
sr1+sr2

0    NaN
1    1.0
2    3.0
3    NaN
dtype: float64

In [10]:
# Multiplying by a scalar is applied to every element; the index is kept.
sr1*2

0    0
1    2
2    4
dtype: int64

In [11]:
# numpy is already imported as np in the notebook's first cell, so the
# import is not repeated here.
# NumPy ufuncs apply element-wise to a Series and keep its index.
np.exp(sr1)

0    1.000000
1    2.718282
2    7.389056
dtype: float64

In [13]:
# `in` checks whether a label is present in the index and returns a bool.
print(2 in sr1)
# Bitwise operators bind tighter than `in`, so the expression below is
# ((2 & 8) | 2) in sr1, which reduces to 2 in sr1.
print(((2 & 8) | 2) in sr1)

True
True


In [14]:
# Missing-value detection: a Series mixing a string, None, an empty string
# and 0, used by the null checks that follow.
sr3 = pd.Series(
    ['a', None, '', 0],
    index=[1, 2, 3, 4],
)
sr3

1       a
2    None
3        
4       0
dtype: object

In [15]:
# Element-wise missing-value test: only None is flagged; the empty string
# and 0 are regular values.
pd.isnull(sr3)

1    False
2     True
3    False
4    False
dtype: bool

In [16]:
# Element-wise inverse of the missing-value test: True wherever a value is
# present.
pd.notnull(sr3)

1     True
2    False
3     True
4     True
dtype: bool

In [17]:
# Method form of the missing-value test, called on the Series itself.
sr3.isnull()

1    False
2     True
3    False
4    False
dtype: bool