In [78]:
import numpy as np
import pandas as pd

**算术运算和数据对齐**

In [79]:
# First operand: a float Series labelled a, c, d, e.
s1 = pd.Series({"a": 7.3, "c": -2.5, "d": 3.4, "e": 1.5})
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [80]:
# Second operand: shares labels a, c, e with s1 and adds f, g.
s2 = pd.Series({"a": -2.1, "c": 3.6, "e": -1.5, "f": 4.0, "g": 3.1})
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [81]:
# Automatic data alignment introduces NA values at the labels that do not
# overlap, and missing values then propagate through the arithmetic.
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [82]:
# For a DataFrame, alignment happens on both the rows and the columns.
# 3x3 float frame with columns b, c, d and three string row labels.
df1 = pd.DataFrame(
    np.arange(9, dtype=float).reshape(3, 3),
    index=["ohio", "teax", "colora"],
    columns=["b", "c", "d"],
)
df1

Unnamed: 0,b,c,d
ohio,0.0,1.0,2.0
teax,3.0,4.0,5.0
colora,6.0,7.0,8.0


In [83]:
# 4x3 float frame with columns b, d, e and four string row labels.
df2 = pd.DataFrame(
    np.arange(12, dtype=float).reshape((4, 3)),
    index=["utah", "ohio", "teax", "organ"],
    columns=["b", "d", "e"],
)
df2

Unnamed: 0,b,d,e
utah,0.0,1.0,2.0
ohio,3.0,4.0,5.0
teax,6.0,7.0,8.0
organ,9.0,10.0,11.0


In [84]:
# Adding the two frames yields the union of their row and column labels;
# any cell that is not present in both operands comes out as NaN.
df1 + df2

Unnamed: 0,b,c,d,e
colora,,,,
ohio,3.0,,6.0,
organ,,,,
teax,9.0,,12.0,
utah,,,,


**在算术方法中填充值**

在对不同索引的对象进行算术运算时，你可能希望当一个对象中某个轴标签在另一个对象中找不到时，填充一个特殊值（比如0）。


In [85]:
# Rebind df1 to a 3x4 float frame with columns a-d and the default integer index.
df1 = pd.DataFrame(
    np.arange(12, dtype=float).reshape(3, 4),
    columns=["a", "b", "c", "d"],
)
df1

Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,5.0,6.0,7.0
2,8.0,9.0,10.0,11.0


In [86]:
# Rebind df2 to a 4x5 float frame with columns a-e and the default integer index.
df2 = pd.DataFrame(
    np.arange(20, dtype=float).reshape((4, 5)),
    columns=["a", "b", "c", "d", "e"],
)
df2

Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,4.0
1,5.0,6.0,7.0,8.0,9.0
2,10.0,11.0,12.0,13.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [87]:
# Overwrite the cell at row label 1, column "b" with NaN.
# NOTE: this mutates df1 in place, so re-running the cell that builds df1 is
# required to get the original values back.
df1.loc[1, "b"] = np.nan
df1


Unnamed: 0,a,b,c,d
0,0.0,1.0,2.0,3.0
1,4.0,,6.0,7.0
2,8.0,9.0,10.0,11.0


In [88]:
# Plain `+` has no fill value: labels missing from either frame, and the NaN
# already stored in df1, all produce NaN in the result.
df2 + df1

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


**使用add方法**

In [89]:
# Method form of `+`, passing 0 as the fill value for entries that are
# missing from one of the two operands.
df1.add(df2, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,6.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [90]:
# Element-wise reciprocal written with the operator, scalar on the left.
1 / df1

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


In [91]:
# Reversed-division method: the argument is the left operand, i.e. 1 / df1.
df1.rdiv(1)

Unnamed: 0,a,b,c,d
0,inf,1.0,0.5,0.333333
1,0.25,,0.166667,0.142857
2,0.125,0.111111,0.1,0.090909


以 r 开头的方法会翻转两个操作数的位置（例如 df1.rdiv(1) 等价于 1/df1）：

add,  radd 方法用于加法

sub,  rsub 方法用于减法

div,  rdiv 方法用于除法

floordiv,  rfloordiv 方法用于整除（地板除）

mul,  rmul 方法用于乘法

pow,  rpow 方法用于幂运算（指数）


In [92]:
# A fill value can also be given when reindexing a Series or DataFrame:
# here df1 is reindexed to df2's column labels with fill_value=0.
df1.reindex(columns=df2.columns, fill_value=0)


Unnamed: 0,a,b,c,d,e
0,0.0,1.0,2.0,3.0,0
1,4.0,,6.0,7.0,0
2,8.0,9.0,10.0,11.0,0


DataFrame和Series之间的运算

In [93]:
# 3x4 float array holding 0.0 through 11.0 in row-major order.
arr = np.arange(12, dtype=float).reshape((3, 4))
arr

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

In [94]:
# Subtract the first row from the whole 2-D array; the 1-D row arr[0] is
# applied to every row of arr (NumPy broadcasting).
# Only the last expression of a cell is rendered, so the row itself is not
# shown as a separate statement here.
arr - arr[0]

array([[0., 0., 0., 0.],
       [4., 4., 4., 4.],
       [8., 8., 8., 8.]])

In [95]:
# Build a 4x3 float frame with columns b, c, d and the default integer index,
# then take the row labelled 0 as a Series indexed by the column labels.
# Only a cell's final expression is rendered, so the frame is not echoed in
# the middle of this cell; it has its own display cell further down.
frame = pd.DataFrame(np.arange(12.).reshape(4, 3), columns=list('bcd'))
seriers = frame.loc[0]
seriers

b    0.0
c    1.0
d    2.0
Name: 0, dtype: float64

In [96]:
# Arithmetic between a DataFrame and a Series matches the Series index against
# the DataFrame's columns and then broadcasts down the rows.
frame - seriers

Unnamed: 0,b,c,d
0,0.0,0.0,0.0
1,3.0,3.0,3.0
2,6.0,6.0,6.0
3,9.0,9.0,9.0


In [97]:
# Integer Series whose labels b, c, f only partly overlap frame's columns.
seriers2 = pd.Series(range(3), index=["b", "c", "f"])
seriers2

b    0
c    1
f    2
dtype: int64

In [98]:
# Labels found in only one operand (column d, Series label f) are kept in the
# result as all-NaN columns.
frame + seriers2

Unnamed: 0,b,c,d,f
0,0.0,2.0,,
1,3.0,5.0,,
2,6.0,8.0,,
3,9.0,11.0,,


**如果希望匹配行且在列上广播，则必须使用算术运算方法，并通过 axis 参数指定要匹配的轴。**

In [99]:
# Column "d" of frame as a Series; it carries frame's row labels as its index.
seriers3 = frame["d"]
seriers3

0     2.0
1     5.0
2     8.0
3    11.0
Name: d, dtype: float64

In [100]:
# Display the frame built in an earlier cell (4 rows, columns b, c, d).
frame

Unnamed: 0,b,c,d
0,0.0,1.0,2.0
1,3.0,4.0,5.0
2,6.0,7.0,8.0
3,9.0,10.0,11.0


In [101]:
# Match seriers3 against frame's row labels (axis="index") and subtract it
# from every column.
frame.sub(seriers3, axis="index")


Unnamed: 0,b,c,d
0,-2.0,-1.0,0.0
1,-2.0,-1.0,0.0
2,-2.0,-1.0,0.0
3,-2.0,-1.0,0.0


In [102]:
# Same row-label matching as the subtraction case, but adding seriers3 to
# every column.
frame.add(seriers3, axis="index")

Unnamed: 0,b,c,d
0,2.0,3.0,4.0
1,8.0,9.0,10.0
2,14.0,15.0,16.0
3,20.0,21.0,22.0
