# Mod12 Object Operations in Pandas

## Ufuncs: Operations Between DataFrame and Series

In [20]:
import pandas as pd
import numpy as np

In [21]:
pd.__version__

'1.1.0'

In [22]:
np.__version__

'1.19.1'

### Review NumPy Operation

In [23]:
x = np.array([[3,7,12],[8,9,23]]);x

array([[ 3,  7, 12],
       [ 8,  9, 23]])

In [24]:
y = np.array([1,3,9]); y

array([1, 3, 9])

In [25]:
z = np.array([4,8]); z

array([4, 8])

In [26]:
x * y

array([[  3,  21, 108],
       [  8,  27, 207]])

In [17]:
x * z   # 維度跟形狀皆無法相同，所以無法broadcasting

ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [27]:
# 2d array
ar=np.array([[1,2,3],[10,20,30]]); ar

array([[ 1,  2,  3],
       [10, 20, 30]])

In [28]:
# 1d array
ar[0]

array([1, 2, 3])

subtraction between a two-dimensional array and one of its rows is applied row-wise

In [29]:
ar-ar[0]

array([[ 0,  0,  0],
       [ 9, 18, 27]])

### Pandas Operation

In Pandas, the convention similarly operates row-wise by default:

In [30]:
rng = np.random.RandomState(42)
A = rng.randint(10, size=(3, 4))
A

array([[6, 3, 7, 4],
       [6, 9, 2, 6],
       [7, 4, 3, 7]])

In [31]:
df = pd.DataFrame(A, columns=list('QRST'))
df

Unnamed: 0,Q,R,S,T
0,6,3,7,4
1,6,9,2,6
2,7,4,3,7


In [19]:
df.iloc[0]

NameError: name 'df' is not defined

In [32]:
df[0:1]

Unnamed: 0,Q,R,S,T
0,6,3,7,4


In [33]:
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,0,6,-5,2
2,1,1,-4,3


In [34]:
df.subtract(df.iloc[0], axis=1)

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,0,6,-5,2
2,1,1,-4,3


## Pandas Objects Operations

In [35]:
ind_char = pd.Index(['a','b','c','d','e'])
ind_char

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [36]:
ind_num = pd.RangeIndex(5)
ind_num

RangeIndex(start=0, stop=5, step=1)

In [37]:
s1 = pd.Series(range(10, 15), ind_char)
s1

a    10
b    11
c    12
d    13
e    14
dtype: int64

In [38]:
s2 = pd.Series(range(30, 40, 2), ind_char)
s2

a    30
b    32
c    34
d    36
e    38
dtype: int64

In [39]:
df1 = pd.DataFrame(100, index=ind_num, columns=ind_char)
df1

Unnamed: 0,a,b,c,d,e
0,100,100,100,100,100
1,100,100,100,100,100
2,100,100,100,100,100
3,100,100,100,100,100
4,100,100,100,100,100


In [40]:
df2 = pd.DataFrame(np.arange(25).reshape(5,5),
        index=ind_num, columns=ind_char)
df2

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,4
1,5,6,7,8,9
2,10,11,12,13,14
3,15,16,17,18,19
4,20,21,22,23,24


### Series and Series Operation

Series operatrion is index alignment

In [41]:
s1

a    10
b    11
c    12
d    13
e    14
dtype: int64

In [42]:
s1 + s2

a    40
b    43
c    46
d    49
e    52
dtype: int64

In [43]:
# shuffle s1, same result 
s1.sample(frac=1)

b    11
c    12
a    10
e    14
d    13
dtype: int64

In [44]:
# shuffle s1, same result  是照index去相加, index: a + a, b + b
s1.sample(frac=1) + s2

a    40
b    43
c    46
d    49
e    52
dtype: int64

### DataFrame and DataFrame Opertaion

In [45]:
df1 + df2

Unnamed: 0,a,b,c,d,e
0,100,101,102,103,104
1,105,106,107,108,109
2,110,111,112,113,114
3,115,116,117,118,119
4,120,121,122,123,124


In [46]:
# Shuffle second DataFrame on both axes. The index and columns will still align, 只要key 相同, 怎麼變相加結果都相同
df2.sample(frac=1).sample(frac=1, axis=1)  

Unnamed: 0,e,c,a,b,d
4,24,22,20,21,23
3,19,17,15,16,18
2,14,12,10,11,13
0,4,2,0,1,3
1,9,7,5,6,8


In [47]:
# Shuffle second DataFrame on both axes. The index and columns will still align
df1 + df2.sample(frac=1).sample(frac=1, axis=1)

Unnamed: 0,a,b,c,d,e
0,100,101,102,103,104
1,105,106,107,108,109
2,110,111,112,113,114
3,115,116,117,118,119
4,120,121,122,123,124


In [48]:
# Add a scalar. Nothing to align with
# so broadcasts to everything

df1 + 1

Unnamed: 0,a,b,c,d,e
0,101,101,101,101,101
1,101,101,101,101,101
2,101,101,101,101,101
3,101,101,101,101,101
4,101,101,101,101,101


### DataFrame and Series Operation

In [49]:
df1

Unnamed: 0,a,b,c,d,e
0,100,100,100,100,100
1,100,100,100,100,100
2,100,100,100,100,100
3,100,100,100,100,100
4,100,100,100,100,100


In [50]:
s1

a    10
b    11
c    12
d    13
e    14
dtype: int64

In [51]:
df1 + s1

Unnamed: 0,a,b,c,d,e
0,110,111,112,113,114
1,110,111,112,113,114
2,110,111,112,113,114
3,110,111,112,113,114
4,110,111,112,113,114


In [52]:
df1 + s2

Unnamed: 0,a,b,c,d,e
0,130,132,134,136,138
1,130,132,134,136,138
2,130,132,134,136,138
3,130,132,134,136,138
4,130,132,134,136,138


In [53]:
s3 = pd.Series(range(50, 10, -8), ind_num)
s3

0    50
1    42
2    34
3    26
4    18
dtype: int64

In [54]:
# DataFrame default column alignment 兩邊的key不一致
# cannot find matching column
df1 + s3

Unnamed: 0,a,b,c,d,e,0,1,2,3,4
0,,,,,,,,,,
1,,,,,,,,,,
2,,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,


In [55]:
df1.add(s3, axis=0)

Unnamed: 0,a,b,c,d,e
0,150,150,150,150,150
1,142,142,142,142,142
2,134,134,134,134,134
3,126,126,126,126,126
4,118,118,118,118,118


Using transposition to resolve the problem

In [56]:
df1.T

Unnamed: 0,0,1,2,3,4
a,100,100,100,100,100
b,100,100,100,100,100
c,100,100,100,100,100
d,100,100,100,100,100
e,100,100,100,100,100


In [57]:
df1.T + s3

Unnamed: 0,0,1,2,3,4
a,150,142,134,126,118
b,150,142,134,126,118
c,150,142,134,126,118
d,150,142,134,126,118
e,150,142,134,126,118


In [None]:
(df1.T + s3).T

Better way to resolve the problem

In [58]:
#df1.add(s3, axis=0)
df1.add(s3, axis='rows')

Unnamed: 0,a,b,c,d,e
0,150,150,150,150,150
1,142,142,142,142,142
2,134,134,134,134,134
3,126,126,126,126,126
4,118,118,118,118,118


In [59]:
rng = np.random.RandomState(42)
A = pd.DataFrame(rng.randint(0, 20, (2, 2)),
                 columns=list('AB'))
A

Unnamed: 0,A,B
0,6,19
1,14,10


In [60]:
A.mean(axis='columns')   

0    12.5
1    12.0
dtype: float64

## Lab

<b>有一個 DataFrame df 如下，試著正確的運算 df - df['Z'] 的結果</b>

operate column-wise by specifying the ``axis`` keyword:

In [61]:
np.random.seed(62)
A = np.random.randint(10, size=(3, 4))

df = pd.DataFrame(A, columns=list('WXYZ'))
df

Unnamed: 0,W,X,Y,Z
0,2,8,1,9
1,3,5,1,4
2,5,1,9,8


In [75]:
df['Z']

0    9
1    4
2    8
Name: Z, dtype: int64

In [72]:
df['Z'].shape

(3,)

In [65]:
df.shape

(3, 4)

In [63]:
df - df['Z']

Unnamed: 0,W,X,Y,Z,0,1,2
0,,,,,,,
1,,,,,,,
2,,,,,,,


In [79]:
df.subtract(df['X'], axis=0)

Unnamed: 0,W,X,Y,Z
0,-6,0,-7,1
1,-2,0,-4,-1
2,4,0,8,7
