In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# 연산

## 색인이 다른 객체를 더하는 산술연산

In [4]:
# Two Series whose index labels only partly overlap ('a', 'c' and 'e' are shared).
s1 = Series({'a': 5, 'c': 6, 'd': -1, 'e': 2})
s2 = Series({'a': 3, 'c': 4, 'e': -1, 'f': 2, 'g': 7})

In [5]:
# Show both Series, one after the other, with a divider line in between.
print(s1, '----------------------------------', s2, sep='\n')

a    5
c    6
d   -1
e    2
dtype: int64
----------------------------------
a    3
c    4
e   -1
f    2
g    7
dtype: int64


In [6]:
# Adding two Series aligns them by index label; a label that appears in only
# one of the operands yields NaN in the result.
s1 + s2

a     8.0
c    10.0
d     NaN
e     1.0
f     NaN
g     NaN
dtype: float64

In [10]:
# DataFrame arithmetic: two frames whose row and column labels only partly overlap.
df1 = DataFrame(
    np.arange(9).reshape(3, 3),
    index=['seoul', 'busan', 'gwangju'],
    columns=['b', 'c', 'd'],
)
df2 = DataFrame(
    np.arange(12).reshape(4, 3),
    index=['incheon', 'seoul', 'busan', 'jeonju'],
    columns=['b', 'd', 'e'],
)

In [11]:
# Show both frames, one after the other, with a divider line in between.
print(df1, '-------------------------', df2, sep='\n')

         b  c  d
seoul    0  1  2
busan    3  4  5
gwangju  6  7  8
-------------------------
         b   d   e
incheon  0   1   2
seoul    3   4   5
busan    6   7   8
jeonju   9  10  11


In [13]:
# Same rule for DataFrames: a row or column label that is missing from either
# frame produces NaN in the result.
df1 + df2

Unnamed: 0,b,c,d,e
busan,9.0,,12.0,
gwangju,,,,
incheon,,,,
jeonju,,,,
seoul,3.0,,6.0,


In [15]:
# Two frames with the default integer row index; df4 has one more row and one
# more column ('e') than df3.
df3 = DataFrame(
    np.arange(12).reshape(3, 4),
    columns=['a', 'b', 'c', 'd'],
)
df4 = DataFrame(
    np.arange(20).reshape(4, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)

In [16]:
# Show both frames, one after the other, with a divider line in between.
print(df3, '-----------------------------------', df4, sep='\n')

   a  b   c   d
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11
-----------------------------------
    a   b   c   d   e
0   0   1   2   3   4
1   5   6   7   8   9
2  10  11  12  13  14
3  15  16  17  18  19


In [17]:
# df3 has no row 3 and no column 'e', so those cells come out as NaN.
df3 + df4

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,11.0,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [18]:
# NaN results are usually unwanted here, so pass fill_value=0: a cell missing
# from one frame is treated as 0 and the other frame's value is kept.
df3.add(df4, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,4.0
1,9.0,11.0,13.0,15.0,9.0
2,18.0,20.0,22.0,24.0,14.0
3,15.0,16.0,17.0,18.0,19.0


In [20]:
# Give df3 the same column set as df4; the column df3 lacks ('e') is filled with 0.
df5 = df3.reindex(df4.columns, axis='columns', fill_value=0)
df5

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,0
1,4,5,6,7,0
2,8,9,10,11,0


## DataFrame과 Series 간의 연산

In [None]:
# Works much like NumPy broadcasting.

In [21]:
# 3x4 integer array holding 0..11.
arr = np.arange(12).reshape(3, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [22]:
# First row of arr: a 1-D array of length 4.
arr[0]

array([0, 1, 2, 3])

In [23]:
# The 1-D row is subtracted from every row of the 2-D array (broadcasting).
arr - arr[0]

array([[0, 0, 0, 0],
       [4, 4, 4, 4],
       [8, 8, 8, 8]])

In [24]:
# Same broadcast, this time using the second row as the reference.
arr - arr[1]

array([[-4, -4, -4, -4],
       [ 0,  0,  0,  0],
       [ 4,  4,  4,  4]])

In [25]:
# 4x3 frame of 0..11 with city names as the row index and columns b, d, e.
df = DataFrame(
    np.arange(12).reshape(4, 3),
    index=['incheon', 'seoul', 'busan', 'jeonju'],
    columns=['b', 'd', 'e'],
)
df

Unnamed: 0,b,d,e
incheon,0,1,2
seoul,3,4,5
busan,6,7,8
jeonju,9,10,11


In [27]:
# First row of df as a Series, indexed by df's column labels.
# NOTE: this rebinds s1, which held a different Series earlier in the notebook.
s1 = df.iloc[0]
print(s1, type(s1), sep='\n')

b    0
d    1
e    2
Name: incheon, dtype: int32
<class 'pandas.core.series.Series'>


In [28]:
# The Series index (b, d, e) is matched to the DataFrame columns and the
# subtraction is applied to every row.
df - s1

Unnamed: 0,b,d,e
incheon,0,0,0
seoul,3,3,3
busan,6,6,6
jeonju,9,9,9


In [30]:
# Series labelled d, e, f; only 'd' and 'e' also exist as columns of df.
# NOTE: this rebinds s2, which held a different Series earlier in the notebook.
s2 = Series(range(3), index=['d', 'e', 'f'])
s2

d    0
e    1
f    2
dtype: int64

In [31]:
# Labels found on only one side ('b' in df, 'f' in s2) become all-NaN columns;
# the shared columns 'd' and 'e' are added row by row.
df + s2

Unnamed: 0,b,d,e,f
incheon,,1.0,3.0,
seoul,,4.0,6.0,
busan,,7.0,9.0,
jeonju,,10.0,12.0,


In [32]:
# Take column 'd' as a Series; its index is df's row labels (the city names).
s3 = df['d']
s3

incheon     1
seoul       4
busan       7
jeonju     10
Name: d, dtype: int32

In [33]:
# `+` matches the Series index against the DataFrame *columns*. s3 is indexed
# by row labels, so nothing matches and every cell of the result is NaN.
df + s3

Unnamed: 0,b,busan,d,e,incheon,jeonju,seoul
incheon,,,,,,,
seoul,,,,,,,
busan,,,,,,,
jeonju,,,,,,,


In [34]:
# To operate along the index (rows), use the method form of the operator.
df

Unnamed: 0,b,d,e
incheon,0,1,2
seoul,3,4,5
busan,6,7,8
jeonju,9,10,11


In [35]:
# axis=0 matches s3 against the row index, so s3 is added to each column.
df.add(s3, axis=0)

Unnamed: 0,b,d,e
incheon,1,2,3
seoul,7,8,9
busan,13,14,15
jeonju,19,20,21
