---
수학연산
- axis
- divide
- sum
- cumprod
- cummax
- rolling
- rank

---

- apply
- shift
- np.where
- concat

---
- 중복/결측 데이터 처리 (drop_duplicates, dropna, fillna ~~)
---
- 시각화 plot, subplot

## 자주 사용하는 수학 연산 함수 소개

In [1]:
import pandas as pd

In [45]:
# Sample DataFrame: three integer columns ("가로", "세로", "높이"), seven rows each.
df = pd.DataFrame({
    "가로" : [10, 20, 30, 10, 30, 20, 11],
    "세로" : [20, 23, 22, 33, 22, 12, 11],
    "높이" : [50, 40, 20, 50, 20, 30, 40]
})

# Bare expression: shows the frame as the notebook cell's output.
df

Unnamed: 0,가로,세로,높이
0,10,20,50
1,20,23,40
2,30,22,20
3,10,33,50
4,30,22,20
5,20,12,30
6,11,11,40


sum : 합계

In [46]:
# sum: with the default axis=0, adds up each column and returns one total per column.
df.sum()

가로    131
세로    143
높이    250
dtype: int64

In [47]:
# sum with axis=1: adds across the columns and returns one total per row.
df.sum(axis=1)

0    80
1    83
2    72
3    93
4    72
5    62
6    62
dtype: int64

divide : 나누기

In [48]:
# divide: divides every element by the scalar 2; the result is floating point.
df.divide(2)

Unnamed: 0,가로,세로,높이
0,5.0,10.0,25.0
1,10.0,11.5,20.0
2,15.0,11.0,10.0
3,5.0,16.5,25.0
4,15.0,11.0,10.0
5,10.0,6.0,15.0
6,5.5,5.5,20.0


In [49]:
# Combined with sum: divide each row by its own row total. axis=0 aligns the
# per-row sums with the row index, so each value becomes its share of the row.
df.divide(df.sum(axis=1), axis=0)

Unnamed: 0,가로,세로,높이
0,0.125,0.25,0.625
1,0.240964,0.277108,0.481928
2,0.416667,0.305556,0.277778
3,0.107527,0.354839,0.537634
4,0.416667,0.305556,0.277778
5,0.322581,0.193548,0.483871
6,0.177419,0.177419,0.645161


누적 계산 (누적곱, 누적 최댓값/최솟값)

In [50]:
# cumprod: cumulative product. axis=0 runs down each column, row by row.

df.cumprod(axis=0)

Unnamed: 0,가로,세로,높이
0,10,20,50
1,200,460,2000
2,6000,10120,40000
3,60000,333960,2000000
4,1800000,7347120,40000000
5,36000000,88165440,1200000000
6,396000000,969819840,48000000000


In [51]:
# cumprod with axis=1: cumulative product from left to right within each row.

df.cumprod(axis=1)

Unnamed: 0,가로,세로,높이
0,10,200,10000
1,20,460,18400
2,30,660,13200
3,10,330,16500
4,30,660,13200
5,20,240,7200
6,11,121,4840


In [52]:
# cummax: cumulative maximum. The default axis=0 tracks the running maximum
# down each column.

df.cummax()

Unnamed: 0,가로,세로,높이
0,10,20,50
1,20,23,50
2,30,23,50
3,30,33,50
4,30,33,50
5,30,33,50
6,30,33,50


In [53]:
# cummax with axis=1: running maximum from left to right within each row.

df.cummax(axis=1)

Unnamed: 0,가로,세로,높이
0,10,20,50
1,20,23,40
2,30,30,30
3,10,33,50
4,30,30,30
5,20,20,30
6,11,11,40


## apply 함수

dataframe에 파이썬 함수를 적용할 수 있다

예를 들어, 가로, 세로, 높이를 이용해 부피라는 컬럼을 추가해보자

In [59]:
df

Unnamed: 0,가로,세로,높이,부피
0,10,20,50,10000
1,20,23,40,18400
2,30,22,20,13200
3,10,33,50,16500
4,30,22,20,13200
5,20,12,30,7200
6,11,11,40,4840


In [58]:
def getVolume(row):
    """Return the product of the '가로', '세로' and '높이' entries of *row*.

    *row* may be any object indexable by those three keys, such as a
    DataFrame row handed over by ``df.apply(..., axis=1)`` or a plain dict.
    """
    # Multiply the first two entries, then scale by the third.
    base_area = row['가로'] * row['세로']
    return base_area * row['높이']

# axis=1 makes apply call getVolume once per row; the results are stored
# in a new '부피' column.
df['부피'] = df.apply(getVolume, axis=1)
df

Unnamed: 0,가로,세로,높이,부피
0,10,20,50,10000
1,20,23,40,18400
2,30,22,20,13200
3,10,33,50,16500
4,30,22,20,13200
5,20,12,30,7200
6,11,11,40,4840


## concat 함수

서로 다른 두 개의 데이터 프레임을 합치는 기능

In [62]:
df

Unnamed: 0,가로,세로,높이,부피
0,10,20,50,10000
1,20,23,40,18400
2,30,22,20,13200
3,10,33,50,16500
4,30,22,20,13200
5,20,12,30,7200
6,11,11,40,4840


In [63]:
# Second DataFrame with the same three columns and values that df was
# created with; unlike df it has no '부피' column.
df2 = pd.DataFrame({
    "가로" : [10, 20, 30, 10, 30, 20, 11],
    "세로" : [20, 23, 22, 33, 22, 12, 11],
    "높이" : [50, 40, 20, 50, 20, 30, 40]
})

df2

Unnamed: 0,가로,세로,높이
0,10,20,50
1,20,23,40
2,30,22,20
3,10,33,50
4,30,22,20
5,20,12,30
6,11,11,40


In [64]:
# Default axis=0 stacks df2's rows below df's. df2 has no '부피' column, so its
# rows get missing values (NaN) there, and the original index labels are kept.
pd.concat([df, df2])

Unnamed: 0,가로,세로,높이,부피
0,10,20,50,10000.0
1,20,23,40,18400.0
2,30,22,20,13200.0
3,10,33,50,16500.0
4,30,22,20,13200.0
5,20,12,30,7200.0
6,11,11,40,4840.0
0,10,20,50,
1,20,23,40,
2,30,22,20,
3,10,33,50,
4,30,22,20,
5,20,12,30,
6,11,11,40,


In [65]:
# axis=1 joins the frames side by side, matching rows by index label.
pd.concat([df, df2], axis=1)

Unnamed: 0,가로,세로,높이,부피,가로.1,세로.1,높이.1
0,10,20,50,10000,10,20,50
1,20,23,40,18400,20,23,40
2,30,22,20,13200,30,22,20
3,10,33,50,16500,10,33,50
4,30,22,20,13200,30,22,20
5,20,12,30,7200,20,12,30
6,11,11,40,4840,11,11,40


## 중복 데이터 처리

In [None]:
# drop_duplicates: remove duplicate rows
# duplicated: mark duplicate rows with a boolean mask

## 결측(빈) 데이터 처리