# **DataFrame 병합**

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Base 4x4 frame holding the integers 0..15 filled row by row;
# rows are labelled with place names, columns with ordinal names.
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)

In [4]:
df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


# **pd.concat**
### **행방향, 열방향 모두 데이터 병합 가능**

- 디폴트(axis = 0)는 행 방향으로 이어 붙임 --> 컬럼명을 기준으로 맞춤
- axis = 1 인수 사용 시 열 방향으로 병합 --> 인덱스를 기준으로 맞춤

**case1 : 데이터의 컬럼명 같고 인덱스 다름**

In [5]:
# Second 4x4 frame: same column names as the base frame but different row
# labels; the 16 values are random integers drawn from 0..99.
add_table1 = pd.DataFrame(
    data=np.random.choice(100, 16).reshape(4, 4),
    index=['Hawaii', 'Washington', 'Seattle', 'Texas'],
    columns=['one', 'two', 'three', 'four'],
)

In [6]:
add_table1

Unnamed: 0,one,two,three,four
Hawaii,50,49,27,92
Washington,12,18,10,27
Seattle,71,26,75,47
Texas,6,20,3,64


In [8]:
# Row-wise concat (the default, written out here as axis=0): both frames share
# the same column names, so add_table1's rows are stacked beneath df's rows.
pd.concat([df, add_table1], axis=0)

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15
Hawaii,50,49,27,92
Washington,12,18,10,27
Seattle,71,26,75,47
Texas,6,20,3,64


In [10]:
# concat method with the axis argument (both frames share the same column
# names): axis=1 places the frames side by side and matches rows on index
# labels, so labels present in only one frame are filled with NaN.
# The two indexes do not overlap; sort=True is pinned so the label-sorted
# result is the same on every pandas version and the FutureWarning that older
# versions emit when `sort` is left unset no longer appears.
pd.concat([df, add_table1], axis=1, sort=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,one,two,three,four,one.1,two.1,three.1,four.1
Colorado,4.0,5.0,6.0,7.0,,,,
Hawaii,,,,,50.0,49.0,27.0,92.0
New York,12.0,13.0,14.0,15.0,,,,
Ohio,0.0,1.0,2.0,3.0,,,,
Seattle,,,,,71.0,26.0,75.0,47.0
Texas,,,,,6.0,20.0,3.0,64.0
Utah,8.0,9.0,10.0,11.0,,,,
Washington,,,,,12.0,18.0,10.0,27.0


## **Quiz) 아래와 같이 행이름이 없는 데이터를 기존의 df와 컬럼 바인드 수행**

In [11]:
# 4x4 frame of random integers from 0..99 with new column names; no index is
# passed, so the rows get the default 0..3 integer labels.
add_table4 = pd.DataFrame(
    data=np.random.choice(100, 16).reshape(4, 4),
    columns=['five', 'six', 'seven', 'eight'],
)

In [17]:
# Give add_table4 the same row labels as df (replacing its default integer
# index) so that a column-wise concat can align the two frames row for row.
add_table4.index = df.index

In [18]:
# Column bind: the row labels are identical, so the frames line up side by
# side and no NaN filling occurs.
pd.concat([df,add_table4], axis =1)

Unnamed: 0,one,two,three,four,five,six,seven,eight
Ohio,0,1,2,3,35,87,43,54
Colorado,4,5,6,7,78,94,64,59
Utah,8,9,10,11,65,95,67,29
New York,12,13,14,15,98,76,34,0


In [12]:
add_table4

Unnamed: 0,five,six,seven,eight
0,35,87,43,54
1,78,94,64,59
2,65,95,67,29
3,98,76,34,0


## **apply & applymap**

In [19]:
# 4x3 frame of samples from the standard normal distribution, used as the
# working data for the apply / applymap examples.
add_table11 = pd.DataFrame(
    data=np.random.randn(4, 3),
    index=['Hawaii', 'Washington', 'Seattle', 'Texas'],
    columns=['one', 'two', 'three'],
)

In [20]:
add_table11

Unnamed: 0,one,two,three
Hawaii,0.67158,-0.186951,1.160443
Washington,0.777178,-0.952455,-1.92584
Seattle,0.974128,0.840294,0.730769
Texas,-0.196175,0.979135,-0.120438


In [21]:
# Absolute value of every element (NumPy function applied to the whole DataFrame).
np.abs(add_table11)

Unnamed: 0,one,two,three
Hawaii,0.67158,0.186951,1.160443
Washington,0.777178,0.952455,1.92584
Seattle,0.974128,0.840294,0.730769
Texas,0.196175,0.979135,0.120438


위 add_table11에서 컬럼별로 '최댓값 - 최솟값'을 구하고 싶다면?

In [25]:
# Per-column range (max - min), worked out by hand for each of the three columns.
a, b, c = (
    add_table11[col].max() - add_table11[col].min()
    for col in ('one', 'two', 'three')
)

In [27]:
a,b,c

(1.170303287419128, 1.9315898707880894, 3.086283568206209)

위 add_table11에서 행별로 '최댓값 - 최솟값'을 구하고 싶다면?

In [29]:
# Per-row range (max - min), worked out by hand for each of the four row labels.
hw, wa, se, te = (
    add_table11.loc[label, :].max() - add_table11.loc[label, :].min()
    for label in ('Hawaii', 'Washington', 'Seattle', 'Texas')
)

In [30]:
hw,wa,se,te

(1.3473944216004636,
 2.7030183333489886,
 0.2433595495204095,
 1.1753101087278992)

apply()와 람다표현식을 적용하여 간단히 해결

In [31]:
# Range helper: returns the argument's maximum minus its minimum
# (the argument only needs .max() and .min() methods).
f = lambda x: x.max() - x.min()

In [33]:
# Default axis (axis=0): f is applied to each column, giving one result per column.
add_table11.apply(f)

one      1.170303
two      1.931590
three    3.086284
dtype: float64

In [35]:
# axis=1: f is applied to each row, giving one result per row label.
add_table11.apply(f,axis = 1)

Hawaii        1.347394
Washington    2.703018
Seattle       0.243360
Texas         1.175310
dtype: float64

In [36]:
# Format a number as a string with exactly two decimal places.
f2 = lambda x: '%.2f' %x

In [43]:
# applymap calls f2 on every individual element, so add_table12 holds the
# formatted strings rather than floats.
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map — confirm against the pandas version this notebook targets.
add_table12 = add_table11.applymap(f2)

In [44]:
add_table12

Unnamed: 0,one,two,three
Hawaii,0.67,-0.19,1.16
Washington,0.78,-0.95,-1.93
Seattle,0.97,0.84,0.73
Texas,-0.2,0.98,-0.12
