In [None]:
# Reference URL
# https://programmerpsy.tistory.com/17

In [7]:
# Combining DataFrames: build two small sample frames to experiment with.
import numpy as np
import pandas as pd

# data1 holds the integers 1..20 as 4 rows x 5 columns;
# data2 holds the integers 11..30 as 5 rows x 4 columns.
data1 = np.arange(1, 21).reshape((4, 5))
data2 = np.arange(11, 31).reshape((5, 4))

# Both frames have columns labelled 'd' and 'e', so the labels overlap
# whenever the two frames are combined.
df1 = pd.DataFrame(data=data1, columns=list('abcde'))
df2 = pd.DataFrame(data=data2, columns=list('dehi'))

print(df1, '\n')
print(df2)

    a   b   c   d   e
0   1   2   3   4   5
1   6   7   8   9  10
2  11  12  13  14  15
3  16  17  18  19  20 

    d   e   h   i
0  11  12  13  14
1  15  16  17  18
2  19  20  21  22
3  23  24  25  26
4  27  28  29  30


In [4]:
# Concatenate along the column axis (axis=1): the frames are placed side by
# side, like books standing upright next to each other on a shelf.
# Rows are aligned on the index, so df1's columns are NaN for index 4,
# which exists only in df2.
con1 = pd.concat([df1, df2], axis='columns')
con1

Unnamed: 0,a,b,c,d,e,d.1,e.1,h,i
0,1.0,2.0,3.0,4.0,5.0,11,12,13,14
1,6.0,7.0,8.0,9.0,10.0,15,16,17,18
2,11.0,12.0,13.0,14.0,15.0,19,20,21,22
3,16.0,17.0,18.0,19.0,20.0,23,24,25,26
4,,,,,,27,28,29,30


In [9]:
# Concatenate along the row axis (axis=0, which is the default): the frames
# are stacked on top of each other, like books laid flat in a pile.
# Columns that exist in only one frame are filled with NaN, and each frame
# keeps its own index labels (0..3 followed by 0..4).
con2 = pd.concat([df1, df2], axis='index')
con2

Unnamed: 0,a,b,c,d,e,h,i
0,1.0,2.0,3.0,4,5,,
1,6.0,7.0,8.0,9,10,,
2,11.0,12.0,13.0,14,15,,
3,16.0,17.0,18.0,19,20,,
0,,,,11,12,13.0,14.0
1,,,,15,16,17.0,18.0
2,,,,19,20,21.0,22.0
3,,,,23,24,25.0,26.0
4,,,,27,28,29.0,30.0


In [12]:
# Row-wise stack again, but ignore_index=True discards the original labels
# and renumbers the combined rows 0..8.
con3 = pd.concat(objs=[df1, df2], ignore_index=True)
con3

Unnamed: 0,a,b,c,d,e,h,i
0,1.0,2.0,3.0,4,5,,
1,6.0,7.0,8.0,9,10,,
2,11.0,12.0,13.0,14,15,,
3,16.0,17.0,18.0,19,20,,
4,,,,11,12,13.0,14.0
5,,,,15,16,17.0,18.0
6,,,,19,20,21.0,22.0
7,,,,23,24,25.0,26.0
8,,,,27,28,29.0,30.0


In [13]:
df1 # 4 rows x 5 columns

Unnamed: 0,a,b,c,d,e
0,1,2,3,4,5
1,6,7,8,9,10
2,11,12,13,14,15
3,16,17,18,19,20


In [14]:
df2 # 5 rows x 4 columns

Unnamed: 0,d,e,h,i
0,11,12,13,14
1,15,16,17,18
2,19,20,21,22
3,23,24,25,26
4,27,28,29,30


# DataFrame Join
## Join 이란?

- 두 개의 DataFrame을 합치는 것
- 열기준 컬럼명으로 합치기 : merge
- 열기준 Index명으로 합치기 : merge, join, concat
- 행기준으로 합치기 : concat, append (단, DataFrame.append는 pandas 2.0에서 제거되었으므로 concat 사용 권장)

### 합치는 방법은?

- Inner : 두 DataFrame의 기준 컬럼에서 둘 다 존재하는 데이터만 Join
- Left Outer Join : 왼쪽 DataFrame의 기준 값을 모두 유지하고, 오른쪽에 없는 값은 NaN으로 채워 합치기
- Right Outer Join : 오른쪽 DataFrame의 기준 값을 모두 유지하고, 왼쪽에 없는 값은 NaN으로 채워 합치기
- Outer Join : 두 DataFrame의 모든 Data를 합치기


In [16]:
# Intersection: inner join on the index.
# 'd' and 'e' exist in both frames, so lsuffix/rsuffix tag each side's copy.
df1.join(
    df2,
    how='inner',
    lsuffix='_a',
    rsuffix='_b',
)
# (4 rows x 5 cols) inner-joined with (5 rows x 4 cols) --> 4 rows x 9 cols

Unnamed: 0,a,b,c,d_a,e_a,d_b,e_b,h,i
0,1,2,3,4,5,11,12,13,14
1,6,7,8,9,10,15,16,17,18
2,11,12,13,14,15,19,20,21,22
3,16,17,18,19,20,23,24,25,26


In [17]:
# Union: outer join on the index keeps every index label from either frame.
df1.join(
    df2,
    how='outer',
    lsuffix='_a',
    rsuffix='_b',
)
# (4 rows x 5 cols) outer-joined with (5 rows x 4 cols) --> 5 rows x 9 cols

Unnamed: 0,a,b,c,d_a,e_a,d_b,e_b,h,i
0,1.0,2.0,3.0,4.0,5.0,11,12,13,14
1,6.0,7.0,8.0,9.0,10.0,15,16,17,18
2,11.0,12.0,13.0,14.0,15.0,19,20,21,22
3,16.0,17.0,18.0,19.0,20.0,23,24,25,26
4,,,,,,27,28,29,30


In [48]:
# Add an explicit key column to both frames.
# NOTE: these assignments mutate df1 and df2 in place, so cells that use
# df1/df2 afterwards will see the extra 'ind' column.
df1['ind'] = [1, 2, 3, 4]
df2['ind'] = [1, 2, 3, 4, 5]

# set_index('ind') returns a new frame whose index is 'ind'; in that result
# 'ind' no longer appears as a regular column.
# DataFrame.join defaults to how='left' (it is pd.merge that defaults to
# 'inner'), so the join type is spelled out here. Only df1's keys 1..4 are
# kept; key 5, which exists only in df2, is dropped.
df1.set_index('ind').join(
    df2.set_index('ind'),
    how='left',
    lsuffix='_a',
    rsuffix='_b',
)

Unnamed: 0_level_0,a,b,c,d_a,e_a,d_b,e_b,h,i
ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1,2,3,4,5,11,12,13,14
2,6,7,8,9,10,15,16,17,18
3,11,12,13,14,15,19,20,21,22
4,16,17,18,19,20,23,24,25,26


In [46]:
# Union of keys: how='outer' keeps every 'ind' value found in either frame.
# The overlapping non-key columns ('d', 'e') receive merge's default
# suffixes, _x for the left frame and _y for the right frame.
df1.merge(df2, how='outer', on='ind')

Unnamed: 0,a,b,c,d_x,e_x,ind,d_y,e_y,h,i
0,1.0,2.0,3.0,4.0,5.0,1,11,12,13,14
1,6.0,7.0,8.0,9.0,10.0,2,15,16,17,18
2,11.0,12.0,13.0,14.0,15.0,3,19,20,21,22
3,16.0,17.0,18.0,19.0,20.0,4,23,24,25,26
4,,,,,,5,27,28,29,30


##### 같은 Index 기준으로 합치기
##### set_index('컬럼명') : 컬럼명의 데이터가 index로 설정
- index로 설정된 컬럼은 결과 DataFrame의 일반 컬럼에서 제외됨 (원본 DataFrame은 변경되지 않음)

In [27]:
import pandas as pd
from pandas import Series, DataFrame

# Player body measurements: one row per player (name, height, weight).
DF1 = DataFrame(
    data=[
        ["싸이", 180, 75],
        ["덕구", 160, 65],
        ["또치", 170, 75],
    ],
    columns=["이름", "키", "몸무게"],
)
DF1

Unnamed: 0,이름,키,몸무게
0,싸이,180,75
1,덕구,160,65
2,또치,170,75


In [23]:
# Player football profile: one row per player (name, position, skill).
# Two names also appear in DF1 (싸이, 덕구); 똥갈 appears only here.
DF2 = DataFrame(
    data=[
        ["싸이", "포워드", "잘함"],
        ["덕구", "미드필더", "못함"],
        ["똥갈", "수비수", "잘함"],
    ],
    columns=["이름", "포지션", "실력"],
)
DF2

Unnamed: 0,이름,포지션,실력
0,싸이,포워드,잘함
1,덕구,미드필더,못함
2,똥갈,수비수,잘함


In [35]:
# set_index('<column>') returns a new frame that uses that column's values as
# its index; the column is dropped from the result's regular columns.
# DF1 itself is left unchanged, so it is displayed here for comparison.

DF1_1 = DF1.set_index(keys='이름')
DF1

Unnamed: 0,이름,키,몸무게
0,싸이,180,75
1,덕구,160,65
2,또치,170,75


In [36]:
DF1_1  # after the '이름' column was dropped
# The names 싸이, 덕구, 또치 are now the index labels

Unnamed: 0_level_0,키,몸무게
이름,Unnamed: 1_level_1,Unnamed: 2_level_1
싸이,180,75
덕구,160,65
또치,170,75


In [37]:
# Move '이름' into the index for the second frame as well, rebinding DF2.
# NOTE: this cell is not re-runnable as-is. Once '이름' is the index, running
# it a second time raises KeyError because no such column remains.
DF2 = DF2.set_index(keys='이름')
DF2

Unnamed: 0_level_0,포지션,실력
이름,Unnamed: 1_level_1,Unnamed: 2_level_1
싸이,포워드,잘함
덕구,미드필더,못함
똥갈,수비수,잘함


In [40]:
# Inner merge using the index of both frames as the key: only the names
# present in both frames (싸이, 덕구) remain.
DF1_1.merge(DF2, how="inner", left_index=True, right_index=True)

Unnamed: 0_level_0,키,몸무게,포지션,실력
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
싸이,180,75,포워드,잘함
덕구,160,65,미드필더,못함


In [41]:
# Same result via concat: place the frames side by side and keep only the
# index labels shared by both (join='inner').
pd.concat(objs=[DF1_1, DF2], join='inner', axis='columns')

Unnamed: 0_level_0,키,몸무게,포지션,실력
이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
싸이,180,75,포워드,잘함
덕구,160,65,미드필더,못함
