# 데이터프레임 병합

- merge()
    - SQL의 join 명령어와 비슷한 방식으로 어떤 기준에 의해 두 데이터프레임을 병합하는 개념
        - 병합의 기준이 되는 열이나 인덱스를 키(key)라고 부름
        - 키가 되는 열이나 인덱스는 반드시 양쪽 데이터프레임에 모두 존재해야 함 (키 열의 이름이 서로 다르면 left_on, right_on으로 각각 지정)

In [1]:
import pandas as pd

In [2]:
# Left frame: the keys "a" and "b" each appear on several rows.
df1 = pd.DataFrame(
    {
        "key": ["a", "b", "b", "b", "a", "a", "b"],
        "data1": range(7),
    }
)
# Right frame: one row per key; "d" does not occur in df1.
df2 = pd.DataFrame(
    {
        "key": ["a", "b", "d"],
        "data2": range(3),
    }
)

In [3]:
# Display the contents of df1.
df1

Unnamed: 0,key,data1
0,a,0
1,b,1
2,b,2
3,b,3
4,a,4
5,a,5
6,b,6


In [4]:
# Display the contents of df2.
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [5]:
# on="key": join df1 and df2 using their shared "key" column as the merge key
# (pandas performs an inner join when `how` is not given).
df1.merge(df2, on="key")

Unnamed: 0,key,data1,data2
0,a,0,0
1,a,4,0
2,a,5,0
3,b,1,1
4,b,2,1
5,b,3,1
6,b,6,1


In [10]:
# Exam scores; each row is (class, kor, eng, math).
exam = pd.DataFrame(
    [
        (1, 33, 25, 87),
        (1, 79, 50, 82),
        (2, 25, 73, 45),
        (2, 75, 42, 16),
        (3, 87, 52, 58),
        (3, 74, 24, 70),
    ],
    columns=["class", "kor", "eng", "math"],
)
# Lookup table with one teacher name per class number.
teachers = pd.DataFrame(
    {
        "class": [1, 2, 3],
        "teacher": ["kim", "lee", "park"],
    }
)

In [11]:
# Display the contents of exam.
exam

Unnamed: 0,class,kor,eng,math
0,1,33,25,87
1,1,79,50,82
2,2,25,73,45
3,2,75,42,16
4,3,87,52,58
5,3,74,24,70


In [12]:
# Display the contents of teachers.
teachers

Unnamed: 0,class,teacher
0,1,kim
1,2,lee
2,3,park


In [13]:
# Attach each class's teacher to the exam rows by joining on "class".
exam.merge(teachers, on="class")

Unnamed: 0,class,kor,eng,math,teacher
0,1,33,25,87,kim
1,1,79,50,82,kim
2,2,25,73,45,lee
3,2,75,42,16,lee
4,3,87,52,58,park
5,3,74,24,70,park


In [14]:
# Two frames that share only the keys "b" and "d"; both also have a
# non-key column named "value".
df3 = pd.DataFrame(
    {
        "key": ["a", "b", "c", "d"],
        "value": range(4),
    }
)
df4 = pd.DataFrame(
    {
        "key": ["b", "d", "e", "f"],
        "value": range(4),
    }
)

In [15]:
# Display the contents of df3.
df3

Unnamed: 0,key,value
0,a,0
1,b,1
2,c,2
3,d,3


In [16]:
# Display the contents of df4.
df4

Unnamed: 0,key,value
0,b,0
1,d,1
2,e,2
3,f,3


In [17]:
# Join on "key" with the default join type; the overlapping "value" columns
# come back as value_x (from df3) and value_y (from df4).
df3.merge(df4, on="key")

Unnamed: 0,key,value_x,value_y
0,b,1,0
1,d,3,1


- value_x와 value_y 열은 각각 첫 번째 데이터프레임과 두 번째 데이터프레임의 "value" 열의 값 (키가 아닌 열의 이름이 양쪽에서 겹치면 접미사 _x, _y가 자동으로 붙음)

In [18]:
# how="inner": keep only the keys that appear in both df3 and df4.
df3.merge(df4, how="inner", on="key")

Unnamed: 0,key,value_x,value_y
0,b,1,0
1,d,3,1


In [19]:
# how="left": keep every row of df3; keys missing from df4 get NaN in value_y.
df3.merge(df4, how="left", on="key")

Unnamed: 0,key,value_x,value_y
0,a,0,
1,b,1,0.0
2,c,2,
3,d,3,1.0


In [20]:
# how="right": keep every row of df4; keys missing from df3 get NaN in value_x.
df3.merge(df4, how="right", on="key")

Unnamed: 0,key,value_x,value_y
0,b,1.0,0
1,d,3.0,1
2,e,,2
3,f,,3


In [21]:
# how="outer": keep the keys from both frames; whichever side lacks a key
# gets NaN in its value column.
df3.merge(df4, how="outer", on="key")

Unnamed: 0,key,value_x,value_y
0,a,0.0,
1,b,1.0,0.0
2,c,2.0,
3,d,3.0,1.0
4,e,,2.0
5,f,,3.0


In [23]:
# Give the key column a different name in each frame ("key1" vs "key2") so
# the two frames no longer share a key column name.
df5 = df3.rename({"key": "key1"}, axis="columns")
df6 = df4.rename({"key": "key2"}, axis="columns")

In [24]:
# Display the contents of df5.
df5

Unnamed: 0,key1,value
0,a,0
1,b,1
2,c,2
3,d,3


In [25]:
# Display the contents of df6.
df6

Unnamed: 0,key2,value
0,b,0
1,d,1
2,e,2
3,f,3


In [26]:
# The key columns are named differently, so name each side's key explicitly:
# left_on for df5 and right_on for df6. Both key columns stay in the result.
df5.merge(df6, how="inner", left_on="key1", right_on="key2")

Unnamed: 0,key1,value_x,key2,value_y
0,b,1,b,0
1,d,3,d,1
