In [1]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

df1 = DataFrame({"key": list("bbacaab"), "value1": range(7)})
df2 = DataFrame({"key": ["a", "b", "d"], "value2": [10,11,12]})
display(df1)
display(df2)

Unnamed: 0,key,value1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


Unnamed: 0,key,value2
0,a,10
1,b,11
2,d,12


## Merge

### One To Many Fusion

In [2]:
# by default uses key
pd.merge(df1, df2)

Unnamed: 0,key,value1,value2
0,b,0,11
1,b,1,11
2,b,6,11
3,a,2,10
4,a,4,10
5,a,5,10


In [3]:
# on defines which column to fusion
pd.merge(df1, df2, on="key")

Unnamed: 0,key,value1,value2
0,b,0,11
1,b,1,11
2,b,6,11
3,a,2,10
4,a,4,10
5,a,5,10


In [4]:
df3 = DataFrame({"key1": list("bbacaab"), "value1": range(7)})
df4 = DataFrame({"key2": ["a", "b", "d"], "value2": [10,11,12]})
display(df1)
display(df2)

Unnamed: 0,key,value1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


Unnamed: 0,key,value2
0,a,10
1,b,11
2,d,12


In [5]:
# there's no similar column: you need to tell the left and right join columns
pd.merge(df3, df4)

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

### Inner Join

In [8]:
# intersection join: inner join
pd.merge(df3, df4, left_on="key1", right_on="key2")

Unnamed: 0,key1,value1,key2,value2
0,b,0,b,11
1,b,1,b,11
2,b,6,b,11
3,a,2,a,10
4,a,4,a,10
5,a,5,a,10


## Fusion Types

### 1. Inner Join

combinations of two tables that appear in **both** tables (intersection)

### 2. Left Join

combinations with the keys that appear on the left column

### 3. Right Join

combinations with the keys that appear on the right column

### 4. Outer Join

all possible combinations

## Inner Join

In [9]:
pd.merge(df1, df2, how="inner")

Unnamed: 0,key,value1,value2
0,b,0,11
1,b,1,11
2,b,6,11
3,a,2,10
4,a,4,10
5,a,5,10


## Left Join

In [10]:
# for each row of table 1 will create a row in result, and the ones with no combination, nan
pd.merge(df1, df2, how="left")

Unnamed: 0,key,value1,value2
0,b,0,11.0
1,b,1,11.0
2,a,2,10.0
3,c,3,
4,a,4,10.0
5,a,5,10.0
6,b,6,11.0


## Right Join

In [11]:
# for each row of table 2 will create a row in result with all possible combinations with table 1, 
# and the ones with no combination, nan
pd.merge(df1, df2, how="right")


Unnamed: 0,key,value1,value2
0,a,2.0,10
1,a,4.0,10
2,a,5.0,10
3,b,0.0,11
4,b,1.0,11
5,b,6.0,11
6,d,,12


## Outer Join

In [13]:
# union of left and right
pd.merge(df1, df2, how="outer")

Unnamed: 0,key,value1,value2
0,b,0.0,11.0
1,b,1.0,11.0
2,b,6.0,11.0
3,a,2.0,10.0
4,a,4.0,10.0
5,a,5.0,10.0
6,c,3.0,
7,d,,12.0


In [15]:
df5 = DataFrame({"key": list("bbacaab"), "value1": range(7)})
df6 = DataFrame({"key": ["a", "b", "d", "a", "b"], "value2": [10,11,12, 13, 14]})
display(df5)
display(df6)
pd.merge(df5, df6, how="inner") # 12 rows

Unnamed: 0,key,value1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


Unnamed: 0,key,value2
0,a,10
1,b,11
2,d,12
3,a,13
4,b,14


Unnamed: 0,key,value1,value2
0,b,0,11
1,b,0,14
2,b,1,11
3,b,1,14
4,b,6,11
5,b,6,14
6,a,2,10
7,a,2,13
8,a,4,10
9,a,4,13


In [16]:
pd.merge(df5, df6, how="left") # 13 rows

Unnamed: 0,key,value1,value2
0,b,0,11.0
1,b,0,14.0
2,b,1,11.0
3,b,1,14.0
4,a,2,10.0
5,a,2,13.0
6,c,3,
7,a,4,10.0
8,a,4,13.0
9,a,5,10.0


In [17]:
pd.merge(df5, df6, how="right") # 13 rows

Unnamed: 0,key,value1,value2
0,a,2.0,10
1,a,4.0,10
2,a,5.0,10
3,b,0.0,11
4,b,1.0,11
5,b,6.0,11
6,d,,12
7,a,2.0,13
8,a,4.0,13
9,a,5.0,13


In [18]:
pd.merge(df5, df6, how="outer") # 14 rows

Unnamed: 0,key,value1,value2
0,b,0.0,11.0
1,b,0.0,14.0
2,b,1.0,11.0
3,b,1.0,14.0
4,b,6.0,11.0
5,b,6.0,14.0
6,a,2.0,10.0
7,a,2.0,13.0
8,a,4.0,10.0
9,a,4.0,13.0


In [21]:
df7 = DataFrame({"key1": ["one", "two", "one"], 
                 "key2": ["dog", "dog", "cat"],
                 "value1": [10,20,30]
                })
df8 = DataFrame({"key1": ["one", "one", "two", "two"], 
                 "key2": ["dog", "dog", "dog", "cat"],
                 "value2": [40,50,60,70]
                })
display(df7)
display(df8)

Unnamed: 0,key1,key2,value1
0,one,dog,10
1,two,dog,20
2,one,cat,30


Unnamed: 0,key1,key2,value2
0,one,dog,40
1,one,dog,50
2,two,dog,60
3,two,cat,70


In [22]:
# uses all common keys to merge
pd.merge(df7, df8)

Unnamed: 0,key1,key2,value1,value2
0,one,dog,10,40
1,one,dog,10,50
2,two,dog,20,60


In [23]:
# use only key1 
pd.merge(df7, df8, on=["key1"])

Unnamed: 0,key1,key2_x,value1,key2_y,value2
0,one,dog,10,dog,40
1,one,dog,10,dog,50
2,one,cat,30,dog,40
3,one,cat,30,dog,50
4,two,dog,20,dog,60
5,two,dog,20,cat,70


In [25]:
pd.merge(df7, df8, on=["key1", "key2"], how="outer")

Unnamed: 0,key1,key2,value1,value2
0,one,dog,10.0,40.0
1,one,dog,10.0,50.0
2,two,dog,20.0,60.0
3,one,cat,30.0,
4,two,cat,,70.0


In [26]:
pd.merge(df7, df8, on=["key1"], how="outer")

Unnamed: 0,key1,key2_x,value1,key2_y,value2
0,one,dog,10,dog,40
1,one,dog,10,dog,50
2,one,cat,30,dog,40
3,one,cat,30,dog,50
4,two,dog,20,dog,60
5,two,dog,20,cat,70


In [None]:
# merge is for column based operations, concat is for row based operations