In [4]:
import pandas as pd
import numpy as np

| Merge method | SQL Join Name | Description |
| ------------ | ------------- | ----------- | 
| left | LEFT OUTER JOIN | Use keys from left frame only |
| right | RIGHT OUTER JOIN | Use keys from right frame only |
| outer | FULL OUTER JOIN | Use union of keys from both frames |
| inner | INNER JOIN | Use intersection of keys from both frames |

# **_MERGE()_**

In [6]:
# Left operand for the merge examples; its key column 'B' holds the
# distinct values 1 and 2.
left = pd.DataFrame(
    {
        'A': [1, 2],
        'B': [1, 2],
    }
)
left

Unnamed: 0,A,B
0,1,1
1,2,2


In [7]:
# Right operand for the merge examples; every row has B == 2, so the key
# is repeated on this side.
right = pd.DataFrame(dict(A=[4, 5, 6], B=[2, 2, 2]))
right

Unnamed: 0,A,B
0,4,2
1,5,2
2,6,2


In [11]:
# Default how='inner': only B values present in both frames are kept, and
# the overlapping non-key column 'A' receives the default '_x'/'_y' suffixes.
left.merge(right, on='B')

Unnamed: 0,A_x,B,A_y
0,2,2,4
1,2,2,5
2,2,2,6


In [12]:
# Full outer join on B. validate='one_to_many' asks pandas to check that the
# merge keys are unique in the left frame before merging.
left.merge(
    right,
    how='outer',
    on='B',
    validate='one_to_many',
)

Unnamed: 0,A_x,B,A_y
0,1,1,
1,2,2,4.0
2,2,2,5.0
3,2,2,6.0


### **Indicator**

In [14]:
# indicator=True appends a '_merge' column telling whether each row's key was
# found in the left frame only, the right frame only, or both.
left.merge(
    right,
    how='outer',
    on='B',
    indicator=True,
)

Unnamed: 0,A_x,B,A_y,_merge
0,1,1,,left_only
1,2,2,4.0,both
2,2,2,5.0,both
3,2,2,6.0,both


In [18]:
# Left-hand frame for the indicator examples, keyed on 'col1' (keys 0 and 1).
df1 = pd.DataFrame(
    {
        'col1': [0, 1],
        'col_left': ['a', 'b'],
    }
)
df1

Unnamed: 0,col1,col_left
0,0,a
1,1,b


In [19]:
# Right-hand frame keyed on 'col1': key 2 appears twice and key 0 is absent.
df2 = pd.DataFrame(
    {
        'col1': [1, 2, 2],
        'col_right': [2, 2, 2],
    }
)
df2

Unnamed: 0,col1,col_right
0,1,2
1,2,2
2,2,2


In [20]:
# Outer merge on 'col1' with the default '_merge' indicator column, so
# left-only, right-only and matched keys can be told apart.
df1.merge(
    df2,
    how='outer',
    on='col1',
    indicator=True,
)

Unnamed: 0,col1,col_left,col_right,_merge
0,0,a,,left_only
1,1,b,2.0,both
2,2,,2.0,right_only
3,2,,2.0,right_only


### **indicator column name**

In [21]:
# Passing a string to `indicator` uses it as the indicator column's name
# instead of the default '_merge'.
df1.merge(
    df2,
    how='outer',
    on='col1',
    indicator='indicator_column',
)

Unnamed: 0,col1,col_left,col_right,indicator_column
0,0,a,,left_only
1,1,b,2.0,both
2,2,,2.0,right_only
3,2,,2.0,right_only


# **_JOIN()_**

#### **Combining the columns of two potentially differently-indexed DataFrames into a single result DataFrame**

In [28]:
# Left frame for the join examples, indexed by the labels K0, K1, K2.
left = pd.DataFrame(
    data={
        'A': ['A0', 'A1', 'A2'],
        'B': ['B0', 'B1', 'B2'],
    },
    index=['K0', 'K1', 'K2'],
)
left

Unnamed: 0,A,B
K0,A0,B0
K1,A1,B1
K2,A2,B2


In [29]:
# Right frame for the join examples; it shares labels K0 and K2 with `left`
# and adds K3.
right = pd.DataFrame(
    data={
        'C': ['C0', 'C2', 'C3'],
        'D': ['D0', 'D2', 'D3'],
    },
    index=['K0', 'K2', 'K3'],
)
right

Unnamed: 0,C,D
K0,C0,D0
K2,C2,D2
K3,C3,D3


In [30]:
# DataFrame.join aligns on the index. how='left' is the default, spelled out
# here: every row of `left` is kept and unmatched labels get NaN in C and D.
left.join(right, how='left')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2


In [36]:
# Outer join on the index: the result carries the union of both frames' labels.
left.join(other=right, how='outer')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K1,A1,B1,,
K2,A2,B2,C2,D2
K3,,,C3,D3


In [37]:
# Index-on-index outer merge (same rows as the outer join), with a '_merge'
# column showing which frame each label came from.
left.merge(
    right,
    how='outer',
    left_index=True,
    right_index=True,
    indicator=True,
)

Unnamed: 0,A,B,C,D,_merge
K0,A0,B0,C0,D0,both
K1,A1,B1,,,left_only
K2,A2,B2,C2,D2,both
K3,,,C3,D3,right_only


In [33]:
# Inner join on the index: only labels present in both frames remain.
left.join(other=right, how='inner')

Unnamed: 0,A,B,C,D
K0,A0,B0,C0,D0
K2,A2,B2,C2,D2


In [40]:
# Index-on-index inner merge; every surviving row is marked 'both' in the
# '_merge' column because only shared labels are kept.
left.merge(
    right,
    how='inner',
    left_index=True,
    right_index=True,
    indicator=True,
)

Unnamed: 0,A,B,C,D,_merge
K0,A0,B0,C0,D0,both
K2,A2,B2,C2,D2,both


## Merging on a combination of columns and index

In [59]:
# Both indexes are named 'key1' so the index level can later be referenced
# by that name as a merge key. Each contains one duplicated label.
left_index = pd.Index(
    data=['K0', 'K0', 'K1', 'K2'],
    name="key1",
)
right_index = pd.Index(
    data=['K0', 'K1', 'K2', 'K2'],
    name="key1",
)

In [60]:
# Left frame whose first merge key lives in the index ('key1') and whose
# second merge key is the regular column 'key2'.
left = pd.DataFrame(
    data={
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
    },
    index=left_index,
)
left

Unnamed: 0_level_0,A,B,key2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
K0,A0,B0,K0
K0,A1,B1,K1
K1,A2,B2,K0
K2,A3,B3,K1


In [61]:
# Right frame with the same layout: 'key1' in the index, 'key2' as a column.
right = pd.DataFrame(
    data={
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
        'key2': ['K0', 'K0', 'K0', 'K1'],
    },
    index=right_index,
)
right

Unnamed: 0_level_0,C,D,key2
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
K0,C0,D0,K0
K1,C1,D1,K0
K2,C2,D2,K0
K2,C3,D3,K1


In [62]:
# `on` mixes an index level name ('key1') with a column name ('key2'); the
# default inner merge keeps only rows whose (key1, key2) pair is in both frames.
pd.merge(left, right, on=['key1', 'key2'])

Unnamed: 0_level_0,A,B,key2,C,D
key1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
K0,A0,B0,K0,C0,D0
K1,A2,B2,K0,C1,D1
K2,A3,B3,K1,C3,D3


# **PATCHING**

### **combine_first()**
### **update()**

In [94]:
# Frame with scattered NaN holes; it is the one being patched below.
df1 = pd.DataFrame(
    [
        [np.nan, 3., 5.],
        [-4.6, np.nan, np.nan],
        [np.nan, 7., np.nan],
    ]
)
df1

Unnamed: 0,0,1,2
0,,3.0,5.0
1,-4.6,,
2,,7.0,


In [95]:
# Donor frame: it only covers index labels 1 and 2, so row 0 of df1 has no
# counterpart here.
df2 = pd.DataFrame(
    [
        [-42.6, np.nan, -8.2],
        [-5., 1.6, 4],
    ],
    index=[1, 2],
)
df2

Unnamed: 0,0,1,2
1,-42.6,,-8.2
2,-5.0,1.6,4.0


## **_patch missing values in df1 with values from df2_**

In [98]:
# combine_first keeps df1's non-null values and fills its NaN entries from
# the same index/column positions in df2. It returns a new frame, so df1
# itself is left unchanged.
df1.combine_first(other=df2)

Unnamed: 0,0,1,2
0,,3.0,5.0
1,-4.6,,-8.2
2,-5.0,7.0,4.0


In [99]:
# update() modifies df1 in place and returns None, hence the separate display
# line. With the default overwrite=True, every non-NA value in df2 replaces
# the value at the same position in df1 (not only df1's NaN entries).
# NOTE(review): the saved output below (-4.6 and 7.0 preserved) looks like the
# overwrite=False result; re-run this cell to confirm and refresh it.
df1.update(other=df2)
df1

Unnamed: 0,0,1,2
0,,3.0,5.0
1,-4.6,,-8.2
2,-5.0,7.0,4.0
