# Merge Pandas Dataframes

In [1]:
import pandas as pd

In [3]:
# Left-hand records: one name per key.
data1 = {
    'key': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'Name': ['Paula', 'Alexis', 'Jordan', 'Trent', 'Ulysses'],
}

# Right-hand records: one score per key (same five keys as data1).
data2 = {
    'key': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'Score': [96, 98, 94, 92, 92],
}

# Build both frames in one step; dict keys become the column names.
df1, df2 = (pd.DataFrame(records) for records in (data1, data2))

In [4]:
# Show the left frame: columns 'key' and 'Name', keys k0-k4.
df1

Unnamed: 0,key,Name
0,k0,Paula
1,k1,Alexis
2,k2,Jordan
3,k3,Trent
4,k4,Ulysses


In [5]:
# Show the right frame: columns 'key' and 'Score', same keys k0-k4.
df2

Unnamed: 0,key,Score
0,k0,96
1,k1,98
2,k2,94
3,k3,92
4,k4,92


## Merge left using on

Always remember that the column(s) passed to `on` must exist, under exactly the same name, in both dataframes.

We can pass either a single column label or a list of column labels as the `on` parameter.

In [6]:
# Join df1 (left) with df2 (right) on the shared 'key' column,
# naming the two frames explicitly with the `left=` / `right=` keywords.
pd.merge(left=df1, right=df2, on='key')

Unnamed: 0,key,Name,Score
0,k0,Paula,96
1,k1,Alexis,98
2,k2,Jordan,94
3,k3,Trent,92
4,k4,Ulysses,92


In [7]:
# Same merge with the frames passed positionally:
# the first argument is the left frame, the second is the right frame.
pd.merge(df1, df2, on='key')

Unnamed: 0,key,Name,Score
0,k0,Paula,96
1,k1,Alexis,98
2,k2,Jordan,94
3,k3,Trent,92
4,k4,Ulysses,92


In [8]:
# Swapping the arguments makes df2 the left frame,
# so its 'Score' column now appears before 'Name' in the result.
pd.merge(df2, df1, on='key')

Unnamed: 0,key,Score,Name
0,k0,96,Paula
1,k1,98,Alexis
2,k2,94,Jordan
3,k3,92,Trent
4,k4,92,Ulysses


Label: a single column name, used to merge on one key <br>
List: a list of column names, used to merge on multiple keys

In [10]:
# Both record sets carry two key columns ('key1' and 'key2') so they can be
# merged on a composite key. Note that 'key2' contains an upper-case 'K0'.
data1 = {
    'key1': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'key2': ['k0', 'k1', 'K0', 'k1', 'k2'],
    'Name': ['Paula', 'Alexis', 'Jordan', 'Trent', 'Ulysses'],
}
data2 = {
    'key1': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'key2': ['k0', 'k1', 'K0', 'k1', 'k2'],
    'Score': [96, 98, 94, 92, 92],
}

# Left frame holds the names, right frame holds the scores.
df1 = pd.DataFrame(data=data1)
df2 = pd.DataFrame(data=data2)

In [12]:
# Show the left frame: two key columns ('key1', 'key2') plus 'Name'.
df1

Unnamed: 0,key1,key2,Name
0,k0,k0,Paula
1,k1,k1,Alexis
2,k2,K0,Jordan
3,k3,k1,Trent
4,k4,k2,Ulysses


In [13]:
# Show the right frame: the same two key columns plus 'Score'.
df2

Unnamed: 0,key1,key2,Score
0,k0,k0,96
1,k1,k1,98
2,k2,K0,94
3,k3,k1,92
4,k4,k2,92


In [11]:
# Merge on a composite key by passing a list of column names to `on`;
# each key column appears once in the result.
pd.merge(df1, df2, on=['key1', 'key2'])

Unnamed: 0,key1,key2,Name,Score
0,k0,k0,Paula,96
1,k1,k1,Alexis,98
2,k2,K0,Jordan,94
3,k3,k1,Trent,92
4,k4,k2,Ulysses,92


## Inner merge/join

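The `how` parameter selects the type of join: `'left'`, `'right'`, `'outer'`, or `'inner'`. When `how` is omitted, an inner join is performed.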

In [18]:
# The two key sets overlap only partially:
# k0-k2 appear on both sides, k5/k6 only on the left, k3/k4 only on the right.
data1 = {
    'key': ['k0', 'k1', 'k2', 'k5', 'k6'],
    'Name': ['Paula', 'Alexis', 'Jordan', 'Trent', 'Ulysses'],
}
data2 = {
    'key': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'Score': [96, 98, 94, 92, 92],
}

# Materialise the left (names) and right (scores) frames.
df1, df2 = map(pd.DataFrame, (data1, data2))

In [19]:
# Show the left frame: keys k0, k1, k2, k5, k6.
df1

Unnamed: 0,key,Name
0,k0,Paula
1,k1,Alexis
2,k2,Jordan
3,k5,Trent
4,k6,Ulysses


In [20]:
# Show the right frame: keys k0, k1, k2, k3, k4.
df2

Unnamed: 0,key,Score
0,k0,96
1,k1,98
2,k2,94
3,k3,92
4,k4,92


In [21]:
# `how` is not given, so the default inner join applies:
# only keys present in both frames (k0, k1, k2) are kept.
pd.merge(df1, df2, on="key")

Unnamed: 0,key,Name,Score
0,k0,Paula,96
1,k1,Alexis,98
2,k2,Jordan,94


In [22]:
# Explicit inner join: keeps only the keys found in both frames.
pd.merge(df1, df2, on='key', how='inner')

Unnamed: 0,key,Name,Score
0,k0,Paula,96
1,k1,Alexis,98
2,k2,Jordan,94


In [23]:
# Outer join: keeps every key from either frame. Cells with no matching
# row are missing (NaN), which is why 'Score' is displayed as float.
pd.merge(df1, df2, on='key', how='outer')

Unnamed: 0,key,Name,Score
0,k0,Paula,96.0
1,k1,Alexis,98.0
2,k2,Jordan,94.0
3,k5,Trent,
4,k6,Ulysses,
5,k3,,92.0
6,k4,,92.0


In [24]:
# Left join: keeps every row of df1; keys without a match in df2
# (k5, k6) get a missing 'Score'.
pd.merge(df1, df2, on='key', how='left')

Unnamed: 0,key,Name,Score
0,k0,Paula,96.0
1,k1,Alexis,98.0
2,k2,Jordan,94.0
3,k5,Trent,
4,k6,Ulysses,


In [25]:
# Right join: keeps every row of df2; keys without a match in df1
# (k3, k4) get a missing 'Name'.
pd.merge(df1, df2, on='key', how='right')

Unnamed: 0,key,Name,Score
0,k0,Paula,96
1,k1,Alexis,98
2,k2,Jordan,94
3,k3,,92
4,k4,,92


## Indicator parameter

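`indicator` (bool or string): when `True`, a `_merge` column is added that tells where each row came from (`both`, `left_only`, or `right_only`); when a string is given, it is used as the name of that column.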

In [26]:
# indicator=True appends a '_merge' column marking each row as
# 'both', 'left_only' or 'right_only'.
pd.merge(df1, df2, on='key', how='outer', indicator=True)

Unnamed: 0,key,Name,Score,_merge
0,k0,Paula,96.0,both
1,k1,Alexis,98.0,both
2,k2,Jordan,94.0,both
3,k5,Trent,,left_only
4,k6,Ulysses,,left_only
5,k3,,92.0,right_only
6,k4,,92.0,right_only


In [27]:
# Passing a string to `indicator` uses it as the name of the origin column
# ('Source' here) in place of '_merge'.
pd.merge(df1, df2, on='key', how='outer', indicator='Source')

Unnamed: 0,key,Name,Score,Source
0,k0,Paula,96.0,both
1,k1,Alexis,98.0,both
2,k2,Jordan,94.0,both
3,k5,Trent,,left_only
4,k6,Ulysses,,left_only
5,k3,,92.0,right_only
6,k4,,92.0,right_only


## `left_index` and `right_index`

In [28]:
# The two frames share the column name 'key' but have no key value in common:
# the left side uses k0-k4 and the right side uses k5-k9.
data1 = {
    'key': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'Name': ['Paula', 'Alexis', 'Jordan', 'Trent', 'Ulysses'],
}
data2 = {
    'key': ['k5', 'k6', 'k7', 'k8', 'k9'],
    'Score': [96, 98, 94, 92, 92],
}

# Both frames get the default integer row index 0..4.
df1, df2 = [pd.DataFrame(columns_by_name) for columns_by_name in (data1, data2)]

In [29]:
# Show the left frame: keys k0-k4.
df1

Unnamed: 0,key,Name
0,k0,Paula
1,k1,Alexis
2,k2,Jordan
3,k3,Trent
4,k4,Ulysses


In [30]:
# Show the right frame: keys k5-k9, none of which occur in df1.
df2

Unnamed: 0,key,Score
0,k5,96
1,k6,98
2,k7,94
3,k8,92
4,k9,92


In [31]:
# With no `on` argument, pandas merges on the columns the two frames have in
# common (here 'key'). No key value matches, so the inner join is empty.
pd.merge(df1, df2)

Unnamed: 0,key,Name,Score


In [32]:
# Join on the row index of both frames instead of on a column.
# Both 'key' columns are kept and told apart by the default '_x' / '_y' suffixes.
pd.merge(df1, df2, left_index=True, right_index=True)

Unnamed: 0,key_x,Name,key_y,Score
0,k0,Paula,k5,96
1,k1,Alexis,k6,98
2,k2,Jordan,k7,94
3,k3,Trent,k8,92
4,k4,Ulysses,k9,92


## `suffixes` parameter

In [33]:
# Two record sets with identical content. Besides the join column 'key',
# both sides carry a 'Name' column, so the merged result has a name clash.
data1 = {
    'key': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'Name': ['Paula', 'Alexis', 'Jordan', 'Trent', 'Ulysses'],
}
data2 = {
    'key': ['k0', 'k1', 'k2', 'k3', 'k4'],
    'Name': ['Paula', 'Alexis', 'Jordan', 'Trent', 'Ulysses'],
}

# Left and right frames, built from the dictionaries above.
df1 = pd.DataFrame(data=data1)
df2 = pd.DataFrame(data=data2)

In [34]:
# Show the left frame: columns 'key' and 'Name'.
df1

Unnamed: 0,key,Name
0,k0,Paula
1,k1,Alexis
2,k2,Jordan
3,k3,Trent
4,k4,Ulysses


In [35]:
# Show the right frame: same columns and values as df1.
df2

Unnamed: 0,key,Name
0,k0,Paula
1,k1,Alexis
2,k2,Jordan
3,k3,Trent
4,k4,Ulysses


In [36]:
# 'Name' exists in both frames but is not a join key, so pandas keeps both
# copies and appends the default suffixes: 'Name_x' (left), 'Name_y' (right).
pd.merge(df1, df2, on='key')

Unnamed: 0,key,Name_x,Name_y
0,k0,Paula,Paula
1,k1,Alexis,Alexis
2,k2,Jordan,Jordan
3,k3,Trent,Trent
4,k4,Ulysses,Ulysses


In [37]:
# `suffixes` replaces the default '_x' / '_y' with custom endings:
# the first applies to the left frame's column, the second to the right's.
pd.merge(df1, df2, on='key', suffixes=('_leftDF', '_rightDF'))

Unnamed: 0,key,Name_leftDF,Name_rightDF
0,k0,Paula,Paula
1,k1,Alexis,Alexis
2,k2,Jordan,Jordan
3,k3,Trent,Trent
4,k4,Ulysses,Ulysses
