# 07 Merging DataFrames

* Basic merging
* Merge on attributes
* Join-type operations

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Fix the global NumPy seed so the randomly generated example values are
# the same on every Restart & Run All.
SEED = 0
np.random.seed(SEED)

In [3]:
# Two column-oriented tables describing the same five people.
# Both use the key column "name", so they can be merged without extra arguments.
scores = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    points=np.random.randint(low=0, high=100, size=5),  # integers in [0, 100)
)

favorites = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    favorite=np.random.randint(low=0, high=10, size=5),  # integers in [0, 10)
)

In [4]:
# Turn each dict of columns into a DataFrame: `a` holds the scores, `b` the favorites.
a, b = pd.DataFrame(scores), pd.DataFrame(favorites)

In [5]:
# Left-hand frame: one row per person with the columns "name" and "points".
a

Unnamed: 0,name,points
0,Alice,44
1,Bob,47
2,Carol,64
3,Dave,67
4,Eve,67


In [6]:
# Right-hand frame: one row per person with the columns "name" and "favorite".
b

Unnamed: 0,name,favorite
0,Alice,7
1,Bob,9
2,Carol,3
3,Dave,5
4,Eve,2


In [7]:
# With no key arguments, merge joins on the columns both frames have in common;
# here that is only "name", so each person's points and favorite end up on one row.
a.merge(b)

Unnamed: 0,name,points,favorite
0,Alice,44,7
1,Bob,47,9
2,Carol,64,3
3,Dave,67,5
4,Eve,67,2


### Specifying explicit merge keys

In [8]:
# Same two tables as before, except that the key column is spelled differently:
# "name" in the scores table, "Name" (capitalised) in the favorites table.
scores = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    points=np.random.randint(low=0, high=100, size=5),  # integers in [0, 100)
)

favorites = dict(
    Name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    favorite=np.random.randint(low=0, high=10, size=5),  # integers in [0, 10)
)

In [9]:
# Rebuild both frames from the new dicts; their key columns are now "name" and "Name".
a, b = pd.DataFrame(scores), pd.DataFrame(favorites)

In [10]:
# a.merge(b)  # MergeError: No common columns to perform merge on ("name" vs. "Name")

In [11]:
# The key column is called "name" on the left and "Name" on the right, so each
# side's key is named explicitly. The result keeps both key columns.
a.merge(b, left_on="name", right_on="Name")

Unnamed: 0,name,points,Name,favorite
0,Alice,36,Alice,1
1,Bob,87,Bob,6
2,Carol,70,Carol,7
3,Dave,88,Dave,7
4,Eve,88,Eve,8


The redundant `Name` column (a duplicate of `name`) can be dropped.

In [12]:
# Merge on the differently named key columns, then discard the right-hand
# copy of the key so that only "name" remains.
(
    a.merge(b, left_on="name", right_on="Name")
    .drop(columns="Name")
)

Unnamed: 0,name,points,favorite
0,Alice,36,1
1,Bob,87,6
2,Carol,70,7
3,Dave,88,7
4,Eve,88,8


### Merge on index

In [13]:
# Fresh random values for the same five people.
scores = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    points=np.random.randint(low=0, high=100, size=5),  # integers in [0, 100)
)

favorites = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    favorite=np.random.randint(low=0, high=10, size=5),  # integers in [0, 10)
)

# This time the names become the row index instead of a regular column,
# leaving each frame with a single value column.
a = pd.DataFrame(
    data=scores["points"],
    index=scores["name"],
    columns=["points"],
)
b = pd.DataFrame(
    data=favorites["favorite"],
    index=favorites["name"],
    columns=["favorite"],
)

In [14]:
# Left-hand frame: a single "points" column, with the names as the row index.
a

Unnamed: 0,points
Alice,81
Bob,37
Carol,25
Dave,77
Eve,72


In [15]:
# Right-hand frame: a single "favorite" column, with the names as the row index.
b

Unnamed: 0,favorite
Alice,9
Bob,4
Carol,3
Dave,0
Eve,3


In [16]:
# a.merge(b)  # MergeError: No common columns to perform merge on ("points" vs. "favorite"; the names are in the index)

In [17]:
# The names are in the index rather than in a column, so both sides are told
# to use their index as the merge key.
a.merge(b, left_index=True, right_index=True)

Unnamed: 0,points,favorite
Alice,81,9
Bob,37,4
Carol,25,3
Dave,77,0
Eve,72,3


It is also possible to merge based on an index on one side and a regular column on the other.