# 07 Merging DataFrames

* Basic merging
* Merge on attributes
* Join-type operations

In [1]:
import numpy as np
import pandas as pd

In [4]:
# Pin the legacy global NumPy RNG so the random draws in the cells below
# come out the same on every fresh Restart & Run All.
np.random.seed(seed=0)

In [12]:
# Two records that share the same "name" values: one random integer in
# [0, 100) per person for the points, one in [0, 10) for the favorite.
scores = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    points=np.random.randint(low=0, high=100, size=5),
)
favorites = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    favorite=np.random.randint(low=0, high=10, size=5),
)

In [13]:
# Wrap each dict in a DataFrame; the dict keys become the column labels.
a, b = (pd.DataFrame(data=record) for record in (scores, favorites))

In [14]:
# Left-hand frame: one row per person with their points.
a

Unnamed: 0,name,points
0,Alice,39
1,Bob,87
2,Carol,46
3,Dave,88
4,Eve,81


In [15]:
# Right-hand frame: one row per person with their favorite.
b

Unnamed: 0,favorite,name
0,5,Alice
1,9,Bob
2,8,Carol
3,9,Dave
4,4,Eve


In [16]:
# With no key given, the merge uses the column labels both frames share,
# which here is just "name".
pd.merge(a, b)

Unnamed: 0,name,points,favorite
0,Alice,39,5
1,Bob,87,9
2,Carol,46,8
3,Dave,88,9
4,Eve,81,4


### Specifying explicit merge keys

In [20]:
# Same data layout as before, except the key column is labelled "name" in
# one record and "Name" (capital N) in the other.
scores = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    points=np.random.randint(low=0, high=100, size=5),
)
favorites = dict(
    Name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    favorite=np.random.randint(low=0, high=10, size=5),
)

In [21]:
# Rebuild both frames from the new dicts; their key columns are now
# labelled differently ("name" vs. "Name").
a, b = map(pd.DataFrame, (scores, favorites))

In [23]:
# A plain merge needs at least one column label shared by both frames.
# Here `a` has "name" while `b` has "Name", so pandas raises a MergeError.
# The error is caught and printed so the demonstration really executes
# without stopping a full run of the notebook.
try:
    a.merge(b)
except pd.errors.MergeError as err:
    print("MergeError:", err)

In [29]:
# Name the key column on each side explicitly, since the labels differ.
# Both key columns are kept in the result.
pd.merge(a, b, left_on="name", right_on="Name")

Unnamed: 0,name,points,Name,favorite
0,Alice,36,Alice,4
1,Bob,53,Bob,9
2,Carol,5,Carol,8
3,Dave,38,Dave,1
4,Eve,17,Eve,1


The `Name` column in the result simply repeats `name`, so this redundant column can be dropped.

In [30]:
# Merge on the differently-labelled key columns, then discard the
# right-hand copy of the key.
a.merge(b, left_on="name", right_on="Name").drop("Name", axis="columns")

Unnamed: 0,name,points,favorite
0,Alice,36,4
1,Bob,53,9
2,Carol,5,8
3,Dave,38,1
4,Eve,17,1


### Merge on index

In [47]:
# Fresh random values for the same five people.
scores = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    points=np.random.randint(low=0, high=100, size=5),
)
favorites = dict(
    name=["Alice", "Bob", "Carol", "Dave", "Eve"],
    favorite=np.random.randint(low=0, high=10, size=5),
)

# This time the names go into the row index rather than a column, so each
# frame is left with a single value column.
a = pd.DataFrame(
    data=scores["points"],
    index=scores["name"],
    columns=["points"],
)
b = pd.DataFrame(
    data=favorites["favorite"],
    index=favorites["name"],
    columns=["favorite"],
)

In [50]:
# Points frame, with the names as the row index.
a

Unnamed: 0,points
Alice,82
Bob,46
Carol,99
Dave,20
Eve,81


In [51]:
# Favorites frame, with the names as the row index.
b

Unnamed: 0,favorite
Alice,2
Bob,9
Carol,1
Dave,4
Eve,6


In [53]:
# The names now live in the index, so the only columns are "points" in `a`
# and "favorite" in `b`. With no shared column label, a plain merge raises
# a MergeError. The error is caught and printed so the demonstration really
# executes without stopping a full run of the notebook.
try:
    a.merge(b)
except pd.errors.MergeError as err:
    print("MergeError:", err)

In [54]:
# Use the row index of both frames as the merge key.
a.merge(b, left_index=True, right_index=True)

Unnamed: 0,points,favorite
Alice,82,2
Bob,46,9
Carol,99,1
Dave,20,4
Eve,81,6


It is also possible to merge based on an index on one side and a regular column on the other.