## Make sure you have downloaded the supermarket_marketing and drugstore_pregnancy sheets [from this folder](https://drive.google.com/drive/u/1/folders/1UDhRY1XZ1y0H3jHckdRvImAaJozq3gup) as Excel (.xlsx) files and uploaded both to the same directory as this notebook

In [None]:
!pip install pandas openpyxl

# Merging (aka joining) two datasets with a unique identifier

In [None]:
import pandas as pd

In [None]:
# Load each Excel workbook into its own DataFrame.
# Both files are looked up relative to the notebook's working directory.
data_supermarket = pd.read_excel(io="supermarket_marketing.xlsx")
data_drugstore = pd.read_excel(io="drugstore_pregnancy.xlsx")

In [None]:
# Peek at three randomly chosen supermarket rows, transposed so that
# each column name is shown as a row (easier to read for wide tables).
data_supermarket.sample(n=3).transpose()

In [None]:
# Peek at three randomly chosen drugstore rows, transposed so that
# each column name is shown as a row (easier to read for wide tables).
data_drugstore.sample(n=3).transpose()

# Merging: four types

<img src = "https://cdn.mindmajix.com/blog/images/db-01_2119.png" width=50%>

## Inner merge: only keep the rows where the unique identifier is found in both datasets

In [None]:
# Inner merge on the shared 'phone' column: keep only the rows whose
# phone value is present in both the supermarket and drugstore data.
data_merged_inner = data_supermarket.merge(data_drugstore, on='phone', how='inner')

In [None]:
# Display the result of the inner merge.
data_merged_inner

In [None]:
# Three random rows of the inner merge, transposed so columns read as rows.
data_merged_inner.sample(n=3).transpose()

## Left merge: show the entire first/left dataset; if unique identifier matches in second/right dataset, add variables for the matching row; if not, leave blank/NaN

In [None]:
# Left merge on the shared 'phone' column: keep every supermarket row and
# attach the drugstore columns where the phone value matches (NaN otherwise).
data_merged_left = data_supermarket.merge(data_drugstore, on='phone', how='left')

In [None]:
# Display the result of the left merge.
data_merged_left

In [None]:
# Five random rows of the left merge, transposed so columns read as rows.
data_merged_left.sample(n=5).transpose()

## Outer merge: keep all data from both datasets; if unique identifier matches, add variables for the matching row; if not, leave blank/NaN

In [None]:
# Outer merge on the shared 'phone' column: keep every row from both
# datasets, filling the columns of the non-matching side with NaN.
data_merged_outer = data_supermarket.merge(data_drugstore, on='phone', how='outer')

In [None]:
# Display the result of the outer merge.
data_merged_outer

In [None]:
# Five random rows of the outer merge, transposed so columns read as rows.
data_merged_outer.sample(n=5).transpose()

## Note that if both dataframes have a column with the same name (other than the column you merge on), pandas keeps both and appends the suffix `_x` to the first/left column and `_y` to the second/right column. `drinks` appears in both, so there is a `drinks_x` for supermarket and `drinks_y` for drugstore.

# Export to file

In [None]:
# Write each merged DataFrame to its own Excel workbook in the
# notebook's working directory, one file per merge type.
data_merged_inner.to_excel(excel_writer="supermarket_drugstore_inner_merge.xlsx")
data_merged_left.to_excel(excel_writer="supermarket_drugstore_left_merge.xlsx")
data_merged_outer.to_excel(excel_writer="supermarket_drugstore_outer_merge.xlsx")