# PANDAS MERGE

pandas.merge(df1, df2) is equivalent to df1.merge(df2)

In [1]:
import pandas as pd

# Customer table, written one row per customer (values follow the `columns` order).
# Rows whose Product_ID is 0 carry the placeholder string 'NA' as the product name
# (presumably "no purchase" -- confirm with the data owner).
customer = pd.DataFrame(
    [
        (1, 'Olivia', 20, 101, 'Watch', 'Mumbai'),
        (2, 'Aditya', 25, 0, 'NA', 'Delhi'),
        (3, 'Cory', 15, 106, 'Oil', 'Bangalore'),
        (4, 'Isabell', 10, 0, 'NA', 'Chennai'),
        (5, 'Dominic', 30, 103, 'Shoes', 'Chennai'),
        (6, 'Tyler', 65, 104, 'Smartphone', 'Delhi'),
        (7, 'Samuel', 35, 0, 'NA', 'Kolkata'),
        (8, 'Daniel', 18, 0, 'NA', 'Delhi'),
        (9, 'Jeremy', 23, 107, 'Laptop', 'Mumbai'),
    ],
    columns=['id', 'name', 'age', 'Product_ID', 'Purchased_Product', 'City'],
)

# Product catalogue, written one row per product (values follow the `columns` order).
product = pd.DataFrame(
    [
        (101, 'Watch', 'Fashion', 299.0, 'Delhi'),
        (102, 'Bag', 'Fashion', 1350.5, 'Mumbai'),
        (103, 'Shoes', 'Fashion', 2999.0, 'Chennai'),
        (104, 'Smartphone', 'Electronics', 14999.0, 'Kolkata'),
        (105, 'Books', 'Study', 145.0, 'Delhi'),
        (106, 'Oil', 'Grocery', 110.0, 'Chennai'),
        (107, 'Laptop', 'Electronics', 79999.0, 'Bangalore'),
    ],
    columns=['Product_ID', 'Product_name', 'Category', 'Price', 'Seller_City'],
)
# Render both input tables, customers first and then products.
display(customer)
display(product)

Unnamed: 0,id,name,age,Product_ID,Purchased_Product,City
0,1,Olivia,20,101,Watch,Mumbai
1,2,Aditya,25,0,,Delhi
2,3,Cory,15,106,Oil,Bangalore
3,4,Isabell,10,0,,Chennai
4,5,Dominic,30,103,Shoes,Chennai
5,6,Tyler,65,104,Smartphone,Delhi
6,7,Samuel,35,0,,Kolkata
7,8,Daniel,18,0,,Delhi
8,9,Jeremy,23,107,Laptop,Mumbai


Unnamed: 0,Product_ID,Product_name,Category,Price,Seller_City
0,101,Watch,Fashion,299.0,Delhi
1,102,Bag,Fashion,1350.5,Mumbai
2,103,Shoes,Fashion,2999.0,Chennai
3,104,Smartphone,Electronics,14999.0,Kolkata
4,105,Books,Study,145.0,Delhi
5,106,Oil,Grocery,110.0,Chennai
6,107,Laptop,Electronics,79999.0,Bangalore


**INNER JOIN**

Returns a DataFrame containing only those rows whose join-key values are present in both DataFrames.

An inner join requires each row in the two joined dataframes to have matching values in the column(s) on which the join is performed. This is similar to the intersection of two sets.

In [2]:
# merge() performs an inner join by default.
# It takes the two dataframes and the column name(s) on which to join.

# Show every product that was sold online together with who purchased it
merged_Df = product.merge(customer, on='Product_ID')
# Equivalent to pd.merge(product, customer, on=['Product_ID'])
display(merged_Df)

print('-------------')

# Show products whose seller and buyer are in the same location.
# The city columns are named differently in the two frames, so the keys are
# given per side; a plain `on` would work if both used the same 'City' column name.
merged_Df = product.merge(
    customer,
    left_on=['Product_ID', 'Seller_City'],
    right_on=['Product_ID', 'City'],
)
display(merged_Df)

Unnamed: 0,Product_ID,Product_name,Category,Price,Seller_City,id,name,age,Purchased_Product,City
0,101,Watch,Fashion,299.0,Delhi,1,Olivia,20,Watch,Mumbai
1,103,Shoes,Fashion,2999.0,Chennai,5,Dominic,30,Shoes,Chennai
2,104,Smartphone,Electronics,14999.0,Kolkata,6,Tyler,65,Smartphone,Delhi
3,106,Oil,Grocery,110.0,Chennai,3,Cory,15,Oil,Bangalore
4,107,Laptop,Electronics,79999.0,Bangalore,9,Jeremy,23,Laptop,Mumbai


-------------


Unnamed: 0,Product_ID,Product_name,Category,Price,Seller_City,id,name,age,Purchased_Product,City
0,103,Shoes,Fashion,2999.0,Chennai,5,Dominic,30,Shoes,Chennai


In [3]:
# Join on columns whose names differ between the two dataframes.
# 'Product_ID' exists in both frames but is not a join key here, so the result
# keeps both copies as 'Product_ID_x' (from product) and 'Product_ID_y' (from customer).
merged_Df = product.merge(
    customer,
    left_on='Product_name',
    right_on='Purchased_Product',
)
display(merged_Df)

Unnamed: 0,Product_ID_x,Product_name,Category,Price,Seller_City,id,name,age,Product_ID_y,Purchased_Product,City
0,101,Watch,Fashion,299.0,Delhi,1,Olivia,20,101,Watch,Mumbai
1,103,Shoes,Fashion,2999.0,Chennai,5,Dominic,30,103,Shoes,Chennai
2,104,Smartphone,Electronics,14999.0,Kolkata,6,Tyler,65,104,Smartphone,Delhi
3,106,Oil,Grocery,110.0,Chennai,3,Cory,15,106,Oil,Bangalore
4,107,Laptop,Electronics,79999.0,Bangalore,9,Jeremy,23,107,Laptop,Mumbai


In [29]:
# Explicit inner join on differently named columns. indicator=True appends a
# '_merge' column recording which side each row came from; for an inner join
# every row is 'both'.
pd.merge(
    product,
    customer,
    how='inner',
    left_on='Product_name',
    right_on='Purchased_Product',
    indicator=True,
)

Unnamed: 0,Product_ID_x,Product_name,Category,Price,Seller_City,id,name,age,Product_ID_y,Purchased_Product,City,_merge
0,101,Watch,Fashion,299.0,Delhi,1,Olivia,20,101,Watch,Mumbai,both
1,103,Shoes,Fashion,2999.0,Chennai,5,Dominic,30,103,Shoes,Chennai,both
2,104,Smartphone,Electronics,14999.0,Kolkata,6,Tyler,65,104,Smartphone,Delhi,both
3,106,Oil,Grocery,110.0,Chennai,3,Cory,15,106,Oil,Bangalore,both
4,107,Laptop,Electronics,79999.0,Bangalore,9,Jeremy,23,107,Laptop,Mumbai,both


**FULL JOIN**

Full Join, also known as Full Outer Join, returns all records from both the left and the right dataframe, combining the rows whose join keys match. In pandas this is `how='outer'`.

When a row in one dataframe has no matching row in the other, the resulting dataframe will have NaN in every column that comes from the dataframe lacking the match.

Unnamed: 0,Product_ID_x,Product_name,Category,Price,Seller_City,id,name,age,Product_ID_y,Purchased_Product,City,_merge
0,101,Watch,Fashion,299.0,Delhi,1,Olivia,20,101,Watch,Mumbai,both
1,103,Shoes,Fashion,2999.0,Chennai,5,Dominic,30,103,Shoes,Chennai,both
2,104,Smartphone,Electronics,14999.0,Kolkata,6,Tyler,65,104,Smartphone,Delhi,both
3,106,Oil,Grocery,110.0,Chennai,3,Cory,15,106,Oil,Bangalore,both
4,107,Laptop,Electronics,79999.0,Bangalore,9,Jeremy,23,107,Laptop,Mumbai,both
