### Merging & Joining Data

In [2]:
import pandas as pd

# Customer lookup table: one row per customer, keyed by customer_id.
df_customers = pd.DataFrame(dict(
    customer_id=[1, 2, 3, 4],
    name=["Adam", "Bob", "Charlie", "Dave"],
))

# Orders table. customer_id 5 has no matching customer and customer 3 has no
# order, so the different join types return different sets of rows.
df_orders = pd.DataFrame(dict(
    order_id=[101, 102, 103, 104],
    customer_id=[2, 1, 4, 5],
    amount=[250, 120, 300, 1200],
))

In [None]:
# pd.merge() / DataFrame.merge() performs SQL-style joins: it combines two
# DataFrames by matching rows on one or more key columns.
# The `how` argument selects the join type:
#   inner (default) - keep only the keys present in both DataFrames
#   outer           - keep all keys from both DataFrames; missing values become NaN
#   left            - keep all keys from the left DataFrame; missing right-side values become NaN
#   right           - keep all keys from the right DataFrame; missing left-side values become NaN

# Inner join. on="customer_id" names the single key column, which must exist
# in both DataFrames. how="inner" is the default and is spelled out for clarity.
df_customers.merge(df_orders, on="customer_id", how="inner")

Unnamed: 0,customer_id,name,order_id,amount
0,1,Adam,102,120
1,2,Bob,101,250
2,4,Dave,103,300


In [4]:
# Left join: every customer row is kept; a customer without an order gets NaN
# in the order columns.
df_customers.merge(df_orders, how="left", on="customer_id")

Unnamed: 0,customer_id,name,order_id,amount
0,1,Adam,102.0,120.0
1,2,Bob,101.0,250.0
2,3,Charlie,,
3,4,Dave,103.0,300.0


In [5]:
# Right join: every order row is kept; an order whose customer_id is not in
# df_customers gets NaN in the customer columns.
df_customers.merge(df_orders, how="right", on="customer_id")

Unnamed: 0,customer_id,name,order_id,amount
0,2,Bob,101,250
1,1,Adam,102,120
2,4,Dave,103,300
3,5,,104,1200


In [6]:
# Outer join: keys from both DataFrames are kept; whichever side has no match
# is filled with NaN.
df_customers.merge(df_orders, how="outer", on="customer_id")

Unnamed: 0,customer_id,name,order_id,amount
0,1,Adam,102.0,120.0
1,2,Bob,101.0,250.0
2,3,Charlie,,
3,4,Dave,103.0,300.0
4,5,,104.0,1200.0


pandas also provides the `DataFrame.join()` method, which is essentially a shortcut for `merge()` when joining on indices: it combines two DataFrames by matching their index values.
By default, pandas looks at the index of both DataFrames and joins the rows that share the same index label (as a left join, unless `how` says otherwise).

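If needed, you can also join on a key column instead of the index by passing its name through the `on` parameter; the values of that column in the calling DataFrame are then matched against the index of the other DataFrame.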

### Data Concatenation

In [None]:
# pd.concat() concatenates DataFrames: it stacks them vertically (rows on top
# of each other, the default) or horizontally (columns side by side, axis=1).
df1 = pd.DataFrame(dict(
    id=[1, 2, 3],
    name=["Adam", "Bob", "Charlie"],
))

df2 = pd.DataFrame(dict(
    id=[4, 5, 6],
    name=["David", "Eva", "Frank"],
))

# Row-wise concatenation (axis=0 is the default). Each frame keeps its own
# index labels, so the labels 0, 1, 2 appear twice in the result.
pd.concat([df1, df2], axis=0)

Unnamed: 0,id,name
0,1,Adam
1,2,Bob
2,3,Charlie
0,4,David
1,5,Eva
2,6,Frank


In [None]:
# Row-wise concatenation with a fresh index: ignore_index=True discards the
# original index labels and numbers the result rows 0..n-1.
pd.concat(objs=[df1, df2], axis=0, ignore_index=True)

Unnamed: 0,id,name
0,1,Adam
1,2,Bob
2,3,Charlie
3,4,David
4,5,Eva
5,6,Frank


In [19]:
# Column-wise concatenation: the frames are placed side by side, with rows
# aligned on their index labels. Column names are not de-duplicated, so the
# result carries two `id` and two `name` columns.
pd.concat([df1, df2], axis="columns")

Unnamed: 0,id,name,id.1,name.1
0,1,Adam,4,David
1,2,Bob,5,Eva
2,3,Charlie,6,Frank
