In [9]:
import numpy as np
import pandas as pd

In [10]:
# Merging two DataFrames
# Merging in pandas is a way to combine two DataFrames (chain merges to combine more) based on a common column or index.

In [13]:
# DataFrame 1: employee information, one row per employee id (1-5).
employees = pd.DataFrame(
    data={
        "employe_id": [1, 2, 3, 4, 5],
        "name": ["John", "Linda", "Peter", "Parker", "Smith"],
        "department": ["HR", "IT", "Sells", "Finance", "HR"],
    }
)

# DataFrame 2: salary information. Ids 1-3 also appear in `employees`;
# ids 6 and 7 exist only here, which is what makes the join types differ.
salaries = pd.DataFrame(
    data={
        "employe_id": [1, 2, 3, 6, 7],
        "salary": [50000, 55000, 60000, 65000, 70000],
        "bonus": [5000, 10000, 15000, 20000, 35000],
    }
)

In [14]:
employees

Unnamed: 0,employe_id,name,department
0,1,John,HR
1,2,Linda,IT
2,3,Peter,Sells
3,4,Parker,Finance
4,5,Smith,HR


In [15]:
salaries

Unnamed: 0,employe_id,salary,bonus
0,1,50000,5000
1,2,55000,10000
2,3,60000,15000
3,6,65000,20000
4,7,70000,35000


In [16]:
# No `on=` / `how=` given: pandas joins on the column names the two frames
# share (here only `employe_id`) and performs an inner join.
pd.merge(left=employees, right=salaries)

Unnamed: 0,employe_id,name,department,salary,bonus
0,1,John,HR,50000,5000
1,2,Linda,IT,55000,10000
2,3,Peter,Sells,60000,15000


In [19]:
# Inner join: keep only the ids present in BOTH frames (1, 2, 3).
pd.merge(left=employees, right=salaries, on="employe_id", how="inner")

Unnamed: 0,employe_id,name,department,salary,bonus
0,1,John,HR,50000,5000
1,2,Linda,IT,55000,10000
2,3,Peter,Sells,60000,15000


In [18]:
# Outer join: keep every id from either frame; columns with no match become NaN
# (which is why salary/bonus are shown as floats in the result).
pd.merge(left=employees, right=salaries, on="employe_id", how="outer")

Unnamed: 0,employe_id,name,department,salary,bonus
0,1,John,HR,50000.0,5000.0
1,2,Linda,IT,55000.0,10000.0
2,3,Peter,Sells,60000.0,15000.0
3,4,Parker,Finance,,
4,5,Smith,HR,,
5,6,,,65000.0,20000.0
6,7,,,70000.0,35000.0


In [20]:
# Left join: keep every row of `employees`; ids 4 and 5 have no salary row,
# so their salary/bonus are NaN.
pd.merge(left=employees, right=salaries, on="employe_id", how="left")

Unnamed: 0,employe_id,name,department,salary,bonus
0,1,John,HR,50000.0,5000.0
1,2,Linda,IT,55000.0,10000.0
2,3,Peter,Sells,60000.0,15000.0
3,4,Parker,Finance,,
4,5,Smith,HR,,


In [21]:
# Right join: keep every row of `salaries`; ids 6 and 7 have no employee row,
# so their name/department are missing.
pd.merge(left=employees, right=salaries, on="employe_id", how="right")

Unnamed: 0,employe_id,name,department,salary,bonus
0,1,John,HR,50000,5000
1,2,Linda,IT,55000,10000
2,3,Peter,Sells,60000,15000
3,6,,,65000,20000
4,7,,,70000,35000


In [22]:
# Concatenation of DataFrames

In [None]:
# Concatenation is the process of joining or "stacking" two or more DataFrames
# or Series either vertically (one on top of the other) or horizontally (side by side).

In [29]:
# First frame to concatenate: three rows, columns A/B/C.
# NOTE(review): the last 'C' value is 'C3', which breaks the A2/B2 pattern and
# duplicates the first 'C' value of df2 -- possibly meant to be 'C2'; confirm.
df1 = pd.DataFrame(
    data={
        "A": ["A0", "A1", "A2"],
        "B": ["B0", "B1", "B2"],
        "C": ["C0", "C1", "C3"],
    }
)

# Second frame: same columns, the next three rows.
df2 = pd.DataFrame(
    data={
        "A": ["A3", "A4", "A5"],
        "B": ["B3", "B4", "B5"],
        "C": ["C3", "C4", "C5"],
    }
)

In [30]:
df1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C3


In [31]:
df2

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [32]:
# Stack df2 underneath df1 (axis=0 is the default). Each frame keeps its own
# index labels, so 0, 1, 2 appear twice in the result.
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C3
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [None]:
# axis=0 (Default): Stacks DataFrames vertically, one below the other. 
# This is used when you have new rows of data for the same columns.

# axis=1: Stacks DataFrames horizontally, side by side. 
# This is used when you have new columns of data for the same rows.

In [33]:
# Place df2 to the right of df1, aligning rows on the index. Both frames use
# the column names A/B/C, so the result has each name twice.
pd.concat(objs=[df1, df2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A0,B0,C0,A3,B3,C3
1,A1,B1,C1,A4,B4,C4
2,A2,B2,C3,A5,B5,C5


In [37]:
# Joining two DataFrames

In [None]:
# Joining is the process of combining two DataFrames based on a common key or index.

In [41]:
# First DataFrame: names, indexed 1-3.
df1 = pd.DataFrame(
    data={"name": ["Linda", "Minda", "Cena"]},
    index=[1, 2, 3],
)

# Second DataFrame: scores, indexed 2-4 (overlaps the first on labels 2 and 3).
df2 = pd.DataFrame(
    data={"score": [85, 90, 85]},
    index=[2, 3, 4],
)

In [42]:
df1

Unnamed: 0,name
1,Linda
2,Minda
3,Cena


In [43]:
df2

Unnamed: 0,score
2,85
3,90
4,85


In [44]:
# Index-based outer join: keep every index label from either frame (1-4);
# cells with no match are NaN.
df1.join(other=df2, how="outer")

Unnamed: 0,name,score
1,Linda,
2,Minda,85.0
3,Cena,90.0
4,,85.0


In [46]:
# Index-based left join (the default for .join): keep df2's labels (2-4) and
# attach the matching name; label 4 has no match in df1.
df2.join(other=df1, how="left")

Unnamed: 0,score,name
2,85,Minda
3,90,Cena
4,85,


In [None]:
# .join() vs. pd.merge()
# While both functions are used for joining, their default behavior is different.

# pd.merge(): Joins on columns by default (every column name the two frames share,
# unless `on=` is given) and defaults to an inner join. It is more versatile
# and can perform inner, left, right, and outer joins.
# .join(): Joins on the index by default and defaults to a left join; other join
# types are available through `how=`.

# You can use pd.merge() to replicate all of the functionalities of
# .join() by specifying left_index=True and right_index=True.
# However, for simple index-based joins, .join() is a quicker and more readable option.