# `Merging`, `Joining` and `Concatenating` two `DataFrames`:

In [111]:
import numpy as np 
import pandas as pd 

In [112]:
# DataFrame 1: Employee master data (employee ids 1-5), one tuple per row
employees = pd.DataFrame(
    [
        (1, 'John', 'HR'),
        (2, 'Anna', 'IT'),
        (3, 'Peter', 'Finance'),
        (4, 'Linda', 'IT'),
        (5, 'Bob', 'HR'),
    ],
    columns=['employee_id', 'name', 'department'],
)

# DataFrame 2: Salary information, one tuple per row.
# Ids 1-3 also appear in `employees`; ids 6 and 7 do not.
salaries = pd.DataFrame(
    [
        (1, 60000, 5000),
        (2, 80000, 10000),
        (3, 65000, 7000),
        (6, 70000, 8000),
        (7, 90000, 12000),
    ],
    columns=['employee_id', 'salary', 'bonus'],
)


In [113]:
# Display the salary table (employee ids 1, 2, 3, 6, 7).
salaries

Unnamed: 0,employee_id,salary,bonus
0,1,60000,5000
1,2,80000,10000
2,3,65000,7000
3,6,70000,8000
4,7,90000,12000


In [114]:
# Display the employee table (employee ids 1-5).
employees

Unnamed: 0,employee_id,name,department
0,1,John,HR
1,2,Anna,IT
2,3,Peter,Finance
3,4,Linda,IT
4,5,Bob,HR


## Performing the `merge` operation (equivalent to a `JOIN` in `SQL`)

In [115]:
# With no `on=` / `how=` given, merge matches rows on the column(s) the two
# frames share (here only `employee_id`) and performs an inner join.
pd.merge(employees, salaries)

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


In [116]:
# `on=` names the column used as the basis for matching rows.
# An inner merge is equivalent to the intersection of two sets:
# only keys present in both frames are kept.
employees.merge(right=salaries, how='inner', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000


![SQL Join](https://media.geeksforgeeks.org/wp-content/uploads/20250607125822884856/SQL-Join.webp)

In [117]:
# Outer merge = union of keys: every employee_id from either frame is kept.
# Cells with no match are NaN, which is why salary/bonus are shown as floats.
employees.merge(right=salaries, how='outer', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,
5,6,,,70000.0,8000.0
6,7,,,90000.0,12000.0


![Outer Merge](https://media.geeksforgeeks.org/wp-content/uploads/20250607130842424020/Full_Join.webp)

In [118]:
# Left merge: keep every row of `employees`; salary/bonus are NaN for
# employee ids that do not appear in `salaries` (4 and 5).
employees.merge(right=salaries, how='left', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,


![Left Merge](https://media.geeksforgeeks.org/wp-content/uploads/20250607130445309937/Left_Join.webp)

In [119]:
# Right merge: keep every row of `salaries`; name/department are missing for
# employee ids that do not appear in `employees` (6 and 7).
employees.merge(right=salaries, how='right', on='employee_id')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000,5000
1,2,Anna,IT,80000,10000
2,3,Peter,Finance,65000,7000
3,6,,,70000,8000
4,7,,,90000,12000


<img src="https://media.geeksforgeeks.org/wp-content/uploads/20250607130703636787/Right_join.webp" alt="Photo" width="520">


## Performing *`Concat`*:
* ### Concat is basically **appending** one `DataFrame` to another along an axis (rows by default, columns with `axis=1`).

In [120]:
# Two frames with identical columns (A, B, C) and the default 0-2 index,
# written one tuple per row.
df1 = pd.DataFrame(
    [
        ('A0', 'B0', 'C0'),
        ('A1', 'B1', 'C1'),
        ('A2', 'B2', 'C2'),
    ],
    columns=['A', 'B', 'C'],
)

df2 = pd.DataFrame(
    [
        ('A3', 'B3', 'C3'),
        ('A4', 'B4', 'C4'),
        ('A5', 'B5', 'C5'),
    ],
    columns=['A', 'B', 'C'],
)

In [121]:
# Display the first frame (values A0-C2).
df1

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [122]:
# Display the second frame (values A3-C5).
df2

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [123]:
# concat appends row-wise by default (axis=0), in the order the frames are listed.
# Each frame keeps its own index labels, so the result repeats 0, 1, 2.
pd.concat([df2, df1], axis=0)

Unnamed: 0,A,B,C
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [124]:
# axis=1 places the frames side by side, aligning rows on the index.
# Both frames contribute columns named A, B, C, so the column labels repeat.
pd.concat([df1, df2], axis='columns')

Unnamed: 0,A,B,C,A.1,B.1,C.1
0,A0,B0,C0,A3,B3,C3
1,A1,B1,C1,A4,B4,C4
2,A2,B2,C2,A5,B5,C5


## Performing `Join`:
* ### Join is similar to **`merge`**: it combines one `DataFrame` with another.
* ## Differences between `Join` and `Merge`:
### 1. By default, Join aligns on indexes, while Merge aligns on columns.
### i.e. a default Merge requires a column name common to both DataFrames (unless `left_on`/`right_on` or `left_index`/`right_index` are given); Join does not.
### 2. Join's default how= kwarg is 'left' and merge's how= kwarg is 'inner'
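### 3. Join preserves index labels: the result keeps the left DataFrame's index for `how='left'` and the right DataFrame's index for `how='right'`, whereas a column-based Merge returns a fresh 0..n-1 index.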

In [125]:
# First DataFrame: a single `name` column with index labels 1-3
df1 = pd.DataFrame(
    ['Alice', 'Bob', 'Charlie'],
    index=[1, 2, 3],
    columns=['name'],
)

# Second DataFrame: a single `score` column with index labels 2-4
# (labels 2 and 3 overlap with df1; label 4 does not)
df2 = pd.DataFrame(
    [85, 90, 75],
    index=[2, 3, 4],
    columns=['score'],
)

In [126]:
# Display the names frame (index labels 1, 2, 3).
df1

Unnamed: 0,name
1,Alice
2,Bob
3,Charlie


In [127]:
# Display the scores frame (index labels 2, 3, 4).
df2

Unnamed: 0,score
2,85
3,90
4,75


In [None]:
# join aligns the frames on their index and defaults to how='left'.
# A plain pd.merge(df1, df2) would not work here: the frames share no column,
# and the index is not a column (merge would need left_index=True, right_index=True).
df1.join(other=df2)

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0


In [None]:
# Inner join: keep only the index labels present in both frames (2 and 3).
df1.join(other=df2, how='inner')


Unnamed: 0,name,score
2,Bob,85
3,Charlie,90


In [130]:
# Outer join: keep the union of index labels (1-4); gaps on either side are NaN.
df1.join(other=df2, how='outer')

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0
4,,75.0


In [None]:
# Left join: the result keeps the index of the left frame, df1 (labels 1, 2, 3);
# `score` is NaN for label 1, which df2 lacks.
df1.join(other=df2, how='left')

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0


In [None]:
# Right join: the result keeps the index of the right frame, df2 (labels 2, 3, 4);
# `name` is missing for label 4, which df1 lacks.
df1.join(other=df2, how='right')

Unnamed: 0,name,score
2,Bob,85
3,Charlie,90
4,,75
