# Merging and Joining Datasets

<!--
Author: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
Description: Comprehensive guide to merging and joining datasets in Pandas
-->

## Introduction

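This notebook covers the main ways of combining DataFrames in pandas: `merge()` for SQL-style joins on key columns, `join()` for joins on the index, and `concat()` for stacking along rows or columns. The older `append()` method was removed in pandas 2.0; use `concat()` instead.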



In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

import pandas as pd
import numpy as np

# Two small sample tables that share an 'ID' key column.
# IDs 1-3 appear in both tables; ID 4 exists only in df1 and ID 5 only in df2,
# so the different join types give visibly different results.
df1 = pd.DataFrame(dict(
    ID=[1, 2, 3, 4],
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Department=['IT', 'HR', 'IT', 'Finance'],
))

df2 = pd.DataFrame(dict(
    ID=[1, 2, 3, 5],
    Salary=[50000, 60000, 70000, 55000],
    Experience=[2, 5, 8, 3],
))

# Show both inputs; the leading "\n" leaves a blank line between the tables.
print("DataFrame 1:", df1, "\nDataFrame 2:", df2, sep="\n")



## Inner Join


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Inner join: keep only the rows whose 'ID' is present in both df1 and df2.
df_inner = df1.merge(df2, how='inner', on='ID')
print("=== Inner Join ===", df_inner, sep="\n")



## Left Join


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Left join: keep every row of df1; df2's columns are NaN where no 'ID' matches.
df_left = df1.merge(df2, how='left', on='ID')
print("=== Left Join ===", df_left, sep="\n")



## Right Join


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Right join: keep every row of df2; df1's columns are NaN where no 'ID' matches.
df_right = df1.merge(df2, how='right', on='ID')
print("=== Right Join ===", df_right, sep="\n")



## Outer Join (Full Outer Join)


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Full outer join: keep the rows of both df1 and df2, filling the columns
# of whichever side has no matching 'ID' with NaN.
df_outer = df1.merge(df2, how='outer', on='ID')
print("=== Outer Join (Full Outer) ===", df_outer, sep="\n")



## Merging on Different Column Names


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Two tables whose key columns hold the same IDs under different names.
df3 = pd.DataFrame(dict(
    Emp_ID=[1, 2, 3, 4],
    Name=['Alice', 'Bob', 'Charlie', 'David'],
))

df4 = pd.DataFrame(dict(
    Employee_ID=[1, 2, 3, 5],
    Salary=[50000, 60000, 70000, 55000],
))

# Because the key names differ, name each side's key explicitly with
# left_on / right_on. Both key columns are kept in the result.
df_merge_different = df3.merge(
    df4,
    how='inner',
    left_on='Emp_ID',
    right_on='Employee_ID',
)
print("=== Merge with different column names ===", df_merge_different, sep="\n")



## Concatenating DataFrames


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# Row-wise concatenation: stack two frames that have the same columns.
df_a = pd.DataFrame(dict(A=[1, 2], B=[3, 4]))
df_b = pd.DataFrame(dict(A=[5, 6], B=[7, 8]))

# ignore_index=True discards the original row labels and renumbers from 0.
df_concat_rows = pd.concat([df_a, df_b], axis='index', ignore_index=True)
print("=== Concatenate along rows ===", df_concat_rows, sep="\n")

# Column-wise concatenation: place frames side by side, aligned on the index.
df_c = pd.DataFrame(dict(C=[9, 10], D=[11, 12]))
df_concat_cols = pd.concat([df_a, df_c], axis='columns')
print("\n=== Concatenate along columns ===", df_concat_cols, sep="\n")



## Using join() Method


In [None]:
# Author: RSK World | Website: https://rskworld.in | Email: help@rskworld.in | Phone: +91 93305 39277

# DataFrame.join() matches rows on the index by default, so move 'ID' into
# the index of both frames before joining.
df_left_join = (
    df1.set_index('ID')
    .join(df2.set_index('ID'), how='left')
)
print("=== Join on index ===", df_left_join, sep="\n")

