In [1]:
import pandas as pd
import numpy as np


### Merging 


In [2]:

left = pd.DataFrame({
    'ID': [1, 2, 3, 4],
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Score': [85, 90, 78, 88]
})

right = pd.DataFrame({
    'ID': [3, 4, 5, 6],
    'Name': ['Charlie', 'David', 'Eve', 'Frank'],
    'Grade': ['B', 'A', 'C', 'B']
})

display(left, right)


Unnamed: 0,ID,Name,Score
0,1,Alice,85
1,2,Bob,90
2,3,Charlie,78
3,4,David,88


Unnamed: 0,ID,Name,Grade
0,3,Charlie,B
1,4,David,A
2,5,Eve,C
3,6,Frank,B


# merge(): Joining two datasets based on a common column

In [3]:
merged_inner = left.merge(right, on='ID', how='inner')
display(merged_inner)

Unnamed: 0,ID,Name_x,Score,Name_y,Grade
0,3,Charlie,78,Charlie,B
1,4,David,88,David,A


In [4]:
grouped_left = left.merge(right, on='ID', how='left')
display(grouped_left)

Unnamed: 0,ID,Name_x,Score,Name_y,Grade
0,1,Alice,85,,
1,2,Bob,90,,
2,3,Charlie,78,Charlie,B
3,4,David,88,David,A


In [None]:
grouped_outer = left.merge(right, on='ID', how='outer')
display(grouped_outer)


Unnamed: 0,ID,Name_x,Score,Name_y,Grade
0,1,Alice,85.0,,
1,2,Bob,90.0,,
2,3,Charlie,78.0,Charlie,B
3,4,David,88.0,David,A
4,5,,,Eve,C
5,6,,,Frank,B



# concat(): Stacking datasets vertically or horizontally


In [4]:

df1 = pd.DataFrame({
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2']
})
df2 = pd.DataFrame({
    'A': ['A3', 'A4', 'A5'],
    'B': ['B3', 'B4', 'B5']
})
display(df1,df2)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2


Unnamed: 0,A,B
0,A3,B3
1,A4,B4
2,A5,B5


In [5]:
# Vertical Concatenation (axis=0)
df_concat_v = pd.concat([df1, df2], axis=0)
display(df_concat_v)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
0,A3,B3
1,A4,B4
2,A5,B5


In [6]:
# Horizontal Concatenation (axis=1)
df3 = pd.DataFrame({
    'C': ['C0', 'C1', 'C2']
})

In [7]:
df_concat_h = pd.concat([df1, df2], axis=1)
display(df_concat_h)

Unnamed: 0,A,B,A.1,B.1
0,A0,B0,A3,B3
1,A1,B1,A4,B4
2,A2,B2,A5,B5



# join(): Similar to merge() but works on index

In [8]:

left_indexed = left.set_index('ID')
right_indexed = right.set_index('ID')


In [9]:
# Joining on index
joined_df = left_indexed.join(right_indexed, how='inner', lsuffix='_left', rsuffix='_right')
display(joined_df)

Unnamed: 0_level_0,Name_left,Score,Name_right,Grade
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,Charlie,78,Charlie,B
4,David,88,David,A


# combine_first(): Filling missing values from another dataset

In [10]:
df_a = pd.DataFrame({
    'A': [np.nan, 2, np.nan, 4],
    'B': [5, np.nan, 7, np.nan]
})
df_b = pd.DataFrame({
    'A': [1, np.nan, 3, np.nan],
    'B': [np.nan, 6, np.nan, 8]
})
display(df_a,df_b)

Unnamed: 0,A,B
0,,5.0
1,2.0,
2,,7.0
3,4.0,


Unnamed: 0,A,B
0,1.0,
1,,6.0
2,3.0,
3,,8.0


In [11]:

# Filling missing values using another DataFrame
df_combined = df_a.combine_first(df_b)
display(df_combined)


Unnamed: 0,A,B
0,1.0,5.0
1,2.0,6.0
2,3.0,7.0
3,4.0,8.0
