In [1]:
# importing libraries
import pandas as pd

In [11]:
# Two small sample frames sharing an "id" key column.
# ids 2 and 3 appear in both; id 1 is only in df1 and id 4 is only in df2,
# which lets the different merge/join strategies below show different results.
df1 = pd.DataFrame(dict(id=[1, 2, 3], name=["A", "B", "C"]))
df2 = pd.DataFrame(dict(id=[2, 3, 4], salary=[50000, 60000, 70000]))

MERGING DATAFRAMES

In [3]:
# Inner merge on "id": only ids present in BOTH frames survive (here 2 and 3).
df1.merge(df2, how="inner", on="id")

Unnamed: 0,id,name,salary
0,2,B,50000
1,3,C,60000


In [4]:
# Left merge on "id": every row of df1 is kept; ids with no match in df2
# (here id 1) get a missing salary.
df1.merge(df2, how="left", on="id")

Unnamed: 0,id,name,salary
0,1,A,
1,2,B,50000.0
2,3,C,60000.0


In [5]:
# Right merge on "id": every row of df2 is kept; ids with no match in df1
# (here id 4) get a missing name.
df1.merge(df2, how="right", on="id")

Unnamed: 0,id,name,salary
0,2,B,50000
1,3,C,60000
2,4,,70000


In [6]:
# Outer merge on "id": union of the ids from both frames; whichever side
# lacks a given id contributes missing values for its columns.
df1.merge(df2, how="outer", on="id")

Unnamed: 0,id,name,salary
0,1,A,
1,2,B,50000.0
2,3,C,60000.0
3,4,,70000.0


In [10]:
# Merging when the key column is named differently on each side.
# df3 holds the same values as df2 but its key column is called "user_id".
df3 = pd.DataFrame(dict(user_id=[2, 3, 4], salary=[50000, 60000, 70000]))

# left_on / right_on pair up the differently named keys. Both key columns are
# kept in the result, so an outer merge leaves gaps in "id" or "user_id" for
# rows that exist on one side only.
df1.merge(df3, how="outer", left_on="id", right_on="user_id")

Unnamed: 0,id,name,user_id,salary
0,1.0,A,,
1,2.0,B,2.0,50000.0
2,3.0,C,3.0,60000.0
3,,,4.0,70000.0


CONCATENATING DATAFRAMES

In [12]:
# Row-wise concatenation: df2's rows are stacked under df1's rows.
# Each frame keeps its original index labels, so 0, 1, 2 appear twice.
pd.concat(objs=[df1, df2], axis=0)

Unnamed: 0,id,name,salary
0,1,A,
1,2,B,
2,3,C,
0,2,,50000.0
1,3,,60000.0
2,4,,70000.0


In [13]:
# Row-wise concatenation with a fresh index: ignore_index=True discards the
# original labels and renumbers the stacked rows 0..5.
pd.concat(objs=[df1, df2], axis=0, ignore_index=True)

Unnamed: 0,id,name,salary
0,1,A,
1,2,B,
2,3,C,
3,2,,50000.0
4,3,,60000.0
5,4,,70000.0


In [16]:
# Column-wise concatenation: frames are placed side by side and aligned on
# their index labels, not on "id" -- so the result carries two "id" columns.
pd.concat(objs=[df1, df2], axis=1)

Unnamed: 0,id,name,id.1,salary
0,1,A,2,50000
1,2,B,3,60000
2,3,C,4,70000


JOINING DATAFRAMES

In [17]:
# DataFrame.join aligns on the index, so move "id" into the index of both
# frames first.
df1_indexed, df2_indexed = (frame.set_index("id") for frame in (df1, df2))

# Left join (the default for .join): keeps every id of df1_indexed.
df1_indexed.join(df2_indexed, how="left")

Unnamed: 0_level_0,name,salary
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,A,
2,B,50000.0
3,C,60000.0


In [18]:
# Outer join on the index: keeps the ids from both frames (1 through 4).
df1_indexed.join(other=df2_indexed, how="outer")

Unnamed: 0_level_0,name,salary
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,A,
2,B,50000.0
3,C,60000.0
4,,70000.0


In [19]:
# Join a column of the caller against the index of the other frame:
# df1's "id" column is matched with df2's "id" index; df1 keeps its own
# 0..2 index and all of its rows (left join).
df1.join(other=df2.set_index("id"), on="id", how="left")

Unnamed: 0,id,name,salary
0,1,A,
1,2,B,50000.0
2,3,C,60000.0
