# Merging, Joining, and Concatenating in Pandas


In [1]:
import numpy as np
import pandas as pd

In [28]:
# Three small frames sharing the columns 'A' and 'B' but carrying
# disjoint, consecutive index labels (0-2, 3-5, 6-8).
df1 = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=[0, 1, 2],
)
df2 = pd.DataFrame(
    {'A': ['A3', 'A4', 'A5'], 'B': ['B3', 'B4', 'B5']},
    index=[3, 4, 5],
)
df3 = pd.DataFrame(
    {'A': ['A6', 'A7', 'A8'], 'B': ['B6', 'B7', 'B8']},
    index=[6, 7, 8],
)

In [30]:
# Preview df1: columns 'A' and 'B', index labels 0-2.
df1

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2


In [32]:
# Preview df2: same columns as df1, index labels 3-5.
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4
5,A5,B5


In [34]:
# Preview df3: same columns as df1, index labels 6-8.
df3

Unnamed: 0,A,B
6,A6,B6
7,A7,B7
8,A8,B8


## Concatenation
Concatenation in Pandas is used to combine multiple DataFrames along a particular axis (rows or columns).

### Concatenating along rows (axis=0)

In [37]:
# df4 has a single column 'C' and the same index labels as df1 (0, 1, 2);
# it is used further down for the column-wise (axis=1) concatenation example.
df4 = pd.DataFrame(
    {'C': ['C0', 'C1', 'C2']},
    index=[0, 1, 2],
)

In [39]:
# Preview df4: one column 'C', index labels 0-2.
df4

Unnamed: 0,C
0,C0
1,C1
2,C2


In [43]:
# Stack the three frames vertically (axis=0, which is also pd.concat's default).
# The original index labels 0-8 are carried over unchanged.
# NOTE: despite the variable's name, this is a row-wise concatenation.
concatenated_columns = pd.concat(
    [df1, df2, df3],
    axis=0,
)

In [45]:
# Show the row-wise concatenation of df1, df2 and df3 (nine rows, columns 'A' and 'B').
concatenated_columns

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4
5,A5,B5
6,A6,B6
7,A7,B7
8,A8,B8


### Concatenating along columns (axis=1)

In [47]:
# Place df1 and df4 side by side: rows are aligned on the shared index
# labels (0, 1, 2) and the columns of both frames are kept.
concatenated_columns_1_4 = pd.concat(objs=[df1, df4], axis='columns')
display(concatenated_columns_1_4)

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


## Merging DataFrames
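Merging is similar to SQL joins: two DataFrames are combined row by row wherever the values in a common key column match.

### Inner Join (Default)
An inner join keeps only the records whose key appears in both DataFrames.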


In [54]:
# Two frames that share a 'Key' column. K0 and K1 appear in both,
# K2 only on the left and K3 only on the right.
df_left = pd.DataFrame({
    'Key': ['K0', 'K1', 'K2'],
    'A': ['A0', 'A1', 'A2'],
})
df_right = pd.DataFrame({
    'Key': ['K0', 'K1', 'K3'],
    'B': ['B0', 'B1', 'B3'],
})


In [58]:
# Preview the left frame: keys K0, K1, K2 with column 'A'.
df_left

Unnamed: 0,Key,A
0,K0,A0
1,K1,A1
2,K2,A2


In [60]:
# Preview the right frame: keys K0, K1, K3 with column 'B'.
df_right

Unnamed: 0,Key,B
0,K0,B0
1,K1,B1
2,K3,B3


In [56]:
# Inner join on the shared 'Key' column (how='inner' is the default):
# only K0 and K1 occur in both frames, so only those rows survive.
merged_df = df_left.merge(df_right, how='inner', on='Key')
display(merged_df)


Unnamed: 0,Key,A,B
0,K0,A0,B0
1,K1,A1,B1


### Outer Join
An outer join returns all records from both DataFrames, filling missing values with NaN.

In [69]:
# Outer join: keep every Key from either frame; the column coming from the
# frame that lacks a given Key is filled with NaN (B for K2, A for K3).
outer_merged_df = df_left.merge(df_right, how='outer', on='Key')
display(outer_merged_df)

Unnamed: 0,Key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,
3,K3,,B3


### Left Join
A left join returns all records from the left DataFrame and only matching records from the right DataFrame.

In [72]:
# Left join: keep every row of df_left; 'B' is NaN for K2, which has no match in df_right.
left_merged_df = df_left.merge(df_right, how='left', on='Key')
display(left_merged_df)

Unnamed: 0,Key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,


### Right Join
A right join returns all records from the right DataFrame and only matching records from the left DataFrame.

In [80]:

# Right join: keep every row of df_right; 'A' is NaN for K3, which has no match in df_left.
# The result gets its own name so that the left-join result held in
# left_merged_df is not silently overwritten by a right join.
right_merged_df = pd.merge(df_left, df_right, on='Key', how='right')
display(right_merged_df)

Unnamed: 0,Key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K3,,B3


## Joining DataFrames
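`DataFrame.join` combines DataFrames by aligning them on their index rather than on a column, so the key column is first moved into the index. Unless `how` is given, it performs a left join.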

In [84]:
# Move 'Key' from a column into the index so that .join() can align on it.
# The guard keeps this cell idempotent: once 'Key' is the index it is no
# longer a column, and an unconditional set_index('Key') would raise KeyError
# when the cell is run a second time.
if 'Key' in df_left.columns:
    df_left = df_left.set_index('Key')
if 'Key' in df_right.columns:
    df_right = df_right.set_index('Key')

# Outer join on the index: every Key from either frame is kept, NaN where absent.
joined_df = df_left.join(df_right, how='outer')
display(joined_df)


Unnamed: 0_level_0,A,B
Key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,
K3,,B3


In [90]:
# Without `how`, join() performs a left join on the index: all keys of
# df_left (K0, K1, K2) are kept and K3, present only in df_right, is dropped.
df_left.join(df_right)

Unnamed: 0_level_0,A,B
Key,Unnamed: 1_level_1,Unnamed: 2_level_1
K0,A0,B0
K1,A1,B1
K2,A2,
