# How to Merge Two DataFrames on Index in Pandas

Source article: https://datascientyst.com/merge-two-dataframes-on-index-pandas/

In [1]:
import pandas as pd
# Two small frames that share the same column names and the same index
# labels (0..3). They are written row by row so the literal mirrors the
# table that gets displayed.
df1 = pd.DataFrame(
    [
        ["A0", "B0", "C0", "D0"],
        ["A1", "B1", "C1", "D1"],
        ["A2", "B2", "C2", "D2"],
        ["A3", "B3", "C3", "D3"],
    ],
    columns=["A", "B", "C", "D"],
    index=[0, 1, 2, 3],
)
display(df1)

df2 = pd.DataFrame(
    [
        ["A4", "B4", "C4", "D4"],
        ["A5", "B5", "C5", "D5"],
        ["A6", "B6", "C6", "D6"],
        ["A7", "B7", "C7", "D7"],
    ],
    columns=["A", "B", "C", "D"],
    index=[0, 1, 2, 3],
)
display(df2)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


Unnamed: 0,A,B,C,D
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


## Step 1: Pandas: merge on index with the `merge` method

In [2]:
# Align rows by index label on both sides. Column names that clash get
# the "_x" / "_y" suffixes seen in the output.
df_m = df1.merge(
    df2,
    left_index=True,
    right_index=True,
)
df_m

Unnamed: 0,A_x,B_x,C_x,D_x,A_y,B_y,C_y,D_y
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


In [3]:
# Same index-on-index merge, but with explicit suffixes for the
# clashing column names.
df_m = df1.merge(
    df2,
    left_index=True,
    right_index=True,
    suffixes=('_left', '_right'),
)
df_m

Unnamed: 0,A_left,B_left,C_left,D_left,A_right,B_right,C_right,D_right
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


In [4]:
import pandas as pd
# Rebuild both frames, this time with a repeated index label (2 appears
# twice in each frame) to show how an index merge treats duplicates.
df1 = pd.DataFrame(
    [
        ["A0", "B0", "C0", "D0"],
        ["A1", "B1", "C1", "D1"],
        ["A2", "B2", "C2", "D2"],
        ["A3", "B3", "C3", "D3"],
    ],
    columns=["A", "B", "C", "D"],
    index=[0, 1, 2, 2],
)
display(df1)

df2 = pd.DataFrame(
    [
        ["A4", "B4", "C4", "D4"],
        ["A5", "B5", "C5", "D5"],
        ["A6", "B6", "C6", "D6"],
        ["A7", "B7", "C7", "D7"],
    ],
    columns=["A", "B", "C", "D"],
    index=[0, 1, 2, 2],
)
display(df2)

# Every left row labelled 2 is paired with every right row labelled 2,
# so the two duplicated labels expand into four result rows.
df_m = df1.merge(
    df2,
    left_index=True,
    right_index=True,
)
df_m

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
2,A3,B3,C3,D3


Unnamed: 0,A,B,C,D
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
2,A7,B7,C7,D7


Unnamed: 0,A_x,B_x,C_x,D_x,A_y,B_y,C_y,D_y
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
2,A2,B2,C2,D2,A7,B7,C7,D7
2,A3,B3,C3,D3,A6,B6,C6,D6
2,A3,B3,C3,D3,A7,B7,C7,D7


## Step 2: Pandas: merge on index with `concat` and `axis=1`

In [5]:
# Place the frames side by side. Unlike merge, concat adds no suffixes,
# so the result carries each column label twice.
df_m = pd.concat(
    objs=[df1, df2],
    axis='columns',
)
df_m

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
2,A3,B3,C3,D3,A7,B7,C7,D7


In [6]:
# The label 'A' exists twice after the concat, so this selection yields
# a two-column DataFrame rather than a single Series.
df_m.loc[:, 'A']

Unnamed: 0,A,A.1
0,A0,A4
1,A1,A5
2,A2,A6
2,A3,A7


In [7]:
import pandas as pd
import traceback

try:
    # verify_integrity=True makes concat refuse duplicate labels on the concatenated axis.
    df_m = pd.concat([df1, df2], axis='columns', verify_integrity=True)

except ValueError:
    # The error is what this cell demonstrates: print a one-frame
    # traceback and let the notebook keep running. The assignment above
    # never completes, so df_m still holds its previous value.
    # (No exit() here: in a notebook it asks the kernel to shut down, and
    # under a plain interpreter it terminates the whole run.)
    traceback.print_exc(limit=1)

Traceback (most recent call last):
  File "/tmp/ipykernel_5914/151534509.py", line 6, in <module>
    df_m = pd.concat([df1, df2], axis='columns', verify_integrity=True)
ValueError: Indexes have overlapping values: Index(['A', 'B', 'C', 'D'], dtype='object')


In [8]:
# df_m is still the earlier concat result (the verify_integrity call
# raised before assigning), so both 'A' columns are returned again.
df_m.loc[:, 'A']

Unnamed: 0,A,A.1
0,A0,A4
1,A1,A5
2,A2,A6
2,A3,A7


## Step 3: Pandas: merge on index - `join`

In [9]:
# join aligns on the index by default. Only the left frame's clashing
# columns are suffixed; the right frame's columns keep their names.
df1.join(
    other=df2,
    lsuffix='_x',
)

Unnamed: 0,A_x,B_x,C_x,D_x,A,B,C,D
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
2,A2,B2,C2,D2,A7,B7,C7,D7
2,A3,B3,C3,D3,A6,B6,C6,D6
2,A3,B3,C3,D3,A7,B7,C7,D7


## Step 4: Pandas: merge on index - `merge` vs `concat`

In [10]:
import pandas as pd
# Left frame: columns A-D, rows labelled 0..3, each cell is "<column><n>".
df1 = pd.DataFrame(
    {label: [f"{label}{n}" for n in range(4)] for label in "ABCD"},
    index=[0, 1, 2, 3],
)

# Right frame: same columns, numbering continues at 4, and it has one
# extra row (label 4) with no counterpart in df1.
df2 = pd.DataFrame(
    {label: [f"{label}{n}" for n in range(4, 9)] for label in "ABCD"},
    index=[0, 1, 2, 3, 4],
)

In [11]:
# Index-on-index merge with the default join type: label 4 exists only
# in df2, so it does not appear in the result.
df_m = df1.merge(
    df2,
    left_index=True,
    right_index=True,
)
df_m

Unnamed: 0,A_x,B_x,C_x,D_x,A_y,B_y,C_y,D_y
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


In [12]:
# An outer merge keeps label 4 as well; the df1-side columns are empty
# for that row.
df_m = df1.merge(
    df2,
    how='outer',
    left_index=True,
    right_index=True,
)
df_m

Unnamed: 0,A_x,B_x,C_x,D_x,A_y,B_y,C_y,D_y
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7
4,,,,,A8,B8,C8,D8


In [13]:
# Column-wise concat also keeps label 4 (df1 columns are empty there),
# matching the outer merge's rows but without adding suffixes.
df_m = pd.concat(
    objs=[df1, df2],
    axis='columns',
)
df_m

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7
4,,,,,A8,B8,C8,D8
