## DataFrameの連結/結合に関するメソッド

<table>
    <thead>
        <tr>
            <th>メソッド</th>
            <th>連結/結合</th>
            <th>方向</th>
            <th>デフォルトの結合方法</th>
            <th>キー</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td><m-b>concat()</m-b></td>
            <td><m-b>連結、結合</m-b></td>
            <td><m-b>行方向、列方向</m-b></td>
            <td><m-b>外部結合</m-b></td>
            <td><m-b>インデックス</m-b></td>
        </tr>
        <tr>
            <td><m-b>join()</m-b></td>
            <td><m-b>結合</m-b></td>
            <td><m-b>列方向</m-b></td>
            <td><m-b>左側外部結合</m-b></td>
            <td><m-b>インデックス(デフォルト)、列</m-b></td>
        </tr>
        <tr>
            <td><m-b>merge()</m-b></td>
            <td><m-b>結合</m-b></td>
            <td><m-b>列方向</m-b></td>
            <td><m-b>内部結合</m-b></td>
            <td><m-b>列(デフォルト)、インデックス</m-b></td>
        </tr>
    </tbody>
</table>

In [1]:
from IPython.display import display
import pandas as pd

df1 = pd.DataFrame(
    [
        [1, 10, 100],
        [2, 20, 200],
        [3, 30, 300],
    ],
    index=list("abc"),
    columns=list("ABC"),
)
df2 = pd.DataFrame(
    [
        [4, 10, 100],
        [2, 20, 200],
        [3, 30, 400],
    ],
    index=list("abd"),
    columns=list("ABD"),
)
display(df1, df2)

Unnamed: 0,A,B,C
a,1,10,100
b,2,20,200
c,3,30,300


Unnamed: 0,A,B,D
a,4,10,100
b,2,20,200
d,3,30,400


In [12]:
# concat()による連結
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
a,1,10,100.0,
b,2,20,200.0,
c,3,30,300.0,
a,4,10,,100.0
b,2,20,,200.0
d,3,30,,400.0


In [21]:
pd.concat([df1, df2], axis=0, join="outer")

Unnamed: 0,A,B,C,D
a,1,10,100.0,
b,2,20,200.0,
c,3,30,300.0,
a,4,10,,100.0
b,2,20,,200.0
d,3,30,,400.0


In [13]:
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B,C,D
0,1,10,100.0,
1,2,20,200.0,
2,3,30,300.0,
3,4,10,,100.0
4,2,20,,200.0
5,3,30,,400.0


In [4]:
pd.concat([df1, df2], keys=["data1", "data2"])

Unnamed: 0,Unnamed: 1,A,B,C,D
data1,a,1,10,100.0,
data1,b,2,20,200.0,
data1,c,3,30,300.0,
data2,a,4,10,,100.0
data2,b,2,20,,200.0
data2,d,3,30,,400.0


In [19]:
pd.concat([df2, df1], join="inner")

Unnamed: 0,A,B
a,4,10
b,2,20
d,3,30
a,1,10
b,2,20
c,3,30


In [17]:
# concatによる結合
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,D
a,1.0,10.0,100.0,4.0,10.0,100.0
b,2.0,20.0,200.0,2.0,20.0,200.0
c,3.0,30.0,300.0,,,
d,,,,3.0,30.0,400.0


In [15]:
pd.concat([df2, df1], axis=1)

Unnamed: 0,A,B,D,A.1,B.1,C
a,4.0,10.0,100.0,1.0,10.0,100.0
b,2.0,20.0,200.0,2.0,20.0,200.0
d,3.0,30.0,400.0,,,
c,,,,3.0,30.0,300.0


In [8]:
pd.concat([df2, df2], axis=1)

Unnamed: 0,A,B,D,A.1,B.1,D.1
a,4,10,100,4,10,100
b,2,20,200,2,20,200
d,3,30,400,3,30,400


In [20]:
pd.concat([df2, df1], join="inner", axis=1)

Unnamed: 0,A,B,D,A.1,B.1,C
a,4,10,100,1,10,100
b,2,20,200,2,20,200


In [22]:
df3 = pd.DataFrame(
    [
        [1.3, 10.3],
        [2.3, 20.3],
    ],
    index=list("ab"),
    columns=list("DE"),
)
display(df1, df2, df3)

Unnamed: 0,A,B,C
a,1,10,100
b,2,20,200
c,3,30,300


Unnamed: 0,A,B,D
a,4,10,100
b,2,20,200
d,3,30,400


Unnamed: 0,D,E
a,1.3,10.3
b,2.3,20.3


In [24]:
# join()による結合
df1.join(df3)

Unnamed: 0,A,B,C,D,E
a,1,10,100,1.3,10.3
b,2,20,200,2.3,20.3
c,3,30,300,,


In [26]:
df1.join(df2)

ValueError: columns overlap but no suffix specified: Index(['A', 'B'], dtype='object')

In [25]:
df1.join(
    df2,
    how="inner",
    lsuffix="_df1",
    rsuffix="_df2",
)

Unnamed: 0,A_df1,B_df1,C,A_df2,B_df2,D
a,1,10,100,4,10,100
b,2,20,200,2,20,200


In [36]:
df1.join(
    df2,
    how="outer",
    rsuffix="_df2",
)

Unnamed: 0,A,B,C,A_df2,B_df2,D
a,1.0,10.0,100.0,4.0,10.0,100.0
b,2.0,20.0,200.0,2.0,20.0,200.0
c,3.0,30.0,300.0,,,
d,,,,3.0,30.0,400.0


In [27]:
df4 = pd.DataFrame(
    [
        ["a", 1.4, 10.4],
        ["b", 2.4, 20.4],
    ],
    columns=["key", "F", "G"],
)
display(df4, df1)

Unnamed: 0,key,F,G
0,a,1.4,10.4
1,b,2.4,20.4


Unnamed: 0,A,B,C
a,1,10,100
b,2,20,200
c,3,30,300


In [28]:
df4.join(df1, on="key")

Unnamed: 0,key,F,G,A,B,C
0,a,1.4,10.4,1,10,100
1,b,2.4,20.4,2,20,200


In [41]:
# merge()による結合
pd.merge(df1, df2)

Unnamed: 0,A,B,C,D
0,2,20,200,200
1,3,30,300,400


In [42]:
pd.merge(df1, df2, on="A")

Unnamed: 0,A,B_x,C,B_y,D
0,2,20,200,20,200
1,3,30,300,30,400


In [39]:
pd.merge(df1, df2, on="B")

Unnamed: 0,A_x,B,C,A_y,D
0,1,10,100,4,100
1,2,20,200,2,200
2,3,30,300,3,400


In [44]:
pd.merge(df1, df2, on=["A", "B"], how="right")

Unnamed: 0,A,B,C,D
0,4,10,,100
1,2,20,200.0,200
2,3,30,300.0,400


In [48]:
pd.merge(df1, df2, how="outer")

Unnamed: 0,A,B,C,D
0,1,10,100.0,
1,2,20,200.0,200.0
2,3,30,300.0,400.0
3,4,10,,100.0


In [52]:
pd.merge(df2, df1, on="A", how="outer", suffixes=("-df1", "-df2"), indicator=True)

Unnamed: 0,A,B-df1,D,B-df2,C,_merge
0,1,,,10.0,100.0,right_only
1,2,20.0,200.0,20.0,200.0,both
2,3,30.0,400.0,30.0,300.0,both
3,4,10.0,100.0,,,left_only


In [55]:
pd.merge(df1, df2, how="cross", indicator=True)

Unnamed: 0,A_x,B_x,C,A_y,B_y,D,_merge
0,1,10,100,4,10,100,both
1,1,10,100,2,20,200,both
2,1,10,100,3,30,400,both
3,2,20,200,4,10,100,both
4,2,20,200,2,20,200,both
5,2,20,200,3,30,400,both
6,3,30,300,4,10,100,both
7,3,30,300,2,20,200,both
8,3,30,300,3,30,400,both


In [56]:
pd.merge(df1, df2, on="A", how="cross", indicator=True)

MergeError: Can not pass on, right_on, left_on or set right_index=True or left_index=True

In [67]:
# 欠損値を含む同士の結合
import numpy as np

pd.merge(
    pd.DataFrame({"a": [np.nan, 1, 2, np.nan], "b": ["あ", "い", "う", "え"]}),
    pd.DataFrame({"a": [1, np.nan, 1], "b": ["か", "き", "く"]}),
    on="a",
)

Unnamed: 0,a,b_x,b_y
0,,あ,き
1,1.0,い,か
2,1.0,い,く
3,,え,き


In [68]:
pd.merge(
    pd.DataFrame({"a": [np.nan, 1, 2, np.nan], "b": ["あ", "い", "う", "え"]}),
    pd.DataFrame({"a": [1, np.nan, 1], "b": ["か", "き", "く"]}),
    on="a",
    validate="one_to_one"
)

MergeError: Merge keys are not unique in either left or right dataset; not a one-to-one merge

In [73]:
pd.merge(
    pd.DataFrame({"a": [np.nan, 1, 2, 3], "b": ["あ", "い", "う", "え"]}),
    pd.DataFrame({"a": [1, 1, np.nan, np.nan], "b": ["か", "き", "く", "け"]}),
    on="a",
    validate="one_to_many"
)

Unnamed: 0,a,b_x,b_y
0,,あ,け
1,1.0,い,か
2,1.0,い,き
3,,あ,く


In [79]:
# 近い値での結合
df1 = pd.DataFrame(
    [
        [90.9, 80.8, 70.7],
        [100.3, 123.4, 150.01],
    ]
)
df2 = pd.DataFrame(
    [
        [90.1, 80.2 ,70.3],
        [100.2, 151.02, 143.2],
    ]
)
pd.merge_asof(df1, df2, on=0)

Unnamed: 0,0,1_x,2_x,1_y,2_y
0,90.9,80.8,70.7,80.2,70.3
1,100.3,123.4,150.01,151.02,143.2


In [85]:
pd.merge_asof(df1, df2, on=1, tolerance=1)

Unnamed: 0,0_x,1,2_x,0_y,2_y
0,90.9,80.8,70.7,90.1,70.3
1,100.3,123.4,150.01,,


In [91]:
pd.merge_asof(df1, df2, left_on=0, right_on=1, tolerance=10.8)

Unnamed: 0,key_0,0_x,1_x,2_x,0_y,1_y,2_y
0,90.9,90.9,80.8,70.7,90.1,80.2,70.3
1,100.3,100.3,123.4,150.01,,,
