In [1]:
import numpy as np
import pandas as pd

# Concatenating objects

In [2]:
# Three sample frames sharing columns A-D. Each cell holds its column letter
# followed by its row label, and the row labels of the three frames
# (0-3, 4-7, 8-11) do not overlap.
df1 = pd.DataFrame(
    {col: [f"{col}{row}" for row in range(0, 4)] for col in "ABCD"},
    index=list(range(0, 4)),
)

df2 = pd.DataFrame(
    {col: [f"{col}{row}" for row in range(4, 8)] for col in "ABCD"},
    index=list(range(4, 8)),
)

df3 = pd.DataFrame(
    {col: [f"{col}{row}" for row in range(8, 12)] for col in "ABCD"},
    index=list(range(8, 12)),
)

In [3]:
# Show df1 (row labels 0-3).
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [4]:
# Show df2 (row labels 4-7).
df2

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [5]:
# Show df3 (row labels 8-11).
df3

Unnamed: 0,A,B,C,D
8,A8,B8,C8,D8
9,A9,B9,C9,D9
10,A10,B10,C10,D10
11,A11,B11,C11,D11


In [6]:
# Collect the three frames in a list; the keyed pd.concat call further down
# takes this list as its input.
frames = [df1, df2, df3]

In [7]:
# Stack the three frames row-wise (axis=0 is pd.concat's default).
# `frames` was built in the previous cell as [df1, df2, df3], so this is
# equivalent to pd.concat([df1, df2, df3]).
concat_result = pd.concat(frames)
concat_result

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B9,C9,D9


In [8]:
# `keys` attaches one label to each input frame, in order: "x" to the first,
# "y" to the second, "z" to the third. The labels become the outer level of
# the result's index, so every row records which piece it came from.
concat_result = pd.concat(
    frames,
    keys=["x", "y", "z"],
)
concat_result

Unnamed: 0,Unnamed: 1,A,B,C,D
x,0,A0,B0,C0,D0
x,1,A1,B1,C1,D1
x,2,A2,B2,C2,D2
x,3,A3,B3,C3,D3
y,4,A4,B4,C4,D4
y,5,A5,B5,C5,D5
y,6,A6,B6,C6,D6
y,7,A7,B7,C7,D7
z,8,A8,B8,C8,D8
z,9,A9,B9,C9,D9


In [9]:
# The keyed concat produced a hierarchical (two-level) index whose outer level
# holds the keys, so each original chunk can be selected by its key.
concat_result.loc['y']

Unnamed: 0,A,B,C,D
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


# Set logic on the other axes
When gluing together multiple DataFrames, you have a choice of how to handle the other axes (other than the one being concatenated). This can be done in the following two ways:
- Take the union of them all, `join='outer'`. This is the default option as it results in zero information loss.
- Take the intersection, `join='inner'`.

In [10]:
# A fourth frame that only partly overlaps df1: it shares columns B and D and
# row labels 2 and 3, and adds column F and row labels 6 and 7.
df4 = pd.DataFrame(
    data={col: [f"{col}{row}" for row in (2, 3, 6, 7)] for col in ("B", "D", "F")},
    index=[2, 3, 6, 7],
)
df4

Unnamed: 0,B,D,F
2,B2,D2,F2
3,B3,D3,F3
6,B6,D6,F6
7,B7,D7,F7


In [11]:
# Show df1 again so it can be compared with df4 above.
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [12]:
# `join` defaults to "outer": the result keeps the union of the two frames'
# columns, and a cell is left missing wherever a frame lacks that column.
result = pd.concat(
    [df4, df1],
    axis=0,
)
result

Unnamed: 0,B,D,F,A,C
2,B2,D2,F2,,
3,B3,D3,F3,,
6,B6,D6,F6,,
7,B7,D7,F7,,
0,B0,D0,,A0,C0
1,B1,D1,,A1,C1
2,B2,D2,,A2,C2
3,B3,D3,,A3,C3


In [13]:
# Side-by-side concat (axis=1): rows are aligned on their index labels, and
# the default outer join keeps every label found in either frame.
result = pd.concat(
    [df4, df1],
    axis=1,
)
result

Unnamed: 0,B,D,F,A,B.1,C,D.1
0,,,,A0,B0,C0,D0
1,,,,A1,B1,C1,D1
2,B2,D2,F2,A2,B2,C2,D2
3,B3,D3,F3,A3,B3,C3,D3
6,B6,D6,F6,,,,
7,B7,D7,F7,,,,


In [14]:
# Concatenate side by side, then reindex to df1's labels so that only the
# rows labelled 0-3 remain.
result = (
    pd.concat([df1, df4], axis=1)
    .reindex(df1.index)
)
result

Unnamed: 0,A,B,C,D,B.1,D.1,F
0,A0,B0,C0,D0,,,
1,A1,B1,C1,D1,,,
2,A2,B2,C2,D2,B2,D2,F2
3,A3,B3,C3,D3,B3,D3,F3


In [15]:
# join="inner" takes the intersection: only the columns present in both
# frames (B and D) survive the row-wise concat.
result = pd.concat(
    [df4, df1],
    axis=0,
    join="inner",
)
result

Unnamed: 0,B,D
2,B2,D2
3,B3,D3
6,B6,D6
7,B7,D7
0,B0,D0
1,B1,D1
2,B2,D2
3,B3,D3


In [16]:
# With axis=1 the inner join applies to the row labels: only the labels the
# two frames share (2 and 3) are kept.
result = pd.concat(
    [df4, df1],
    axis=1,
    join="inner",
)
result

Unnamed: 0,B,D,F,A,B.1,C,D.1
2,B2,D2,F2,A2,B2,C2,D2
3,B3,D3,F3,A3,B3,C3,D3


In [17]:
# Rebuild df4's index from df1's index labels: labels df4 also has (2, 3) keep
# their rows, and labels df4 lacks (0, 1) become rows of missing values.
df4.reindex(df1.index)

Unnamed: 0,B,D,F
0,,,
1,,,
2,B2,D2,F2
3,B3,D3,F3


In [18]:
# Reindex with an explicit list of labels. These are the labels df4 already
# has, so the output matches df4 itself.
df4.reindex([2, 3, 6, 7])

Unnamed: 0,B,D,F
2,B2,D2,F2
3,B3,D3,F3
6,B6,D6,F6
7,B7,D7,F7
