In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### 沿轴向连接

In [2]:
# NumPy的concatenation函数可以在NumPy数组上来做轴向连接（拼接、绑定、堆叠）
arr = np.arange(12).reshape((3, 4))
np.concatenate([arr, arr], axis=0)  # 默认情况下，axis=0

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [3]:
np.concatenate([arr, arr], axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [4]:
# pandas的concat()方法也可以实现类似的效果，二维时，axis=0（默认）相当于union，axis=1相当于join
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])

pd.concat([s1, s2, s3])  # 默认情况下，axis=0

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [5]:
pd.concat([s1, s2, s3], axis=1)  # 轴向上没有重叠，默认是full outer join的效果

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [6]:
s4 = pd.concat([s1, s3])
pd.concat([s1, s4], axis=1, join='inner')  # inner join的效果

Unnamed: 0,0,1
a,0,0
b,1,1


In [7]:
pd.concat([s1, s4], axis=1, join_axes=[['a', 'c', 'b', 'e']])  # 新版本的pandas删除了join_axes

TypeError: concat() got an unexpected keyword argument 'join_axes'

In [8]:
result = pd.concat([s1, s1, s3], keys=['one', 'two', 'three'])  # 在连接轴上创建一个多层索引，可以区分出结果中参与的各个连接片段
result

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [9]:
result.unstack()

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


In [10]:
pd.concat([s1, s2, s3], axis=1, keys=['one', 'two', 'three'])  # 当轴向为axis=1时，keys则是DataFrame的列头

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [11]:
df1 = pd.DataFrame(np.arange(6).reshape(3, 2), index=['a', 'b', 'c'], columns=['one', 'two'])
df2 = pd.DataFrame(5 + np.arange(4).reshape(2, 2), index=['a', 'c'], columns=['three', 'four'])

pd.concat([df1, df2], axis=1, keys=['level1', 'level2'])

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [12]:
pd.concat({'level1': df1, 'level2': df2}, axis=1)  # 字典的键相当于keys选项

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [13]:
pd.concat([df1, df2], axis=1, keys=['level1', 'level2'],
          names=['upper', 'lower'])  # 使用names选项可以为轴的层次命名

upper,level1,level1,level2,level2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [14]:
# df1 = pd.DataFrame(np.random.randn(3, 4), columns=['a', 'b', 'c', 'd'])
# df2 = pd.DataFrame(np.random.randn(2, 3), columns=['b', 'd', 'a'])
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.arange(6).reshape((2, 3)), columns=['b', 'd', 'a'])

# pd.concat([df1, df2])

pd.concat([df1, df2], ignore_index=True)  # 忽略原索引，产生新的索引

Unnamed: 0,a,b,c,d
0,0,1,2.0,3
1,4,5,6.0,7
2,8,9,10.0,11
3,2,0,,1
4,5,3,,4
