In [1]:
import pandas as pd
import numpy as np

# Concatenating along an axis

With plain `numpy`.

In [3]:
# Build a 3x4 integer grid holding 0..11 to use as the concatenation input.
arr = np.arange(12).reshape(3, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [4]:
# Join two copies of the grid side by side (along the column axis).
np.concatenate((arr, arr), axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

With `pandas`.

In [5]:
# First Series: values 0..1 labelled 'a', 'b'.
s1 = pd.Series(data=[0, 1], index=list('ab'))

In [6]:
# Second Series: values 2..4 labelled 'c', 'd', 'e' (no overlap with s1).
s2 = pd.Series(data=range(2, 5), index=list('cde'))

In [7]:
# Third Series: values 5..6 labelled 'f', 'g'.
s3 = pd.Series(data=range(5, 7), index=list('fg'))

In [12]:
# Place each Series in its own column; the row labels are the union of all
# three indexes, so positions a Series does not cover show up as NaN.
pd.concat(
    [s1, s2, s3],
    axis=1,
)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


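By default `concat` performs an outer join on the other axis (the union of the indexes, as above). Pass `join='inner'` to keep only the labels shared by all inputs.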

In [14]:
# Stack s1 and s3 end to end (row-wise, the default axis) so that the result
# shares the labels 'a' and 'b' with s1.
s4 = pd.concat([s1, s3], axis=0)
s4

a    0
b    1
f    5
g    6
dtype: int64

In [18]:
# Column-wise concat restricted to the labels present in both inputs.
pd.concat(
    [s1, s4],
    join='inner',
    axis=1,
)

Unnamed: 0,0,1
a,0,0
b,1,1


In [19]:
# Select an explicit set of row labels for the concatenated result.
# The `join_axes` keyword was removed from `pd.concat` in pandas 1.0, so the
# outer-joined result is reindexed instead. Labels that appear in neither
# input ('c' and 'e') are filled with NaN.
pd.concat([s1, s4], axis=1).reindex(['a', 'c', 'b', 'e'])

Unnamed: 0,0,1
a,0.0,0.0
c,,
b,1.0,1.0
e,,


Want to keep track of the concatenated pieces? Use the `keys` argument.

In [21]:
# Tag each concatenated piece with a key; the keys become the outer level of
# a hierarchical index on the result.
result = pd.concat(
    [s1, s1, s3],
    keys=['one', 'two', 'three'],
)
result

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [22]:
# Pivot the innermost index level (the original labels) into columns,
# leaving one row per key.
result.unstack(level=-1)

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


Along `axis=1`, the keys become the column labels.

In [23]:
# With axis=1 the keys label the resulting columns.
pd.concat(
    [s1, s2, s3],
    keys=['one', 'two', 'three'],
    axis=1,
)

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


The same goes for `DataFrame` objects.

In [24]:
# 3x2 frame holding 0..5, rows labelled 'a'..'c'.
df1 = pd.DataFrame(
    data=np.arange(6).reshape(3, 2),
    columns=['one', 'two'],
    index=list('abc'),
)

In [26]:
# 2x2 frame holding 5..8; it only has rows 'a' and 'c', so row 'b' of df1
# has no counterpart here.
df2 = pd.DataFrame(
    data=np.arange(5, 9).reshape(2, 2),
    columns=['three', 'four'],
    index=['a', 'c'],
)

In [28]:
# The keys form the outer level of a two-level column index, one group of
# columns per input frame.
pd.concat(
    [df1, df2],
    keys=['level1', 'level2'],
    axis=1,
)

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [29]:
# Passing a mapping instead of a list: the mapping's keys play the role of
# the `keys` argument.
pd.concat(dict(level1=df1, level2=df2), axis=1)

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


We can also name the levels of the resulting hierarchical column index with the `names` argument.

In [30]:
# `names` labels the two levels of the resulting column index:
# 'upper' for the keys, 'lower' for the original column names.
pd.concat(
    dict(level1=df1, level2=df2),
    names=['upper', 'lower'],
    axis=1,
)

upper,level1,level1,level2,level2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


What if the index does not contain any relevant data? Pass `ignore_index=True` to discard it and renumber the rows.

In [31]:
# Two frames of standard-normal draws with default integer row labels.
# NOTE: this rebinds df1/df2 from the earlier cells, and the draws are not
# seeded, so the values differ on every run.
# df2 has a subset of df1's columns, listed in a different order.
df1 = pd.DataFrame(data=np.random.randn(3, 4), columns=list('abcd'))
df2 = pd.DataFrame(data=np.random.randn(2, 3), columns=list('bda'))

In [36]:
# Stack the frames row-wise, discarding the original row labels and
# numbering the combined rows from 0. Columns are aligned by name, so the
# column missing from df2 ('c') is NaN in its rows.
pd.concat(
    [df1, df2],
    ignore_index=True,
)

Unnamed: 0,a,b,c,d
0,-0.119295,-0.108968,0.244943,1.817376
1,0.222312,0.54478,-0.124037,0.921468
2,0.969195,1.559561,-0.961357,0.82878
3,0.219412,-0.631207,,0.254309
4,0.460582,-1.114118,,-0.168936
