# 31. データの連結

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [4]:
# Build a small 3x3 array holding the integers 0 through 8
arr1 = np.arange(9).reshape(3, 3)
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [5]:
# Join two copies of the array side by side (axis=1, the column direction)
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [6]:
# Stack two copies on top of each other (axis=0, the row direction)
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [7]:
# The pandas.Series case: a three-element Series labelled T, U, V
ser1 = Series([0, 1, 2], index=list('TUV'))
ser1

T    0
U    1
V    2
dtype: int64

In [8]:
# A second, shorter Series whose labels (X, Y) do not overlap with ser1
ser2 = Series([3, 4], index=list('XY'))
ser2

X    3
Y    4
dtype: int64

In [9]:
# pd.concat joins along axis=0 by default (spelled out here for clarity):
# the Series are appended end to end and repeated index labels are kept
pd.concat([ser1, ser2, ser1], axis=0)

T    0
U    1
V    2
X    3
Y    4
T    0
U    1
V    2
dtype: int64

In [10]:
# Concatenating along the columns (axis=1) produces a DataFrame;
# index labels missing from one of the Series show up as NaN
pd.concat(objs=[ser1, ser2], axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [11]:
# Series can also be concatenated under a hierarchical index:
# each entry of `keys` becomes the outer level for the matching Series
pd.concat(objs=[ser1, ser2], keys=['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [13]:
# When concatenating along the columns, the keys become the column names
pd.concat(objs=[ser1, ser2], keys=['cat1', 'cat2'], axis=1)

Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [14]:
# The same operations work on DataFrames.
# The values come from a privately seeded RandomState so that the frame is
# identical on every run (Restart & Run All reproduces the same numbers)
# and NumPy's global random state is left untouched.
dframe1 = DataFrame(np.random.RandomState(14).randn(4, 3), columns=['X', 'Y', 'Z'])
dframe1

Unnamed: 0,X,Y,Z
0,-1.675028,-0.366404,0.194204
1,0.318086,-0.768473,0.689388
2,-0.01472,0.156455,1.356948
3,0.64056,1.008808,0.372491


In [15]:
# A second frame with a partly different set of columns (Y, Q, X).
# Seeded privately so the values are the same on every run.
dframe2 = DataFrame(np.random.RandomState(15).randn(3, 3), columns=['Y', 'Q', 'X'])
dframe2

Unnamed: 0,Y,Q,X
0,-1.000545,0.325262,-0.938802
1,-0.279313,2.585407,-0.759861
2,-1.043012,0.022848,1.029648


In [16]:
# Concatenate the two DataFrames row-wise; a column that exists in only
# one of the frames is filled with NaN for the rows of the other
pd.concat(objs=[dframe1, dframe2])

Unnamed: 0,Q,X,Y,Z
0,,-1.675028,-0.366404,0.194204
1,,0.318086,-0.768473,0.689388
2,,-0.01472,0.156455,1.356948
3,,0.64056,1.008808,0.372491
0,0.325262,-0.938802,-1.000545,
1,2.585407,-0.759861,-0.279313,
2,0.022848,1.029648,-1.043012,


In [17]:
# The original index labels can be discarded: with ignore_index=True the
# result is renumbered 0..n-1
pd.concat(objs=[dframe1, dframe2], ignore_index=True)

Unnamed: 0,Q,X,Y,Z
0,,-1.675028,-0.366404,0.194204
1,,0.318086,-0.768473,0.689388
2,,-0.01472,0.156455,1.356948
3,,0.64056,1.008808,0.372491
4,0.325262,-0.938802,-1.000545,
5,2.585407,-0.759861,-0.279313,
6,0.022848,1.029648,-1.043012,


In [19]:
# Keep only selected index labels (rows) of the concatenated result.
# The `join_axes` argument of pd.concat was deprecated in pandas 0.25 and
# removed in 1.0; reindexing the concatenated frame gives the same result
# and works on both old and current pandas versions.
pd.concat([ser1, ser2], axis=1).reindex(['U', 'V', 'Y'])

Unnamed: 0,0,1
U,1.0,
V,2.0,
Y,,4.0


公式ドキュメント：
[https://pandas.pydata.org/docs/reference/api/pandas.concat.html](https://pandas.pydata.org/docs/reference/api/pandas.concat.html)