In [1]:
%pylab inline
import numpy as np
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


# index
* ## [Series](#series)
    * [concat along axis=0, make a longer series](#series_axis0)
    * [concat along axis=1, form a DataFrame, and each Series becomes a column](#series_axis1)
* ## [DataFrame](#dataframe)
    * [concat along axis=0, make a longer DataFrame](#frame_axis0)
    * [concat along axis=1, provide the same function as "join"](#frame_axis1)

<a id="series"></a>
## Series
<a id="series_axis0"></a>
### concat along axis=0, make a longer series

In [2]:
# Three small Series with disjoint index labels, used as inputs to pd.concat.
s1 = pd.Series(data=[0, 1], index=list('ab'))
s2 = pd.Series(data=[2, 3, 4], index=list('cde'))
s3 = pd.Series(data=[5, 6], index=list('fg'))

In [3]:
# axis=0 (the default) stacks the inputs end to end into one longer Series.
pd.concat([s1, s2, s3], axis=0)

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [4]:
# Passing `keys` gives each input block a name; the result is a Series
# with a hierarchical (two-level) index.
series = pd.concat(
    objs=[s1, s2, s3],
    keys=['one', 'two', 'three'],
)
series

one    a    0
       b    1
two    c    2
       d    3
       e    4
three  f    5
       g    6
dtype: int64

In [5]:
# Pivot the innermost index level into columns, converting the hierarchical
# Series into a DataFrame; label combinations that do not exist become NaN.
series.unstack(level=-1)

Unnamed: 0,a,b,c,d,e,f,g
one,0.0,1.0,,,,,
two,,,2.0,3.0,4.0,,
three,,,,,,5.0,6.0


<a id="series_axis1"></a>
### concat along axis=1, form a DataFrame, and each Series becomes a column

In [6]:
# axis=1 places each Series in its own column; `keys` supplies the column
# names series1..series3.
# `range` is used instead of the Python 2-only `xrange`, which raises
# NameError on Python 3 and would break a fresh Restart & Run All.
pd.concat([s1, s2, s3], axis=1, keys=["series%d" % i for i in range(1, 4)])

Unnamed: 0,series1,series2,series3
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


##### <span style="color:red">Concatenating Series column-wise works like a join: rows are aligned on their shared index labels</span>

In [7]:
# By default concat behaves like an "outer join": the index of the result
# is the union of the input indices.
s4 = pd.concat([s1.mul(5), s3])
pd.concat(
    objs=[s1, s4],
    axis=1,
)

Unnamed: 0,0,1
a,0.0,0
b,1.0,5
f,,5
g,,6


In [8]:
# join="inner" keeps only the index labels that appear in every input
# (the intersection of the indices).
pd.concat(
    objs=[s1, s4],
    axis=1,
    join="inner",
)

Unnamed: 0,0,1
a,0,0
b,1,5


<a id="dataframe"></a>
## DataFrame

<a id="frame_axis0"></a>
### concat along axis=0, make a longer DataFrame
<span style="color:green">**such vertical concat depends on shared, common column names**</span>

In [9]:
# Upper frame: 3 rows x 4 columns (a, b, c, d) holding 1.01 .. 1.12.
dfupper = pd.DataFrame(
    data=1 + np.arange(1, 13).reshape(3, 4) / 100.0,
    columns=list('abcd'),
)
dfupper

Unnamed: 0,a,b,c,d
0,1.01,1.02,1.03,1.04
1,1.05,1.06,1.07,1.08
2,1.09,1.1,1.11,1.12


In [10]:
# Lower frame: 2 rows x 3 columns holding 2.01 .. 2.06. Its columns
# (b, d, a) are a reordered subset of dfupper's columns; there is no 'c'.
dflower = pd.DataFrame(
    data=2 + np.arange(1, 7).reshape(2, 3) * 0.01,
    columns=list('bda'),
)
dflower

Unnamed: 0,b,d,a
0,2.01,2.02,2.03
1,2.04,2.05,2.06


In [11]:
# The original indices are only row numbers, so discard them with
# ignore_index=True; the result gets fresh row numbers 0..n-1.
# Columns are matched by name, and a column missing from one input is
# filled with NaN.
pd.concat(
    objs=[dfupper, dflower],
    ignore_index=True,
)

Unnamed: 0,a,b,c,d
0,1.01,1.02,1.03,1.04
1,1.05,1.06,1.07,1.08
2,1.09,1.1,1.11,1.12
3,2.03,2.01,,2.02
4,2.06,2.04,,2.05


In [12]:
# join="inner" keeps only the columns that are present in both frames.
pd.concat(
    objs=[dfupper, dflower],
    ignore_index=True,
    join="inner",
)

Unnamed: 0,b,d,a
0,1.02,1.04,1.01
1,1.06,1.08,1.05
2,1.1,1.12,1.09
3,2.01,2.02,2.03
4,2.04,2.05,2.06


<a id="frame_axis1"></a>
### concat along axis=1, provide the same function as "join"
<span style="color:green">**such horizontal concat depends on shared, common indices (row names)**</span>

In [13]:
# Left frame: rows a, b, c and columns one, two, holding 0 .. 5.
dfleft = pd.DataFrame(
    data=np.arange(6).reshape(3, 2),
    index=list('abc'),
    columns=['one', 'two'],
)
dfleft

Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


In [14]:
# Right frame: rows a, c only (no 'b') and columns three, four, holding 5 .. 8.
dfright = pd.DataFrame(
    data=np.arange(4).reshape(2, 2) + 5,
    index=list('ac'),
    columns=['three', 'four'],
)
dfright

Unnamed: 0,three,four
a,5,6
c,7,8


In [15]:
# By default concat uses an "outer join": every row label from either
# frame is kept, and missing cells become NaN.
pd.concat(objs=[dfleft, dfright], axis=1)

Unnamed: 0,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [16]:
# join="inner" keeps only the row labels present in both frames
# (the intersection of the indices).
pd.concat(
    objs=[dfleft, dfright],
    axis=1,
    join="inner",
)

Unnamed: 0,one,two,three,four
a,0,1,5,6
c,4,5,7,8


In [18]:
# since default is outer join,outer join will give the same result
dfleft.join(dfright,how="outer")

Unnamed: 0,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0
