In [1]:
import pandas as pd
import numpy as np

In [2]:
def make_df(cols, ind):
    """Quickly build a DataFrame of labelled placeholder strings.

    Every cell holds the column label concatenated with the row label,
    e.g. column 'A' and row 0 give 'A0'.

    Parameters
    ----------
    cols : iterable
        Column labels. Iterating a string yields one column per character.
    ind : iterable
        Row labels, used both to build the cell values and as the index.

    Returns
    -------
    pandas.DataFrame
        One column per item of ``cols`` and one row per item of ``ind``.
    """
    # `ind` is walked once per column and once more for the index, so a
    # one-shot iterator (e.g. a generator) has to be materialised first;
    # re-iterable inputs such as lists and ranges are passed through as-is.
    if iter(ind) is ind:
        ind = list(ind)
    data = {c: [str(c) + str(i) for i in ind] for c in cols}
    return pd.DataFrame(data, index=ind)
make_df('ABC', range(3))

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2


In [3]:
# Simple concatenation of plain Python lists with NumPy
x, y, z = [1, 2, 3], [4, 5, 6], [7, 8, 9]
np.concatenate((x, y, z))

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
# Two-dimensional input: place two copies of the nested list side by side
# by concatenating along axis=1
x = [
    [1, 2, 3],
    [4, 5, 6],
]
np.concatenate((x, x), axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [6]:
# Simple concatenation with pd.concat 
# Signature in Pandas v0.18 (note: the join_axes parameter was removed in pandas 1.0)

# pd.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
#           keys=None, levels=None, names=None, verify_integrity=False,
#           copy=True)
          

In [7]:
# Stack two Series end to end; their index labels do not overlap
ser1 = pd.Series(data=['A', 'B', 'C'], index=[1, 2, 3])
ser2 = pd.Series(data=['D', 'E', 'F'], index=[4, 5, 6])
pd.concat([ser1, ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [18]:
# Row-wise concatenation of two DataFrames that share the same columns
df1, df2 = make_df('AB', [1, 2]), make_df('AB', [3, 4])
pd.concat([df1, df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [20]:
# Column-wise concatenation (axis=1): same row labels, different columns
df1, df2 = make_df('AB', [1, 2]), make_df('CD', [1, 2])
pd.concat([df1, df2], axis=1)


Unnamed: 0,A,B,C,D
1,A1,B1,C1,D1
2,A2,B2,C2,D2


In [22]:
# Duplicate index: give y the very same row labels as x, so the
# concatenated result contains repeated index values
x, y = make_df('AB', [0, 1]), make_df('AB', [2, 3])
y.index = x.index
pd.concat([x, y])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A2,B2
1,A3,B3


In [23]:
# Two parameters for dealing with repeated index labels:
#   verify_integrity=True makes pandas throw an exception if there are
#     duplicate indices;
#   ignore_index=True resets the index of the result.
pd.concat([x, y], ignore_index=True)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [24]:
# Another approach: label each input through the keys parameter, which
# gives the result a hierarchical (MultiIndex) row index
pd.concat([x, y], keys=['x', 'y'])

Unnamed: 0,Unnamed: 1,A,B
x,0,A0,B0
x,1,A1,B1
y,0,A2,B2
y,1,A3,B3


In [26]:
# Concatenation with joins
# So far the DataFrames being concatenated shared the same column names.
# In practice, data from different sources might have different sets of
# column names, and pd.concat offers several options in this case.
df5, df6 = make_df('ABC', [1, 2]), make_df('BCD', [3, 4])
pd.concat([df5, df6])

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [27]:
# The default join is 'outer' (union of the columns); passing
# join='inner' keeps only the intersection
pd.concat([df5, df6], join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [28]:
# Another option is to specify directly which columns should remain.
# Older pandas offered the join_axes argument for this, but it was
# deprecated in pandas 0.25 and removed in 1.0, so the concatenated result
# is reindexed instead. Requested columns missing from an input are filled
# with NaN, and columns that are not requested are dropped.
pd.concat([df5, df6]).reindex(columns=df5.columns)

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,,B3,C3
4,,B4,C4


In [29]:
# DataFrame.append was a simple convenience wrapper around pd.concat();
# it was deprecated in pandas 1.4 and removed in 2.0.
# The former df1.append(df2) is written as pd.concat([df1, df2]).
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
1,A1,B1,,
2,A2,B2,,
1,,,C1,D1
2,,,C2,D2
