In [1]:
# import pandas
import pandas as pd 

## Concatenation

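Concatenation combines DataFrames using **pd.concat**, either row-wise (`axis=0`, the default) or column-wise (`axis=1`). The DataFrames do not need to have the same shape: they are aligned on the labels of the other axis, and with the default `join='outer'` any entries that are missing after alignment are filled with `NaN`.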

In [2]:
# Four small frames used by the concatenation examples. Every cell value is
# "<letter><number>", so the origin of each value stays visible after the
# frames have been combined.

# df1 and df2: identical columns (A-D) and identical row labels (0-3).
df1 = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(0, 4)] for col in 'ABCD'},
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(4, 8)] for col in 'ABCD'},
    index=[0, 1, 2, 3],
)

# df3: same row labels, but a disjoint set of column names (E-H).
# Its values still carry the A-D prefixes.
df3 = pd.DataFrame(
    {col: [f'{prefix}{n}' for n in range(8, 12)]
     for col, prefix in zip('EFGH', 'ABCD')},
    index=[0, 1, 2, 3],
)

# df4: a smaller frame - only columns A-C and only three rows (labels 0-2).
df4 = pd.DataFrame(
    {col: [f'{col}{n}' for n in range(8, 11)] for col in 'ABC'},
    index=[0, 1, 2],
)


In [3]:
# Show df1: columns A-D, row labels 0-3.
df1

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [4]:
# Show df2: same columns (A-D) and row labels (0-3) as df1, different values.
df2

Unnamed: 0,A,B,C,D
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [5]:
# Show df3: row labels 0-3, but columns E-H, which share no names with df1/df2.
df3

Unnamed: 0,E,F,G,H
0,A8,B8,C8,D8
1,A9,B9,C9,D9
2,A10,B10,C10,D10
3,A11,B11,C11,D11


In [6]:
# Show df4: only columns A-C and only three rows (labels 0-2).
df4

Unnamed: 0,A,B,C
0,A8,B8,C8
1,A9,B9,C9
2,A10,B10,C10


In [7]:
# Stack the three frames on top of each other (axis=0 is the default).
# The result has the union of all column names; cells with no source value
# (E-H for df1/df2 rows, A-D for df3 rows) are NaN, and the original row
# labels 0-3 repeat once per input frame.
pd.concat(objs=[df1, df2, df3], axis=0)


Unnamed: 0,A,B,C,D,E,F,G,H
0,A0,B0,C0,D0,,,,
1,A1,B1,C1,D1,,,,
2,A2,B2,C2,D2,,,,
3,A3,B3,C3,D3,,,,
0,A4,B4,C4,D4,,,,
1,A5,B5,C5,D5,,,,
2,A6,B6,C6,D6,,,,
3,A7,B7,C7,D7,,,,
0,,,,,A8,B8,C8,D8
1,,,,,A9,B9,C9,D9


In [8]:
# Place the three frames side by side, aligned on their shared row labels 0-3.
# df1 and df2 both contribute columns named A-D, so those labels occur twice
# in the result.
pd.concat([df1, df2, df3], axis='columns')

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1,E,F,G,H
0,A0,B0,C0,D0,A4,B4,C4,D4,A8,B8,C8,D8
1,A1,B1,C1,D1,A5,B5,C5,D5,A9,B9,C9,D9
2,A2,B2,C2,D2,A6,B6,C6,D6,A10,B10,C10,D10
3,A3,B3,C3,D3,A7,B7,C7,D7,A11,B11,C11,D11


## Append

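`DataFrame.append` is a special case of `pd.concat` (`axis=0`, `join='outer'`): it stacks the rows of one DataFrame below another. Note that `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0, so `pd.concat` is the form to use in new code.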


In [9]:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so `df1.append(df2)` raises AttributeError on current versions.
# Appending rows is exactly this concat call: stack along the row axis and
# keep the union of the columns.
pd.concat([df1, df2], axis=0, join='outer')


  df1.append(df2)


Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


In [10]:
# Row-wise stack of df1 and df2 using pd.concat with its defaults
# (axis=0, join='outer'); the row labels 0-3 of each frame are kept as-is.
pd.concat(objs=[df1, df2])


Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
0,A4,B4,C4,D4
1,A5,B5,C5,D5
2,A6,B6,C6,D6
3,A7,B7,C7,D7


## Merge

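Merge DataFrames on one or more key columns using **merge** (similar to joins in SQL). The `how` argument selects the join type, e.g. `'inner'`, `'left'`, `'right'` or `'outer'`.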


In [11]:
# Two tables that share a 'key' column.
# t1 has keys K0-K3; t2 has keys K0-K4, i.e. one key (K4) with no match in t1.
t1 = pd.DataFrame({
    'key': [f'K{n}' for n in range(4)],
    'A': [f'A{n}' for n in range(4)],
    'B': [f'B{n}' for n in range(4)],
})

t2 = pd.DataFrame({
    'key': [f'K{n}' for n in range(5)],
    'C': [f'C{n}' for n in range(5)],
    'D': [f'D{n}' for n in range(5)],
})

In [12]:
# Show t1: keys K0-K3 with value columns A and B.
t1

Unnamed: 0,key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [13]:
# Show t2: keys K0-K4 with value columns C and D (K4 does not appear in t1).
t2

Unnamed: 0,key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3
4,K4,C4,D4


In [14]:
# Inner join on 'key': keep only keys present in both tables (K0-K3).
# t2's K4 row has no partner in t1 and is dropped.
pd.merge(t1, t2, on='key', how='inner')


Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [15]:
# Left join on 'key': keep every row of t1 and attach matching t2 columns.
# Every t1 key also exists in t2, so here the result equals the inner join.
pd.merge(t1, t2, on='key', how='left')


Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


In [16]:
# Right join on 'key': keep every row of t2 and attach matching t1 columns.
# K4 exists only in t2, so its A and B values are NaN.
pd.merge(t1, t2, on='key', how='right')


Unnamed: 0,key,A,B,C,D
0,K0,A0,B0,C0,D0
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3
4,K4,,,C4,D4
