## Concatenation


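- **If both sources have the same structure (the same columns for row-wise stacking, or the same index for column-wise joining), combine them with `pd.concat()`**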

**Useful link: https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html**

In [1]:
import numpy as np
import pandas as pd

In [5]:
# Raw inputs for the two example frames: each dict maps a column label to its
# list of values. The two dicts share no keys.
data_one = {
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
}
data_two = {
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
}

In [6]:
# Build the first example frame from data_one (its keys become the column
# labels), then show it as the cell output.
one = pd.DataFrame(data=data_one)
one

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [7]:
# Build the second example frame from data_two (its keys become the column
# labels), then show it as the cell output.
two = pd.DataFrame(data=data_two)
two

Unnamed: 0,C,D
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [17]:
# CONCATENATION (column-wise): the columns of `two` are placed to the right of
# the columns of `one`.
#
# PARAMETERS:
#   objs: list of datasets to be concatenated
#   axis: 0 by default (row-wise concatenation);
#         1 / "columns" for column-wise concatenation
pd.concat([one, two], axis="columns")

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


In [18]:
# Swapping the order of the frames in the list swaps the column order of the
# result: the columns of `two` now come before those of `one`.
pd.concat(objs=[two, one], axis="columns")

Unnamed: 0,C,D,A,B
0,C0,D0,A0,B0
1,C1,D1,A1,B1
2,C2,D2,A2,B2
3,C3,D3,A3,B3


In [27]:
# Row-wise concatenation (axis=0 is the default; spelled out here for clarity).
# NOTE(review): this cell's execution count (In [27]) is later than that of the
# column-rename cell further down (In [26]), and the output shown has only
# columns A and B. The output therefore appears to come from a run made AFTER
# two's columns were renamed. On a fresh top-to-bottom run the two frames still
# have different column labels at this point, so the result would contain
# columns A, B, C and D, with NaN wherever a frame lacks a column.
pd.concat([one, two], axis=0)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


### Case: concatenating row-wise after making the column names match

In [26]:
# Step 1: give `two` the same column labels as `one`, so that its values line
# up under one's columns when the frames are stacked row-wise.
# This relabels `two` in place: from here on it carries one's labels instead of
# C and D.
two.columns = list(one.columns)

# Check the relabelled frame
two

Unnamed: 0,A,B
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [28]:
# Step 2: stack the rows of `two` beneath the rows of `one`.
# Each frame keeps its own index labels, so the labels repeat in the result
# (visible in the displayed output).
mydf = pd.concat(objs=[one, two], axis=0)
mydf

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


In [31]:
# Step 3 (last): replace the repeated index labels with a unique, 1-based
# sequence.
# The upper bound is derived from the frame's length instead of being
# hard-coded, so this cell keeps working if the number of rows changes (a fixed
# range of the wrong length raises a length-mismatch error on assignment).
mydf.index = range(1, len(mydf) + 1)
mydf

Unnamed: 0,A,B
1,A0,B0
2,A1,B1
3,A2,B2
4,A3,B3
5,C0,D0
6,C1,D1
7,C2,D2
8,C3,D3
