  ## PANDAS CONCAT

pandas provides various facilities for easily combining Series and DataFrame objects (and, in older pandas versions, Panel objects). This notebook covers `pd.concat`.

In [1]:
import pandas as pd

### Concat on series:

In [2]:
# First example series: four integers on the default 0..3 index.
sr1 = pd.Series(data=[0, 1, 2, 3])
sr1

0    0
1    1
2    2
3    3
dtype: int64

In [3]:
# Second example series: same length and index as sr1, different values.
sr2 = pd.Series(data=[5, 6, 7, 8])
sr2

0    5
1    6
2    7
3    8
dtype: int64

In [4]:
# Stack sr2 below sr1. Each series keeps its own index labels, so 0-3 appear twice.
pd.concat([sr1,sr2])

0    0
1    1
2    2
3    3
0    5
1    6
2    7
3    8
dtype: int64

In [5]:
# A longer series (six values) to show that the lengths need not match.
sr3 = pd.Series(data=[6, 7, 8, 9, 10, 11])
sr3

0     6
1     7
2     8
3     9
4    10
5    11
dtype: int64

In [6]:
# Series of different lengths (4 and 6) concatenate the same way; original labels are kept.
pd.concat([sr2,sr3])

0     5
1     6
2     7
3     8
0     6
1     7
2     8
3     9
4    10
5    11
dtype: int64

In [7]:
# A series of single-character strings, used to show mixed-type concatenation.
sr4 = pd.Series(data=list("abcd"))
sr4

0    a
1    b
2    c
3    d
dtype: object

In [8]:
# Mixing an integer series with a string series: the result has dtype object.
pd.concat([sr3,sr4])

0     6
1     7
2     8
3     9
4    10
5    11
0     a
1     b
2     c
3     d
dtype: object

#### axis=0/1 — 0: concatenate vertically (stack rows); 1: concatenate horizontally (side by side); default is 0

In [9]:
# axis=0 passed explicitly: same output as the call without axis.
pd.concat([sr3,sr4],axis=0)

0     6
1     7
2     8
3     9
4    10
5    11
0     a
1     b
2     c
3     d
dtype: object

In [10]:
# axis=1: the series become columns 0 and 1 of a dataframe, aligned on the index.
# sr4 has no labels 4 and 5, so those cells are empty in its column.
pd.concat([sr3,sr4],axis=1)

Unnamed: 0,0,1
0,6,a
1,7,b
2,8,c
3,9,d
4,10,
5,11,


#### ignore_index=True/False — True discards the original index labels and renumbers the result 0, 1, 2, …; default is False

In [11]:
# ignore_index=True: the repeated labels are replaced by a fresh 0-9 index.
pd.concat([sr3,sr4],axis=0,ignore_index=True)

0     6
1     7
2     8
3     9
4    10
5    11
6     a
7     b
8     c
9     d
dtype: object

### Concat on dataframes:

In [12]:
# First example dataframe: four rows with ID, Name and Class columns.
df1 = pd.DataFrame(
    {
        "ID": [1, 2, 3, 4],
        "Name": ["A", "B", "C", "D"],
        "Class": [9, 10, 11, 12],
    }
)
df1

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12


In [13]:
# Second example dataframe: same columns and length as df1, different values.
df2 = pd.DataFrame(
    {
        "ID": [11, 12, 13, 14],
        "Name": ["AA", "BB", "CC", "DD"],
        "Class": [5, 6, 7, 8],
    }
)
df2

Unnamed: 0,ID,Name,Class
0,11,AA,5
1,12,BB,6
2,13,CC,7
3,14,DD,8


In [14]:
# Concatenate the two dataframes: df2's rows are appended below df1's,
# and each frame keeps its own 0-3 index labels.
pd.concat([df1,df2])

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,11,AA,5
1,12,BB,6
2,13,CC,7
3,14,DD,8


In [15]:
# axis=0 spelled out explicitly: same output as the call without axis.
pd.concat([df1,df2],axis=0)

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,11,AA,5
1,12,BB,6
2,13,CC,7
3,14,DD,8


In [16]:
# axis=1: the dataframes are concatenated side by side (horizontally), aligned on the index.
pd.concat([df1,df2],axis=1)

Unnamed: 0,ID,Name,Class,ID.1,Name.1,Class.1
0,1,A,9,11,AA,5
1,2,B,10,12,BB,6
2,3,C,11,13,CC,7
3,4,D,12,14,DD,8


In [17]:
# axis=0 again, for comparison with the axis=1 result: rows are stacked vertically.
pd.concat([df1,df2],axis=0)

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,11,AA,5
1,12,BB,6
2,13,CC,7
3,14,DD,8


#### ignore_index:

In [18]:
# ignore_index=False: the original index labels are kept, so 0-3 repeat.
pd.concat([df1,df2],axis=0,ignore_index=False)

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,11,AA,5
1,12,BB,6
2,13,CC,7
3,14,DD,8


In [19]:
# ignore_index=True: the result is renumbered with a fresh index (0 to 7).
pd.concat([df1,df2],axis=0,ignore_index=True)

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
4,11,AA,5
5,12,BB,6
6,13,CC,7
7,14,DD,8


### keys: a label for each input dataframe, added as the outer level of the resulting index

In [20]:
# keys with axis=0 (no axis passed): each key becomes the outer level of the row index,
# showing which dataframe every row came from.
pd.concat([df1,df2],keys=['first dataframe','second dataframe'])

Unnamed: 0,Unnamed: 1,ID,Name,Class
first dataframe,0,1,A,9
first dataframe,1,2,B,10
first dataframe,2,3,C,11
first dataframe,3,4,D,12
second dataframe,0,11,AA,5
second dataframe,1,12,BB,6
second dataframe,2,13,CC,7
second dataframe,3,14,DD,8


In [21]:
# keys with axis=1: each key becomes the outer level of the column labels.
pd.concat([df1,df2],keys=['first dataframe','second dataframe'],axis=1)

Unnamed: 0_level_0,first dataframe,first dataframe,first dataframe,second dataframe,second dataframe,second dataframe
Unnamed: 0_level_1,ID,Name,Class,ID,Name,Class
0,1,A,9,11,AA,5
1,2,B,10,12,BB,6
2,3,C,11,13,CC,7
3,4,D,12,14,DD,8


In [22]:
# keys has no visible effect together with ignore_index=True:
# the result gets a plain 0-7 index and the key labels do not appear.
pd.concat([df1,df2],keys=['first dataframe','second dataframe'],ignore_index=True)

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
4,11,AA,5
5,12,BB,6
6,13,CC,7
7,14,DD,8


In [23]:
# Show df1 again for reference.
df1

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12


In [24]:
# A shorter dataframe (two rows) with the same columns as df1.
df3 = pd.DataFrame(
    {
        "ID": [5, 6],
        "Name": ["E", "F"],
        "Class": [13, 14],
    }
)
df3

Unnamed: 0,ID,Name,Class
0,5,E,13
1,6,F,14


In [25]:
# df3 has only two rows; they are appended below df1 with their own labels 0 and 1.
pd.concat([df1,df3])

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,5,E,13
1,6,F,14


In [26]:
# axis=1 with frames of different length: df3 has no rows 2 and 3, so its columns
# are empty there, and its numeric columns are displayed as floats.
pd.concat([df1,df3],axis=1)

Unnamed: 0,ID,Name,Class,ID.1,Name.1,Class.1
0,1,A,9,5.0,E,13.0
1,2,B,10,6.0,F,14.0
2,3,C,11,,,
3,4,D,12,,,


In [27]:
# ignore_index=True renumbers the six stacked rows 0-5.
pd.concat([df1,df3],ignore_index=True)

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
4,5,E,13
5,6,F,14


In [28]:
# Show df1 again before the join examples.
df1

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12


In [29]:
# Show df3 again before the join examples.
df3

Unnamed: 0,ID,Name,Class
0,5,E,13
1,6,F,14


In [30]:
# Plain vertical concat of df1 and df3, as a baseline for the join= variants.
pd.concat([df1,df3])

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,5,E,13
1,6,F,14


In [31]:
# join="outer" produces the same output as leaving join out.
pd.concat([df1,df3],join="outer")

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,5,E,13
1,6,F,14


In [32]:
# With axis=0, join applies to the columns. df1 and df3 have the same columns,
# so join="inner" changes nothing here.
pd.concat([df1,df3],join="inner")

Unnamed: 0,ID,Name,Class
0,1,A,9
1,2,B,10
2,3,C,11
3,4,D,12
0,5,E,13
1,6,F,14


In [33]:
# axis=1 without join: all four index labels are kept; df3's columns are empty for 2 and 3.
pd.concat([df1,df3],axis=1)

Unnamed: 0,ID,Name,Class,ID.1,Name.1,Class.1
0,1,A,9,5.0,E,13.0
1,2,B,10,6.0,F,14.0
2,3,C,11,,,
3,4,D,12,,,


In [34]:
# axis=1 with join="inner": only the index labels present in both frames (0 and 1) are kept.
pd.concat([df1,df3],axis=1,join="inner")

Unnamed: 0,ID,Name,Class,ID.1,Name.1,Class.1
0,1,A,9,5,E,13
1,2,B,10,6,F,14


### join_axes (removed in pandas 1.0 — reindex the inputs or the result instead):

In [35]:
# Return the result on one specific dataframe's index.
# The old join_axes=[df1.index] argument was removed in pandas 1.0; reindexing the
# concatenated result to df1.index gives the same rows. Here that keeps every row
# of df1, which matches join="outer" for this data.
pd.concat([df1, df3], axis=1).reindex(df1.index)

Unnamed: 0,ID,Name,Class,ID.1,Name.1,Class.1
0,1,A,9,5.0,E,13.0
1,2,B,10,6.0,F,14.0
2,3,C,11,,,
3,4,D,12,,,


In [36]:
# Use df3's index for the result (the old join_axes=[df3.index], removed in pandas 1.0).
# Only rows 0 and 1 remain, which matches join="inner" for this data.
# Reindexing df1 before concatenating introduces no missing values, so the
# integer columns stay integers.
pd.concat([df1.reindex(df3.index), df3], axis=1)

Unnamed: 0,ID,Name,Class,ID.1,Name.1,Class.1
0,1,A,9,5,E,13
1,2,B,10,6,F,14


In [37]:
# A dataframe whose column labels (Rollno, Grade) differ from df1's.
df4 = pd.DataFrame(
    {
        "Rollno": [22, 33],
        "Grade": ["Good", "Excellent"],
    }
)
df4

Unnamed: 0,Rollno,Grade
0,22,Good
1,33,Excellent


In [38]:
# Two dataframes with different columns concatenated side by side;
# df4 has no rows 2 and 3, so Rollno and Grade are empty there.
pd.concat([df1,df4],axis=1)

Unnamed: 0,ID,Name,Class,Rollno,Grade
0,1,A,9,22.0,Good
1,2,B,10,33.0,Excellent
2,3,C,11,,
3,4,D,12,,


In [39]:
# The columns of df1 and df4 differ and sort= is not passed. With the pandas version
# used for this run, that emits a warning (see the output below) and the columns of
# the result come out sorted alphabetically.
pd.concat([df1,df4],axis=0)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,Class,Grade,ID,Name,Rollno
0,9.0,,1.0,A,
1,10.0,,2.0,B,
2,11.0,,3.0,C,
3,12.0,,4.0,D,
0,,Good,,,22.0
1,,Excellent,,,33.0


In [40]:
# sort=False added: no warning is shown, and the columns keep their original order
# (df1's columns first, then df4's).
pd.concat([df1,df4],sort=False)

Unnamed: 0,ID,Name,Class,Rollno,Grade
0,1.0,A,9.0,,
1,2.0,B,10.0,,
2,3.0,C,11.0,,
3,4.0,D,12.0,,
0,,,,22.0,Good
1,,,,33.0,Excellent
