# Concatenating
*Joining two or more datasets together.*
 
> Think of it like putting Lego blocks together — stacking them either side-by-side (columns) or on top of each other (rows).

In [5]:
import numpy as np
import pandas as pd

# Names are case-sensitive: the class is `Series` (capital S), which is the
# name the later cells call when building `series_val`.
from pandas import Series, DataFrame

In [8]:
# Build a 6x6 frame holding the integers 0..35 in row-major order.
DF_obj = pd.DataFrame(np.arange(36).reshape((6, 6)))
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [10]:
# Build a smaller 5x3 frame holding the integers 0..14 in row-major order.
DF_obj_2 = pd.DataFrame(np.arange(15).reshape((5, 3)))
DF_obj_2

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14


In [13]:
# axis="columns" (same as axis=1) places the frames side by side; rows are
# matched on the index, so index 5 (absent from DF_obj_2) gets NaN there.
pd.concat([DF_obj, DF_obj_2], axis="columns")

Unnamed: 0,0,1,2,3,4,5,0.1,1.1,2.1
0,0,1,2,3,4,5,0.0,1.0,2.0
1,6,7,8,9,10,11,3.0,4.0,5.0
2,12,13,14,15,16,17,6.0,7.0,8.0
3,18,19,20,21,22,23,9.0,10.0,11.0
4,24,25,26,27,28,29,12.0,13.0,14.0
5,30,31,32,33,34,35,,,


In [15]:
# axis="index" (same as the default axis=0) stacks the frames on top of each
# other; columns 3-5 do not exist in DF_obj_2, so its rows get NaN there.
pd.concat([DF_obj, DF_obj_2], axis="index")

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3.0,4.0,5.0
1,6,7,8,9.0,10.0,11.0
2,12,13,14,15.0,16.0,17.0
3,18,19,20,21.0,22.0,23.0
4,24,25,26,27.0,28.0,29.0
5,30,31,32,33.0,34.0,35.0
0,0,1,2,,,
1,3,4,5,,,
2,6,7,8,,,
3,9,10,11,,,


# Transforming
*Changing the data to a new format or structure.*


## drop


In [17]:
# Remove the rows labelled 0 and 3 (the same as drop([0, 3]) with the default axis=0).
DF_obj.drop(index=[0, 3])

Unnamed: 0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [18]:
# Remove the columns labelled 0 and 3 (the same as drop([0, 3], axis=1)).
DF_obj.drop(columns=[0, 3])

Unnamed: 0,1,2,4,5
0,1,2,4,5
1,7,8,10,11
2,13,14,16,17
3,19,20,22,23
4,25,26,28,29
5,31,32,34,35


## join

In [22]:
# One value per row of DF_obj: the integers 0..5 on the default 0..5 index.
series_val = pd.Series(np.arange(6))

In [23]:
# Display the Series built in the previous cell.
series_val

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int32

In [24]:
# Name the Series; join() uses this name as the label of the added column.
# NOTE(review): the recorded outputs in this notebook show the name as
# 'added variable' (with a space) — the outputs look stale; re-run to confirm.
series_val.name = 'added_variable'

In [27]:
# Display the Series after naming it; the recorded output of this cell is the
# named Series, not the DF_obj frame.
series_val

0    0
1    1
2    2
3    3
4    4
5    5
Name: added variable, dtype: int32

In [30]:
# Attach the named Series to DF_obj as an extra right-most column; rows are
# matched on the index.
new_variable = DF_obj.join(series_val)
new_variable

Unnamed: 0,0,1,2,3,4,5,added variable
0,0,1,2,3,4,5,0
1,6,7,8,9,10,11,1
2,12,13,14,15,16,17,2
3,18,19,20,21,22,23,3
4,24,25,26,27,28,29,4
5,30,31,32,33,34,35,5


In [31]:
# Put the Series first and the DF_obj columns after it. A Series has no
# join() of its own, and passing one as `self` to the unbound
# DataFrame.join only works by accident of the pandas implementation, so
# convert it to a one-column frame explicitly before joining on the index.
new_variable = series_val.to_frame().join(DF_obj)
new_variable

Unnamed: 0,added variable,0,1,2,3,4,5
0,0,0,1,2,3,4,5
1,1,6,7,8,9,10,11
2,2,12,13,14,15,16,17
3,3,18,19,20,21,22,23
4,4,24,25,26,27,28,29
5,5,30,31,32,33,34,35


In [37]:
# Stack new_variable on top of itself. ignore_index=True throws away the
# original row labels and numbers the result 0..n-1 afresh.
concated_data = pd.concat([new_variable] * 2, ignore_index=True)
concated_data

Unnamed: 0,added variable,0,1,2,3,4,5
0,0,0,1,2,3,4,5
1,1,6,7,8,9,10,11
2,2,12,13,14,15,16,17
3,3,18,19,20,21,22,23
4,4,24,25,26,27,28,29
5,5,30,31,32,33,34,35
6,0,0,1,2,3,4,5
7,1,6,7,8,9,10,11
8,2,12,13,14,15,16,17
9,3,18,19,20,21,22,23


In [38]:
# Stack new_variable on top of itself again, this time keeping each frame's
# own row labels (the default), so the labels 0..5 appear twice.
concated_data = pd.concat((new_variable, new_variable))
concated_data

Unnamed: 0,added variable,0,1,2,3,4,5
0,0,0,1,2,3,4,5
1,1,6,7,8,9,10,11
2,2,12,13,14,15,16,17
3,3,18,19,20,21,22,23
4,4,24,25,26,27,28,29
5,5,30,31,32,33,34,35
0,0,0,1,2,3,4,5
1,1,6,7,8,9,10,11
2,2,12,13,14,15,16,17
3,3,18,19,20,21,22,23


## Sorting

In [40]:
# Show the frame in its original row order before sorting.
DF_obj

Unnamed: 0,0,1,2,3,4,5
0,0,1,2,3,4,5
1,6,7,8,9,10,11
2,12,13,14,15,16,17
3,18,19,20,21,22,23
4,24,25,26,27,28,29
5,30,31,32,33,34,35


In [44]:
# Order the rows by the values in column 5, largest first; sort_values
# returns a new frame and leaves DF_obj untouched.
DF_sorted = DF_obj.sort_values(by=5, ascending=False)

In [45]:
# Display the result of the descending sort on column 5.
DF_sorted

Unnamed: 0,0,1,2,3,4,5
5,30,31,32,33,34,35
4,24,25,26,27,28,29
3,18,19,20,21,22,23
2,12,13,14,15,16,17
1,6,7,8,9,10,11
0,0,1,2,3,4,5
