# Working on DataFrames, Part 2

## Concatenate

In [1]:
# Open the pandas.concat reference page in the default web browser
import webbrowser

url = 'http://pandas.pydata.org/pandas-docs/stable/generated/pandas.concat.html'
webbrowser.open(url)

True

Helper function to display several DataFrames side by side

In [14]:
from IPython.display import display_html

# function
def display_side_by_side(*args):
    """Render several DataFrames next to each other in the notebook output.

    Every argument is converted with its ``to_html()`` method, the resulting
    markup is joined, and each opening ``<table`` tag receives an inline
    ``style="display:inline"`` attribute so the tables flow horizontally
    instead of stacking vertically.

    Parameters
    ----------
    *args
        Objects exposing a ``to_html()`` method (e.g. pandas DataFrames).
        Passing no arguments displays an empty HTML string.
    """
    html_str = ''.join(df.to_html() for df in args)
    # Patch only the opening tag. Replacing the bare word 'table' would also
    # turn '</table>' into invalid markup and would corrupt any column label
    # or cell value that happens to contain the text 'table'.
    inline_html = html_str.replace('<table', '<table style="display:inline"')
    display_html(inline_html, raw=True)

Basic Imports

In [15]:
# Now we'll learn about concatenating along an axis
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

#### First we work on numpy arrays

In [16]:
# Build a 4x4 matrix holding the integers 0..15 in row-major order
arr1 = np.arange(16).reshape(4, 4)

In [17]:
# Display the 4x4 matrix created above
arr1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [18]:
# Join the matrix with itself along axis 1: the columns are placed side by side
np.concatenate((arr1, arr1), axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11],
       [12, 13, 14, 15, 12, 13, 14, 15]])

In [19]:
# Join the matrix with itself along axis 0: the rows are appended underneath
np.concatenate((arr1, arr1), axis=0)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

### Concatenation in pandas

Concatenate pandas series

In [20]:
# Build two Series whose index labels do not overlap
ser1 = Series([0, 1, 2], index=list('ABC'))
ser2 = Series([3, 4], index=list('XY'))

By default, `pd.concat` concatenates along `axis=0` (the values are stacked one after another)

In [21]:
# Stack the two Series end to end; axis=0 is the default, spelled out here
pd.concat([ser1, ser2], axis=0)

A    0
B    1
C    2
X    3
Y    4
dtype: int64

If we pass `axis=1`, it produces a new DataFrame with one column per Series; labels missing from a Series are filled with `NaN`

In [22]:
# Place the two Series next to each other as columns of a DataFrame
pd.concat(
    [ser1, ser2],
    axis=1,
    sort=False,
)

Unnamed: 0,0,1
A,0.0,
B,1.0,
C,2.0,
X,,3.0
Y,,4.0


We can also choose exactly which index labels should appear in the result

In [23]:
# Keep only the chosen index labels in the concatenated result.
# The `join_axes` argument was deprecated in pandas 0.25 and removed in 1.0
# (it now raises TypeError); reindexing the concatenated frame is the
# documented replacement and yields the same rows in the same order.
pd.concat([ser1, ser2], axis=1, sort=False).reindex(['A', 'B', 'Y'])

Unnamed: 0,0,1
A,0.0,
B,1.0,
Y,,4.0


Add marker keys to label each input in the concatenation result

In [24]:
# With axis=0 the keys become the outer level of a hierarchical index
pd.concat(
    [ser1, ser2],
    keys=['Lev1', 'Lev2'],
)

Lev1  A    0
      B    1
      C    2
Lev2  X    3
      Y    4
dtype: int64

In [25]:
# With axis=1 the same keys are used as the column headers instead
pd.concat(
    [ser1, ser2],
    axis=1,
    keys=['Lev1', 'Lev2'],
    sort=False,
)

Unnamed: 0,Lev1,Lev2
A,0.0,
B,1.0,
C,2.0,
X,,3.0
Y,,4.0


### Working on DataFrames

In [26]:
# DataFrames are concatenated the same way as Series.
# Seed NumPy's global RNG first so the random frames, and every result built
# from them, are identical on each Restart & Run All.
np.random.seed(42)

dframe1 = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C'])
dframe2 = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'X'])

In [27]:
# Show both frames next to each other using the helper defined earlier
display_side_by_side(dframe1, dframe2)

Unnamed: 0,A,B,C
0,-1.133198,1.071379,-0.513944
1,0.206839,0.499961,-0.998405
2,1.234426,-0.04307,1.665373
3,1.973527,-0.432149,0.107204

Unnamed: 0,A,B,X
0,-1.614565,0.386551,-0.211225
1,-0.541369,-1.101121,0.357847
2,-0.126016,0.762579,0.743207


In [28]:
# Stack the two DataFrames row-wise (default axis=0); columns present in only
# one frame are filled with NaN, and the original column order is kept
pd.concat(
    [dframe1, dframe2],
    sort=False,
)

Unnamed: 0,A,B,C,X
0,-1.133198,1.071379,-0.513944,
1,0.206839,0.499961,-0.998405,
2,1.234426,-0.04307,1.665373,
3,1.973527,-0.432149,0.107204,
0,-1.614565,0.386551,,-0.211225
1,-0.541369,-1.101121,,0.357847
2,-0.126016,0.762579,,0.743207


If we don't care about the index info we can just use **ignore_index=True**

In [35]:
# Discard the original row labels and number the combined rows 0..n-1
pd.concat(
    [dframe1, dframe2],
    ignore_index=True,
    sort=True,
)

Unnamed: 0,A,B,C,X
0,0.95507,0.046904,0.79645,
1,0.491414,-0.153832,0.674793,
2,-1.03864,0.815549,-0.052868,
3,0.858887,-0.464251,0.478679,
4,-1.519331,-0.56216,,-0.940501
5,0.400755,-0.683769,,-0.851531
6,0.351798,0.650641,,-0.116146


## Nice! Let's do some exercise!