# Appending and Concatenating series

In [1]:
import pandas as pd
# Three five-element integer series holding consecutive runs (2-6, 7-11, 12-16).
# No index is passed, so each one gets the default 0..4 labels.
s1 = pd.Series(list(range(2, 7)))
s2 = pd.Series(list(range(7, 12)))
s3 = pd.Series(list(range(12, 17)))

Q1. Append s1 and s2 series 

In [2]:
# Series.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat produces the same result: s2 follows s1 and every element
# keeps its original index label, so labels 0-4 appear twice.
print(pd.concat([s1, s2]))

0     2
1     3
2     4
3     5
4     6
0     7
1     8
2     9
3    10
4    11
dtype: int64


Q2. Find the **index** value of s3 series 

In [3]:
# Inspect the index labels attached to s3.
s3_index = s3.index
print(s3_index)

RangeIndex(start=0, stop=5, step=1)


Q3. Append the s2 and s3 series, then use **.reset_index** to renumber the result

In [4]:
# Series.append was removed in pandas 2.0, so join the series with pd.concat.
# reset_index(drop=True) then throws away the duplicated 0-4 labels and
# numbers the combined series 0-9.
print(pd.concat([s2, s3]).reset_index(drop=True))

0     7
1     8
2     9
3    10
4    11
5    12
6    13
7    14
8    15
9    16
dtype: int64


Q4. Concatenate s1 and s2 series

In [5]:
# Stack s1 on top of s2; each series keeps its own index labels.
con = pd.concat((s1, s2), axis="index")
print(con)

0     2
1     3
2     4
3     5
4     6
0     7
1     8
2     9
3    10
4    11
dtype: int64


Q5. Use the **ignore_index** parameter to concatenate the s2 and s3 series

In [7]:
# ignore_index=True discards the original labels and numbers the result from 0.
con1 = pd.concat((s2, s3), axis="index", ignore_index=True)
print(con1)

0     7
1     8
2     9
3    10
4    11
5    12
6    13
7    14
8    15
9    16
dtype: int64


# Appending and Concatenating dataframes

In [8]:
import pandas as pd
# Three 4x4 frames with the same columns (A-D), written out row by row.
# Their row labels (0-3, 4-7, 8-11) do not overlap.
df1 = pd.DataFrame(
    [['A0', 'B0', 'C0', 'D0'],
     ['A1', 'B1', 'C1', 'D1'],
     ['A2', 'B2', 'C2', 'D2'],
     ['A3', 'B3', 'C3', 'D3']],
    columns=['A', 'B', 'C', 'D'],
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    [['A4', 'B4', 'C4', 'D4'],
     ['A5', 'B5', 'C5', 'D5'],
     ['A6', 'B6', 'C6', 'D6'],
     ['A7', 'B7', 'C7', 'D7']],
    columns=['A', 'B', 'C', 'D'],
    index=[4, 5, 6, 7],
)

df3 = pd.DataFrame(
    [['A8', 'B8', 'C8', 'D8'],
     ['A9', 'B9', 'C9', 'D9'],
     ['A10', 'B10', 'C10', 'D10'],
     ['A11', 'B11', 'C11', 'D11']],
    columns=['A', 'B', 'C', 'D'],
    index=[8, 9, 10, 11],
)

Q1. Find the **type** and **shape** of the df1 dataframe

In [9]:
# Confirm what kind of object df1 is.
df1_type = type(df1)
print(df1_type)

<class 'pandas.core.frame.DataFrame'>


In [11]:
# shape is a (rows, columns) tuple.
n_rows, n_cols = df1.shape
print((n_rows, n_cols))

(4, 4)


Q2. Append df1 and df2 dataframes. 

In [12]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat stacks the rows of df2 under those of df1 and keeps the
# original row labels, which is exactly what append used to do.
print(pd.concat([df1, df2]))

    A   B   C   D
0  A0  B0  C0  D0
1  A1  B1  C1  D1
2  A2  B2  C2  D2
3  A3  B3  C3  D3
4  A4  B4  C4  D4
5  A5  B5  C5  D5
6  A6  B6  C6  D6
7  A7  B7  C7  D7


Q3. Concatenate the columns of df1 and df2 dataframes. 

In [13]:
# Side-by-side concatenation: rows are aligned on their index labels, so a
# label present in only one frame gets NaN in the other frame's columns.
pd.concat([df1, df2], axis="columns")

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,,,,
1,A1,B1,C1,D1,,,,
2,A2,B2,C2,D2,,,,
3,A3,B3,C3,D3,,,,
4,,,,,A4,B4,C4,D4
5,,,,,A5,B5,C5,D5
6,,,,,A6,B6,C6,D6
7,,,,,A7,B7,C7,D7


Q4. Concatenate the rows of df1 and df2 dataframes. 

In [14]:
# Stack df2's rows beneath df1's rows.
pd.concat([df1, df2], axis="index")

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


# Concatenating, Keys & MultiIndexes

In [15]:
import pandas as pd

# Each scalar is broadcast across the two index labels, giving two-row frames
# whose columns and row labels only partly overlap with one another.
d1 = pd.DataFrame({'A': 0.1, 'B': 0.2, 'C': 0.3}, index=[2, 3])
d2 = pd.DataFrame({'B': 0.4, 'C': 0.5, 'D': 0.6}, index=[1, 2])
d3 = pd.DataFrame({'A': 0.7, 'B': 0.8, 'D': 0.9}, index=[1, 3])

In [16]:
# Display d1 on its own (columns A, B, C; row labels 2 and 3).
print(d1)

     A    B    C
2  0.1  0.2  0.3
3  0.1  0.2  0.3


In [17]:
# Display d2 (columns B, C, D; row labels 1 and 2).
print(d2)

     B    C    D
1  0.4  0.5  0.6
2  0.4  0.5  0.6


Q1. Concatenate rows of d1, d2 and d3 dataframes. 

In [19]:
# Row-wise stack of all three frames. The result carries the union of their
# columns, so a frame that lacks a column gets NaN there.
z = pd.concat([d1, d2, d3], axis="index")
print(z)

     A    B    C    D
2  0.1  0.2  0.3  NaN
3  0.1  0.2  0.3  NaN
1  NaN  0.4  0.5  0.6
2  NaN  0.4  0.5  0.6
1  0.7  0.8  NaN  0.9
3  0.7  0.8  NaN  0.9


Q2. Use a **multi-index** on the rows of (d1, d2, d3)

In [18]:
# Passing a mapping makes its keys (1, 2, 3) the outer level of a row
# MultiIndex, so every row records which source frame it came from.
z = pd.concat({1: d1, 2: d2, 3: d3}, axis="index")
print(z)

       A    B    C    D
1 2  0.1  0.2  0.3  NaN
  3  0.1  0.2  0.3  NaN
2 1  NaN  0.4  0.5  0.6
  2  NaN  0.4  0.5  0.6
3 1  0.7  0.8  NaN  0.9
  3  0.7  0.8  NaN  0.9


Q3. Concatenate the columns of d1 and d2 dataframes. 

In [20]:
# Align d1 and d2 on their row labels and place their columns side by side.
print(pd.concat([d1, d2], axis="columns"))

     A    B    C    B    C    D
1  NaN  NaN  NaN  0.4  0.5  0.6
2  0.1  0.2  0.3  0.4  0.5  0.6
3  0.1  0.2  0.3  NaN  NaN  NaN


Q4. Use a **multi-index** on the columns of (d1, d2, d3)

In [21]:
# The keys become the top level of a column MultiIndex: one group per source frame.
print(pd.concat([d1, d2, d3], axis="columns", keys=[1, 2, 3]))

     1              2              3          
     A    B    C    B    C    D    A    B    D
1  NaN  NaN  NaN  0.4  0.5  0.6  0.7  0.8  0.9
2  0.1  0.2  0.3  0.4  0.5  0.6  NaN  NaN  NaN
3  0.1  0.2  0.3  NaN  NaN  NaN  0.7  0.8  0.9


# Outer and inner Joins

In [22]:
import numpy as np
import pandas as pd
# Small integer grids built from consecutive ranges: A is 1x3, B is 3x2, C is 3x3.
# A and C share a column count; B and C share a row count.
A = np.arange(3).reshape(1, -1)
B = np.arange(6).reshape(-1, 2)
C = np.arange(9).reshape(3, -1)

print(A)
print(B)
print(C)

[[0 1 2]]
[[0 1]
 [2 3]
 [4 5]]
[[0 1 2]
 [3 4 5]
 [6 7 8]]


Q1. Arrange the array (B,C) horizontally

Q2. Concatenate the array(B,C). 

In [24]:
# B (3x2) and C (3x3) have the same number of rows, so they can sit side by side.
np.hstack((B, C))

array([[0, 1, 0, 1, 2],
       [2, 3, 3, 4, 5],
       [4, 5, 6, 7, 8]])

In [25]:
# Same result as hstack: axis=1 joins the arrays along their columns.
np.concatenate((B, C), axis=1)

array([[0, 1, 0, 1, 2],
       [2, 3, 3, 4, 5],
       [4, 5, 6, 7, 8]])

Q3. Arrange the array(A,C) vertically

Q4. Concatenate  the array(A,C).

In [26]:
# A (1x3) and C (3x3) have the same number of columns, so A can sit on top of C.
np.vstack((A, C))

array([[0, 1, 2],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [27]:
# Same result as vstack: axis=0 joins the arrays along their rows.
np.concatenate((A, C), axis=0)

array([[0, 1, 2],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [29]:
import pandas as pd
# Two 4x4 frames with the same columns (A-D), written out row by row.
# Their row labels (0-3 and 4-7) do not overlap.
df1 = pd.DataFrame(
    [['A0', 'B0', 'C0', 'D0'],
     ['A1', 'B1', 'C1', 'D1'],
     ['A2', 'B2', 'C2', 'D2'],
     ['A3', 'B3', 'C3', 'D3']],
    columns=['A', 'B', 'C', 'D'],
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    [['A4', 'B4', 'C4', 'D4'],
     ['A5', 'B5', 'C5', 'D5'],
     ['A6', 'B6', 'C6', 'D6'],
     ['A7', 'B7', 'C7', 'D7']],
    columns=['A', 'B', 'C', 'D'],
    index=[4, 5, 6, 7],
)

Q1. Convert the dataframes df1 and df2 to NumPy arrays named A_array and B_array respectively.

In [33]:
# Copy df1's values into a plain NumPy array; the row and column labels are dropped.
A_array = df1.to_numpy(copy=True)
print(A_array)

[['A0' 'B0' 'C0' 'D0']
 ['A1' 'B1' 'C1' 'D1']
 ['A2' 'B2' 'C2' 'D2']
 ['A3' 'B3' 'C3' 'D3']]


In [34]:
# Copy df2's values into a plain NumPy array; the row and column labels are dropped.
B_array = df2.to_numpy(copy=True)
print(B_array)

[['A4' 'B4' 'C4' 'D4']
 ['A5' 'B5' 'C5' 'D5']
 ['A6' 'B6' 'C6' 'D6']
 ['A7' 'B7' 'C7' 'D7']]


Q2. Concatenate the new arrays column-wise

In [36]:
# axis=1 joins the arrays side by side purely by position: plain arrays carry
# no index labels, so row i of A_array is paired with row i of B_array.
side_by_side = np.concatenate((A_array, B_array), axis=1)
print(side_by_side)

[['A0' 'B0' 'C0' 'D0' 'A4' 'B4' 'C4' 'D4']
 ['A1' 'B1' 'C1' 'D1' 'A5' 'B5' 'C5' 'D5']
 ['A2' 'B2' 'C2' 'D2' 'A6' 'B6' 'C6' 'D6']
 ['A3' 'B3' 'C3' 'D3' 'A7' 'B7' 'C7' 'D7']]


### Joins : 
> Combining rows of multiple tables by matching their index labels. Two types:
        
         1. Inner Joins : a) Intersection of index sets
        
         2. Outer Joins : a) Missing fields are filled with NaN
                          b) Union of index sets

Q1. Concatenate dataframes df1 and df2 with inner joins

In [37]:
# An inner join keeps only the row labels present in both frames. df1 (0-3)
# and df2 (4-7) share none, so the result has all eight columns but no rows.
pd.concat([df1, df2], join="inner", axis="columns")

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1


Q2. Concatenate dataframes df1 and df2 with outer joins

In [38]:
# An outer join keeps the union of the row labels and fills the gaps with NaN.
pd.concat([df1, df2], join="outer", axis="columns")

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,,,,
1,A1,B1,C1,D1,,,,
2,A2,B2,C2,D2,,,,
3,A3,B3,C3,D3,,,,
4,,,,,A4,B4,C4,D4
5,,,,,A5,B5,C5,D5
6,,,,,A6,B6,C6,D6
7,,,,,A7,B7,C7,D7
