# Merging and Concatenating

In [1]:
import numpy as np
import pandas as pd

## Sample Data

In [2]:
# Student roster as a column-oriented mapping: each key is a column name and
# each list holds that column's values, one entry per student.
# Grades are stored as integers rather than strings so that the resulting
# DataFrame column is numeric and supports arithmetic, comparison and
# aggregation (mean, max, sorting by value) without an extra conversion step.
data_1 = {
    'StudentID': ['S-001', 'S-002', 'S-003', 'S-004', 'S-005', 'S-006', 'S-007'],
    'Student': ['Pedro', 'Jose', 'Petra', 'Sam', 'Billy', 'Melandra', 'Johnny'],
    'Section': ['A', 'B', 'A', 'B', 'C', 'C', 'C'],
    'Grades': [82, 85, 90, 89, 92, 90, 83],
}

In [3]:
# School assignment per student, keyed by StudentID.
# Only S-001 through S-006 are listed here.
data_2 = dict(
    StudentID=['S-001', 'S-002', 'S-003', 'S-004', 'S-005', 'S-006'],
    School=['School 1', 'School 1', 'School 2', 'School 3', 'School 2', 'School 3'],
)

In [4]:
# Build the student frame from the column-oriented dict; each key becomes a column.
df_1 = pd.DataFrame(data=data_1)
df_1

Unnamed: 0,StudentID,Student,Section,Grades
0,S-001,Pedro,A,82
1,S-002,Jose,B,85
2,S-003,Petra,A,90
3,S-004,Sam,B,89
4,S-005,Billy,C,92
5,S-006,Melandra,C,90
6,S-007,Johnny,C,83


In [5]:
# Build the school frame from the column-oriented dict; each key becomes a column.
df_2 = pd.DataFrame(data=data_2)
df_2

Unnamed: 0,StudentID,School
0,S-001,School 1
1,S-002,School 1
2,S-003,School 2
3,S-004,School 3
4,S-005,School 2
5,S-006,School 3


## Merging DataFrames

In [6]:
# Inner join: keep only the StudentIDs that are present in both frames.
merge_df1 = df_1.merge(df_2, on='StudentID', how='inner')
merge_df1

Unnamed: 0,StudentID,Student,Section,Grades,School
0,S-001,Pedro,A,82,School 1
1,S-002,Jose,B,85,School 1
2,S-003,Petra,A,90,School 2
3,S-004,Sam,B,89,School 3
4,S-005,Billy,C,92,School 2
5,S-006,Melandra,C,90,School 3


In [7]:
# Left join: every row of df_1 is kept; a student with no match in df_2
# gets a missing value in the School column.
df_1.merge(df_2, on='StudentID', how='left')

Unnamed: 0,StudentID,Student,Section,Grades,School
0,S-001,Pedro,A,82,School 1
1,S-002,Jose,B,85,School 1
2,S-003,Petra,A,90,School 2
3,S-004,Sam,B,89,School 3
4,S-005,Billy,C,92,School 2
5,S-006,Melandra,C,90,School 3
6,S-007,Johnny,C,83,


In [8]:
# Right join: every row of df_2 is kept; df_1 rows without a match in df_2
# are dropped.
df_1.merge(df_2, on='StudentID', how='right')

Unnamed: 0,StudentID,Student,Section,Grades,School
0,S-001,Pedro,A,82,School 1
1,S-002,Jose,B,85,School 1
2,S-003,Petra,A,90,School 2
3,S-004,Sam,B,89,School 3
4,S-005,Billy,C,92,School 2
5,S-006,Melandra,C,90,School 3


In [9]:
# Outer join: keep the union of StudentIDs from both frames, filling the
# columns of the side that has no match with missing values.
df_1.merge(df_2, on='StudentID', how='outer')

Unnamed: 0,StudentID,Student,Section,Grades,School
0,S-001,Pedro,A,82,School 1
1,S-002,Jose,B,85,School 1
2,S-003,Petra,A,90,School 2
3,S-004,Sam,B,89,School 3
4,S-005,Billy,C,92,School 2
5,S-006,Melandra,C,90,School 3
6,S-007,Johnny,C,83,


## Concatenation

In [10]:
# Single-column frame of residences; it carries no StudentID, so it can only
# be lined up with the other frames by row position (index).
df_3 = pd.DataFrame(
    data={
        'Residence': [
            'Laguna',
            'Quezon',
            'Cavite',
            'Batangas',
            'Rizal',
            'NCR',
            'Batangas',
        ],
    }
)
df_3

Unnamed: 0,Residence
0,Laguna
1,Quezon
2,Cavite
3,Batangas
4,Rizal
5,NCR
6,Batangas


In [11]:
# Column-wise concatenation: the frames are placed side by side and rows are
# matched on the index, not on any key column.
df_concat = pd.concat(objs=[df_1, df_3], axis='columns')
df_concat

Unnamed: 0,StudentID,Student,Section,Grades,Residence
0,S-001,Pedro,A,82,Laguna
1,S-002,Jose,B,85,Quezon
2,S-003,Petra,A,90,Cavite
3,S-004,Sam,B,89,Batangas
4,S-005,Billy,C,92,Rizal
5,S-006,Melandra,C,90,NCR
6,S-007,Johnny,C,83,Batangas


In [12]:
# Column-wise concatenation of all three frames, matched on the row index.
# df_1 and df_2 both carry a StudentID column, so the result contains that
# label twice; df_2 has one row fewer, so its columns are missing in the last row.
df_concat = pd.concat(objs=[df_1, df_2, df_3], axis='columns')
df_concat

Unnamed: 0,StudentID,Student,Section,Grades,StudentID.1,School,Residence
0,S-001,Pedro,A,82,S-001,School 1,Laguna
1,S-002,Jose,B,85,S-002,School 1,Quezon
2,S-003,Petra,A,90,S-003,School 2,Cavite
3,S-004,Sam,B,89,S-004,School 3,Batangas
4,S-005,Billy,C,92,S-005,School 2,Rizal
5,S-006,Melandra,C,90,S-006,School 3,NCR
6,S-007,Johnny,C,83,,,Batangas


In [13]:
# Removing duplicate column names (in case you need to remove one).
# columns.duplicated() marks the second and later occurrences of a label, so
# the negated mask keeps only the first column for each name.
# The filtered frame is bound to its own name so it can be reused later;
# df_concat itself is left unchanged.
df_concat_dedup = df_concat.loc[:, ~df_concat.columns.duplicated()]
df_concat_dedup

Unnamed: 0,StudentID,Student,Section,Grades,School,Residence
0,S-001,Pedro,A,82,School 1,Laguna
1,S-002,Jose,B,85,School 1,Quezon
2,S-003,Petra,A,90,School 2,Cavite
3,S-004,Sam,B,89,School 3,Batangas
4,S-005,Billy,C,92,School 2,Rizal
5,S-006,Melandra,C,90,School 3,NCR
6,S-007,Johnny,C,83,,Batangas


In [14]:
# Show df_concat again: nothing has reassigned it since it was built, so it
# still contains both StudentID columns.
df_concat

Unnamed: 0,StudentID,Student,Section,Grades,StudentID.1,School,Residence
0,S-001,Pedro,A,82,S-001,School 1,Laguna
1,S-002,Jose,B,85,S-002,School 1,Quezon
2,S-003,Petra,A,90,S-003,School 2,Cavite
3,S-004,Sam,B,89,S-004,School 3,Batangas
4,S-005,Billy,C,92,S-005,School 2,Rizal
5,S-006,Melandra,C,90,S-006,School 3,NCR
6,S-007,Johnny,C,83,,,Batangas


In [26]:
# Alternative way to build the data set: merge first, then concatenate.
# The left join on StudentID keeps every row of df_1 and adds School once,
# so no duplicate StudentID column is produced.
df_merge = df_1.merge(df_2, how='left', on='StudentID')
df_merge

Unnamed: 0,StudentID,Student,Section,Grades,School
0,S-001,Pedro,A,82,School 1
1,S-002,Jose,B,85,School 1
2,S-003,Petra,A,90,School 2
3,S-004,Sam,B,89,School 3
4,S-005,Billy,C,92,School 2
5,S-006,Melandra,C,90,School 3
6,S-007,Johnny,C,83,


In [28]:
# Attach the Residence column to the merged frame. Column-wise concat matches
# rows on the index, so this relies on df_merge and df_3 sharing the same
# row labels.
df_dataset = pd.concat(objs=[df_merge, df_3], axis='columns')
df_dataset

Unnamed: 0,StudentID,Student,Section,Grades,School,Residence
0,S-001,Pedro,A,82,School 1,Laguna
1,S-002,Jose,B,85,School 1,Quezon
2,S-003,Petra,A,90,School 2,Cavite
3,S-004,Sam,B,89,School 3,Batangas
4,S-005,Billy,C,92,School 2,Rizal
5,S-006,Melandra,C,90,School 3,NCR
6,S-007,Johnny,C,83,,Batangas
