In [206]:
import pandas as pd

# Load the two employee tables used by the merge and concat examples below.
# Forward slashes are used in the relative paths because they resolve on Windows,
# macOS and Linux alike; a backslash is only a path separator on Windows.
df_A = pd.read_csv('Dataframes/df_employees.csv')
df_B = pd.read_csv('Dataframes/df_emp_details.csv')

# Preview the first five rows of each table.
print("DF Employees:\n", df_A.head(), "\n")
print("DF Employee Details:\n", df_B.head())

DF Employees:
   Emp_ID Dept_ID     Name  Age  Salary
0   E001    D001    Alice   49   69779
1   E002    D002      Bob   41   62247
2   E003    D004  Charlie   30   53546
3   E004    D001    David   46   43066
4   E005    D002   Evelyn   32   42264 

DF Employee Details:
   Emp_ID Dept_ID Location  Experience  Bonus
0   E010    D002   Sydney           3   9574
1   E011    D002    Tokyo          12   6596
2   E012    D001    Tokyo           2   8977
3   E013    D005   London           7   8311
4   E014    D002    Tokyo          10   4509


In [207]:
# Confirm Emp_ID holds no repeated values in either table before it is used as a merge key.
for label, table in (("df1 is unique?: ", df_A), ("df2 is unique?: ", df_B)):
    print(label, table['Emp_ID'].is_unique)

df1 is unique?:  True
df2 is unique?:  True


### 01 - Merging

1.1 - Merging using one key

In [208]:
# Inner join (the default) on Emp_ID alone. Dept_ID exists in both inputs but is
# not a key here, so both copies are kept with the _x / _y suffixes.
merged_one = df_A.merge(df_B, on='Emp_ID')
merged_one

Unnamed: 0,Emp_ID,Dept_ID_x,Name,Age,Salary,Dept_ID_y,Location,Experience,Bonus
0,E010,D002,Jane,47,79140,D002,Sydney,3,9574
1,E011,D003,Karl,39,47488,D002,Tokyo,12,6596
2,E012,D003,Lina,42,86429,D001,Tokyo,2,8977
3,E013,D001,Mike,37,46319,D005,London,7,8311
4,E014,D004,Nina,42,51768,D002,Tokyo,10,4509
5,E015,D003,Owen,30,55434,D005,Sydney,11,7031
6,E016,D001,Paul,33,68002,D005,Tokyo,5,9622
7,E017,D004,Queen,34,85616,D001,Sydney,14,7836
8,E018,D003,Rita,25,59459,D001,Sydney,7,2649
9,E019,D003,Steve,42,79202,D005,Tokyo,14,5529


1.2 - Merging using multiple keys

In [209]:
# Both Emp_ID and Dept_ID must agree for a row to survive the (default inner) join.
merge_keys = ['Emp_ID', 'Dept_ID']
merged_multi = df_A.merge(df_B, on=merge_keys)
merged_multi

Unnamed: 0,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
0,E010,D002,Jane,47,79140,Sydney,3,9574


1.3 - Using the `how` argument

In [210]:
# Build all four join flavours on Emp_ID first, then report their shapes.
inner_merge = df_A.merge(df_B, how='inner', on='Emp_ID')  # keys present in both tables
left_merge = df_A.merge(df_B, how='left', on='Emp_ID')    # every row of df_A
right_merge = df_A.merge(df_B, how='right', on='Emp_ID')  # every row of df_B
outer_merge = df_A.merge(df_B, how='outer', on='Emp_ID')  # union of keys from both tables

print("Inner join:", inner_merge.shape)
print("Left join:", left_merge.shape)
print("Right join:", right_merge.shape)
print("Outer join:", outer_merge.shape)

Inner join: (11, 9)
Left join: (20, 9)
Right join: (20, 9)
Outer join: (29, 9)


In [211]:
# Show the left join: df_A rows with no Emp_ID match in df_B carry NaN in the df_B columns.
left_merge

Unnamed: 0,Emp_ID,Dept_ID_x,Name,Age,Salary,Dept_ID_y,Location,Experience,Bonus
0,E001,D001,Alice,49,69779,,,,
1,E002,D002,Bob,41,62247,,,,
2,E003,D004,Charlie,30,53546,,,,
3,E004,D001,David,46,43066,,,,
4,E005,D002,Evelyn,32,42264,,,,
5,E006,D001,Frank,47,79051,,,,
6,E007,D004,Grace,31,51542,,,,
7,E008,D002,Helen,31,72880,,,,
8,E009,D001,Ian,26,68708,,,,
9,E010,D002,Jane,47,79140,D002,Sydney,3.0,9574.0


In [212]:
# Show the right join: every df_B row is kept, with df_A columns filled where Emp_ID matches.
right_merge

Unnamed: 0,Emp_ID,Dept_ID_x,Name,Age,Salary,Dept_ID_y,Location,Experience,Bonus
0,E010,D002,Jane,47.0,79140.0,D002,Sydney,3,9574
1,E011,D003,Karl,39.0,47488.0,D002,Tokyo,12,6596
2,E012,D003,Lina,42.0,86429.0,D001,Tokyo,2,8977
3,E013,D001,Mike,37.0,46319.0,D005,London,7,8311
4,E014,D004,Nina,42.0,51768.0,D002,Tokyo,10,4509
5,E015,D003,Owen,30.0,55434.0,D005,Sydney,11,7031
6,E016,D001,Paul,33.0,68002.0,D005,Tokyo,5,9622
7,E017,D004,Queen,34.0,85616.0,D001,Sydney,14,7836
8,E018,D003,Rita,25.0,59459.0,D001,Sydney,7,2649
9,E019,D003,Steve,42.0,79202.0,D005,Tokyo,14,5529


### 02 - Joining

In [213]:
# Load the student tables for the join examples. Forward slashes keep the
# relative paths portable; a backslash is only a path separator on Windows.
students_df = pd.read_csv('Dataframes/students.csv')
exam_df = pd.read_csv('Dataframes/exam_scores.csv')
# index_col=0 makes the first CSV column the row index of attendance_df.
attendance_df = pd.read_csv('Dataframes/attendance.csv', index_col=0)
# NOTE(review): this was annotated "Multi-index", but no index_col is passed, so the
# frame is loaded with a default integer index — confirm the intended index columns.
class_scores_df = pd.read_csv('Dataframes/class_scores.csv')

Setting the index

In [214]:
# Re-key both frames by Student_ID so they can be joined index-to-index.
students_indexed, exam_indexed = (
    frame.set_index('Student_ID') for frame in (students_df, exam_df)
)

In [215]:
# Index-on-index join that keeps only the Student_IDs present in both frames.
join_inner = students_indexed.join(other=exam_indexed, how='inner')
print("Join by Index (Inner Join):")
join_inner.head(n=5)

Join by Index (Inner Join):


Unnamed: 0_level_0,Name,Grade,Math_Score,Science_Score
Student_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Amal,A,85,90
2,Binu,B,76,82
3,Chathura,A,92,89
5,Eshan,B,67,70
6,Farah,A,88,85


In [216]:
# Index-on-index join that keeps the union of Student_IDs; missing scores become NaN.
join_outer = students_indexed.join(other=exam_indexed, how='outer')
print("Join by Index (Outer Join):")
join_outer.head(n=5)

Join by Index (Outer Join):


Unnamed: 0_level_0,Name,Grade,Math_Score,Science_Score
Student_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Amal,A,85.0,90.0
2,Binu,B,76.0,82.0
3,Chathura,A,92.0,89.0
4,Dewmini,C,,
5,Eshan,B,67.0,70.0


In [217]:
# Left join driven by a column: values in students_df's Name column are looked up
# in attendance_df's index.
join_on = students_df.join(other=attendance_df, how='left', on='Name')
print("Join using 'on' argument (Attendance by Name):")
join_on.head(n=5)

Join using 'on' argument (Attendance by Name):


Unnamed: 0,Student_ID,Name,Grade,Days_Present,Days_Total
0,1,Amal,A,40.0,45.0
1,2,Binu,B,38.0,45.0
2,3,Chathura,A,45.0,45.0
3,4,Dewmini,C,30.0,45.0
4,5,Eshan,B,36.0,45.0


### 03 - Concatenation

Concatenating DataFrames Vertically

In [225]:
# Stack df_B beneath df_A (axis=0); columns absent from one frame are filled with NaN.
concat_v = pd.concat([df_A, df_B], axis=0)
print(concat_v.shape)
concat_v

(40, 8)


Unnamed: 0,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
0,E001,D001,Alice,49.0,69779.0,,,
1,E002,D002,Bob,41.0,62247.0,,,
2,E003,D004,Charlie,30.0,53546.0,,,
3,E004,D001,David,46.0,43066.0,,,
4,E005,D002,Evelyn,32.0,42264.0,,,
5,E006,D001,Frank,47.0,79051.0,,,
6,E007,D004,Grace,31.0,51542.0,,,
7,E008,D002,Helen,31.0,72880.0,,,
8,E009,D001,Ian,26.0,68708.0,,,
9,E010,D002,Jane,47.0,79140.0,,,


Concatenating DataFrames Horizontally

In [226]:
# Place df_B's columns to the right of df_A's, aligning rows by index label.
concat_h = pd.concat([df_A, df_B], axis='columns')
concat_h.head(n=5)

Unnamed: 0,Emp_ID,Dept_ID,Name,Age,Salary,Emp_ID.1,Dept_ID.1,Location,Experience,Bonus
0,E001,D001,Alice,49,69779,E010,D002,Sydney,3,9574
1,E002,D002,Bob,41,62247,E011,D002,Tokyo,12,6596
2,E003,D004,Charlie,30,53546,E012,D001,Tokyo,2,8977
3,E004,D001,David,46,43066,E013,D005,London,7,8311
4,E005,D002,Evelyn,32,42264,E014,D002,Tokyo,10,4509


Concatenating with Join Logic on Axes

In [239]:
# With a column-wise concat, `join` decides how the row indexes are combined:
# 'inner' keeps only labels shared by both frames, 'outer' keeps the union.
concat_inner = pd.concat([df_A, df_B], join='inner', axis='columns')
concat_outer = pd.concat([df_A, df_B], join='outer', axis='columns')

print(f"Inner: {concat_inner.shape}\nOuter: {concat_outer.shape}")

Inner: (20, 10)
Outer: (20, 10)


In [234]:
# Preview the first five rows of the inner-join concatenation.
print("Inner join concat:")
concat_inner.head(n=5)

Inner join concat:


Unnamed: 0,Emp_ID,Dept_ID,Name,Age,Salary,Emp_ID.1,Dept_ID.1,Location,Experience,Bonus
0,E001,D001,Alice,49,69779,E010,D002,Sydney,3,9574
1,E002,D002,Bob,41,62247,E011,D002,Tokyo,12,6596
2,E003,D004,Charlie,30,53546,E012,D001,Tokyo,2,8977
3,E004,D001,David,46,43066,E013,D005,London,7,8311
4,E005,D002,Evelyn,32,42264,E014,D002,Tokyo,10,4509


In [236]:
# Preview the first five rows of the outer-join concatenation.
print("Outer join concat:")
concat_outer.head(n=5)

Outer join concat:


Unnamed: 0,Emp_ID,Dept_ID,Name,Age,Salary,Emp_ID.1,Dept_ID.1,Location,Experience,Bonus
0,E001,D001,Alice,49,69779,E010,D002,Sydney,3,9574
1,E002,D002,Bob,41,62247,E011,D002,Tokyo,12,6596
2,E003,D004,Charlie,30,53546,E012,D001,Tokyo,2,8977
3,E004,D001,David,46,43066,E013,D005,London,7,8311
4,E005,D002,Evelyn,32,42264,E014,D002,Tokyo,10,4509


Concatenating DataFrames by Ignoring Indexes

In [240]:
# Stack the frames and discard their original row labels, renumbering from 0.
concat_ignore = pd.concat([df_A, df_B], axis=0, ignore_index=True)
concat_ignore.head(n=5)

Unnamed: 0,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
0,E001,D001,Alice,49.0,69779.0,,,
1,E002,D002,Bob,41.0,62247.0,,,
2,E003,D004,Charlie,30.0,53546.0,,,
3,E004,D001,David,46.0,43066.0,,,
4,E005,D002,Evelyn,32.0,42264.0,,,


Concatenating with Group Keys

In [247]:
# Tag each source frame with a label; the labels become the outer level of a
# two-level row index, with the original row numbers as the inner level.
group_labels = ['Group_A', 'Group_B']
concat_keys = pd.concat([df_A, df_B], axis=0, keys=group_labels)
concat_keys

Unnamed: 0,Unnamed: 1,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
Group_A,0,E001,D001,Alice,49.0,69779.0,,,
Group_A,1,E002,D002,Bob,41.0,62247.0,,,
Group_A,2,E003,D004,Charlie,30.0,53546.0,,,
Group_A,3,E004,D001,David,46.0,43066.0,,,
Group_A,4,E005,D002,Evelyn,32.0,42264.0,,,
Group_A,5,E006,D001,Frank,47.0,79051.0,,,
Group_A,6,E007,D004,Grace,31.0,51542.0,,,
Group_A,7,E008,D002,Helen,31.0,72880.0,,,
Group_A,8,E009,D001,Ian,26.0,68708.0,,,
Group_A,9,E010,D002,Jane,47.0,79140.0,,,


### Additional methods

In [252]:
# Move both index levels back into ordinary columns; because the levels are
# unnamed they appear as level_0 (group label) and level_1 (original row number).
df = concat_keys.reset_index(drop=False)
df.head(n=5)

Unnamed: 0,level_0,level_1,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
0,Group_A,0,E001,D001,Alice,49.0,69779.0,,,
1,Group_A,1,E002,D002,Bob,41.0,62247.0,,,
2,Group_A,2,E003,D004,Charlie,30.0,53546.0,,,
3,Group_A,3,E004,D001,David,46.0,43066.0,,,
4,Group_A,4,E005,D002,Evelyn,32.0,42264.0,,,


In [253]:
# Give the column that holds the group label a meaningful name.
df = df.rename({'level_0': 'Group'}, axis='columns')
df.head(n=5)

Unnamed: 0,Group,level_1,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
0,Group_A,0,E001,D001,Alice,49.0,69779.0,,,
1,Group_A,1,E002,D002,Bob,41.0,62247.0,,,
2,Group_A,2,E003,D004,Charlie,30.0,53546.0,,,
3,Group_A,3,E004,D001,David,46.0,43066.0,,,
4,Group_A,4,E005,D002,Evelyn,32.0,42264.0,,,


In [255]:
# Remove the leftover per-frame row number. errors='ignore' means re-running the
# cell after the column is already gone does not raise.
df = df.drop('level_1', axis='columns', errors='ignore')
df.head(n=5)

Unnamed: 0,Group,Emp_ID,Dept_ID,Name,Age,Salary,Location,Experience,Bonus
0,Group_A,E001,D001,Alice,49.0,69779.0,,,
1,Group_A,E002,D002,Bob,41.0,62247.0,,,
2,Group_A,E003,D004,Charlie,30.0,53546.0,,,
3,Group_A,E004,D001,David,46.0,43066.0,,,
4,Group_A,E005,D002,Evelyn,32.0,42264.0,,,
