## Joining The Datasets Together
Datasets: Cleaned_OutreachData.csv, Cleaned_CampaignData.csv, Cleaned_ApplicantData.csv

In [7]:
import pandas as pd

# Load the three cleaned input tables (paths are relative to the working directory)
outreach_df = pd.read_csv('Cleaned_OutreachData.csv')
campaign_df = pd.read_csv('Cleaned_CampaignData.csv')
applicant_df = pd.read_csv('Cleaned_ApplicantData.csv')

# Step 1: attach campaign attributes to each outreach row.
# how='left' keeps every outreach row even when no campaign matches.
# The campaign key 'ID' is redundant with 'Campaign_ID' after the join,
# so it is dropped in the same chain instead of mutating in place.
merged_df = outreach_df.merge(
    campaign_df,
    left_on='Campaign_ID',
    right_on='ID',
    how='left',
).drop(columns=['ID'])

# Step 2: attach applicant attributes, again keeping every outreach row
# even if no applicant matches; the redundant 'App_ID' key is dropped.
# NOTE(review): the inputs share a 'University' column, so pandas' default
# suffixes yield 'University_x' / 'University_y' plus a plain 'University'
# in the result. Names are left as-is here because the exported file may
# already be consumed elsewhere -- consider explicit `suffixes=` if not.
final_df = merged_df.merge(
    applicant_df,
    left_on='Reference_ID',
    right_on='App_ID',
    how='left',
).drop(columns=['App_ID'])

# Preview the joined frame
print(final_df.head())

# Sanity check: a left join never removes left-hand rows, so any growth in
# row count means the right-hand key was not unique and outreach rows were
# duplicated. Report it rather than raising, so the notebook still runs.
if len(merged_df) != len(outreach_df):
    print(
        f"\nNote: Campaign join changed the row count from {len(outreach_df)} "
        f"to {len(merged_df)}; campaign 'ID' values are not unique."
    )
if len(final_df) != len(merged_df):
    print(
        f"\nNote: Applicant join changed the row count from {len(merged_df)} "
        f"to {len(final_df)}; applicant 'App_ID' values are not unique."
    )

   Reference_ID          Received_At                      University_x  \
0         12345  2023-04-28 12:15:19  Illinois Institute of Technology   
1         12345  2023-04-28 12:15:19  Illinois Institute of Technology   
2         12345  2023-04-28 12:15:19  Illinois Institute of Technology   
3         12345  2023-04-28 13:04:05  Illinois Institute of Technology   
4         12345  2023-04-28 13:04:05  Illinois Institute of Technology   

  Caller_Name   Outcome_1     Remark Campaign_ID Escalation_Required  \
0     Shailja   Connected  No Remark      IANF23                  No   
1     Shailja   Connected  No Remark      IANF23                  No   
2     Shailja   Connected  No Remark      IANF23                  No   
3     Shailja  Reschedule  No Remark      IANF23                  No   
4     Shailja  Reschedule  No Remark      IANF23                  No   

                                     Name        Category  Intake  \
0  GR GS SP25 Campaign- Deferrals to SP25  Post Admis

Checking the shape and column types of the joined dataset

In [14]:
# Dimensions of the joined frame as a (rows, columns) tuple
final_df.shape

(39717, 17)

In [15]:
# Data type of each column in the joined frame
final_df.dtypes

Reference_ID            int64
Received_At            object
University_x           object
Caller_Name            object
Outcome_1              object
Remark                 object
Campaign_ID            object
Escalation_Required    object
Name                   object
Category               object
Intake                 object
University_y           object
Status                 object
Start_Date             object
Country                object
University             object
Phone_Number           object
dtype: object

Verifying the dataset: missing values and duplicate rows

In [16]:
# Tally null entries per column and show the tally under a heading
missing_summary = final_df.isna().sum()
print("Missing values in each column:", missing_summary, sep="\n")

# Count rows that repeat an earlier row across every column
duplicate_count = final_df.duplicated().sum()
print(f"\nNumber of duplicate rows: {duplicate_count}")

Missing values in each column:
Reference_ID            0
Received_At             0
University_x            0
Caller_Name             0
Outcome_1               0
Remark                  0
Campaign_ID             0
Escalation_Required     0
Name                    0
Category                0
Intake                  0
University_y            0
Status                  0
Start_Date              0
Country                 0
University              0
Phone_Number           11
dtype: int64

Number of duplicate rows: 0


Replacing missing values

In [17]:
# Flag the rows whose phone number is absent and report how many there are
phone_is_missing = final_df['Phone_Number'].isna()
missing_count = phone_is_missing.sum()
print(f"Number of missing Phone_Number entries to be filled: {missing_count}")

# Write the placeholder text into exactly those rows
final_df.loc[phone_is_missing, 'Phone_Number'] = 'Not Available'

# Confirm that no null phone numbers remain
print("Missing values after filling:", final_df['Phone_Number'].isna().sum())


Number of missing Phone_Number entries to be filled: 11
Missing values after filling: 0


Exporting the cleaned, joined dataset

In [18]:
# Write the joined frame to CSV (without the index column) and confirm
output_path = 'Outreach_Campaign_Applicants_FinalData.csv'
final_df.to_csv(output_path, index=False)

print("Outreach_Campaign_Applicants_FinalData.csv created successfully!")

Outreach_Campaign_Applicants_FinalData.csv created successfully!
