# Dataset: CampaignData.csv
# Data Cleaning Process

Importing necessary libraries

In [20]:
import numpy as np
import pandas as pd

Loading Dataset 

In [21]:
# Read the raw campaign CSV; the relative path is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer="CampaignData.csv")

Total Rows & Columns

In [22]:
# Report the frame's dimensions: row count first, then column count.
print(f"Total Rows: {len(df)}")
print(f"Total Columns: {len(df.columns)}")

Total Rows: 23
Total Columns: 7


Checking for Missing Values

In [23]:
# Per-column count of missing entries (isna is the same check as isnull).
print(df.isna().sum())

ID            0
Name          0
Category      0
Intake        0
University    0
Status        0
Start_Date    0
dtype: int64


Detecting Duplicate Rows

In [24]:
# Count rows that exactly repeat an earlier row across all columns
# (the first occurrence of each row is not counted as a duplicate).
duplicates = df.duplicated(subset=None, keep="first").sum()
print("Duplicate Rows : ", duplicates, sep=" ")

Duplicate Rows :  0


Checking Data Types

In [25]:
# Print the dtype pandas assigned to each column so mistyped columns can be spotted.
print(df.dtypes)

ID            object
Name          object
Category      object
Intake        object
University    object
Status        object
Start_Date    object
dtype: object


Correcting Data Types

In [26]:
# Parse Start_Date strictly as month/day/year hour:minute.
# errors="raise" makes any value that does not match the format abort the cell
# instead of being silently coerced.
df["Start_Date"] = pd.to_datetime(
    arg=df["Start_Date"],
    errors="raise",
    format="%m/%d/%Y %H:%M",
)

# Sanity check: show the resulting dtype and the first five parsed values.
print(df["Start_Date"].dtype)
print(df["Start_Date"].head())

datetime64[ns]
0   2024-03-20
1   2024-09-11
2   2024-07-11
3   2024-03-06
4   2024-03-08
Name: Start_Date, dtype: datetime64[ns]


Consistency Checking

In [27]:
# For every column, print how many distinct values it holds and list them,
# followed by a 50-dash separator, so inconsistent spellings can be eyeballed.
for col, values in df.items():
    unique_vals = values.unique()
    print(f"{col} ({len(unique_vals)} unique values):")
    print(unique_vals)
    print("-" * 50)

ID (23 unique values):
['AANF23' 'AND23' 'BPNANF23' 'BPNND23' 'CTKANF23' 'DANE24' 'DNA24'
 'FA24AND' 'FA24DNA' 'FA24DNI' 'FA24IP' 'FA24SIC' 'IANF23' 'IND23'
 'OANF23' 'OND23' 'SP25AI2S' 'SP25AND' 'SP25DN1' 'SP25DSP' 'SP25IP'
 'SP25NIQ' 'SP25SIC']
--------------------------------------------------
Name (23 unique values):
['GR GS FA24 Campaign- Admit, No Deposit'
 'GR GS FA24 Campaign- Deposit No Action'
 'GR GS FA24 Campaign- Deposit, No I-20'
 'GR GS FA24 Campaign- In Progress'
 'GR GS FA24 Campaign- Submit, Incomplete'
 'GR GS Call Campaign: India ANF' 'GR GS Call Campaign: India No Deposit'
 'GR GS Call Campaign: Other ANF' 'GR GS Call Campaign: Other No Deposit'
 'GR GS SP25 Campaign- All I-20s Sent'
 'GR GS SP25 Campaign- Admit, No Deposit'
 'GR GS SP25 Campaign- Deposit, No I-20'
 'GR GS SP25 Campaign- Deferrals to SP25'
 'GR GS SP25 Campaign- In Progress' 'GR GS SP25 Campaign- New Inquiry'
 'GR GS SP25 Campaign- Submitted, Incomplete'
 'GR GS Call Campaign: Africa ANF'
 'GR GS C

Exporting Cleaned Dataset

In [28]:
# Write the cleaned frame to disk; index=False keeps the row index out of the file.
df.to_csv(path_or_buf="Cleaned_CampaignData.csv", index=False)

# Confirmation message for the notebook reader.
print("Dataset exported successfully as Cleaned_CampaignData.csv")

Dataset exported successfully as Cleaned_CampaignData.csv


# Finally, the "CampaignData.csv" dataset is cleaned!