In [1]:
import pandas as pd
import numpy as np

# Cleaning parameters, named so the thresholds are visible and tunable.
REFERENCE_YEAR = 2026  # year from which customer age is derived
MIN_AGE = 18           # exclusive lower bound on retained ages
MAX_AGE = 90           # exclusive upper bound on retained ages


def clean_customer_data(raw_df, reference_year=REFERENCE_YEAR,
                        min_age=MIN_AGE, max_age=MAX_AGE):
    """Return a cleaned copy of the marketing-campaign frame.

    Steps, in order:
      1. drop fully duplicated rows;
      2. lower-case column names and replace spaces with underscores;
      3. fill missing ``income`` with the column median;
      4. parse ``dt_customer`` as a day-first date;
      5. add ``age = reference_year - year_birth``;
      6. keep only rows with ``min_age < age < max_age``;
      7. lower-case and strip ``education`` and ``marital_status``.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame as loaded from the CSV. It is not modified.
    reference_year : int, default REFERENCE_YEAR
        Year used to compute ``age``.
    min_age, max_age : int
        Exclusive bounds of the retained age range.

    Returns
    -------
    pandas.DataFrame
        A new frame with the standardized column names plus ``age``.

    Raises
    ------
    KeyError
        If, after renaming, any of ``income``, ``dt_customer``,
        ``year_birth``, ``education`` or ``marital_status`` is absent.
    """
    # drop_duplicates compares every column. NOTE(review): if the ID column
    # is unique per row, this step removes nothing -- confirm whether
    # duplicates should be detected with the identifier excluded.
    # .copy() gives an independent frame so the column assignments below
    # never write into a slice of the caller's data.
    cleaned = raw_df.drop_duplicates().copy()

    # regex=False: the space is a literal, independent of the pandas version's
    # default for ``regex``.
    cleaned.columns = cleaned.columns.str.lower().str.replace(" ", "_", regex=False)

    # Median is computed on the de-duplicated frame, before the age filter.
    cleaned['income'] = cleaned['income'].fillna(cleaned['income'].median())

    cleaned['dt_customer'] = pd.to_datetime(cleaned['dt_customer'], dayfirst=True)

    cleaned['age'] = reference_year - cleaned['year_birth']

    # Both bounds are exclusive, matching the original filter.
    plausible_age = (cleaned['age'] > min_age) & (cleaned['age'] < max_age)
    # Explicit copy: assigning columns on a boolean-indexed result is what
    # triggers SettingWithCopyWarning in pandas without Copy-on-Write.
    cleaned = cleaned.loc[plausible_age].copy()

    for text_col in ('education', 'marital_status'):
        cleaned[text_col] = cleaned[text_col].str.lower().str.strip()

    return cleaned


# __name__ is "__main__" inside a notebook kernel or when run as a script, so
# the load/report/save steps below still execute there; they are skipped only
# when this code is imported as a module.
if __name__ == "__main__":
    # Load the raw, tab-separated dataset; keep it under its own name so the
    # cell can be re-run without reloading a mutated frame.
    raw_df = pd.read_csv("marketing_campaign.csv", sep="\t")

    print("Initial Shape:", raw_df.shape)

    # Per-column missing-value counts, reported on the raw column names.
    print("\nMissing Values:\n", raw_df.isnull().sum())

    # `df` remains the name of the cleaned frame for any later cells.
    df = clean_customer_data(raw_df)

    # Save the cleaned dataset without the index column.
    df.to_csv("cleaned_customer_personality.csv", index=False)

    print("\n✅ Data Cleaning Completed Successfully!")
    print("Final Shape:", df.shape)


Initial Shape: (2240, 29)

Missing Values:
 ID                      0
Year_Birth              0
Education               0
Marital_Status          0
Income                 24
Kidhome                 0
Teenhome                0
Dt_Customer             0
Recency                 0
MntWines                0
MntFruits               0
MntMeatProducts         0
MntFishProducts         0
MntSweetProducts        0
MntGoldProds            0
NumDealsPurchases       0
NumWebPurchases         0
NumCatalogPurchases     0
NumStorePurchases       0
NumWebVisitsMonth       0
AcceptedCmp3            0
AcceptedCmp4            0
AcceptedCmp5            0
AcceptedCmp1            0
AcceptedCmp2            0
Complain                0
Z_CostContact           0
Z_Revenue               0
Response                0
dtype: int64

✅ Data Cleaning Completed Successfully!
Final Shape: (2237, 30)
