In [1]:
# Ask for a file through Colab's upload helper. The recorded output of this
# cell shows the file being saved as marketing_campaign.csv, which is the
# path the cleaning cell below reads from the working directory.
from google.colab import files
uploaded = files.upload()  # return value is not used by the visible cells

Saving marketing_campaign.csv to marketing_campaign.csv


In [2]:
import pandas as pd
from google.colab import files

# Dt_Customer is stored day-first in the raw file (e.g. "21-08-2013"), so the
# same explicit format is used for both parsing and re-serialising the dates.
DATE_FORMAT = "%d-%m-%Y"

# 1. Load the dataset (it's tab-separated)
df = pd.read_csv("marketing_campaign.csv", sep="\t")
print("🔹 Raw Dataset Loaded. Shape:", df.shape)
print(df.head(), "\n")

# 2. Report missing values per column.
#    This step is report-only: nothing is imputed or dropped here.
print("🔹 Missing Values:")
print(df.isnull().sum(), "\n")

# 3. Remove duplicate rows
initial_shape = df.shape
df = df.drop_duplicates()
print(f"🔹 Removed {initial_shape[0] - df.shape[0]} duplicate rows.\n")

# 4. Standardize column headers (lowercase, no spaces)
print("🔹 Column Names BEFORE:")
print(list(df.columns))
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")
print("🔹 Column Names AFTER:")
print(list(df.columns), "\n")

# 5. Standardize text values.
#    Each column is optional, so the normalisation AND the summary print both
#    sit behind the same presence check (printing unconditionally would raise
#    KeyError when a column is absent).
for text_column, label in (
    ("education", "Education Levels"),
    ("marital_status", "Marital Statuses"),
):
    if text_column in df.columns:
        df[text_column] = df[text_column].str.strip().str.lower()
        print(f"🔹 Unique {label}:", df[text_column].unique())
print()

# 6. Convert date column to consistent format.
#    An explicit format is required: without it pandas guesses month-first for
#    ambiguous values such as "04-09-2012", which swaps day and month or turns
#    valid day-first dates (day > 12) into NaT under errors='coerce'.
if 'dt_customer' in df.columns:
    raw_dates = df['dt_customer']
    parsed_dates = pd.to_datetime(raw_dates, format=DATE_FORMAT, errors='coerce')

    # Surface values that were present but did not match the expected format
    # instead of letting errors='coerce' drop them silently.
    unparsed_count = int((parsed_dates.isna() & raw_dates.notna()).sum())
    if unparsed_count:
        print(
            f"🔹 Warning: {unparsed_count} dt_customer value(s) did not match "
            f"{DATE_FORMAT} and were set to missing."
        )

    df['dt_customer'] = parsed_dates.dt.strftime(DATE_FORMAT)
    print("🔹 Date Column Formatted:\n", df[['dt_customer']].head(), "\n")

# 7. Check and fix data types
# Convert income to numeric (non-numeric entries become NaN)
if 'income' in df.columns:
    df['income'] = pd.to_numeric(df['income'], errors='coerce')

print("🔹 Data Types After Conversion:")
print(df.dtypes, "\n")

# 8. Final overview
print("🔹 Final Cleaned Dataset Info:")
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly rather than wrapped in print() (which would emit "None").
df.info()
print("\n🔹 First 5 Rows of Cleaned Data:")
print(df.head())

# 9. Save and Download cleaned dataset
df.to_csv("Cleaned_Marketing_Campaign.csv", index=False)
files.download("Cleaned_Marketing_Campaign.csv")


🔹 Raw Dataset Loaded. Shape: (2240, 29)
     ID  Year_Birth   Education Marital_Status   Income  Kidhome  Teenhome  \
0  5524        1957  Graduation         Single  58138.0        0         0   
1  2174        1954  Graduation         Single  46344.0        1         1   
2  4141        1965  Graduation       Together  71613.0        0         0   
3  6182        1984  Graduation       Together  26646.0        1         0   
4  5324        1981         PhD        Married  58293.0        1         0   

  Dt_Customer  Recency  MntWines  ...  NumWebVisitsMonth  AcceptedCmp3  \
0  04-09-2012       58       635  ...                  7             0   
1  08-03-2014       38        11  ...                  5             0   
2  21-08-2013       26       426  ...                  4             0   
3  10-02-2014       26        11  ...                  6             0   
4  19-01-2014       94       173  ...                  5             0   

   AcceptedCmp4  AcceptedCmp5  AcceptedCmp1  A

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>