# 🧹 Data Cleaning and Preprocessing
This notebook demonstrates, step by step, how to clean and preprocess a customer dataset using Python and pandas.

In [None]:
# Step 1: Import Required Libraries
import pandas as pd

In [None]:
# Step 2: Load the Dataset
# The relative path is resolved against the current working directory,
# so customer.csv must be located there.
df = pd.read_csv(filepath_or_buffer="customer.csv")
df.head(n=5)

In [None]:
# Step 3: View Basic Information
print("Initial Data Info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; print(df.info()) would append a stray "None".
df.info()

In [None]:
# Step 4: Standardize Column Headers
# Normalise the header names one transformation at a time:
trimmed_headers = df.columns.str.strip()                    # drop leading/trailing whitespace
lowered_headers = trimmed_headers.str.lower()               # lower-case everything
snake_headers = lowered_headers.str.replace(" ", "_")       # spaces -> underscores
df.columns = snake_headers
df.head()

In [None]:
# Step 5: Handle Missing Values
# Show the number of missing entries in each column before anything is removed.
print("Missing values before cleaning:")
print(df.isna().sum())

# Keep only the rows that have a value in 'age'.
df = df.dropna(axis="index", subset=["age"])

In [None]:
# Step 6: Convert 'age' to Integer
# Cast the column to int, then store the result back under the same name.
age_as_int = df['age'].astype(int)
df['age'] = age_as_int

In [None]:
# Step 7: Standardize 'gender' and 'country_name'
# Gender: trim surrounding whitespace and lower-case.
df['gender'] = df['gender'].str.strip().str.lower()

# Country: trim and title-case first, so differently-cased spellings of the
# same name collapse to one form (e.g. 'united states' -> 'United States').
df['country_name'] = df['country_name'].str.strip().str.title()

# Map abbreviations to the canonical name. The keys must be written in their
# title-cased form because this runs after .str.title() ('usa'/'USA' -> 'Usa').
# Entries such as 'United states' can never match at this point, and
# 'United States' -> 'United States' would be a no-op, so neither is listed.
df['country_name'] = df['country_name'].replace({'Usa': 'United States'})

In [None]:
# Step 8: Convert 'registration_date' to datetime
# Remember which rows held a value before parsing, so parse failures can be
# told apart from values that were already missing.
had_registration_date = df['registration_date'].notna()

# dayfirst=True reads ambiguous dates as day/month; errors='coerce' turns
# anything that cannot be parsed into NaT instead of raising.
df['registration_date'] = pd.to_datetime(df['registration_date'], dayfirst=True, errors='coerce')

# Coercion is silent, so report how many previously present values were lost.
unparsed_count = int((had_registration_date & df['registration_date'].isna()).sum())
print(f"registration_date values that could not be parsed (set to NaT): {unparsed_count}")

In [None]:
# Step 9: Remove Duplicate Rows
# Compare rows across all columns and retain the first occurrence of each.
df = df.drop_duplicates(subset=None, keep="first")

In [None]:
# Step 10: Final Overview and Save
print("Final Data Info:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; print(df.info()) would append a stray "None".
df.info()

# index=False keeps the pandas row index out of the written file.
df.to_csv("customer_cleaned.csv", index=False)
print("✅ Cleaned data saved as 'customer_cleaned.csv'")