# Client Data Cleaning Project (Pandas)
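This notebook demonstrates a complete data cleaning workflow in pandas, using a simulated dataset that mimics common real-world data-quality issues: inconsistent text formatting, currency-formatted amounts, unparseable dates, out-of-range ages, missing emails, and duplicate customer records.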


## Loading the dataset

In [None]:
import pandas as pd
# Read the messy client dataset into a DataFrame and preview its first rows.
df = pd.read_csv(filepath_or_buffer="/mnt/data/messy_data.csv")
df.head(n=5)

## Cleaning Steps

In [None]:
# Names: trim surrounding whitespace and normalise to Title Case.
df['name'] = df['name'].str.strip().str.title()

# Countries: trim, upper-case, then collapse the US spellings into one code.
df['country'] = df['country'].str.strip().str.upper()
df['country'] = df['country'].replace({'USA': 'US', 'U.S.': 'US'})

# Amounts: strip "$" and thousands separators, then convert to float.
# The pattern is a raw string so that "\$" is not an invalid escape sequence.
# astype(float) still raises on values that are not numeric after stripping.
df['amount_spent'] = df['amount_spent'].replace(r'[\$,]', '', regex=True).astype(float)

# Signup dates: parse to datetime; unparseable values become NaT.
df['signup_date'] = pd.to_datetime(df['signup_date'], errors='coerce')

# Ages: coerce non-numeric entries to missing (same policy as signup_date),
# blank out values outside 0-120, then fill every gap with the median of the
# remaining valid ages. Using where() instead of assigning None through .loc
# avoids writing a missing value into an integer column.
age = pd.to_numeric(df['age'], errors='coerce')
age = age.where(age.between(0, 120))
df['age'] = age.fillna(age.median())

# Emails: replace missing values with a placeholder address.
df['email'] = df['email'].fillna('unknown@example.com')

# Duplicates: keep, per customer_id, the row with the latest signup_date.
# NaT rows are sorted FIRST so that keep='last' never prefers a record whose
# date failed to parse over one with a real date; the stable sort makes the
# choice among equal dates deterministic (original row order wins).
df = (
    df.sort_values('signup_date', na_position='first', kind='stable')
      .drop_duplicates(subset='customer_id', keep='last')
)

df