# Data Cleaning - Mall Customers Dataset

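This notebook walks through the data-cleaning steps applied to the **Mall_Customers.csv** dataset for Task 1 of the Data Analyst Internship: handling missing values, removing duplicate rows, standardizing text values and column names, fixing data types, and exporting the cleaned file.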

In [None]:
import pandas as pd
from google.colab import files

# Upload dataset
# files.upload() opens Colab's file picker and returns a dict keyed by the
# name each uploaded file was saved under.
uploaded = files.upload()

# Read the file that was actually uploaded rather than a hard-coded name:
# Colab may save a repeated upload under a different name
# (e.g. "Mall_Customers (1).csv"), in which case the fixed name would load a
# stale copy. Fall back to the default name when nothing was uploaded so a
# file already present in the working directory still loads.
csv_name = next(iter(uploaded), "Mall_Customers.csv")
df = pd.read_csv(csv_name)
df.head()

In [None]:
# Basic info
# (rows, columns) of the loaded frame.
print(df.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() emitted a stray "None" line.
df.info()
# Summary statistics, rendered as the cell's output.
df.describe()

In [None]:
# Handle missing values
print("Missing values before cleaning:\n", df.isnull().sum())
# Forward-fill: each missing cell takes the last valid value above it in the
# same column. DataFrame.ffill() is the supported spelling; the
# fillna(method='ffill') form is deprecated in recent pandas releases.
# Cells with no earlier valid value in their column remain missing, which the
# count printed below will reveal.
df = df.ffill()
print("Missing values after cleaning:\n", df.isnull().sum())

In [None]:
# Drop fully duplicated rows, reporting the duplicate count on either side.
n_dupes_before = df.duplicated().sum()
print("Duplicates before:", n_dupes_before)

# Keeps the first occurrence of each duplicated row (pandas default).
df = df.drop_duplicates()

n_dupes_after = df.duplicated().sum()
print("Duplicates after:", n_dupes_after)

In [None]:
# Normalise the Gender labels: trim surrounding whitespace, then lower-case,
# so variants that differ only in spacing or case collapse to one value.
gender_trimmed = df['Gender'].str.strip()
df['Gender'] = gender_trimmed.str.lower()

# Show the distinct labels that remain after normalisation.
df['Gender'].unique()

In [None]:
# Tidy the column labels step by step: strip surrounding whitespace,
# lower-case, and replace spaces with underscores.
tidy_columns = df.columns.str.strip()
tidy_columns = tidy_columns.str.lower()
tidy_columns = tidy_columns.str.replace(" ", "_")
df.columns = tidy_columns

# Preview the frame with its renamed columns.
df.head()

In [None]:
# Cast the age column to integers; every other column keeps its dtype.
df = df.astype({'age': int})

In [None]:
# Write the cleaned frame to CSV (index column omitted), then hand the file
# to Colab's download helper.
cleaned_csv = "Mall_Customers_Cleaned.csv"
df.to_csv(cleaned_csv, index=False)
files.download(cleaned_csv)