# Task 2 - Data Cleaning for Superstore Dataset
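This notebook loads the raw **Sample - Superstore.xls** file, cleans it (drops duplicate rows, fills missing postal codes, and adds profit-margin, order-year, and order-month columns), and saves the result as a new file called **Superstore_Cleaned.csv**.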

In [None]:
import pandas as pd

# Read the raw Superstore workbook into a DataFrame.
# Adjust the path below if the file is not in the notebook's working directory.
df = pd.read_excel(io="Sample - Superstore.xls")

# Preview the first five rows to confirm the load worked
df.head(n=5)

In [None]:
# Clean the loaded Superstore frame (rebinding `df`) and export it to CSV.

# 1. Drop duplicate rows
df = df.drop_duplicates()

# 2. Handle missing Postal Code values
#    Missing codes are replaced with the sentinel 0 so the column can be cast
#    to plain integers; the step is skipped when the column is absent.
if 'Postal Code' in df.columns:
    df['Postal Code'] = df['Postal Code'].fillna(0).astype(int)

# 3. Create Profit Margin safely
#    Vectorized replacement for a row-wise apply: divide column by column, then
#    overwrite rows whose Sales is missing or zero with 0 so no NaN/inf from
#    the division leaks into the result.
sales = df['Sales']
has_usable_sales = sales.notna() & sales.ne(0)
df['ProfitMargin'] = df['Profit'].div(sales).where(has_usable_sales, 0.0)

# 4. Extract Year and Month from Order Date
#    Parse the column once and reuse the parsed values for both derived columns.
order_dates = pd.to_datetime(df['Order Date'])
df['OrderYear'] = order_dates.dt.year
df['OrderMonth'] = order_dates.dt.to_period('M').astype(str)  # monthly period as text

# 5. Save cleaned dataset
df.to_csv("Superstore_Cleaned.csv", index=False)
print("✅ Superstore_Cleaned.csv created successfully!")

In [None]:
# Sanity check of the cleaned data: print the frame summary,
# then display the first five rows.
df.info()
df.head(n=5)