
# **LEVIS Stocktake KPI Dashboard Project**

# 1. Data Cleaning
This notebook cleans the raw LEVIS stocktake data:
- Loads the raw `.csv`
- Parses period dates
- Cleans numeric columns (removes commas, extra zeros)
- Drops duplicates
- Checks for missing values
- Saves the cleaned file for analysis

**Next step:** Use this cleaned data for the KPI calculations and the dashboard.

In [None]:
# 📦 1. Import libraries
import pandas as pd

# 📂 2. Load raw CSV data
# Replace with your Google Colab path if needed (e.g., /content/LEVIS_STOCKTAKE.csv)
df = pd.read_csv('data/LEVIS STOCKTAKE.csv')

# 🔍 3. Quick preview
print(df.head())
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
df.info()

In [None]:
# 📅 4. Convert both period boundary columns from day/month/year text to datetimes
for date_col in ('Period Start', 'Period End'):
    df[date_col] = pd.to_datetime(df[date_col], format='%d/%m/%Y')

# 🔢 5. Names of the columns that the numeric cleaning step will process
numeric_cols = [
    'Beginning Inventory',
    'Shipment',
    'Transfer In',
    'Transfer Out',
    'RTV',
    'Sales',
    'Ending Inventory',
]

In [None]:
for col in numeric_cols:
    # Work on a text view of the column, computed once, so separators can be
    # removed before the values are converted to float.
    raw_text = df[col].astype(str)
    no_commas = raw_text.str.replace(',', '', regex=False)

    if raw_text.str.endswith('00.0').any():
        # At least one raw value ends in '00.0': strip the trailing '.0' from
        # every value and scale the whole column down by 100.
        # NOTE(review): the scaling decision is column-wide and is triggered by
        # a single matching value — confirm this matches the raw export format.
        df[col] = (
            no_commas
            .str.strip()
            # Anchored to the end of the string so that only a trailing '.0'
            # is removed; an unanchored literal replace would also rewrite
            # interior digits (e.g. '1000.05' would become '10005').
            .str.replace(r'\.0$', '', regex=True)
            .astype(float) / 100
        )
    else:
        # No raw value ends in '00.0': every '.' is dropped before conversion.
        # NOTE(review): presumably '.' is a thousands separator in this case;
        # a float-typed column such as 12.0 would become 120.0 — verify
        # against the raw data.
        df[col] = no_commas.str.replace('.', '', regex=False).astype(float)

In [None]:
# 🧹 6. Remove duplicate rows, modifying df in place
df.drop_duplicates(inplace=True)

# 🕵️ 7. Report the number of missing values in each column
missing_counts = df.isnull().sum()
print("Missing values:\n", missing_counts)

# 💾 8. Write the cleaned data to CSV without the index column
cleaned_path = 'data/LEVIS_STOCKTAKE_cleaned.csv'
df.to_csv(cleaned_path, index=False)

# ✅ 9. Show the first rows of the cleaned data
df.head()