# 05_visualization_export.ipynb

## **Objective:**
Aggregate total customer purchase amounts by region, by product category, and by purchase frequency, then export the three result tables to an Excel workbook.

---

## **1️⃣ Import Necessary Libraries**

In [7]:
import pandas as pd
import logging
import os

## **2️⃣ Set Up Logging**

In [8]:
# Configure the root logger: show INFO and above, formatted as "timestamp - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

## **3️⃣ Define File Paths**

In [9]:
# Relative path of the input CSV that is passed to load_data().
DATA_PATH = "data/customer_data_clean.csv"
# Relative path of the Excel workbook the aggregated tables are written to.
OUTPUT_PATH = "data/aggregated_data.xlsx"

## **4️⃣ Load Dataset**

In [10]:
def load_data(file_path):
    """Read the CSV at ``file_path`` into a DataFrame.

    Returns the loaded DataFrame on success. Returns None, after logging an
    error, when the path does not exist or when reading the file fails.
    """
    if os.path.exists(file_path):
        try:
            frame = pd.read_csv(file_path)
        except Exception as exc:
            # Any read/parse failure is reported and turned into a None result.
            logging.error(f"Error loading data: {exc}")
            return None
        logging.info("Data successfully loaded.")
        return frame

    logging.error(f"File not found: {file_path}")
    return None

# Load data; df is None when the file is missing or could not be read.
df = load_data(DATA_PATH)

2025-02-28 03:39:23,315 - ERROR - File not found: data/customer_data_clean.csv


## **5️⃣ Aggregate Data**

In [11]:
def aggregate_data(df):
    """Sum ``purchase_amount`` per region, product category, and purchase frequency.

    Returns a 3-tuple of DataFrames ``(region_sales, category_sales,
    frequency_sales)``. Each holds the grouping column plus a ``total_sales``
    column. Returns None, after logging an error, when ``df`` is None or when
    any of the aggregations raises.
    """
    if df is None:
        logging.error("No data available for aggregation.")
        return None

    # Order matters: callers index the result positionally.
    group_columns = ('region', 'product_category', 'purchase_frequency')

    try:
        summaries = tuple(
            df.groupby(column)['purchase_amount']
            .sum()
            .reset_index()
            .rename(columns={'purchase_amount': 'total_sales'})
            for column in group_columns
        )
    except Exception as err:
        logging.error(f"Error during data aggregation: {err}")
        return None

    logging.info("Data aggregation complete.")
    return summaries

# Aggregate data; the result is a (region, category, frequency) tuple of DataFrames, or None on failure.
aggregated_data = aggregate_data(df)

2025-02-28 03:39:23,333 - ERROR - No data available for aggregation.


## **6️⃣ Export Data to Excel**

In [12]:
# Write each aggregate to its own worksheet; nothing is written when aggregation returned None.
if aggregated_data:
    # Sheet names in the same order as the tables inside aggregated_data.
    sheet_names = ("Region Sales", "Category Sales", "Frequency Sales")
    try:
        with pd.ExcelWriter(OUTPUT_PATH) as writer:
            for position, sheet_name in enumerate(sheet_names):
                aggregated_data[position].to_excel(writer, sheet_name=sheet_name, index=False)
        logging.info(f"Aggregated data exported successfully: {OUTPUT_PATH}")
    except Exception as e:
        logging.error(f"Error exporting data: {e}")

## **7️⃣ Summary & Next Steps**
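✅ When the input CSV is available, purchase amounts are totalled by region, product category, and purchase frequency.  
✅ The three result tables are saved to an Excel file (`data/aggregated_data.xlsx`), one sheet per table.  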
➡️ Next, use this data for **Tableau visualization** and **business insights**.