In [1]:
# ============================================================================
# 1. IMPORT LIBRARIES AND LOAD DATA
# ============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

warnings.filterwarnings('ignore')

# Ensure required folders exist
os.makedirs('outputs/reports', exist_ok=True)
os.makedirs('outputs/figures', exist_ok=True)
os.makedirs('data/processed', exist_ok=True)

# Optional Plotly (used later for dashboards)
try:
    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    PLOTLY_AVAILABLE = True
except ImportError:
    PLOTLY_AVAILABLE = False
    print("Plotly not installed. Interactive charts will be skipped.")

# Load processed datasets
df_sales = pd.read_csv('data/processed/cleaned_retail_sales.csv')
df_sales['Order_Date'] = pd.to_datetime(df_sales['Order_Date'], errors='coerce')

rfm = pd.read_csv('data/processed/customer_segments.csv')
customer_clv = pd.read_csv('data/processed/customer_clv.csv')

print("=" * 80)
print("KPI DESIGN AND DASHBOARD PREPARATION")
print("=" * 80)
print(f"\nSales Data Shape: {df_sales.shape}")
print(f"Customer Segments Shape: {rfm.shape}")
print(f"Customer CLV Shape: {customer_clv.shape}")


KPI DESIGN AND DASHBOARD PREPARATION

Sales Data Shape: (10000, 45)
Customer Segments Shape: (1986, 12)
Customer CLV Shape: (1986, 7)


In [4]:
# ============================================================================
# 2. COMPREHENSIVE KPI FRAMEWORK
# ============================================================================

print("\n" + "=" * 80)
print("COMPREHENSIVE KPI FRAMEWORK")
print("=" * 80)

# Initialize KPI dictionary
kpis = {}

# -----------------------------
# REVENUE METRICS
# -----------------------------
print("\nREVENUE METRICS")

kpis['Total_Revenue'] = df_sales['Sales'].sum()
kpis['Total_Orders'] = df_sales['Order_ID'].nunique()
kpis['Avg_Order_Value'] = df_sales.groupby('Order_ID')['Sales'].sum().mean()
kpis['Total_Units_Sold'] = df_sales['Quantity'].sum()

if 'Profit' in df_sales.columns:
    kpis['Total_Profit'] = df_sales['Profit'].sum()
    kpis['Profit_Margin_Pct'] = (kpis['Total_Profit'] / kpis['Total_Revenue']) * 100
else:
    kpis['Total_Profit'] = kpis['Total_Revenue'] * 0.25
    kpis['Profit_Margin_Pct'] = 25.0

print(f"Total Revenue: ${kpis['Total_Revenue']:,.2f}")
print(f"Total Orders: {kpis['Total_Orders']:,}")
print(f"Average Order Value: ${kpis['Avg_Order_Value']:,.2f}")
print(f"Total Units Sold: {kpis['Total_Units_Sold']:,}")
print(f"Total Profit: ${kpis['Total_Profit']:,.2f}")
print(f"Profit Margin: {kpis['Profit_Margin_Pct']:.2f}%")

# -----------------------------
# CUSTOMER METRICS
# -----------------------------
print("\nCUSTOMER METRICS")

kpis['Total_Customers'] = df_sales['Customer_ID'].nunique()
kpis['Revenue_Per_Customer'] = kpis['Total_Revenue'] / kpis['Total_Customers']
kpis['Avg_Orders_Per_Customer'] = kpis['Total_Orders'] / kpis['Total_Customers']

customer_order_counts = df_sales.groupby('Customer_ID')['Order_ID'].nunique()
repeat_customers = (customer_order_counts > 1).sum()

kpis['Repeat_Customers'] = repeat_customers
kpis['Repeat_Customer_Rate'] = (repeat_customers / kpis['Total_Customers']) * 100
kpis['One_Time_Customers'] = kpis['Total_Customers'] - repeat_customers

print(f"Total Customers: {kpis['Total_Customers']:,}")
print(f"Revenue per Customer: ${kpis['Revenue_Per_Customer']:,.2f}")
print(f"Avg Orders per Customer: {kpis['Avg_Orders_Per_Customer']:.2f}")
print(f"Repeat Customers: {kpis['Repeat_Customers']:,} ({kpis['Repeat_Customer_Rate']:.2f}%)")

# -----------------------------
# PRODUCT METRICS
# -----------------------------
print("\nPRODUCT METRICS")

kpis['Total_SKUs'] = df_sales['Product_ID'].nunique()
kpis['Avg_Items_Per_Order'] = df_sales.groupby('Order_ID')['Quantity'].sum().mean()

print(f"Total SKUs: {kpis['Total_SKUs']:,}")
print(f"Avg Items per Order: {kpis['Avg_Items_Per_Order']:.2f}")

# -----------------------------
# CLV & MARKETING METRICS
# -----------------------------
print("\nCLV & MARKETING METRICS")

kpis['Avg_Customer_Lifetime_Value'] = customer_clv['CLV'].mean()
kpis['Customer_Acquisition_Cost'] = 50  # assumed CAC
kpis['CLV_to_CAC_Ratio'] = (
    kpis['Avg_Customer_Lifetime_Value'] / kpis['Customer_Acquisition_Cost']
)
kpis['Profit_Per_Customer'] = kpis['Total_Profit'] / kpis['Total_Customers']

avg_monthly_revenue_per_customer = kpis['Revenue_Per_Customer'] / 12
kpis['CAC_Payback_Months'] = (
    kpis['Customer_Acquisition_Cost'] /
    (avg_monthly_revenue_per_customer * 0.25)
)

print(f"Avg CLV: ${kpis['Avg_Customer_Lifetime_Value']:,.2f}")
print(f"CLV / CAC Ratio: {kpis['CLV_to_CAC_Ratio']:.2f}x")
print(f"Profit per Customer: ${kpis['Profit_Per_Customer']:,.2f}")
print(f"CAC Payback Period: {kpis['CAC_Payback_Months']:.1f} months")




COMPREHENSIVE KPI FRAMEWORK

REVENUE METRICS
Total Revenue: $1,078,670.98
Total Orders: 10,000
Average Order Value: $107.87
Total Units Sold: 50,065
Total Profit: $198,274.85
Profit Margin: 18.38%

CUSTOMER METRICS
Total Customers: 1,986
Revenue per Customer: $543.14
Avg Orders per Customer: 5.04
Repeat Customers: 1,912 (96.27%)

PRODUCT METRICS
Total SKUs: 499
Avg Items per Order: 5.01

CLV & MARKETING METRICS
Avg CLV: $2,692.01
CLV / CAC Ratio: 53.84x
Profit per Customer: $99.84
CAC Payback Period: 4.4 months


In [6]:
# ============================================================================
# 3. CREATE KPI DASHBOARD DATA
# ============================================================================

print("\n" + "=" * 80)
print("PREPARING KPI DASHBOARD DATA")
print("=" * 80)

# Save KPI summary
kpi_df = pd.DataFrame(list(kpis.items()), columns=['KPI', 'Value'])
kpi_df.to_csv('outputs/reports/kpi_summary.csv', index=False)
print("✓ Saved: outputs/reports/kpi_summary.csv")

# -----------------------------
# Monthly KPI Trends
# -----------------------------
print("\nCalculating Monthly KPI Trends...")

df_sales['YearMonth'] = df_sales['Order_Date'].dt.to_period('M')

monthly_kpis = df_sales.groupby('YearMonth').agg({
    'Sales': 'sum',
    'Order_ID': 'nunique',
    'Customer_ID': 'nunique',
    'Quantity': 'sum',
    'Product_ID': 'nunique'
}).reset_index()

monthly_kpis.columns = [
    'YearMonth', 'Revenue', 'Orders',
    'Customers', 'Units_Sold', 'SKUs'
]

monthly_kpis['AOV'] = monthly_kpis['Revenue'] / monthly_kpis['Orders']
monthly_kpis['Revenue_Per_Customer'] = (
    monthly_kpis['Revenue'] / monthly_kpis['Customers']
)
monthly_kpis['Items_Per_Order'] = (
    monthly_kpis['Units_Sold'] / monthly_kpis['Orders']
)

monthly_kpis['Revenue_Growth_Pct'] = monthly_kpis['Revenue'].pct_change() * 100
monthly_kpis['Customer_Growth_Pct'] = monthly_kpis['Customers'].pct_change() * 100
monthly_kpis['Order_Growth_Pct'] = monthly_kpis['Orders'].pct_change() * 100

monthly_kpis['YearMonth'] = monthly_kpis['YearMonth'].astype(str)

print(f"✓ Monthly KPIs calculated for {len(monthly_kpis)} months")
print("\nLast 6 months:")
print(monthly_kpis.tail(6).round(2))

monthly_kpis.to_csv('data/processed/monthly_kpis.csv', index=False)
print("✓ Saved: data/processed/monthly_kpis.csv")

# -----------------------------
# Category-Level KPIs
# -----------------------------
if 'Product_Category' in df_sales.columns:
    print("\nCalculating Category-Level KPIs...")

    category_kpis = df_sales.groupby('Product_Category').agg({
        'Sales': ['sum', 'mean'],
        'Order_ID': 'nunique',
        'Customer_ID': 'nunique',
        'Quantity': 'sum',
        'Product_ID': 'nunique'
    }).reset_index()

    category_kpis.columns = [
        'Product_Category', 'Total_Revenue', 'Avg_Order_Value',
        'Order_Count', 'Customer_Count', 'Units_Sold', 'SKU_Count'
    ]

    category_kpis['Revenue_Share_Pct'] = (
        category_kpis['Total_Revenue'] /
        category_kpis['Total_Revenue'].sum() * 100
    )

    category_kpis = category_kpis.sort_values(
        'Total_Revenue', ascending=False
    )

    print(category_kpis.round(2))

    category_kpis.to_csv(
        'outputs/reports/category_kpis.csv', index=False
    )
    print("✓ Saved: outputs/reports/category_kpis.csv")

# -----------------------------
# Regional KPIs
# -----------------------------
if 'Region' in df_sales.columns:
    print("\nCalculating Regional KPIs...")

    regional_kpis = df_sales.groupby('Region').agg({
        'Sales': ['sum', 'mean'],
        'Order_ID': 'nunique',
        'Customer_ID': 'nunique',
        'Quantity': 'sum'
    }).reset_index()

    regional_kpis.columns = [
        'Region', 'Total_Revenue', 'Avg_Order_Value',
        'Order_Count', 'Customer_Count', 'Units_Sold'
    ]

    regional_kpis['Revenue_Share_Pct'] = (
        regional_kpis['Total_Revenue'] /
        regional_kpis['Total_Revenue'].sum() * 100
    )

    regional_kpis = regional_kpis.sort_values(
        'Total_Revenue', ascending=False
    )

    print(regional_kpis.round(2))

    regional_kpis.to_csv(
        'outputs/reports/regional_kpis.csv', index=False
    )
    print("✓ Saved: outputs/reports/regional_kpis.csv")




PREPARING KPI DASHBOARD DATA
✓ Saved: outputs/reports/kpi_summary.csv

Calculating Monthly KPI Trends...
✓ Monthly KPIs calculated for 14 months

Last 6 months:
   YearMonth   Revenue  Orders  Customers  Units_Sold  SKUs     AOV  \
8    2022-09  76398.58     720        608        3548   379  106.11   
9    2022-10  77173.03     744        620        3771   380  103.73   
10   2022-11  78702.98     720        595        3465   393  109.31   
11   2022-12  82235.81     744        622        3726   375  110.53   
12   2023-01  78408.91     744        618        3784   393  105.39   
13   2023-02  54090.00     496        445        2435   316  109.05   

    Revenue_Per_Customer  Items_Per_Order  Revenue_Growth_Pct  \
8                 125.66             4.93               -7.05   
9                 124.47             5.07                1.01   
10                132.27             4.81                1.98   
11                132.21             5.01                4.49   
12             

In [7]:
# ============================================================================
# 4. VISUALIZE KEY KPIs
# ============================================================================

print("\n" + "=" * 80)
print("CREATING KPI VISUALIZATIONS")
print("=" * 80)

# Ensure output folders exist
os.makedirs('outputs/figures', exist_ok=True)

# -----------------------------
# 4.1 Monthly Revenue Trend
# -----------------------------
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=monthly_kpis['YearMonth'],
    y=monthly_kpis['Revenue'],
    mode='lines+markers',
    name='Revenue'
))

fig.update_layout(
    title='Monthly Revenue Trend',
    xaxis_title='Month',
    yaxis_title='Revenue',
    hovermode='x unified'
)

fig.write_html('outputs/figures/25_monthly_revenue_trend.html')
print("✓ Saved: 25_monthly_revenue_trend.html")

# -----------------------------
# 4.2 Month-over-Month KPI Comparison
# -----------------------------
if len(monthly_kpis) >= 2:
    latest = monthly_kpis.iloc[-1]
    previous = monthly_kpis.iloc[-2]

    metrics = ['Revenue', 'Orders', 'Customers', 'AOV']
    latest_vals = [latest[m] for m in metrics]
    prev_vals = [previous[m] for m in metrics]

    fig = go.Figure()
    fig.add_bar(name='Previous Month', x=metrics, y=prev_vals)
    fig.add_bar(name='Latest Month', x=metrics, y=latest_vals)

    fig.update_layout(
        title='Month-over-Month KPI Comparison',
        barmode='group',
        yaxis_title='Value'
    )

    fig.write_html('outputs/figures/26_mom_kpi_comparison.html')
    print("✓ Saved: 26_mom_kpi_comparison.html")

# -----------------------------
# 4.3 Customer Segment Performance
# -----------------------------
if 'Cluster_Name' in rfm.columns and 'Monetary' in rfm.columns:
    segment_perf = rfm.groupby('Cluster_Name').agg({
        'Customer_ID': 'count',
        'Monetary': 'sum',
        'Frequency': 'mean'
    }).reset_index()

    segment_perf.columns = [
        'Segment', 'Customer_Count',
        'Total_Revenue', 'Avg_Frequency'
    ]

    fig = px.bar(
        segment_perf,
        x='Segment',
        y='Total_Revenue',
        color='Avg_Frequency',
        title='Revenue by Customer Segment'
    )

    fig.write_html('outputs/figures/27_segment_revenue.html')
    print("✓ Saved: 27_segment_revenue.html")

print("\n✓ KPI visualizations completed")



CREATING KPI VISUALIZATIONS
✓ Saved: 25_monthly_revenue_trend.html
✓ Saved: 26_mom_kpi_comparison.html

✓ KPI visualizations completed


In [8]:
# ============================================================================
# 5. EXECUTIVE SUMMARY REPORT
# ============================================================================

summary = f"""
EXECUTIVE SUMMARY

Total Revenue: ${kpis['Total_Revenue']:,.2f}
Total Customers: {kpis['Total_Customers']:,}
Repeat Customer Rate: {kpis['Repeat_Customer_Rate']:.2f}%
CLV/CAC Ratio: {kpis['CLV_to_CAC_Ratio']:.2f}
"""

with open('outputs/reports/executive_summary.txt', 'w') as f:
    f.write(summary)

print("Executive summary saved")


Executive summary saved


In [9]:
# ============================================================================
# 6. PROJECT COMPLETION SUMMARY
# ============================================================================

completion = """
PROJECT STATUS: COMPLETED

Deliverables:
- Data Cleaning
- EDA
- RFM Analysis
- Customer Segmentation
- KPI Framework
- Executive Report
"""

with open('outputs/reports/project_completion_summary.txt', 'w') as f:
    f.write(completion)

print("Project completion summary saved")


Project completion summary saved


In [10]:
# ============================================================================
# 7. FINAL OUTPUTS CHECKLIST
# ============================================================================

print("Final Outputs Generated:")
print("- Cleaned Data")
print("- Customer Segments")
print("- CLV")
print("- KPIs")
print("- Executive Summary")
print("- Visualizations")

print("PROJECT FULLY COMPLETE")


Final Outputs Generated:
- Cleaned Data
- Customer Segments
- CLV
- KPIs
- Executive Summary
- Visualizations
PROJECT FULLY COMPLETE
