# 04 - Product Performance Analysis

## Purpose
Analyze product-level performance metrics including sales, profitability, and inventory efficiency.

## Sections
1. Top/Bottom Performers
2. Profitability Analysis
3. Inventory Turnover
4. Price-Volume Relationship
5. Brand Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Suppress all warnings for the rest of the notebook session
warnings.filterwarnings('ignore')

# Plot appearance shared by every figure below
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Directory holding the processed CSV extracts (relative path)
DATA_DIR = Path('../..') / 'ml' / 'data' / 'processed'

# Read the four source tables, in the same order as they are named on the left
products, inventory, order_items, orders = (
    pd.read_csv(DATA_DIR / csv_name)
    for csv_name in ('products.csv', 'inventory.csv', 'order_items.csv', 'orders.csv')
)

# Convert order dates from text to datetime values
orders = orders.assign(OrderDate=pd.to_datetime(orders['OrderDate']))

# Combined analysis table: orders joined to their items, then to product
# attributes (both inner joins), then to inventory attributes (left join, so
# rows without an inventory match are kept)
full_orders = (
    orders
    .merge(order_items, on='OrderID')
    .merge(products, on='ProductID')
    .merge(inventory, on='ProductID', how='left')
)

# Quick size check of the loaded tables
print(f"Products: {len(products):,}")
print(f"Order Items: {len(order_items):,}")

## 1. Top/Bottom Performers

In [None]:
# Product-level aggregation: one row per ProductID with summed sales figures,
# the number of distinct orders, the mean of Profit_Margin over the product's
# rows, and the first Category/Brand value found for the product.
# Named aggregation ties each output column to its source explicitly instead
# of relying on positional column renaming.
product_performance = (
    full_orders
    .groupby('ProductID')
    .agg(
        Revenue=('TotalAmount', 'sum'),
        Profit=('Profit', 'sum'),
        UnitsSold=('Quantity', 'sum'),
        OrderCount=('OrderID', 'nunique'),
        AvgMargin=('Profit_Margin', 'mean'),
        Category=('Category', 'first'),
        Brand=('Brand', 'first'),
    )
    .reset_index()
)

# Rankings; each frame holds at most 10 rows (fewer for a small catalogue)
top_revenue = product_performance.nlargest(10, 'Revenue')
bottom_revenue = product_performance.nsmallest(10, 'Revenue')
top_profit = product_performance.nlargest(10, 'Profit')

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# (axis, ranked frame, metric column, title, x-label) for each panel
panels = [
    (axes[0], top_revenue, 'Revenue', 'Top 10 Products by Revenue', 'Revenue ($K)'),
    (axes[1], top_profit, 'Profit', 'Top 10 Products by Profit', 'Profit ($K)'),
]
for ax, ranked, metric, title, xlabel in panels:
    # Derive bar positions from the data: a hard-coded range(10) makes barh
    # fail with a shape mismatch when fewer than 10 products exist.
    positions = range(len(ranked))
    ax.barh(positions, ranked[metric] / 1000)
    ax.set_yticks(positions)
    ax.set_yticklabels(ranked['ProductID'])
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.invert_yaxis()  # put the highest-ranked product at the top

plt.tight_layout()
plt.show()

summary_columns = ['ProductID', 'Category', 'Revenue', 'Profit', 'UnitsSold', 'AvgMargin']

print("\n=== Top 10 Products by Revenue ===")
display(top_revenue[summary_columns])

# The section covers bottom performers as well, so show them too
print("\n=== Bottom 10 Products by Revenue ===")
display(bottom_revenue[summary_columns])

In [None]:
# Performance distribution: 2x2 grid of per-product histograms
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(revenue_ax, profit_ax), (units_ax, margin_ax) = axes

# Shared drawing options for every histogram and every reference line
hist_style = dict(bins=50, edgecolor='black', alpha=0.7)
marker_style = dict(color='red', linestyle='--')

# Revenue per product, with the median marked
revenue_median = product_performance['Revenue'].median()
revenue_ax.hist(product_performance['Revenue'], **hist_style)
revenue_ax.axvline(revenue_median, label=f"Median: ${revenue_median:,.0f}", **marker_style)
revenue_ax.set_title('Revenue Distribution (per Product)')
revenue_ax.set_xlabel('Revenue ($)')
revenue_ax.legend()

# Profit per product, with the median marked
profit_median = product_performance['Profit'].median()
profit_ax.hist(product_performance['Profit'], **hist_style)
profit_ax.axvline(profit_median, label=f"Median: ${profit_median:,.0f}", **marker_style)
profit_ax.set_title('Profit Distribution (per Product)')
profit_ax.set_xlabel('Profit ($)')
profit_ax.legend()

# Units sold per product (no reference line)
units_ax.hist(product_performance['UnitsSold'], **hist_style)
units_ax.set_title('Units Sold Distribution (per Product)')
units_ax.set_xlabel('Units Sold')

# Average margin per product, with the mean marked
margin_mean = product_performance['AvgMargin'].mean()
margin_ax.hist(product_performance['AvgMargin'], **hist_style)
margin_ax.axvline(margin_mean, label=f"Mean: {margin_mean:.1f}%", **marker_style)
margin_ax.set_title('Profit Margin Distribution')
margin_ax.set_xlabel('Profit Margin (%)')
margin_ax.legend()

plt.tight_layout()
plt.show()

## 2. Profitability Analysis

In [None]:
# Revenue vs Profit scatter: one bubble per product
fig, ax = plt.subplots(figsize=(12, 8))

# Axis values are expressed in thousands of dollars
revenue_k = product_performance['Revenue'] / 1000
profit_k = product_performance['Profit'] / 1000
# Bubble area follows units sold (scaled down by 10)
bubble_sizes = product_performance['UnitsSold'] / 10

scatter = ax.scatter(
    revenue_k,
    profit_k,
    c=product_performance['AvgMargin'],  # colour encodes the average margin
    cmap='RdYlGn',
    alpha=0.6,
    s=bubble_sizes,
)
fig.colorbar(scatter, ax=ax, label='Profit Margin (%)')

ax.set_xlabel('Revenue ($K)')
ax.set_ylabel('Profit ($K)')
ax.set_title('Revenue vs Profit (size = units sold, color = margin)')

plt.tight_layout()
plt.show()

In [None]:
# Profitability by category: roll the product-level table up to one row per
# category, add profit per product, and order rows by ascending total profit.
# AvgMargin here is the plain mean of the per-product average margins.
category_profit = (
    product_performance
    .groupby('Category')
    .agg(
        Revenue=('Revenue', 'sum'),
        Profit=('Profit', 'sum'),
        UnitsSold=('UnitsSold', 'sum'),
        ProductCount=('ProductID', 'count'),
        AvgMargin=('AvgMargin', 'mean'),
    )
    .reset_index()
    .assign(ProfitPerProduct=lambda df: df['Profit'] / df['ProductCount'])
    .sort_values('Profit', ascending=True)
)

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
profit_ax, margin_ax = axes

category_labels = category_profit['Category']

# Left panel: total profit in thousands of dollars
profit_ax.barh(category_labels, category_profit['Profit'] / 1000)
profit_ax.set_title('Total Profit by Category')
profit_ax.set_xlabel('Profit ($K)')

# Right panel: average margin, categories in the same order as the left panel
margin_ax.barh(category_labels, category_profit['AvgMargin'])
margin_ax.set_title('Average Profit Margin by Category')
margin_ax.set_xlabel('Profit Margin (%)')

plt.tight_layout()
plt.show()

## 3. Inventory Turnover

In [None]:
# Calculate inventory turnover: start from the full product list so products
# without sales are included, attach inventory columns, then attach the sales
# totals computed in product_performance.
# NOTE(review): if inventory can hold several rows per ProductID, this left
# join repeats the product row and skews the statistics below - confirm the
# inventory table is one row per product.
inventory_analysis = products.merge(inventory, on='ProductID', how='left')
inventory_analysis = inventory_analysis.merge(
    product_performance[['ProductID', 'UnitsSold', 'Revenue', 'Profit']],
    on='ProductID',
    how='left'
)

# Products missing from product_performance or from inventory come back as
# NaN after the left joins; treat them as zero units sold / zero stock
inventory_analysis['UnitsSold'] = inventory_analysis['UnitsSold'].fillna(0)
inventory_analysis['Current_Stock'] = inventory_analysis['Current_Stock'].fillna(0)

# Length of the observed order window in days, floored at 1 so it can be used
# as a divisor
date_range = (orders['OrderDate'].max() - orders['OrderDate'].min()).days
period_days = max(date_range, 1)

# Annualized turnover: units sold, scaled from the observed window to a
# 365-day year, divided by current stock. Scaling by the measured window
# (rather than assuming exactly one year of data) keeps the ratio annual for
# any date range. Products with no stock on hand are assigned 0.
annual_units = inventory_analysis['UnitsSold'] * (365 / period_days)
inventory_analysis['Turnover'] = np.where(
    inventory_analysis['Current_Stock'] > 0,
    annual_units / inventory_analysis['Current_Stock'],
    0
)

# Days of stock: current stock divided by average daily unit sales; products
# with no sales get infinity (the stock would never run out at that rate)
inventory_analysis['DailyVelocity'] = inventory_analysis['UnitsSold'] / period_days
inventory_analysis['DaysOfStock'] = np.where(
    inventory_analysis['DailyVelocity'] > 0,
    inventory_analysis['Current_Stock'] / inventory_analysis['DailyVelocity'],
    np.inf
)

print("\n=== Inventory Turnover Statistics ===")
print(f"Average Turnover: {inventory_analysis['Turnover'].mean():.2f}")
print(f"Median Turnover: {inventory_analysis['Turnover'].median():.2f}")
print(f"Products with no sales: {(inventory_analysis['UnitsSold'] == 0).sum()}")

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Turnover distribution: only positive ratios, capped at 20 so a few extreme
# values do not flatten the rest of the histogram
turnover_valid = inventory_analysis.loc[inventory_analysis['Turnover'] > 0, 'Turnover']
axes[0].hist(turnover_valid.clip(upper=20), bins=50, edgecolor='black', alpha=0.7)
axes[0].set_title('Inventory Turnover Distribution')
axes[0].set_xlabel('Turnover Ratio')

# Days of stock distribution: keep values strictly between 0 and 365 days,
# which also drops the infinite values assigned to products with no sales
dos_in_range = (inventory_analysis['DaysOfStock'] > 0) & (inventory_analysis['DaysOfStock'] < 365)
dos_valid = inventory_analysis.loc[dos_in_range, 'DaysOfStock']
axes[1].hist(dos_valid, bins=50, edgecolor='black', alpha=0.7)
axes[1].set_title('Days of Stock Distribution')
axes[1].set_xlabel('Days of Stock')

plt.tight_layout()
plt.show()

## 4. Price-Volume Relationship

In [None]:
# Price vs Volume: per product, the mean unit price across its rows, the
# total quantity sold, and the first Category value found
price_volume = full_orders.groupby('ProductID').agg({
    'UnitPrice': 'mean',
    'Quantity': 'sum',
    'Category': 'first'
}).reset_index()

# Pick the five categories with the most units sold. Taking unique()[:5]
# would return the first five categories encountered, not the top five.
top_categories = (
    price_volume
    .groupby('Category')['Quantity']
    .sum()
    .nlargest(5)
    .index
)

plt.figure(figsize=(12, 8))
for category in top_categories:  # Top 5 categories by units sold
    cat_data = price_volume[price_volume['Category'] == category]
    plt.scatter(cat_data['UnitPrice'], cat_data['Quantity'], label=category, alpha=0.6)

plt.xlabel('Average Unit Price ($)')
plt.ylabel('Total Units Sold')
plt.title('Price vs Volume by Category')
# Place the legend outside the plot area, to the right
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

## 5. Brand Analysis

In [None]:
# Brand performance: roll the product-level table up to one row per brand.
# AvgMargin is the plain mean of the per-product average margins.
brand_performance = (
    product_performance
    .groupby('Brand')
    .agg(
        Revenue=('Revenue', 'sum'),
        Profit=('Profit', 'sum'),
        UnitsSold=('UnitsSold', 'sum'),
        ProductCount=('ProductID', 'count'),
        AvgMargin=('AvgMargin', 'mean'),
    )
    .reset_index()
)

# Top 15 brands by revenue
top_brands = brand_performance.nlargest(15, 'Revenue')

fig, axes = plt.subplots(1, 2, figsize=(14, 8))
revenue_ax, margin_ax = axes

brand_labels = top_brands['Brand']

# Left panel: revenue in thousands of dollars, largest brand on top
revenue_ax.barh(brand_labels, top_brands['Revenue'] / 1000)
revenue_ax.set_title('Top 15 Brands by Revenue')
revenue_ax.set_xlabel('Revenue ($K)')
revenue_ax.invert_yaxis()

# Right panel: average margin for the same brands, in the same order
margin_ax.barh(brand_labels, top_brands['AvgMargin'])
margin_ax.set_title('Top 15 Brands - Profit Margin')
margin_ax.set_xlabel('Profit Margin (%)')
margin_ax.invert_yaxis()

plt.tight_layout()
plt.show()

print("\n=== Top 15 Brands ===")
display(top_brands)

In [None]:
# Final summary: headline figures drawn from the tables built in the cells above
print("\n" + "="*60)
print("PRODUCT PERFORMANCE SUMMARY")
print("="*60)

print("\n=== Overall Metrics ===")
print(f"Total Products Analyzed: {len(product_performance):,}")
print(f"Total Revenue: ${product_performance['Revenue'].sum():,.2f}")
print(f"Total Profit: ${product_performance['Profit'].sum():,.2f}")
print(f"Average Profit Margin: {product_performance['AvgMargin'].mean():.2f}%")

# nlargest results are sorted descending, so row 0 is the leader;
# category_profit is sorted by ascending profit, so its last row is the leader
best_by_revenue = top_revenue.iloc[0]
best_by_profit = top_profit.iloc[0]
best_category = category_profit.iloc[-1]
best_brand = top_brands.iloc[0]

print("\n=== Top Performers ===")
print(f"Best Product (Revenue): {best_by_revenue['ProductID']} - ${best_by_revenue['Revenue']:,.2f}")
print(f"Best Product (Profit): {best_by_profit['ProductID']} - ${best_by_profit['Profit']:,.2f}")
print(f"Best Category: {best_category['Category']} - ${best_category['Profit']:,.2f}")
print(f"Best Brand: {best_brand['Brand']} - ${best_brand['Revenue']:,.2f}")

print("\n=== Inventory Health ===")
# Count unsold products on inventory_analysis, which starts from the full
# product list. product_performance is built from order lines only, so
# products that never sold are absent from it and cannot be counted there.
print(f"Products with no sales: {(inventory_analysis['UnitsSold'] == 0).sum()}")
print(f"Average Inventory Turnover: {inventory_analysis['Turnover'].mean():.2f}")