# 08 - ABC Analysis Development

## Purpose
Develop and validate an ABC inventory classification model using the Pareto principle.

## Sections
1. ABC Classification Implementation
2. Multi-Criteria ABC (Revenue + Volume)
3. Category-Level Analysis
4. Recommendations by Class

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Plot styling shared by every chart in this notebook
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Processed CSV exports are read relative to the working directory
DATA_DIR = Path('../..', 'ml', 'data', 'processed')

# Read the four source tables
products = pd.read_csv(DATA_DIR / 'products.csv')
inventory = pd.read_csv(DATA_DIR / 'inventory.csv')
order_items = pd.read_csv(DATA_DIR / 'order_items.csv')
orders = pd.read_csv(DATA_DIR / 'orders.csv')

# Join orders to their items on OrderID, then to products on ProductID
# (both merges use the default inner join, so unmatched rows are dropped)
full_orders = (
    orders
    .merge(order_items, on='OrderID')
    .merge(products, on='ProductID')
)

print(f"Products: {len(products):,}")
print(f"Order items: {len(order_items):,}")

## 1. ABC Classification Implementation

In [None]:
# Per-product totals; named aggregation sets the output column names directly.
# NOTE(review): assumes TotalAmount is a line-item amount. If it is an
# order-level total it would be summed once per item row - confirm against
# the source schema.
product_metrics = (
    full_orders
    .groupby('ProductID')
    .agg(
        Revenue=('TotalAmount', 'sum'),
        Profit=('Profit', 'sum'),
        UnitsSold=('Quantity', 'sum'),
        OrderCount=('OrderID', 'nunique'),
        Category=('Category', 'first'),
        Brand=('Brand', 'first'),
    )
    .reset_index()
)

# Rank products from highest to lowest revenue with a fresh 0..n-1 index
product_metrics = product_metrics.sort_values('Revenue', ascending=False).reset_index(drop=True)

# Running revenue share and running share of the product count, both in percent
total_revenue = product_metrics['Revenue'].sum()
n_products = len(product_metrics)
running_revenue = product_metrics['Revenue'].cumsum()
product_metrics['CumulativeRevenue'] = running_revenue
product_metrics['CumulativeRevenuePct'] = running_revenue / total_revenue * 100
product_metrics['ProductPct'] = np.arange(1, n_products + 1) / n_products * 100

print(f"Total Revenue: ${total_revenue:,.2f}")
print(f"Total Products: {n_products:,}")

In [None]:
def assign_abc_class(cumulative_pct, a_threshold=80, b_threshold=95):
    """
    Map a cumulative percentage onto an ABC class label.

    Args:
        cumulative_pct: Running share, in percent, of the ranked metric
            up to and including the current product.
        a_threshold: Inclusive upper bound for class 'A' (default 80).
        b_threshold: Inclusive upper bound for class 'B' (default 95).

    Returns:
        'A' when cumulative_pct <= a_threshold, 'B' when it is above that
        but <= b_threshold, and 'C' for everything else. A value that
        fails both comparisons (for example NaN) also lands in 'C'.
    """
    # Both bounds are inclusive: exactly 80 is still 'A', exactly 95 still 'B'
    if cumulative_pct <= a_threshold:
        return 'A'
    return 'B' if cumulative_pct <= b_threshold else 'C'

# Label every product from its cumulative revenue share
product_metrics['ABC_Class'] = product_metrics['CumulativeRevenuePct'].map(assign_abc_class)

# One row per class: product count plus revenue / profit / unit totals
abc_summary = (
    product_metrics
    .groupby('ABC_Class')
    .agg(
        ProductCount=('ProductID', 'count'),
        Revenue=('Revenue', 'sum'),
        Profit=('Profit', 'sum'),
        UnitsSold=('UnitsSold', 'sum'),
    )
    .reset_index()
    .rename(columns={'ABC_Class': 'Class'})
)

# Each class's share of the product count and of the revenue, in percent
for share_col, base_col in (('ProductPct', 'ProductCount'), ('RevenuePct', 'Revenue')):
    abc_summary[share_col] = abc_summary[base_col] / abc_summary[base_col].sum() * 100

print("\n=== ABC Classification Summary ===")
display(abc_summary)

In [None]:
# Pareto chart: per-product revenue bars plus the cumulative-share curve
fig, ax1 = plt.subplots(figsize=(12, 6))

# Bars show revenue in thousands, coloured by each product's ABC class
class_palette = {'A': 'green', 'B': 'orange', 'C': 'red'}
colors = product_metrics['ABC_Class'].map(class_palette)
positions = range(len(product_metrics))
ax1.bar(positions, product_metrics['Revenue'] / 1000, color=colors, alpha=0.7)
ax1.set_xlabel('Products (ranked by revenue)')
ax1.set_ylabel('Revenue ($K)', color='blue')

# Cumulative revenue % on a secondary y-axis, with the two class cut-offs
ax2 = ax1.twinx()
ax2.plot(positions, product_metrics['CumulativeRevenuePct'], color='black', linewidth=2)
for level, shade, tag in (
    (80, 'green', '80% (A threshold)'),
    (95, 'orange', '95% (B threshold)'),
):
    ax2.axhline(y=level, color=shade, linestyle='--', label=tag)
ax2.set_ylabel('Cumulative Revenue %', color='black')
ax2.legend(loc='right')

plt.title('ABC Analysis - Pareto Chart')
plt.tight_layout()
plt.show()

In [None]:
# Three pies side by side: share of products, revenue and profit per class
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

class_order = ['A', 'B', 'C']
colors = ['green', 'orange', 'red']

# Per-class totals, all arranged in A/B/C order so the colours line up
class_counts = product_metrics['ABC_Class'].value_counts().reindex(class_order)
class_revenue = product_metrics.groupby('ABC_Class')['Revenue'].sum().reindex(class_order)
class_profit = product_metrics.groupby('ABC_Class')['Profit'].sum().reindex(class_order)

pie_specs = (
    (class_counts, 'Product Distribution by Class'),
    (class_revenue, 'Revenue Distribution by Class'),
    (class_profit, 'Profit Distribution by Class'),
)
for axis, (values, heading) in zip(axes, pie_specs):
    axis.pie(values, labels=values.index, autopct='%1.1f%%', colors=colors, startangle=90)
    axis.set_title(heading)

plt.tight_layout()
plt.show()

## 2. Multi-Criteria ABC (Revenue + Volume)

In [None]:
# ABC by Volume: rank products by units sold and apply the same thresholds
# to the cumulative share of units.
product_volume = product_metrics.sort_values('UnitsSold', ascending=False).copy()
total_units = product_volume['UnitsSold'].sum()
product_volume['CumulativeUnits'] = product_volume['UnitsSold'].cumsum()
product_volume['CumulativeUnitsPct'] = product_volume['CumulativeUnits'] / total_units * 100
product_volume['ABC_Volume'] = product_volume['CumulativeUnitsPct'].apply(assign_abc_class)

# Merge classifications.
# Drop any ABC_Volume column left over from a previous run of this cell
# first: with the column on both sides the merge would emit
# ABC_Volume_x / ABC_Volume_y and the crosstab below would raise KeyError.
product_metrics = product_metrics.drop(columns='ABC_Volume', errors='ignore').merge(
    product_volume[['ProductID', 'ABC_Volume']],
    on='ProductID'
)

# Combined ABC matrix: rows are the revenue class, columns the volume class,
# cells are product counts.
abc_matrix = pd.crosstab(product_metrics['ABC_Class'], product_metrics['ABC_Volume'])

print("\n=== ABC Cross-Classification Matrix ===")
print("(Revenue ABC vs Volume ABC)")
display(abc_matrix)

# Heatmap of the same matrix (fmt='d' prints the counts as integers)
plt.figure(figsize=(8, 6))
sns.heatmap(abc_matrix, annot=True, fmt='d', cmap='YlOrRd')
plt.title('ABC Cross-Classification (Revenue vs Volume)')
plt.xlabel('Volume ABC Class')
plt.ylabel('Revenue ABC Class')
plt.tight_layout()
plt.show()

In [None]:
# Combined classification
def combined_abc(row):
    """
    Merge the revenue and volume ABC classes of one product into one label.

    Args:
        row: Mapping-like record exposing 'ABC_Class' (revenue class) and
            'ABC_Volume' (volume class).

    Returns:
        'AA - High Priority' when both classes are 'A', 'A - Important'
        when exactly one is 'A', 'BB - Medium Priority' when both are 'B',
        'B - Moderate' when exactly one is 'B' (and neither is 'A'),
        otherwise 'CC - Low Priority'.
    """
    grades = (row['ABC_Class'], row['ABC_Volume'])

    # Walk the tiers from best to worst; the first grade present decides
    tiers = (
        ('A', 'AA - High Priority', 'A - Important'),
        ('B', 'BB - Medium Priority', 'B - Moderate'),
    )
    for grade, both_label, either_label in tiers:
        hits = grades.count(grade)
        if hits == 2:
            return both_label
        if hits == 1:
            return either_label

    return 'CC - Low Priority'

# Tag every product with its combined revenue/volume label
product_metrics['Combined_Class'] = product_metrics.apply(combined_abc, axis=1)

# Product count, revenue and units per combined label, biggest revenue first
combined_summary = (
    product_metrics
    .groupby('Combined_Class')
    .agg(
        Products=('ProductID', 'count'),
        Revenue=('Revenue', 'sum'),
        Units=('UnitsSold', 'sum'),
    )
    .reset_index()
    .rename(columns={'Combined_Class': 'Class'})
    .sort_values('Revenue', ascending=False)
)

print("\n=== Combined ABC Classification ===")
display(combined_summary)

## 3. Category-Level Analysis

In [None]:
# ABC distribution by category: rows are categories, columns are ABC
# classes, values are product counts (0 where a category lacks a class).
category_abc = product_metrics.groupby(['Category', 'ABC_Class']).size().unstack(fill_value=0)
# Force the A/B/C column order. fill_value=0 matters when a class has no
# products at all: without it the added column would be NaN rather than 0,
# and the NaN would carry into the percentages and the plot.
category_abc = category_abc.reindex(columns=['A', 'B', 'C'], fill_value=0)

# Calculate percentages: each row is divided by that category's product count
category_abc_pct = category_abc.div(category_abc.sum(axis=1), axis=0) * 100

# Plot: one stacked horizontal bar per category
category_abc_pct.plot(kind='barh', stacked=True, figsize=(12, 8), color=['green', 'orange', 'red'])
plt.title('ABC Distribution by Category')
plt.xlabel('Percentage of Products')
plt.legend(title='ABC Class')
plt.tight_layout()
plt.show()

In [None]:
# Per-category totals: product count, revenue, profit and units sold
category_summary = (
    product_metrics
    .groupby('Category')
    .agg(
        Products=('ProductID', 'count'),
        Revenue=('Revenue', 'sum'),
        Profit=('Profit', 'sum'),
        Units=('UnitsSold', 'sum'),
    )
    .reset_index()
)

# Number of class-A products per category; the left join keeps categories
# that have none, and those are filled with 0 below
is_class_a = product_metrics['ABC_Class'] == 'A'
a_class_count = product_metrics[is_class_a].groupby('Category').size()
category_summary = category_summary.merge(
    a_class_count.reset_index(name='A_Products'),
    on='Category',
    how='left',
)
category_summary['A_Products'] = category_summary['A_Products'].fillna(0).astype(int)

# Share of each category's products that are class A, in percent
category_summary['A_Pct'] = category_summary['A_Products'] / category_summary['Products'] * 100

category_summary = category_summary.sort_values('Revenue', ascending=False)

print("\n=== Category ABC Summary ===")
display(category_summary)

## 4. Recommendations by Class

In [None]:
# Generate recommendations
def get_recommendation(abc_class, combined_class=None):
    """Return the inventory-management recommendation for an ABC class.

    Args:
        abc_class: Class label, expected to be 'A', 'B' or 'C'. Any other
            value falls back to the class 'C' recommendation.
        combined_class: Currently not used by the lookup. It is kept, now
            with a default of None, so existing two-argument calls keep
            working while new callers can omit it.

    Returns:
        A dict with the keys 'reorder_frequency', 'safety_stock',
        'monitoring', 'supplier_strategy' and 'forecasting'. The table is
        rebuilt on every call, so callers may mutate the returned dict
        without affecting later calls.
    """
    recommendations = {
        'A': {
            'reorder_frequency': 'Weekly',
            'safety_stock': 'High (2-3 weeks)',
            'monitoring': 'Daily',
            'supplier_strategy': 'Multiple reliable suppliers',
            'forecasting': 'Advanced models required'
        },
        'B': {
            'reorder_frequency': 'Bi-weekly',
            'safety_stock': 'Medium (1-2 weeks)',
            'monitoring': 'Weekly',
            'supplier_strategy': 'Primary + backup supplier',
            'forecasting': 'Standard forecasting'
        },
        'C': {
            'reorder_frequency': 'Monthly',
            'safety_stock': 'Low (minimal)',
            'monitoring': 'Monthly',
            'supplier_strategy': 'Single supplier acceptable',
            'forecasting': 'Simple moving average'
        }
    }
    # Unknown labels are treated as the lowest-priority class
    return recommendations.get(abc_class, recommendations['C'])

# Attach the class-level recommendation (a dict) to every product row
product_metrics['Recommendation'] = product_metrics['ABC_Class'].apply(
    lambda label: get_recommendation(label, None)
)

divider = "=" * 60
print("\n" + divider)
print("ABC INVENTORY MANAGEMENT RECOMMENDATIONS")
print(divider)

# (printed heading, key in the recommendation dict), in display order
report_fields = (
    ('Reorder Frequency', 'reorder_frequency'),
    ('Safety Stock', 'safety_stock'),
    ('Monitoring', 'monitoring'),
    ('Supplier Strategy', 'supplier_strategy'),
    ('Forecasting', 'forecasting'),
)

for abc_class in ['A', 'B', 'C']:
    rec = get_recommendation(abc_class, None)
    class_data = product_metrics[product_metrics['ABC_Class'] == abc_class]

    print(f"\n=== Class {abc_class} ({len(class_data)} products) ===")
    print(f"Revenue: ${class_data['Revenue'].sum():,.2f}")
    print("\nRecommendations:")
    for heading, key in report_fields:
        print(f"  {heading}: {rec[key]}")

In [None]:
# First 20 class-A rows, in the frame's current row order
is_class_a = product_metrics['ABC_Class'] == 'A'
top_a_products = product_metrics.loc[is_class_a].head(20)

print("\n=== Top 20 A-Class Products ===")
report_columns = ['ProductID', 'Category', 'Brand', 'Revenue', 'Profit', 'UnitsSold']
display(top_a_products[report_columns])

In [None]:
divider = "=" * 60
print("\n" + divider)
print("ABC ANALYSIS SUMMARY")
print(divider)

# One line per class: product count and share, revenue and share
print("\n=== Classification Results ===")
for row in abc_summary.itertuples(index=False):
    print(f"Class {row.Class}: {row.ProductCount:,} products ({row.ProductPct:.1f}%) | ${row.Revenue:,.2f} ({row.RevenuePct:.1f}%)")

# Headline figure: how few products make up class A and what they earn
print("\n=== Key Metrics ===")
is_class_a = product_metrics['ABC_Class'] == 'A'
a_products = len(product_metrics[is_class_a])
a_revenue = product_metrics.loc[is_class_a, 'Revenue'].sum()
print(f"Top {a_products} products ({a_products/len(product_metrics)*100:.1f}%) generate ${a_revenue:,.2f} ({a_revenue/total_revenue*100:.1f}% of revenue)")

# Pareto check: revenue share of the first 20% of rows
# (the row count is truncated toward zero)
print("\n=== Pareto Validation ===")
pct_20 = int(len(product_metrics) * 0.2)
top_20_revenue = product_metrics['Revenue'].head(pct_20).sum()
print(f"Top 20% of products: ${top_20_revenue:,.2f} ({top_20_revenue/total_revenue*100:.1f}% of revenue)")