In [None]:
import pandas as pd
from datetime import datetime


# Read the processed sales extract; the analysis cells below all work off `df`.
df = pd.read_csv('../data/processed/fully_cleaned_us_regional_sales_data.csv')

# Report banner: rule, title, rule (one print call, three output lines).
print(
    "=" * 80,
    "COMPREHENSIVE SALES PERFORMANCE & PRODUCT INTELLIGENCE ANALYSIS",
    "=" * 80,
    sep="\n",
)

# Parse OrderDate (values that cannot be parsed become NaT because of
# errors='coerce'), then derive quarter / month / year helper columns from
# the parsed dates. Keyword order matters: the later lambdas read the
# already-parsed OrderDate column.
df = df.assign(
    OrderDate=lambda frame: pd.to_datetime(frame['OrderDate'], errors='coerce'),
    **{
        'Order Quarter': lambda frame: frame['OrderDate'].dt.to_period('Q'),
        'Order Month': lambda frame: frame['OrderDate'].dt.month,
        'Order Year': lambda frame: frame['OrderDate'].dt.year,
    },
)

''' SECTION 1: ENHANCED SALES PERFORMANCE ANALYSIS '''
print("\n1. COMPREHENSIVE SALES PERFORMANCE BY CHANNEL")
print("-" * 60)

# Per-channel summary statistics, rounded to 2 decimals.
sales_performance_by_channel = (
    df.groupby('Sales Channel')
    .agg({
        'Order Quantity': ['sum', 'mean', 'count'],
        'Total Revenue': ['sum', 'mean', 'std'],
        'Total Profit': ['sum', 'mean'],
        'Profit Margin': ['mean', 'std'],
        'Unit Price': 'mean',
        'Discount Applied': 'mean',
        'OrderNumber': 'nunique',
    })
    .round(2)
)

# Flatten the two-level (column, statistic) header into "column_statistic".
sales_performance_by_channel.columns = [
    f"{column}_{statistic}".strip()
    for column, statistic in sales_performance_by_channel.columns
]
print("Enhanced Sales Performance by Channel:", sales_performance_by_channel, sep="\n")

# Channel efficiency: revenue normalised by distinct orders and by units sold.
channel_efficiency = (
    df.groupby('Sales Channel')
    .agg({
        'Total Revenue': 'sum',
        'OrderNumber': 'nunique',
        'Order Quantity': 'sum',
    })
    .round(2)
)

# Ratios are derived from the rounded totals above and rounded again to 2 decimals.
channel_efficiency = channel_efficiency.assign(
    Revenue_per_Order=lambda totals: totals['Total Revenue'].div(totals['OrderNumber']).round(2),
    Revenue_per_Unit=lambda totals: totals['Total Revenue'].div(totals['Order Quantity']).round(2),
)

print("\nChannel Efficiency Metrics:", channel_efficiency, sep="\n")

In [None]:
''' SECTION 2: ADVANCED REVENUE ANALYSIS '''
# Section banner: blank line, rule, title, rule.
print("", "=" * 80, "ADVANCED REVENUE & MARKET ANALYSIS", "=" * 80, sep="\n")

print("\n2. REVENUE DISTRIBUTION ANALYSIS")
print("-" * 60)

# Distribution of revenue per channel, plus unit and row counts.
revenue_analysis = (
    df.groupby('Sales Channel')
    .agg({
        'Total Revenue': ['sum', 'mean', 'median', 'std', 'min', 'max'],
        'Order Quantity': 'sum',
        'OrderNumber': 'count',
    })
    .round(2)
)

# Collapse the (column, statistic) header pairs into single "column_statistic" labels.
revenue_analysis.columns = [
    label.strip() for label in map('_'.join, revenue_analysis.columns)
]

# Market share and concentration
total_revenue = df['Total Revenue'].sum()
channel_market_share = df.groupby('Sales Channel')['Total Revenue'].sum()

# Exact revenue fraction per channel (0-1 scale). Kept unrounded so the
# concentration index below is not affected by display rounding.
channel_share_fraction = channel_market_share / total_revenue
channel_market_share_pct = (channel_share_fraction * 100).round(2)

# Calculate Herfindahl-Hirschman Index (market concentration) as the sum of
# squared shares on the 0-1 scale, using the exact fractions rather than the
# percentages rounded for display.
hhi = (channel_share_fraction ** 2).sum()
# Thresholds are on the same 0-1 scale: > 0.25 High, > 0.15 Moderate, else Low.
market_concentration = "High" if hhi > 0.25 else "Moderate" if hhi > 0.15 else "Low"

print("Revenue Distribution by Channel:")
print(revenue_analysis)
print("\nMarket Share by Channel (%):")
print(channel_market_share_pct.sort_values(ascending=False))
print(f"\nMarket Concentration Index (HHI): {hhi:.3f} - {market_concentration} Concentration")

In [None]:
print("\n3. REVENUE VOLATILITY & STABILITY ANALYSIS")
print("-" * 60)

# Revenue volatility by month for each channel.
# Bucket by year-month period rather than by calendar month number (1-12):
# grouping on the month number alone would add together the same month from
# different years whenever the data spans more than one year, so the std/mean
# would no longer describe month-to-month variation.
# pd.to_datetime is a no-op when OrderDate is already datetime; rows whose
# date is NaT are dropped by groupby, as NaN month numbers were before.
order_year_month = (
    pd.to_datetime(df['OrderDate'], errors='coerce')
    .dt.to_period('M')
    .rename('Order Month')
)
monthly_revenue = (
    df.groupby([order_year_month, 'Sales Channel'])['Total Revenue']
    .sum()
    .reset_index()
)

# Coefficient of variation (std / mean, in percent) of the monthly totals.
revenue_volatility = monthly_revenue.groupby('Sales Channel')['Total Revenue'].agg(['std', 'mean']).round(2)
revenue_volatility['Coefficient_of_Variation'] = (revenue_volatility['std'] / revenue_volatility['mean'] * 100).round(2)
revenue_volatility = revenue_volatility.sort_values('Coefficient_of_Variation')

print("Revenue Volatility Analysis (Lower CV = More Stable):")
print(revenue_volatility)

In [None]:
''' SECTION 3: PROFITABILITY ANALYSIS '''
# Section banner: blank line, rule, title, rule.
print("", "=" * 80, "PROFITABILITY & ROI ANALYSIS", "=" * 80, sep="\n")

print("\n4. COMPREHENSIVE PROFITABILITY METRICS")
print("-" * 60)

# Per-channel profit, revenue, cost and margin statistics (2-decimal rounding).
profitability_analysis = (
    df.groupby('Sales Channel')
    .agg({
        'Total Profit': ['sum', 'mean', 'std'],
        'Total Revenue': 'sum',
        'Total Cost': 'sum',
        'Profit Margin': ['mean', 'median', 'std', 'min', 'max'],
        'Unit Cost': 'mean',
        'Unit Price': 'mean',
    })
    .round(2)
)

# Flatten (column, statistic) header pairs into "column_statistic" labels.
profitability_analysis.columns = [
    f"{column}_{statistic}".strip()
    for column, statistic in profitability_analysis.columns
]

# Channel-level totals used for the ratio metrics below (left unrounded).
channel_profitability = df.groupby('Sales Channel').agg({
    'Total Profit': 'sum',
    'Total Revenue': 'sum',
    'Total Cost': 'sum',
    'OrderNumber': 'nunique',
})

# ROI = profit / cost (in percent); profit per distinct order; revenue / cost.
channel_profitability = channel_profitability.assign(
    ROI_Percent=lambda totals: totals['Total Profit'].div(totals['Total Cost']).mul(100).round(2),
    Profit_per_Order=lambda totals: totals['Total Profit'].div(totals['OrderNumber']).round(2),
    Revenue_to_Cost_Ratio=lambda totals: totals['Total Revenue'].div(totals['Total Cost']).round(2),
)

print("Detailed Profitability Analysis:", profitability_analysis, sep="\n")
print(f"\nAdvanced Profitability Metrics:")
print(channel_profitability.loc[:, ['ROI_Percent', 'Profit_per_Order', 'Revenue_to_Cost_Ratio']])


In [None]:
''' SECTION 4: PRODUCT INTELLIGENCE ANALYSIS '''
# Section banner: blank line, rule, title, rule.
print("", "=" * 80, "PRODUCT INTELLIGENCE & PORTFOLIO ANALYSIS", "=" * 80, sep="\n")

print("\n5. PRODUCT PORTFOLIO PERFORMANCE")
print("-" * 60)

# Per-product summary statistics, rounded to 2 decimals.
product_performance = (
    df.groupby('_ProductID')
    .agg({
        'Total Revenue': ['sum', 'mean', 'count'],
        'Total Profit': 'sum',
        'Order Quantity': 'sum',
        'Profit Margin': 'mean',
        'Discount Applied': 'mean',
        'Unit Price': 'mean',
    })
    .round(2)
)
product_performance.columns = [
    label.strip() for label in map('_'.join, product_performance.columns)
]

# Inputs for the simplified BCG-style classification: one row per product.
product_metrics = df.groupby('_ProductID').agg({
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'count',
    'Profit Margin': 'mean',
})

# Market share (% of all product revenue), order frequency (row count per
# product, used as the growth proxy) and average revenue per unit sold.
product_metrics = product_metrics.assign(
    Market_Share=lambda per_product: per_product['Total Revenue']
    .div(per_product['Total Revenue'].sum())
    .mul(100),
    Order_Frequency=lambda per_product: per_product['OrderNumber'],
    Revenue_per_Unit=lambda per_product: per_product['Total Revenue'].div(per_product['Order Quantity']),
)

# Medians act as the high/low cut-offs for the two classification axes.
market_share_median = product_metrics['Market_Share'].median()
order_freq_median = product_metrics['Order_Frequency'].median()

def classify_product(row, market_share_threshold=None, order_freq_threshold=None):
    """Assign a simplified BCG-style quadrant label to one product row.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['Market_Share']`` and ``row['Order_Frequency']``
        (e.g. a row passed by ``DataFrame.apply(..., axis=1)``, or a dict).
    market_share_threshold : number, optional
        Cut-off above which market share counts as "high". When omitted,
        the module-level ``market_share_median`` is used.
    order_freq_threshold : number, optional
        Cut-off above which order frequency counts as "high". When omitted,
        the module-level ``order_freq_median`` is used.

    Returns
    -------
    str
        "Star Products"  - share high, frequency high
        "Cash Cow"       - share high, frequency not high
        "Question Mark"  - share not high, frequency high
        "Dog Products"   - everything else (including rows where a
        comparison is False in every branch, e.g. NaN values)
    """
    # Fall back to the notebook-level medians so existing calls such as
    # product_metrics.apply(classify_product, axis=1) keep working unchanged.
    if market_share_threshold is None:
        market_share_threshold = market_share_median
    if order_freq_threshold is None:
        order_freq_threshold = order_freq_median

    if row['Market_Share'] > market_share_threshold and row['Order_Frequency'] > order_freq_threshold:
        return "Star Products"
    elif row['Market_Share'] > market_share_threshold and row['Order_Frequency'] <= order_freq_threshold:
        return "Cash Cow"
    elif row['Market_Share'] <= market_share_threshold and row['Order_Frequency'] > order_freq_threshold:
        return "Question Mark"
    else:
        return "Dog Products"

# Label every product with its quadrant.
product_metrics['Product_Category'] = product_metrics.apply(classify_product, axis=1)

# Share of total revenue per product, largest first.
product_sales_share = df.groupby('_ProductID')['Total Revenue'].sum()
product_sales_share_pct = (
    product_sales_share
    .div(product_sales_share.sum())
    .mul(100)
    .sort_values(ascending=False)
)

print("Top 15 Products by Sales Share (% of Total Revenue):")
print(product_sales_share_pct.head(15).round(3))

print("\nProduct Portfolio Classification:")
# One row per quadrant: product count, revenue, mean share and mean margin.
portfolio_summary = (
    product_metrics.groupby('Product_Category')
    .agg({
        'Total Revenue': ['count', 'sum'],
        'Market_Share': 'mean',
        'Profit Margin': 'mean',
    })
    .round(2)
)
# Replace the two-level header; order matches the aggregation spec above.
portfolio_summary.columns = [
    'Product_Count',
    'Total_Revenue',
    'Avg_Market_Share',
    'Avg_Profit_Margin',
]
print(portfolio_summary)


In [None]:
print("\n6. PRODUCT-CHANNEL PERFORMANCE MATRIX")
print("-" * 60)

# Revenue per product (rows) and channel (columns); missing pairs become 0.
product_channel_matrix = df.pivot_table(
    values='Total Revenue',
    index='_ProductID',
    columns='Sales Channel',
    aggfunc='sum',
    fill_value=0,
).round(2)

# For each channel, in order of first appearance, list its five
# highest-revenue products.
print("Top 5 Products by Revenue for Each Channel:")
for channel in df['Sales Channel'].unique():
    in_channel = df['Sales Channel'] == channel
    revenue_by_product = df.loc[in_channel].groupby('_ProductID')['Total Revenue'].sum()
    channel_products = revenue_by_product.sort_values(ascending=False).head(5)
    print(f"\n{channel}:")
    print(channel_products.round(2))

In [None]:
print("\n7. ADVANCED PRODUCT INSIGHTS")
print("-" * 60)

# Per-product discount statistics alongside revenue and units (3-decimal rounding).
discount_sensitivity = (
    df.groupby('_ProductID')
    .agg({
        'Discount Applied': ['mean', 'std', 'count'],
        'Total Revenue': 'sum',
        'Order Quantity': 'sum',
    })
    .round(3)
)

# Rename positionally; the order follows the aggregation spec above.
discount_sensitivity.columns = [
    'Avg_Discount',
    'Discount_Volatility',
    'Discount_Frequency',
    'Revenue',
    'Quantity',
]

# Revenue divided by (average discount * 100 + 1); the +1 keeps the
# denominator non-zero when the average discount is 0.
discount_points = discount_sensitivity['Avg_Discount'] * 100 + 1
discount_sensitivity['Revenue_per_Discount_Point'] = (
    discount_sensitivity['Revenue'].div(discount_points).round(2)
)

print("Top 10 Products by Discount Sensitivity (High Revenue per Discount Point):")
ranked_by_sensitivity = discount_sensitivity.sort_values('Revenue_per_Discount_Point', ascending=False)
print(ranked_by_sensitivity['Revenue_per_Discount_Point'].head(10))

# Product lifecycle analysis: revenue per product per order year.
product_lifecycle = df.groupby(['_ProductID', 'Order Year'])['Total Revenue'].sum().reset_index()
# groupby has already dropped rows with a missing year, so the cast is safe;
# it keeps year labels integral (e.g. 2019, not 2019.0) even when the source
# column was float because some dates were NaT.
product_lifecycle['Order Year'] = product_lifecycle['Order Year'].astype(int)
product_growth = product_lifecycle.pivot(index='_ProductID', columns='Order Year', values='Total Revenue').fillna(0)

# Year-over-year growth for each consecutive pair of years present in the data.
years = sorted(product_growth.columns)
growth_cols = []
for prev_year, curr_year in zip(years, years[1:]):
    col_name = f'Growth_{prev_year}_{curr_year}'
    prev_revenue = product_growth[prev_year]
    # Growth is undefined when the base year has no revenue. Leave it as NaN
    # instead of dividing by (revenue + 1), which would turn every
    # zero-to-nonzero product into an enormous percentage and push it to the
    # top of the ranking.
    growth_base = prev_revenue.where(prev_revenue != 0)
    product_growth[col_name] = ((product_growth[curr_year] - prev_revenue) / growth_base * 100).round(2)
    growth_cols.append(col_name)

# Only report when at least two years exist (otherwise there is nothing to compare).
if growth_cols:
    print("\nProduct Growth Analysis (YoY %):")
    print("Top 10 Fastest Growing Products:")
    # Row-wise mean skips NaN, so undefined year pairs do not count as 0%.
    avg_growth = product_growth[growth_cols].mean(axis=1).sort_values(ascending=False)
    print(avg_growth.head(10).round(2))


In [None]:
''' SECTION 5: TEAM & WAREHOUSE PERFORMANCE '''
# Section banner: blank line, rule, title, rule.
print("", "=" * 80, "SALES TEAM & WAREHOUSE PERFORMANCE ANALYSIS", "=" * 80, sep="\n")

print("\n8. SALES TEAM PERFORMANCE ANALYSIS")
print("-" * 60)

# Per-team revenue, profit, volume and margin statistics (2-decimal rounding).
team_performance = (
    df.groupby('_SalesTeamID')
    .agg({
        'Total Revenue': ['sum', 'mean', 'count'],
        'Total Profit': 'sum',
        'Order Quantity': 'sum',
        'Profit Margin': 'mean',
        'OrderNumber': 'nunique',
    })
    .round(2)
)
team_performance.columns = [
    f"{column}_{statistic}".strip()
    for column, statistic in team_performance.columns
]

# Team efficiency: total revenue divided by the number of distinct orders.
team_efficiency = df.groupby('_SalesTeamID').agg({
    'Total Revenue': 'sum',
    'OrderNumber': 'nunique',
})
team_efficiency = team_efficiency.assign(
    Revenue_per_Order=lambda totals: totals['Total Revenue'].div(totals['OrderNumber']).round(2),
)

print("Sales Team Performance:", team_performance, sep="\n")
print(f"\nTeam Efficiency (Revenue per Order):")
print(team_efficiency['Revenue_per_Order'].sort_values(ascending=False))

In [None]:
print("\n9. WAREHOUSE PERFORMANCE BY PRODUCT CATEGORIES")
print("-" * 60)

# Per-warehouse revenue, volume, profit, distinct orders and distinct products.
warehouse_performance = (
    df.groupby('WarehouseCode')
    .agg({
        'Total Revenue': ['sum', 'mean'],
        'Order Quantity': 'sum',
        'Total Profit': 'sum',
        'OrderNumber': 'nunique',
        '_ProductID': 'nunique',
    })
    .round(2)
)
warehouse_performance.columns = [
    label.strip() for label in map('_'.join, warehouse_performance.columns)
]

# One row per (warehouse, product) pair; counting rows per warehouse then
# gives the number of different products each warehouse handled.
warehouse_specialization = (
    df.groupby(['WarehouseCode', '_ProductID'])['Total Revenue']
    .sum()
    .reset_index()
)
warehouse_diversity = warehouse_specialization.groupby('WarehouseCode')['_ProductID'].count()

print("Warehouse Performance Overview:", warehouse_performance, sep="\n")
print(f"\nWarehouse Product Diversity (Number of Different Products):")
print(warehouse_diversity.sort_values(ascending=False))


In [None]:
''' SECTION 6: STRATEGIC INSIGHTS & RECOMMENDATIONS '''
print("\n" + "="*80)
print("STRATEGIC INSIGHTS & BUSINESS RECOMMENDATIONS")
print("="*80)

print("\n10. KEY PERFORMANCE INSIGHTS")
print("-" * 60)

# Generate key insights.
# channel_market_share_pct comes out of a groupby and is therefore ordered by
# channel name, not by share, so the leader must be looked up by value
# (idxmax / max) rather than taken from position 0.
top_channel = channel_market_share_pct.idxmax()
top_channel_share = channel_market_share_pct.max()

# Rank channels by ROI once and read both the label and the value from it.
roi_ranking = channel_profitability['ROI_Percent'].sort_values(ascending=False)
most_profitable_channel = roi_ranking.index[0]
highest_roi = roi_ranking.iloc[0]

star_products = len(product_metrics[product_metrics['Product_Category'] == 'Star Products'])
total_products = len(product_metrics)
# Guard against an empty product table to avoid ZeroDivisionError.
star_percentage = (star_products / total_products * 100) if total_products else 0.0

print(f"""
📊 SALES PERFORMANCE HIGHLIGHTS:

🏆 CHANNEL PERFORMANCE:
   • Market leader: {top_channel} ({top_channel_share:.1f}% market share)
   • Most profitable: {most_profitable_channel} ({highest_roi:.1f}% ROI)
   • Market concentration: {market_concentration} ({hhi:.3f} HHI)

🌟 PRODUCT PORTFOLIO:
   • Total products analyzed: {total_products:,}
   • Star products (high share + frequency): {star_products} ({star_percentage:.1f}%)
   • Portfolio diversification: {"Well-diversified" if star_percentage < 20 else "Concentrated"}

💰 FINANCIAL METRICS:
   • Total revenue analyzed: ${total_revenue:,.0f}
   • Average order value: ${df['Total Revenue'].mean():.2f}
   • Overall profit margin: {df['Profit Margin'].mean():.1f}%
""")


In [None]:
print("\n11. STRATEGIC RECOMMENDATIONS")
print("-" * 60)

# Rank channels by coefficient of variation in both directions; the first
# label of each ranking is the most volatile / most stable channel.
volatility_high_to_low = revenue_volatility.sort_values('Coefficient_of_Variation', ascending=False)
volatility_low_to_high = revenue_volatility.sort_values('Coefficient_of_Variation', ascending=True)
most_volatile_channel = volatility_high_to_low.index[0]
most_stable_channel = volatility_low_to_high.index[0]

# Channels whose ROI is strictly below the median ROI across channels.
roi_by_channel = channel_profitability['ROI_Percent']
below_median_roi = roi_by_channel < roi_by_channel.median()
underperforming_channels = channel_profitability[below_median_roi].index.tolist()

print(f"""
🎯 STRATEGIC RECOMMENDATIONS:

CHANNEL OPTIMIZATION:
   • Invest more in {most_profitable_channel} (highest ROI channel)
   • Stabilize {most_volatile_channel} (highest revenue volatility)
   • Model best practices from {most_stable_channel} (most stable performance)
   • Review strategy for underperforming channels: {', '.join(underperforming_channels[:3])}

PRODUCT PORTFOLIO:
   • Focus on Star Products for growth acceleration
   • Evaluate Dog Products for discontinuation or repositioning
   • Develop Question Mark products with targeted marketing
   • Maintain Cash Cow products for steady revenue

OPERATIONAL EFFICIENCY:
   • Optimize warehouse-product allocation based on specialization
   • Enhance sales team training for lower-performing teams
   • Implement dynamic pricing for high-discount-sensitivity products
""")

In [None]:
print("\n12. PERFORMANCE MONITORING FRAMEWORK")
print("-" * 60)

# Metric name -> what to track, listed in display order.
monitoring_metrics = dict([
    ('Revenue Growth (MoM)', 'Track monthly revenue changes by channel'),
    ('Market Share Stability', 'Monitor HHI and channel concentration'),
    ('Product Portfolio Balance', 'Maintain optimal Star/Cash Cow ratio'),
    ('Channel ROI', 'Ensure all channels exceed minimum ROI threshold'),
    ('Revenue Volatility', 'Keep CV below industry benchmarks'),
    ('Customer Acquisition Cost', 'Track cost efficiency by channel'),
])

print("KEY METRICS FOR ONGOING MONITORING:")
for metric, description in monitoring_metrics.items():
    print("   • " + metric + ": " + description)

# Closing banner: blank line, rule, title, rule.
print("", "=" * 80, "SALES PERFORMANCE ANALYSIS COMPLETE", "=" * 80, sep="\n")