In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

# Read the processed sales CSV into the working frame used by every later cell.
# NOTE(review): this is an absolute, machine-specific path — the notebook only
# runs where this exact file exists; consider a configurable data directory.
processed_sales_csv = '/home/abdou-salam/Desktop/us-regional-sales-analysis/data/processed/fully_cleaned_us_regional_sales_data.csv'
df = pd.read_csv(processed_sales_csv)

In [None]:
print("\n1. DELIVERY TIME CALCULATIONS")
print("-" * 50)

# Durations between fulfillment milestones, computed as column differences.
# Negative differences are clipped to 0 rather than dropped.
# NOTE(review): assumes the 'Order Day' / 'Ship Day' / 'Delivery Day' columns
# are numeric and directly comparable — confirm against the cleaning step
# (e.g. day-of-month values would go negative across a month boundary and be
# clipped to 0 here).
df['Processing_Time'] = (df['Ship Day'] - df['Order Day']).clip(lower=0)
df['Delivery_Time'] = (df['Delivery Day'] - df['Ship Day']).clip(lower=0)
df['Total_Fulfillment_Time'] = (df['Delivery Day'] - df['Order Day']).clip(lower=0)

# Bucket each duration into performance categories.
# pd.cut bins are right-closed by default, so without include_lowest=True the
# first bin is (0, upper] and a duration of exactly 0 days — which the clipping
# above produces — would be left uncategorised (NaN). include_lowest=True makes
# the first bin contain 0, matching the "(0-1)", "(0-2)", "(0-3)" ranges
# printed below.
df['Processing_Category'] = pd.cut(df['Processing_Time'],
                                 bins=[0, 1, 3, 7, float('inf')],
                                 labels=['Same Day', '1-3 Days', '4-7 Days', '7+ Days'],
                                 include_lowest=True)

df['Delivery_Category'] = pd.cut(df['Delivery_Time'],
                               bins=[0, 2, 5, 10, float('inf')],
                               labels=['Express', 'Standard', 'Slow', 'Very Slow'],
                               include_lowest=True)

df['Fulfillment_Category'] = pd.cut(df['Total_Fulfillment_Time'],
                                  bins=[0, 3, 7, 14, float('inf')],
                                  labels=['Fast', 'Standard', 'Slow', 'Very Slow'],
                                  include_lowest=True)

print("Delivery Time Categories Created:")
print("Processing: Same Day (0-1), 1-3 Days, 4-7 Days, 7+ Days")
print("Delivery: Express (0-2), Standard (3-5), Slow (6-10), Very Slow (10+ days)")
print("Fulfillment: Fast (0-3), Standard (4-7), Slow (8-14), Very Slow (14+ days)")


In [None]:
print("\n2. DETAILED DELIVERY EFFICIENCY BY SALES CHANNEL")
print("-" * 50)

# The same five summary statistics are taken for every duration column;
# OrderNumber contributes a plain row count per channel.
duration_summary_stats = ['mean', 'median', 'std', 'min', 'max']
channel_efficiency_spec = {
    'Processing_Time': list(duration_summary_stats),
    'Delivery_Time': list(duration_summary_stats),
    'Total_Fulfillment_Time': list(duration_summary_stats),
    'OrderNumber': 'count',
}
delivery_efficiency = df.groupby('Sales Channel').agg(channel_efficiency_spec).round(2)

# Flatten the two-level (column, statistic) header into single names such as
# 'Processing_Time_mean'; later cells look columns up by these flat names.
flattened_names = []
for column_and_stat in delivery_efficiency.columns:
    flattened_names.append('_'.join(column_and_stat).strip())
delivery_efficiency.columns = flattened_names

print("Comprehensive Delivery Efficiency by Sales Channel:")
print(delivery_efficiency)


In [None]:
print("\n3. DELIVERY PERFORMANCE BENCHMARKING")
print("-" * 50)


def _percentile_of(q):
    """Return an aggregator computing the q-th percentile of a group via np.percentile."""
    # A lambda is returned on purpose: the original passes several lambdas per
    # column, and the aggregators here keep that same anonymous-callable form.
    return lambda x: np.percentile(x, q)


# Percentiles reported per channel, and the short prefix used for each
# duration column in the output header.
benchmark_percentiles = (25, 75, 95)
benchmark_prefix_by_column = {
    'Processing_Time': 'Processing',
    'Delivery_Time': 'Delivery',
    'Total_Fulfillment_Time': 'Fulfillment',
}

benchmark_spec = {}
for duration_column in benchmark_prefix_by_column:
    benchmark_spec[duration_column] = [_percentile_of(q) for q in benchmark_percentiles]

performance_benchmarks = df.groupby('Sales Channel').agg(benchmark_spec).round(2)

# Replace the auto-generated lambda headers with '<Prefix>_P<q>' names, in the
# same column-major order the aggregation produces.
benchmark_headers = []
for prefix in benchmark_prefix_by_column.values():
    for q in benchmark_percentiles:
        benchmark_headers.append(f'{prefix}_P{q}')
performance_benchmarks.columns = benchmark_headers

print("Performance Benchmarks by Channel (25th, 75th, 95th percentiles):")
print(performance_benchmarks)


In [None]:
print("\n4. SERVICE LEVEL ANALYSIS")
print("-" * 50)


def _pct_within(max_days):
    """Return an aggregator giving the percentage of a group's values that are <= max_days."""
    return lambda x: (x <= max_days).mean() * 100


# Three thresholds (in days) per duration column; each yields the share of
# orders in a channel that met the threshold.
processing_targets = [_pct_within(1), _pct_within(3), _pct_within(7)]     # same day / 3-day / 1-week
delivery_targets = [_pct_within(2), _pct_within(5), _pct_within(10)]      # express / standard / within 10 days
fulfillment_targets = [_pct_within(3), _pct_within(7), _pct_within(14)]   # fast / standard / within 2 weeks

service_levels = (
    df.groupby('Sales Channel')
    .agg({
        'Processing_Time': processing_targets,
        'Delivery_Time': delivery_targets,
        'Total_Fulfillment_Time': fulfillment_targets,
    })
    .round(2)
)

# Headers are assigned positionally, in the same order as the thresholds above.
service_level_headers = [
    'Same_Day_Processing_%', '3Day_Processing_%', '1Week_Processing_%',
    'Express_Delivery_%', 'Standard_Delivery_%', '10Day_Delivery_%',
    'Fast_Fulfillment_%', 'Standard_Fulfillment_%', '2Week_Fulfillment_%',
]
service_levels.columns = service_level_headers

print("Service Level Achievement by Channel (% of orders meeting targets):")
print(service_levels)

In [None]:
print("\n5. CHANNEL PERFORMANCE RANKING")
print("-" * 50)

# Mean durations and order count per sales channel; values are rounded to two
# decimals before any scoring is done.
channel_means_spec = {
    'Processing_Time': 'mean',
    'Delivery_Time': 'mean',
    'Total_Fulfillment_Time': 'mean',
    'OrderNumber': 'count',
}
channel_performance = df.groupby('Sales Channel').agg(channel_means_spec).round(2)

# Slowest channel mean for each metric; it is the baseline every score is
# measured against.
max_processing = channel_performance['Processing_Time'].max()
max_delivery = channel_performance['Delivery_Time'].max()
max_fulfillment = channel_performance['Total_Fulfillment_Time'].max()

# Score = percentage by which a channel beats the slowest one (lower time =>
# higher score). The slowest channel scores 0.
# NOTE(review): with this formula a channel reaches 100 only when its mean time
# is 0, so the printed "100 = Best Performance" legend is an upper bound, not
# the score of the top-ranked channel.
processing_gap = max_processing - channel_performance['Processing_Time']
channel_performance['Processing_Score'] = processing_gap / max_processing * 100

delivery_gap = max_delivery - channel_performance['Delivery_Time']
channel_performance['Delivery_Score'] = delivery_gap / max_delivery * 100

fulfillment_gap = max_fulfillment - channel_performance['Total_Fulfillment_Time']
channel_performance['Fulfillment_Score'] = fulfillment_gap / max_fulfillment * 100

# Weighted blend: 30% processing, 40% delivery, 30% total fulfillment.
weighted_processing = channel_performance['Processing_Score'] * 0.3
weighted_delivery = channel_performance['Delivery_Score'] * 0.4
weighted_fulfillment = channel_performance['Fulfillment_Score'] * 0.3
channel_performance['Composite_Score'] = (
    weighted_processing + weighted_delivery + weighted_fulfillment
).round(2)

# Best composite score first.
channel_performance_ranked = channel_performance.sort_values('Composite_Score', ascending=False)
ranking_columns = ['Processing_Time', 'Delivery_Time', 'Total_Fulfillment_Time', 'Composite_Score']
print("Channel Performance Ranking (100 = Best Performance):")
print(channel_performance_ranked[ranking_columns])

In [None]:
# SECTION 2: ADVANCED WAREHOUSE ANALYSIS
section_rule = "=" * 80
print("\n" + section_rule)
print("WAREHOUSE UTILIZATION & PERFORMANCE ANALYSIS")
print(section_rule)

print("\n6. WAREHOUSE REVENUE & VOLUME ANALYSIS")
print("-" * 50)

# Totals and per-order averages for revenue, quantity and profit, plus order
# count, mean profit margin and mean unit price, for each warehouse.
total_and_average = ['sum', 'mean']
warehouse_metrics_spec = {
    'Total Revenue': list(total_and_average),
    'Order Quantity': list(total_and_average),
    'OrderNumber': 'count',
    'Total Profit': list(total_and_average),
    'Profit Margin': 'mean',
    'Unit Price': 'mean',
}
warehouse_performance = df.groupby('WarehouseCode').agg(warehouse_metrics_spec).round(2)

# Flatten the (column, statistic) header pairs into names like 'Total Revenue_sum'.
flat_warehouse_headers = []
for column_and_stat in warehouse_performance.columns:
    flat_warehouse_headers.append('_'.join(column_and_stat).strip())
warehouse_performance.columns = flat_warehouse_headers

print("Warehouse Performance Metrics:")
print(warehouse_performance)




In [None]:
print("\n7. WAREHOUSE DELIVERY EFFICIENCY")
print("-" * 50)

# Centre and spread of each duration, per warehouse.
spread_stats = ['mean', 'median', 'std']
warehouse_delivery = (
    df.groupby('WarehouseCode')
    .agg({
        'Processing_Time': list(spread_stats),
        'Delivery_Time': list(spread_stats),
        'Total_Fulfillment_Time': list(spread_stats),
    })
    .round(2)
)

# Flatten the (column, statistic) header pairs into names like 'Delivery_Time_std'.
flat_delivery_headers = []
for column_and_stat in warehouse_delivery.columns:
    flat_delivery_headers.append('_'.join(column_and_stat).strip())
warehouse_delivery.columns = flat_delivery_headers

print("Warehouse Delivery Efficiency:")
print(warehouse_delivery)

# Inputs for the efficiency ranking: mean durations plus revenue and order
# volume, rounded to two decimals before scoring.
warehouse_score_spec = {
    'Processing_Time': 'mean',
    'Delivery_Time': 'mean',
    'Total_Fulfillment_Time': 'mean',
    'Total Revenue': 'sum',
    'OrderNumber': 'count',
}
warehouse_efficiency_score = df.groupby('WarehouseCode').agg(warehouse_score_spec).round(2)

# Slowest warehouse mean for each metric, used as the scoring baseline.
max_proc_wh = warehouse_efficiency_score['Processing_Time'].max()
max_del_wh = warehouse_efficiency_score['Delivery_Time'].max()
max_fulfill_wh = warehouse_efficiency_score['Total_Fulfillment_Time'].max()

# Each component is the fractional improvement over the slowest warehouse,
# scaled to its share of the score: 30 (processing), 40 (delivery),
# 30 (total fulfillment).
processing_points = (max_proc_wh - warehouse_efficiency_score['Processing_Time']) / max_proc_wh * 30
delivery_points = (max_del_wh - warehouse_efficiency_score['Delivery_Time']) / max_del_wh * 40
fulfillment_points = (max_fulfill_wh - warehouse_efficiency_score['Total_Fulfillment_Time']) / max_fulfill_wh * 30
warehouse_efficiency_score['Efficiency_Score'] = (
    processing_points + delivery_points + fulfillment_points
).round(2)

# Highest efficiency score first.
warehouse_ranked = warehouse_efficiency_score.sort_values('Efficiency_Score', ascending=False)
print("\nWarehouse Efficiency Ranking:")
print(warehouse_ranked)


In [None]:
print("\n8. WAREHOUSE-CHANNEL OPTIMIZATION MATRIX")
print("-" * 50)

# Total revenue for every (warehouse, channel) pair; pairs with no orders are
# shown as 0 instead of NaN.
revenue_pivot = pd.pivot_table(
    df,
    index='WarehouseCode',
    columns='Sales Channel',
    values='Total Revenue',
    aggfunc='sum',
)
warehouse_channel_revenue = revenue_pivot.fillna(0)

print("Warehouse-Channel Revenue Matrix ($):")
print(warehouse_channel_revenue.round(0))

# Share of each warehouse's orders going through each channel; rows are
# normalised to sum to 1 and then scaled to percent.
channel_share_by_warehouse = pd.crosstab(
    df['WarehouseCode'],
    df['Sales Channel'],
    normalize='index',
)
warehouse_channel_dist = channel_share_by_warehouse * 100
print("\nChannel Distribution by Warehouse (%):")
print(warehouse_channel_dist.round(1))

# Mean fulfillment time, order count and revenue per warehouse-channel pair.
pair_metrics_spec = {
    'Total_Fulfillment_Time': 'mean',
    'OrderNumber': 'count',
    'Total Revenue': 'sum',
}
warehouse_channel_delivery = (
    df.groupby(['WarehouseCode', 'Sales Channel'])
    .agg(pair_metrics_spec)
    .round(2)
)

print("\nTop 10 Warehouse-Channel Combinations by Revenue:")
pairs_by_revenue = warehouse_channel_delivery.sort_values('Total Revenue', ascending=False)
top_combinations = pairs_by_revenue.head(10)
print(top_combinations)

In [None]:
print("\n9. WAREHOUSE CAPACITY & UTILIZATION INSIGHTS")
print("-" * 50)

# Per-warehouse order count, units, revenue and mean processing time.
warehouse_utilization = df.groupby('WarehouseCode').agg({
    'OrderNumber': 'count',
    'Order Quantity': 'sum',
    'Total Revenue': 'sum',
    'Processing_Time': 'mean'
}).round(2)

# Parse OrderDate in place. errors='coerce' turns unparseable values into NaT;
# format='mixed' infers the format per element and dayfirst=True resolves
# ambiguous day/month order as day-first.
df['OrderDate'] = pd.to_datetime(df['OrderDate'], errors='coerce', format='mixed', dayfirst=True)

# Whole days between the earliest and latest order date. max(..., 1) below
# keeps the division well-defined when all orders fall on a single date.
date_range = (df['OrderDate'].max() - df['OrderDate'].min()).days
warehouse_utilization['Daily_Orders'] = (warehouse_utilization['OrderNumber'] / max(date_range, 1)).round(1)
warehouse_utilization['Revenue_per_Order'] = (warehouse_utilization['Total Revenue'] / warehouse_utilization['OrderNumber']).round(2)

# Bucket daily order volume into utilization levels.
# pd.cut bins are right-closed by default, so without include_lowest=True a
# warehouse whose Daily_Orders rounds to 0.0 would fall outside the first bin
# and get a NaN level; include_lowest=True classifies it as 'Low'.
warehouse_utilization['Utilization_Level'] = pd.cut(warehouse_utilization['Daily_Orders'],
                                                   bins=[0, 10, 50, 100, float('inf')],
                                                   labels=['Low', 'Medium', 'High', 'Very High'],
                                                   include_lowest=True)

print("Warehouse Utilization Analysis:")
print(warehouse_utilization)

In [None]:
# SECTION 3: STRATEGIC INSIGHTS & RECOMMENDATIONS
print("\n" + "="*80)
print("STRATEGIC DELIVERY & WAREHOUSE INSIGHTS")
print("="*80)

print("\n10. DELIVERY PERFORMANCE SUMMARY")
print("-" * 50)

# Both ranked frames are sorted best-first, so the first row is the top
# performer and the last row the bottom one.
top_channel_row = channel_performance_ranked.iloc[0]
bottom_channel_row = channel_performance_ranked.iloc[-1]
top_warehouse_row = warehouse_ranked.iloc[0]
bottom_warehouse_row = warehouse_ranked.iloc[-1]

best_delivery_channel = channel_performance_ranked.index[0]
worst_delivery_channel = channel_performance_ranked.index[-1]
best_warehouse = warehouse_ranked.index[0]
worst_warehouse = warehouse_ranked.index[-1]

best_channel_score = top_channel_row['Composite_Score']
worst_channel_score = bottom_channel_row['Composite_Score']
best_warehouse_score = top_warehouse_row['Efficiency_Score']
worst_warehouse_score = bottom_warehouse_row['Efficiency_Score']

# Order-level means across the whole dataset (not averaged per channel).
avg_processing = df['Processing_Time'].mean()
avg_delivery = df['Delivery_Time'].mean()
avg_fulfillment = df['Total_Fulfillment_Time'].mean()

print(f"""
DELIVERY PERFORMANCE INSIGHTS:

📦 CHANNEL PERFORMANCE:
   • Best performing channel: {best_delivery_channel} (Score: {best_channel_score:.1f})
   • Worst performing channel: {worst_delivery_channel} (Score: {worst_channel_score:.1f})
   
⏱️ AVERAGE DELIVERY TIMES:
   • Processing time: {avg_processing:.1f} days
   • Delivery time: {avg_delivery:.1f} days
   • Total fulfillment: {avg_fulfillment:.1f} days
   
🏭 WAREHOUSE PERFORMANCE:
   • Most efficient warehouse: {best_warehouse} (Score: {best_warehouse_score:.1f})
   • Least efficient warehouse: {worst_warehouse} (Score: {worst_warehouse_score:.1f})
""")

print("\n11. KEY RECOMMENDATIONS")
print("-" * 50)

# Share of all orders fulfilled within 3 days, as a percentage.
fulfilled_within_three_days = df['Total_Fulfillment_Time'] <= 3
fast_fulfillment_rate = fulfilled_within_three_days.mean() * 100

# Channels whose processing-time standard deviation is above the average of
# that standard deviation across channels.
processing_std_by_channel = delivery_efficiency['Processing_Time_std']
above_average_std = processing_std_by_channel > processing_std_by_channel.mean()
high_variance_channels = delivery_efficiency[above_average_std].index.tolist()

# At most three channels are listed; 'None' is shown when the list is empty.
if high_variance_channels:
    high_variance_summary = ', '.join(high_variance_channels[:3])
else:
    high_variance_summary = 'None'

print(f"""
🎯 STRATEGIC RECOMMENDATIONS:

IMMEDIATE ACTIONS:
   • Fast fulfillment rate: {fast_fulfillment_rate:.1f}% (Target: >80%)
   • Focus on improving {worst_delivery_channel} channel delivery times
   • Optimize {worst_warehouse} warehouse operations
   
OPTIMIZATION OPPORTUNITIES:
   • High variance channels needing attention: {high_variance_summary}
   • Consider redistributing load from high-utilization warehouses
   • Implement express processing for high-value orders
   
PERFORMANCE MONITORING:
   • Track service levels against industry benchmarks
   • Monitor warehouse capacity utilization
   • Establish delivery time SLAs by channel
""")

print("\n12. OPERATIONAL METRICS DASHBOARD")
print("-" * 50)

# Headline figures over the full dataset.
revenue_per_order = df['Total Revenue']
total_orders = len(df)
total_revenue = revenue_per_order.sum()
avg_order_value = revenue_per_order.mean()

# Label -> display value. Most values are pre-formatted strings; the two
# counts at the end are left as integers and formatted by the print below.
dashboard_metrics = dict([
    ('Total Orders Processed', f"{total_orders:,}"),
    ('Total Revenue', f"${total_revenue:,.0f}"),
    ('Average Order Value', f"${avg_order_value:.2f}"),
    ('Fast Fulfillment Rate', f"{fast_fulfillment_rate:.1f}%"),
    ('Average Processing Time', f"{avg_processing:.1f} days"),
    ('Average Delivery Time', f"{avg_delivery:.1f} days"),
    ('Number of Active Warehouses', df['WarehouseCode'].nunique()),
    ('Number of Sales Channels', df['Sales Channel'].nunique()),
])

print("KEY OPERATIONAL METRICS:")
for metric in dashboard_metrics:
    value = dashboard_metrics[metric]
    print(f"   • {metric}: {value}")

closing_rule = "=" * 80
print("\n" + closing_rule)
print("DELIVERY & WAREHOUSE ANALYSIS COMPLETE")
print(closing_rule)