In [None]:
import pandas as pd 

# Read the cleaned sales CSV from the processed-data folder into `df`.
df = pd.read_csv(
    filepath_or_buffer='../data/processed/fully_cleaned_us_regional_sales_data.csv'
)

In [None]:
print("=" * 80)
print("COMPREHENSIVE TIME-BASED GROWTH ANALYSIS")
print("=" * 80)

# Convert each date column to datetime64 so later steps work on real dates.
for date_col in ('OrderDate', 'ShipDate', 'DeliveryDate'):
    df[date_col] = pd.to_datetime(df[date_col])

''' SECTION 1: BASIC TIME-BASED PERFORMANCE '''
print("\n1. MONTHLY PERFORMANCE BY CHANNEL")
print("-" * 50)

# One row per (month, channel): summed revenue/quantity/profit, the number of
# distinct orders, and the mean profit margin.
monthly_metrics = {
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'nunique',
    'Total Profit': 'sum',
    'Profit Margin': 'mean'
}
monthly_channel_performance = (
    df.groupby(['Order Month', 'Sales Channel'])
    .agg(monthly_metrics)
    .reset_index()
)
print(monthly_channel_performance.round(2))


In [None]:
print("\n2. QUARTERLY PERFORMANCE BY CHANNEL")
print("-" * 50)

# One row per (quarter, channel) with the same metrics as the monthly table.
quarterly_metrics = {
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'nunique',
    'Total Profit': 'sum',
    'Profit Margin': 'mean'
}
quarterly_channel_performance = (
    df.groupby(['Order Quarter', 'Sales Channel'])
    .agg(quarterly_metrics)
    .reset_index()
)
print(quarterly_channel_performance.round(2))


In [None]:
print("\n3. YEARLY PERFORMANCE BY CHANNEL")
print("-" * 50)

# One row per (year, channel). On top of the monthly/quarterly metrics this
# table also carries the mean unit price and mean discount.
yearly_metrics = {
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'nunique',
    'Total Profit': 'sum',
    'Profit Margin': 'mean',
    'Unit Price': 'mean',
    'Discount Applied': 'mean'
}
yearly_channel_performance = (
    df.groupby(['Order Year', 'Sales Channel'])
    .agg(yearly_metrics)
    .reset_index()
)
print(yearly_channel_performance.round(2))

In [None]:
''' SECTION 2: ADVANCED GROWTH METRICS '''
print("\n" + "="*80)
print("ADVANCED GROWTH ANALYSIS")
print("="*80)

print("\n4. YEAR-OVER-YEAR GROWTH ANALYSIS")
print("-" * 50)

# Source metric -> name of the derived year-over-year growth column.
yoy_columns = {
    'Total Revenue': 'YoY Revenue Growth (%)',
    'OrderNumber': 'YoY Orders Growth (%)',
    'Order Quantity': 'YoY Quantity Growth (%)',
    'Total Profit': 'YoY Profit Growth (%)',
}

yearly_growth = yearly_channel_performance.copy()
# pct_change runs within each channel in row order; the rows come from a
# groupby keyed first on 'Order Year', so each channel's rows are chronological.
for metric, growth_col in yoy_columns.items():
    yearly_growth[growth_col] = (
        yearly_growth.groupby('Sales Channel')[metric].pct_change() * 100
    )

# Each channel's first year has no prior year, so its growth is undefined and
# is deliberately left as NaN. Filling it with 0 would report a fabricated 0%
# growth and drag down every mean/std computed from these columns later
# (pandas mean/std skip NaN by default).
yearly_growth = yearly_growth.round(2)

print("Detailed Year-over-Year Growth:")
print(yearly_growth[['Order Year', 'Sales Channel', 'Total Revenue', 'YoY Revenue Growth (%)', 
                    'YoY Orders Growth (%)', 'YoY Profit Growth (%)']].round(2))


In [None]:
print("\n5. COMPOUND ANNUAL GROWTH RATE (CAGR) BY CHANNEL")
print("-" * 50)

def calculate_cagr(df, value_col, channel_col, year_col):
    """Calculate the compound annual growth rate of ``value_col`` per channel.

    For every distinct value of ``channel_col`` the rows are ordered by
    ``year_col`` and the first and last rows are compared::

        CAGR (%) = ((last / first) ** (1 / years) - 1) * 100

    where ``years`` is the last ``year_col`` value minus the first.

    A channel is skipped when it has fewer than two rows, when its first
    value is not strictly positive, or when the year span is not strictly
    positive.

    Args:
        df: DataFrame holding one row per (year, channel).
        value_col: Name of the column whose growth is measured.
        channel_col: Name of the column identifying the channel.
        year_col: Name of the numeric year column.

    Returns:
        DataFrame with the columns 'Sales Channel', 'First Year', 'Last Year',
        'First Year Revenue', 'Last Year Revenue' and 'CAGR (%)'. These names
        are fixed regardless of ``value_col``/``channel_col``. The columns are
        present even when no channel qualifies, so callers can sort or select
        on them without hitting a KeyError.
    """
    result_columns = [
        'Sales Channel',
        'First Year',
        'Last Year',
        'First Year Revenue',
        'Last Year Revenue',
        'CAGR (%)',
    ]
    cagr_results = []

    for channel in df[channel_col].unique():
        channel_data = df[df[channel_col] == channel].sort_values(year_col)
        if len(channel_data) < 2:
            continue

        # Column-wise lookups keep each column's own dtype; pulling a whole
        # row out first would coerce every value to one common dtype.
        first_year = channel_data[year_col].iloc[0]
        last_year = channel_data[year_col].iloc[-1]
        first_year_value = channel_data[value_col].iloc[0]
        last_year_value = channel_data[value_col].iloc[-1]
        years = last_year - first_year

        # Written as "not (a and b)" so NaN inputs are skipped as well.
        if not (first_year_value > 0 and years > 0):
            continue

        cagr = (((last_year_value / first_year_value) ** (1 / years)) - 1) * 100
        cagr_results.append({
            'Sales Channel': channel,
            'First Year': first_year,
            'Last Year': last_year,
            'First Year Revenue': first_year_value,
            'Last Year Revenue': last_year_value,
            'CAGR (%)': cagr
        })

    return pd.DataFrame(cagr_results, columns=result_columns)

# Revenue CAGR per channel from the yearly table, fastest-growing first.
cagr_analysis = calculate_cagr(
    df=yearly_channel_performance,
    value_col='Total Revenue',
    channel_col='Sales Channel',
    year_col='Order Year',
)
cagr_analysis = cagr_analysis.sort_values(by='CAGR (%)', ascending=False)
print("Revenue CAGR by Channel:")
print(cagr_analysis.round(2))


In [None]:
print("\n6. QUARTERLY GROWTH TRENDS")
print("-" * 50)

# How many of the most recent quarters the printed table should cover.
RECENT_QUARTERS = 8

quarterly_growth = quarterly_channel_performance.copy()
quarterly_growth['Quarter_ID'] = quarterly_growth['Order Quarter'].astype(str)
# Growth is computed within each channel in row order.
# NOTE(review): this assumes 'Order Quarter' sorts chronologically - confirm.
quarterly_growth['QoQ Revenue Growth (%)'] = (
    quarterly_growth.groupby('Sales Channel')['Total Revenue'].pct_change() * 100
)
# Each channel's first quarter has no previous quarter; its growth stays NaN
# instead of being shown as a misleading 0%.
quarterly_growth = quarterly_growth.round(2)

# Pick the last N distinct quarters rather than a fixed number of rows, so the
# table really covers N quarters however many sales channels there are.
recent_quarters = quarterly_growth['Order Quarter'].drop_duplicates().tail(RECENT_QUARTERS)
recent_rows = quarterly_growth[quarterly_growth['Order Quarter'].isin(recent_quarters)]

print("Quarter-over-Quarter Growth (Last 8 quarters):")
print(recent_rows[['Order Quarter', 'Sales Channel', 'Total Revenue', 'QoQ Revenue Growth (%)']].round(2))


In [None]:
print("\n7. MONTHLY GROWTH VOLATILITY ANALYSIS")
print("-" * 50)

mom_col = 'MoM Revenue Growth (%)'

# Month-over-month revenue change within each channel, in row order.
# NOTE(review): assumes 'Order Month' sorts chronologically - confirm.
monthly_growth = monthly_channel_performance.copy()
monthly_growth[mom_col] = (
    monthly_growth.groupby('Sales Channel')['Total Revenue'].pct_change() * 100
)

# Spread, average and extremes of the monthly growth rate per channel.
volatility_analysis = (
    monthly_growth.groupby('Sales Channel')[mom_col]
    .agg(['std', 'mean', 'min', 'max'])
    .round(2)
)
volatility_analysis.columns = ['Growth Volatility (Std)', 'Avg Monthly Growth (%)', 'Min Monthly Growth (%)', 'Max Monthly Growth (%)']
# Most volatile channel first.
volatility_analysis = volatility_analysis.sort_values(by='Growth Volatility (Std)', ascending=False)

print("Monthly Growth Volatility by Channel:")
print(volatility_analysis)


In [None]:
''' SECTION 3: SEASONAL AND TREND ANALYSIS '''
print("\n" + "=" * 80)
print("SEASONAL & TREND ANALYSIS")
print("=" * 80)

print("\n8. SEASONAL PERFORMANCE PATTERNS")
print("-" * 50)

# Per (month, channel): mean and std of row-level revenue, mean quantity and
# the number of rows with an order number.
seasonal_analysis = (
    df.groupby(['Order Month', 'Sales Channel'])
    .agg({
        'Total Revenue': ['mean', 'std'],
        'Order Quantity': 'mean',
        'OrderNumber': 'count'
    })
    .round(2)
)
seasonal_analysis.columns = ['Avg Revenue', 'Revenue Std', 'Avg Quantity', 'Order Count']


def _summarise_channel_seasonality(channel_rows):
    """Summarise one channel's monthly rows into peak/low months and spread.

    The index of ``channel_rows`` is (Order Month, Sales Channel), so the
    first element of the idxmax/idxmin label is the month.
    """
    avg_revenue = channel_rows['Avg Revenue']
    has_rows = len(channel_rows) > 0
    return pd.Series({
        'Peak Month': avg_revenue.idxmax()[0] if has_rows else None,
        'Peak Revenue': avg_revenue.max(),
        'Low Month': avg_revenue.idxmin()[0] if has_rows else None,
        'Low Revenue': avg_revenue.min(),
        # Peak-to-trough range as a percentage of the mean monthly average.
        'Seasonality Index': (avg_revenue.max() - avg_revenue.min()) / avg_revenue.mean() * 100
    })


seasonal_summary = (
    seasonal_analysis.groupby('Sales Channel')
    .apply(_summarise_channel_seasonality)
    .round(2)
)

print("Seasonal Performance Summary by Channel:")
print(seasonal_summary)


In [None]:
print("\n9. BUSINESS LIFECYCLE ANALYSIS")
print("-" * 50)

# Map an average growth rate and its volatility to a lifecycle label
def classify_growth_stage(avg_growth, volatility):
    """Return a growth-stage label for a channel.

    Bands are checked from the highest growth downwards:
      * growth > 20: "Stable Growth" if volatility < 15, otherwise
        "High Growth (Volatile)"
      * growth > 5: "Mature Growth" if volatility < 10, otherwise
        "Moderate Growth (Volatile)"
      * growth > -5: "Mature/Declining"
      * anything else: "Declining"
    """
    if avg_growth > 20:
        return "Stable Growth" if volatility < 15 else "High Growth (Volatile)"
    if avg_growth > 5:
        return "Mature Growth" if volatility < 10 else "Moderate Growth (Volatile)"
    if avg_growth > -5:
        return "Mature/Declining"
    return "Declining"

# Build one summary record per channel and create the frame once at the end,
# instead of re-concatenating a DataFrame on every loop iteration.
lifecycle_columns = ['Sales Channel', 'Avg YoY Growth (%)', 'Growth Volatility', 'Growth Stage']
lifecycle_records = []
for channel in df['Sales Channel'].unique():
    channel_yearly = (
        yearly_growth[yearly_growth['Sales Channel'] == channel]
        .sort_values('Order Year')
    )
    if len(channel_yearly) > 1:
        # A channel's first year has no prior year to compare against, so its
        # YoY figure is a placeholder, not a measurement. Drop it so it cannot
        # dilute the average or distort the volatility.
        yoy_growth = channel_yearly['YoY Revenue Growth (%)'].iloc[1:]
        avg_growth = yoy_growth.mean()
        # With a single real growth observation the std is NaN; the stage
        # classifier then falls into its "volatile" branches.
        growth_volatility = yoy_growth.std()

        lifecycle_records.append({
            'Sales Channel': channel,
            'Avg YoY Growth (%)': avg_growth,
            'Growth Volatility': growth_volatility,
            'Growth Stage': classify_growth_stage(avg_growth, growth_volatility)
        })

# Explicit columns keep the sort below valid even when no channel qualified.
lifecycle_analysis = pd.DataFrame(lifecycle_records, columns=lifecycle_columns)
lifecycle_analysis = lifecycle_analysis.sort_values('Avg YoY Growth (%)', ascending=False)
print("Business Lifecycle Analysis:")
print(lifecycle_analysis.round(2))


In [None]:
print("\n10. PERFORMANCE ACCELERATION/DECELERATION")
print("-" * 50)

# Growth acceleration = change in a channel's YoY revenue growth rate from one
# year to the next.
acceleration_analysis = yearly_growth.copy()
channel_groups = acceleration_analysis.groupby('Sales Channel')
growth_change = channel_groups['YoY Revenue Growth (%)'].diff()

# Acceleration needs two real growth figures, i.e. three years of data. A
# channel's first row has no growth rate and its second row has no previous
# growth rate to compare with, so both are blanked out. Otherwise the second
# row would compare a real growth rate against a placeholder and skew the mean.
# Rows are assumed to be in chronological order within each channel.
year_position = channel_groups.cumcount()
acceleration_analysis['Growth Acceleration'] = growth_change.where(year_position >= 2)

# 'last' picks each channel's most recent non-null acceleration.
recent_acceleration = (
    acceleration_analysis.groupby('Sales Channel')['Growth Acceleration']
    .agg(['last', 'mean'])
    .round(2)
)
recent_acceleration.columns = ['Latest Acceleration', 'Avg Acceleration']
recent_acceleration = recent_acceleration.sort_values('Latest Acceleration', ascending=False)

print("Growth Acceleration Analysis:")
print(recent_acceleration)

In [None]:
''' SECTION 4: ADVANCED INSIGHTS '''
print("\n" + "="*80)
print("STRATEGIC INSIGHTS & RECOMMENDATIONS")
print("="*80)

print("\n11. CHANNEL PERFORMANCE RANKING")
print("-" * 50)

# Create comprehensive channel scoring. Records are collected in a list and
# turned into a DataFrame once, instead of concatenating inside the loop.
score_columns = [
    'Sales Channel',
    'Total Revenue (M)',
    'Avg Profit Margin (%)',
    'Avg YoY Growth (%)',
    'Growth Stability',
    'Composite Score',
]
score_records = []
for channel in df['Sales Channel'].unique():
    channel_data = yearly_channel_performance[yearly_channel_performance['Sales Channel'] == channel]
    growth_data = (
        yearly_growth[yearly_growth['Sales Channel'] == channel]
        .sort_values('Order Year')
    )

    if len(channel_data) > 0 and len(growth_data) > 0:
        # The first year has no prior year, so its YoY value is a placeholder;
        # leave it out of the growth statistics.
        yoy_growth = growth_data['YoY Revenue Growth (%)'].iloc[1:]

        total_revenue_m = channel_data['Total Revenue'].sum() / 1000000
        avg_profit_margin = channel_data['Profit Margin'].mean()
        avg_growth = yoy_growth.mean()
        growth_volatility = yoy_growth.std()

        # Composite score (weighted). Volatility is subtracted so that steadier
        # growth scores higher.
        # NOTE(review): the components are not normalised to a common scale
        # before weighting - confirm the weights behave as intended.
        composite_score = (
            total_revenue_m * 0.3 +        # Revenue scale
            avg_profit_margin * 0.25 +     # Profitability
            avg_growth * 0.3 -             # Growth rate
            growth_volatility * 0.15       # Growth stability (penalty)
        )

        score_records.append({
            'Sales Channel': channel,
            'Total Revenue (M)': total_revenue_m,
            'Avg Profit Margin (%)': avg_profit_margin,
            'Avg YoY Growth (%)': avg_growth,
            # Reported as the std of YoY growth (lower means more stable).
            'Growth Stability': growth_volatility,
            'Composite Score': composite_score
        })

# Explicit columns keep the sort valid even if no channel produced a record.
channel_scores = pd.DataFrame(score_records, columns=score_columns)
channel_scores = channel_scores.sort_values('Composite Score', ascending=False).round(2)
print("Channel Performance Ranking (Composite Score):")
print(channel_scores)

In [None]:
print("\n12. RISK & OPPORTUNITY ASSESSMENT")
print("-" * 50)

# One record per channel, collected in a list and converted to a DataFrame
# once (avoids repeated concatenation inside the loop).
risk_columns = [
    'Sales Channel',
    'Recent Trend (%)',
    'Volatility',
    'Negative Years',
    'Risk Level',
    'Opportunity Assessment',
]
risk_records = []
for channel in df['Sales Channel'].unique():
    channel_rows = (
        yearly_growth[yearly_growth['Sales Channel'] == channel]
        .sort_values('Order Year')
    )

    if len(channel_rows) > 1:
        # Skip the first year: it has no prior year, so its YoY value is a
        # placeholder that would pull the trend and volatility toward zero.
        channel_growth = channel_rows['YoY Revenue Growth (%)'].iloc[1:]

        recent_trend = channel_growth.tail(2).mean()  # Last 2 growth figures
        volatility = channel_growth.std()
        negative_years = (channel_growth < 0).sum()

        # Risk assessment. A single growth observation gives a NaN std; say so
        # explicitly rather than letting it fall through to "Low".
        if pd.isna(volatility):
            risk_level = "Insufficient Data"
        elif volatility > 30:
            risk_level = "High"
        elif volatility > 15:
            risk_level = "Medium"
        else:
            risk_level = "Low"

        # Opportunity assessment
        if recent_trend > 15:
            opportunity = "High Growth Potential"
        elif recent_trend > 5:
            opportunity = "Moderate Growth"
        elif recent_trend > -5:
            opportunity = "Stable"
        else:
            opportunity = "Turnaround Needed"

        risk_records.append({
            'Sales Channel': channel,
            'Recent Trend (%)': recent_trend,
            'Volatility': volatility,
            'Negative Years': negative_years,
            'Risk Level': risk_level,
            'Opportunity Assessment': opportunity
        })

# Explicit columns keep the sort valid even if no channel produced a record.
risk_opportunity = pd.DataFrame(risk_records, columns=risk_columns)
risk_opportunity = risk_opportunity.sort_values('Recent Trend (%)', ascending=False).round(2)
print("Risk & Opportunity Assessment:")
print(risk_opportunity)


In [None]:
print("\n13. KEY INSIGHTS SUMMARY")
print("-" * 50)

# Mean year-over-year change in total revenue across all channels.
revenue_by_year = df.groupby('Order Year')['Total Revenue'].sum()
total_revenue_growth = revenue_by_year.pct_change().mean() * 100

# Each headline channel is the top row of an already-sorted table, or "N/A"
# when that table is empty.
if channel_scores.empty:
    best_performing_channel = "N/A"
else:
    best_performing_channel = channel_scores.iloc[0]['Sales Channel']

if cagr_analysis.empty:
    highest_growth_channel = "N/A"
else:
    highest_growth_channel = cagr_analysis.iloc[0]['Sales Channel']

if volatility_analysis.empty:
    most_volatile_channel = "N/A"
else:
    most_volatile_channel = volatility_analysis.index[0]

print(f"""
KEY BUSINESS INSIGHTS:

📈 GROWTH PERFORMANCE:
   • Overall average YoY growth: {total_revenue_growth:.1f}%
   • Best performing channel: {best_performing_channel}
   • Highest growth channel (CAGR): {highest_growth_channel}
   • Most volatile channel: {most_volatile_channel}

🎯 STRATEGIC RECOMMENDATIONS:
   • Focus investment on channels with high composite scores
   • Monitor volatile channels for risk management
   • Investigate seasonal patterns for demand planning
   • Consider portfolio rebalancing based on growth stages

📊 DATA QUALITY NOTES:
   • Analysis covers {df['Order Year'].nunique()} year(s) of data
   • {df['Sales Channel'].nunique()} channels analyzed
   • {len(df):,} total transactions processed
""")

print("\n" + "=" * 30)
print("ANALYSIS COMPLETE")
print("=" * 30)