In [1]:
import pandas as pd 

#Load the cleaned data
df = pd.read_csv('/home/abdou-salam/Desktop/us-regional-sales-analysis/data/processed/fully_cleaned_us_regional_sales_data.csv')

In [2]:
print("="*80)
print("COMPREHENSIVE TIME-BASED GROWTH ANALYSIS")
print("="*80)

# Ensure date columns are properly formatted
df['OrderDate'] = pd.to_datetime(df['OrderDate'])
df['ShipDate'] = pd.to_datetime(df['ShipDate'])
df['DeliveryDate'] = pd.to_datetime(df['DeliveryDate'])

''' SECTION 1: BASIC TIME-BASED PERFORMANCE '''
print("\n1. MONTHLY PERFORMANCE BY CHANNEL")
print("-" * 50)

monthly_channel_performance = df.groupby(['Order Month', 'Sales Channel']).agg({
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'nunique',
    'Total Profit': 'sum',
    'Profit Margin': 'mean'
}).reset_index()
print(monthly_channel_performance.round(2))


COMPREHENSIVE TIME-BASED GROWTH ANALYSIS

1. MONTHLY PERFORMANCE BY CHANNEL
--------------------------------------------------
    Order Month Sales Channel  Total Revenue  Order Quantity  OrderNumber  \
0             1   Distributor      921395.39             379           87   
1             1      In-Store     2297415.60            1122          255   
2             1        Online     1649654.77             757          163   
3             1     Wholesale      646865.17             297           64   
4             2   Distributor      830643.35             442           98   
5             2      In-Store     1839861.14             957          207   
6             2        Online     1254878.58             608          145   
7             2     Wholesale      475515.95             251           52   
8             3   Distributor      816280.16             398           83   
9             3      In-Store     1554716.24             784          184   
10            3        Onl

In [3]:
print("\n2. QUARTERLY PERFORMANCE BY CHANNEL")
print("-" * 50)

quarterly_channel_performance = df.groupby(['Order Quarter', 'Sales Channel']).agg({
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'nunique',
    'Total Profit': 'sum',
    'Profit Margin': 'mean'
}).reset_index()
print(quarterly_channel_performance.round(2))



2. QUARTERLY PERFORMANCE BY CHANNEL
--------------------------------------------------
    Order Quarter Sales Channel  Total Revenue  Order Quantity  OrderNumber  \
0               1   Distributor     2568318.91            1219          268   
1               1      In-Store     5691992.97            2863          646   
2               1        Online     4162980.61            1961          439   
3               1     Wholesale     1474026.73             753          167   
4               2   Distributor     2731482.20            1405          312   
5               2      In-Store     6886325.46            3258          754   
6               2        Online     4863715.12            2540          563   
7               2     Wholesale     1759618.59             874          194   
8               3   Distributor     3912721.14            1855          413   
9               3      In-Store     8768631.23            4284          941   
10              3        Online     6106817

In [4]:
print("\n3. YEARLY PERFORMANCE BY CHANNEL")
print("-" * 50)

yearly_channel_performance = df.groupby(['Order Year', 'Sales Channel']).agg({
    'Total Revenue': 'sum',
    'Order Quantity': 'sum',
    'OrderNumber': 'nunique',
    'Total Profit': 'sum',
    'Profit Margin': 'mean',
    'Unit Price': 'mean',
    'Discount Applied': 'mean'
}).reset_index()
print(yearly_channel_performance.round(2))


3. YEARLY PERFORMANCE BY CHANNEL
--------------------------------------------------
    Order Year Sales Channel  Total Revenue  Order Quantity  OrderNumber  \
0         2018   Distributor     3211182.36            1519          327   
1         2018      In-Store     7210169.09            3576          771   
2         2018        Online     4910466.58            2460          548   
3         2018     Wholesale     1745797.90             927          190   
4         2019   Distributor     4613407.88            2161          483   
5         2019      In-Store    11541454.84            5583         1244   
6         2019        Online     8334421.12            4236          936   
7         2019     Wholesale     3325000.69            1657          367   
8         2020   Distributor     5324616.83            2607          565   
9         2020      In-Store    11307398.98            5719         1283   
10        2020        Online     8422946.20            4201          941   
11 

In [5]:
''' SECTION 2: ADVANCED GROWTH METRICS '''
print("\n" + "="*80)
print("ADVANCED GROWTH ANALYSIS")
print("="*80)

print("\n4. YEAR-OVER-YEAR GROWTH ANALYSIS")
print("-" * 50)

yearly_growth = yearly_channel_performance.copy()
yearly_growth['YoY Revenue Growth (%)'] = yearly_growth.groupby('Sales Channel')['Total Revenue'].pct_change() * 100
yearly_growth['YoY Orders Growth (%)'] = yearly_growth.groupby('Sales Channel')['OrderNumber'].pct_change() * 100
yearly_growth['YoY Quantity Growth (%)'] = yearly_growth.groupby('Sales Channel')['Order Quantity'].pct_change() * 100
yearly_growth['YoY Profit Growth (%)'] = yearly_growth.groupby('Sales Channel')['Total Profit'].pct_change() * 100
yearly_growth = yearly_growth.fillna(0).round(2)

print("Detailed Year-over-Year Growth:")
print(yearly_growth[['Order Year', 'Sales Channel', 'Total Revenue', 'YoY Revenue Growth (%)', 
                    'YoY Orders Growth (%)', 'YoY Profit Growth (%)']].round(2))



ADVANCED GROWTH ANALYSIS

4. YEAR-OVER-YEAR GROWTH ANALYSIS
--------------------------------------------------
Detailed Year-over-Year Growth:
    Order Year Sales Channel  Total Revenue  YoY Revenue Growth (%)  \
0         2018   Distributor     3211182.36                    0.00   
1         2018      In-Store     7210169.09                    0.00   
2         2018        Online     4910466.58                    0.00   
3         2018     Wholesale     1745797.90                    0.00   
4         2019   Distributor     4613407.88                   43.67   
5         2019      In-Store    11541454.84                   60.07   
6         2019        Online     8334421.12                   69.73   
7         2019     Wholesale     3325000.69                   90.46   
8         2020   Distributor     5324616.83                   15.42   
9         2020      In-Store    11307398.98                   -2.03   
10        2020        Online     8422946.20                    1.06   
11  

In [6]:
print("\n5. COMPOUND ANNUAL GROWTH RATE (CAGR) BY CHANNEL")
print("-" * 50)

def calculate_cagr(df, value_col, channel_col, year_col):
    """Calculate CAGR for each channel"""
    cagr_results = []
    
    for channel in df[channel_col].unique():
        channel_data = df[df[channel_col] == channel].sort_values(year_col)
        if len(channel_data) > 1:
            first_year_value = channel_data.iloc[0][value_col]
            last_year_value = channel_data.iloc[-1][value_col]
            years = channel_data.iloc[-1][year_col] - channel_data.iloc[0][year_col]
            
            if first_year_value > 0 and years > 0:
                cagr = (((last_year_value / first_year_value) ** (1/years)) - 1) * 100
                cagr_results.append({
                    'Sales Channel': channel,
                    'First Year': channel_data.iloc[0][year_col],
                    'Last Year': channel_data.iloc[-1][year_col],
                    'First Year Revenue': first_year_value,
                    'Last Year Revenue': last_year_value,
                    'CAGR (%)': cagr
                })
    
    return pd.DataFrame(cagr_results)

cagr_analysis = calculate_cagr(yearly_channel_performance, 'Total Revenue', 'Sales Channel', 'Order Year')
cagr_analysis = cagr_analysis.sort_values('CAGR (%)', ascending=False)
print("Revenue CAGR by Channel:")
print(cagr_analysis.round(2))



5. COMPOUND ANNUAL GROWTH RATE (CAGR) BY CHANNEL
--------------------------------------------------
Revenue CAGR by Channel:
  Sales Channel  First Year  Last Year  First Year Revenue  Last Year Revenue  \
3     Wholesale        2018       2020          1745797.90         3089721.75   
2        Online        2018       2020          4910466.58         8422946.20   
0   Distributor        2018       2020          3211182.36         5324616.83   
1      In-Store        2018       2020          7210169.09        11307398.98   

   CAGR (%)  
3     33.03  
2     30.97  
0     28.77  
1     25.23  


In [7]:
print("\n6. QUARTERLY GROWTH TRENDS")
print("-" * 50)

quarterly_growth = quarterly_channel_performance.copy()
quarterly_growth['Quarter_ID'] = quarterly_growth['Order Quarter'].astype(str)
quarterly_growth['QoQ Revenue Growth (%)'] = quarterly_growth.groupby('Sales Channel')['Total Revenue'].pct_change() * 100
quarterly_growth = quarterly_growth.fillna(0).round(2)

print("Quarter-over-Quarter Growth (Last 8 quarters):")
print(quarterly_growth.tail(16)[['Order Quarter', 'Sales Channel', 'Total Revenue', 'QoQ Revenue Growth (%)']].round(2))



6. QUARTERLY GROWTH TRENDS
--------------------------------------------------
Quarter-over-Quarter Growth (Last 8 quarters):
    Order Quarter Sales Channel  Total Revenue  QoQ Revenue Growth (%)
0               1   Distributor     2568318.91                    0.00
1               1      In-Store     5691992.97                    0.00
2               1        Online     4162980.61                    0.00
3               1     Wholesale     1474026.73                    0.00
4               2   Distributor     2731482.20                    6.35
5               2      In-Store     6886325.46                   20.98
6               2        Online     4863715.12                   16.83
7               2     Wholesale     1759618.59                   19.37
8               3   Distributor     3912721.14                   43.25
9               3      In-Store     8768631.23                   27.33
10              3        Online     6106817.11                   25.56
11              3     

In [8]:
print("\n7. MONTHLY GROWTH VOLATILITY ANALYSIS")
print("-" * 50)

monthly_growth = monthly_channel_performance.copy()
monthly_growth['MoM Revenue Growth (%)'] = monthly_growth.groupby('Sales Channel')['Total Revenue'].pct_change() * 100

volatility_analysis = monthly_growth.groupby('Sales Channel').agg({
    'MoM Revenue Growth (%)': ['std', 'mean', 'min', 'max']
}).round(2)
volatility_analysis.columns = ['Growth Volatility (Std)', 'Avg Monthly Growth (%)', 'Min Monthly Growth (%)', 'Max Monthly Growth (%)']
volatility_analysis = volatility_analysis.sort_values('Growth Volatility (Std)', ascending=False)

print("Monthly Growth Volatility by Channel:")
print(volatility_analysis)



7. MONTHLY GROWTH VOLATILITY ANALYSIS
--------------------------------------------------
Monthly Growth Volatility by Channel:
               Growth Volatility (Std)  Avg Monthly Growth (%)  \
Sales Channel                                                    
Wholesale                        37.61                    9.25   
Distributor                      19.72                    5.11   
In-Store                         17.25                    3.30   
Online                           15.63                    3.76   

               Min Monthly Growth (%)  Max Monthly Growth (%)  
Sales Channel                                                  
Wholesale                      -26.49                   89.36  
Distributor                    -16.95                   45.03  
In-Store                       -19.92                   31.68  
Online                         -23.93                   38.66  


In [9]:
''' SECTION 3: SEASONAL AND TREND ANALYSIS '''
print("\n" + "="*80)
print("SEASONAL & TREND ANALYSIS")
print("="*80)

print("\n8. SEASONAL PERFORMANCE PATTERNS")
print("-" * 50)

seasonal_analysis = df.groupby(['Order Month', 'Sales Channel']).agg({
    'Total Revenue': ['mean', 'std'],
    'Order Quantity': 'mean',
    'OrderNumber': 'count'
}).round(2)

seasonal_analysis.columns = ['Avg Revenue', 'Revenue Std', 'Avg Quantity', 'Order Count']
seasonal_summary = seasonal_analysis.groupby('Sales Channel').apply(
    lambda x: pd.Series({
        'Peak Month': x['Avg Revenue'].idxmax()[0] if len(x) > 0 else None,
        'Peak Revenue': x['Avg Revenue'].max(),
        'Low Month': x['Avg Revenue'].idxmin()[0] if len(x) > 0 else None,
        'Low Revenue': x['Avg Revenue'].min(),
        'Seasonality Index': (x['Avg Revenue'].max() - x['Avg Revenue'].min()) / x['Avg Revenue'].mean() * 100
    })
).round(2)

print("Seasonal Performance Summary by Channel:")
print(seasonal_summary)



SEASONAL & TREND ANALYSIS

8. SEASONAL PERFORMANCE PATTERNS
--------------------------------------------------
Seasonal Performance Summary by Channel:
               Peak Month  Peak Revenue  Low Month  Low Revenue  \
Sales Channel                                                     
Distributor          11.0      11002.63        4.0      7877.01   
In-Store             11.0       9862.84       10.0      8420.74   
Online                1.0      10120.58        4.0      8074.31   
Wholesale             4.0      10915.79        3.0      6895.01   

               Seasonality Index  
Sales Channel                     
Distributor                32.88  
In-Store                   15.86  
Online                     22.81  
Wholesale                  44.27  


In [10]:
print("\n9. BUSINESS LIFECYCLE ANALYSIS")
print("-" * 50)

# Calculate growth stage for each channel
def classify_growth_stage(avg_growth, volatility):
    """Classify business growth stage based on growth rate and volatility"""
    if avg_growth > 20:
        if volatility < 15:
            return "Stable Growth"
        else:
            return "High Growth (Volatile)"
    elif avg_growth > 5:
        if volatility < 10:
            return "Mature Growth"
        else:
            return "Moderate Growth (Volatile)"
    elif avg_growth > -5:
        return "Mature/Declining"
    else:
        return "Declining"

lifecycle_analysis = pd.DataFrame()
for channel in df['Sales Channel'].unique():
    channel_yearly = yearly_growth[yearly_growth['Sales Channel'] == channel]
    if len(channel_yearly) > 1:
        avg_growth = channel_yearly['YoY Revenue Growth (%)'].mean()
        growth_volatility = channel_yearly['YoY Revenue Growth (%)'].std()
        
        lifecycle_analysis = pd.concat([lifecycle_analysis, pd.DataFrame({
            'Sales Channel': [channel],
            'Avg YoY Growth (%)': [avg_growth],
            'Growth Volatility': [growth_volatility],
            'Growth Stage': [classify_growth_stage(avg_growth, growth_volatility)]
        })], ignore_index=True)

lifecycle_analysis = lifecycle_analysis.sort_values('Avg YoY Growth (%)', ascending=False)
print("Business Lifecycle Analysis:")
print(lifecycle_analysis.round(2))



9. BUSINESS LIFECYCLE ANALYSIS
--------------------------------------------------
Business Lifecycle Analysis:
  Sales Channel  Avg YoY Growth (%)  Growth Volatility  \
3     Wholesale               27.79              54.39   
1        Online               23.60              39.96   
2   Distributor               19.70              22.15   
0      In-Store               19.35              35.28   

                 Growth Stage  
3      High Growth (Volatile)  
1      High Growth (Volatile)  
2  Moderate Growth (Volatile)  
0  Moderate Growth (Volatile)  


In [11]:
print("\n10. PERFORMANCE ACCELERATION/DECELERATION")
print("-" * 50)

# Calculate growth acceleration (change in growth rate)
acceleration_analysis = yearly_growth.copy()
acceleration_analysis['Growth Acceleration'] = acceleration_analysis.groupby('Sales Channel')['YoY Revenue Growth (%)'].diff()

recent_acceleration = acceleration_analysis.groupby('Sales Channel').agg({
    'Growth Acceleration': ['last', 'mean']
}).round(2)
recent_acceleration.columns = ['Latest Acceleration', 'Avg Acceleration']
recent_acceleration = recent_acceleration.sort_values('Latest Acceleration', ascending=False)

print("Growth Acceleration Analysis:")
print(recent_acceleration)


10. PERFORMANCE ACCELERATION/DECELERATION
--------------------------------------------------
Growth Acceleration Analysis:
               Latest Acceleration  Avg Acceleration
Sales Channel                                       
Distributor                 -28.25              7.71
In-Store                    -62.10             -1.02
Online                      -68.67              0.53
Wholesale                   -97.54             -3.54


In [12]:
''' SECTION 4: ADVANCED INSIGHTS '''
print("\n" + "="*80)
print("STRATEGIC INSIGHTS & RECOMMENDATIONS")
print("="*80)

print("\n11. CHANNEL PERFORMANCE RANKING")
print("-" * 50)

# Create comprehensive channel scoring
channel_scores = pd.DataFrame()
for channel in df['Sales Channel'].unique():
    channel_data = yearly_channel_performance[yearly_channel_performance['Sales Channel'] == channel]
    growth_data = yearly_growth[yearly_growth['Sales Channel'] == channel]
    
    if len(channel_data) > 0 and len(growth_data) > 0:
        total_revenue = channel_data['Total Revenue'].sum()
        avg_profit_margin = channel_data['Profit Margin'].mean()
        avg_growth = growth_data['YoY Revenue Growth (%)'].mean()
        growth_stability = -growth_data['YoY Revenue Growth (%)'].std()  # Negative std for scoring
        
        # Composite score (weighted)
        composite_score = (
            (total_revenue / 1000000) * 0.3 +  # Revenue scale
            avg_profit_margin * 0.25 +         # Profitability
            avg_growth * 0.3 +                 # Growth rate
            growth_stability * 0.15            # Growth stability
        )
        
        channel_scores = pd.concat([channel_scores, pd.DataFrame({
            'Sales Channel': [channel],
            'Total Revenue (M)': [total_revenue / 1000000],
            'Avg Profit Margin (%)': [avg_profit_margin],
            'Avg YoY Growth (%)': [avg_growth],
            'Growth Stability': [-growth_stability],  # Show as positive for readability
            'Composite Score': [composite_score]
        })], ignore_index=True)

channel_scores = channel_scores.sort_values('Composite Score', ascending=False).round(2)
print("Channel Performance Ranking (Composite Score):")
print(channel_scores)


STRATEGIC INSIGHTS & RECOMMENDATIONS

11. CHANNEL PERFORMANCE RANKING
--------------------------------------------------
Channel Performance Ranking (Composite Score):
  Sales Channel  Total Revenue (M)  Avg Profit Margin (%)  Avg YoY Growth (%)  \
0      In-Store              30.06                  28.33               19.35   
1        Online              21.67                  28.02               23.60   
2   Distributor              13.15                  28.56               19.70   
3     Wholesale               8.16                  28.66               27.79   

   Growth Stability  Composite Score  
0             35.28            16.61  
1             39.96            14.59  
2             22.15            13.67  
3             54.39             9.79  


In [13]:
print("\n12. RISK & OPPORTUNITY ASSESSMENT")
print("-" * 50)

risk_opportunity = pd.DataFrame()
for channel in df['Sales Channel'].unique():
    channel_growth = yearly_growth[yearly_growth['Sales Channel'] == channel]['YoY Revenue Growth (%)']
    
    if len(channel_growth) > 1:
        recent_trend = channel_growth.tail(2).mean()  # Last 2 years average
        volatility = channel_growth.std()
        negative_years = (channel_growth < 0).sum()
        
        # Risk assessment
        if volatility > 30:
            risk_level = "High"
        elif volatility > 15:
            risk_level = "Medium"
        else:
            risk_level = "Low"
        
        # Opportunity assessment
        if recent_trend > 15:
            opportunity = "High Growth Potential"
        elif recent_trend > 5:
            opportunity = "Moderate Growth"
        elif recent_trend > -5:
            opportunity = "Stable"
        else:
            opportunity = "Turnaround Needed"
        
        risk_opportunity = pd.concat([risk_opportunity, pd.DataFrame({
            'Sales Channel': [channel],
            'Recent Trend (%)': [recent_trend],
            'Volatility': [volatility],
            'Negative Years': [negative_years],
            'Risk Level': [risk_level],
            'Opportunity Assessment': [opportunity]
        })], ignore_index=True)

risk_opportunity = risk_opportunity.sort_values('Recent Trend (%)', ascending=False).round(2)
print("Risk & Opportunity Assessment:")
print(risk_opportunity)



12. RISK & OPPORTUNITY ASSESSMENT
--------------------------------------------------
Risk & Opportunity Assessment:
  Sales Channel  Recent Trend (%)  Volatility  Negative Years Risk Level  \
3     Wholesale             41.69       54.39               1       High   
1        Online             35.40       39.96               0       High   
2   Distributor             29.54       22.15               0     Medium   
0      In-Store             29.02       35.28               1       High   

  Opportunity Assessment  
3  High Growth Potential  
1  High Growth Potential  
2  High Growth Potential  
0  High Growth Potential  


In [14]:
print("\n13. KEY INSIGHTS SUMMARY")
print("-" * 50)

# Generate key insights
total_revenue_growth = df.groupby('Order Year')['Total Revenue'].sum().pct_change().mean() * 100
best_performing_channel = channel_scores.iloc[0]['Sales Channel'] if not channel_scores.empty else "N/A"
highest_growth_channel = cagr_analysis.iloc[0]['Sales Channel'] if not cagr_analysis.empty else "N/A"
most_volatile_channel = volatility_analysis.index[0] if not volatility_analysis.empty else "N/A"

print(f"""
KEY BUSINESS INSIGHTS:

📈 GROWTH PERFORMANCE:
   • Overall average YoY growth: {total_revenue_growth:.1f}%
   • Best performing channel: {best_performing_channel}
   • Highest growth channel (CAGR): {highest_growth_channel}
   • Most volatile channel: {most_volatile_channel}

🎯 STRATEGIC RECOMMENDATIONS:
   • Focus investment on channels with high composite scores
   • Monitor volatile channels for risk management
   • Investigate seasonal patterns for demand planning
   • Consider portfolio rebalancing based on growth stages

📊 DATA QUALITY NOTES:
   • Analysis covers {df['Order Year'].nunique()} year(s) of data
   • {df['Sales Channel'].nunique()} channels analyzed
   • {len(df):,} total transactions processed
""")

print("\n" + "="*30)
print("ANALYSIS COMPLETE")
print("="*30)


13. KEY INSIGHTS SUMMARY
--------------------------------------------------

KEY BUSINESS INSIGHTS:

📈 GROWTH PERFORMANCE:
   • Overall average YoY growth: 32.0%
   • Best performing channel: In-Store
   • Highest growth channel (CAGR): Wholesale
   • Most volatile channel: Wholesale

🎯 STRATEGIC RECOMMENDATIONS:
   • Focus investment on channels with high composite scores
   • Monitor volatile channels for risk management
   • Investigate seasonal patterns for demand planning
   • Consider portfolio rebalancing based on growth stages

📊 DATA QUALITY NOTES:
   • Analysis covers 3 year(s) of data
   • 4 channels analyzed
   • 7,991 total transactions processed


ANALYSIS COMPLETE
