# 06 - Seasonal Pattern Analysis

This notebook analyzes seasonality in monthly retail revenue: it decomposes the revenue series into trend, seasonal, and residual components, and quantifies the recurring monthly and quarterly patterns.

## Objectives
- Perform seasonal decomposition (STL or classical)
- Identify seasonal trends (monthly, quarterly)
- Calculate seasonal indices
- Analyze seasonal strength and patterns
- Compare seasonal effects across different time periods
- Visualize seasonal components

## Phase 2 Requirements
- ✅ Seasonal decomposition
- ✅ Seasonal pattern identification
- ✅ Seasonal indices calculation


In [2]:
# Load required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
import warnings
import os

# Notebook-wide display configuration.
# NOTE: this silences *every* warning for the rest of the kernel session,
# including pandas/statsmodels deprecation and convergence warnings.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

print("=" * 80)
print("SEASONAL PATTERN ANALYSIS")
print("=" * 80)

# Load data
# The CSV path is resolved relative to the kernel's working directory
# (two directory levels up, then data/raw/).
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
data_path = os.path.join(project_root, 'data', 'raw', 'Online Retail.csv')

df = pd.read_csv(data_path, encoding='latin-1')
# Unparseable timestamps become NaT and are removed by the mask below.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'], errors='coerce')

# Build the row filter once and take an explicit copy, so that the column
# assignments made here and in later cells operate on an independent frame
# rather than on a slice of the raw data (avoids chained-assignment warnings).
is_valid_sale = (
    ~df['InvoiceNo'].astype(str).str.startswith('C')  # presumably 'C' marks cancellations - verify against data dictionary
    & (df['Quantity'] > 0)
    & (df['UnitPrice'] > 0)
    & df['Description'].notna()
    & df['InvoiceDate'].notna()
)
df = df.loc[is_valid_sale].copy()
df['TotalPrice'] = df['Quantity'] * df['UnitPrice']

print(f"\nDataset loaded: {df.shape[0]:,} transactions")
print(f"Date range: {df['InvoiceDate'].min()} to {df['InvoiceDate'].max()}")


ModuleNotFoundError: No module named 'statsmodels'

## Step 1: Prepare Monthly Time-Series Data

Aggregate data by month for seasonal decomposition analysis.


In [None]:
# Aggregate by month
# Tag each transaction with its calendar month, then roll the cleaned
# transactions up to one row per month.
df['YearMonth'] = df['InvoiceDate'].dt.to_period('M')

monthly_data = (
    df.groupby('YearMonth')
      .agg(
          MonthlyRevenue=('TotalPrice', 'sum'),
          AvgTransactionValue=('TotalPrice', 'mean'),
          TransactionCount=('TotalPrice', 'count'),
          TotalQuantity=('Quantity', 'sum'),
          UniqueInvoices=('InvoiceNo', 'nunique'),
          UniqueCustomers=('CustomerID', 'nunique'),
      )
      .reset_index()
)

# Convert the period label (e.g. "2011-03") to a timestamp at the start of
# the month and order the rows chronologically.
monthly_data = (
    monthly_data
    .assign(Date=pd.to_datetime(monthly_data['YearMonth'].astype(str)))
    .sort_values('Date')
    .reset_index(drop=True)
)

# Create time-series: monthly revenue indexed by month-start date
ts_monthly = monthly_data.set_index('Date')['MonthlyRevenue']

first_month, last_month = ts_monthly.index.min(), ts_monthly.index.max()
print("\nMonthly time-series prepared:")
print(f"  Total months: {len(ts_monthly)}")
print(f"  Date range: {first_month} to {last_month}")
print("\nMonthly revenue statistics:")
print(ts_monthly.describe())

# Display monthly data
print("\nMonthly Revenue Data:")
summary_columns = ['YearMonth', 'MonthlyRevenue', 'TransactionCount', 'UniqueCustomers']
display(monthly_data[summary_columns])


## Step 2: Seasonal Decomposition

Perform seasonal decomposition to extract trend, seasonal, and residual components.


In [None]:
# Seasonal decomposition
# Multiplicative model with a 12-month period (annual pattern in monthly data).
print("=" * 80)
print("SEASONAL DECOMPOSITION")
print("=" * 80)

# NOTE(review): per the statsmodels documentation, seasonal_decompose needs at
# least two complete cycles (2 * period = 24 monthly observations) and strictly
# positive values for a multiplicative model - confirm ts_monthly satisfies both.
decomposition = seasonal_decompose(
    ts_monthly,
    model='multiplicative',
    period=12,
    extrapolate_trend='freq',
)

# Extract components
trend_monthly, seasonal_monthly, residual_monthly = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

print("\nDecomposition Components:")
print(f"  Trend - Mean: £{trend_monthly.mean():,.2f}, Std: £{trend_monthly.std():,.2f}")
for label, component in (('Seasonal', seasonal_monthly), ('Residual', residual_monthly)):
    print(f"  {label} - Mean: {component.mean():.4f}, Std: {component.std():.4f}")

# Visualize decomposition: one stacked panel per series
fig, axes = plt.subplots(4, 1, figsize=(16, 14))
fig.suptitle('Seasonal Decomposition: Trend, Seasonality, and Residuals', fontsize=16, y=0.995)

panel_specs = [
    # (series, title, y-label, colour, line width, marker size, reference line at 1.0?)
    (ts_monthly, 'Original Time-Series (Monthly Revenue)', 'Revenue (£)', 'steelblue', 2, 6, False),
    (trend_monthly, 'Trend Component', 'Revenue (£)', 'darkgreen', 2, 6, False),
    (seasonal_monthly, 'Seasonal Component (Annual Pattern)', 'Seasonal Factor', 'coral', 2, 6, True),
    (residual_monthly, 'Residual Component (Random Fluctuations)', 'Residual', 'purple', 1.5, 4, True),
]

for ax, (series, title, ylabel, colour, width, msize, show_unit_line) in zip(axes, panel_specs):
    ax.plot(series.index, series.values, marker='o', color=colour, linewidth=width, markersize=msize)
    ax.set_title(title, fontweight='bold')
    ax.set_ylabel(ylabel)
    if show_unit_line:
        # In a multiplicative model a factor of 1.0 means "no effect".
        ax.axhline(y=1.0, color='red', linestyle='--', linewidth=1)
    ax.grid(True, alpha=0.3)

# Only the bottom panel carries the shared x-axis label.
axes[-1].set_xlabel('Date')

plt.tight_layout()
plt.show()


## Step 3: Calculate Seasonal Indices

Calculate seasonal indices to quantify seasonal effects.


In [None]:
# Calculate seasonal indices
print("=" * 80)
print("SEASONAL INDICES CALCULATION")
print("=" * 80)

# One row per observation of the seasonal component, tagged with its calendar month
seasonal_df = pd.DataFrame({
    'Date': seasonal_monthly.index,
    'Seasonal': seasonal_monthly.values,
    'Month': seasonal_monthly.index.month,
})

# Mean seasonal factor per calendar month, labelled with the short month name
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
monthly_seasonal = seasonal_df.groupby('Month')['Seasonal'].mean().reset_index()
monthly_seasonal['MonthName'] = [month_names[m - 1] for m in monthly_seasonal['Month']]

print("\nSeasonal Indices by Month:")
print("(Values > 1.0 indicate above-average months, < 1.0 indicate below-average)")
print(monthly_seasonal[['MonthName', 'Seasonal']].to_string(index=False))

# Visualize seasonal indices: bars above 1.0 in coral, the rest in steel blue
index_values = monthly_seasonal['Seasonal']
positions = range(len(monthly_seasonal))
bar_colors = ['coral' if v > 1.0 else 'steelblue' for v in index_values]

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.bar(positions, index_values, alpha=0.7, color=bar_colors, edgecolor='black')
ax.set_xticks(positions)
ax.set_xticklabels(monthly_seasonal['MonthName'])
ax.set_title('Seasonal Indices by Month', fontweight='bold', fontsize=14)
ax.set_ylabel('Seasonal Index')
ax.axhline(y=1.0, color='red', linestyle='--', linewidth=2, label='Average (1.0)')
ax.grid(True, alpha=0.3, axis='y')
ax.legend()

# Add value labels at the top edge of each bar; labels for bars at or below
# 1.0 hang downward from the bar top, the others sit above it.
for bar, val in zip(bars, index_values):
    x_center = bar.get_x() + bar.get_width() / 2.
    anchor = 'bottom' if val > 1.0 else 'top'
    ax.text(x_center, bar.get_height(), f'{val:.3f}', ha='center', va=anchor, fontweight='bold')

plt.tight_layout()
plt.show()

# Identify peak and trough months (row labels into monthly_seasonal)
peak_month_idx = index_values.idxmax()
trough_month_idx = index_values.idxmin()

peak_name, peak_value = monthly_seasonal.loc[peak_month_idx, ['MonthName', 'Seasonal']]
trough_name, trough_value = monthly_seasonal.loc[trough_month_idx, ['MonthName', 'Seasonal']]

print(f"\nPeak Season: {peak_name} (Index: {peak_value:.3f})")
print(f"Trough Season: {trough_name} (Index: {trough_value:.3f})")
print(f"Seasonal Range: {index_values.max() - index_values.min():.3f}")
# "Strength" here is simply the standard deviation of the twelve monthly indices.
print(f"Seasonal Strength: {index_values.std():.4f}")


In [None]:
# Seasonal pattern analysis
# (No markdown header precedes this cell; it sits between "Step 3" and "Step 5".)
print("=" * 80)
print("SEASONAL PATTERN ANALYSIS")
print("=" * 80)

# Calendar fields derived from the invoice timestamp
invoice_dt = df['InvoiceDate'].dt
df['Month'] = invoice_dt.month
df['Year'] = invoice_dt.year
df['Quarter'] = invoice_dt.quarter

# Revenue per (year, month), labelled with the short month name
monthly_by_year = df.groupby(['Year', 'Month'])['TotalPrice'].sum().reset_index()
monthly_by_year['MonthName'] = [month_names[m - 1] for m in monthly_by_year['Month']]

# NOTE(review): the two roll-ups below pool every year together, so a month or
# quarter that occurs in more years than the others is over-weighted - confirm
# this is intended for the dataset's date range.
quarterly_revenue = df.groupby('Quarter')['TotalPrice'].sum().sort_index()
quarter_names = {1: 'Q1 (Jan-Mar)', 2: 'Q2 (Apr-Jun)', 3: 'Q3 (Jul-Sep)', 4: 'Q4 (Oct-Dec)'}
monthly_combined = df.groupby('Month')['TotalPrice'].sum().sort_index()

# Visualizations: 2x2 grid (monthly bars, quarterly bars, year heatmap, seasonal line)
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Seasonal Pattern Analysis', fontsize=16, y=0.995)
ax_month, ax_quarter, ax_heat, ax_season = axes.ravel()

# Monthly pattern (all years combined)
month_positions = range(len(monthly_combined))
ax_month.bar(month_positions, monthly_combined.values, alpha=0.7, color='teal', edgecolor='black')
ax_month.set_xticks(month_positions)
ax_month.set_xticklabels([month_names[m - 1] for m in monthly_combined.index], rotation=45)
ax_month.set_title('Monthly Revenue Pattern (All Years)', fontweight='bold')
ax_month.set_ylabel('Revenue (£)')
ax_month.grid(True, alpha=0.3, axis='y')

# Quarterly pattern
quarter_positions = range(len(quarterly_revenue))
ax_quarter.bar(quarter_positions, quarterly_revenue.values, alpha=0.7, color='coral', edgecolor='black')
ax_quarter.set_xticks(quarter_positions)
ax_quarter.set_xticklabels([quarter_names[q] for q in quarterly_revenue.index])
ax_quarter.set_title('Quarterly Revenue Pattern', fontweight='bold')
ax_quarter.set_ylabel('Revenue (£)')
ax_quarter.grid(True, alpha=0.3, axis='y')

# Monthly pattern by year (heatmap) - only meaningful with more than one year
year_count = monthly_by_year['Year'].nunique()
if year_count > 1:
    pivot_monthly = monthly_by_year.pivot(index='Month', columns='Year', values='TotalPrice')
    sns.heatmap(pivot_monthly, annot=True, fmt='.0f', cmap='YlOrRd', ax=ax_heat, cbar_kws={'label': 'Revenue (£)'})
    ax_heat.set_title('Monthly Revenue by Year (Heatmap)', fontweight='bold')
    ax_heat.set_ylabel('Month')
    ax_heat.set_yticklabels([month_names[m - 1] for m in pivot_monthly.index], rotation=0)
else:
    ax_heat.text(0.5, 0.5, 'Insufficient years for comparison', ha='center', va='center',
                 transform=ax_heat.transAxes, fontsize=12)
    ax_heat.set_title('Monthly Revenue by Year', fontweight='bold')

# Seasonal component visualization, shaded relative to the neutral factor 1.0
season_dates = seasonal_monthly.index
season_factors = seasonal_monthly.values
ax_season.plot(season_dates, season_factors, marker='o', color='purple', linewidth=2, markersize=6)
ax_season.axhline(y=1.0, color='red', linestyle='--', linewidth=2, label='Average')
ax_season.fill_between(season_dates, 1.0, season_factors,
                       where=(season_factors > 1.0), alpha=0.3, color='green', label='Above Average')
ax_season.fill_between(season_dates, 1.0, season_factors,
                       where=(season_factors < 1.0), alpha=0.3, color='red', label='Below Average')
ax_season.set_title('Seasonal Component Over Time', fontweight='bold')
ax_season.set_ylabel('Seasonal Factor')
ax_season.set_xlabel('Date')
ax_season.legend()
ax_season.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary statistics
best_quarter = quarterly_revenue.idxmax()
worst_quarter = quarterly_revenue.idxmin()
quarter_spread = quarterly_revenue.max() - quarterly_revenue.min()

print("\nSeasonal Pattern Summary:")
print(f"  Peak Quarter: {quarter_names[best_quarter]} (£{quarterly_revenue.max():,.2f})")
print(f"  Lowest Quarter: {quarter_names[worst_quarter]} (£{quarterly_revenue.min():,.2f})")
print(f"  Quarterly variation: {quarter_spread/quarterly_revenue.mean()*100:.1f}%")


## Step 5: Business Insights and Stock Management Implications

Interpret the seasonal patterns in terms of retail demand and their implications for inventory (stock) management.


In [None]:
# Business insights
# Turns the monthly seasonal indices into a plain-text briefing: overall
# strength of seasonality, peak/trough months, and per-month stock factors.
print("=" * 80)
print("BUSINESS INSIGHTS FROM SEASONAL ANALYSIS")
print("=" * 80)

# Cut-offs on the standard deviation of the monthly indices used to label
# seasonality as strong / moderate / weak (heuristic values from this notebook).
STRONG_SEASONALITY_STD = 0.15
MODERATE_SEASONALITY_STD = 0.08

print("\n1. SEASONAL STRENGTH:")
seasonal_strength = monthly_seasonal['Seasonal'].std()
if seasonal_strength > STRONG_SEASONALITY_STD:
    print(f"   - STRONG seasonality detected (std: {seasonal_strength:.4f})")
    print("   - Seasonal patterns are significant and should guide stock planning")
elif seasonal_strength > MODERATE_SEASONALITY_STD:
    print(f"   - MODERATE seasonality detected (std: {seasonal_strength:.4f})")
    print("   - Seasonal patterns exist but may be less predictable")
else:
    print(f"   - WEAK seasonality detected (std: {seasonal_strength:.4f})")
    print("   - Limited seasonal variation; other factors may dominate")

# Look the peak/trough rows up once instead of repeating the .loc access.
peak_name = monthly_seasonal.loc[peak_month_idx, 'MonthName']
peak_index = monthly_seasonal.loc[peak_month_idx, 'Seasonal']
trough_name = monthly_seasonal.loc[trough_month_idx, 'MonthName']
trough_index = monthly_seasonal.loc[trough_month_idx, 'Seasonal']

# Percentage distance from the neutral index 1.0, expressed as positive
# magnitudes so the "above"/"below" wording carries the direction.
peak_uplift_pct = (peak_index - 1) * 100
trough_shortfall_pct = (1 - trough_index) * 100

print("\n2. PEAK SEASON IDENTIFICATION:")
print(f"   - Peak month: {peak_name}")
print(f"   - Peak index: {peak_index:.3f} ({peak_uplift_pct:.1f}% above average)")
print(f"   - Stock should be INCREASED by {peak_uplift_pct:.1f}% for peak month")

print("\n3. TROUGH SEASON IDENTIFICATION:")
print(f"   - Lowest month: {trough_name}")
# Previously printed index*100-100, i.e. a negative number followed by
# "below average" (a double negative); report the positive shortfall instead.
print(f"   - Trough index: {trough_index:.3f} ({trough_shortfall_pct:.1f}% below average)")
print(f"   - Stock can be REDUCED by {trough_shortfall_pct:.1f}% for trough month")

print("\n4. STOCK PLANNING RECOMMENDATIONS:")
print("   - Use seasonal indices to adjust base stock levels by month")
print("   - Plan inventory purchases 2-3 months before peak seasons")
print("   - Reduce stock levels during trough months to minimize holding costs")
print("   - Consider promotional strategies during low seasons to boost demand")

print("\n5. SEASONAL ADJUSTMENT FACTORS:")
print("   Monthly stock adjustment factors (multiply base stock by these values):")
for month_label, factor in zip(monthly_seasonal['MonthName'], monthly_seasonal['Seasonal']):
    print(f"   {month_label:3s}: {factor:.3f}")

print("\n6. FORECASTING IMPLICATIONS:")
print("   - Seasonal decomposition enables deseasonalized forecasting")
print("   - Trend component can be forecasted independently")
print("   - Seasonal component can be applied to future forecasts")
print("   - Residual analysis helps identify unusual months requiring investigation")

print("\n" + "=" * 80)
print("SEASONAL ANALYSIS COMPLETE")
print("=" * 80)
