# 04 - Temporal Pattern Analysis

This notebook performs comprehensive temporal analysis including time-series decomposition to understand purchase patterns across different time dimensions.

## Objectives
- Perform time-series decomposition (trend, seasonality, residuals)
- Analyze purchase patterns by hour of day
- Analyze purchase patterns by day of week
- Analyze purchase patterns by month
- Identify peak purchase times
- Visualize temporal trends and decomposition components
- Statistical analysis of temporal patterns

## Phase 2 Requirements
- ✅ Time-series decomposition (trend, seasonality, residuals)
- ✅ Temporal pattern identification
- ✅ Statistical validation


In [None]:
# Load required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings
import os

# Notebook-wide settings. NOTE: the filter below silences *every* warning
# for the rest of the session, not just plotting noise.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

print("=" * 80)
print("TEMPORAL PATTERN ANALYSIS")
print("=" * 80)

# Load data: the CSV is resolved two directory levels above the current
# working directory, under data/raw/.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
data_path = os.path.join(project_root, 'data', 'raw', 'Online Retail.csv')

df = pd.read_csv(data_path, encoding='latin-1')
# Unparseable timestamps become NaT and are dropped by the mask below.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'], errors='coerce')

# Combine all row filters into one mask and take an explicit copy, so that
# adding TotalPrice writes to an independent frame rather than to a slice of
# the raw data (which would raise SettingWithCopyWarning if warnings were on).
invoice_starts_with_c = df['InvoiceNo'].astype(str).str.startswith('C')  # presumably cancellations - confirm
keep_rows = (
    ~invoice_starts_with_c
    & (df['Quantity'] > 0)
    & (df['UnitPrice'] > 0)
    & df['Description'].notna()
    & df['InvoiceDate'].notna()
)
df = df[keep_rows].copy()
df['TotalPrice'] = df['Quantity'] * df['UnitPrice']

print(f"\nDataset loaded: {df.shape[0]:,} transactions")
print(f"Date range: {df['InvoiceDate'].min()} to {df['InvoiceDate'].max()}")


## Step 1: Prepare Time-Series Data

Aggregate transaction data by date for time-series analysis.


In [None]:
# Roll the cleaned rows up to one record per calendar day
invoice_day = df['InvoiceDate'].dt.date.rename('Date')
daily_data = (
    df.groupby(invoice_day)
    .agg(
        DailyRevenue=('TotalPrice', 'sum'),
        AvgTransactionValue=('TotalPrice', 'mean'),
        TransactionCount=('TotalPrice', 'count'),
        TotalQuantity=('Quantity', 'sum'),
        UniqueInvoices=('InvoiceNo', 'nunique'),
        UniqueCustomers=('CustomerID', 'nunique'),
    )
    .reset_index()
)
# The group keys are plain date objects; convert them back to timestamps
daily_data['Date'] = pd.to_datetime(daily_data['Date'])
daily_data = daily_data.sort_values('Date').reset_index(drop=True)

# Span every calendar day between the first and last observed date so that
# days without any rows still appear in the series (all metrics set to 0)
first_day = daily_data['Date'].min()
last_day = daily_data['Date'].max()
date_range = pd.date_range(start=first_day, end=last_day, freq='D')
daily_complete = (
    pd.DataFrame({'Date': date_range})
    .merge(daily_data, on='Date', how='left')
    .fillna(0)
)

# Daily revenue indexed by date is the series used for time-series analysis
ts_data = daily_complete.set_index('Date')['DailyRevenue']

total_days = len(ts_data)
active_days = len(daily_data)
print("\nTime-series prepared:")
print(f"  Total days: {total_days}")
print(f"  Days with transactions: {active_days}")
print(f"  Missing days: {total_days - active_days}")
print("\nTime-series statistics:")
print(ts_data.describe())


## Step 2: Time-Series Decomposition

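Decompose the time-series into trend, seasonal, and residual components, then test the original series, the trend, and the residuals for stationarity (Augmented Dickey-Fuller test).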


In [None]:
# Time-series decomposition of daily revenue
# Model: multiplicative, with period = 7 to capture a weekly cycle
print("=" * 80)
print("TIME-SERIES DECOMPOSITION")
print("=" * 80)

# Zero-revenue days are treated as gaps and filled by linear interpolation
# between neighbouring days, so the series handed to the multiplicative
# model contains no zeros.
ts_for_decomp = ts_data.replace(0, np.nan).interpolate(method='linear')

# extrapolate_trend='freq' extends the trend to the series edges instead of
# leaving NaN there
decomposition = seasonal_decompose(
    ts_for_decomp,
    model='multiplicative',
    period=7,
    extrapolate_trend='freq',
)

# Pull the three components out under short names
trend, seasonal, residual = (
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

print("\nDecomposition Components:")
print(f"  Trend - Mean: £{trend.mean():,.2f}, Std: £{trend.std():,.2f}")
for component_name, component in (('Seasonal', seasonal), ('Residual', residual)):
    print(f"  {component_name} - Mean: {component.mean():.4f}, Std: {component.std():.4f}")

# One panel per series: (data, title, y-label, line styling, draw y=1 reference?)
fig, axes = plt.subplots(4, 1, figsize=(16, 14))
fig.suptitle('Time-Series Decomposition: Trend, Seasonality, and Residuals', fontsize=16, y=0.995)

panels = [
    (ts_for_decomp, 'Original Time-Series (Daily Revenue)', 'Revenue (£)',
     {'color': 'steelblue', 'linewidth': 1.5}, False),
    (trend, 'Trend Component', 'Revenue (£)',
     {'color': 'darkgreen', 'linewidth': 2}, False),
    (seasonal, 'Seasonal Component (Weekly Pattern)', 'Seasonal Factor',
     {'color': 'coral', 'linewidth': 1.5, 'alpha': 0.7}, True),
    (residual, 'Residual Component (Random Fluctuations)', 'Residual',
     {'color': 'purple', 'linewidth': 1, 'alpha': 0.7}, True),
]
for panel_ax, (panel_series, panel_title, panel_ylabel, line_style, mark_unity) in zip(axes, panels):
    panel_ax.plot(panel_series.index, panel_series.values, **line_style)
    panel_ax.set_title(panel_title, fontweight='bold')
    panel_ax.set_ylabel(panel_ylabel)
    if mark_unity:
        # In a multiplicative model a factor of 1.0 means "no effect"
        panel_ax.axhline(y=1.0, color='red', linestyle='--', linewidth=1)
    panel_ax.grid(True, alpha=0.3)
axes[-1].set_xlabel('Date')

plt.tight_layout()
plt.show()

# Numeric summary of each component
print("\n" + "=" * 80)
print("DECOMPOSITION STATISTICS")
print("=" * 80)

trend_start, trend_end = trend.iloc[0], trend.iloc[-1]
trend_direction = 'Increasing' if trend_end > trend_start else 'Decreasing'
print("\nTrend Analysis:")
print(f"  Trend direction: {trend_direction}")
print(f"  Trend change: £{trend_end - trend_start:,.2f}")
print(f"  Average trend: £{trend.mean():,.2f}")

# idxmax/idxmin return the timestamp of the extreme factor; report its weekday
strongest_day = seasonal.idxmax().day_name()
weakest_day = seasonal.idxmin().day_name()
print("\nSeasonal Analysis:")
print(f"  Seasonal strength: {seasonal.std():.4f}")
print(f"  Max seasonal factor: {seasonal.max():.4f} (day {strongest_day})")
print(f"  Min seasonal factor: {seasonal.min():.4f} (day {weakest_day})")

print("\nResidual Analysis:")
print(f"  Residual mean: {residual.mean():.4f} (should be ~1.0 for multiplicative)")
print(f"  Residual std: {residual.std():.4f}")
print(f"  Residual range: [{residual.min():.4f}, {residual.max():.4f}]")


In [None]:
# Stationarity checks on the series and its decomposition components
print("=" * 80)
print("STATIONARITY TESTING")
print("=" * 80)

def adf_test(timeseries):
    """Run the Augmented Dickey-Fuller test and print a short report.

    NaN entries are dropped before testing. The test statistic, p-value and
    critical values are printed, followed by a verdict line.

    Returns:
        bool: True when the p-value is at most 0.05 (reported as stationary),
        otherwise False.
    """
    result = adfuller(timeseries.dropna())
    adf_statistic, p_value, critical_values = result[0], result[1], result[4]

    print(f"\nADF Statistic: {adf_statistic:.4f}")
    print(f"p-value: {p_value:.4f}")
    print("Critical Values:")
    for level, threshold in critical_values.items():
        print(f"  {level}: {threshold:.4f}")

    # Coerce to a plain bool so callers get True/False rather than a NumPy scalar
    is_stationary = bool(p_value <= 0.05)
    if is_stationary:
        print("\n✓ Series is STATIONARY (p-value <= 0.05)")
    else:
        print("\n✗ Series is NON-STATIONARY (p-value > 0.05)")
    return is_stationary

# Each verdict is kept in its own flag for use further down the notebook
print("\n1. Original Time-Series:")
is_stationary_original = adf_test(ts_for_decomp)

print("\n2. Trend Component:")
is_stationary_trend = adf_test(trend)

print("\n3. Residual Component:")
is_stationary_residual = adf_test(residual)


## Step 4: Temporal Pattern Analysis

Analyze patterns by hour, day of week, and month.


In [None]:
# Revenue patterns by hour, weekday and month
print("=" * 80)
print("TEMPORAL PATTERN ANALYSIS")
print("=" * 80)

# Calendar features are added to df as new columns
df['Hour'] = df['InvoiceDate'].dt.hour
df['DayOfWeek'] = df['InvoiceDate'].dt.dayofweek
df['Month'] = df['InvoiceDate'].dt.month

# Total revenue per hour of day
hourly_revenue = df.groupby('Hour')['TotalPrice'].sum().sort_index()

# Total revenue per weekday, relabelled from 0-6 to short day names
dow_map = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
dow_revenue = df.groupby('DayOfWeek')['TotalPrice'].sum().sort_index()
dow_revenue.index = dow_revenue.index.map(dow_map)

# Total revenue per calendar month (the same month of different years is
# pooled together), relabelled from 1-12 to short month names
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
monthly_revenue = df.groupby('Month')['TotalPrice'].sum().sort_index()
monthly_revenue.index = [month_names[month_no - 1] for month_no in monthly_revenue.index]

# Label of the highest-revenue bucket on each dimension
peak_hour = hourly_revenue.idxmax()
peak_dow = dow_revenue.idxmax()
peak_month = monthly_revenue.idxmax()

# 2x2 figure: hour / weekday on top, month / trend overlay below
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Temporal Pattern Analysis', fontsize=16, y=0.995)
(hour_ax, dow_ax), (month_ax, overlay_ax) = axes

# Hourly panel: bars are positioned at the hour value itself
hour_ax.bar(hourly_revenue.index, hourly_revenue.values, alpha=0.7, color='steelblue', edgecolor='black')
hour_ax.set_title('Revenue by Hour of Day', fontweight='bold')
hour_ax.set_xlabel('Hour')
hour_ax.set_ylabel('Revenue (£)')
hour_ax.grid(True, alpha=0.3, axis='y')
hour_ax.axvline(x=peak_hour, color='red', linestyle='--', label=f'Peak: {peak_hour}:00')
hour_ax.legend()

# Weekday and month panels share the same layout: bars at 0..n-1 with the
# category names as tick labels and a dashed line on the peak category
categorical_panels = (
    (dow_ax, dow_revenue, 'coral', 'Revenue by Day of Week', {}, peak_dow),
    (month_ax, monthly_revenue, 'teal', 'Revenue by Month', {'rotation': 45}, peak_month),
)
for cat_ax, cat_series, bar_color, cat_title, tick_kwargs, peak_label in categorical_panels:
    bar_positions = range(len(cat_series))
    cat_ax.bar(bar_positions, cat_series.values, alpha=0.7, color=bar_color, edgecolor='black')
    cat_ax.set_xticks(bar_positions)
    cat_ax.set_xticklabels(cat_series.index, **tick_kwargs)
    cat_ax.set_title(cat_title, fontweight='bold')
    cat_ax.set_ylabel('Revenue (£)')
    cat_ax.grid(True, alpha=0.3, axis='y')
    peak_position = list(cat_series.index).index(peak_label)
    cat_ax.axvline(x=peak_position, color='red', linestyle='--', label=f'Peak: {peak_label}')
    cat_ax.legend()

# Overlay panel: interpolated daily revenue with the decomposition trend on top
overlay_ax.plot(ts_for_decomp.index, ts_for_decomp.values, alpha=0.5, color='steelblue', label='Original', linewidth=1)
overlay_ax.plot(trend.index, trend.values, color='darkgreen', label='Trend', linewidth=2)
overlay_ax.set_title('Daily Revenue with Trend Component', fontweight='bold')
overlay_ax.set_ylabel('Revenue (£)')
overlay_ax.set_xlabel('Date')
overlay_ax.legend()
overlay_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Text summary of the peaks
print("\nPeak Times Identified:")
print(f"  Peak Hour: {peak_hour}:00 (£{hourly_revenue.max():,.2f})")
print(f"  Peak Day: {peak_dow} (£{dow_revenue.max():,.2f})")
print(f"  Peak Month: {peak_month} (£{monthly_revenue.max():,.2f})")

# Spread of each breakdown: (max - min) as a percentage of the mean
print("\nTemporal Variation:")
variation_rows = (
    ('Hourly', hourly_revenue),
    ('Day-of-week', dow_revenue),
    ('Monthly', monthly_revenue),
)
for variation_label, revenue_series in variation_rows:
    spread_pct = (revenue_series.max() - revenue_series.min())/revenue_series.mean()*100
    print(f"  {variation_label} variation: {spread_pct:.1f}%")


## Step 5: Autocorrelation Analysis

Analyze autocorrelation and partial autocorrelation to identify temporal dependencies.


In [None]:
# Autocorrelation analysis of the interpolated daily revenue series
print("=" * 80)
print("AUTOCORRELATION ANALYSIS")
print("=" * 80)

fig, axes = plt.subplots(2, 1, figsize=(14, 10))
fig.suptitle('Autocorrelation and Partial Autocorrelation Analysis', fontsize=16, y=0.995)

# ACF plot (30 lags, 95% confidence band)
plot_acf(ts_for_decomp.dropna(), lags=30, ax=axes[0], alpha=0.05)
axes[0].set_title('Autocorrelation Function (ACF)', fontweight='bold')
axes[0].set_xlabel('Lag')
axes[0].set_ylabel('Autocorrelation')
axes[0].grid(True, alpha=0.3)

# PACF plot (30 lags, 95% confidence band)
plot_pacf(ts_for_decomp.dropna(), lags=30, ax=axes[1], alpha=0.05)
axes[1].set_title('Partial Autocorrelation Function (PACF)', fontweight='bold')
axes[1].set_xlabel('Lag')
axes[1].set_ylabel('Partial Autocorrelation')
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Calculate autocorrelation values.
# nlags must reach the largest lag reported below; with the previous
# nlags=7 the lag-14 value was never computed and was silently skipped.
key_lags = [1, 7, 14]
acf_values = acf(ts_for_decomp.dropna(), nlags=max(key_lags), fft=True)
print("\nAutocorrelation at key lags:")
for lag in key_lags:
    # Guard kept in case fewer values than requested are returned
    if lag < len(acf_values):
        print(f"  Lag {lag}: {acf_values[lag]:.4f}")

# Identify significant lags among lags 1-7 using a fixed |ACF| > 0.2 cut-off.
# significant_lags holds zero-based offsets into acf_values[1:8]
# (offset 0 corresponds to lag 1).
significant_lags = [i for i, val in enumerate(acf_values[1:8]) if abs(val) > 0.2]
if significant_lags:
    flagged_lags = [i + 1 for i in significant_lags]
    print(f"\nSignificant autocorrelation at lags: {flagged_lags}")
    # Only a significant lag-7 correlation supports a weekly interpretation
    if 7 in flagged_lags:
        print("  (Indicates weekly patterns)")


## Step 6: Business Insights and Conclusions

Interpret the temporal patterns in terms of retail stock (inventory) management.


In [None]:
# Narrative summary built from the results of the earlier cells
print("=" * 80)
print("BUSINESS INSIGHTS FROM TEMPORAL ANALYSIS")
print("=" * 80)

# 1. Trend: compare the last trend value with the first
print("\n1. TREND ANALYSIS:")
revenue_is_growing = trend.iloc[-1] > trend.iloc[0]
if revenue_is_growing:
    trend_messages = (
        "   - Revenue shows an INCREASING trend over time",
        "   - Indicates business growth or market expansion",
    )
else:
    trend_messages = (
        "   - Revenue shows a DECREASING trend over time",
        "   - May indicate market saturation or competitive pressure",
    )
print(*trend_messages, sep="\n")

# 2. Seasonality: spread of the seasonal factors plus the peak buckets
print("\n2. SEASONAL PATTERNS:")
seasonal_messages = (
    f"   - Strong weekly seasonality detected (seasonal std: {seasonal.std():.4f})",
    f"   - Peak day: {peak_dow} - Stock should be optimized for this day",
    f"   - Peak hour: {peak_hour}:00 - Staffing and inventory should align",
    f"   - Peak month: {peak_month} - Seasonal stock planning critical",
)
print(*seasonal_messages, sep="\n")

# 3. Fixed recommendations
print("\n3. STOCK MANAGEMENT IMPLICATIONS:")
stock_messages = (
    "   - Weekly patterns suggest regular stock replenishment cycles",
    "   - Daily patterns inform optimal restocking times",
    "   - Monthly patterns guide seasonal inventory planning",
    "   - Trend component helps forecast long-term demand",
)
print(*stock_messages, sep="\n")

# 4. Volatility: residual standard deviation against a 0.2 cut-off
print("\n4. VOLATILITY INSIGHTS:")
residual_volatility = residual.std()
print(f"   - Residual volatility: {residual_volatility:.4f}")
volatility_message = (
    "   - HIGH volatility: Safety stock levels should be increased"
    if residual_volatility > 0.2
    else "   - MODERATE volatility: Standard safety stock sufficient"
)
print(volatility_message)

# 5. Forecasting readiness: driven by the ADF verdict on the residuals
print("\n5. FORECASTING READINESS:")
readiness_message = (
    "   - Residuals are stationary: Good for forecasting models"
    if is_stationary_residual
    else "   - Residuals are non-stationary: May need differencing for forecasting"
)
print(readiness_message)

print("\n" + "=" * 80)
print("TEMPORAL ANALYSIS COMPLETE")
print("=" * 80)
