# 03 - Sales Trend Analysis

## Purpose
Analyze sales patterns, trends, and seasonality in the Smart Inventory Manager dataset.

## Sections
1. Daily/Weekly/Monthly Trends
2. Seasonality Detection
3. Growth Analysis
4. Category Trends
5. Key Insights

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: whitegrid theme plus the 'husl' colour palette
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# Processed CSV exports are read relative to the notebook's working directory
DATA_DIR = Path('../..') / 'ml' / 'data' / 'processed'

orders = pd.read_csv(DATA_DIR / 'orders.csv')
order_items = pd.read_csv(DATA_DIR / 'order_items.csv')
products = pd.read_csv(DATA_DIR / 'products.csv')

# read_csv is called without date parsing, so convert OrderDate explicitly;
# every later cell relies on the .dt accessor of this column.
orders['OrderDate'] = pd.to_datetime(orders['OrderDate'])

# Build the analysis frame: order rows joined to their items, then to products.
# Both joins use merge()'s default inner join, so rows without a matching
# OrderID / ProductID are dropped.
# NOTE(review): later cells sum 'TotalAmount' on this item-level frame; if that
# column comes from orders.csv (one value per order) the sums would be
# repeated per item - confirm which table provides it.
full_orders = (
    orders
    .merge(order_items, on='OrderID')
    .merge(products, on='ProductID')
)

print(f"Dataset: {len(full_orders):,} order items")
print(f"Date range: {orders['OrderDate'].min()} to {orders['OrderDate'].max()}")

## 1. Daily/Weekly/Monthly Trends

In [None]:
# Collapse the item-level frame to one row per calendar day:
# distinct orders, summed revenue, summed units and summed profit.
daily_sales = (
    full_orders
    .groupby(full_orders['OrderDate'].dt.date)
    .agg(
        Orders=('OrderID', 'nunique'),
        Revenue=('TotalAmount', 'sum'),
        Units=('Quantity', 'sum'),
        Profit=('Profit', 'sum'),
    )
    .rename_axis('Date')
    .reset_index()
)
# .dt.date yields plain date objects; resample() needs a datetime index
daily_sales['Date'] = pd.to_datetime(daily_sales['Date'])

# Weekly and monthly totals are both rolled up from the daily table
daily_by_date = daily_sales.set_index('Date')
weekly_sales = daily_by_date.resample('W').sum().reset_index()
monthly_sales = daily_by_date.resample('M').sum().reset_index()

fig, axes = plt.subplots(3, 1, figsize=(14, 12))
daily_ax, weekly_ax, monthly_ax = axes

# Top panel: raw daily revenue (in $K) with 7- and 30-row rolling means
daily_revenue_k = daily_sales['Revenue'] / 1000
daily_ax.plot(daily_sales['Date'], daily_revenue_k, alpha=0.4, label='Daily')
for window, line_color, line_label in (
    (7, 'red', '7-day MA'),
    (30, 'green', '30-day MA'),
):
    daily_ax.plot(daily_sales['Date'], daily_revenue_k.rolling(window).mean(),
                  color=line_color, linewidth=2, label=line_label)
daily_ax.set_title('Daily Revenue Trends')
daily_ax.set_ylabel('Revenue ($K)')
daily_ax.legend()

# Middle and bottom panels: revenue bars per week and per month
for panel, totals, bar_width, panel_title in (
    (weekly_ax, weekly_sales, 5, 'Weekly Revenue'),
    (monthly_ax, monthly_sales, 20, 'Monthly Revenue'),
):
    panel.bar(totals['Date'], totals['Revenue'] / 1000, width=bar_width, alpha=0.7)
    panel.set_title(panel_title)
    panel.set_ylabel('Revenue ($K)')

plt.tight_layout()
plt.show()

## 2. Seasonality Detection

In [None]:
# Calendar features derived from the order date
full_orders['DayOfWeek'] = full_orders['OrderDate'].dt.day_name()
full_orders['Month'] = full_orders['OrderDate'].dt.month_name()
full_orders['Quarter'] = full_orders['OrderDate'].dt.quarter

# groupby() sorts labels alphabetically; these lists restore calendar order
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Revenue by day of week (top-left)
dow_sales = full_orders.groupby('DayOfWeek')['TotalAmount'].sum().reindex(day_order)
axes[0, 0].bar(dow_sales.index, dow_sales.values / 1000)
axes[0, 0].set_title('Revenue by Day of Week')
axes[0, 0].set_ylabel('Revenue ($K)')
axes[0, 0].tick_params(axis='x', rotation=45)

# Revenue by calendar month, pooled across all years in the data (top-right)
month_sales = full_orders.groupby('Month')['TotalAmount'].sum().reindex(month_order)
axes[0, 1].bar(month_sales.index, month_sales.values / 1000)
axes[0, 1].set_title('Revenue by Month')
axes[0, 1].set_ylabel('Revenue ($K)')
axes[0, 1].tick_params(axis='x', rotation=45)

# Revenue by quarter (bottom-left)
quarter_sales = full_orders.groupby('Quarter')['TotalAmount'].sum()
axes[1, 0].bar([f'Q{q}' for q in quarter_sales.index], quarter_sales.values / 1000)
axes[1, 0].set_title('Revenue by Quarter')
axes[1, 0].set_ylabel('Revenue ($K)')

# Order count by day of week (bottom-right).
# full_orders has one row per order item, so count distinct OrderIDs rather
# than rows; this matches the 'nunique' used for the daily Orders column.
dow_orders = full_orders.groupby('DayOfWeek')['OrderID'].nunique().reindex(day_order)
axes[1, 1].bar(dow_orders.index, dow_orders.values)
axes[1, 1].set_title('Order Count by Day of Week')
axes[1, 1].set_ylabel('Number of Orders')
axes[1, 1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 3. Growth Analysis

In [None]:
# Month-over-month growth in percent.
# pct_change() returns +/-inf when the previous month's total is 0 (resample
# fills months without sales with 0). fillna() does not touch inf, so map inf
# to NaN here; otherwise the chart and the average below would be corrupted.
monthly_sales['Revenue_Growth'] = (
    (monthly_sales['Revenue'].pct_change() * 100).replace([np.inf, -np.inf], np.nan)
)
monthly_sales['Orders_Growth'] = (
    (monthly_sales['Orders'].pct_change() * 100).replace([np.inf, -np.inf], np.nan)
)

fig, axes = plt.subplots(2, 1, figsize=(14, 8))

# Revenue growth bars: undefined growth (first month, or after a zero month)
# is drawn as 0; green marks growth >= 0, red marks decline
revenue_growth_plot = monthly_sales['Revenue_Growth'].fillna(0)
colors = ['green' if x >= 0 else 'red' for x in revenue_growth_plot]
axes[0].bar(monthly_sales['Date'], revenue_growth_plot, color=colors, width=20)
axes[0].axhline(y=0, color='black', linestyle='-', linewidth=0.5)
axes[0].set_title('Month-over-Month Revenue Growth (%)')
axes[0].set_ylabel('Growth Rate (%)')

# Cumulative revenue in $M: shaded area with a line on top
monthly_sales['Cumulative_Revenue'] = monthly_sales['Revenue'].cumsum()
axes[1].fill_between(monthly_sales['Date'], monthly_sales['Cumulative_Revenue'] / 1000000, alpha=0.3)
axes[1].plot(monthly_sales['Date'], monthly_sales['Cumulative_Revenue'] / 1000000, linewidth=2)
axes[1].set_title('Cumulative Revenue Over Time')
axes[1].set_ylabel('Cumulative Revenue ($M)')

plt.tight_layout()
plt.show()

# Growth statistics (mean() skips the NaN entries left for undefined growth)
print("\n=== Growth Statistics ===")
print(f"Average Monthly Revenue Growth: {monthly_sales['Revenue_Growth'].mean():.2f}%")
print(f"Total Revenue: ${monthly_sales['Revenue'].sum():,.2f}")
print(f"Best Month: {monthly_sales.loc[monthly_sales['Revenue'].idxmax(), 'Date'].strftime('%Y-%m')} (${monthly_sales['Revenue'].max():,.2f})")
print(f"Worst Month: {monthly_sales.loc[monthly_sales['Revenue'].idxmin(), 'Date'].strftime('%Y-%m')} (${monthly_sales['Revenue'].min():,.2f})")

## 4. Category Trends

In [None]:
# Tag every order item with its year-month period
full_orders['YearMonth'] = full_orders['OrderDate'].dt.to_period('M')

# Revenue matrix: one row per month, one column per category, 0 where a
# category has no rows in a month
category_monthly = (
    full_orders
    .groupby(['YearMonth', 'Category'])['TotalAmount']
    .sum()
    .unstack(fill_value=0)
)

# The five categories with the highest total revenue
top_categories = full_orders.groupby('Category')['TotalAmount'].sum().nlargest(5).index

plt.figure(figsize=(14, 6))
for cat in top_categories:
    if cat not in category_monthly.columns:
        continue
    # Period index is converted to strings so matplotlib treats months as labels
    plt.plot(category_monthly.index.astype(str), category_monthly[cat] / 1000, label=cat, linewidth=2)

plt.title('Top 5 Categories - Revenue Over Time')
plt.xlabel('Month')
plt.ylabel('Revenue ($K)')
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Category growth comparison
def calculate_category_growth(df, category):
    """Return the percent change in average monthly revenue for one category.

    The category's rows are summed per 'YearMonth', the resulting monthly
    series is split at its midpoint (the second part gets the extra month when
    the length is odd), and the mean of the later part is compared with the
    mean of the earlier part.

    Args:
        df: Frame with 'Category', 'YearMonth' and 'TotalAmount' columns.
        category: Value of the 'Category' column to evaluate.

    Returns:
        The growth in percent, or 0 when the category has fewer than two
        months of data or the earlier mean is not greater than zero.
    """
    monthly_revenue = (
        df.loc[df['Category'] == category]
        .groupby('YearMonth')['TotalAmount']
        .sum()
    )
    month_count = len(monthly_revenue)
    if month_count < 2:
        return 0

    midpoint = month_count // 2
    early_mean = monthly_revenue.iloc[:midpoint].mean()
    late_mean = monthly_revenue.iloc[midpoint:].mean()
    # A non-positive (or NaN) baseline has no meaningful percentage change
    if not early_mean > 0:
        return 0
    return ((late_mean - early_mean) / early_mean) * 100

# Growth figure for every category present in the data
category_growth = {
    cat: calculate_category_growth(full_orders, cat)
    for cat in full_orders['Category'].unique()
}

# Tabulate and sort ascending so the strongest growth ends up at the top
# of the horizontal bar chart
growth_df = (
    pd.DataFrame(list(category_growth.items()), columns=['Category', 'Growth'])
    .sort_values('Growth', ascending=True)
)

plt.figure(figsize=(10, 8))
# Green for growth >= 0, red for decline
colors = ['red' if x < 0 else 'green' for x in growth_df['Growth']]
plt.barh(growth_df['Category'], growth_df['Growth'], color=colors)
plt.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
plt.title('Category Growth (First Half vs Second Half)')
plt.xlabel('Growth Rate (%)')
plt.tight_layout()
plt.show()

## 5. Key Insights

In [None]:
print("\n" + "="*60)
print("SALES TREND KEY INSIGHTS")
print("="*60)

# Best performing day (by total revenue per weekday)
best_day = dow_sales.idxmax()
print(f"\n=== Best Performing Day: {best_day} ===")
print(f"Revenue: ${dow_sales.max():,.2f}")

# Best performing month (by total revenue per calendar month)
best_month = month_sales.idxmax()
print(f"\n=== Best Performing Month: {best_month} ===")
print(f"Revenue: ${month_sales.max():,.2f}")

# Growing categories: only categories with positive growth qualify, so a
# shrinking category is never reported under this heading
print("\n=== Top Growing Categories ===")
growing = growth_df[growth_df['Growth'] > 0].nlargest(3, 'Growth')
if growing.empty:
    print("  (none)")
for _, row in growing.iterrows():
    print(f"  {row['Category']}: {row['Growth']:+.1f}%")

# Declining categories: only categories with negative growth qualify, so a
# growing category is never reported as declining
print("\n=== Declining Categories ===")
declining = growth_df[growth_df['Growth'] < 0].nsmallest(3, 'Growth')
if declining.empty:
    print("  (none)")
for _, row in declining.iterrows():
    print(f"  {row['Category']}: {row['Growth']:+.1f}%")

# Overall trend: least-squares line through monthly revenue against the
# month's position (0, 1, 2, ...); the slope is the revenue change per month
x = np.arange(len(monthly_sales))
slope, intercept, r_value, p_value, std_err = stats.linregress(x, monthly_sales['Revenue'])
trend_direction = "Upward" if slope > 0 else "Downward"
print(f"\n=== Overall Trend: {trend_direction} ===")
print(f"Monthly change: ${slope:,.2f}")
print(f"R-squared: {r_value**2:.3f}")