# Sales Performance Optimization Dashboard - Analysis Overview

This notebook demonstrates the complete sales analytics workflow, from data loading through key insights generation.

## Project Goal

Analyze sales performance across regions, products, and time periods to:
- Identify top-performing regions and products
- Measure variance vs. sales targets
- Track trends over time
- Support planning accuracy improvements


## 1. Data Loading


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Plot appearance: seaborn grid theme plus a wide default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Read the sample sales extract (relative path, resolved against the
# current working directory)
df = pd.read_csv('../data/sample_sales_data.csv')

# Parse the 'date' column into pandas datetimes
df['date'] = pd.to_datetime(df['date'])

# Quick sanity report: table dimensions and the period covered
n_rows, n_cols = df.shape
first_day, last_day = df['date'].min(), df['date'].max()
print(f"Data loaded: {n_rows} rows, {n_cols} columns")
print(f"Date range: {first_day} to {last_day}")

# Last expression of the cell: the notebook renders the first rows
df.head()


## 2. Data Overview & Quality Check


In [None]:
# Basic info about the dataset: schema, missing values, numeric summary
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
df.info()
print("\nMissing Values:")
# Per-column count of null cells
print(df.isnull().sum())
print("\nSummary Statistics:")
print(df.describe())


## 3. Key Performance Indicators (KPIs)


In [None]:
# Headline totals across the whole dataset
total_revenue = df['revenue'].sum()
total_target = df['sales_target'].sum()
total_units = df['units_sold'].sum()

# Gap between actual and target, in absolute terms and as a percentage of target
variance = total_revenue - total_target
variance_pct = variance / total_target * 100

# Revenue divided by the number of rows in df
avg_order_value = total_revenue / df.shape[0]

# Assemble the report first, then emit it in one go (one line per KPI)
kpi_report = [
    "=== KEY PERFORMANCE INDICATORS ===",
    f"Total Revenue: ${total_revenue:,.2f}",
    f"Total Sales Target: ${total_target:,.2f}",
    f"Variance: ${variance:,.2f} ({variance_pct:.2f}%)",
    f"Total Units Sold: {total_units:,}",
    f"Average Order Value: ${avg_order_value:,.2f}",
]
print("\n".join(kpi_report))


## 4. Regional Performance Analysis


In [None]:
# Regional aggregation: one row per region with revenue, target and volume totals
regional_stats = df.groupby('region').agg({
    'revenue': 'sum',
    'sales_target': 'sum',
    'units_sold': 'sum',
    # 'size' counts every row in the group. 'count' would skip rows whose
    # product_name is missing while their revenue is still summed above,
    # which would inflate avg_order_value and make it inconsistent with
    # the overall KPI (total revenue / len(df)).
    'product_name': 'size'
}).rename(columns={'product_name': 'transaction_count'})

# Derived metrics: gap vs. target (absolute and % of target) and revenue per transaction
regional_stats['variance'] = regional_stats['revenue'] - regional_stats['sales_target']
regional_stats['variance_pct'] = (regional_stats['variance'] / regional_stats['sales_target']) * 100
regional_stats['avg_order_value'] = regional_stats['revenue'] / regional_stats['transaction_count']

# Highest-revenue region first
regional_stats = regional_stats.sort_values('revenue', ascending=False)
print("=== REGIONAL PERFORMANCE ===")
print(regional_stats)

# Visualize regional revenue as grouped bars: actual next to target
plt.figure(figsize=(12, 6))
regions = regional_stats.index
x = np.arange(len(regions))
width = 0.35

# Shift each series half a bar width left/right so the pair is centred on its tick
plt.bar(x - width/2, regional_stats['revenue'], width, label='Actual Revenue', alpha=0.8)
plt.bar(x + width/2, regional_stats['sales_target'], width, label='Sales Target', alpha=0.8)

plt.xlabel('Region')
plt.ylabel('Revenue ($)')
plt.title('Regional Performance: Actual vs. Target')
# Tick positions, labels and rotation set in a single call
plt.xticks(x, regions, rotation=45)
plt.legend()
plt.tight_layout()
plt.show()


## 5. Product Category Analysis


In [None]:
# Roll the sales rows up to one line per product category,
# ordered from highest to lowest revenue
category_stats = (
    df.groupby('product_category')
    .agg(
        revenue=('revenue', 'sum'),
        units_sold=('units_sold', 'sum'),
        # number of distinct product names within the category
        unique_products=('product_name', 'nunique'),
    )
    .sort_values('revenue', ascending=False)
)
print("=== PRODUCT CATEGORY PERFORMANCE ===")
print(category_stats)

# Bar chart of total revenue per category
plt.figure(figsize=(10, 6))
revenue_by_category = category_stats['revenue']
revenue_by_category.plot.bar(color='steelblue', alpha=0.8)
plt.title('Revenue by Product Category')
plt.xlabel('Product Category')
plt.ylabel('Total Revenue ($)')
# Right-align the rotated labels so they end under their bars
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()


## 6. Time Trend Analysis


In [None]:
# Tag every row with its calendar month (adds a 'year_month' period column to df)
df['year_month'] = df['date'].dt.to_period('M')

# Monthly totals; reset_index turns the month back into a regular column
monthly_stats = (
    df.groupby('year_month')
    .agg(
        revenue=('revenue', 'sum'),
        sales_target=('sales_target', 'sum'),
        units_sold=('units_sold', 'sum'),
    )
    .reset_index()
)

# String form of the month, used as the x-axis labels below
monthly_stats['year_month_str'] = monthly_stats['year_month'].astype(str)

# Line chart: actual revenue (solid) against the sales target (dashed)
month_labels = monthly_stats['year_month_str']
line_specs = [
    ('revenue', dict(marker='o', label='Actual Revenue', linewidth=2)),
    ('sales_target', dict(marker='s', label='Sales Target', linewidth=2, linestyle='--')),
]

plt.figure(figsize=(14, 6))
for column, line_style in line_specs:
    plt.plot(month_labels, monthly_stats[column], **line_style)
plt.title('Monthly Sales Trends: Actual vs. Target')
plt.xlabel('Month')
plt.ylabel('Revenue ($)')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## 7. Top Products Analysis


In [None]:
# Per-product totals, then keep the ten products with the highest revenue
product_totals = df.groupby('product_name').agg(
    revenue=('revenue', 'sum'),
    units_sold=('units_sold', 'sum'),
    # number of distinct regions the product appears in
    regions_sold_in=('region', 'nunique'),
)
top_products = product_totals.sort_values('revenue', ascending=False).head(10)

print("=== TOP 10 PRODUCTS BY REVENUE ===")
print(top_products)

# Horizontal bar chart of the top products
plt.figure(figsize=(12, 6))
top_products['revenue'].plot.barh(color='darkgreen', alpha=0.8)
plt.title('Top 10 Products by Revenue')
plt.xlabel('Total Revenue ($)')
plt.ylabel('Product')
# Flip the y-axis so the first (highest-revenue) row is drawn at the top
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()


## 8. Key Insights Summary


In [None]:
# Section 1: headline numbers, reusing the overall KPI totals
print("=== KEY INSIGHTS SUMMARY ===")
print("\n1. Overall Performance:")
print(f"   - Total Revenue: ${total_revenue:,.2f}")
print(f"   - Variance vs Target: {variance_pct:.2f}%")

# The stats tables are sorted by revenue in descending order,
# so position 0 holds the leader of each ranking.
best_region_name = regional_stats.index[0]
best_region = regional_stats.iloc[0]
print(f"\n2. Top Performing Region: {best_region_name}")
print(f"   - Revenue: ${best_region['revenue']:,.2f}")
print(f"   - Variance: {best_region['variance_pct']:.2f}%")

best_category_name = category_stats.index[0]
best_category = category_stats.iloc[0]
print(f"\n3. Top Product Category: {best_category_name}")
print(f"   - Revenue: ${best_category['revenue']:,.2f}")

best_product_name = top_products.index[0]
best_product = top_products.iloc[0]
print(f"\n4. Top Product: {best_product_name}")
print(f"   - Revenue: ${best_product['revenue']:,.2f}")
print(f"   - Units Sold: {best_product['units_sold']:,.0f}")

# Static follow-up checklist
next_steps = (
    "\n=== NEXT STEPS ===",
    "- Integrate findings into Power BI dashboard",
    "- Standardize KPI definitions for consistent reporting",
    "- Automate monthly refresh process",
    "- Develop predictive forecasting for next quarter",
)
for step_line in next_steps:
    print(step_line)
