# Sales Analysis

Analyze regional sales performance and identify trends.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the raw sales export and convert its 'date' column to datetimes
# as part of the same chained expression.
df = pd.read_csv('data/sales.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
print(f"Loaded {len(df)} rows")

In [None]:
# Clean: discard rows whose revenue is missing, then rows whose revenue
# is below zero. The raw frame `df` is left untouched.
clean_df = (
    df
    .dropna(subset=['revenue'])
    .loc[lambda frame: frame['revenue'] >= 0]
)
print(f"Clean: {len(clean_df)} rows (dropped {len(df) - len(clean_df)})")

In [None]:
# Compute profit (revenue minus cost).
#
# The raw frame keeps its 'profit' column so anything that reads
# df['profit'] continues to work.
df['profit'] = df['revenue'] - df['cost']

# clean_df was built before this cell ran, so it does not inherit the new
# column from df; add it explicitly. `assign` returns a new frame, which
# avoids writing into a filtered slice of df.
clean_df = clean_df.assign(profit=clean_df['revenue'] - clean_df['cost'])

# Report the mean over the cleaned rows only, so rows removed by the
# cleaning step (e.g. negative revenue) do not skew the figure.
print(f"Profit column added. Mean profit: ${clean_df['profit'].mean():,.2f}")

In [None]:
# Total revenue per region, largest first.
sales_by_region = (
    clean_df
    .groupby('region')['revenue']
    .sum()
    .sort_values(ascending=False)
)
print(sales_by_region)

In [None]:
# Bar chart of the per-region revenue totals.
fig, ax = plt.subplots(figsize=(8, 5))
sales_by_region.plot.bar(ax=ax, color='steelblue')
ax.set(title='Revenue by Region', ylabel='Total Revenue ($)')
fig.tight_layout()
plt.show()

In [None]:
# Monthly trend: revenue summed into month-end ('ME') bins on the date index.
monthly = (
    clean_df
    .set_index('date')
    .resample('ME')['revenue']
    .sum()
)

# Series.plot returns the Axes it drew on; label that Axes directly.
trend_ax = monthly.plot(figsize=(10, 4), marker='o')
trend_ax.set_title('Monthly Revenue')
trend_ax.set_ylabel('Revenue ($)')
plt.tight_layout()
plt.show()

In [None]:
# Per-category revenue total, average and row count, ranked by total
# revenue so the top category is the first row.
results = (
    clean_df
    .groupby('category')['revenue']
    .agg(['sum', 'mean', 'count'])
    .sort_values(by='sum', ascending=False)
)
print(results)

In [None]:
# Summary statistics for the cleaned data. The quartiles listed here are
# the ones describe() reports by default; they are spelled out for clarity.
summary = clean_df.describe(percentiles=[0.25, 0.5, 0.75])
print(summary)