# RETAIL STORE SALES ANALYSIS - SECTION D
## Visualizations (Question 9)

## Import Libraries and Load Data

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import FuncFormatter
import os

plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

notebook_dir = os.getcwd()
project_root = os.path.dirname(notebook_dir)
transformed_path = os.path.join(project_root, 'data', 'processed', 'transformed_sales_data.csv')

df = pd.read_csv(transformed_path)
df['Transaction Date'] = pd.to_datetime(df['Transaction Date'])

print(f"Data loaded: {df.shape[0]} transactions")
print(f"Date range: {df['Transaction Date'].min().date()} to {df['Transaction Date'].max().date()}")

FileNotFoundError: [Errno 2] No such file or directory: '/home/spookie/Desktop/All/Folders/Zetech/4.1/BI-Assgmnt/data/processed/transformed_sales_data.csv'

## Setup for Professional Visualizations

In [None]:
# Helper function to format currency on axes
def currency_formatter(x, p):
    return f'${x:,.0f}'

plt.rcParams['figure.dpi'] = 150
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.size'] = 11

# Q9: Plots
## Plot 1: Monthly Sales Trend Line Chart

## Prepare Monthly Sales Data

In [None]:
df['Year-Month'] = df['Transaction Date'].dt.to_period('M')
monthly_sales = df.groupby('Year-Month')['Total Spent'].sum().reset_index()
monthly_sales['Year-Month'] = monthly_sales['Year-Month'].astype(str)
monthly_sales['Date'] = pd.to_datetime(monthly_sales['Year-Month'] + '-01')
monthly_sales = monthly_sales.sort_values('Date')

print("Monthly sales data prepared:")
print(f"Total months: {len(monthly_sales)}")
print(f"Date range: {monthly_sales['Date'].min().date()} to {monthly_sales['Date'].max().date()}")

## Create Line Chart

In [None]:
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(monthly_sales['Date'], monthly_sales['Total Spent'], 
        marker='o', linewidth=2.5, markersize=6, color='crimson', 
        markerfacecolor='white', markeredgewidth=1.5, markeredgecolor='crimson')

ax.set_title('Monthly Sales Trend Analysis', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Month', fontsize=12, labelpad=10)
ax.set_ylabel('Total Revenue ($)', fontsize=12, labelpad=10)
ax.yaxis.set_major_formatter(FuncFormatter(currency_formatter))
ax.grid(True, alpha=0.3, linestyle='--')

# Highlight peak
max_idx = monthly_sales['Total Spent'].idxmax()
max_date = monthly_sales.loc[max_idx, 'Date']
max_value = monthly_sales.loc[max_idx, 'Total Spent']
ax.plot(max_date, max_value, 'ro', markersize=12, markeredgecolor='gold', markeredgewidth=2)
ax.annotate(f'Peak: ${max_value:,.0f}\n{max_date.strftime("%B %Y")}', 
            xy=(max_date, max_value), xytext=(10, 30), textcoords='offset points',
            bbox=dict(boxstyle='round,pad=0.5', facecolor='yellow', alpha=0.7),
            arrowprops=dict(arrowstyle='->', color='black'))

plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig(os.path.join(project_root, 'reports/figures/monthly_sales_trend.png'), dpi=300, bbox_inches='tight')
plt.show()

print("✅ Monthly sales trend chart saved")

## Plot 2: Revenue by Category Bar Chart

## Prepare Category Data

In [None]:
category_revenue = df.groupby('Category')['Total Spent'].sum().sort_values(ascending=False).round(2)

print("Category revenue summary:")
for cat, rev in category_revenue.items():
    pct = (rev / category_revenue.sum()) * 100
    print(f"  {cat:<30}: ${rev:10,.2f} ({pct:.1f}%)")

## Create Bar Chart

In [None]:
fig, ax = plt.subplots(figsize=(12, 6))

bars = ax.bar(category_revenue.index, category_revenue.values, 
              color=plt.cm.Set3(np.linspace(0, 1, len(category_revenue))),
              edgecolor='black', linewidth=0.5)

ax.set_title('Total Revenue by Product Category', fontsize=16, fontweight='bold', pad=20)
ax.set_xlabel('Category', fontsize=12, labelpad=10)
ax.set_ylabel('Total Revenue ($)', fontsize=12, labelpad=10)
ax.yaxis.set_major_formatter(FuncFormatter(currency_formatter))
ax.grid(True, alpha=0.3, axis='y', linestyle='--')

for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height + 500,
            f'${height:,.0f}', ha='center', va='bottom', fontweight='bold', fontsize=10)

plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

total = category_revenue.sum()
for i, (cat, val) in enumerate(category_revenue.items()):
    pct = (val / total) * 100
    ax.text(i, val/2, f'{pct:.1f}%', ha='center', va='center', 
            color='white', fontweight='bold', fontsize=11)

plt.tight_layout()
plt.savefig(os.path.join(project_root, 'reports/figures/revenue_by_category.png'), dpi=300, bbox_inches='tight')
plt.show()

print("✅ Revenue by category chart saved")

## Plot 3: Payment Method Distribution Pie Chart

## Prepare Payment Method Data

In [1]:
payment_counts = df['Payment Method'].value_counts()
payment_revenue = df.groupby('Payment Method')['Total Spent'].sum()

print("Payment method distribution:")
for method in payment_counts.index:
    count_pct = (payment_counts[method] / len(df)) * 100
    revenue_pct = (payment_revenue[method] / df['Total Spent'].sum()) * 100
    print(f"  {method:<15}: {payment_counts[method]:4d} trans ({count_pct:.1f}%) | Revenue: ${payment_revenue[method]:,.0f} ({revenue_pct:.1f}%)")

NameError: name 'df' is not defined

## Create Pie Chart

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']

wedges1, texts1, autotexts1 = ax1.pie(
    payment_counts.values, 
    labels=payment_counts.index,
    autopct='%1.1f%%',
    colors=colors[:len(payment_counts)],
    startangle=90,
    shadow=True,
    explode=[0.05] * len(payment_counts)
)
ax1.set_title('Transactions by Payment Method', fontsize=14, fontweight='bold')
for autotext in autotexts1:
    autotext.set_color('white')
    autotext.set_fontweight('bold')
    autotext.set_fontsize(11)

wedges2, texts2, autotexts2 = ax2.pie(
    payment_revenue.values,
    labels=payment_revenue.index,
    autopct='%1.1f%%',
    colors=colors[:len(payment_revenue)],
    startangle=90,
    shadow=True,
    explode=[0.05] * len(payment_revenue)
)
ax2.set_title('Revenue by Payment Method', fontsize=14, fontweight='bold')
for autotext in autotexts2:
    autotext.set_color('white')
    autotext.set_fontweight('bold')
    autotext.set_fontsize(11)

plt.tight_layout()
plt.savefig(os.path.join(project_root, 'reports/figures/payment_distribution.png'), dpi=300, bbox_inches='tight')
plt.show()

print("✅ Payment distribution charts saved")

## Plot 4: Boxplot - Discounted vs Non-Discounted

## Prepare Data for Boxplot

In [None]:
discounted = df[df['Discount Applied']]['Total Spent']
non_discounted = df[~df['Discount Applied']]['Total Spent']

print("Discount comparison statistics:")
print(f"\nWith Discount ({len(discounted)} transactions):")
print(f"  Mean: ${discounted.mean():.2f}")
print(f"  Median: ${discounted.median():.2f}")
print(f"  Std: ${discounted.std():.2f}")
print(f"  Min: ${discounted.min():.2f}")
print(f"  Max: ${discounted.max():.2f}")

print(f"\nWithout Discount ({len(non_discounted)} transactions):")
print(f"  Mean: ${non_discounted.mean():.2f}")
print(f"  Median: ${non_discounted.median():.2f}")
print(f"  Std: ${non_discounted.std():.2f}")
print(f"  Min: ${non_discounted.min():.2f}")
print(f"  Max: ${non_discounted.max():.2f}")

## Create Boxplot

In [None]:
fig, ax = plt.subplots(figsize=(10, 6))

data_to_plot = [non_discounted.values, discounted.values]

bp = ax.boxplot(data_to_plot, labels=['No Discount', 'With Discount'],
                patch_artist=True, notch=True, showmeans=True)

colors = ['lightcoral', 'lightgreen']
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

for whisker in bp['whiskers']:
    whisker.set(color='gray', linewidth=1.5, linestyle='--')
for cap in bp['caps']:
    cap.set(color='gray', linewidth=1.5)
for median in bp['medians']:
    median.set(color='black', linewidth=2)
for mean in bp['means']:
    mean.set(marker='o', markerfacecolor='red', markeredgecolor='red', markersize=8)

for i, data in enumerate(data_to_plot, 1):
    y = data
    x = np.random.normal(i, 0.04, size=len(y))
    ax.plot(x, y, 'o', alpha=0.3, color=colors[i-1], markersize=3)

ax.set_title('Transaction Amounts: Discounted vs Non-Discounted', 
             fontsize=16, fontweight='bold', pad=20)
ax.set_ylabel('Transaction Amount ($)', fontsize=12, labelpad=10)
ax.yaxis.set_major_formatter(FuncFormatter(currency_formatter))
ax.grid(True, alpha=0.3, axis='y', linestyle='--')

plt.tight_layout()
plt.savefig(os.path.join(project_root, 'reports/figures/discount_boxplot.png'), dpi=300, bbox_inches='tight')
plt.show()

print("✅ Discount comparison boxplot saved")

# SECTION D SUMMARY

✅ **Q9 Completed: All Required Visualizations**

1. **Line Chart**: Monthly sales trends
2. **Bar Chart**: Revenue by category
3. **Pie Chart**: Payment method distribution
4. **Boxplot**: Discounted vs non-discounted transactions

All visualizations are publication-ready with proper formatting.

**Ready to proceed to Section E: Advanced Analysis (RFM)**