## Scenario
You're a junior data analyst at a retail company. The sales team has provided you with a CSV file containing transaction records for the past year. Your manager needs insights to make strategic decisions about inventory, regional focus, and product mix.

In [None]:
import csv
from pprint import pprint


In [None]:

# Relative path of the transactions CSV that is passed to load_sales_data().
STORE_DATA_FILE = "Files/sample_superstore.csv"

### Part 1: Data Loading & Exploration

In [None]:
def load_sales_data(filename):
    """Load and clean sales data from a CSV file.

    Every row is read as a dict keyed by the CSV header. The 'Sales',
    'Quantity', 'Discount' and 'Profit' fields are converted to numbers and
    written back into the row; all other fields stay as the strings read
    from the file.

    A row is skipped when:
      * a numeric field is absent from that row (short row) or cannot be
        parsed as a number,
      * 'Sales' is negative,
      * 'Quantity' is zero or negative,
      * 'Discount' is outside the 0..1 range after normalisation.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of cleaned row dicts. If the file does not exist, or any
        other error occurs while reading it (for example a numeric column
        is missing from the header), a message is printed and an empty
        list is returned.
    """
    sales_data = []

    try:
        # newline="" lets the csv module do its own line-ending handling,
        # which it needs for quoted fields that contain line breaks.
        with open(filename, "r", newline="") as file:
            for row in csv.DictReader(file):
                try:
                    # Convert numeric fields
                    sales = float(row["Sales"])
                    quantity = int(row["Quantity"])
                    discount = float(row["Discount"])
                    profit = float(row["Profit"])
                except (ValueError, TypeError):
                    # ValueError: text that is not a number.
                    # TypeError: DictReader fills fields missing from a
                    # short row with None. Either way only this row is
                    # bad, so skip it instead of aborting the whole load.
                    continue

                # Fix discount if entered as 20 instead of 0.20
                if discount > 1:
                    discount = discount / 100

                if sales < 0:
                    continue  # skip negative sales

                if quantity <= 0:
                    continue  # skip zero or negative quantity

                if discount < 0 or discount > 1:
                    continue  # skip invalid discount

                # Save cleaned values back into row
                row["Sales"] = sales
                row["Quantity"] = quantity
                row["Discount"] = discount
                row["Profit"] = profit

                sales_data.append(row)

        return sales_data

    except FileNotFoundError:
        print(f"Error: {filename} not found")
        return []

    except Exception as e:
        # Best-effort boundary for the notebook: report and return nothing
        # rather than crash the cell.
        print(f"Error loading data: {e}")
        return []
    
# Load the cleaned transactions once; the cells below reuse `data`.
data = load_sales_data(STORE_DATA_FILE)
# Plain print here: pprint() would show the string's repr, quotes included.
print(f"Rows loaded: {len(data)}")
print("************************")
# Preview the first five cleaned rows.
pprint(data[:5])

In [None]:
def explore_data(sales_data):
    """Display dataset exploration statistics.

    Prints the number of rows, the smallest and largest 'Order Date'
    values, the distinct 'Region' and 'Category' values (sorted so the
    output is the same on every run) and the number of distinct
    'Product Name' values.

    Args:
        sales_data: List of row dicts, each providing the 'Order Date',
            'Region', 'Category' and 'Product Name' keys. May be empty,
            in which case the date range is reported as "N/A".
    """
    # Total number of orders (rows)
    total_orders = len(sales_data)

    # NOTE: dates are compared exactly as stored. If they are still the
    # strings read from the CSV, min()/max() compare them lexicographically,
    # which matches chronological order only for ISO-style values such as
    # YYYY-MM-DD.
    order_dates = [row["Order Date"] for row in sales_data]

    regions = sorted({row["Region"] for row in sales_data})
    categories = sorted({row["Category"] for row in sales_data})
    products = {row["Product Name"] for row in sales_data}

    # Display results
    print("DATASET EXPLORATION")
    print("-----------------")
    print("Total Orders:", total_orders)
    if order_dates:
        print("Date Range:", min(order_dates), "to", max(order_dates))
    else:
        # min()/max() raise ValueError on an empty sequence.
        print("Date Range: N/A")
    print("Unique Regions:", regions)
    print("Unique Categories:", categories)
    print("Number of Unique Products:", len(products))

# Print the exploration summary for the rows loaded from STORE_DATA_FILE.
explore_data(data)

### Part 2: Sales Performance Analysis

In [None]:
def calculate_revenue_metrics(sales_data):
    """Summarise revenue, profit and volume over all rows.

    Args:
        sales_data: List of row dicts with numeric 'Sales', 'Profit' and
            'Quantity' values.

    Returns:
        A dict with the keys "Total Revenue", "Total Profit",
        "Total Quantity Sold", "Average Order Value" (total revenue divided
        by the number of rows) and "Profit Margin (%)" (total profit as a
        percentage of total revenue, or 0 when revenue is 0). Every value
        is 0 when sales_data is empty.
    """
    labels = (
        "Total Revenue",
        "Total Profit",
        "Total Quantity Sold",
        "Average Order Value",
        "Profit Margin (%)",
    )

    # Nothing to aggregate: report zeros across the board.
    if not sales_data:
        return dict.fromkeys(labels, 0)

    revenue = profit = units = 0
    for record in sales_data:
        revenue += record["Sales"]
        profit += record["Profit"]
        units += record["Quantity"]

    per_order = revenue / len(sales_data)
    # Guard the division: a margin is undefined without revenue.
    margin = (profit / revenue) * 100 if revenue != 0 else 0

    return dict(zip(labels, (revenue, profit, units, per_order, margin)))

In [None]:
def analyze_by_region(sales_data):
    """Aggregate sales, profit and row counts per region.

    Args:
        sales_data: List of row dicts with a 'Region' key and numeric
            'Sales' and 'Profit' values.

    Returns:
        A dict mapping each region to a dict with "total_sales",
        "total_profit", "order_count" and "avg_order_value" (total sales
        divided by the region's row count). Regions appear in the order
        they are first seen. An empty dict is returned for empty input.
    """
    if not sales_data:
        return {}

    summary = {}

    # First pass: accumulate the running totals for each region.
    for record in sales_data:
        bucket = summary.setdefault(
            record["Region"],
            {"total_sales": 0, "total_profit": 0, "order_count": 0},
        )
        bucket["total_sales"] += record["Sales"]
        bucket["total_profit"] += record["Profit"]
        bucket["order_count"] += 1

    # Second pass: derive the per-row average from the finished totals.
    for bucket in summary.values():
        count = bucket["order_count"]
        bucket["avg_order_value"] = bucket["total_sales"] / count if count > 0 else 0

    return summary

In [None]:
def analyze_by_category(sales_data):
    """Aggregate sales, profit and row counts per product category.

    Args:
        sales_data: List of row dicts with a 'Category' key and numeric
            'Sales' and 'Profit' values.

    Returns:
        A dict mapping each category to a dict with "total_sales",
        "total_profit", "order_count" and "avg_order_value" (total sales
        divided by the category's row count). Categories appear in the
        order they are first seen. An empty dict is returned for empty
        input.
    """
    if not sales_data:
        return {}

    totals = {}

    # Accumulate the running totals, creating each entry on first sight.
    for entry in sales_data:
        name = entry["Category"]
        amount = entry["Sales"]
        gain = entry["Profit"]

        stats = totals.get(name)
        if stats is None:
            stats = totals[name] = {
                "total_sales": 0,
                "total_profit": 0,
                "order_count": 0,
            }

        stats["total_sales"] += amount
        stats["total_profit"] += gain
        stats["order_count"] += 1

    # Derive the average once all rows have been counted.
    for name, stats in totals.items():
        if stats["order_count"] > 0:
            average = stats["total_sales"] / stats["order_count"]
        else:
            average = 0
        stats["avg_order_value"] = average

    return totals



In [None]:
def print_regional_performance(region_metrics):
    """Print a one-line summary for each region.

    Args:
        region_metrics: Dict mapping a region name to a dict with
            'total_sales', 'total_profit', 'order_count' and
            'avg_order_value'.
    """
    print("REGIONAL PERFORMANCE")
    print("====================")
    for name in region_metrics:
        stats = region_metrics[name]
        # The name is left-aligned to 8 characters and the count to 4.
        columns = [
            f"{name:<8} ${stats['total_sales']:,.2f}",
            f"Profit: ${stats['total_profit']:,.2f}",
            f"Orders: {stats['order_count']:<4}",
            f"Avg: ${stats['avg_order_value']:,.2f}",
        ]
        print(" | ".join(columns))


def print_category_report(category_metrics):
    """Print a short multi-line report for each category.

    Each category is shown as an underlined heading followed by its total
    sales, total profit, row count and average order value, then a blank
    line.

    Args:
        category_metrics: Dict mapping a category name to a dict with
            'total_sales', 'total_profit', 'order_count' and
            'avg_order_value'.
    """
    print("CATEGORY PERFORMANCE")
    print("====================")
    for name in category_metrics:
        stats = category_metrics[name]

        # Heading, underlined to exactly its own width.
        print(name)
        print(len(name) * "-")

        print(f"  Total Sales: ${stats['total_sales']:,.2f}")
        print(f"  Total Profit: ${stats['total_profit']:,.2f}")
        print(f"  Orders: {stats['order_count']}")
        print(f"  Avg Order Value: ${stats['avg_order_value']:,.2f}")

        # Blank separator between categories.
        print()


In [None]:
# Overall revenue summary: the margin is shown as a percentage, the
# quantity as a whole number, and everything else as a dollar amount.
metrics = calculate_revenue_metrics(data)
print("REVENUE METRICS")
print("===============")
for key, value in metrics.items():
    if "Profit Margin" in key:
        formatted = f"{value:.2f}%"
    elif "Quantity" in key:
        formatted = f"{int(value)}"
    else:
        formatted = f"${value:,.2f}"
    print(f"{key}: {formatted}")
print()

# Per-region breakdown.
region_metrics = analyze_by_region(data)
print_regional_performance(region_metrics)
print()

# Per-category breakdown.
category_metrics = analyze_by_category(data)
print_category_report(category_metrics)





### Part 3: Product Analysis