📝 Mini-Project: Sales Analysis with NumPy

Goal: Analyze monthly sales data of multiple products to find trends, total sales, high-performing products, and insights using NumPy operations.

1️⃣ Problem Statement

You are given a monthly sales dataset for 5 products over 12 months:

Month	Product_A	Product_B	Product_C	Product_D	Product_E
Jan	120	200	150	170	90
Feb	130	210	160	180	100
...	...	...	...	...	...

Tasks:

Load the sales data as a NumPy array.

Find total sales per product.

Find total sales per month.

Identify products with monthly sales > 200.

Compute average sales per product.

Find the best-selling product.

Find the month with highest total sales.

Apply masking to find months where Product_A sold < 130.

Normalize sales data between 0 and 1 (optional).

Optional: visualize using Matplotlib (not required for NumPy-only).



In [None]:

# 2️⃣ Implementation

import numpy as np

# Task 1: Load sales data.
# Each row is one month (Jan..Dec) and each column is one product (A..E),
# so axis=0 reduces over months and axis=1 reduces over products.
sales_data = np.array([
    [120, 200, 150, 170, 90],
    [130, 210, 160, 180, 100],
    [125, 220, 155, 190, 110],
    [140, 230, 165, 200, 120],
    [150, 240, 170, 210, 130],
    [160, 250, 175, 220, 140],
    [170, 260, 180, 230, 150],
    [180, 270, 185, 240, 160],
    [190, 280, 190, 250, 170],
    [200, 290, 195, 260, 180],
    [210, 300, 200, 270, 190],
    [220, 310, 205, 280, 200]
])

# Labels aligned with the columns / rows of sales_data.
products = np.array(['Product_A','Product_B','Product_C','Product_D','Product_E'])
months = np.array(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])

# Task 2: Total sales per product (sum down each column).

total_per_product = np.sum(sales_data, axis=0)
for prod, total in zip(products, total_per_product):
    print(f"{prod}: {total}")

# Task 3: Total sales per month (sum across each row).

total_per_month = np.sum(sales_data, axis=1)
for month, total in zip(months, total_per_month):
    print(f"{month}: {total}")

# Task 4: Products with monthly sales > 200 (masking).

mask = sales_data > 200
# Boolean indexing flattens the result, so this is only the list of values.
high_sales = sales_data[mask]
print("High sales values (>200):", high_sales)

# The flattened values above no longer say which product they belong to.
# A product qualifies when at least one of its months exceeds 200.
products_over_200 = products[np.any(mask, axis=0)]
print("Products with any monthly sales > 200:", products_over_200)

# Task 5: Average sales per product.

avg_per_product = np.mean(sales_data, axis=0)
for prod, avg in zip(products, avg_per_product):
    print(f"{prod}: {avg:.2f}")

# Task 6: Best-selling product (largest column total).

best_product_index = np.argmax(total_per_product)
print(f"Best-selling product: {products[best_product_index]}")

# Task 7: Month with highest total sales (largest row total).

best_month_index = np.argmax(total_per_month)
print(f"Month with highest total sales: {months[best_month_index]}")

# Task 8: Months where Product_A (column 0) sold < 130 (masking).

mask_product_a = sales_data[:,0] < 130
print("Months where Product_A sold < 130:", months[mask_product_a])

# Task 9 (optional): Normalize sales data to the range [0, 1].

# Min-max normalization over the whole table (one global min and max).
min_val = np.min(sales_data)
max_val = np.max(sales_data)
value_range = max_val - min_val
if value_range == 0:
    # Every value is identical; avoid 0/0 and map everything to 0.
    normalized_sales = np.zeros(sales_data.shape)
else:
    normalized_sales = (sales_data - min_val) / value_range
print(normalized_sales)


In [None]:
import csv
import numpy as np
from datetime import datetime

file_path = "path_to_your_sales_data.csv"  # Replace with your actual file path

# Columns the analysis needs, mapped to the position each one was documented
# to occupy in the sample data. The position is used only as a fallback when
# the file's header row does not contain the column name.
FALLBACK_COLUMN_INDEX = {
    'QUANTITYORDERED': 1,
    'PRICEEACH': 2,
    'SALES': 4,
    'ORDERDATE': 5,
    'PRODUCTLINE': 10,
    'DEALSIZE': 26,
}


def resolve_columns(header):
    """Map each required column name to its index in ``header``.

    Names are matched after stripping whitespace and upper-casing. A column
    whose name is absent from the header keeps its documented fallback index.
    """
    names = [name.strip().upper() for name in header]
    return {
        column: names.index(column) if column in names else fallback
        for column, fallback in FALLBACK_COLUMN_INDEX.items()
    }


def load_sales_rows(path):
    """Read the sales CSV at ``path`` into a list of records.

    Each record is
    ``[quantity, price_each, sales, order_date, product_line, deal_size]``
    with the first three converted to int / float / float and the rest kept
    as strings. Returns an empty list for a file with no header row.

    Raises:
        ValueError: if a numeric field cannot be converted.
        IndexError: if a non-blank row is shorter than a required column index.
    """
    records = []
    # newline='' lets the csv module do its own line-ending handling, which
    # it needs to parse quoted fields containing line breaks correctly.
    with open(path, mode='r', newline='') as file:
        reader = csv.reader(file)
        header = next(reader, None)
        if header is None:
            return records  # completely empty file

        col = resolve_columns(header)
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            records.append([
                int(row[col['QUANTITYORDERED']]),
                float(row[col['PRICEEACH']]),
                float(row[col['SALES']]),
                row[col['ORDERDATE']],
                row[col['PRODUCTLINE']],
                row[col['DEALSIZE']],
            ])
    return records


def parse_order_date(text):
    """Parse an order date written as ``M/D/YYYY H:MM`` or ``M-D-YYYY H:MM``."""
    date_format = '%m/%d/%Y %H:%M' if '/' in text else '%m-%d-%Y %H:%M'
    return datetime.strptime(text, date_format)


def sum_by_key(keys, values):
    """Return a dict mapping each key to the sum of its paired values.

    Keys appear in the dict in order of first occurrence.
    """
    totals = {}
    for key, value in zip(keys, values):
        totals[key] = totals.get(key, 0) + value
    return totals


# The analysis itself runs only when this code is executed directly (as a
# script or a notebook cell), so the helpers above can be imported without
# needing the CSV file to exist.
if __name__ == "__main__":
    # Step 1: Load the data
    rows = load_sales_rows(file_path)

    # Convert to NumPy structured array for numeric fields
    # We'll separate numeric fields for easy NumPy processing:
    numeric_data = np.array(
        [(r[0], r[1], r[2]) for r in rows],
        dtype=[('Quantity', int), ('PriceEach', float), ('Sales', float)],
    )

    print("Loaded numeric data sample:")
    print(numeric_data[:5])

    # Step 2: Basic Sales Summary (NumPy)

    # Pull the Sales field out once instead of re-indexing it in every loop.
    sales_values = numeric_data['Sales']

    total_sales = np.sum(sales_values)
    average_sales = np.mean(sales_values)
    total_quantity = np.sum(numeric_data['Quantity'])
    average_price = np.mean(numeric_data['PriceEach'])

    print(f"Total Sales: ${total_sales:,.2f}")
    print(f"Average Sales per Order: ${average_sales:,.2f}")
    print(f"Total Quantity Ordered: {total_quantity}")
    print(f"Average Price Each: ${average_price:.2f}")

    # Step 3: Monthly Sales Analysis (with Python datetime + NumPy)

    # Parse order dates into datetime objects
    order_dates = [parse_order_date(r[3]) for r in rows]

    # Extract year-month string (e.g. "2003-02")
    year_months = [d.strftime('%Y-%m') for d in order_dates]

    # Aggregate sales by year-month
    monthly_sales = sum_by_key(year_months, sales_values)

    print("\nMonthly Sales Summary:")
    # 'YYYY-MM' strings sort chronologically when sorted as text.
    for ym in sorted(monthly_sales):
        print(f"{ym}: ${monthly_sales[ym]:,.2f}")

    # Step 4: Sales by Product Line

    product_line_sales = sum_by_key((r[4] for r in rows), sales_values)

    print("\nSales by Product Line:")
    for pl, pl_total in product_line_sales.items():
        print(f"{pl}: ${pl_total:,.2f}")

    # Step 5: Average Sales by Deal Size

    deal_size_sales = {}
    deal_size_counts = {}

    for row, sale in zip(rows, sales_values):
        ds = row[5]
        deal_size_sales[ds] = deal_size_sales.get(ds, 0) + sale
        deal_size_counts[ds] = deal_size_counts.get(ds, 0) + 1

    print("\nAverage Sales by Deal Size:")
    for ds in deal_size_sales:
        avg = deal_size_sales[ds] / deal_size_counts[ds]
        print(f"{ds}: ${avg:,.2f}")