## File Handler

**File Format**:

- Pipe-delimited (`|`) format
- **Non-UTF-8 encoding** (you'll need to handle encoding issues)
- Contains **data quality issues**:
    - Some fields have comma-separated values within them
    - Some rows may have missing or extra fields
    - Some numeric values may have formatting issues (commas in numbers)
    - Some records have invalid data (zero quantities, negative prices, wrong ID formats)

#### Read Sales Data with Encoding Handling

In [1]:
import csv
from pathlib import Path


In [2]:
def read_sales_data(filename, file_encoder):
    data = []
    try:
        with open(filename, mode='r', encoding=file_encoder, newline='\n') as file:
            file_content = csv.reader(file, delimiter='|')
            header = next(file_content, None)

            for row in file_content:
                if row and any(field.strip() for field in row):
                    data.append('|'.join(row))
        return data

    except UnicodeDecodeError:
        print(f'{filename} file is not in UTF-8 encoding')
        return data
    except FileNotFoundError:
        print(f'{filename} file does not exist.')
        return data

In [3]:
BASE_DIR = Path.cwd().parent   # sales-analytics-system
file_path = BASE_DIR / 'data' / 'sales_data.txt'

In [4]:
a = read_sales_data(file_path,'utf-8')

In [5]:
first_row = a[0].split('|')

In [6]:
first_row

['T018', '2024-12-29', 'P107', 'USB Cable', '8', '173', 'C009', 'South']

In [None]:
def parse_transactions(raw_line):

    data = []
    for line in raw_line:
        t_id, dt, p_id, p_name, qty_raw, price_raw, c_id, region = [f.strip() for f in line.split('|')]
        
        # Handle commas within ProductName (replace commas with space)
        p_name_clean = p_name.replace(",", " ").strip()

        # Remove commas from numeric fields (e.g., "45,000")
        qty_clean = qty_raw.replace(",", "").strip()
        price_clean = price_raw.replace(",", "").strip()

        try:
            qty = int(qty_clean)
            unit_price = float(price_clean)
        except ValueError:
            continue

        data.append(
            {
                "TransactionID": t_id,
                "Date": dt,
                "ProductID": p_id,
                "ProductName": p_name_clean,
                "Quantity": qty,
                "UnitPrice": unit_price,
                "CustomerID": c_id,
                "Region": region,
            }
        )

    return data

In [9]:
a

['T018|2024-12-29|P107|USB Cable|8|173|C009|South',
 'T063|2024-12-07|P110|Laptop Charger|6|1,916|C022|East',
 'T075|2024-12-10|P106|Headphones|0|2826|C001|South',
 'T023|2024-12-09|P109|Wireless Mouse|9|523|C022|North',
 'T059|2024-12-29|P102|Mouse,Wireless|4|1056|C010|South',
 'T035|2024-12-08|P102|Mouse|4|431|C011|North',
 'T061|2024-12-10|P109|Wireless Mouse|2|775|C009|North',
 'T057|2024-12-15|P101|Laptop,Premium|10|81896|C004|North',
 'T034|2024-12-22|P107|USB Cable|6|324|C029|West',
 'T050|2024-12-02|P104|Monitor,LED|10|9997|C024|East',
 'T024|2024-12-25|P109|Wireless Mouse|5|1812|C011|North',
 'T004|2024-12-07|P109|Wireless Mouse|9|1359|C008|West',
 'T068|2024-12-02|P109|Wireless Mouse|6|1,692|C018|South',
 'T066|2024-12-06|P105|Webcam|8|4,259|C023|West',
 'T064|2024-12-16|P109|Wireless Mouse|5|604|C003|West',
 'T045|2024-12-26|P108|External Hard Drive|9|3802|C002|North',
 'T015|2024-12-30|P105|Webcam|9|2899|C022|East',
 'T055|2024-12-07|P105|Webcam,HD|6|2977|C009|West',
 'T072

In [21]:
def validate_and_filter(transactions, region=None, min_amount=None, max_amount=None):
    """
    Validates transactions and applies optional filters
    """
    required_fields = [
        "TransactionID", "Date", "ProductID", "ProductName",
        "Quantity", "UnitPrice", "CustomerID", "Region"
    ]

    total_input = len(transactions)
    invalid_count = 0
    valid_transactions = []

    # --- Print available regions (from all input, if present) ---
    regions = sorted({
        t.get("Region", "").strip()
        for t in transactions
        if isinstance(t, dict) and t.get("Region")
    })
    print("Available regions:", regions if regions else "None found")

    # --- Validate transactions ---
    for txn in transactions:
        # Must be a dict
        if not isinstance(txn, dict):
            invalid_count += 1
            continue

        # All required fields must exist and be non-empty (basic check)
        missing = [k for k in required_fields if k not in txn or txn[k] in (None, "")]
        if missing:
            invalid_count += 1
            continue

        # ID prefix rules
        if not str(txn["TransactionID"]).startswith("T"):
            invalid_count += 1
            continue
        if not str(txn["ProductID"]).startswith("P"):
            invalid_count += 1
            continue
        if not str(txn["CustomerID"]).startswith("C"):
            invalid_count += 1
            continue

        # Quantity and UnitPrice positive + type-safe
        try:
            qty = int(txn["Quantity"])
            price = float(txn["UnitPrice"])
        except (ValueError, TypeError):
            invalid_count += 1
            continue

        if qty <= 0 or price <= 0:
            invalid_count += 1
            continue

        # Store normalized numeric values back (optional but helpful)
        txn["Quantity"] = qty
        txn["UnitPrice"] = price

        valid_transactions.append(txn)

    # --- Amount range print (computed from valid transactions) ---
    if valid_transactions:
        amounts = [t["Quantity"] * t["UnitPrice"] for t in valid_transactions]
        print(f"Transaction amount range (valid only): min={min(amounts):.2f}, max={max(amounts):.2f}")
    else:
        print("Transaction amount range: no valid transactions to compute range.")

    # Summary counters
    filtered_by_region = 0
    filtered_by_amount = 0

    current = valid_transactions
    print(f"After validation: {len(current)} records (invalid: {invalid_count})")

    # --- Region filter ---
    if region is not None:
        before = len(current)
        current = [t for t in current if str(t.get("Region", "")).strip().lower() == str(region).strip().lower()]
        filtered_by_region = before - len(current)
        print(f"After region filter ({region}): {len(current)} records")

    # --- Amount filters ---
    # Compute amounts once for filtering
    def amount(t):
        return t["Quantity"] * t["UnitPrice"]

    if min_amount is not None:
        before = len(current)
        current = [t for t in current if amount(t) >= float(min_amount)]
        filtered_by_amount += before - len(current)
        print(f"After min_amount filter ({min_amount}): {len(current)} records")

    if max_amount is not None:
        before = len(current)
        current = [t for t in current if amount(t) <= float(max_amount)]
        filtered_by_amount += before - len(current)
        print(f"After max_amount filter ({max_amount}): {len(current)} records")

    filter_summary = {
        "total_input": total_input,
        "invalid": invalid_count,
        "valid_records": len(valid_transactions),
        "filtered_by_region": filtered_by_region,
        "filtered_by_amount": filtered_by_amount,
        "final_count": len(current)
    }

    return current, filter_summary


In [22]:
first_row = parse_transactions(a)


In [23]:
clean_data = parse_transactions(a)

In [24]:
clean_data, summary_data = validate_and_filter(clean_data,'North', 300, 5000)

Available regions: ['East', 'North', 'South', 'West']
Transaction amount range (valid only): min=257.00, max=818960.00
After validation: 70 records (invalid: 10)
After region filter (North): 21 records
After min_amount filter (300): 20 records
After max_amount filter (5000): 7 records


In [25]:
summary_data

{'total_input': 80,
 'invalid': 10,
 'valid_records': 70,
 'filtered_by_region': 49,
 'filtered_by_amount': 14,
 'final_count': 7}

In [26]:
clean_data

[{'TransactionID': 'T023',
  'Date': '2024-12-09',
  'ProductID': 'P109',
  'ProductName': 'Wireless Mouse',
  'Quantity': 9,
  'UnitPrice': 523.0,
  'CustomerID': 'C022',
  'Region': 'North'},
 {'TransactionID': 'T035',
  'Date': '2024-12-08',
  'ProductID': 'P102',
  'ProductName': 'Mouse',
  'Quantity': 4,
  'UnitPrice': 431.0,
  'CustomerID': 'C011',
  'Region': 'North'},
 {'TransactionID': 'T061',
  'Date': '2024-12-10',
  'ProductID': 'P109',
  'ProductName': 'Wireless Mouse',
  'Quantity': 2,
  'UnitPrice': 775.0,
  'CustomerID': 'C009',
  'Region': 'North'},
 {'TransactionID': 'T008',
  'Date': '2024-12-09',
  'ProductID': 'P110',
  'ProductName': 'Laptop Charger',
  'Quantity': 1,
  'UnitPrice': 2994.0,
  'CustomerID': 'C015',
  'Region': 'North'},
 {'TransactionID': 'T030',
  'Date': '2024-12-08',
  'ProductID': 'P105',
  'ProductName': 'Webcam',
  'Quantity': 1,
  'UnitPrice': 2986.0,
  'CustomerID': 'C029',
  'Region': 'North'},
 {'TransactionID': 'T037',
  'Date': '2024-12

## Data pre-processor

In [1]:
def calculate_total_revenue(transactions):
    
    total_revenue = 0.0

    for txn in transactions:
        try:
            qty = int(txn.get("Quantity", 0))
            price = float(txn.get("UnitPrice", 0.0))
            total_revenue += qty * price
        except (ValueError, TypeError, AttributeError):
            continue

    return round(total_revenue, 2)

In [9]:
calculate_total_revenue(clean_data)

3527808.0

In [10]:
def region_wise_sales(transactions):

    region_stats = {}
    grand_total = 0.0

    # --- Aggregate sales and counts ---
    for txn in transactions:
        try:
            region = str(txn.get("Region")).strip()
            qty = int(txn.get("Quantity"))
            price = float(txn.get("UnitPrice"))
            amount = qty * price
        except (ValueError, TypeError, AttributeError):
            continue

        if not region:
            continue

        grand_total += amount

        if region not in region_stats:
            region_stats[region] = {
                "total_sales": 0.0,
                "transaction_count": 0
            }

        region_stats[region]["total_sales"] += amount
        region_stats[region]["transaction_count"] += 1

    # --- Calculate percentage contribution ---
    for region in region_stats:
        if grand_total > 0:
            pct = (region_stats[region]["total_sales"] / grand_total) * 100
        else:
            pct = 0.0
        region_stats[region]["percentage"] = round(pct, 2)
        region_stats[region]["total_sales"] = round(region_stats[region]["total_sales"], 2)

    # --- Sort by total_sales (descending) ---
    sorted_region_stats = dict(
        sorted(
            region_stats.items(),
            key=lambda x: x[1]["total_sales"],
            reverse=True
        )
    )

    return sorted_region_stats


In [11]:
region_wise_sales(clean_data)

{'North': {'total_sales': 1321605.0,
  'transaction_count': 21,
  'percentage': 37.46},
 'South': {'total_sales': 889332.0,
  'transaction_count': 13,
  'percentage': 25.21},
 'West': {'total_sales': 848902.0,
  'transaction_count': 19,
  'percentage': 24.06},
 'East': {'total_sales': 467969.0,
  'transaction_count': 17,
  'percentage': 13.27}}

In [12]:
def top_selling_products(transactions, n=5):

    product_stats = {}

    # --- Aggregate quantity and revenue per product ---
    for txn in transactions:
        try:
            product = str(txn.get("ProductName")).strip()
            qty = int(txn.get("Quantity"))
            price = float(txn.get("UnitPrice"))
            revenue = qty * price
        except (ValueError, TypeError, AttributeError):
            continue

        if not product:
            continue

        if product not in product_stats:
            product_stats[product] = {
                "total_qty": 0,
                "total_revenue": 0.0
            }

        product_stats[product]["total_qty"] += qty
        product_stats[product]["total_revenue"] += revenue

    # --- Sort by total quantity (descending) ---
    sorted_products = sorted(
        product_stats.items(),
        key=lambda x: x[1]["total_qty"],
        reverse=True
    )

    # --- Return top n in required tuple format ---
    top_n = [
        (
            product,
            stats["total_qty"],
            round(stats["total_revenue"], 2)
        )
        for product, stats in sorted_products[:n]
    ]

    return top_n


In [13]:
top_selling_products(clean_data)

[('Mouse', 61, 40297.0),
 ('Wireless Mouse', 45, 49981.0),
 ('Webcam', 35, 128187.0),
 ('USB Cable', 33, 7622.0),
 ('Monitor', 30, 493759.0)]

In [14]:
def customer_analysis(transactions):
    customer_stats = {}

    # --- Aggregate per customer ---
    for txn in transactions:
        try:
            customer = str(txn.get("CustomerID")).strip()
            product = str(txn.get("ProductName")).strip()
            qty = int(txn.get("Quantity"))
            price = float(txn.get("UnitPrice"))
            amount = qty * price
        except (ValueError, TypeError, AttributeError):
            continue

        if not customer:
            continue

        if customer not in customer_stats:
            customer_stats[customer] = {
                "total_spent": 0.0,
                "purchase_count": 0,
                "products_bought": set()
            }

        customer_stats[customer]["total_spent"] += amount
        customer_stats[customer]["purchase_count"] += 1

        if product:
            customer_stats[customer]["products_bought"].add(product)

    # --- Final calculations (avg order value, formatting) ---
    for cust in customer_stats:
        purchases = customer_stats[cust]["purchase_count"]
        total = customer_stats[cust]["total_spent"]

        avg = total / purchases if purchases > 0 else 0.0

        customer_stats[cust]["avg_order_value"] = round(avg, 2)
        customer_stats[cust]["total_spent"] = round(total, 2)
        customer_stats[cust]["products_bought"] = sorted(
            list(customer_stats[cust]["products_bought"])
        )

    # --- Sort by total_spent (descending) ---
    sorted_customers = dict(
        sorted(
            customer_stats.items(),
            key=lambda x: x[1]["total_spent"],
            reverse=True
        )
    )

    return sorted_customers


In [15]:
customer_analysis(clean_data)

{'C004': {'total_spent': 857124.0,
  'purchase_count': 3,
  'products_bought': ['Headphones', 'Laptop Charger', 'Laptop Premium'],
  'avg_order_value': 285708.0},
 'C017': {'total_spent': 762460.0,
  'purchase_count': 1,
  'products_bought': ['Laptop Premium'],
  'avg_order_value': 762460.0},
 'C010': {'total_spent': 457186.0,
  'purchase_count': 3,
  'products_bought': ['External Hard Drive 1TB',
   'Laptop Premium',
   'Mouse Wireless'],
  'avg_order_value': 152395.33},
 'C024': {'total_spent': 249451.0,
  'purchase_count': 2,
  'products_bought': ['Monitor', 'Monitor LED'],
  'avg_order_value': 124725.5},
 'C008': {'total_spent': 216176.0,
  'purchase_count': 5,
  'products_bought': ['Laptop',
   'Laptop Charger',
   'Monitor',
   'Mouse',
   'Wireless Mouse'],
  'avg_order_value': 43235.2},
 'C023': {'total_spent': 165391.0,
  'purchase_count': 2,
  'products_bought': ['Monitor', 'Webcam'],
  'avg_order_value': 82695.5},
 'C003': {'total_spent': 118144.0,
  'purchase_count': 3,
  '

In [16]:
def daily_sales_trend(transactions):

    daily = {}

    # --- Aggregate by date ---
    for txn in transactions:
        try:
            dt = str(txn.get("Date")).strip()
            cust = str(txn.get("CustomerID")).strip()
            qty = int(txn.get("Quantity"))
            price = float(txn.get("UnitPrice"))
            amount = qty * price
        except (ValueError, TypeError, AttributeError):
            continue

        if not dt:
            continue

        if dt not in daily:
            daily[dt] = {
                "revenue": 0.0,
                "transaction_count": 0,
                "unique_customers": set()
            }

        daily[dt]["revenue"] += amount
        daily[dt]["transaction_count"] += 1
        if cust:
            daily[dt]["unique_customers"].add(cust)

    # --- Finalize: convert set to count + rounding ---
    for dt in daily:
        daily[dt]["revenue"] = round(daily[dt]["revenue"], 2)
        daily[dt]["unique_customers"] = len(daily[dt]["unique_customers"])

    # --- Sort chronologically by date string (YYYY-MM-DD sorts correctly) ---
    sorted_daily = dict(sorted(daily.items(), key=lambda x: x[0]))

    return sorted_daily


In [17]:
daily_sales_trend(clean_data)

{'2024-12-01': {'revenue': 123969.0,
  'transaction_count': 3,
  'unique_customers': 2},
 '2024-12-02': {'revenue': 882906.0,
  'transaction_count': 5,
  'unique_customers': 5},
 '2024-12-03': {'revenue': 61851.0,
  'transaction_count': 5,
  'unique_customers': 5},
 '2024-12-05': {'revenue': 257.0,
  'transaction_count': 1,
  'unique_customers': 1},
 '2024-12-06': {'revenue': 34072.0,
  'transaction_count': 1,
  'unique_customers': 1},
 '2024-12-07': {'revenue': 204912.0,
  'transaction_count': 10,
  'unique_customers': 7},
 '2024-12-08': {'revenue': 70383.0,
  'transaction_count': 3,
  'unique_customers': 3},
 '2024-12-09': {'revenue': 25339.0,
  'transaction_count': 4,
  'unique_customers': 4},
 '2024-12-10': {'revenue': 1550.0,
  'transaction_count': 1,
  'unique_customers': 1},
 '2024-12-11': {'revenue': 13207.0,
  'transaction_count': 2,
  'unique_customers': 2},
 '2024-12-13': {'revenue': 417923.0,
  'transaction_count': 3,
  'unique_customers': 3},
 '2024-12-14': {'revenue': 453

In [18]:
def find_peak_sales_day(transactions):

    daily = {}

    # --- Aggregate revenue and count by date ---
    for txn in transactions:
        try:
            dt = str(txn.get("Date")).strip()
            qty = int(txn.get("Quantity"))
            price = float(txn.get("UnitPrice"))
            amount = qty * price
        except (ValueError, TypeError, AttributeError):
            continue

        if not dt:
            continue

        if dt not in daily:
            daily[dt] = {
                "revenue": 0.0,
                "transaction_count": 0
            }

        daily[dt]["revenue"] += amount
        daily[dt]["transaction_count"] += 1

    if not daily:
        return None, 0.0, 0

    # --- Find peak revenue day ---
    peak_date, peak_stats = max(
        daily.items(),
        key=lambda x: x[1]["revenue"]
    )

    return (
        peak_date,
        round(peak_stats["revenue"], 2),
        peak_stats["transaction_count"]
    )


In [19]:
find_peak_sales_day(clean_data)

('2024-12-02', 882906.0, 5)

In [20]:
def low_performing_products(transactions, threshold=10):

    product_stats = {}

    # --- Aggregate quantity and revenue per product ---
    for txn in transactions:
        try:
            product = str(txn.get("ProductName")).strip()
            qty = int(txn.get("Quantity"))
            price = float(txn.get("UnitPrice"))
            revenue = qty * price
        except (ValueError, TypeError, AttributeError):
            continue

        if not product:
            continue

        if product not in product_stats:
            product_stats[product] = {
                "total_qty": 0,
                "total_revenue": 0.0
            }

        product_stats[product]["total_qty"] += qty
        product_stats[product]["total_revenue"] += revenue

    # --- Filter products with quantity below threshold ---
    low_products = [
        (
            product,
            stats["total_qty"],
            round(stats["total_revenue"], 2)
        )
        for product, stats in product_stats.items()
        if stats["total_qty"] < threshold
    ]

    # --- Sort by total quantity (ascending) ---
    low_products_sorted = sorted(
        low_products,
        key=lambda x: x[1]
    )

    return low_products_sorted


In [21]:
low_performing_products(clean_data)

[('Laptop', 3, 184329.0),
 ('Keyboard Mechanical', 5, 13360.0),
 ('Webcam HD', 6, 17862.0),
 ('Laptop Charger 65W', 7, 19922.0),
 ('Mouse Wireless', 8, 6784.0)]

## API handler

In [22]:
import requests

def fetch_all_products():
    url = "https://dummyjson.com/products"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # raises HTTPError for 4xx/5xx

        data = response.json()
        products = data.get("products", [])

        print(f"Successfully fetched {len(products)} products.")
        return products

    except requests.exceptions.RequestException as e:
        print(f"Failed to fetch products: {e}")
        return []

In [23]:
a = fetch_all_products()

Successfully fetched 30 products.


In [24]:
a

[{'id': 1,
  'title': 'Essence Mascara Lash Princess',
  'description': 'The Essence Mascara Lash Princess is a popular mascara known for its volumizing and lengthening effects. Achieve dramatic lashes with this long-lasting and cruelty-free formula.',
  'category': 'beauty',
  'price': 9.99,
  'discountPercentage': 10.48,
  'rating': 2.56,
  'stock': 99,
  'tags': ['beauty', 'mascara'],
  'brand': 'Essence',
  'sku': 'BEA-ESS-ESS-001',
  'weight': 4,
  'dimensions': {'width': 15.14, 'height': 13.08, 'depth': 22.99},
  'warrantyInformation': '1 week warranty',
  'shippingInformation': 'Ships in 3-5 business days',
  'availabilityStatus': 'In Stock',
  'reviews': [{'rating': 3,
    'comment': 'Would not recommend!',
    'date': '2025-04-30T09:41:02.053Z',
    'reviewerName': 'Eleanor Collins',
    'reviewerEmail': 'eleanor.collins@x.dummyjson.com'},
   {'rating': 4,
    'comment': 'Very satisfied!',
    'date': '2025-04-30T09:41:02.053Z',
    'reviewerName': 'Lucas Gordon',
    'reviewe

In [25]:
def create_product_mapping(api_products):

    product_map = {}

    for product in api_products:
        try:
            product_id = product.get("id")
            title = product.get("title")
            category = product.get("category")
            brand = product.get("brand")
            rating = product.get("rating")
        except AttributeError:
            continue

        # Validate required fields
        if product_id is None or title is None:
            continue

        product_map[product_id] = {
            "title": title,
            "category": category,
            "brand": brand,
            "rating": rating
        }

    return product_map


In [26]:
create_product_mapping(a)

{1: {'title': 'Essence Mascara Lash Princess',
  'category': 'beauty',
  'brand': 'Essence',
  'rating': 2.56},
 2: {'title': 'Eyeshadow Palette with Mirror',
  'category': 'beauty',
  'brand': 'Glamour Beauty',
  'rating': 2.86},
 3: {'title': 'Powder Canister',
  'category': 'beauty',
  'brand': 'Velvet Touch',
  'rating': 4.64},
 4: {'title': 'Red Lipstick',
  'category': 'beauty',
  'brand': 'Chic Cosmetics',
  'rating': 4.36},
 5: {'title': 'Red Nail Polish',
  'category': 'beauty',
  'brand': 'Nail Couture',
  'rating': 4.32},
 6: {'title': 'Calvin Klein CK One',
  'category': 'fragrances',
  'brand': 'Calvin Klein',
  'rating': 4.37},
 7: {'title': 'Chanel Coco Noir Eau De',
  'category': 'fragrances',
  'brand': 'Chanel',
  'rating': 4.26},
 8: {'title': "Dior J'adore",
  'category': 'fragrances',
  'brand': 'Dior',
  'rating': 3.8},
 9: {'title': 'Dolce Shine Eau de',
  'category': 'fragrances',
  'brand': 'Dolce & Gabbana',
  'rating': 3.96},
 10: {'title': 'Gucci Bloom Eau d

In [27]:
import os
import re

def enrich_sales_data(transactions, product_mapping):

    enriched = []

    # Output file path
    output_dir = "data"
    output_file = os.path.join(output_dir, "enriched_sales_data.txt")
    os.makedirs(output_dir, exist_ok=True)

    # Columns for file output (pipe-delimited)
    base_cols = [
        "TransactionID", "Date", "ProductID", "ProductName",
        "Quantity", "UnitPrice", "CustomerID", "Region"
    ]
    new_cols = ["API_Category", "API_Brand", "API_Rating", "API_Match"]
    header_cols = base_cols + new_cols

    def extract_numeric_id(product_id):
        # P101 -> 101, P5 -> 5, also handles "P-101" or "P101A" by extracting digits
        if product_id is None:
            return None
        m = re.search(r"(\d+)", str(product_id))
        return int(m.group(1)) if m else None

    for txn in transactions:
        # Default enriched fields
        api_category = None
        api_brand = None
        api_rating = None
        api_match = False

        try:
            pid_num = extract_numeric_id(txn.get("ProductID"))
            if pid_num is not None and pid_num in product_mapping:
                info = product_mapping[pid_num]
                api_category = info.get("category")
                api_brand = info.get("brand")
                api_rating = info.get("rating")
                api_match = True
        except Exception:
            # Graceful handling: keep defaults (no match)
            pass

        # Build enriched transaction (do not mutate original)
        enriched_txn = dict(txn)
        enriched_txn["API_Category"] = api_category
        enriched_txn["API_Brand"] = api_brand
        enriched_txn["API_Rating"] = api_rating
        enriched_txn["API_Match"] = api_match

        enriched.append(enriched_txn)

    # ---- Write pipe-delimited output with header ----
    def to_str(val):
        if val is None:
            return ""
        if isinstance(val, bool):
            return "True" if val else "False"
        return str(val)

    try:
        with open(output_file, "w", encoding="utf-8", newline="\n") as f:
            # Header
            f.write("|".join(header_cols) + "\n")

            # Rows
            for row in enriched:
                line = "|".join(to_str(row.get(col)) for col in header_cols)
                f.write(line + "\n")

        print(f"Enriched data saved successfully to: {output_file}")
    except Exception as e:
        print(f"Failed to write enriched file: {e}")

    return enriched


In [28]:
enrich_sales_data(clean_data,create_product_mapping(a))

Enriched data saved successfully to: data/enriched_sales_data.txt


[{'TransactionID': 'T018',
  'Date': '2024-12-29',
  'ProductID': 'P107',
  'ProductName': 'USB Cable',
  'Quantity': 8,
  'UnitPrice': 173.0,
  'CustomerID': 'C009',
  'Region': 'South',
  'API_Category': None,
  'API_Brand': None,
  'API_Rating': None,
  'API_Match': False},
 {'TransactionID': 'T063',
  'Date': '2024-12-07',
  'ProductID': 'P110',
  'ProductName': 'Laptop Charger',
  'Quantity': 6,
  'UnitPrice': 1916.0,
  'CustomerID': 'C022',
  'Region': 'East',
  'API_Category': None,
  'API_Brand': None,
  'API_Rating': None,
  'API_Match': False},
 {'TransactionID': 'T023',
  'Date': '2024-12-09',
  'ProductID': 'P109',
  'ProductName': 'Wireless Mouse',
  'Quantity': 9,
  'UnitPrice': 523.0,
  'CustomerID': 'C022',
  'Region': 'North',
  'API_Category': None,
  'API_Brand': None,
  'API_Rating': None,
  'API_Match': False},
 {'TransactionID': 'T059',
  'Date': '2024-12-29',
  'ProductID': 'P102',
  'ProductName': 'Mouse Wireless',
  'Quantity': 4,
  'UnitPrice': 1056.0,
  'Cust

In [53]:
from utili import file_handler

In [None]:
file_handler.read_sales_data()