In [6]:
# Locations of the input CSVs and of the generated output CSVs
# (adjust the two directories below to match your environment)
import os

input_dir = r"C:\temp\samples\order_generation_input"
output_dir = r"C:\temp\samples\order_generation_output"

# Input sample files (all live in the same input directory)
customer_in = rf"{input_dir}\Customer_Samples.csv"
account_in = rf"{input_dir}\CustomerAccount_Samples.csv"
product_in = rf"{input_dir}\Product_Samples_ADB.csv"
product_category_in = rf"{input_dir}\ProductCategory_Samples_ADB.csv"
location_in = rf"{input_dir}\Location_Samples.csv"

# Output files, one per generated table
order_out, orderline_out, orderpayment_out = (
    os.path.join(output_dir, file_name)
    for file_name in (
        "Order_Samples_ADB.csv",
        "OrderLine_Samples_ADB.csv",
        "OrderPayment_ADB.csv",
    )
)

In [7]:
# 1. Import libraries and read input data

import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import uuid
import os

# Make sure the output folder exists before anything is written to it
os.makedirs(output_dir, exist_ok=True)

# Load the four input CSVs, each into its own DataFrame
df_customer, df_account, df_product, df_location = (
    pd.read_csv(csv_path)
    for csv_path in (customer_in, account_in, product_in, location_in)
)

print(f"✅ Loaded {len(df_customer)} customers, {len(df_account)} accounts, {len(df_product)} products, {len(df_location)} locations.")

# 2. Define helper functions

def random_date(start, end):
    """Return `start` shifted forward by a random whole number of days, not past `end`.

    The offset is drawn uniformly from 0 .. (end - start).days inclusive, so
    both the start day and the end day can be returned. The time-of-day
    component of `start` is preserved.

    Args:
        start: Lower bound (datetime or date).
        end: Upper bound (same type as `start`); must not be earlier than `start`.

    Returns:
        A value of the same type as `start`, between `start` and `end`.

    Raises:
        ValueError: If `end` is earlier than `start`.
    """
    delta = end - start
    if delta < timedelta(0):
        raise ValueError(f"end ({end}) must not be earlier than start ({start})")
    # `+ 1` makes the end day reachable and keeps the range non-empty when
    # start and end are less than one day apart (randrange(0) would raise).
    random_day = random.randrange(delta.days + 1)
    return start + timedelta(days=random_day)

def pick_payment_method():
    """Return one payment method label, chosen uniformly at random."""
    payment_methods = ["VISA", "MC", "Discover", "PayPal"]
    return random.choice(payment_methods)

def pick_order_status():
    """Return a random order status, weighted 85% Completed, 10% Pending, 5% Cancelled."""
    statuses = ["Completed", "Pending", "Cancelled"]
    status_weights = [0.85, 0.10, 0.05]
    # random.choices returns a list; the single drawn element is unpacked here
    (status,) = random.choices(statuses, weights=status_weights)
    return status

# 3. Generate Orders, OrderLines, OrderPayments
#
# For every customer that has at least one account, a random number of orders
# is generated. Each order gets 1-5 order lines (distinct products) and exactly
# one payment record. Results are collected as lists of dicts.

orders = []
orderlines = []
orderpayments = []

order_id_set = set()
order_number_counter = 100000

start_date = datetime(2019, 1, 1)
end_date = datetime(2025, 6, 30)

# Customer segmentation for order frequency:
# relationship type -> (min, max) number of orders generated per customer.
segment_order_freq = {
    "Standard": (1, 2),
    "Premium": (2, 5),
    "VIP": (5, 12),
    "SMB": (2, 6),
    "Premier": (5, 12),
    "Partner": (3, 8),
    "Federal": (1, 3),
    "State": (1, 2),
    "Local": (1, 2)
}

# Values that are identical for every generated order.
iso_currency = "USD"
created_by = "SampleGen"

# Every order needs at least one line, so an empty catalogue cannot yield valid data.
if df_product.empty:
    raise ValueError("Product input contains no rows; cannot generate order lines.")

# Resolve the product-id column once; the input may spell it "ProductID" or "ProductId".
product_id_col = "ProductID" if "ProductID" in df_product.columns else "ProductId"

for _, cust in df_customer.iterrows():
    cust_id = cust["CustomerId"]
    rel_type = cust.get("CustomerRelationshipTypeId", "Standard")
    # Use the first account row found for this customer; customers without an
    # account get no orders.
    # NOTE(review): an earlier comment said "ADB channel only", but no channel
    # filter is applied here - confirm whether one is needed.
    acc_row = df_account[df_account["CustomerId"] == cust_id]
    if acc_row.empty:
        continue
    acc_id = acc_row.iloc[0]["CustomerAccountId"]
    # Unknown relationship types fall back to the lowest frequency band
    freq_range = segment_order_freq.get(rel_type, (1, 2))
    num_orders = random.randint(freq_range[0], freq_range[1])
    for _ in range(num_orders):
        order_id = str(uuid.uuid4())
        while order_id in order_id_set:
            order_id = str(uuid.uuid4())
        order_id_set.add(order_id)
        order_number = f"F{order_number_counter}"
        order_number_counter += 1
        order_date = random_date(start_date, end_date).date()
        order_status = pick_order_status()
        payment_method = pick_payment_method()
        # Generate order lines (1-5 per order). The count is clamped to the
        # catalogue size because sampling without replacement cannot return
        # more rows than the frame holds.
        num_lines = min(random.randint(1, 5), len(df_product))
        chosen_products = df_product.sample(n=num_lines)
        subtotal = 0
        total_tax = 0
        for line_num, (_, prod) in enumerate(chosen_products.iterrows(), start=1):
            unit_price = float(prod["ListPrice"])
            quantity = random.randint(1, 5)
            # Discount is 0-15% of a single unit's price, applied once per line
            discount = round(unit_price * random.uniform(0, 0.15), 2)
            line_total = round((unit_price * quantity) - discount, 2)
            # Flat 5% tax on the discounted line total
            tax = round(line_total * 0.05, 2)
            subtotal += line_total
            total_tax += tax
            orderlines.append({
                "OrderId": order_id,
                "OrderLineNumber": line_num,
                "ProductId": prod[product_id_col],
                "ProductName": prod["ProductName"],
                "Quantity": quantity,
                "UnitPrice": unit_price,
                "LineTotal": line_total,
                "DiscountAmount": discount,
                "TaxAmount": tax
            })
        order_total = round(subtotal + total_tax, 2)
        orders.append({
            "OrderId": order_id,
            "SalesChannelId": "Fabric",
            "OrderNumber": order_number,
            "CustomerId": cust_id,
            "CustomerAccountId": acc_id,
            "OrderDate": order_date,
            "OrderStatus": order_status,
            "SubTotal": round(subtotal, 2),
            "TaxAmount": round(total_tax, 2),
            "OrderTotal": order_total,
            "PaymentMethod": payment_method,
            "IsoCurrencyCode": iso_currency,
            "CreatedBy": created_by
        })
        # One payment record per order, using the same payment method
        orderpayments.append({
            "OrderId": order_id,
            "PaymentMethod": payment_method,
            "TransactionId": str(uuid.uuid4())
        })

print(f"✅ Generated {len(orders)} orders, {len(orderlines)} order lines, {len(orderpayments)} payments.")

# 4. Write output files

# (label used in the confirmation message, generated records, destination CSV)
output_tables = (
    ("Order", orders, order_out),
    ("OrderLine", orderlines, orderline_out),
    ("OrderPayment", orderpayments, orderpayment_out),
)

# Write every table first, without the DataFrame index column
for _, records, target_path in output_tables:
    pd.DataFrame(records).to_csv(target_path, index=False)

# Then confirm each written file
for label, _, target_path in output_tables:
    print(f"✅ {label} data written to {target_path}")

✅ Loaded 513 customers, 925 accounts, 12 products, 513 locations.
✅ Generated 1811 orders, 5422 order lines, 1811 payments.
✅ Order data written to C:\temp\samples\order_generation_output\Order_Samples_ADB.csv
✅ OrderLine data written to C:\temp\samples\order_generation_output\OrderLine_Samples_ADB.csv
✅ OrderPayment data written to C:\temp\samples\order_generation_output\OrderPayment_ADB.csv
✅ Generated 1811 orders, 5422 order lines, 1811 payments.
✅ Order data written to C:\temp\samples\order_generation_output\Order_Samples_ADB.csv
✅ OrderLine data written to C:\temp\samples\order_generation_output\OrderLine_Samples_ADB.csv
✅ OrderPayment data written to C:\temp\samples\order_generation_output\OrderPayment_ADB.csv
