In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so every run yields the same synthetic data
np.random.seed(42)

# How many orders to simulate
num_orders = 100

# Orders table: one row per order, one order per calendar day from 2024-01-01.
# The three random draws below (customer IDs, amounts, countries) must stay in
# this order, otherwise the seeded output changes.
order_ids = ["O" + str(n) for n in range(1000, 1000 + num_orders)]
customer_ids = np.random.randint(2000, 3000, size=num_orders)  # 2000..2999, upper bound exclusive
order_dates = pd.date_range(start="2024-01-01", periods=num_orders, freq="D")
total_amounts = np.random.uniform(50, 500, size=num_orders).round(2)  # amounts with 2 decimals
countries = np.random.choice(["USA", "UK", "Germany", "France", "Netherlands"], num_orders)

orders_df = pd.DataFrame(
    dict(
        OrderID=order_ids,
        CustomerID=customer_ids,
        OrderDate=order_dates,
        TotalAmount=total_amounts,
        Country=countries,
    )
)

# Payments table: built from the subset of orders that received a payment.
# Each order independently has an 85% chance of being kept, so roughly 15% of
# orders end up with no payment row.
payment_methods = ["Credit Card", "PayPal", "Bank Transfer"]
payment_prob = np.random.choice([True, False], size=num_orders, p=[0.85, 0.15])  # boolean keep-mask

# Select the paid orders; .copy() so the new columns are not added to orders_df
payments_df = orders_df.loc[payment_prob].copy()
n_payments = len(payments_df)

payments_df["PaymentID"] = range(5000, 5000 + n_payments)

# Payment lands 1-4 days after the order (randint's upper bound is exclusive)
payment_delay_days = np.random.randint(1, 5, size=n_payments)
payments_df["PaymentDate"] = payments_df["OrderDate"] + pd.to_timedelta(payment_delay_days, unit="D")

payments_df["PaymentMethod"] = np.random.choice(payment_methods, size=n_payments)

# Invoices table: exactly one invoice per order, each with a random status.
invoice_statuses = ["Paid", "Unpaid", "Partially Paid"]
invoice_probs = [0.75, 0.15, 0.10]  # weights, in the same order as invoice_statuses

invoices_df = orders_df.copy()
n_invoices = len(invoices_df)

invoices_df["InvoiceID"] = range(8000, 8000 + n_invoices)

# Invoice is issued 0-2 days after the order (randint's upper bound is exclusive)
invoice_lag_days = np.random.randint(0, 3, size=n_invoices)
invoices_df["InvoiceDate"] = invoices_df["OrderDate"] + pd.to_timedelta(invoice_lag_days, unit="D")

# Status is drawn at random per invoice; it is not derived from the Payments table
invoices_df["Status"] = np.random.choice(invoice_statuses, size=n_invoices, p=invoice_probs)

# Write the three tables to CSV files at fixed, machine-specific locations
orders_file = "C:/Users/manos/OneDrive/Desktop/simulated_orders.csv"
payments_file = "C:/Users/manos/OneDrive/Desktop/simulated_payments.csv"
invoices_file = "C:/Users/manos/OneDrive/Desktop/simulated_invoices.csv"

for frame, destination in (
    (orders_df, orders_file),
    (payments_df, payments_file),
    (invoices_df, invoices_file),
):
    # index=False omits the DataFrame index column from the written file
    frame.to_csv(destination, index=False)

print(" Datasets saved successfully!")

 Datasets saved successfully!
