In [1]:
import os
import random
from datetime import timedelta, datetime  # Import datetime

import pandas as pd
from faker import Faker

In [2]:
# Output directory for the generated CSV files. The trailing slash is required
# because file names are appended to this value by plain string concatenation.
data_path = "./data/"

In [3]:
# Seed both random sources so that Restart & Run All repeats the same random
# draws: the stdlib `random` module drives the numeric / choice fields, while
# Faker uses its own generator for names, companies and timestamps.
# NOTE: timestamps are drawn relative to "now", so their absolute values still
# move with the wall clock even though the random draws are fixed.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)
fake = Faker()

In [4]:
# Configuration: how many records of each entity type to generate.

# Parties
NUM_INVESTORS = 30
NUM_BROKERS = 15
NUM_COMM_PLANS = 6

# Market reference data
NUM_EXCHANGES = 8
NUM_INSTRUMENTS = 80

# Holdings
NUM_PORTFOLIOS = 40
NUM_POSITIONS = 100
NUM_CASH_ACCOUNTS = 50

# Trading activity
NUM_ORDERS = 150
# NOTE(review): NUM_TRADES is not referenced by any visible cell; the number of
# trades actually produced depends on the randomly chosen order statuses.
NUM_TRADES = 250

In [5]:
# Small value sets that the generator cells draw from.

# Order attributes
ORDER_TYPES = [
    "Market",
    "Limit",
    "Stop",
]
ORDER_STATUS_OPTIONS = [
    "Open",
    "PartiallyFilled",
    "Filled",
    "Cancelled",
]
ORDER_SIDES = [
    "Buy",
    "Sell",
]
TIME_IN_FORCE_OPTS = [
    "DAY",
    "GTC",
]

# Instrument attributes
INSTRUMENT_TYPES = [
    "Equity",
    "ETF",
]
TICK_SIZE_OPTIONS = [
    0.01,
    0.001,
]  # approximate

# Exchange attributes
COUNTRIES = [
    "USA",
    "Canada",
    "UK",
    "Germany",
    "Japan",
]

# RULE from CommissionPlan: 0 < commissionRate < 0.05
# The rate is rounded to 4 decimals, so the draw is capped at 0.0499. With an
# upper bound of 0.05, any draw >= 0.04995 would round up to exactly 0.05 and
# violate the strict "< 0.05" rule.
MAX_COMMISSION_RATE = 0.0499
commission_plans = []
for i in range(NUM_COMM_PLANS):
    rate = round(random.uniform(0.0001, MAX_COMMISSION_RATE), 4)  # 0.0001 <= rate <= 0.0499
    commission_plans.append({
        "id": f"plan_{i}",
        "planName": fake.bs(),
        "commissionRate": rate
    })

In [6]:
# 1 Generate Investors
# Each investor gets a sequential id, a Faker-generated name and a randomly
# picked investor type.
investor_types = ["Retail", "Institutional"]
investors = [
    {
        "id": f"inv_{idx}",
        "investorName": fake.name(),
        "investorType": random.choice(investor_types),
    }
    for idx in range(NUM_INVESTORS)
]

In [7]:
# 2 Generate Brokers
# License numbers are sampled without replacement so that no two brokers share
# a brokerLicenseID; independent randint draws could repeat a number.
# (random.sample raises ValueError if NUM_BROKERS exceeds the 9000 available numbers.)
license_numbers = random.sample(range(1000, 10000), NUM_BROKERS)

brokers = []
for i, license_no in enumerate(license_numbers):
    # randomly choose a CommissionPlan
    plan = random.choice(commission_plans)
    brokers.append({
        "id": f"broker_{i}",
        "brokerName": fake.company() + " Brokerage",
        "brokerLicenseID": f"BR-{license_no}",
        "commissionPlanID": plan["id"]
    })

In [8]:
# 3 Generate Exchanges
# Each exchange gets a sequential id, a Faker company name with an " Exchange"
# suffix, and a country picked from COUNTRIES.
exchanges = [
    {
        "id": f"exch_{idx}",
        "exchangeName": fake.company() + " Exchange",
        "country": random.choice(COUNTRIES),
    }
    for idx in range(NUM_EXCHANGES)
]

In [9]:
# 4 Generate Financial Instruments
# A symbol is three random uppercase letters followed by a two-digit number.
# NOTE(review): symbols are drawn independently, so uniqueness is not enforced.
instruments = []
for idx in range(NUM_INSTRUMENTS):
    letters = "".join(random.choices("ABCDEFGHIJKLMNOPQRSTUVWXYZ", k=3))
    digits = random.randint(10, 99)
    record = {
        "id": f"instr_{idx}",
        "symbol": f"{letters}{digits}",
        "description": fake.catch_phrase(),
        "tickSize": random.choice(TICK_SIZE_OPTIONS),
        "instrumentType": random.choice(INSTRUMENT_TYPES),
    }
    instruments.append(record)

In [10]:
# 5 Generate Portfolios
# Every portfolio is assigned to a randomly chosen investor and named after them;
# an investor may therefore own several portfolios or none.
portfolios = []
for idx in range(NUM_PORTFOLIOS):
    owner = random.choice(investors)
    record = {
        "id": f"port_{idx}",
        "portfolioID": f"PORT-{idx}",
        "portfolioName": f"{owner['investorName']}'s Portfolio",
        "investorID": owner["id"],
    }
    portfolios.append(record)

In [11]:
# 6 Generate Positions
# Each position links a random portfolio to a random instrument.
# NOTE(review): (portfolio, instrument) pairs are drawn independently, so the
# same pair can appear in more than one position row.
positions = []
for idx in range(NUM_POSITIONS):
    holder = random.choice(portfolios)
    held_instrument = random.choice(instruments)
    # negative quantity -> short
    signed_qty = random.randint(-300, 1000)
    cost_basis = round(random.uniform(1, 2000), 2)
    record = {
        "id": f"pos_{idx}",
        "portfolioID": holder["id"],
        "instrumentID": held_instrument["id"],
        "averageCost": cost_basis,
        "currentQuantity": signed_qty,
    }
    positions.append(record)

In [12]:
# 7 Generate CashAccounts
# Every cash account belongs to a randomly chosen investor and starts with a
# balance between 500.00 and 100000.00, rounded to 2 decimals.
cash_accounts = []
for idx in range(NUM_CASH_ACCOUNTS):
    owner = random.choice(investors)
    opening_balance = round(random.uniform(500.0, 100000.0), 2)
    record = {
        "id": f"cash_{idx}",
        "accountID": f"CASH-{idx}",
        "balance": opening_balance,
        "belongsToInvestorID": owner["id"],  # or None if we want broker accounts
    }
    cash_accounts.append(record)

In [13]:
# 8 Generate Orders
# Public order numbers are sampled without replacement so that every orderID is
# unique; independent randint draws could hand two orders the same number.
# (random.sample raises ValueError if NUM_ORDERS exceeds the 900000 available numbers.)
order_numbers = random.sample(range(100000, 1000000), NUM_ORDERS)

orders = []
for i, order_no in enumerate(order_numbers):
    inv = random.choice(investors)
    # ~80% of orders have a broker, ~20% self-directed
    maybe_broker = random.choice(brokers) if random.random() < 0.8 else None
    exch = random.choice(exchanges)
    instr = random.choice(instruments)

    otype = random.choice(ORDER_TYPES)
    side = random.choice(ORDER_SIDES)
    qty = random.randint(10, 1000)
    dt_created = fake.date_time_between(start_date='-1y', end_date='now')

    # RULE 1 & RULE 2: If orderType=Limit => limitPrice != None, else limitPrice=None
    limit_px = None
    if otype == "Limit":
        limit_px = round(random.uniform(5, 500), 2)

    # pick a random status
    status = random.choice(ORDER_STATUS_OPTIONS)
    # The status is chosen here; the trade generation step is what later
    # produces fills that are meant to agree with it.

    # Create the order
    order_data = {
        "id": f"ord_{i}",
        "orderID": f"ORD-{order_no}",
        "orderType": otype,
        "side": side,
        "quantity": qty,
        "limitPrice": limit_px,  # None for every non-Limit order
        "timeInForce": random.choice(TIME_IN_FORCE_OPTS),
        "status": status,
        "creationDateTime": dt_created.isoformat(),
        "investorID": inv["id"],
        "brokerID": maybe_broker["id"] if maybe_broker else None,
        "exchangeID": exch["id"],
        "instrumentID": instr["id"]
    }
    orders.append(order_data)

In [14]:
# 9 Generate Trades
# Trades are generated per order, attempting to keep the filled quantity
# consistent with the order status:
#   status='Filled'          -> sum of trade quantities == order quantity
#   status='PartiallyFilled' -> sum of trade quantities <  order quantity
trades = list()
# Intended to track how many shares are filled per order.
# NOTE(review): nothing in the visible cells reads or writes this map.
order_fills_map = dict()

def _build_trade(order_obj, quantity, trade_count):
    """Build one trade record of `quantity` shares for `order_obj` and advance the counter.

    The trade price is random in [5, 500] (tick size is ignored) and the trade
    timestamp is drawn between the order's creation time and now.
    NOTE: `tradeID` is an independent random number, so unlike `id` it is not
    guaranteed to be unique across trades.
    """
    price = round(random.uniform(5, 500), 2)  # ignoring tickSize for simplicity
    executed_at = fake.date_time_between_dates(
        datetime_start=datetime.fromisoformat(order_obj["creationDateTime"]),
        datetime_end=datetime.now(),
    )
    trade = {
        "id": f"trade_{trade_count[0]}",
        "tradeID": f"TR-{random.randint(100000,999999)}",
        "orderID": order_obj["id"],
        "price": price,
        "quantity": quantity,
        "tradeDateTime": executed_at.isoformat()
    }
    trade_count[0] += 1
    return trade


def create_trades_for_order(order_obj, trades_list, trade_count):
    """Generate trades for a single order so that fills agree with its status.

    Args:
        order_obj: order dict; reads "id", "quantity", "status" and
            "creationDateTime" (ISO-8601 string).
        trades_list: list that new trade dicts are appended to (mutated in place).
        trade_count: one-element list used as a mutable running counter for the
            sequential trade "id"; incremented once per trade created.

    Behaviour by status:
        Filled          -> 1-3 random fills plus, if needed, a final top-up so
                           the trade quantities sum to exactly the order quantity.
        PartiallyFilled -> 1-3 random fills whose sum is strictly less than the
                           order quantity (an order of quantity <= 1 therefore
                           gets no trades, since no partial fill is possible).
        anything else   -> no trades (RULE 5 for Open / Cancelled).

    Returns:
        None.
    """
    order_qty = order_obj["quantity"]
    order_status = order_obj["status"]

    if order_status not in ("Filled", "PartiallyFilled"):
        # RULE 5: Open / Cancelled orders get no trades at all
        return

    # Largest total the random fills may reach. A partially filled order must
    # stay strictly below its quantity; without this cap a random portion equal
    # to the whole remainder would silently fill the order completely.
    fill_cap = order_qty if order_status == "Filled" else order_qty - 1

    filled_so_far = 0
    for _ in range(random.randint(1, 3)):
        remain = fill_cap - filled_so_far
        if remain <= 0:
            break
        portion = random.randint(1, remain)
        trades_list.append(_build_trade(order_obj, portion, trade_count))
        filled_so_far += portion

    # A Filled order must be filled exactly: top up whatever the random fills left open.
    if order_status == "Filled" and filled_so_far < order_qty:
        trades_list.append(_build_trade(order_obj, order_qty - filled_so_far, trade_count))

# Run every order through the trade generator. The one-element list acts as a
# mutable counter shared across calls so trade ids are numbered sequentially.
trade_counter = [0]
for current_order in orders:
    create_trades_for_order(current_order, trades, trade_counter)

In [15]:
# Summaries: print how many records were generated for each entity type.
print("\n--- Generated Entities ---\n")
entity_collections = (
    ("CommissionPlans:", commission_plans),
    ("Investors:", investors),
    ("Brokers:", brokers),
    ("Exchanges:", exchanges),
    ("Instruments:", instruments),
    ("Orders:", orders),
    ("Trades:", trades),
    ("Portfolios:", portfolios),
    ("Positions:", positions),
    ("CashAccounts:", cash_accounts),
)
for heading, records in entity_collections:
    print(heading, len(records))


--- Generated Entities ---

CommissionPlans: 6
Investors: 30
Brokers: 15
Exchanges: 8
Instruments: 80
Orders: 150
Trades: 184
Portfolios: 40
Positions: 100
CashAccounts: 50


In [16]:
# Show some sample data: the first record of each entity list.
# Every list is guarded the same way, so an entity configured with zero records
# (or no trades being generated) prints None instead of raising IndexError.
print("\n--- Samples ---\n")
sample_sources = (
    ("CommissionPlan", commission_plans),
    ("Investor", investors),
    ("Broker", brokers),
    ("Exchange", exchanges),
    ("Instrument", instruments),
    ("Order", orders),
    ("Trade", trades),
    ("Portfolio", portfolios),
    ("Position", positions),
    ("CashAccount", cash_accounts),
)
for label, records in sample_sources:
    print(f"Sample {label}:", records[0] if records else None)


--- Samples ---

Sample CommissionPlan: {'id': 'plan_0', 'planName': 'engineer cross-media experiences', 'commissionRate': 0.0186}
Sample Investor: {'id': 'inv_0', 'investorName': 'Christopher Sanchez', 'investorType': 'Retail'}
Sample Broker: {'id': 'broker_0', 'brokerName': 'Austin, Smith and Walton Brokerage', 'brokerLicenseID': 'BR-1818', 'commissionPlanID': 'plan_4'}
Sample Exchange: {'id': 'exch_0', 'exchangeName': 'Baker, Rodriguez and Reyes Exchange', 'country': 'Germany'}
Sample Instrument: {'id': 'instr_0', 'symbol': 'VHE37', 'description': 'Integrated fresh-thinking hierarchy', 'tickSize': 0.01, 'instrumentType': 'Equity'}
Sample Order: {'id': 'ord_0', 'orderID': 'ORD-619957', 'orderType': 'Limit', 'side': 'Buy', 'quantity': 721, 'limitPrice': 87.16, 'timeInForce': 'GTC', 'status': 'PartiallyFilled', 'creationDateTime': '2024-04-06T01:25:56', 'investorID': 'inv_12', 'brokerID': None, 'exchangeID': 'exch_3', 'instrumentID': 'instr_64'}
Sample Trade: {'id': 'trade_0', 'tradeI

In [17]:
# persist the data
# One CSV per entity list, named "<entity>.csv" inside data_path. Trades are
# written too, so the orders file is not left without its fills.
os.makedirs(data_path, exist_ok=True)  # to_csv does not create a missing directory

datasets = {
    "commission_plans": commission_plans,
    "investors": investors,
    "brokers": brokers,
    "exchanges": exchanges,
    "instruments": instruments,
    "orders": orders,
    "trades": trades,
    "portfolios": portfolios,
    "positions": positions,
    "cash_accounts": cash_accounts,
}
for dataset_name, records in datasets.items():
    pd.DataFrame(records).to_csv(
        data_path + dataset_name + ".csv",
        encoding="utf-8",
        escapechar="\"",
        index=False,
    )