In [None]:
import sys
# Add the parent directory ("..", resolved against the working directory) to
# the module search path.
# NOTE(review): presumably so packages one level above this notebook can be
# imported; no such import is visible in this part of the notebook - confirm
# it is still needed.
sys.path.append("..")


In [11]:
import random
import csv

# 1) Seed for reproducibility
# Seeds Python's global `random` generator, which generate_labelled() draws
# from by default. Because the seed is set in the same cell as the generation
# call, re-running the cell restarts the random sequence from the same point.
SEED = 2025
random.seed(SEED)

# 2) Expanded per-instrument pools
# Maps an instrument name to its phrase pools. Every entry has the same three
# keys, each holding a list of three phrases:
#   "contexts"             - market-situation clause that opens a statement
#   "risky_strategies"     - recommendations used for the risky class
#   "non_risky_strategies" - recommendations used for the non-risky class
# generate_labelled() samples from these lists. Several phrases contain
# non-ASCII characters, so any file written from them needs a Unicode-capable
# encoding.
instruments = {
    "Bitcoin": {
        "contexts": [
            "amid a sudden spike in Bitcoin volatility",
            "as Bitcoin hits record daily swings",
            "during a sharp pullback in Bitcoin prices"
        ],
        "risky_strategies": [
            "allocate 50% of the portfolio to Bitcoin futures with 2× leverage",
            "go all-in on Bitcoin spot with a 3× margin loan",
            "rotate 80% of equity exposure into Bitcoin option straddles"
        ],
        "non_risky_strategies": [
            "hold 10% in a Bitcoin index fund and rebalance quarterly",
            "keep 5% of assets in Bitcoin ETFs and the rest in cash",
            "maintain a capped Bitcoin allocation of 2% of total AUM"
        ]
    },
    "Ethereum": {
        "contexts": [
            "as Ethereum’s gas fees surge",
            "when Ethereum’s staking yields compress",
            "following an upgrade to Ethereum’s protocol"
        ],
        "risky_strategies": [
            "stake 70% of ETH holdings in a DeFi protocol with high APY",
            "use 4× leverage to long Ethereum perpetual swaps",
            "allocate 60% to ETH smart-contract yield farms"
        ],
        "non_risky_strategies": [
            "keep ETH in cold storage and stake only 5%",
            "invest 10% in an ETH trust and hold long-term",
            "allocate 15% to an ETH-based stablecoin strategy"
        ]
    },
    "S&P 500": {
        "contexts": [
            "during a steep sell-off in S&P 500 futures",
            "as the S&P 500 trades below its 200-day moving average",
            "amid sector rotation pressure in the S&P 500"
        ],
        "risky_strategies": [
            "shift 70% into leveraged S&P 500 ETFs (2× long)",
            "deploy a concentrated bet on the top 5 S&P 500 constituents",
            "sell deep out-of-the-money S&P 500 puts at 3× leverage"
        ],
        "non_risky_strategies": [
            "buy a diversified S&P 500 index fund and hold",
            "invest 60% in S&P 500 low-volatility ETF",
            "allocate 50% to an S&P 500 target-date fund"
        ]
    },
    "10-year Treasury": {
        "contexts": [
            "as the 10-year Treasury yield jumps 20 bps in one day",
            "when the 10-year Treasury trades at multi-year highs",
            "during heightened demand for 10-year Treasury notes"
        ],
        "risky_strategies": [
            "short 10-year Treasuries with 5× notional leverage",
            "rotate 80% into high-yield corporate bond ETFs",
            "use interest-rate futures spreads to bet on curve steepening"
        ],
        "non_risky_strategies": [
            "ladder Treasuries from 2–10-year maturities, equal weight",
            "allocate 70% to 10-year Treasuries and 30% to T-bills",
            "maintain a duration-neutral Treasury portfolio"
        ]
    },
    "Gold": {
        "contexts": [
            "as gold futures breach critical resistance levels",
            "amid central bank gold-buying headlines",
            "during a flight-to-safety rally in precious metals"
        ],
        "risky_strategies": [
            "allocate 60% to gold ETFs and use 3× leverage",
            "buy gold call options with 4-month expiry",
            "rotate 50% of equity exposure into gold mining stocks"
        ],
        "non_risky_strategies": [
            "hold 10% in physical gold bars and 5% in a gold trust",
            "invest 15% in a low-cost gold ETF with monthly rebalancing",
            "maintain a capped 5% portfolio allocation to gold"
        ]
    },
    "Crude Oil": {
        "contexts": [
            "following a surprise drawdown in US oil inventories",
            "amid OPEC+ production cut rumors",
            "during a sudden rally in front-month crude futures"
        ],
        "risky_strategies": [
            "go 2×-leveraged long on oil ETF futures",
            "use oil call spreads on WTI contracts",
            "allocate 40% to energy sector leveraged ETNs"
        ],
        "non_risky_strategies": [
            "hold 10% in broad energy ETFs with no leverage",
            "invest 5% in an oil producer’s dividend ETF",
            "maintain a 3% allocation to crude‐linked mutual funds"
        ]
    },
    "EUR/USD": {
        "contexts": [
            "as the EUR/USD pair breaches parity",
            "during heightened volatility in forex markets",
            "amid unexpected ECB rate cut speculation"
        ],
        "risky_strategies": [
            "use 10×-leveraged positions in EUR/USD forwards",
            "deploy a carry-trade short USD, long EUR at 5×",
            "allocate 30% to forex options straddles on EUR/USD"
        ],
        "non_risky_strategies": [
            "keep 2% in a currency-hedged EUR/USD ETF",
            "use forward contracts to hedge 5% currency exposure",
            "allocate 1% to EUR/USD spot positions with tight stops"
        ]
    },
    "Corporate Bond": {
        "contexts": [
            "amid widening corporate bond spreads",
            "as credit rating downgrades hit junk issuers",
            "during a surge in corporate bond issuance"
        ],
        "risky_strategies": [
            "allocate 50% to high-yield bond ETFs with 3× leverage",
            "buy first-lien covenant-lite debt at deep discounts",
            "deploy 40% in floating-rate subordinated notes"
        ],
        "non_risky_strategies": [
            "hold 70% in investment-grade bond funds with laddered maturities",
            "invest 30% in an IG corporate bond ETF, no leverage",
            "maintain a 5% cap on high-yield exposure"
        ]
    },
    "REIT": {
        "contexts": [
            "during a drop in real estate REIT valuations",
            "as interest rates rise impacting cap rates",
            "amid a wave of REIT dividend cuts"
        ],
        "risky_strategies": [
            "go long 80% in leveraged equity REIT ETFs",
            "buy deep out-of-the-money REIT call options",
            "allocate 60% to small-cap property developer stocks"
        ],
        "non_risky_strategies": [
            "hold 10% in a diversified REIT index fund",
            "invest 15% in a mortgage-REIT ETF with low leverage",
            "maintain a 5% allocation to core commercial REITs"
        ]
    }
}

# 3) Template
# Sentence skeleton filled via str.format with a "context" phrase and a
# "strategy" phrase. The context is inserted verbatim at the start, so the
# generated statements begin with a lower-case letter.
template = "{context}, the recommendation is to {strategy}."

# 4) Generator function
def generate_labelled(instrument_pool, n_per_label, rng=None, statement_template=None):
    """Build paired risky / non-risky statements from per-instrument phrase pools.

    Each round picks one instrument and one context at random, then renders
    one risky and one non-risky statement that share that context, so the two
    returned lists are index-aligned pairs.

    Args:
        instrument_pool: Mapping of instrument name to a dict with the keys
            "contexts", "risky_strategies" and "non_risky_strategies", each a
            non-empty sequence of phrases.
        n_per_label: Number of statements to produce for each label. Values
            of zero or below yield two empty lists.
        rng: Optional source of randomness exposing ``choice`` (for example a
            ``random.Random`` instance). Defaults to the global ``random``
            module, so ``random.seed`` keeps controlling the output.
        statement_template: Optional format string with ``{context}`` and
            ``{strategy}`` fields. Defaults to the module-level ``template``.

    Returns:
        A ``(risky, non_risky)`` tuple of lists of rendered statements.

    Raises:
        IndexError: If the pool, or a sampled phrase list, is empty while
            ``n_per_label`` is positive (raised by ``choice``).
    """
    if rng is None:
        rng = random
    if statement_template is None:
        statement_template = template

    # Loop-invariant: materialise the instrument names once, not on every draw.
    instrument_names = list(instrument_pool.keys())

    risky, non_risky = [], []
    # Both lists grow by exactly one item per round, so one length check is
    # enough. The order of the four draws is kept fixed so that seeded runs
    # keep producing the same statements.
    while len(risky) < n_per_label:
        pool = instrument_pool[rng.choice(instrument_names)]
        ctx = rng.choice(pool["contexts"])
        risky_strat = rng.choice(pool["risky_strategies"])
        risky.append(statement_template.format(context=ctx, strategy=risky_strat))
        safe_strat = rng.choice(pool["non_risky_strategies"])
        non_risky.append(statement_template.format(context=ctx, strategy=safe_strat))
    return risky, non_risky

if __name__ == "__main__":
    import os  # local import so this block does not rely on edits elsewhere

    # Number of statements to generate for each of the two labels.
    N = 500
    risky_stmts, non_risky_stmts = generate_labelled(instruments, N)

    # 5) Write out CSV
    csv_path = "../data/risk_behavior/financial_statements.csv"
    # open(..., "w") does not create missing folders, so make sure the target
    # directory exists before writing.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["statement", "label"])
        # Label encoding: 0 = risky, 1 = non-risky. Rows are written grouped
        # by label (all risky first), not shuffled.
        # NOTE(review): confirm downstream consumers expect risky to be 0.
        writer.writerows([stmt, 0] for stmt in risky_stmts)
        writer.writerows([stmt, 1] for stmt in non_risky_stmts)

    print(f"Saved {len(risky_stmts)} risky and {len(non_risky_stmts)} non-risky statements to {csv_path}")


Saved 500 risky and 500 non-risky statements to ../data/risk_behavior/financial_statements.csv


In [12]:
risky_stmts

['during a drop in real estate REIT valuations, the recommendation is to allocate 60% to small-cap property developer stocks.',
 'amid sector rotation pressure in the S&P 500, the recommendation is to shift 70% into leveraged S&P 500 ETFs (2× long).',
 'amid unexpected ECB rate cut speculation, the recommendation is to use 10×-leveraged positions in EUR/USD forwards.',
 'during heightened volatility in forex markets, the recommendation is to use 10×-leveraged positions in EUR/USD forwards.',
 'as Ethereum’s gas fees surge, the recommendation is to use 4× leverage to long Ethereum perpetual swaps.',
 'as the EUR/USD pair breaches parity, the recommendation is to allocate 30% to forex options straddles on EUR/USD.',
 'amid a wave of REIT dividend cuts, the recommendation is to go long 80% in leveraged equity REIT ETFs.',
 'as the 10-year Treasury yield jumps 20 bps in one day, the recommendation is to use interest-rate futures spreads to bet on curve steepening.',
 'during heightened vol