In [2]:
# generate_data.py
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import trange

def significance_stars(p):
    if p < 0.01:
        return "***"
    elif p < 0.05:
        return "**"
    elif p < 0.10:
        return "*"
    else:
        return ""

def get_rewards(bids, auction_type="first"):
    """
    Generalized for multiple bidders.
    bids: array of shape (n_bidders,).
    auction_type: "first" or "second".

    Returns:
      (rewards, winner, winner_bid)
        - rewards: array of shape (n_bidders,) with payoff for each bidder
        - winner: integer index of the winning bidder
        - winner_bid: the winning bidder's actual bid

    We assume valuations[i] = 1 for all i (can be changed if desired).
    """
    n_bidders = len(bids)
    valuations = np.ones(n_bidders)  # or some other distribution if desired

    sorted_indices = np.argsort(bids)[::-1]  # descending
    highest_idx = [sorted_indices[0]]
    highest_bid = bids[sorted_indices[0]]
    rewards = np.zeros(n_bidders)

    # Check if there's a tie among the top
    for idx in sorted_indices[1:]:
        if np.isclose(bids[idx], highest_bid):
            highest_idx.append(idx)
        else:
            break

    # Resolve tie by random choice among highest_idx
    if len(highest_idx) > 1:
        winner = np.random.choice(highest_idx)
    else:
        winner = highest_idx[0]
    winner_bid = bids[winner]

    # Second-highest
    if len(highest_idx) == n_bidders:
        second_highest_bid = highest_bid
    else:
        second_idx = None
        for idx in sorted_indices:
            if idx not in highest_idx:
                second_idx = idx
                break
        second_highest_bid = bids[second_idx] if second_idx is not None else highest_bid

    # Payoffs
    if auction_type == "first":
        rewards[winner] = valuations[winner] - winner_bid
    else:  # second-price
        rewards[winner] = valuations[winner] - second_highest_bid

    return rewards, winner, winner_bid


def build_state_space(median_opp_past_bid_index, winner_bid_index_state, n_actions):
    """
    Decide how many total states we have, based on:
      - median_opp_past_bid_index (bool)
      - winner_bid_index_state (bool)
      - n_actions = 6

    If both are False -> 1 state
    If exactly one is True -> n_actions states
    If both True -> n_actions * n_actions states

    We'll define a helper function to map (median_idx, winner_idx) -> single integer in [0..n_states-1].
    """
    if not median_opp_past_bid_index and not winner_bid_index_state:
        n_states = 1
    elif median_opp_past_bid_index and not winner_bid_index_state:
        n_states = n_actions
    elif not median_opp_past_bid_index and winner_bid_index_state:
        n_states = n_actions
    else:
        # both True
        n_states = n_actions * n_actions

    return n_states

def state_to_index(median_idx, winner_idx, median_flag, winner_flag, n_actions):
    """
    Convert the pair (median_idx, winner_idx) into a single integer.

    Cases:
     1) If both flags are False -> always return 0 (only 1 state).
     2) If exactly one flag is True -> return median_idx OR winner_idx as the state ID.
     3) If both are True -> we do a 2D -> 1D mapping: state_id = median_idx * n_actions + winner_idx
    """
    if not median_flag and not winner_flag:
        return 0
    elif median_flag and not winner_flag:
        return median_idx
    elif not median_flag and winner_flag:
        return winner_idx
    else:
        # both True
        return median_idx * n_actions + winner_idx

def run_experiment(alpha, gamma, episodes, auction_type, init, exploration,
                   asynchronous, n_bidders, median_opp_past_bid_index,
                   winner_bid_index_state,
                   seed=0):
    """
    Runs a single experiment with multiple bidders, asynchronous vs synchronous updates,
    optionally using:
      - median_of_opponents' last bids (median_opp_past_bid_index)
      - past winning bid (winner_bid_index_state)

    Returns dict of outcome metrics:
      - avg_rev_last_1000
      - time_to_converge
      - avg_regret_of_seller
    """

    np.random.seed(seed)
    random.seed(seed)

    n_actions = 11
    action_space = np.linspace(0, 1, n_actions)

    # Build state space size
    n_states = build_state_space(median_opp_past_bid_index, winner_bid_index_state, n_actions)

    # Q shape: (n_bidders, n_states, n_actions)
    if init == "random":
        Q = np.random.rand(n_bidders, n_states, n_actions)
    else:
        Q = np.zeros((n_bidders, n_states, n_actions))

    # We'll track the previous round's bids to compute median; start with zeros
    prev_bids = np.zeros(n_bidders)

    # We'll track the previous winning bid (start at 0.0, effectively index=0)
    prev_winner_bid = 0.0
    # Discretize a continuous bid to nearest action idx
    def bid_to_state_index(bid):
        return np.argmin(np.abs(action_space - bid))

    revenues = []
    window_size = 1000

    start_eps, end_eps = 1.0, 0.0
    decay_end = int(0.9 * episodes)

    # Main training loop
    for ep in range(episodes):
        # Epsilon
        if ep < decay_end:
            eps = start_eps - (ep / decay_end) * (start_eps - end_eps)
        else:
            eps = end_eps

        # Determine median index among opponents if needed
        median_idx = 0  # default
        if median_opp_past_bid_index:
            # For simplicity, we compute the median of "other" bidders for each agent
            # but in typical usage, we might do 1 median for each agent's perspective
            # OR a single global median (like we do for "opponents" excluding me).
            # Let's define a "global" median of all bids from last round:
            median_val = np.median(prev_bids)
            median_idx = bid_to_state_index(median_val)

        # Determine previous winner index if needed
        winner_idx = 0
        if winner_bid_index_state:
            winner_idx = bid_to_state_index(prev_winner_bid)

        # State for all bidders is the same if we interpret "opponent median" globally
        # and "previous winner's bid" globally.
        s = state_to_index(
            median_idx,
            winner_idx,
            median_opp_past_bid_index,
            winner_bid_index_state,
            n_actions
        )

        # Each bidder chooses an action
        chosen_actions = []
        for i in range(n_bidders):
            if exploration == "egreedy":
                if np.random.rand() > eps:
                    a_i = np.argmax(Q[i, s])
                else:
                    a_i = np.random.randint(n_actions)
            elif exploration == "boltzmann":
                qvals = Q[i, s]
                exp_q = np.exp(qvals - np.max(qvals))
                probs = exp_q / np.sum(exp_q)
                a_i = np.random.choice(range(n_actions), p=probs)
            else:
                # default egreedy
                if np.random.rand() > eps:
                    a_i = np.argmax(Q[i, s])
                else:
                    a_i = np.random.randint(n_actions)
            chosen_actions.append(a_i)

        # Convert to bids
        bids = np.array([action_space[a_i] for a_i in chosen_actions])

        # Compute rewards, find winner + winning bid
        rewards, winner, winner_bid = get_rewards(bids, auction_type)

        # Q update
        if asynchronous == 1:
            for i in range(n_bidders):
                old_q = Q[i, s, chosen_actions[i]]
                td_target = rewards[i] + gamma * np.max(Q[i, s])
                Q[i, s, chosen_actions[i]] = old_q + alpha * (td_target - old_q)
        else:
            # synchronous
            for i in range(n_bidders):
                cf_rewards = np.zeros(n_actions)
                for a_alt in range(n_actions):
                    cf_bids = bids.copy()
                    cf_bids[i] = action_space[a_alt]
                    cf_r, _, _ = get_rewards(cf_bids, auction_type)
                    cf_rewards[a_alt] = cf_r[i]

                max_next_q = np.max(Q[i, s])
                Q[i, s, :] = (1 - alpha)*Q[i, s, :] + \
                             alpha*(cf_rewards + gamma * max_next_q)

        # Seller revenue
        revenue_t = np.max(bids)
        revenues.append(revenue_t)

        # Update prev_bids
        prev_bids = bids
        # Update prev_winner_bid for next state
        prev_winner_bid = winner_bid

    # Final stats
    if len(revenues) >= window_size:
        avg_rev_last_1000 = np.mean(revenues[-window_size:])
    else:
        avg_rev_last_1000 = np.mean(revenues)

    final_rev = avg_rev_last_1000
    rev_series = pd.Series(revenues)
    roll_avg = rev_series.rolling(window_size).mean()

    lower_band = 0.95 * final_rev
    upper_band = 1.05 * final_rev
    time_to_converge = episodes
    for t in range(len(revenues) - window_size):
        window_val = roll_avg.iloc[t + window_size - 1]
        if (window_val >= lower_band) and (window_val <= upper_band):
            stay_in_band = True
            for j in range(t + window_size, len(revenues) - window_size):
                v_j = roll_avg.iloc[j + window_size - 1]
                if not (lower_band <= v_j <= upper_band):
                    stay_in_band = False
                    break
            if stay_in_band:
                time_to_converge = t + window_size
                break

    regrets = [1.0 - r for r in revenues]
    avg_regret_of_seller = np.mean(regrets)

    return {
        "avg_rev_last_1000": avg_rev_last_1000,
        "time_to_converge": time_to_converge,
        "avg_regret_of_seller": avg_regret_of_seller
    }


if __name__ == "__main__":
    os.makedirs("experiment1", exist_ok=True)

    param_space = {
        "auction_type": ["first", "second"],
        "init": ["random", "zeros"],
        "exploration": ["egreedy", "boltzmann"],
        "asynchronous": [0, 1],
        "n_bidders": [2, 4, 6],
        "median_opp_past_bid_index": [False, True],
        "winner_bid_index_state": [False, True]
    }

    outcomes = ["avg_rev_last_1000", "time_to_converge", "avg_regret_of_seller"]
    K = 500
    results = []

    for k in trange(K, desc="Overall Experiments"):
        alpha = random.uniform(0.01, 0.1)
        gamma = random.uniform(0.0, 0.99)
        episodes = int(random.uniform(10_000, 100_000))

        # pick from discrete
        auction_type = random.choice(param_space["auction_type"])
        init = random.choice(param_space["init"])
        exploration = random.choice(param_space["exploration"])
        asynchronous = random.choice(param_space["asynchronous"])
        n_bidders = random.choice(param_space["n_bidders"])
        median_flag = random.choice(param_space["median_opp_past_bid_index"])
        winner_flag = random.choice(param_space["winner_bid_index_state"])

        outcome = run_experiment(
            alpha=alpha,
            gamma=gamma,
            episodes=episodes,
            auction_type=auction_type,
            init=init,
            exploration=exploration,
            asynchronous=asynchronous,
            n_bidders=n_bidders,
            median_opp_past_bid_index=median_flag,
            winner_bid_index_state=winner_flag,
            seed=k
        )

        outcome["alpha"] = alpha
        outcome["gamma"] = gamma
        outcome["episodes"] = episodes
        outcome["auction_type"] = auction_type
        outcome["init"] = init
        outcome["exploration"] = exploration
        outcome["asynchronous"] = asynchronous
        outcome["n_bidders"] = n_bidders
        outcome["median_opp_past_bid_index"] = median_flag
        outcome["winner_bid_index_state"] = winner_flag

        results.append(outcome)

    df = pd.DataFrame(results)
    csv_path = "experiment1/data.csv"
    df.to_csv(csv_path, index=False)
    print(f"Data generation complete. Saved to '{csv_path}'")

Overall Experiments: 100%|██████████| 500/500 [10:17:16<00:00, 74.07s/it]    

Data generation complete. Saved to 'experiment1/data.csv'





In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# DoubleML
import doubleml as dml
from doubleml import DoubleMLData, DoubleMLIRM

# LightGBM
from lightgbm import LGBMRegressor, LGBMClassifier

# For spline expansions
import patsy

# ------------------------------------------------------------------------------
# 1) Load your RL experiment data
# ------------------------------------------------------------------------------
df = pd.read_csv("experiment1/data.csv")

# ------------------------------------------------------------------------------
# 2) Preprocess data
# ------------------------------------------------------------------------------
# Treatment T = 'auction_type' -> map "first" -> 0, "second" -> 1
df['D'] = (df['auction_type'] == 'first').astype(int)

# Encode non-numeric categorical columns
categorical_cols = [col for col in df.columns if df[col].dtype == 'object']
for col in categorical_cols:
    df[col] = pd.factorize(df[col])[0]  # Encode categories as integers

# Dynamically detect binary and continuous covariates
binary_cols = [col for col in df.columns if df[col].nunique() == 2 and col not in ['auction_type', 'D']]
cont_cols = [col for col in df.columns if col not in binary_cols + ['auction_type', 'D', 'avg_rev_last_1000', 'time_to_converge', 'avg_regret_of_seller']]
X_cols = binary_cols + cont_cols

print(f"Binary covariates: {binary_cols}")
print(f"Continuous covariates: {cont_cols}")

# ------------------------------------------------------------------------------
# 3) Outcomes
# ------------------------------------------------------------------------------
outcomes = ['avg_rev_last_1000', 'time_to_converge', 'avg_regret_of_seller']

# ------------------------------------------------------------------------------
# 4) Loop over outcomes and perform DoubleML inference
# ------------------------------------------------------------------------------
for outcome in outcomes:
    print(f"\n========== Running for Outcome: {outcome} ==========")

    # Define Y (outcome)
    df['Y'] = df[outcome]

    # Create DoubleMLData object
    dml_data = DoubleMLData(df, y_col='Y', d_cols='D', x_cols=X_cols)

    # Define learners
    ml_g = LGBMRegressor(verbose=-1, random_state=123)
    ml_m = LGBMClassifier(verbose=-1, random_state=123)

    # Define and fit DoubleML IRM
    dml_irm = DoubleMLIRM(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=3, score='ATE')
    dml_irm.fit()

    # Print ATE results
    print(dml_irm.summary)

    # ------------------------------------------------------------------------------
    # 5) GATE Analysis
    # ------------------------------------------------------------------------------
    n_bin = len(binary_cols)
    nrows_gate = int(np.ceil(n_bin / 3))
    ncols_gate = min(n_bin, 3)

    fig_gate, axes_gate = plt.subplots(nrows=nrows_gate, ncols=ncols_gate, figsize=(5 * ncols_gate, 4 * nrows_gate))

    if n_bin == 1:
        axes_gate = np.array([axes_gate])

    for i, bin_col in enumerate(binary_cols):
        groups_df = df[[bin_col]].astype('category')
        gate_obj = dml_irm.gate(groups=groups_df)
        ci_95_gate = gate_obj.confint(level=0.95)

        effects = ci_95_gate['effect']
        lower_95 = ci_95_gate['2.5 %']
        upper_95 = ci_95_gate['97.5 %']

        ax = axes_gate.flatten()[i] if n_bin > 1 else axes_gate[0]

        x_positions = [0, 1]
        ax.errorbar(
            x_positions, effects,
            yerr=[effects - lower_95, upper_95 - effects],
            fmt='o', capsize=5
        )
        ax.set_title(f"GATE: {bin_col} ({outcome})")
        ax.set_xticks(x_positions)
        ax.set_xticklabels([f"{bin_col}=0", f"{bin_col}=1"])
        ax.set_ylabel("Estimated GATE")

    fig_gate.tight_layout()
    gate_plot_path = os.path.join("experiment1", f"gate_plots_{outcome}.png")
    fig_gate.savefig(gate_plot_path, bbox_inches='tight')
    plt.close(fig_gate)
    print(f"GATE plots saved for {outcome}.")

    # ------------------------------------------------------------------------------
    # 6) CATE Analysis
    # ------------------------------------------------------------------------------
    n_cont = len(cont_cols)
    nrows_cate = int(np.ceil(n_cont / 3))
    ncols_cate = min(n_cont, 3)

    fig_cate, axes_cate = plt.subplots(nrows=nrows_cate, ncols=ncols_cate, figsize=(5 * ncols_cate, 4 * nrows_cate))

    if n_cont == 1:
        axes_cate = np.array([axes_cate])

    for i, cont_col in enumerate(cont_cols):
        design_matrix = patsy.dmatrix(f"bs({cont_col}, df=5, degree=2)", df)
        spline_basis = pd.DataFrame(design_matrix)

        cate_obj = dml_irm.cate(basis=spline_basis)
        ci_95_cate = cate_obj.confint(basis=spline_basis, level=0.95)

        effects_cate = ci_95_cate['effect'].values
        lower_95_cate = ci_95_cate['2.5 %'].values
        upper_95_cate = ci_95_cate['97.5 %'].values

        x_values = df[cont_col].values
        idx_sort = np.argsort(x_values)

        x_sorted = x_values[idx_sort]
        eff_sorted = effects_cate[idx_sort]
        low_sorted = lower_95_cate[idx_sort]
        up_sorted = upper_95_cate[idx_sort]

        ax_cate = axes_cate.flatten()[i] if n_cont > 1 else axes_cate[0]

        ax_cate.plot(x_sorted, eff_sorted, label='CATE')
        ax_cate.fill_between(x_sorted, low_sorted, up_sorted, alpha=0.2, label='95% CI')
        ax_cate.set_title(f"CATE: {cont_col} ({outcome})")
        ax_cate.set_xlabel(cont_col)
        ax_cate.set_ylabel("Estimated Treatment Effect")
        ax_cate.legend()

    fig_cate.tight_layout()
    cate_plot_path = os.path.join("experiment1", f"cate_plots_{outcome}.png")
    fig_cate.savefig(cate_plot_path, bbox_inches='tight')
    plt.close(fig_cate)
    print(f"CATE plots saved for {outcome}.")


Binary covariates: ['init', 'exploration', 'asynchronous', 'median_opp_past_bid_index', 'winner_bid_index_state']
Continuous covariates: ['alpha', 'gamma', 'episodes', 'n_bidders']

       coef   std err         t         P>|t|     2.5 %    97.5 %
D -0.074651  0.008739 -8.542212  1.316761e-17 -0.091779 -0.057523
GATE plots saved for avg_rev_last_1000.
CATE plots saved for avg_rev_last_1000.

          coef      std err         t     P>|t|        2.5 %       97.5 %
D  1764.748478  1639.488518  1.076402  0.281748 -1448.589971  4978.086927
GATE plots saved for time_to_converge.
CATE plots saved for time_to_converge.

       coef   std err          t         P>|t|    2.5 %    97.5 %
D  0.060362  0.002986  20.216334  7.031873e-91  0.05451  0.066215
GATE plots saved for avg_regret_of_seller.
CATE plots saved for avg_regret_of_seller.


# INFERENCE

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import doubleml as dml
from doubleml import DoubleMLData, DoubleMLIRM
from lightgbm import LGBMRegressor, LGBMClassifier
import patsy
from scipy.stats import norm
from tabulate import tabulate
import math
np.random.seed(12234334)

# 1) Dictionary of Variable Definitions
var_definitions = {
    "alpha": "Learning rate for Q-updates (0.01 to 0.1).",
    "gamma": "Discount factor for future rewards (0.0 to 0.99).",
    "episodes": "Total number of training episodes (10k to 100k).",
    "auction_type": "'first' or 'second' price auction (treatment).",
    "init": "Q-initialization: 'random' or 'zeros'.",
    "exploration": "Exploration strategy: 'egreedy' or 'boltzmann'.",
    "asynchronous": "Update mode: 0=synchronous, 1=asynchronous.",
    "n_bidders": "Number of bidding agents (2, 4, 6).",
    "median_opp_past_bid_index": "Use median of opponents' past bids in state?",
    "winner_bid_index_state": "Track winning bid index in state?",
    "avg_rev_last_1000": "Mean seller revenue in the final 1000 episodes.",
    "time_to_converge": "Fraction of episodes until ±5% convergence.",
    "avg_regret_of_seller": "Average regret for the seller (valuations=1)."
}

df_var_defs = pd.DataFrame([{"Parameter": k, "Definition": v} for k, v in var_definitions.items()])

# 2) Print Variable Definitions
print("\n=== Variable Definitions ===")
print(tabulate(df_var_defs, headers="keys", tablefmt="github"))

# 3) Load and Preprocess Data
df = pd.read_csv("experiment1/data.csv")
df['auction_type'] = (df['auction_type'] == 'first').astype(int)
df['time_to_converge'] = df['time_to_converge'] / df['episodes']

treatment_col = 'auction_type'
covariates_list = [
    'alpha', 'gamma', 'episodes', 'init', 'exploration',
    'asynchronous', 'n_bidders', 'median_opp_past_bid_index',
    'winner_bid_index_state'
]
outcomes_list = ['avg_rev_last_1000', 'time_to_converge', 'avg_regret_of_seller']

for col in ['median_opp_past_bid_index', 'winner_bid_index_state']:
    if df[col].dtype == bool:
        df[col] = df[col].astype(int)
for c in covariates_list:
    if df[c].dtype not in [np.float64, np.int64, float, int]:
        df[c], _ = pd.factorize(df[c])

# 4) Print Summary Statistics
cols_for_summary = covariates_list + [treatment_col] + outcomes_list
summary_stats = df[cols_for_summary].describe().T.drop('count', axis=1, errors='ignore')
print("\n=== Summary Statistics ===")
print(tabulate(summary_stats, headers='keys', tablefmt='github'))

# 5) DoubleML IRM with GATE and CATE
for outcome in outcomes_list:
    df['Y'] = df[outcome]
    dml_data = dml.DoubleMLData(df, y_col='Y', d_cols=treatment_col, x_cols=covariates_list)
    ml_g = LGBMRegressor(random_state=123, verbose=-1)
    ml_m = LGBMClassifier(random_state=123, verbose=-1)
    dml_irm = DoubleMLIRM(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=10, n_rep=1, score='ATE')
    dml_irm.fit()

    print(f"\n========== {outcome.upper()} | ATE Results ==========")
    print(dml_irm.summary)

    # 5a) GATE
    binary_covs = [c for c in covariates_list if df[c].nunique() == 2]
    n_bin = len(binary_covs)
    nrows_gate = math.ceil(n_bin / 3)
    ncols_gate = min(n_bin, 3)
    fig_gate, axes_gate = plt.subplots(nrows=nrows_gate, ncols=ncols_gate, figsize=(5*ncols_gate, 4*nrows_gate))
    if n_bin == 1:
        axes_gate = np.array([axes_gate])
    gate_results = []

    for i, bin_col in enumerate(binary_covs):
        groups_df = df[[bin_col]].astype('category')
        gate_obj = dml_irm.gate(groups=groups_df)
        ci_95 = gate_obj.confint(level=0.95)
        eff = ci_95['effect']
        lo = ci_95['2.5 %']
        hi = ci_95['97.5 %']
        gate_sum = gate_obj.summary
        errs = gate_sum['std err']
        ax = axes_gate.flatten()[i] if n_bin > 1 else axes_gate[0]
        x_pos = np.arange(len(eff))
        ax.errorbar(x_pos, eff, yerr=[eff - lo, hi - eff], fmt='o', capsize=5)
        ax.set_title(f"GATE: {bin_col} ({outcome})")
        ax.set_xticks(x_pos)
        ax.set_xticklabels([f"{bin_col}={lvl}" for lvl in range(len(eff))])
        ax.set_ylabel("Estimated GATE")

        if len(eff) == 2:
            dval = eff.iloc[1] - eff.iloc[0]
            dvar = errs.iloc[1]**2 + errs.iloc[0]**2
            dse = math.sqrt(dvar)
            tval = dval / dse
            pval = 2.0*(1.0 - norm.cdf(abs(tval)))
            gate_results.append({
                'Variable': bin_col,
                'Group0_Effect': f"{eff.iloc[0]:.4f}",
                'Group1_Effect': f"{eff.iloc[1]:.4f}",
                'Diff(Group1-Group0)': f"{dval:.4f}",
                'StdErr(Diff)': f"{dse:.4f}",
                't-value': f"{tval:.4f}",
                'p-value': f"{pval:.4f}"
            })

    fig_gate.tight_layout()
    gate_path = os.path.join("experiment1", f"gate_plots_{outcome}.png")
    fig_gate.savefig(gate_path, bbox_inches='tight')
    plt.close(fig_gate)

    if gate_results:
        print(f"\nT-tests for GATE ({outcome}):")
        print(tabulate(gate_results, headers='keys', tablefmt='github'))

    # 5b) CATE
    cont_covs = [c for c in covariates_list if df[c].nunique() > 2]
    n_cont = len(cont_covs)
    nrows_cate = math.ceil(n_cont / 3)
    ncols_cate = min(n_cont, 3)
    fig_cate, axes_cate = plt.subplots(nrows=nrows_cate, ncols=ncols_cate, figsize=(5*ncols_cate, 4*nrows_cate))
    if n_cont == 1:
        axes_cate = np.array([axes_cate])

    for i, cont_col in enumerate(cont_covs):
        design_matrix = patsy.dmatrix(f"bs({cont_col}, df=5, degree=2)", df)
        spline_basis = pd.DataFrame(design_matrix)
        cate_obj = dml_irm.cate(basis=spline_basis)
        ci_95_cate = cate_obj.confint(basis=spline_basis, level=0.95)
        eff_cate = ci_95_cate['effect'].values
        lo_cate = ci_95_cate['2.5 %'].values
        hi_cate = ci_95_cate['97.5 %'].values
        xvals = df[cont_col].values
        idx = np.argsort(xvals)
        x_sort = xvals[idx]
        eff_sort = eff_cate[idx]
        lo_sort = lo_cate[idx]
        hi_sort = hi_cate[idx]
        axc = axes_cate.flatten()[i] if n_cont > 1 else axes_cate[0]
        axc.plot(x_sort, eff_sort, label='CATE')
        axc.fill_between(x_sort, lo_sort, hi_sort, alpha=0.2, label='95% CI')
        axc.set_title(f"CATE: {cont_col} ({outcome})")
        axc.set_xlabel(cont_col)
        axc.set_ylabel("Estimated TE")
        axc.legend()

    fig_cate.tight_layout()
    cate_path = os.path.join("experiment1", f"cate_plots_{outcome}.png")
    fig_cate.savefig(cate_path, bbox_inches='tight')
    plt.close(fig_cate)



=== Variable Definitions ===
|    | Parameter                 | Definition                                        |
|----|---------------------------|---------------------------------------------------|
|  0 | alpha                     | Learning rate for Q-updates (0.01 to 0.1).        |
|  1 | gamma                     | Discount factor for future rewards (0.0 to 0.99). |
|  2 | episodes                  | Total number of training episodes (10k to 100k).  |
|  3 | auction_type              | 'first' or 'second' price auction (treatment).    |
|  4 | init                      | Q-initialization: 'random' or 'zeros'.            |
|  5 | exploration               | Exploration strategy: 'egreedy' or 'boltzmann'.   |
|  6 | asynchronous              | Update mode: 0=synchronous, 1=asynchronous.       |
|  7 | n_bidders                 | Number of bidding agents (2, 4, 6).               |
|  8 | median_opp_past_bid_index | Use median of opponents' past bids in state?      |
|  9 | winner