In [10]:
# generate_data.py
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import os
from tqdm import trange

def significance_stars(p):
    """Return the conventional star annotation for a p-value.

    "***" for p < 0.01, "**" for p < 0.05, "*" for p < 0.10 and an empty
    string otherwise (including when no comparison succeeds, e.g. NaN).
    """
    # Cut-offs are checked from strictest to loosest; the first match wins.
    star_levels = ((0.01, "***"), (0.05, "**"), (0.10, "*"))
    for cutoff, stars in star_levels:
        if p < cutoff:
            return stars
    return ""

def get_rewards(bids, auction_type=1, reserve_price=0.0):
    """
    Resolve one sealed-bid auction among any number of bidders.

    Every bidder's valuation is fixed at 1. The highest bid wins; bids that
    are numerically tied for the top (``np.isclose``) are broken uniformly
    at random via ``np.random.choice``.

    Parameters:
    -----------
    bids : np.array
        Shape (n_bidders,). Each entry is the bid of a bidder.
    auction_type : int
        1 => first-price auction (winner pays their own bid),
        anything else (0 by convention) => second-price auction.
    reserve_price : float
        in [0, 1]. If highest bid < reserve_price, no sale.

    Returns:
    --------
    rewards : np.array
        Shape (n_bidders,). payoff for each bidder (valuation - payment);
        zero for everyone except the winner.
    winner : int
        index of the winning bidder (or -1 if no sale).
    winner_bid : float
        the winning bidder's actual bid (0 if no sale).
    revenue : float
        the amount the seller receives (0 if no sale).

    Notes:
    ------
    In a second-price auction the winner pays the highest bid among the
    *other* bidders, but never less than the reserve price. In particular a
    tie at the top means the winner pays the tied amount, and a lone bidder
    pays their own bid.
    """
    bids = np.asarray(bids, dtype=float)
    n_bidders = len(bids)
    valuations = np.ones(n_bidders)  # or some other distribution if desired
    rewards = np.zeros(n_bidders)

    # Bidder indices ordered from highest to lowest bid.
    sorted_indices = np.argsort(bids)[::-1]
    top_idx = sorted_indices[0]
    highest_bid = bids[top_idx]

    # Collect everyone tied with the top bid (the order is sorted, so the
    # first non-tied bid ends the scan).
    highest_idx_list = [top_idx]
    for idx in sorted_indices[1:]:
        if not np.isclose(bids[idx], highest_bid):
            break
        highest_idx_list.append(idx)

    # The tie-break is drawn before the reserve check so the sequence of
    # random numbers consumed per call is the same whether or not a sale
    # happens.
    if len(highest_idx_list) > 1:
        winner = np.random.choice(highest_idx_list)
    else:
        winner = top_idx
    winner_bid = bids[winner]

    # If highest_bid < reserve_price, no sale
    if highest_bid < reserve_price:
        return rewards, -1, 0.0, 0.0

    if auction_type == 1:
        # first-price: winner pays their own bid
        payment = winner_bid
    else:
        # second-price: winner pays the best competing bid.
        if n_bidders == 1:
            runner_up_bid = highest_bid
        elif winner == top_idx:
            runner_up_bid = bids[sorted_indices[1]]
        else:
            # The winner came out of a tie, so the top-ranked bidder is one
            # of the competitors.
            runner_up_bid = highest_bid
        # The seller never accepts less than the reserve price.
        payment = max(runner_up_bid, reserve_price)

    rewards[winner] = valuations[winner] - payment
    return rewards, winner, winner_bid, payment


def build_state_space(median_opp_past_bid_index, winner_bid_index_state, n_actions):
    """
    Return the number of states in the Q-table.

    Each enabled state dimension (median past bid, previous winning bid) is
    discretised on the same grid as the actions, so it multiplies the state
    count by ``n_actions``. With both flags off there is a single state.
    """
    n_states = 1
    for dimension_enabled in (median_opp_past_bid_index, winner_bid_index_state):
        if dimension_enabled:
            n_states *= n_actions
    return n_states


def state_to_index(median_idx, winner_idx, median_flag, winner_flag, n_actions):
    """
    Flatten (median_idx, winner_idx) into one row index of the Q-table.

    Only the dimensions whose flag is set contribute: with both set the pair
    is encoded row-major as ``median_idx * n_actions + winner_idx``; with one
    set that index is returned unchanged; with neither set the index is 0.
    """
    if median_flag and winner_flag:
        return median_idx * n_actions + winner_idx
    if median_flag:
        return median_idx
    if winner_flag:
        return winner_idx
    return 0


def _simulate_auction_learning(alpha, gamma, episodes, auction_type, init,
                               exploration, asynchronous, n_bidders,
                               median_opp_past_bid_index,
                               winner_bid_index_state, seed=0):
    """
    Train independent Q-learning bidders in a repeated auction.

    Shared engine behind `run_experiment` and `run_experiment_with_revenues`.

    Setup:
      - Bids live on a 21-point grid over [0, 1].
      - The reserve price is drawn once per run from the same grid.
      - The state (when enabled) is built from the previous round: the index
        of the median of all bidders' previous bids and/or the index of the
        previous winning bid (0 after a round with no sale).

    Parameters:
    -----------
    alpha, gamma : float
        Learning rate and discount factor of the Q-update.
    episodes : int
        Number of auction rounds to play.
    auction_type : int
        1 => first-price, 0 => second-price (passed through to get_rewards).
    init : str
        "random" (uniform [0, 1)), "optimistic" (all 100); anything else
        initialises the Q-table with zeros.
    exploration : str
        "boltzmann" for softmax sampling over Q-values; anything else uses
        epsilon-greedy with epsilon decaying linearly from 1 to 0 over the
        first 90% of the episodes.
    asynchronous : int
        1 => only the chosen action is updated; otherwise every action is
        updated from its counterfactual reward against the realised bids of
        the other bidders.
    n_bidders : int
        Number of learning bidders.
    median_opp_past_bid_index, winner_bid_index_state : bool
        Which state dimensions are enabled.
    seed : int
        Seed for both NumPy's and Python's global random generators.

    Returns:
    --------
    revenues : list
        Seller revenue of every round, in order.
    reserve_price : float
        The reserve price drawn for this run.
    """
    # The simulation only draws from NumPy, but `random` is seeded as well:
    # the driver script samples hyper-parameters from `random` between runs,
    # so dropping this call would change what it draws.
    np.random.seed(seed)
    random.seed(seed)

    # 1) Bid granularity
    n_actions = 21
    action_space = np.linspace(0, 1, n_actions)

    # 2) Discrete reserve prices in [0, 1], also 21 points
    possible_reserve_prices = np.linspace(0, 1, 21)
    reserve_price = np.random.choice(possible_reserve_prices)

    n_states = build_state_space(median_opp_past_bid_index, winner_bid_index_state, n_actions)

    # Q initialization
    q_shape = (n_bidders, n_states, n_actions)
    if init == "random":
        Q = np.random.rand(*q_shape)
    elif init == "optimistic":
        # Max possible NPV = 100 (assuming discount=0.99, max reward=1)
        Q = np.full(q_shape, 100.0)
    else:
        Q = np.zeros(q_shape)

    def bid_to_state_index(bid):
        # Nearest grid point to the given bid.
        return np.argmin(np.abs(action_space - bid))

    def encode_state(last_bids, last_winner_bid):
        # Map the previous round's outcome to a Q-table row.
        median_idx = 0
        if median_opp_past_bid_index:
            median_idx = bid_to_state_index(np.median(last_bids))
        winner_idx = 0
        if winner_bid_index_state:
            winner_idx = bid_to_state_index(last_winner_bid)
        return state_to_index(
            median_idx, winner_idx,
            median_opp_past_bid_index, winner_bid_index_state,
            n_actions
        )

    use_boltzmann = exploration == "boltzmann"
    start_eps, end_eps = 1.0, 0.0
    decay_end = int(0.9 * episodes)

    revenues = []
    # Before the first round there is no history: all previous bids are 0.
    s = encode_state(np.zeros(n_bidders), 0.0)

    for ep in range(episodes):
        # Linear epsilon decay, then pure exploitation.
        if ep < decay_end:
            eps = start_eps - (ep / decay_end) * (start_eps - end_eps)
        else:
            eps = end_eps

        # Each bidder chooses an action
        chosen_actions = []
        for i in range(n_bidders):
            if use_boltzmann:
                qvals = Q[i, s]
                # Subtracting the max keeps exp() from overflowing.
                exp_q = np.exp(qvals - np.max(qvals))
                probs = exp_q / np.sum(exp_q)
                a_i = np.random.choice(n_actions, p=probs)
            elif np.random.rand() > eps:
                a_i = np.argmax(Q[i, s])
            else:
                a_i = np.random.randint(n_actions)
            chosen_actions.append(a_i)

        bids = np.array([action_space[a_i] for a_i in chosen_actions])

        rewards, winner, winner_bid_value, revenue_t = get_rewards(
            bids, auction_type, reserve_price=reserve_price
        )
        revenues.append(revenue_t)

        # The state the bidders will face next round; the TD target must
        # bootstrap from this state, not from the one just acted in.
        last_winner_bid = winner_bid_value if winner != -1 else 0.0
        s_next = encode_state(bids, last_winner_bid)

        if asynchronous == 1:
            # Asynchronous update: only the action actually played.
            for i in range(n_bidders):
                a_i = chosen_actions[i]
                td_target = rewards[i] + gamma * np.max(Q[i, s_next])
                Q[i, s, a_i] += alpha * (td_target - Q[i, s, a_i])
        else:
            # Synchronous update: every action, using the reward it would
            # have earned against the others' realised bids. The realised
            # next state is used for all counterfactual actions.
            for i in range(n_bidders):
                cf_rewards = np.zeros(n_actions)
                for a_alt in range(n_actions):
                    cf_bids = bids.copy()
                    cf_bids[i] = action_space[a_alt]
                    cf_r, _, _, _ = get_rewards(cf_bids, auction_type, reserve_price)
                    cf_rewards[a_alt] = cf_r[i]

                max_next_q = np.max(Q[i, s_next])
                Q[i, s, :] = (1 - alpha) * Q[i, s, :] + \
                             alpha * (cf_rewards + gamma * max_next_q)

        s = s_next

    return revenues, reserve_price


def _time_to_converge(revenues, final_rev, window_size, default):
    """
    Return the number of episodes after which the rolling-mean revenue stays
    within ±5% of `final_rev` until the end of the run.

    `default` is returned when there are fewer than `window_size` episodes or
    the rolling mean never settles before the final window.
    """
    if len(revenues) < window_size:
        return default

    # Drop the leading NaNs: entry k of `roll_avg` is the mean of the window
    # ending at episode index k + window_size - 1.
    roll_avg = pd.Series(revenues).rolling(window_size).mean().to_numpy()[window_size - 1:]

    # Small absolute slack so that floating-point residue in the rolling sum
    # does not count as leaving the band when final_rev is exactly 0.
    atol = 1e-9
    lower_band = 0.95 * final_rev - atol
    upper_band = 1.05 * final_rev + atol

    outside = np.flatnonzero((roll_avg < lower_band) | (roll_avg > upper_band))
    if outside.size == 0:
        # In the band from the very first full window.
        return window_size

    # Episode index of the last out-of-band window; the next window is the
    # first of the converged stretch, and it ends one episode later.
    last_outside_ep = int(outside[-1]) + window_size - 1
    return min(last_outside_ep + 2, default)


def run_experiment(alpha, gamma, episodes, auction_type, init, exploration,
                   asynchronous, n_bidders, median_opp_past_bid_index,
                   winner_bid_index_state, seed=0):
    """
    Runs a single experiment with multiple bidders.

    See `_simulate_auction_learning` for the meaning of the parameters.

    Returns dict of outcome metrics:
      - avg_rev_last_1000: mean revenue over the last 1000 episodes
        (over all episodes if there are fewer)
      - std_rev_last_1000: standard deviation over the same episodes
      - time_to_converge: episodes until the 1000-episode rolling mean stays
        within ±5% of avg_rev_last_1000; `episodes` if it never does
      - avg_regret_of_seller: mean of (1 - revenue) over all episodes
      - reserve_price: the reserve price drawn for this run
    """
    revenues, reserve_price = _simulate_auction_learning(
        alpha, gamma, episodes, auction_type, init, exploration,
        asynchronous, n_bidders, median_opp_past_bid_index,
        winner_bid_index_state, seed=seed
    )

    window_size = 1000
    rev = np.asarray(revenues, dtype=float)

    # Slicing already falls back to the whole series when it is shorter
    # than the window.
    tail = rev[-window_size:]
    avg_rev_last_1000 = np.mean(tail)
    std_rev_last_1000 = np.std(tail)

    time_to_converge = _time_to_converge(rev, avg_rev_last_1000, window_size, episodes)

    # Regret is measured against the bidders' common valuation of 1.
    avg_regret_of_seller = np.mean(1.0 - rev)

    return {
        "avg_rev_last_1000": avg_rev_last_1000,
        "std_rev_last_1000": std_rev_last_1000,
        "time_to_converge": time_to_converge,
        "avg_regret_of_seller": avg_regret_of_seller,
        "reserve_price": reserve_price,
    }


def run_experiment_with_revenues(alpha, gamma, episodes, auction_type, init,
                                 exploration, asynchronous, n_bidders,
                                 median_opp_past_bid_index,
                                 winner_bid_index_state,
                                 seed=0):
    """
    Same simulation as `run_experiment`, but returns the entire revenue
    sequence (useful for plotting) instead of summary metrics.

    Auction type:
        1 => first-price
        0 => second-price
    Reserve price drawn from discrete grid in [0,1].
    """
    revenues, _ = _simulate_auction_learning(
        alpha, gamma, episodes, auction_type, init, exploration,
        asynchronous, n_bidders, median_opp_past_bid_index,
        winner_bid_index_state, seed=seed
    )
    return revenues


if __name__ == "__main__":
    os.makedirs("experiment1", exist_ok=True)

    # ---------------------------------------------------------
    # Discrete design factors, declared once. The order of the keys matters:
    # it is the order in which the factors are sampled further down.
    # ---------------------------------------------------------
    param_space = {
        "auction_type": [0, 1],         # 0 => second-price, 1 => first-price
        "init": ["random", "zeros", "optimistic"],
        "exploration": ["egreedy", "boltzmann"],
        "asynchronous": [0, 1],
        "n_bidders": [2, 4, 6],
        "median_opp_past_bid_index": [False, True],
        "winner_bid_index_state": [False, True]
    }
    print("Parameter grid:", param_space)

    # ---------------------------------------------------------
    # A) Demo: identical settings for both auction formats, with both
    #    state dimensions switched on.
    # ---------------------------------------------------------
    demo_settings = dict(
        alpha=0.01,
        gamma=0.99,
        episodes=100_000,
        init="random",
        exploration="egreedy",
        asynchronous=1,
        n_bidders=2,
        median_opp_past_bid_index=True,
        winner_bid_index_state=True,
        seed=999,
    )
    revenues_first = run_experiment_with_revenues(auction_type=1, **demo_settings)
    revenues_second = run_experiment_with_revenues(auction_type=0, **demo_settings)

    # Smooth both revenue paths with the same rolling window before plotting.
    window_size = 500
    roll_first = pd.Series(revenues_first).rolling(window_size, min_periods=1).mean()
    roll_second = pd.Series(revenues_second).rolling(window_size, min_periods=1).mean()

    plt.figure(figsize=(8, 5))
    for smoothed, curve_label in (
        (roll_first, "First-price Auction"),
        (roll_second, "Second-price Auction"),
    ):
        plt.plot(smoothed, label=curve_label, alpha=0.9)
    plt.title("Rolling Avg of Revenue: (2 Bidders, Both State Dimensions ON, With Reserve Price)")
    plt.xlabel("Episode")
    plt.ylabel("Revenue")
    plt.legend()
    plot_path = "experiment1/demo_revenue_comparison.png"
    plt.savefig(plot_path, bbox_inches="tight")
    plt.close()
    print(f"Saved demo revenue comparison plot to {plot_path}")

    # ---------------------------------------------------------
    # B) K randomly configured experiments under one progress bar
    # ---------------------------------------------------------
    outcomes = ["avg_rev_last_1000", "std_rev_last_1000",
                "time_to_converge", "avg_regret_of_seller"]
    K = 20
    results = []

    for k in trange(K, desc="Overall Experiments"):
        # Continuous hyper-parameters first, then one value per discrete
        # factor in param_space order.
        settings = {
            "alpha": random.uniform(0.01, 0.1),
            "gamma": random.uniform(0.0, 0.99),
            "episodes": int(random.uniform(100_000, 250_000)),
        }
        settings.update(
            {factor: random.choice(levels) for factor, levels in param_space.items()}
        )

        # The factor names match run_experiment's keyword arguments.
        outcome = run_experiment(seed=k, **settings)

        # Record the configuration next to the metrics it produced.
        outcome.update(settings)
        results.append(outcome)

    df = pd.DataFrame(results)
    csv_path = "experiment1/data.csv"
    df.to_csv(csv_path, index=False)
    print(f"Data generation complete. Saved to '{csv_path}'")


Parameter grid: {'auction_type': [0, 1], 'init': ['random', 'zeros', 'optimistic'], 'exploration': ['egreedy', 'boltzmann'], 'asynchronous': [0, 1], 'n_bidders': [2, 4, 6], 'median_opp_past_bid_index': [False, True], 'winner_bid_index_state': [False, True]}
Saved demo revenue comparison plot to experiment1/demo_revenue_comparison.png


Overall Experiments:  20%|██        | 4/20 [02:32<10:08, 38.04s/it]


KeyboardInterrupt: 

In [14]:
from multiprocessing import Pool, cpu_count
import numpy as np
import random
import pandas as pd
from tqdm import tqdm
from experiment_utils import run_single_experiment  # Import the function

if __name__ == "__main__":
    os.makedirs("experiment1", exist_ok=True)

    # Parameter space
    param_space = {
        "auction_type": [0, 1],         # 0 => second-price, 1 => first-price
        "init": ["random", "zeros", "optimistic"],
        "exploration": ["egreedy", "boltzmann"],
        "asynchronous": [0, 1],
        "n_bidders": [2, 4, 6],
        "median_opp_past_bid_index": [False, True],
        "winner_bid_index_state": [False, True]
    }
    print("Parameter grid:", param_space)

    # Experiment setup
    K = 50  # Number of experiments

    # Prepare argument list for parallel execution. All configurations are
    # sampled up front in the parent process; each tuple is passed to
    # run_single_experiment as positional arguments.
    experiment_args = []
    for k in range(K):
        alpha = random.uniform(0.01, 0.1)
        gamma = random.uniform(0.0, 0.99)
        episodes = int(random.uniform(100_000, 250_000))
        auction_type = random.choice(param_space["auction_type"])
        init = random.choice(param_space["init"])
        exploration = random.choice(param_space["exploration"])
        asynchronous = random.choice(param_space["asynchronous"])
        n_bidders = random.choice(param_space["n_bidders"])
        median_flag = random.choice(param_space["median_opp_past_bid_index"])
        winner_flag = random.choice(param_space["winner_bid_index_state"])
        seed = k

        experiment_args.append((alpha, gamma, episodes, auction_type, init, exploration, asynchronous, n_bidders, median_flag, winner_flag, seed))

    # Run experiments in parallel
    print(f"Running {K} experiments in parallel using {cpu_count()} CPUs...")
    with Pool(cpu_count()) as pool:
        # starmap() only returns once every job has finished, so wrapping it
        # in tqdm shows no progress. Submitting the jobs individually and
        # collecting them in submission order lets the bar advance as results
        # arrive, and keeps `results` aligned with `experiment_args`.
        pending = [
            pool.apply_async(run_single_experiment, args)
            for args in experiment_args
        ]
        # .get() re-raises any exception raised inside the worker.
        results = [job.get() for job in tqdm(pending, total=K)]

    # Save results
    df = pd.DataFrame(results)
    csv_path = "experiment1/data2.csv"
    df.to_csv(csv_path, index=False)
    print(f"Data generation complete. Saved to '{csv_path}'")


Parameter grid: {'auction_type': [0, 1], 'init': ['random', 'zeros', 'optimistic'], 'exploration': ['egreedy', 'boltzmann'], 'asynchronous': [0, 1], 'n_bidders': [2, 4, 6], 'median_opp_past_bid_index': [False, True], 'winner_bid_index_state': [False, True]}
Running 50 experiments in parallel using 11 CPUs...


ModuleNotFoundError: No module named 'generate_data'

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# DoubleML
import doubleml as dml
from doubleml import DoubleMLData, DoubleMLIRM

# LightGBM
from lightgbm import LGBMRegressor, LGBMClassifier

# For spline expansions
import patsy

# ------------------------------------------------------------------------------
# 1) Load your RL experiment data
# ------------------------------------------------------------------------------
df = pd.read_csv("experiment1/data.csv")

# ------------------------------------------------------------------------------
# 2) Preprocess data
# ------------------------------------------------------------------------------
# Treatment D: first-price -> 0, second-price -> 1.
# The data generator stores auction_type as an integer
# (0 => second-price, 1 => first-price); string labels "first"/"second" are
# still accepted. Comparing an integer column with 'second' would silently
# produce an all-zero treatment.
if pd.api.types.is_numeric_dtype(df['auction_type']):
    df['D'] = (df['auction_type'] == 0).astype(int)
else:
    df['D'] = (df['auction_type'] == 'second').astype(int)

if df['D'].nunique() < 2:
    raise ValueError(
        "Treatment 'D' is constant: the data must contain both first-price "
        "and second-price auctions."
    )

# Encode non-numeric categorical columns
categorical_cols = [col for col in df.columns if df[col].dtype == 'object']
for col in categorical_cols:
    df[col] = pd.factorize(df[col])[0]  # Encode categories as integers

# Columns that must never be used as covariates: the treatment itself and
# every outcome metric written by the generator (they are measured after
# the treatment, so conditioning on them would bias the estimates).
outcome_like_cols = ['avg_rev_last_1000', 'std_rev_last_1000',
                     'time_to_converge', 'avg_regret_of_seller']
non_covariate_cols = ['auction_type', 'D'] + outcome_like_cols

# Dynamically detect binary and continuous covariates
binary_cols = [col for col in df.columns
               if col not in non_covariate_cols and df[col].nunique() == 2]
cont_cols = [col for col in df.columns
             if col not in binary_cols + non_covariate_cols]
X_cols = binary_cols + cont_cols

print(f"Binary covariates: {binary_cols}")
print(f"Continuous covariates: {cont_cols}")

# ------------------------------------------------------------------------------
# 3) Outcomes
# ------------------------------------------------------------------------------
outcomes = ['avg_rev_last_1000', 'time_to_converge', 'avg_regret_of_seller']

# ------------------------------------------------------------------------------
# 4) Loop over outcomes and perform DoubleML inference
# ------------------------------------------------------------------------------
for outcome in outcomes:
    print(f"\n========== Running for Outcome: {outcome} ==========")

    # Define Y (outcome). X_cols was fixed before the loop, so this helper
    # column never becomes a covariate.
    df['Y'] = df[outcome]

    # Create DoubleMLData object
    dml_data = DoubleMLData(df, y_col='Y', d_cols='D', x_cols=X_cols)

    # Define learners
    ml_g = LGBMRegressor(verbose=-1, random_state=123)
    ml_m = LGBMClassifier(verbose=-1, random_state=123)

    # Define and fit DoubleML IRM
    dml_irm = DoubleMLIRM(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=3, score='ATE')
    dml_irm.fit()

    # Print ATE results
    print(dml_irm.summary)

    # ------------------------------------------------------------------------------
    # 5) GATE Analysis
    # ------------------------------------------------------------------------------
    n_bin = len(binary_cols)
    # At least a 1x1 grid so that an empty covariate group does not make
    # plt.subplots fail.
    nrows_gate = max(1, int(np.ceil(n_bin / 3)))
    ncols_gate = max(1, min(n_bin, 3))

    # squeeze=False always yields a 2-D array of axes, whatever the grid size.
    fig_gate, axes_gate = plt.subplots(
        nrows=nrows_gate, ncols=ncols_gate,
        figsize=(5 * ncols_gate, 4 * nrows_gate), squeeze=False
    )
    axes_gate_flat = axes_gate.flatten()

    for i, bin_col in enumerate(binary_cols):
        groups_df = df[[bin_col]].astype('category')
        gate_obj = dml_irm.gate(groups=groups_df)
        ci_95_gate = gate_obj.confint(level=0.95)

        effects = ci_95_gate['effect']
        lower_95 = ci_95_gate['2.5 %']
        upper_95 = ci_95_gate['97.5 %']

        ax = axes_gate_flat[i]

        x_positions = [0, 1]
        ax.errorbar(
            x_positions, effects,
            yerr=[effects - lower_95, upper_95 - effects],
            fmt='o', capsize=5
        )
        ax.set_title(f"GATE: {bin_col} ({outcome})")
        ax.set_xticks(x_positions)
        ax.set_xticklabels([f"{bin_col}=0", f"{bin_col}=1"])
        ax.set_ylabel("Estimated GATE")

    fig_gate.tight_layout()
    gate_plot_path = os.path.join("experiment1", f"gate_plots_{outcome}.png")
    fig_gate.savefig(gate_plot_path, bbox_inches='tight')
    plt.close(fig_gate)
    print(f"GATE plots saved for {outcome}.")

    # ------------------------------------------------------------------------------
    # 6) CATE Analysis
    # ------------------------------------------------------------------------------
    n_cont = len(cont_cols)
    nrows_cate = max(1, int(np.ceil(n_cont / 3)))
    ncols_cate = max(1, min(n_cont, 3))

    fig_cate, axes_cate = plt.subplots(
        nrows=nrows_cate, ncols=ncols_cate,
        figsize=(5 * ncols_cate, 4 * nrows_cate), squeeze=False
    )
    axes_cate_flat = axes_cate.flatten()

    for i, cont_col in enumerate(cont_cols):
        # Quadratic B-spline basis of the covariate, evaluated on the data.
        design_matrix = patsy.dmatrix(f"bs({cont_col}, df=5, degree=2)", df)
        spline_basis = pd.DataFrame(design_matrix)

        cate_obj = dml_irm.cate(basis=spline_basis)
        ci_95_cate = cate_obj.confint(basis=spline_basis, level=0.95)

        effects_cate = ci_95_cate['effect'].values
        lower_95_cate = ci_95_cate['2.5 %'].values
        upper_95_cate = ci_95_cate['97.5 %'].values

        # Sort by the covariate so the line and band are drawn left to right.
        x_values = df[cont_col].values
        idx_sort = np.argsort(x_values)

        x_sorted = x_values[idx_sort]
        eff_sorted = effects_cate[idx_sort]
        low_sorted = lower_95_cate[idx_sort]
        up_sorted = upper_95_cate[idx_sort]

        ax_cate = axes_cate_flat[i]

        ax_cate.plot(x_sorted, eff_sorted, label='CATE')
        ax_cate.fill_between(x_sorted, low_sorted, up_sorted, alpha=0.2, label='95% CI')
        ax_cate.set_title(f"CATE: {cont_col} ({outcome})")
        ax_cate.set_xlabel(cont_col)
        ax_cate.set_ylabel("Estimated Treatment Effect")
        ax_cate.legend()

    fig_cate.tight_layout()
    cate_plot_path = os.path.join("experiment1", f"cate_plots_{outcome}.png")
    fig_cate.savefig(cate_plot_path, bbox_inches='tight')
    plt.close(fig_cate)
    print(f"CATE plots saved for {outcome}.")


Binary covariates: ['init', 'exploration', 'asynchronous', 'median_opp_past_bid_index', 'winner_bid_index_state']
Continuous covariates: ['alpha', 'gamma', 'episodes', 'n_bidders']

       coef   std err          t         P>|t|    2.5 %   97.5 %
D  0.110655  0.007227  15.310714  6.484830e-53  0.09649  0.12482
GATE plots saved for avg_rev_last_1000.
CATE plots saved for avg_rev_last_1000.

          coef      std err         t         P>|t|         2.5 %       97.5 %
D -9590.463494  1337.683688 -7.169455  7.529671e-13 -12212.275345 -6968.651643
GATE plots saved for time_to_converge.
CATE plots saved for time_to_converge.

       coef   std err          t          P>|t|     2.5 %    97.5 %
D -0.082281  0.003711 -22.171802  6.428508e-109 -0.089554 -0.075007
GATE plots saved for avg_regret_of_seller.
CATE plots saved for avg_regret_of_seller.
