In [1]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
from tqdm import trange

DEBUG = False


def dprint(*args):
    """Print ``args`` only when the module-level ``DEBUG`` flag is truthy.

    Acts as a drop-in replacement for ``print`` that stays silent unless
    debugging has been switched on.
    """
    if not DEBUG:
        return
    print(*args)

# --------------------------------------------------------------
# 1) Valuation with eta (normalized)
# --------------------------------------------------------------
def get_valuation(eta, own_signal, others_signals):
    """Return a bidder's valuation as a blend of own and rivals' signals.

    The weight on the mean of ``others_signals`` is ``0.5 * eta`` and the
    weight on ``own_signal`` is ``1 - 0.5 * eta``, so the two weights always
    sum to one.

    Args:
        eta: Mixing parameter controlling how much rivals' signals matter.
        own_signal: The bidder's private signal.
        others_signals: Array-like of the other bidders' signals.

    Returns:
        The weighted combination of ``own_signal`` and the mean of
        ``others_signals``.
    """
    weight_on_others = 0.5 * eta
    weight_on_own = 1.0 - 0.5 * eta
    rivals_mean = np.mean(others_signals)
    return weight_on_own * own_signal + weight_on_others * rivals_mean

# --------------------------------------------------------------
# 2) Payoffs
# --------------------------------------------------------------
def get_payoffs(bids, valuations, auction_type):
    """Resolve one auction round and compute every bidder's payoff.

    The highest bid wins; if several bidders share the highest bid, the
    winner is drawn among them with ``random.choice``. Only the winner gets
    a non-zero payoff: valuation minus the price paid.

    Args:
        bids: Sequence of bids, one per bidder.
        valuations: Sequence of valuations aligned with ``bids``.
        auction_type: ``"first"`` charges the winner the highest bid; any
            other value charges the second-highest bid (or the highest bid
            when there is only one bidder).

    Returns:
        Tuple ``(rewards, winner, highest_bid)`` where ``rewards`` is a
        NumPy array with one entry per bidder.
    """
    ranking = np.argsort(bids)[::-1]  # bidder indices, highest bid first
    top_bid = bids[ranking[0]]

    # Everyone matching the top bid, kept in ranking order so the random
    # draw below sees the same candidate list on every run with a given seed.
    leaders = [idx for idx in ranking if bids[idx] == top_bid]
    winner = ranking[0]
    if len(leaders) > 1:
        winner = random.choice(leaders)

    if auction_type == "first":
        price = top_bid
    elif len(bids) > 1:
        price = bids[ranking[1]]
    else:
        # A lone bidder in a second-price auction pays their own bid.
        price = top_bid

    rewards = np.zeros(len(bids))
    rewards[winner] = valuations[winner] - price
    return rewards, winner, top_bid

# --------------------------------------------------------------
# 3) Bandit helpers: UCB and Linear (Contextual)
# --------------------------------------------------------------
class UCBBandit:
    """Context-free upper-confidence-bound bandit over a fixed set of arms.

    Keeps a pull count and a reward sum per arm, plus the total number of
    pulls across all arms.
    """

    def __init__(self, n_actions, c):
        """Create a bandit with ``n_actions`` arms and exploration weight ``c``."""
        self.n_actions = n_actions
        self.c = c
        self.counts = np.zeros(n_actions)
        self.sums = np.zeros(n_actions)
        self.total_pulls = 0

    def select_action(self):
        """Return the index of the arm to play next.

        Arms that have never been pulled are played first (one is picked
        with ``np.random.choice``). Once every arm has been tried, the arm
        with the largest mean reward plus exploration bonus is returned.
        """
        unexplored = np.flatnonzero(self.counts == 0)
        if unexplored.size > 0:
            return np.random.choice(unexplored)

        mean_reward = self.sums / self.counts
        exploration = self.c * np.sqrt(np.log(self.total_pulls) / self.counts)
        return np.argmax(mean_reward + exploration)

    def update(self, action, reward):
        """Record that ``action`` was played and yielded ``reward``."""
        self.total_pulls += 1
        self.counts[action] += 1
        self.sums[action] += reward

class LinearContextualBandit:
    """Linear contextual bandit with one ridge-regression model per arm.

    For every arm the class stores ``A`` (regularised sum of context outer
    products) and ``b`` (sum of reward-weighted contexts). Arms are scored
    by the predicted reward plus ``c`` times the square root of the
    context's quadratic form under ``A``'s inverse.
    """

    def __init__(self, n_actions, context_dim, c, reg=1.0):
        """Set up ``n_actions`` independent models of size ``context_dim``.

        ``reg`` scales the identity matrix each ``A`` starts from.
        """
        self.n_actions = n_actions
        self.context_dim = context_dim
        self.c = c
        self.reg = reg
        # Per-arm sufficient statistics: A = reg * I + sum(x x'), b = sum(x * r).
        self.A = [reg * np.eye(context_dim) for _ in range(n_actions)]
        self.b = [np.zeros((context_dim,)) for _ in range(n_actions)]

    def _optimistic_score(self, action, context):
        """Return predicted reward plus exploration bonus for one arm."""
        A_inv = np.linalg.inv(self.A[action])
        theta_hat = A_inv @ self.b[action]
        predicted = theta_hat @ context
        spread = context.T @ A_inv @ context
        return predicted + self.c * np.sqrt(spread)

    def select_action(self, context):
        """Return the index of the arm with the highest optimistic score."""
        scores = [
            self._optimistic_score(arm, context)
            for arm in range(self.n_actions)
        ]
        return np.argmax(scores)

    def update(self, action, context, reward):
        """Fold one observed ``(context, reward)`` pair into ``action``'s model."""
        self.b[action] += context * reward
        self.A[action] += np.outer(context, context)

# --------------------------------------------------------------
# 4) Run bandit experiment (replaces Q-learning)
# --------------------------------------------------------------
def run_bandit_experiment(
    eta, auction_type, bandit_type, c, n_bidders,
    n_val_bins=6, n_bid_bins=6, seed=0, max_rounds=200_000,
    conv_window=1000, conv_thresh=1e-3
):
    """Simulate repeated auctions between bandit bidders.

    Each round every bidder draws a discrete signal, picks a bid from an
    evenly spaced grid on [0, 1] via its bandit, and is rewarded with its
    auction payoff. The loop stops early once the standard deviation of the
    last ``conv_window`` revenues drops below ``conv_thresh``.

    Seller revenue is the price actually charged to the winner: the highest
    bid in a first-price auction and the second-highest bid otherwise (the
    highest bid if there is a single bidder). This mirrors the price that
    ``get_payoffs`` subtracts from the winner's valuation.

    Args:
        eta: Valuation mixing parameter passed to ``get_valuation``.
        auction_type: ``"first"`` for first-price; any other value is
            treated as second-price.
        bandit_type: ``"ucb"`` for ``UCBBandit``; any other value selects
            ``LinearContextualBandit``.
        c: Exploration parameter handed to the bandits.
        n_bidders: Number of bidders.
        n_val_bins: Number of discrete signal levels.
        n_bid_bins: Number of discrete bid levels.
        seed: Seed applied to both NumPy's and ``random``'s global RNGs.
        max_rounds: Maximum number of auction rounds.
        conv_window: Number of trailing rounds used for the convergence
            check and for the reported average revenue.
        conv_thresh: Standard-deviation threshold that signals convergence.

    Returns:
        Tuple ``(avg_rev, time_to_converge, avg_regret)``: mean revenue over
        the last ``conv_window`` rounds, the round index at which the run
        converged (``max_rounds`` if it never did), and the mean of
        ``1 - revenue`` over all rounds played.
    """
    # NOTE: this reseeds the *global* RNGs, so callers drawing from the
    # module-level ``random`` functions are affected by every call.
    np.random.seed(seed)
    random.seed(seed)

    # Discretized bid actions
    actions = np.linspace(0, 1, n_bid_bins)

    use_context = bandit_type != "ucb"
    if use_context:
        # context_dim = 3 (own_signal, last_median_bid, last_winning_bid)
        bandits = [LinearContextualBandit(n_bid_bins, 3, c) for _ in range(n_bidders)]
    else:
        bandits = [UCBBandit(n_bid_bins, c) for _ in range(n_bidders)]

    is_first_price = auction_type == "first"

    revenues = []
    past_bids = np.zeros(n_bidders)
    past_winner_bid = 0.0
    time_to_converge = max_rounds

    for r in range(max_rounds):
        # Signals in [0, 1]
        signals = np.random.randint(n_val_bins, size=n_bidders) / (n_val_bins - 1)

        valuations = [
            get_valuation(eta, signals[i], np.delete(signals, i))
            for i in range(n_bidders)
        ]

        # Each bidder picks a bid. The context (which needs a median over
        # rivals' previous bids) is only built for bandits that consume it.
        choices = []
        for i, bandit in enumerate(bandits):
            if use_context:
                context = np.array([
                    signals[i],
                    np.median(np.delete(past_bids, i)),
                    past_winner_bid,
                ])
                action = bandit.select_action(context)
            else:
                context = None
                action = bandit.select_action()
            choices.append((action, context))

        bids = [actions[action] for action, _ in choices]
        rewards, _winner, highest_bid = get_payoffs(bids, valuations, auction_type)

        # Update bandits with their own realised payoff.
        for i, (action, context) in enumerate(choices):
            if use_context:
                bandits[i].update(action, context, rewards[i])
            else:
                bandits[i].update(action, rewards[i])

        # Seller revenue is the price paid by the winner, which differs from
        # the top bid in a second-price auction with at least two bidders.
        if is_first_price or len(bids) < 2:
            price = highest_bid
        else:
            price = sorted(bids)[-2]
        revenues.append(price)

        # Memory used to build the next round's context.
        past_bids = np.array(bids)
        past_winner_bid = highest_bid

        # Convergence check on the trailing revenue window.
        if r >= conv_window:
            if np.std(revenues[-conv_window:]) < conv_thresh:
                time_to_converge = r
                break

    avg_rev = np.mean(revenues[-conv_window:])
    avg_regret = np.mean(1.0 - np.array(revenues))
    return avg_rev, time_to_converge, avg_regret

# --------------------------------------------------------------
# 5) Main experiment loop
# --------------------------------------------------------------
def main_experiment(K=50, design_seed=None):
    """Run ``K`` randomly configured bandit-auction experiments.

    For each experiment the affiliation parameter, exploration parameter,
    number of bidders, bandit type and auction type are sampled at random,
    and the experiment index is used as the simulation seed.

    The configurations are drawn from a dedicated ``random.Random``
    instance rather than the module-level ``random`` functions, because
    ``run_bandit_experiment`` reseeds the global generator on every call;
    sharing it would tie each configuration to the previous run's
    tie-breaking draws and leave the first configuration unseeded.

    Args:
        K: Number of experiments to run.
        design_seed: Optional seed for the configuration sampler. ``None``
            (the default) seeds it from system entropy, so configurations
            differ between invocations.

    Returns:
        A ``pandas.DataFrame`` with one row per experiment holding the
        sampled configuration and the three outcome measures.
    """
    design_rng = random.Random(design_seed)
    auction_type_options = ["first", "second"]
    bandit_type_options = ["ucb", "contextual"]

    results = []
    for seed in trange(K, desc="Generating experiments"):
        eta = design_rng.uniform(0.0, 1.0)
        c = design_rng.uniform(0.01, 2.0)  # exploration parameter
        n_bidders = design_rng.choice([2, 4, 6])
        bandit_type = design_rng.choice(bandit_type_options)
        auction_type = design_rng.choice(auction_type_options)

        avg_rev, time_to_converge, avg_regret = run_bandit_experiment(
            eta=eta,
            auction_type=auction_type,
            bandit_type=bandit_type,
            c=c,
            n_bidders=n_bidders,
            seed=seed
        )

        results.append({
            "eta": eta,
            "c": c,
            "auction_type": auction_type,
            "bandit_type": bandit_type,
            "n_bidders": n_bidders,
            "avg_rev": avg_rev,
            "time_to_converge": time_to_converge,
            "avg_regret_seller": avg_regret
        })

    return pd.DataFrame(results)

# --------------------------------------------------------------
# 6) Run and save data
# --------------------------------------------------------------
if __name__ == "__main__":
    # Entry point of the data-generation cell: make sure the output folder
    # exists, run the 500-configuration sweep, then write the results as CSV.
    csv_path = "experiment3/data.csv"
    os.makedirs("experiment3", exist_ok=True)

    df = main_experiment(K=500)
    df.to_csv(csv_path, index=False)

    print(f"Data generation complete. Saved to '{csv_path}'.")


Generating experiments:   9%|▉         | 46/500 [25:56<3:16:55, 26.03s/it]

In [4]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd
import doubleml
import matplotlib.pyplot as plt
import os
import doubleml as dml
from doubleml import DoubleMLData, DoubleMLIRM
from lightgbm import LGBMRegressor, LGBMClassifier
import patsy
from scipy.stats import norm, pearsonr
from tabulate import tabulate
import math

if __name__ == "__main__":
    # Analysis script: load the simulated auction data, print descriptive
    # tables, then estimate the effect of the auction format (treatment) on
    # each outcome with DoubleML's interactive regression model, including
    # group-level (GATE) and covariate-conditional (CATE) effect plots.

    # ------------------------------------------------------------------------------
    # 1) Variable definitions and reading data
    # ------------------------------------------------------------------------------
    # Human-readable codebook; only used for the printed table below.
    var_definitions = {
        "eta": "Affiliation parameter (0.0 to 1.0).",
        "c": "Exploration parameter in UCB/Contextual bandits.",
        "auction_type": "Treatment: 0 = first-price, 1 = second-price.",
        "bandit_type": "0 = linear contextual, 1 = UCB.",
        "n_bidders": "Number of bidders (2, 4, or 6).",
        "avg_rev": "Average seller revenue (last ~convergence window).",
        "time_to_converge": "Round index at which revenue stabilized.",
        "avg_regret_seller": "Average regret for the seller (1 - revenue)."
    }

    df_var_defs = pd.DataFrame(
        [{"Parameter": k, "Definition": v} for k, v in var_definitions.items()]
    )
    print("\n=== Variable Definitions ===")
    print(tabulate(df_var_defs, headers="keys", tablefmt="github"))

    # Load data from experiment3
    df = pd.read_csv("experiment3/data.csv")

    # Recode the treatment to 0 = first-price, 1 = second-price.
    # NOTE(review): this assumes the CSV stores auction_type as the strings
    # "first"/"second"; if the column were already 0/1 the comparison below
    # would map every row to 0 — confirm against the data file.
    df["auction_type"] = (df["auction_type"] == "second").astype(int)

    # time_to_converge is left as an absolute round index. To express it as
    # a fraction of the run length instead, divide by the maximum number of
    # rounds used when generating the data, e.g.:
    # df["time_to_converge"] /= 200000

    # ------------------------------------------------------------------------------
    # 2) Set up columns
    # ------------------------------------------------------------------------------
    treatment_col = "auction_type"
    # Recode bandit_type to binary: 1 where the value equals "ucb", else 0.
    df["bandit_type"] = (df["bandit_type"] == "ucb").astype(int)

    covariates_list = ["eta", "c", "n_bidders", "bandit_type"]
    outcomes_list = ["avg_rev", "time_to_converge", "avg_regret_seller"]

    # Safety net: any analysis column whose dtype is not one of the listed
    # float/int types is replaced by integer codes from pd.factorize.
    for col in [treatment_col] + covariates_list + outcomes_list:
        if df[col].dtype not in [np.float64, np.int64, float, int]:
            df[col], _ = pd.factorize(df[col])

    # ------------------------------------------------------------------------------
    # 3) Summary statistics
    # ------------------------------------------------------------------------------
    cols_for_summary = covariates_list + [treatment_col] + outcomes_list
    # One row per variable; the "count" column is dropped from the table.
    summary_stats = df[cols_for_summary].describe().T.drop("count", axis=1, errors="ignore")
    print("\n=== Summary Statistics ===")
    print(tabulate(summary_stats, headers="keys", tablefmt="github"))

    # ------------------------------------------------------------------------------
    # 4) Correlations with treatment
    # ------------------------------------------------------------------------------
    # Pearson correlation (and its p-value) between the treatment indicator
    # and every other covariate/outcome column.
    corr_results = []
    for col in cols_for_summary:
        if col != treatment_col:
            r, p = pearsonr(df[col], df[treatment_col])
            corr_results.append({"Variable": col, "Correlation": r, "p-value": p})

    print("\n=== Correlations with auction_type ===")
    print(tabulate(corr_results, headers="keys", tablefmt="github"))

    # ------------------------------------------------------------------------------
    # 5) DoubleML Analysis: ATE, GATE, CATE
    # ------------------------------------------------------------------------------
    # Create an output folder
    out_dir = "experiment3"
    os.makedirs(out_dir, exist_ok=True)

    for outcome in outcomes_list:
        # Copy the current outcome into a fixed "Y" column so the same
        # DoubleMLData call works for every outcome (overwritten each pass).
        df["Y"] = df[outcome]
        dml_data = doubleml.DoubleMLData(
            df, y_col="Y", d_cols=treatment_col, x_cols=covariates_list
        )

        # Nuisance learners: a regressor for the outcome model and a
        # classifier for the treatment (propensity) model, both LightGBM
        # with a fixed random_state.
        ml_g = LGBMRegressor(random_state=123, verbose=-1)
        ml_m = LGBMClassifier(random_state=123, verbose=-1)

        # Interactive regression model with 10 folds, a single repetition
        # and the ATE score.
        dml_irm = doubleml.DoubleMLIRM(
            dml_data, ml_g=ml_g, ml_m=ml_m,
            n_folds=10, n_rep=1, score="ATE"
        )
        dml_irm.fit()

        print(f"\n========== {outcome.upper()} | ATE Results ==========")
        print(dml_irm.summary)

        # --------------------------------------
        # GATE for binary covariates
        # --------------------------------------
        # "Binary" means exactly two distinct values in the data.
        binary_covs = [c for c in covariates_list if df[c].nunique() == 2]
        n_bin = len(binary_covs)
        if n_bin > 0:
            # Lay the panels out on a grid with at most three columns.
            nrows_gate = math.ceil(n_bin / 3)
            ncols_gate = min(n_bin, 3)
            fig_gate, axes_gate = plt.subplots(
                nrows=nrows_gate, ncols=ncols_gate,
                figsize=(5 * ncols_gate, 4 * nrows_gate)
            )
            # With a single panel plt.subplots returns a bare Axes; wrap it
            # so it can be indexed like the multi-panel case.
            if n_bin == 1:
                axes_gate = np.array([axes_gate])
            gate_results = []

            for i, bin_col in enumerate(binary_covs):
                groups_df = df[[bin_col]].astype("category")
                gate_obj = dml_irm.gate(groups=groups_df)
                ci_95 = gate_obj.confint(level=0.95)
                eff = ci_95["effect"]
                lo = ci_95["2.5 %"]
                hi = ci_95["97.5 %"]
                gate_sum = gate_obj.summary
                errs = gate_sum["std err"]

                ax = axes_gate.flatten()[i] if n_bin > 1 else axes_gate[0]
                x_pos = np.arange(len(eff))
                # yerr is given as distances below/above the point estimate.
                ax.errorbar(x_pos, eff, yerr=[eff - lo, hi - eff], fmt="o", capsize=5)
                ax.set_title(f"GATE: {bin_col} ({outcome})")
                ax.set_xticks(x_pos)
                # Tick labels use the row position (0, 1, ...) as the level.
                # NOTE(review): assumes gate() returns groups in ascending
                # level order so position 0/1 matches level 0/1 — confirm.
                ax.set_xticklabels([f"{bin_col}={lvl}" for lvl in range(len(eff))])
                ax.set_ylabel("Estimated GATE")

                # Two-group comparison: z-test on the difference of the two
                # group effects. The variance is the sum of the squared
                # standard errors, i.e. the two estimates are treated as
                # uncorrelated.
                if len(eff) == 2:
                    dval = eff.iloc[1] - eff.iloc[0]
                    dvar = errs.iloc[1]**2 + errs.iloc[0]**2
                    dse = math.sqrt(dvar)
                    tval = dval / dse
                    # Two-sided p-value from the standard normal.
                    pval = 2.0 * (1.0 - norm.cdf(abs(tval)))
                    gate_results.append({
                        "Variable": bin_col,
                        "Group0_Effect": f"{eff.iloc[0]:.4f}",
                        "Group1_Effect": f"{eff.iloc[1]:.4f}",
                        "Diff(Group1-Group0)": f"{dval:.4f}",
                        "StdErr(Diff)": f"{dse:.4f}",
                        "t-value": f"{tval:.4f}",
                        "p-value": f"{pval:.4f}"
                    })

            fig_gate.tight_layout()
            gate_path = os.path.join(out_dir, f"gate_plots_{outcome}.png")
            fig_gate.savefig(gate_path, bbox_inches="tight")
            plt.close(fig_gate)

            if gate_results:
                print(f"\nT-tests for GATE ({outcome}):")
                print(tabulate(gate_results, headers="keys", tablefmt="github"))

        # --------------------------------------
        # CATE for continuous covariates
        # --------------------------------------
        # "Continuous" here means more than two distinct values, so a
        # discrete covariate with three or more levels is included as well.
        cont_covs = [c for c in covariates_list if df[c].nunique() > 2]
        n_cont = len(cont_covs)
        if n_cont > 0:
            # Same grid layout rule as for the GATE figure.
            nrows_cate = math.ceil(n_cont / 3)
            ncols_cate = min(n_cont, 3)
            fig_cate, axes_cate = plt.subplots(
                nrows=nrows_cate, ncols=ncols_cate,
                figsize=(5 * ncols_cate, 4 * nrows_cate)
            )
            if n_cont == 1:
                axes_cate = np.array([axes_cate])

            for i, cont_col in enumerate(cont_covs):
                # Quadratic B-spline basis (df=5) of the covariate, built by
                # patsy and evaluated at the observed data points.
                design_matrix = patsy.dmatrix(f"bs({cont_col}, df=5, degree=2)", df)
                spline_basis = pd.DataFrame(design_matrix)
                cate_obj = dml_irm.cate(basis=spline_basis)
                # Confidence band evaluated on the same basis rows, so the
                # results line up one-to-one with the rows of df.
                ci_95_cate = cate_obj.confint(basis=spline_basis, level=0.95)
                eff_cate = ci_95_cate["effect"].values
                lo_cate = ci_95_cate["2.5 %"].values
                hi_cate = ci_95_cate["97.5 %"].values

                # Sort by the covariate so the curve is drawn left to right.
                xvals = df[cont_col].values
                idx_sort = np.argsort(xvals)
                x_sort = xvals[idx_sort]
                eff_sort = eff_cate[idx_sort]
                lo_sort = lo_cate[idx_sort]
                hi_sort = hi_cate[idx_sort]

                axc = axes_cate.flatten()[i] if n_cont > 1 else axes_cate[0]
                axc.plot(x_sort, eff_sort, label="CATE")
                axc.fill_between(x_sort, lo_sort, hi_sort, alpha=0.2, label="95% CI")
                axc.set_title(f"CATE: {cont_col} ({outcome})")
                axc.set_xlabel(cont_col)
                axc.set_ylabel("Estimated TE")
                axc.legend()

            fig_cate.tight_layout()
            cate_path = os.path.join(out_dir, f"cate_plots_{outcome}.png")
            fig_cate.savefig(cate_path, bbox_inches="tight")
            plt.close(fig_cate)

    print("\nAll analysis complete. Results saved in 'experiment3/' directory.")



=== Variable Definitions ===
|    | Parameter         | Definition                                         |
|----|-------------------|----------------------------------------------------|
|  0 | eta               | Affiliation parameter (0.0 to 1.0).                |
|  1 | c                 | Exploration parameter in UCB/Contextual bandits.   |
|  2 | auction_type      | Treatment: 0 = first-price, 1 = second-price.      |
|  3 | bandit_type       | 0 = linear contextual, 1 = UCB.                    |
|  4 | n_bidders         | Number of bidders (2, 4, or 6).                    |
|  5 | avg_rev           | Average seller revenue (last ~convergence window). |
|  6 | time_to_converge  | Round index at which revenue stabilized.           |
|  7 | avg_regret_seller | Average regret for the seller (1 - revenue).       |

=== Summary Statistics ===
|                   |          mean |          std |          min |          25% |           50% |           75% |           max |
|----------