# In [None]:
#!/usr/bin/env python3

import numpy as np
import pandas as pd
import random
import os
import matplotlib.pyplot as plt
from tqdm import trange

# Module-wide switch for the diagnostic output emitted through dprint().
DEBUG = False


def dprint(*args):
    """Forward ``args`` to ``print`` only while ``DEBUG`` is truthy."""
    if not DEBUG:
        return
    print(*args)

# --------------------------------------------------------------
# 1) Valuation with eta (normalized)
# --------------------------------------------------------------
def get_valuation(eta, own_signal, others_signals):
    """Blend a bidder's own signal with the mean of the rivals' signals.

    The own signal is weighted by ``1 - eta / 2`` and the average of
    ``others_signals`` by ``eta / 2``, so the two weights always sum to one.

    Args:
        eta: Mixing parameter controlling the weight on the rivals' signals.
        own_signal: The bidder's own signal.
        others_signals: Signals of the other bidders (averaged with np.mean).

    Returns:
        The weighted combination of the own signal and the rivals' mean signal.
    """
    common_weight = 0.5 * eta
    private_weight = 1.0 - common_weight
    rivals_mean = np.mean(others_signals)
    return private_weight * own_signal + common_weight * rivals_mean

# --------------------------------------------------------------
# 2) Payoffs
# --------------------------------------------------------------
def get_payoffs(bids, valuations, auction_type):
    """Resolve one sealed-bid auction and compute every bidder's payoff.

    The highest bid wins; when several bidders share the highest bid the
    winner is drawn uniformly among them with ``random.choice``. Only the
    winner gets a non-zero payoff: valuation minus the price paid.

    Args:
        bids: Sequence of bids, one per bidder.
        valuations: Sequence of valuations aligned with ``bids``.
        auction_type: ``"first"`` charges the highest bid; any other value
            charges the second-highest bid (or the highest bid when there is
            only one bidder).

    Returns:
        Tuple ``(rewards, winner, highest_bid)`` where ``rewards`` is a NumPy
        array of payoffs, ``winner`` is the winning bidder's index and
        ``highest_bid`` is the largest submitted bid.
    """
    # Bidder indices ordered from highest to lowest bid.
    ranking = np.argsort(bids)[::-1]
    leader = ranking[0]
    top_bid = bids[leader]

    # Tie-breaking: everybody who matched the top bid is a contender.
    contenders = [idx for idx in ranking if bids[idx] == top_bid]
    winner = random.choice(contenders) if len(contenders) > 1 else leader

    if auction_type == "first":
        price = top_bid
    elif len(bids) > 1:
        price = bids[ranking[1]]
    else:
        # A lone bidder in a second-price auction pays their own bid.
        price = top_bid

    rewards = np.zeros(len(bids))
    rewards[winner] = valuations[winner] - price
    return rewards, winner, top_bid

# --------------------------------------------------------------
# 3) Bandit helpers: UCB and Linear (Contextual)
# --------------------------------------------------------------
class UCBBandit:
    """Context-free bandit that picks actions by an upper-confidence score.

    Each action's score is its average observed reward plus
    ``c * sqrt(log(total_pulls) / count)``.
    """

    def __init__(self, n_actions, c):
        """Create a bandit over ``n_actions`` arms with exploration weight ``c``."""
        self.n_actions = n_actions
        self.c = c
        # Per-action pull counts and cumulative rewards.
        self.counts = np.zeros(n_actions)
        self.sums = np.zeros(n_actions)
        self.total_pulls = 0

    def select_action(self):
        """Return the index of the action to play next.

        Actions that have never been played take priority and are sampled
        uniformly with ``np.random.choice``; once every action has been tried
        the one with the largest upper-confidence score is returned.
        """
        unexplored = np.flatnonzero(self.counts == 0)
        if unexplored.size:
            return np.random.choice(unexplored)

        mean_reward = self.sums / self.counts
        exploration = self.c * np.sqrt(np.log(self.total_pulls) / self.counts)
        return np.argmax(mean_reward + exploration)

    def update(self, action, reward):
        """Record that ``action`` was played and produced ``reward``."""
        self.total_pulls += 1
        self.counts[action] += 1
        self.sums[action] += reward

class LinearContextualBandit:
    """Contextual bandit with one regularized linear reward model per action.

    For every action the class keeps ``A = reg * I + sum(x x')`` and
    ``b = sum(x * reward)``. Actions are scored by the predicted reward plus
    an uncertainty bonus ``c * sqrt(x' A^-1 x)``.
    """

    def __init__(self, n_actions, context_dim, c, reg=1.0):
        """Set up ``n_actions`` models over contexts of size ``context_dim``.

        Args:
            n_actions: Number of available actions.
            context_dim: Length of the context vector.
            c: Weight on the uncertainty bonus.
            reg: Multiplier of the identity matrix that initializes each ``A``.
        """
        self.n_actions = n_actions
        self.context_dim = context_dim
        self.c = c
        self.reg = reg
        # Per-action sufficient statistics: A (X'X + reg*I) and b (X'y).
        self.A = [reg * np.eye(context_dim) for _ in range(n_actions)]
        self.b = [np.zeros((context_dim,)) for _ in range(n_actions)]

    def select_action(self, context):
        """Return the action with the highest optimistic reward for ``context``."""
        scores = []
        for arm in range(self.n_actions):
            gram_inv = np.linalg.inv(self.A[arm])
            # Coefficient estimate: theta = A^-1 b.
            theta_hat = gram_inv @ self.b[arm]
            predicted = theta_hat @ context
            uncertainty = context.T @ gram_inv @ context
            scores.append(predicted + self.c * np.sqrt(uncertainty))
        return np.argmax(scores)

    def update(self, action, context, reward):
        """Fold one ``(context, reward)`` observation into ``action``'s model."""
        self.b[action] += context * reward
        self.A[action] += np.outer(context, context)

# --------------------------------------------------------------
# 4) Run bandit experiment (replaces Q-learning)
# --------------------------------------------------------------
def run_bandit_experiment(
    eta, auction_type, bandit_type, c, n_bidders,
    n_val_bins=6, n_bid_bins=6, seed=0, max_rounds=200_000,
    conv_window=1000, conv_thresh=1e-3
):
    """Simulate a repeated auction in which every bidder is a bandit learner.

    Each round draws a discrete signal per bidder, derives valuations with
    ``get_valuation``, lets each bandit choose a bid from an evenly spaced
    grid on [0, 1], settles the auction with ``get_payoffs`` and feeds each
    bidder's payoff back to its bandit. The run stops early once the standard
    deviation of the last ``conv_window`` revenues drops below
    ``conv_thresh``.

    Both the NumPy and the stdlib ``random`` global generators are reseeded
    with ``seed`` at the start of the call.

    Args:
        eta: Weight parameter passed to ``get_valuation``.
        auction_type: ``"first"`` or anything else (treated as second-price
            by ``get_payoffs``).
        bandit_type: ``"ucb"`` for ``UCBBandit``; any other value selects
            ``LinearContextualBandit``.
        c: Exploration parameter handed to the bandits.
        n_bidders: Number of competing bidders.
        n_val_bins: Number of discrete signal levels.
        n_bid_bins: Number of discrete bid levels.
        seed: Seed for both global random generators.
        max_rounds: Upper bound on the number of auction rounds.
        conv_window: Number of trailing rounds used for the convergence test
            and for the reported average revenue.
        conv_thresh: Standard-deviation threshold that declares convergence.

    Returns:
        Tuple ``(avg_rev, time_to_converge, avg_regret)``: the mean revenue
        over the last ``conv_window`` rounds, the round index at which the
        convergence test first passed (``max_rounds`` if it never did), and
        the mean of ``1 - revenue`` over all simulated rounds.
    """
    np.random.seed(seed)
    random.seed(seed)

    # Discretized bid actions
    actions = np.linspace(0, 1, n_bid_bins)

    # Only the contextual learner consumes a context vector.
    use_context = bandit_type != "ucb"
    if use_context:
        # context_dim = 3 (own_signal, last_median_bid, last_winning_bid)
        bandits = [LinearContextualBandit(n_bid_bins, 3, c) for _ in range(n_bidders)]
    else:
        bandits = [UCBBandit(n_bid_bins, c) for _ in range(n_bidders)]

    # Signals live on {0, 1/(n-1), ..., 1}; with a single bin the grid
    # collapses to {0} instead of dividing zero by zero.
    signal_scale = max(n_val_bins - 1, 1)

    revenues = []
    past_bids = np.zeros(n_bidders)
    past_winner_bid = 0.0
    time_to_converge = max_rounds

    for r in range(max_rounds):
        # Signals in [0, 1]
        signals = np.random.randint(n_val_bins, size=n_bidders) / signal_scale

        # Valuations
        valuations = [
            get_valuation(eta, signals[i], np.delete(signals, i))
            for i in range(n_bidders)
        ]

        # Each bidder picks a bid. The context (own signal, median of the
        # rivals' previous bids, previous winning bid) is built only when the
        # bandit actually uses it; UCB bandits ignore it entirely.
        chosen_bids = []
        for i in range(n_bidders):
            if use_context:
                context = np.array([
                    signals[i],
                    np.median(np.delete(past_bids, i)),
                    past_winner_bid,
                ])
                a = bandits[i].select_action(context)
            else:
                context = None
                a = bandits[i].select_action()
            chosen_bids.append((i, a, context))

        bids = [actions[a] for _, a, _ in chosen_bids]
        rewards, _, highest_bid = get_payoffs(bids, valuations, auction_type)

        # Update bandits
        for i, a, context in chosen_bids:
            if use_context:
                bandits[i].update(a, context, rewards[i])
            else:
                bandits[i].update(a, rewards[i])

        # Track revenue (max bid).
        # NOTE(review): the maximum bid is recorded for both auction formats,
        # although the winner of a second-price auction pays the second-highest
        # bid; confirm this is the intended revenue measure.
        revenues.append(np.max(bids))

        # Update memory
        past_bids = np.array(bids)
        past_winner_bid = highest_bid

        # Convergence check
        if r >= conv_window and np.std(revenues[-conv_window:]) < conv_thresh:
            time_to_converge = r
            break

    avg_rev = np.mean(revenues[-conv_window:])
    return avg_rev, time_to_converge, np.mean(1.0 - np.array(revenues))

# --------------------------------------------------------------
# 5) Main experiment loop
# --------------------------------------------------------------
def main_experiment(K=50):
    """Run ``K`` randomly configured bandit auction experiments.

    For each seed in ``range(K)`` a configuration is sampled (``eta``,
    exploration parameter ``c``, number of bidders, bandit type and auction
    type), the simulation is run through ``run_bandit_experiment`` with that
    seed, and the configuration plus its outcomes are collected as one row.

    Args:
        K: Number of experiments to run; seeds are ``0 .. K-1``.

    Returns:
        A pandas DataFrame with one row per experiment and the columns
        ``eta``, ``c``, ``auction_type``, ``bandit_type``, ``n_bidders``,
        ``avg_rev``, ``time_to_converge`` and ``avg_regret_seller``.
    """
    auction_type_options = ["first", "second"]
    bandit_type_options = ["ucb", "contextual"]

    results = []
    for seed in trange(K, desc="Generating experiments"):
        # Sample the configuration from a private generator keyed on the seed.
        # The global ``random`` state is unseeded on the first iteration and
        # is reseeded and consumed inside run_bandit_experiment, so drawing
        # from it made the sweep irreproducible and dependent on how many
        # tie-breaks the previous run happened to perform.
        param_rng = random.Random(f"hyperparams-{seed}")
        eta = param_rng.uniform(0.0, 1.0)
        c = param_rng.uniform(0.01, 2.0)  # exploration parameter
        n_bidders = param_rng.choice([2, 4, 6])
        bandit_type = param_rng.choice(bandit_type_options)
        auction_type = param_rng.choice(auction_type_options)

        avg_rev, time_to_converge, avg_regret = run_bandit_experiment(
            eta=eta,
            auction_type=auction_type,
            bandit_type=bandit_type,
            c=c,
            n_bidders=n_bidders,
            seed=seed,
        )

        results.append({
            "eta": eta,
            "c": c,
            "auction_type": auction_type,
            "bandit_type": bandit_type,
            "n_bidders": n_bidders,
            "avg_rev": avg_rev,
            "time_to_converge": time_to_converge,
            "avg_regret_seller": avg_regret,
        })

    return pd.DataFrame(results)

# --------------------------------------------------------------
# 6) Run and save data
# --------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: run 30 experiments and store the results as CSV.
    csv_path = "experiment3/data.csv"
    os.makedirs("experiment3", exist_ok=True)

    df = main_experiment(K=30)
    df.to_csv(csv_path, index=False)

    print(f"Data generation complete. Saved to '{csv_path}'.")
