In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

import numpy as np
import random
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque, namedtuple
import time


In [None]:
# ---------- AUCTION ENVIRONMENT ----------

class AuctionEnv:
    """Simulated ascending auction with one learning agent and scripted rivals.

    At every time step the agent picks an index into ``self.actions`` (a bid
    increment, where 0 means "do not bid").  After the agent has moved, the
    scripted rivals (snipers, incrementalists and jump bidders) may raise the
    price.  The episode ends after ``max_time_steps`` steps; the agent is
    rewarded ``agent_private_value - current_price`` only if it holds the
    standing bid at that moment, otherwise the reward is 0.
    """

    def __init__(self,
                 max_time_steps=100,
                 min_item_value=50,
                 max_item_value=200,
                 n_snipers=1,
                 n_incrementalists=1,
                 n_jump_bidders=1,
                 min_sniper_value=50,
                 max_sniper_value=150,
                 min_incrementalist_value=50,
                 max_incrementalist_value=150,
                 min_jump_value=50,
                 max_jump_value=150):
        """Store the auction configuration and start a first episode.

        Args:
            max_time_steps: Number of steps before the auction closes.
            min_item_value, max_item_value: Range from which the agent's
                private value is drawn; ``max_item_value`` is also the
                normalisation constant for prices and values in the state.
            n_snipers, n_incrementalists, n_jump_bidders: How many rivals of
                each type are created on every reset.
            min_*_value, max_*_value: Range of the private value drawn for
                each rival of the corresponding type.
        """
        self.max_time_steps = max_time_steps
        self.min_item_value = min_item_value
        self.max_item_value = max_item_value

        # Other bidder counts
        self.n_snipers = n_snipers
        self.n_incrementalists = n_incrementalists
        self.n_jump_bidders = n_jump_bidders

        # Value ranges for other bidders
        self.min_sniper_value = min_sniper_value
        self.max_sniper_value = max_sniper_value
        self.min_incrementalist_value = min_incrementalist_value
        self.max_incrementalist_value = max_incrementalist_value
        self.min_jump_value = min_jump_value
        self.max_jump_value = max_jump_value

        # Available bid increments (actions)
        self.actions = [0, 1, 5, 10]  # 0 = no bid, others are bid increments
        self.action_space = len(self.actions)

        # State space components
        self.state_dim = 3  # time remaining, current price, private value

        # Initialize the environment
        self.reset()

    def reset(self):
        """Start a new episode and return its initial state.

        Resets time, price and winner flags, draws a fresh private value for
        the agent and re-creates every rival with newly sampled parameters.

        Returns:
            The initial state as produced by ``_get_state``.
        """
        # Initialize auction state
        self.current_time_step = 0
        self.current_price = 0
        self.last_bid_change = 0
        self.auction_ended = False
        self.agent_is_winner = False

        # Generate a random private value for the agent
        self.agent_private_value = random.uniform(self.min_item_value, self.max_item_value)

        # Initialize other bidders
        self.snipers = []
        self.incrementalists = []
        self.jump_bidders = []

        # Snipers bid once, at a step drawn from the last five steps.
        for _ in range(self.n_snipers):
            value = random.uniform(self.min_sniper_value, self.max_sniper_value)
            snipe_time = random.randint(self.max_time_steps - 5, self.max_time_steps - 1)
            self.snipers.append({
                'value': value,
                'snipe_time': snipe_time,
                'has_bid': False
            })

        # Incrementalists bid a small fixed increment at a fixed frequency.
        for _ in range(self.n_incrementalists):
            value = random.uniform(self.min_incrementalist_value, self.max_incrementalist_value)
            bid_frequency = random.randint(5, 15)  # How often they bid
            bid_increment = random.choice([1, 2, 3])  # How much they increase
            self.incrementalists.append({
                'value': value,
                'bid_frequency': bid_frequency,
                'bid_increment': bid_increment,
                'last_bid_time': -bid_frequency  # To ensure they can bid at the start
            })

        # Jump bidders place one large bid between 10% and 40% of the horizon.
        for _ in range(self.n_jump_bidders):
            value = random.uniform(self.min_jump_value, self.max_jump_value)
            jump_time = random.randint(int(0.1 * self.max_time_steps),
                                       int(0.4 * self.max_time_steps))
            jump_fraction = random.uniform(0.4, 0.7)  # They bid this fraction of their value
            self.jump_bidders.append({
                'value': value,
                'jump_time': jump_time,
                'jump_fraction': jump_fraction,
                'has_jumped': False
            })

        # Return the initial state
        return self._get_state()

    def _get_state(self):
        """Return the current state as a length-3 numpy array.

        The entries are the fraction of time remaining, the current price
        divided by ``max_item_value`` and the agent's private value divided by
        ``max_item_value``.
        """
        normalized_time = (self.max_time_steps - self.current_time_step) / self.max_time_steps
        normalized_price = self.current_price / self.max_item_value
        normalized_value = self.agent_private_value / self.max_item_value

        return np.array([normalized_time, normalized_price, normalized_value])

    def step(self, action_idx):
        """Apply the agent's action, let the rivals react and advance time.

        Args:
            action_idx: Index into ``self.actions``.

        Returns:
            A tuple ``(next_state, reward, done, info)``.  ``reward`` is
            non-zero only on the final step and only if the agent holds the
            standing bid; ``info`` is always an empty dict.  Calling ``step``
            after the auction has ended returns the current state with zero
            reward and ``done=True``.
        """
        if self.auction_ended:
            return self._get_state(), 0, True, {}

        # Process the agent's action
        bid_increment = self.actions[action_idx]

        # A positive increment always exceeds the standing price, so the
        # agent becomes the current leader whenever it bids.
        if bid_increment > 0:
            self.last_bid_change = bid_increment
            self.current_price += bid_increment
            self.agent_is_winner = True

        # Process other bidders' actions
        self._process_other_bidders()

        # Increment time step
        self.current_time_step += 1

        # Check if auction has ended
        done = self.current_time_step >= self.max_time_steps

        # Calculate reward (only at the end of auction)
        reward = 0
        if done:
            self.auction_ended = True
            if self.agent_is_winner:
                reward = self.agent_private_value - self.current_price

        return self._get_state(), reward, done, {}

    def _accept_rival_bid(self, bid):
        """Record a rival's bid if it beats the standing price.

        The price delta is computed *before* the price is overwritten so that
        ``last_bid_change`` holds the actual size of the raise.
        """
        if bid <= self.current_price:
            return
        self.last_bid_change = bid - self.current_price
        self.current_price = bid
        self.agent_is_winner = False

    def _process_other_bidders(self):
        """Let every scripted rival act for the current time step.

        Rivals act in a fixed order (snipers, incrementalists, jump bidders)
        and never bid above their own private value.
        """
        # Snipers: a single late bid, slightly above the standing price.
        for sniper in self.snipers:
            if sniper['has_bid'] or self.current_time_step < sniper['snipe_time']:
                continue
            if sniper['value'] > self.current_price:
                bid = min(sniper['value'], self.current_price + random.choice([1, 2, 5]))
                self._accept_rival_bid(bid)
            # The sniper's one opportunity is consumed even if it did not bid.
            sniper['has_bid'] = True

        # Incrementalists: periodic small raises.
        for inc in self.incrementalists:
            if self.current_time_step - inc['last_bid_time'] < inc['bid_frequency']:
                continue
            if inc['value'] > self.current_price:
                bid = min(inc['value'], self.current_price + inc['bid_increment'])
                self._accept_rival_bid(bid)
            # The timer restarts whether or not a bid was placed.
            inc['last_bid_time'] = self.current_time_step

        # Jump bidders: one large bid at a pre-drawn time step.
        for jumper in self.jump_bidders:
            if jumper['has_jumped'] or self.current_time_step != jumper['jump_time']:
                continue
            if jumper['value'] > self.current_price:
                # Jump to a significant fraction of their value, but always
                # at least one unit above the standing price.
                target = max(self.current_price + 1, jumper['value'] * jumper['jump_fraction'])
                bid = min(jumper['value'], target)
                self._accept_rival_bid(bid)
            jumper['has_jumped'] = True


In [None]:
# ---------- DEEP Q-NETWORK AGENT ----------

class QNetwork(nn.Module):
    """Fully connected network mapping a state vector to one Q-value per action.

    Two hidden layers of 64 units with ReLU activations are followed by a
    linear output layer of size ``action_dim``.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(state_dim, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_dim)

    def forward(self, state):
        """Return the Q-values for ``state`` (last dimension ``state_dim``)."""
        hidden = state
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)

class DQNAgent:
    """Deep Q-learning agent with experience replay and a target network.

    Actions are chosen epsilon-greedily from ``q_network``.  Transitions are
    kept in a bounded replay memory; ``replay`` samples a mini-batch from it,
    regresses ``q_network`` towards one-step bootstrapped targets computed
    with ``target_network`` and decays ``epsilon``.
    """

    def __init__(self, state_dim, action_dim, learning_rate=0.001, gamma=0.99,
                 epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995,
                 memory_size=10000, batch_size=64):
        """Build the networks, the optimizer and the replay memory.

        Args:
            state_dim: Size of the state vector fed to the network.
            action_dim: Number of discrete actions.
            learning_rate: Learning rate of the Adam optimizer.
            gamma: Discount factor used in the bootstrapped target.
            epsilon: Initial exploration rate.
            epsilon_min: Lower bound that decay never goes below.
            epsilon_decay: Multiplicative decay applied on every ``replay``
                call that performs an update.
            memory_size: Maximum number of stored transitions.
            batch_size: Mini-batch size sampled by ``replay``.
        """
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma  # discount factor
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.learning_rate = learning_rate
        self.memory = deque(maxlen=memory_size)
        self.batch_size = batch_size
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Online network and a frozen copy used for bootstrapped targets.
        self.q_network = QNetwork(state_dim, action_dim).to(self.device)
        self.target_network = QNetwork(state_dim, action_dim).to(self.device)
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.target_network.eval()

        self.optimizer = optim.Adam(self.q_network.parameters(), lr=learning_rate)

        # Record type for stored experiences
        self.Transition = namedtuple('Transition',
                                     ('state', 'action', 'next_state', 'reward', 'done'))

    def select_action(self, state):
        """Return an action index chosen with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned,
        otherwise the action with the highest predicted Q-value.
        """
        if random.random() < self.epsilon:
            return random.randrange(self.action_dim)

        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
            q_values = self.q_network(state_tensor)
            return q_values.argmax().item()

    def remember(self, state, action, next_state, reward, done):
        """Append one transition to the replay memory."""
        self.memory.append(self.Transition(state, action, next_state, reward, done))

    def replay(self):
        """Run one gradient step on a random mini-batch from memory.

        Does nothing until at least ``batch_size`` transitions are stored.
        After the update ``epsilon`` is decayed, but never below
        ``epsilon_min``.
        """
        if len(self.memory) < self.batch_size:
            return

        # Sample a mini-batch and transpose it into one tuple per field.
        transitions = random.sample(self.memory, self.batch_size)
        batch = self.Transition(*zip(*transitions))

        # Convert to PyTorch tensors; scalars become column vectors so they
        # line up with the gathered Q-values below.
        state_batch = torch.FloatTensor(np.array(batch.state)).to(self.device)
        action_batch = torch.LongTensor(batch.action).unsqueeze(1).to(self.device)
        reward_batch = torch.FloatTensor(batch.reward).unsqueeze(1).to(self.device)
        next_state_batch = torch.FloatTensor(np.array(batch.next_state)).to(self.device)
        done_batch = torch.FloatTensor(batch.done).unsqueeze(1).to(self.device)

        # Q(s_t, a_t) for the actions that were actually taken
        q_values = self.q_network(state_batch).gather(1, action_batch)

        # max_a Q_target(s_{t+1}, a); no gradient flows into the target net
        with torch.no_grad():
            next_q_values = self.target_network(next_state_batch).max(1)[0].unsqueeze(1)

        # Terminal transitions (done == 1) do not bootstrap.
        expected_q_values = reward_batch + (self.gamma * next_q_values * (1 - done_batch))

        loss = F.mse_loss(q_values, expected_q_values)

        # Optimize the model
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Decay the exploration rate, clamping so that a single decay step
        # cannot overshoot below the configured minimum.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def update_target_network(self):
        """Copy the online network's weights into the target network."""
        self.target_network.load_state_dict(self.q_network.state_dict())

    def save(self, filename):
        """Save both networks and the optimizer state to ``filename``."""
        torch.save({
            'q_network_state_dict': self.q_network.state_dict(),
            'target_network_state_dict': self.target_network.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
        }, filename)

    def load(self, filename):
        """Restore both networks and the optimizer state from ``filename``.

        Tensors are mapped onto ``self.device`` so that a checkpoint written
        on a GPU machine can be loaded on a CPU-only one.
        """
        checkpoint = torch.load(filename, map_location=self.device)
        self.q_network.load_state_dict(checkpoint['q_network_state_dict'])
        self.target_network.load_state_dict(checkpoint['target_network_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# ---------- TRAINING LOOP ----------

def train(env, agent, episodes=1000, target_update=10, display_freq=100):
    """Run the training loop and return per-episode metrics.

    For every step of every episode the agent selects an action, the
    transition is stored and one replay update is performed.  The target
    network is refreshed every ``target_update`` episodes and a progress line
    is printed every ``display_freq`` episodes.

    Returns:
        Three lists with one entry per episode: total reward, whether the
        agent ended as winner, and the agent's profit (0 when it lost).
    """
    rewards, wins, profits = [], [], []

    for episode in range(1, episodes + 1):
        observation = env.reset()
        episode_return = 0
        finished = False

        while not finished:
            chosen = agent.select_action(observation)
            following, step_reward, finished, _ = env.step(chosen)

            # Store the transition, then learn from a sampled mini-batch.
            agent.remember(observation, chosen, following, step_reward, finished)
            observation = following
            episode_return += step_reward
            agent.replay()

        # Periodically sync the target network with the online network.
        if episode % target_update == 0:
            agent.update_target_network()

        # Per-episode bookkeeping.
        rewards.append(episode_return)
        wins.append(env.agent_is_winner)
        profits.append(env.agent_private_value - env.current_price if env.agent_is_winner else 0)

        # Progress report over the most recent window of episodes.
        if episode % display_freq == 0:
            recent = slice(-display_freq, None)
            win_rate = sum(wins[recent]) / display_freq
            avg_reward = sum(rewards[recent]) / display_freq
            avg_profit = sum(profits[recent]) / display_freq
            print(f"Episode: {episode}, Win Rate: {win_rate:.2f}, Avg Reward: {avg_reward:.2f}, Avg Profit: {avg_profit:.2f}, Epsilon: {agent.epsilon:.2f}")

    return rewards, wins, profits


In [None]:

# ---------- EVALUATION ----------

def evaluate(env, agent, episodes=100):
    """Run the agent greedily (no exploration) and report its performance.

    Args:
        env: Environment exposing ``reset``, ``step``, ``actions``,
            ``current_time_step``, ``max_time_steps``, ``agent_is_winner``,
            ``agent_private_value`` and ``current_price``.
        agent: Agent exposing ``q_network`` and ``device``.
        episodes: Number of evaluation episodes.

    Returns:
        A tuple ``(rewards, wins, profits, bid_times, bid_amounts)``.  The
        first three hold one value per episode (profit is 0 for lost
        auctions); the last two hold, per episode, the normalised time and
        the increment of every bid the agent placed.
    """
    rewards = []
    wins = []
    profits = []
    bid_times = []  # When the agent bids
    bid_amounts = []  # How much the agent bids

    for _ in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        episode_bid_times = []
        episode_bid_amounts = []

        while not done:
            # Greedy action: argmax over the predicted Q-values.
            with torch.no_grad():
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(agent.device)
                q_values = agent.q_network(state_tensor)
                action = q_values.argmax().item()

            # Record the bid (time is read before the step advances it).
            if env.actions[action] > 0:
                episode_bid_times.append(env.current_time_step / env.max_time_steps)
                episode_bid_amounts.append(env.actions[action])

            state, reward, done, _ = env.step(action)
            total_reward += reward

        # Track metrics
        rewards.append(total_reward)
        wins.append(env.agent_is_winner)
        if env.agent_is_winner:
            profits.append(env.agent_private_value - env.current_price)
        else:
            profits.append(0)

        bid_times.append(episode_bid_times)
        bid_amounts.append(episode_bid_amounts)

    # Guard against episodes == 0 so the summary never divides by zero.
    episode_count = max(episodes, 1)
    win_count = sum(wins)
    win_rate = win_count / episode_count
    avg_reward = sum(rewards) / episode_count
    # Average over *all* won auctions, including those won at a loss
    # (price above private value); dropping them would overstate the profit.
    winning_profit = sum(p for won, p in zip(wins, profits) if won)
    avg_profit = winning_profit / win_count if win_count > 0 else 0.0

    print(f"Evaluation Results:")
    print(f"Win Rate: {win_rate:.2f}")
    print(f"Average Reward: {avg_reward:.2f}")
    print(f"Average Profit (when winning): {avg_profit:.2f}")

    return rewards, wins, profits, bid_times, bid_amounts


In [None]:

# ---------- VISUALIZATION ----------

def plot_training_metrics(rewards, wins, profits):
    """Plot per-episode reward, moving-average win rate and winning profits.

    Args:
        rewards: Total reward of each training episode.
        wins: Per-episode flag telling whether the agent won.
        profits: Per-episode profit (expected to be 0 for lost auctions).
    """
    plt.figure(figsize=(15, 5))

    # Panel 1: raw reward of every episode
    plt.subplot(1, 3, 1)
    plt.plot(rewards)
    plt.title('Average Reward per Episode')
    plt.xlabel('Episode')
    plt.ylabel('Reward')

    # Panel 2: moving-average win rate.  The window shrinks for runs shorter
    # than 100 episodes so the panel is not left empty.
    plt.subplot(1, 3, 2)
    window_size = max(1, min(100, len(wins)))
    win_rate = [sum(wins[i:i+window_size])/window_size for i in range(0, len(wins)-window_size+1)]
    plt.plot(range(window_size, window_size + len(win_rate)), win_rate)
    plt.title(f'Win Rate (Moving Average, Window={window_size})')
    plt.xlabel('Episode')
    plt.ylabel('Win Rate')

    # Panel 3: profit of every won auction.  Selection is by the win flag,
    # not by the sign of the profit, so auctions won at break-even or at a
    # loss are shown as well.
    plt.subplot(1, 3, 3)
    winning_profits = [(i, p) for i, (won, p) in enumerate(zip(wins, profits)) if won]
    if winning_profits:
        indices, values = zip(*winning_profits)
        plt.scatter(indices, values, alpha=0.5)
        plt.title('Profit When Winning')
        plt.xlabel('Episode')
        plt.ylabel('Profit')

    plt.tight_layout()
    plt.show()

def plot_bidding_behavior(bid_times, bid_amounts):
    """Visualise when and how much the agent bid across evaluation episodes.

    Args:
        bid_times: One list per episode with the normalised time of each bid.
        bid_amounts: One list per episode with the increment of each bid.
    """
    plt.figure(figsize=(15, 6))

    # Merge the per-episode lists into two flat sequences.
    flat_times = []
    for per_episode in bid_times:
        flat_times.extend(per_episode)
    flat_amounts = []
    for per_episode in bid_amounts:
        flat_amounts.extend(per_episode)

    if flat_times:
        # Scatter of bid timing against bid size.
        plt.scatter(flat_times, flat_amounts, alpha=0.5)
        plt.title('Agent Bidding Behavior')
        plt.xlabel('Normalized Time in Auction')
        plt.ylabel('Bid Increment Amount')

        # Second figure: histogram of when bids were placed.
        plt.figure(figsize=(15, 6))
        plt.hist(flat_times, bins=20)
        plt.title('Distribution of Agent Bid Timings')
        plt.xlabel('Normalized Time in Auction')
        plt.ylabel('Frequency')
    else:
        # The agent never bid: show a placeholder message instead.
        plt.text(0.5, 0.5, 'No bids placed by the agent',
                 horizontalalignment='center', verticalalignment='center')

    plt.tight_layout()
    plt.show()

def evaluate_against_specific_opponents(agent, episodes=100):
    """Evaluate the agent against several fixed opponent line-ups.

    A fresh ``AuctionEnv`` is created for each scenario (three rivals of a
    single type, or one of each) and the agent is run through ``evaluate``.

    Args:
        agent: Trained agent passed on to ``evaluate``.
        episodes: Number of evaluation episodes per scenario.

    Returns:
        A dict mapping the scenario name to a dict with the keys
        ``"win_rate"``, ``"avg_reward"`` and ``"avg_profit"``.
    """
    scenarios = [
        {"name": "Only Snipers", "snipers": 3, "incrementalists": 0, "jump_bidders": 0},
        {"name": "Only Incrementalists", "snipers": 0, "incrementalists": 3, "jump_bidders": 0},
        {"name": "Only Jump Bidders", "snipers": 0, "incrementalists": 0, "jump_bidders": 3},
        {"name": "Mixed (1 each)", "snipers": 1, "incrementalists": 1, "jump_bidders": 1},
    ]

    results = {}
    # Guard against episodes == 0 so the averages never divide by zero.
    episode_count = max(episodes, 1)

    for scenario in scenarios:
        print(f"\nEvaluating against {scenario['name']}...")

        # Create a specific environment
        env = AuctionEnv(
            n_snipers=scenario["snipers"],
            n_incrementalists=scenario["incrementalists"],
            n_jump_bidders=scenario["jump_bidders"]
        )

        # Evaluate
        rewards, wins, profits, _, _ = evaluate(env, agent, episodes)

        win_count = sum(wins)
        # Average over every won auction, including those won at a loss;
        # filtering on positive profit would overstate the result.
        winning_profit = sum(p for won, p in zip(wins, profits) if won)

        results[scenario["name"]] = {
            "win_rate": win_count / episode_count,
            "avg_reward": sum(rewards) / episode_count,
            "avg_profit": winning_profit / win_count if win_count > 0 else 0.0
        }

    # Display results
    print("\nComparison Against Different Opponent Types:")
    for name, metrics in results.items():
        print(f"{name}:")
        print(f"  Win Rate: {metrics['win_rate']:.2f}")
        print(f"  Avg Reward: {metrics['avg_reward']:.2f}")
        print(f"  Avg Profit (when winning): {metrics['avg_profit']:.2f}")

    return results


In [None]:
# ---------- MAIN FUNCTION ----------

def main():
    """Train a DQN bidder, save it, then plot and print its performance."""
    # Auction with one rival of each type.
    env_settings = dict(
        max_time_steps=100,
        min_item_value=50,
        max_item_value=200,
        n_snipers=1,
        n_incrementalists=1,
        n_jump_bidders=1,
    )
    env = AuctionEnv(**env_settings)

    # Agent sized to the environment's state and action spaces.
    agent_settings = dict(
        state_dim=env.state_dim,
        action_dim=env.action_space,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,
        epsilon_min=0.01,
        epsilon_decay=0.995,
        memory_size=10000,
        batch_size=64,
    )
    agent = DQNAgent(**agent_settings)

    # Training, checkpointing and training curves.
    print("Starting training...")
    rewards, wins, profits = train(env, agent, episodes=5000, target_update=10, display_freq=100)
    agent.save("auction_bidder_dqn.pth")
    plot_training_metrics(rewards, wins, profits)

    # Greedy evaluation on the training environment; only the bid traces
    # are needed for the behaviour plot.
    print("\nEvaluating the agent...")
    bid_times, bid_amounts = evaluate(env, agent, episodes=100)[3:]
    plot_bidding_behavior(bid_times, bid_amounts)

    # Evaluation against fixed opponent line-ups.
    evaluate_against_specific_opponents(agent)


if __name__ == "__main__":
    main()