In [1]:
import rlcard
from rlcard.utils import set_seed, get_device
import numpy as np
from deep_cfr_agent import DeepCFRAgent

In [2]:
# Environment setup
# Use one seed for both the global RNGs (set_seed) and the environment's own
# RNG (the 'seed' config entry); without the latter the env's randomness is
# not tied to the seed and runs are not reproducible.
SEED = 24
set_seed(SEED)
device = get_device()
# NOTE(review): if DeepCFRAgent.sample_advantage_memory walks the game tree
# via env.step_back(), the env must also be created with
# 'allow_step_back': True -- confirm against deep_cfr_agent.
env = rlcard.make(
    'no-limit-holdem',
    config={'seed': SEED, 'game_num_players': 3},
)



--> Running on the GPU


In [3]:
# Create agents
# One DeepCFRAgent per seat. The action count, per-player state shape and
# number of seats are read from the environment instead of being hard-coded,
# so the network dimensions cannot drift from the game that is actually
# being played.
# NOTE(review): assumes DeepCFRAgent consumes the env's own observation
# vector; if it builds custom features, state_shape must match those instead.
agents = [
    DeepCFRAgent(
        num_actions=env.num_actions,
        state_shape=tuple(env.state_shape[player_id]),
        hidden_dim=128,
        lr=0.001,
        batch_size=64,
        device=device,
    )
    for player_id in range(env.num_players)
]

env.set_agents(agents)

In [4]:
# ====== Training parameters ======
num_iterations = 100
num_traversals_per_iter = 100
eval_every = 10       # run an evaluation after every N training iterations
num_eval_games = 100  # number of games played per evaluation

# ====== Training loop ======
# Each iteration: every agent samples advantage memory from the shared env,
# then every agent trains its networks; every `eval_every` iterations the
# agents play `num_eval_games` games and the mean payoff per seat is printed.
# NOTE(review): the recorded run stopped with KeyError: 'payoffs' right after
# the first "Sampling advantage memory" message -- presumably raised inside
# DeepCFRAgent.sample_advantage_memory; investigate in deep_cfr_agent.
for iteration in range(num_iterations):
    print(f"=== Deep CFR Training Iteration {iteration+1}/{num_iterations} ===")

    # --- Advantage Memory Sampling ---
    for i, agent in enumerate(agents):
        print(f"Sampling advantage memory for Player {i}...")
        agent.sample_advantage_memory(env, num_traversals=num_traversals_per_iter)

    # --- Train Networks ---
    for i, agent in enumerate(agents):
        print(f"Training networks for Player {i}...")
        agent.train_networks()

    # --- Periodic Evaluation ---
    if (iteration + 1) % eval_every == 0:
        print("Evaluating agents...")
        payoffs = []
        for _ in range(num_eval_games):
            # env.run returns (trajectories, payoffs); only payoffs are needed.
            _, rewards = env.run(is_training=False)
            payoffs.append(rewards)

        # Average over games (axis 0) to get one mean payoff per player.
        avg_payoffs = np.mean(payoffs, axis=0)
        print(f"Average payoffs after iteration {iteration+1}: {avg_payoffs}")

print("Training complete.")

=== Deep CFR Training Iteration 1/100 ===
Sampling advantage memory for Player 0...


KeyError: 'payoffs'