In [12]:
# Let's do independent Q-learning in Airline Seats, and play it against random.
# RL is based on python/examples/independent_tabular_qlearning.py
from open_spiel.python import rl_environment
from open_spiel.python import rl_tools
from open_spiel.python.algorithms import tabular_qlearner

# Create the environment
env = rl_environment.Environment("airline_seats")
num_players = env.num_players
num_actions = env.action_spec()["num_actions"]

# Create the agents
agents = [
    tabular_qlearner.QLearner(player_id=idx, num_actions=num_actions)
    for idx in range(num_players)
]
# Train the Q-learning agents in self-play.
ep_rewards = 0.0
for cur_episode in range(25000):
  time_step = env.reset()
  while not time_step.last():
    player_id = time_step.observations["current_player"]
    agent_output = agents[player_id].step(time_step)
    time_step = env.step([agent_output.action])
  ep_reward = time_step.rewards[0]
  ep_rewards += ep_reward
  if cur_episode %1000 == 0:
    print(f"Episodes: {cur_episode} Reward: {ep_rewards/1000}")
    ep_rewards = 0.0
    
  # Episode is over, step all agents with final info state.
  for agent in agents:
    agent.step(time_step)
print("Done!")
# Evaluate the Q-learning agent against a random agent.
from open_spiel.python.algorithms import random_agent
eval_agents = [agents[0], random_agent.RandomAgent(1, num_actions, "Entropy Master 2000") ]

eval_rewards = [0.0, 0.0]
for i in range(1000):
    time_step = env.reset()
    while not time_step.last():
      player_id = time_step.observations["current_player"]
      # Note the evaluation flag. A Q-learner will set epsilon=0 here.
      agent_output = eval_agents[player_id].step(time_step, is_evaluation=True)
      time_step = env.step([agent_output.action])
    eval_rewards[0] += time_step.rewards[0]
    eval_rewards[1] += time_step.rewards[1]

print(f"Agent: {eval_rewards[0]/1000} Random: {eval_rewards[1]/1000}")

Episodes: 0 Reward: -1.74
Episodes: 1000 Reward: -3508.891
Episodes: 2000 Reward: -3600.432
Episodes: 3000 Reward: -3560.059
Episodes: 4000 Reward: -3512.851
Episodes: 5000 Reward: -3488.582
Episodes: 6000 Reward: -3565.956
Episodes: 7000 Reward: -3512.771
Episodes: 8000 Reward: -3423.055
Episodes: 9000 Reward: -3489.543
Episodes: 10000 Reward: -3406.598
Episodes: 11000 Reward: -3488.776
Episodes: 12000 Reward: -3400.617
Episodes: 13000 Reward: -3528.313
Episodes: 14000 Reward: -3580.066
Episodes: 15000 Reward: -3579.763
Episodes: 16000 Reward: -3532.979
Episodes: 17000 Reward: -3587.932
Episodes: 18000 Reward: -3549.46
Episodes: 19000 Reward: -3472.792
Episodes: 20000 Reward: -3515.662
Episodes: 21000 Reward: -3469.883
Episodes: 22000 Reward: -3541.295
Episodes: 23000 Reward: -3632.977
Episodes: 24000 Reward: -3522.108
Done!
Agent: -3707.077 Random: -3660.647
