## RL
In this project, you'll train two kinds of agents (DQN and CFR) on No-Limit Texas Hold'em poker and on Leduc Hold'em, a much smaller, simplified poker variant.

In [None]:
!pip3 install rlcard[torch]

In [None]:
import torch
import numpy as np
import pandas as pd

Leduc Hold'em

In [None]:
import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Seat layout used throughout this cell: the human plays seat 0 and the
# pre-trained CFR model plays seat 1 (see the order passed to set_agents).
HUMAN_SEAT = 0
CFR_SEAT = 1

# Make environment
env = rlcard.make('leduc-holdem')
human_agent = HumanAgent(env.num_actions)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([
    human_agent,
    cfr_agent,
])

print(">> Leduc Hold'em pre-trained model")

while True:
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    final_state = trajectories[HUMAN_SEAT][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']

    # Walk the action record backwards and collect entries until one made by
    # `state['current_player']` is found, then print them in chronological order.
    _action_list = []
    for record in reversed(action_record):
        if record[0] == state['current_player']:
            break
        _action_list.insert(0, record)
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])

    # Let's take a look at what the agent card is
    print('===============     CFR Agent    ===============')
    print_card(env.get_perfect_information()['hand_cards'][CFR_SEAT])

    print('===============     Result     ===============')
    human_payoff = payoffs[HUMAN_SEAT]
    if human_payoff > 0:
        print('You win {} chips!'.format(human_payoff))
    elif human_payoff == 0:
        print('It is a tie.')
    else:
        print('You lose {} chips!'.format(-human_payoff))
    print('')

    # input() only returns after Enter; offer an explicit way out so the cell
    # can finish without interrupting the kernel.
    reply = input("Press Enter to continue (or type 'q' to quit)...")
    if reply.strip().lower() in ('q', 'quit'):
        break

In [None]:
# Mount Google Drive at /content/gdrive.
# NOTE(review): `google.colab` is presumably only importable when running on
# Google Colab. None of the paths visible in this section of the notebook point
# into /content/gdrive, so confirm whether the mount is still needed.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import os
import argparse

import rlcard
from rlcard.agents import (
    CFRAgent,
    RandomAgent,
    DQNAgent
)
from rlcard.utils import (
    set_seed,
    tournament,
    Logger,
    plot_curve,
    reorganize,
)

def train(agent_type, game_type = 'leduc-holdem', seed = 42, num_episodes = 5000, evaluate_every = 100, num_eval_games = 2000, log_dir = "cfr/"):
    """Train a CFR or DQN agent on an RLCard game and plot its learning curve.

    The agent is periodically evaluated against a ``RandomAgent`` in a separate
    evaluation environment; the average payoff of the trained agent (seat 0) is
    logged and finally plotted with ``plot_curve``.

    Args:
        agent_type (str): ``"cfr"`` or ``"dqn"``.
        game_type (str): RLCard environment id passed to ``rlcard.make``.
        seed (int): Seed for numpy/torch/random (via ``set_seed``) and for both
            the training and the evaluation environment.
        num_episodes (int): Number of training iterations (CFR) or self-play
            episodes (DQN).
        evaluate_every (int): Evaluate (and, for CFR, save) every this many
            iterations.
        num_eval_games (int): Number of games played per evaluation.
        log_dir (str): Directory for logs, the learning curve and model files.

    Raises:
        ValueError: If ``agent_type`` is neither ``"cfr"`` nor ``"dqn"``.
    """
    # Fail early and clearly; otherwise an unknown type only surfaces later as
    # a NameError on the undefined `agent`.
    if agent_type not in ("cfr", "dqn"):
        raise ValueError(
            "agent_type must be 'cfr' or 'dqn', got {!r}".format(agent_type)
        )

    # Seed numpy, torch, random
    set_seed(seed)

    env = rlcard.make(
        game_type,
        config={
            'seed': seed,
            'allow_step_back': True,
        }
    )

    eval_env = rlcard.make(
        game_type,
        config={
            'seed': seed,
        }
    )

    if agent_type == "cfr":
        # Initialize CFR agent
        agent = CFRAgent(
            env,
            os.path.join(
                log_dir,
                'cfr_model',
            ),
        )

        agent.load()  # If we have saved model, we first load the model
    else:
        agent = DQNAgent(
            num_actions=env.num_actions,
            state_shape=env.state_shape[0],
            mlp_layers=[64,64],
            save_path = log_dir,
            # Integer interval: a fractional value (e.g. 1001 / 5) would never
            # be hit exactly by the agent's modulo-based checkpoint test.
            save_every = max(1, num_episodes // 5),
        )

        # Self-play: the same DQN agent occupies both seats.
        env.set_agents([agent, agent])

    # Evaluate Agent against random
    eval_env.set_agents([
        agent,
        RandomAgent(num_actions=env.num_actions),
    ])

    # Start training
    with Logger(log_dir) as logger:
        for episode in range(num_episodes):
            if agent_type == "cfr":
                agent.train()
            else:
                trajectories, payoffs = env.run(is_training=True)

                # Reorganize the data to be state, action, reward, next_state, done
                trajectories = reorganize(trajectories, payoffs)

                # Feed seat 0's transitions into agent memory, and train the agent
                for ts in trajectories[0]:
                    agent.feed(ts)

            print('\rIteration {}'.format(episode), end='')
            # Evaluate the performance. Play with Random agents.
            if episode % evaluate_every == 0:
                if agent_type == "cfr":
                    agent.save() # Save model

                logger.log_performance(
                    episode,
                    tournament(
                        eval_env,
                        num_eval_games
                    )[0]
                )

        # Persist the final model so the iterations after the last periodic
        # save are not lost.
        if agent_type == "cfr":
            agent.save()
        else:
            agent.save_checkpoint(log_dir)

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path
    # Plot the learning curve
    plot_curve(csv_path, fig_path, agent_type)


# Train a DQN agent on No-Limit Texas Hold'em; logs and checkpoints are
# written to "<agent_type>-<game_type>/".
agent_type = "dqn"
game_type = "no-limit-holdem"
train(
    agent_type,
    game_type=game_type,
    num_episodes=2000,
    log_dir="{}-{}/".format(agent_type, game_type),
)

In [None]:
# Head-to-head evaluation on Leduc Hold'em: our DQN agent (seat 0) against our
# CFR agent (seat 1). Expects "dqn/checkpoint_dqn.pt" and "cfr/cfr_model" to
# have been produced by earlier training runs on 'leduc-holdem'.
eval_env = rlcard.make(
    'leduc-holdem',
    config={
        'seed': 0,
    }
)

# Pre-trained reference model shipped with RLCard (not used in the match below).
pretrained_agent = models.load('leduc-holdem-cfr').agents[0]

# `from_checkpoint` is a classmethod that builds and RETURNS a restored agent.
# Calling it on an existing instance and ignoring the result would leave that
# instance with its freshly initialised (untrained) network.
checkpoint = torch.load("dqn/checkpoint_dqn.pt")
our_dqn_agent = DQNAgent.from_checkpoint(checkpoint)

our_cfr_agent = CFRAgent(
    eval_env,
    os.path.join(
        "cfr/",
        'cfr_model',
    ),
)

our_cfr_agent.load()

eval_env.set_agents([
    our_dqn_agent,
    our_cfr_agent,
])

# Payoffs are reported per seat, in the order given to set_agents.
out = tournament(eval_env, 1000)
print(out)

Play No-Limit Texas Hold'em against your AI

In [None]:
import rlcard
from rlcard import models
from rlcard.agents import NolimitholdemHumanAgent
from rlcard.utils import print_card

# Seat layout used throughout this cell: the trained DQN agent plays seat 0
# and the human plays seat 1 (see the order passed to set_agents).
DQN_SEAT = 0
HUMAN_SEAT = 1

# Make environment
env = rlcard.make('no-limit-holdem')

human_agent = NolimitholdemHumanAgent(env.num_actions)

# `from_checkpoint` is a classmethod that builds and RETURNS a restored agent,
# so its result must be kept; otherwise the opponent would be an untrained net.
checkpoint = torch.load("dqn-no-limit-holdem/checkpoint_dqn.pt")
our_dqn_agent = DQNAgent.from_checkpoint(checkpoint)

env.set_agents([
    our_dqn_agent,
    human_agent,
])

print(">> Play No-Limit Texas Hold'em against the trained DQN agent")

while True:
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print other players action
    final_state = trajectories[HUMAN_SEAT][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']

    # Walk the action record backwards and collect entries until one made by
    # `state['current_player']` is found, then print them in chronological order.
    _action_list = []
    for record in reversed(action_record):
        if record[0] == state['current_player']:
            break
        _action_list.insert(0, record)
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])

    # Let's take a look at what the agent card is
    print('===============     DQN Agent    ===============')
    print_card(env.get_perfect_information()['hand_cards'][DQN_SEAT])

    # Report the result from the human's seat; seat 0 belongs to the DQN agent.
    print('===============     Result     ===============')
    human_payoff = payoffs[HUMAN_SEAT]
    if human_payoff > 0:
        print('You win {} chips!'.format(human_payoff))
    elif human_payoff == 0:
        print('It is a tie.')
    else:
        print('You lose {} chips!'.format(-human_payoff))
    print('')

    # input() only returns after Enter; offer an explicit way out so the cell
    # can finish without interrupting the kernel.
    reply = input("Press Enter to continue (or type 'q' to quit)...")
    if reply.strip().lower() in ('q', 'quit'):
        break