<a href="https://colab.research.google.com/github/sebastianT5/RL-Poker/blob/main/Sebastian_Tejada_RL_Poker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reinforcement Learning



## Poker
In this project, I trained two kinds of RL agents (DQN and CFR) on No-Limit Texas Hold'em poker and on a simpler variant of poker called Leduc Hold'em. We'll primarily use a nice library called rlcard.

https://github.com/datamllab/rlcard/tree/master

This project is less about understanding the inner mechanisms of these RL algorithms and more about working on your programming skills. You should spend a little bit of time reading through the repo before attempting the project.

In [None]:
# Install rlcard together with its PyTorch extras.
# `%pip` (instead of `!pip3`) installs into the environment of the running kernel,
# and the version is pinned to the release this notebook was originally run with
# so that a fresh "Restart & Run All" resolves the same library code.
%pip install -q "rlcard[torch]==1.2.0"

Collecting rlcard[torch]
  Downloading rlcard-1.2.0.tar.gz (269 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/269.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.7/269.0 kB[0m [31m2.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m266.2/269.0 kB[0m [31m4.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.0/269.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting GitPython (from rlcard[torch])
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gitdb2 (from rlcard[torch])
  Downloading gitdb2-4.0.2-py3-none-any.whl (1.1 kB)
Collecting gitdb>=4.0.1 (from gitdb2->rlcard[to

In [None]:
import torch
import numpy as np
import pandas as pd

Leduc Hold'em

In [None]:
import rlcard
from rlcard import models
from rlcard.agents import LeducholdemHumanAgent as HumanAgent
from rlcard.utils import print_card

# Set up a Leduc Hold'em table: the human sits in seat 0 and the
# pre-trained CFR model shipped with rlcard sits in seat 1.
env = rlcard.make('leduc-holdem')
human_agent = HumanAgent(env.num_actions)
cfr_agent = models.load('leduc-holdem-cfr').agents[0]
env.set_agents([human_agent, cfr_agent])

print(">> Leduc Hold'em pre-trained model")

# Play hands forever; interrupt the kernel to stop.
while True:
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)

    # The last entry of seat 0's trajectory is the state at the end of the hand.
    final_state = trajectories[0][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']

    # Walk the action record backwards and collect every trailing action up to
    # (but not including) the most recent one made by `current_player`, so the
    # moves made after the human's last turn can be shown.
    _action_list = []
    for entry in reversed(action_record):
        if entry[0] == state['current_player']:
            break
        _action_list.append(entry)
    _action_list.reverse()  # restore chronological order

    for entry in _action_list:
        print('>> Player', entry[0], 'chooses', entry[1])

    # Reveal the card held by the CFR agent (seat 1).
    print('===============     CFR Agent    ===============')
    perfect_information = env.get_perfect_information()
    print_card(perfect_information['hand_cards'][1])

    # Report the outcome from the human's (seat 0) point of view.
    print('===============     Result     ===============')
    human_payoff = payoffs[0]
    if human_payoff > 0:
        result_message = 'You win {} chips!'.format(human_payoff)
    elif human_payoff == 0:
        result_message = 'It is a tie.'
    else:
        result_message = 'You lose {} chips!'.format(-human_payoff)
    print(result_message)
    print('')

    input("Press any key to continue...")

>> Leduc Hold'em pre-trained model
>> Start a new game
>> Player 1 chooses raise

┌─────────┐
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
└─────────┘
┌─────────┐
│J        │
│         │
│         │
│    ♥    │
│         │
│         │
│        J│
└─────────┘
Yours:   ++
Agent 1: ++++
0: call, 1: raise, 2: fold

>> You choose action (integer): 3
Action illegal...
>> Re-choose action (integer): 2
>> Player 0 chooses fold
┌─────────┐
│Q        │
│         │
│         │
│    ♠    │
│         │
│         │
│        Q│
└─────────┘
You lose 1.0 chips!

Press any key to continue...
>> Start a new game
>> Player 1 chooses raise

┌─────────┐
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
│░░░░░░░░░│
└─────────┘
┌─────────┐
│K        │
│         │
│         │
│    ♥    │
│         │
│         │
│        K│
└─────────┘
Yours:   ++
Agent 1: ++++
0: call, 1: raise, 2: fold

>> You choose action (integer): 1
>> Player 1 chooses call

┌──

In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
# NOTE(review): this only works on Google Colab (`google.colab` is not available
# elsewhere), and the cells visible here write their logs/checkpoints to relative
# paths rather than under the mount point — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import os
import argparse

import rlcard
from rlcard.agents import (
    CFRAgent,
    RandomAgent,
    DQNAgent
)
from rlcard.utils import (
    set_seed,
    tournament,
    Logger,
    plot_curve,
    reorganize,
)

def train(agent_type, game_type = 'leduc-holdem', seed = 42, num_episodes = 5000, evaluate_every = 100, num_eval_games = 2000, log_dir = "cfr/"):
    """
    Train a CFR or DQN agent on an rlcard poker game and log its progress.

    The agent is trained for ``num_episodes`` iterations. Every ``evaluate_every``
    iterations it plays ``num_eval_games`` games against a ``RandomAgent`` and the
    result for the trained agent's seat is written through rlcard's ``Logger``.
    When training finishes, the logged performance is plotted with ``plot_curve``.
    Refer to this documentation for additional insights: https://github.com/datamllab/rlcard/blob/master/docs/toy-examples.md

    Args:
        agent_type: Which algorithm to train, either ``"cfr"`` or ``"dqn"``.
        game_type: Name of the rlcard environment, e.g. ``'leduc-holdem'`` or
            ``'no-limit-holdem'``.
        seed: Value handed to ``set_seed``. Note that both environments are
            created with a fixed environment seed of 0, independent of this value.
        num_episodes: Number of training iterations (CFR) or self-play episodes (DQN).
        evaluate_every: Evaluate (and, for CFR, save) every this many iterations.
        num_eval_games: Number of games played in each evaluation tournament.
        log_dir: Directory used for the logs, the CFR model (``<log_dir>/cfr_model``)
            and the DQN checkpoints.

    Raises:
        ValueError: If ``agent_type`` is neither ``"cfr"`` nor ``"dqn"``.
    """
    # Fail fast on an unsupported agent type. Without this check neither branch
    # below would create `agent`, and the function would crash later with an
    # unrelated NameError.
    if agent_type not in ("cfr", "dqn"):
        raise ValueError(
            "Unsupported agent_type {!r}; expected 'cfr' or 'dqn'".format(agent_type)
        )

    # Training environment. `allow_step_back` lets the CFR agent traverse the game tree.
    env = rlcard.make(
        game_type,
        config={
            'seed': 0,
            'allow_step_back': True,
        }
    )

    # Separate environment that is only used for evaluation tournaments.
    eval_env = rlcard.make(
        game_type,
        config={
            'seed': 0,
        }
    )

    # Seed numpy, torch, random number generators for reproducibility
    set_seed(seed)

    # Initialize the agent based on the specified type
    if agent_type == "cfr":
        agent = CFRAgent(
            env,
            os.path.join(
                log_dir,
                'cfr_model',
            ),
        )

        agent.load()  # Load a saved model if available
    else:  # agent_type == "dqn"
        agent = DQNAgent(
            num_actions=env.num_actions,
            state_shape=env.state_shape[0],
            mlp_layers=[64,64],
            save_path = log_dir,
            # Checkpoint interval must be a positive whole number: the original
            # `num_episodes / 5` produced a float (and a value below 1 for tiny runs).
            save_every = max(1, num_episodes // 5),
        )

        # Self-play: the same DQN agent occupies both seats of the training table.
        env.set_agents([agent, agent])

    # Set evaluation environment agents: the trained agent in seat 0 against a
    # random opponent in seat 1 (this assumes a two-seat game).
    eval_env.set_agents([
        agent,
        RandomAgent(num_actions=env.num_actions),
    ])

    # Start training process
    with Logger(log_dir) as logger:
        for episode in range(num_episodes):
            if agent_type == "cfr":
                agent.train()
            else:
                trajectories, payoffs = env.run(is_training=True)

                # Reorganize the data into state, action, reward, next_state, done format
                trajectories = reorganize(trajectories, payoffs)

                # Feed seat 0's transitions into agent memory and train the agent
                for ts in trajectories[0]:
                    agent.feed(ts)

            print('\rIteration {}'.format(episode), end='')
            # Periodically evaluate the agent's performance against random agents
            if episode % evaluate_every == 0:
                if agent_type == "cfr":
                    agent.save()  # Save the model

                # Index 0 of the tournament result belongs to seat 0, i.e. our agent.
                logger.log_performance(
                    episode,
                    tournament(
                        eval_env,
                        num_eval_games
                    )[0]
                )

        # Persist the final CFR policy: iterations run after the last evaluation
        # point would otherwise never be written to disk.
        if agent_type == "cfr":
            agent.save()

        # Retrieve paths for logging data
        csv_path, fig_path = logger.csv_path, logger.fig_path
    # Plot the learning curve based on logged performance data
    plot_curve(csv_path, fig_path, agent_type)


# Train a DQN agent on No-Limit Texas Hold'em for 2000 episodes. The log
# directory is named after the agent and the game, e.g. "dqn-no-limit-holdem/".
agent_type = "dqn"
game_type = "no-limit-holdem"
train(
    agent_type,
    game_type=game_type,
    num_episodes=2000,
    log_dir="-".join((agent_type, game_type)) + "/",
)


In [None]:
# The tournament helper from rlcard takes an environment with agents attached and
# plays those agents against each other. The code below matches two of the agents
# trained in this notebook (here: our DQN agent vs. our CFR agent) to see how well
# they do.
# NOTE(review): this cell expects "dqn/checkpoint_dqn.pt" and "cfr/cfr_model" to
# exist already, i.e. `train` must have been run for Leduc Hold'em with
# log_dir="dqn/" and log_dir="cfr/" beforehand.

eval_env = rlcard.make(
    'leduc-holdem',
    config={
        'seed': 0,
    }
)

pretrained_agent = models.load('leduc-holdem-cfr').agents[0]

# `DQNAgent.from_checkpoint` is a classmethod that builds and RETURNS a restored
# agent. Calling it on a freshly constructed instance and ignoring the result (as
# the previous version did) leaves that instance untrained, so the return value
# must be kept.
checkpoint = torch.load("dqn/checkpoint_dqn.pt")
our_dqn_agent = DQNAgent.from_checkpoint(checkpoint)

our_cfr_agent = CFRAgent(
    eval_env,
    os.path.join(
        "cfr/",
        'cfr_model',
    ),
)

our_cfr_agent.load()

# Seat 0: DQN agent, seat 1: CFR agent.
eval_env.set_agents([
    our_dqn_agent,
    our_cfr_agent,
])

# Play 1000 evaluation games; the result holds one entry per seat.
out = tournament(eval_env, 1000)
print(out)

Play No-Limit Texas Hold'em against your AI

In [None]:
import rlcard
from rlcard import models
from rlcard.agents import NolimitholdemHumanAgent
from rlcard.utils import print_card

# Make environment
env = rlcard.make('no-limit-holdem')

# Seat layout of this table. Every per-seat lookup below goes through these
# constants so the result is always reported for the correct player.
DQN_SEAT = 0
HUMAN_SEAT = 1

human_agent = NolimitholdemHumanAgent(env.num_actions)

# `DQNAgent.from_checkpoint` is a classmethod that RETURNS the restored agent;
# the result has to be kept, otherwise the opponent would be an untrained network.
checkpoint = torch.load("dqn-no-limit-holdem/checkpoint_dqn.pt")
our_dqn_agent = DQNAgent.from_checkpoint(checkpoint)

env.set_agents([
    our_dqn_agent,
    human_agent,
])

print(">> Play No-Limit Texas Hold'em against the trained DQN agent")

# Play hands forever; interrupt the kernel to stop.
while (True):
    print(">> Start a new game")

    trajectories, payoffs = env.run(is_training=False)
    # If the human does not take the final action, we need to
    # print other players action
    final_state = trajectories[HUMAN_SEAT][-1]
    action_record = final_state['action_record']
    state = final_state['raw_obs']
    _action_list = []
    for i in range(1, len(action_record)+1):
        if action_record[-i][0] == state['current_player']:
            break
        _action_list.insert(0, action_record[-i])
    for pair in _action_list:
        print('>> Player', pair[0], 'chooses', pair[1])

    # Let's take a look at what the agent card is
    print('===============     DQN Agent    ===============')
    print_card(env.get_perfect_information()['hand_cards'][DQN_SEAT])

    # Report the outcome from the human's point of view. The human sits in seat 1,
    # so reading payoffs[0] (the DQN agent's payoff) would invert win and loss.
    human_payoff = payoffs[HUMAN_SEAT]
    print('===============     Result     ===============')
    if human_payoff > 0:
        print('You win {} chips!'.format(human_payoff))
    elif human_payoff == 0:
        print('It is a tie.')
    else:
        print('You lose {} chips!'.format(-human_payoff))
    print('')

    input("Press any key to continue...")