In [1]:
from treys import Card
from treys import Evaluator
from treys import Deck
import random
from typing import List, Dict, Any
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel
import re
import json
import torch

ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm
    PyTorch 2.9.0+cu130 with CUDA 1300 (you have 2.9.0+cu128)
    Python  3.10.19 (you have 3.11.14)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


Switching to PyTorch attention since your Xformers is broken.

Unsloth: Xformers was not installed correctly.
Please install xformers separately first.
Then confirm if it's correctly installed by running:
python -m xformers.info

Longer error message:
xFormers can't load C++/CUDA extensions. xFormers was built for:
    PyTorch 2.9.0+cu130 with CUDA 1300 (you have 2.9.0+cu128)
    Python  3.10.19 (you have 3.11.14)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!


### LLM Agent Class

In [2]:
def safe_parse_json(raw_text):
    try:
        # extract first {...}
        m = re.search(r"\{.*\}", raw_text, flags=re.S)
        if not m:
            raise ValueError("No JSON object found")
        js_text = m.group()
        # normalize single quotes to double quotes
        js_text = js_text.replace("'", '"')
        # remove trailing commas
        js_text = re.sub(r",\s*}", "}", js_text)
        js_text = re.sub(r",\s*\]", "]", js_text)
        return json.loads(js_text)
    except Exception as e:
        print("COULD NOT PARSE JSON:", e)
        # fallback
        return {"action": "check", "amount": 0}
    

In [3]:
import random
import json
import re
from typing import List, Any, Dict
import torch
from unsloth.chat_templates import get_chat_template

class LLMAgent:
    def __init__(self, name: str, model, tokenizer, seed: int, chips: int):
        self.name = name
        self.model = model
        self.tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")
        self.base_seed = int(seed)
        self.rng = random.Random(self.base_seed)
        self.chips = int(chips)
        self.hand = None
        self.folded = False
        self.dialogue_history: List[str] = []
        self.win_pct = 0.0
        self.last_action = None

        self.system_prompt = (
            f"You are {self.name}, an expert poker player. "
            "Your goal is to win as many chips as possible over multiple hands.\n\n"
            "--- Game Rules ---\n"
            "1. Each hand starts with players contributing to the pot through bets.\n"
            "2. If you win a hand, you collect all the chips in the pot.\n"
            "3. On your turn, you can perform one of the following legal actions: check, call, raise, or fold.\n"
            "   - 'Check' means you do not contribute more chips but stay in the hand.\n"
            "   - 'Call' means you match the current bet to stay in the hand.\n"
            "   - 'Raise' means you increase the bet and contribute more chips to the pot.\n"
            "   - 'Fold' means you exit the hand and forfeit any chips you've contributed so far.\n"
            "4. Bets and raises contribute to the pot, which is collected by the winner at showdown.\n"
            "5. You can raise or call any amount within your available chips.\n\n"
            "--- Table Conversation Rules ---\n"
            "The goal of conversation is to strategically influence your opponents, not to declare your moves.\n"
        )


    def generate_dialogue(self, stage: str, board: List[Any], other_chips: dict, pot: int, to_call: int, turn_index: int, dialogue_history:dict, max_new_tokens: int = 516) -> str:
        visible_board = ", ".join(map(str, board))
        history_text = "\n".join(dialogue_history)
        opponent_status = ", ".join([f"{name} has {chips} chips" for name, chips in other_chips.items()])

        system_msg = {"role": "system", "content": self.system_prompt}
        user_msg = {
            "role": "user",
            "content": (
                f"--- Game Context ---\n"
                f"Stage: {stage}\n"
                f"Your chips: {self.chips}\n"
                f"{opponent_status}\n"
                f"Pot: {pot}\n"
                f"To call: {to_call}\n"
                f"Your chance of winning this hand is {self.win_pct*100:.1f}%\n"
                f"Board: {visible_board}\n"
                f"Your hand: {self.hand}\n"
                f"Conversation so far:\n{history_text}\n"
                "Return ONLY a valid JSON object like {'reasoning':'My hand is quite good, I don't want my opponent to fold so I should make it seem like my hand isn't very good', 'text':'Wow interesting cards, a lot could happen here'}.\n"
                "Do not include any text outside the JSON."
            )
        }

        inputs = self.tokenizer.apply_chat_template(
            [system_msg, user_msg],
            tokenize=True,
            add_generation_prompt=True,  # important for Unsloth
            return_tensors="pt",
        ).to(self.model.device)

        torch.manual_seed(self.base_seed + turn_index)
        with torch.no_grad():
            out = self.model.generate(
            inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,
                temperature=0.0,
                top_k=None,
                top_p=1.0,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        full_text = self.tokenizer.decode(out[0][inputs.shape[1]:], skip_special_tokens=True)
        # Remove prompt echo
        utter = safe_parse_json(full_text)
        utter = utter.get('text', '')

        
        # Sanitize: remove action-like sentences
        sentences = re.split(r'(?<=[.!?])\s+', utter)
        non_action_sents = [s for s in sentences if not re.search(r'\b(raise|call|fold|check|bet|all-in|allin)\b', s, flags=re.I)]
        if non_action_sents:
            utter = " ".join(non_action_sents).strip()
        else:
            utter = "Not much for meâ€”how about you?"

        self.dialogue_history.append(f"{self.name}: {utter}")
        return utter

    def decide_action(self, stage: str, board: list, other_chips: dict, pot: int, to_call: int, decision_turn_index: int):
        visible_board = ", ".join(map(str, board))
        history_text = "\n".join(self.dialogue_history[-20:])
        opponent_status = ", ".join([f"{name} has {chips} chips" for name, chips in other_chips.items()])

        system_msg = {
            "role": "system",
            "content": (
                "You are a poker AI. Your goal is to maximize your chips over multiple hands.\n\n"
                "--- Game Rules for Decision-Making ---\n"
                "1. Each hand has a pot that collects chips contributed by all active players.\n"
                "2. On your turn, you must choose ONE legal action: check, call, raise, or fold.\n"
                "   - 'Check': stay in the hand without adding chips (only allowed if current bet is zero).\n"
                "   - 'Call': match the current highest bet to remain in the hand.\n"
                "   - 'Raise': increase the current bet and contribute more chips to the pot.\n"
                "   - 'Fold': exit the hand and forfeit any chips already contributed.\n"
                "3. You can only raise or call within your available chips.\n"
                "4. At the end of the hand, the winner collects the entire pot.\n\n"
                "Return ONLY a valid JSON object like {'reasoning':'I have a good hand so I should raise', 'action':'raise','amount':25}.\n"
                "Do not include any text outside the JSON."
            )
        }

        user_msg = {
            "role": "user",
            "content": (
                f"Stage: {stage}\n"
                f"Your chips: {self.chips}\n"
                f"{opponent_status}\n"
                f"Pot: {pot}\n"
                f"To call: {to_call}\n"
                f"Your chance of winning: {self.win_pct*100:.1f}%\n"
                f"Board: {visible_board}\n"
                f"Your hand: {self.hand}\n"
                f"Conversation history:\n{history_text}\n"
                f"Return ONLY a JSON object like {{'action':'raise','amount':25}}."
            )
        }

        inputs = self.tokenizer.apply_chat_template(
            [system_msg, user_msg],
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.model.device)

        torch.manual_seed(self.base_seed + decision_turn_index)
        with torch.no_grad():
            out = self.model.generate(
                inputs,
                max_new_tokens=516,
                do_sample=False,
                temperature=0.0,
                top_k=None,
                top_p=1.0,
                repetition_penalty=1.1,
                pad_token_id=self.tokenizer.eos_token_id,
                eos_token_id=self.tokenizer.eos_token_id)

        generated = self.tokenizer.decode(out[0][inputs.shape[1]:], skip_special_tokens=True)

        parsed = safe_parse_json(generated)
        action = parsed.get("action", "check").lower()
        amount = int(parsed.get("amount", 0))
        amount = max(0, min(amount, self.chips))
        reasoning = parsed.get("reasoning", 0)

        return {"action": action, "amount": amount, 'reasoning':reasoning, "raw_model_out": generated}


### Run Hand Class

In [4]:
def betting_round(agents, pot: int, current_bet: int, stage: str, board: list, seed_base: int):
    """
    Executes one betting round. Ends immediately if all but one player folds.
    """
    num_players = len(agents)
    committed = [0] * num_players
    active_players = [not agent.folded for agent in agents]
    round_over = False
    action_turn = 0

    while not round_over:
        round_over = True
        for idx, agent in enumerate(agents):
            if agent.folded:
                continue

            to_call = current_bet - committed[idx]
            other_chips = {a.name: a.chips for j, a in enumerate(agents) if j != idx}
            action_data = agent.decide_action(stage, board, other_chips, pot, to_call, action_turn)
            action_turn += 1

            act = action_data["action"].lower()
            reasoning = action_data["reasoning"]
            amt = min(action_data["amount"], agent.chips)

            # Enforce legality
            if to_call > 0:
                if act == "check":
                    act = "call"
                    amt = to_call
                elif act == "call":
                    amt = to_call
                elif act == "raise":
                    amt = max(amt, to_call + 1)
            else:
                if act not in ["check", "raise"]:
                    act = "check"
                    amt = 0

            # Execute action
            if act == "fold":
                agent.folded = True
                agent.last_action = {"action": "fold", "amount": 0}
                active_players[idx] = False
            elif act == "call":
                agent.chips -= amt
                committed[idx] += amt
                pot += amt
                agent.last_action = {"action": "call", "amount": amt}
            elif act == "raise":
                agent.chips -= amt
                committed[idx] += amt
                pot += amt
                current_bet = committed[idx]
                agent.last_action = {"action": "raise", "amount": amt}
                round_over = False
            elif act == "check":
                agent.last_action = {"action": "check", "amount": 0}

            print(f"{agent.name}: {agent.last_action}, chips={agent.chips}, reasoning={reasoning}")

            # End round/game if only one player remains
            if sum(active_players) == 1:
                round_over = True
                break

        # Check if round is over by folds
        if sum(active_players) <= 1:
            round_over = True

    return pot, committed


def run_hand(deck, evaluator, model, tokenizer, dialogue_turns: int, seed: int, num_players: int, initial_pots: List[int], verbose=True):
    rng = random.Random(seed)
    agents = [LLMAgent(f"Player{i+1}", model=model, tokenizer=tokenizer, seed=seed+i, chips=initial_pots[i]) for i in range(num_players)]

    # Deal hole cards and board
    for agent in agents:
        agent.hand = deck.draw(2)
    board = deck.draw(5)
    pot = 0
    current_bet = 0
    stages = ["FLOP", "TURN", "RIVER"]

    winner_declared = False
    dialogue_history = []
    for stage_idx, stage_name in enumerate(stages):
        visible_board = board[:stage_idx+3]
        print(f"\n=== {stage_name} ===")
        try:
            from treys import Card
            Card.print_pretty_cards(visible_board)
        except Exception:
            print("Board:", visible_board)

        for agent in agents:
            rank = evaluator.evaluate(agent.hand, visible_board)
            rank_class = evaluator.get_rank_class(rank)
            hand_str = evaluator.class_to_string(rank_class)
            agent.win_pct = 1.0 - evaluator.get_five_card_rank_percentage(rank)
            if verbose:
                print(f"{agent.name} win% = {agent.win_pct:.3f} hand={hand_str}")

        if dialogue_turns > 0 and num_players >= 2:
            for t in range(dialogue_turns):
                speaker_idx = t % 2
                listener_idx = 1 - speaker_idx
                speaker = agents[speaker_idx]
                listener = agents[listener_idx]
                other_chips = {listener.name: listener.chips}
                utter = speaker.generate_dialogue(stage_name, visible_board, other_chips, pot, to_call=0, turn_index=t, dialogue_history=dialogue_history)
                dialogue_history.append(f"{speaker.name}: {utter}")
                print(f"{speaker.name}: {utter}")

        # Betting phase
        pot, committed = betting_round(agents, pot, current_bet, stage_name, visible_board, seed + stage_idx)
        current_bet = max(committed)

        # Check for winner by folds
        active_agents = [a for a in agents if not a.folded]
        if len(active_agents) == 1:
            winner = active_agents[0]
            winner.chips += pot
            print(f"\n*** {winner.name} wins the pot of {pot} chips by opponent folding! ***")
            winner_declared = True
            break

        if verbose:
            print(f"Pot now: {pot}")
            for a in agents:
                print(f"{a.name}: chips={a.chips}, folded={a.folded}, last_action={a.last_action}")

    # Showdown if multiple players remain
    if not winner_declared:
        active_agents = [a for a in agents if not a.folded]
        best_rank = None
        winner = None
        for agent in active_agents:
            rank = evaluator.evaluate(agent.hand, board)
            if best_rank is None or rank < best_rank:
                best_rank = rank
                winner = agent
        winner.chips += pot
        print(f"\n*** {winner.name} wins the pot of {pot} chips at showdown! ***")

In [5]:
base_model = "unsloth/Llama-3.3-70B-Instruct"
max_seq_length = 5000
device_map = {'':5}
model, tokenizer = FastLanguageModel.from_pretrained(
    base_model,
    max_seq_length=max_seq_length,
    device_map=device_map,
    load_in_4bit=True,
    fix_tokenizer=True,
    offload_folder="/playpen-ssd/smerrill/offload", 
) 

from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

==((====))==  Unsloth 2025.11.2: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    NVIDIA RTX A6000. Num GPUs = 8. Max memory: 47.438 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8/8 [00:39<00:00,  4.99s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 8192, padding_idx=128004)
    (layers): ModuleList(
      (0-79): 80 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=8192, out_features=8192, bias=False)
          (k_proj): Linear4bit(in_features=8192, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=8192, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=8192, out_features=8192, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=8192, out_features=28672, bias=False)
          (up_proj): Linear4bit(in_features=8192, out_features=28672, bias=False)
          (down_proj): Linear4bit(in_features=28672, out_features=8192, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((8192,), eps=1e-05)
        (post_attention_layernorm):

In [None]:
evaluator = Evaluator()
deck = Deck()
num_players=2
dialogue_turns=2
seed=1
initial_pots = [1000, 1000]
run_hand(deck, 
         evaluator, 
         model, 
         tokenizer,
         dialogue_turns,
         seed, 
         num_players, 
         initial_pots,
         verbose=True)cd 

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



=== FLOP ===
 [6â™¦],[9â™£],[5â™ ] 
Player1 win% = 0.592 hand=Two Pair
Player2 win% = 0.095 hand=High Card


Player1: Wow interesting cards, a lot could happen here
Player2: Yeah, pretty unpredictable flop
Player1: {'action': 'raise', 'amount': 50}, chips=950, reasoning=Given the high chance of winning and the potential for improvement on the next community cards, raising to build the pot while having a strong hand is a good strategy.
Player2: {'action': 'fold', 'amount': 0}, chips=1000, reasoning=Low chance of winning and high cost to continue, so folding is the best option to minimize losses.

*** Player1 wins the pot of 50 chips by opponent folding! ***


 [Qâ™£],[3â™ ],[5â™¥],[Aâ™£],[Qâ™¦] 
 [7â™£],[6â™¦] 
 [6â™¥],[5â™¦] 


In [4]:
p1_score = evaluator.evaluate(board, player1_hand)
p2_score = evaluator.evaluate(board, player2_hand)
p1_class = evaluator.get_rank_class(p1_score)
p2_class = evaluator.get_rank_class(p2_score)
print("Player 1 hand rank = %d (%s)\n" % (p1_score, evaluator.class_to_string(p1_class)))
print("Player 2 hand rank = %d (%s)\n" % (p2_score, evaluator.class_to_string(p2_class)))

hands = [player1_hand, player2_hand]
evaluator.hand_summary(board, hands)


Player 1 hand rank = 3806 (Pair)

Player 2 hand rank = 2787 (Two Pair)

Player 1 hand = High Card, percentage rank among all hands = 0.03470919324577859
Player 2 hand = Pair, percentage rank among all hands = 0.2713749664969177
Player 2 hand is currently winning.

Player 1 hand = High Card, percentage rank among all hands = 0.1356204770838917
Player 2 hand = Pair, percentage rank among all hands = 0.28692039667649427
Player 2 hand is currently winning.

Player 1 hand = Pair, percentage rank among all hands = 0.48994907531492893
Player 2 hand = Two Pair, percentage rank among all hands = 0.6265076387027606

Player 2 is the winner with a Two Pair



In [17]:
ranking_dict = {}
stages = ["FLOP", "TURN", "RIVER"]

for i in range(len(stages)):
    
    best_rank = 7463  # rank one worse than worst hand
    winners = []
    for player, hand in enumerate(hands):

        # evaluate current board position
        rank = evaluator.evaluate(hand, board[:(i + 3)])
        rank_class = evaluator.get_rank_class(rank)
        class_string = evaluator.class_to_string(rank_class)
        percentage = 1.0 - evaluator.get_five_card_rank_percentage(rank)  # higher better here
        ranking_dict[(stages[i], player + 1)] = percentage
        print("Player {} hand = {}, percentage rank among all hands = {}".format(player + 1, class_string, percentage))

        # detect winner
        if rank == best_rank:
            winners.append(player)
            best_rank = rank
        elif rank < best_rank:
            winners = [player]
            best_rank = rank


Player 1 hand = Pair, percentage rank among all hands = 0.4092736531760922
Player 2 hand = Pair, percentage rank among all hands = 0.32350576253015273
Player 1 hand = Two Pair, percentage rank among all hands = 0.6021173948003216
Player 2 hand = Pair, percentage rank among all hands = 0.3243098365049585
Player 1 hand = Two Pair, percentage rank among all hands = 0.6026534441168587
Player 2 hand = Pair, percentage rank among all hands = 0.333154650227821


In [18]:
ranking_dict

{('FLOP', 1): 0.4092736531760922,
 ('FLOP', 2): 0.32350576253015273,
 ('TURN', 1): 0.6021173948003216,
 ('TURN', 2): 0.3243098365049585,
 ('RIVER', 1): 0.6026534441168587,
 ('RIVER', 2): 0.333154650227821}

In [56]:
safe_parse_json("{'action': 'raise', 'amount': 50,}")

{'action': 'raise', 'amount': 50}