In [1]:
import torch
import numpy
from ml_modules import *
from sequence_modules import *
from llm_modules import *
from agent import *

from utils import *

from ml_ops_utils import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Softmax over the last dimension; used further down to turn the masked policy scores into probabilities.
softmax_prob = torch.nn.Softmax(dim=-1)

In [3]:
# Validator configured with num_players = 2, blinds of 1 and 2, and starting_stack_sizes = 400.
# Its get_legal_actions_mask method is queried further down to mask the policy output.
action_validator = PokerActionValidator(
    num_players = 2,
    small_blind = 1,
    big_blind = 2,
    starting_stack_sizes = 400
)

In [4]:
def _build_player_modules(embedding_dim = 256, max_seq_len = 1024, device = "cuda"):
    """Build one player's independent set of embedding modules.

    Both players use exactly the same configuration, so the construction lives
    in one place instead of being copy-pasted per player.

    Args:
        embedding_dim: Width passed to the street / table-position / action
            encodings and used as the matching input width of
            PokerSequenceEmbedder.
        max_seq_len: Sequence length passed to every sequence-level module.
        device: Device string forwarded to every module.

    Returns:
        A 7-tuple, in construction order: (street_embedder,
        table_position_embedder, action_embedder, pot_size_embedder,
        poker_sequence_embedder, cards, self_position_embedder).
    """
    street_embedder = StreetPositionalEncoding(
        num_streets = 4,
        embedding_dim = embedding_dim,
        max_seq_len = max_seq_len,
        device = device
    )

    table_position_embedder = TablePositionalEncoding(
        num_players = 2,
        embedding_dim = embedding_dim,
        max_seq_len = max_seq_len,
        device = device
    )

    # num_actions is deliberately not passed (it was commented out as 21), so the class default applies.
    action_embedder = ActionEncoding(
        embedding_dim = embedding_dim,
        max_seq_len = max_seq_len,
        device = device
    )

    pot_size_embedder = PotSizeSequenceEmbedder(
        max_seq_len = max_seq_len,
        pad_value = -1,
        device = device
    )

    # The three input widths equal the embedding width of the encoders above.
    # NOTE(review): presumably they must always match — confirm before changing embedding_dim.
    poker_sequence_embedder = PokerSequenceEmbedder(
        street_input_dimension = embedding_dim,
        table_position_input_dimension = embedding_dim,
        action_input_dimension = embedding_dim,
        latent_dimensions = [256, 512, 1024, 2048],
        device = device
    )

    cards = Cards(device = device)

    self_position_embedder = SelfPositionEmbedder(number_of_positions = 2, device = device)

    return (
        street_embedder,
        table_position_embedder,
        action_embedder,
        pot_size_embedder,
        poker_sequence_embedder,
        cards,
        self_position_embedder,
    )


# Player 1 is built before player 2, matching the original construction order
# (and therefore the order in which any random initialisation is drawn).
(
    street_embedder_p1,
    table_position_embedder_p1,
    action_embedder_p1,
    pot_size_embedder_p1,
    poker_sequence_embedder_p1,
    cards_p1,
    self_position_embedder_p1,
) = _build_player_modules()

(
    street_embedder_p2,
    table_position_embedder_p2,
    action_embedder_p2,
    pot_size_embedder_p2,
    poker_sequence_embedder_p2,
    cards_p2,
    self_position_embedder_p2,
) = _build_player_modules()

In [5]:
# Shuffle a 52-card deck: the argsort of uniform noise is a random permutation of 0..51,
# kept with a leading batch dimension of 1.
# A dedicated seeded generator makes the deal reproducible on Restart & Run All
# without consuming from (or reseeding) the global RNG.
deck_rng = torch.Generator().manual_seed(0)
deck_order_shuffled = torch.argsort(torch.rand(1, 52, generator = deck_rng))

In [6]:
# Local checkpoint directory handed to load_model (a path, despite the variable name).
model_name = "./models/qwen3-1point7b/"


# load_model returns a (tokenizer, model) pair; the same `model` object is later
# passed to both PokerAgent instances.
tokenizer, model = load_model(model_name)

# Display the loaded architecture.
model

`torch_dtype` is deprecated! Use `dtype` instead!
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  5.91it/s]
The module name  (originally ) is not a valid Python identifier. Please rename the original module to avoid import issues.


Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 2048)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (v_proj): Linear(in_features=2048, out_features=1024, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (q_norm): Qwen3RMSNorm((128,), eps=1e-06)
          (k_norm): Qwen3RMSNorm((128,), eps=1e-06)
        )
        (mlp): Qwen3MLP(
          (gate_proj): Linear(in_features=2048, out_features=6144, bias=False)
          (up_proj): Linear(in_features=2048, out_features=6144, bias=False)
          (down_proj): Linear(in_features=6144, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): Qwen3RMSNorm((2048,), eps=1e-06)
        (post_attention_layer

In [7]:
def _build_policy_model(self_position_embedder):
    """Construct a PolicyModel on 'cuda' around the given self-position embedder.

    Every call creates fresh hidden-dimension lists, so the two players'
    models never share list objects.
    """
    return PolicyModel(
        num_players = 2,
        self_position_embedder = self_position_embedder,
        active_players_hidden_dims = [1024, 2048],
        stack_size_hidden_dims = [1024, 2048],
        card_embeddings_hidden_dims = [2048, 2048],
        final_output_hidden_dims = [1024, 512, 256],
        device = 'cuda'
    )


# One identically configured policy head per player, each with its own embedder.
policy_model_p1 = _build_policy_model(self_position_embedder_p1)
policy_model_p2 = _build_policy_model(self_position_embedder_p2)

In [8]:
def _build_poker_agent(
    cards,
    street_embedder,
    table_position_embedder,
    action_embedder,
    pot_size_embedder,
    poker_sequence_embedder,
    policy_model
):
    """Assemble a PokerAgent from one player's modules.

    The module-level `model` is passed to every agent, and each agent is
    created with device = 'cuda' and llm_train = False.
    """
    return PokerAgent(
        cards,
        street_embedder,
        table_position_embedder,
        action_embedder,
        pot_size_embedder,
        poker_sequence_embedder,
        model,
        policy_model,
        device = 'cuda',
        llm_train = False
    )


poker_player_p1 = _build_poker_agent(
    cards_p1,
    street_embedder_p1,
    table_position_embedder_p1,
    action_embedder_p1,
    pot_size_embedder_p1,
    poker_sequence_embedder_p1,
    policy_model_p1
)

poker_player_p2 = _build_poker_agent(
    cards_p2,
    street_embedder_p2,
    table_position_embedder_p2,
    action_embedder_p2,
    pot_size_embedder_p2,
    poker_sequence_embedder_p2,
    policy_model_p2
)

In [9]:
# Display the agent's module tree (its repr).
poker_player_p1

PokerAgent(
  (cards): Cards(
    (rank_embedder): Embedding(14, 1024)
    (suit_embedder): Embedding(5, 1024)
  )
  (street_embedder): StreetPositionalEncoding(
    (street_embedder): Embedding(7, 256)
  )
  (table_position_embedder): TablePositionalEncoding(
    (player_embedder): Embedding(4, 256)
  )
  (action_embedder): ActionEncoding(
    (action_embedder): Embedding(22, 256)
  )
  (pot_size_embedder): PotSizeSequenceEmbedder()
  (poker_sequence_embedder): PokerSequenceEmbedder(
    (street_MLP): Sequential(
      (0): Linear(in_features=256, out_features=256, bias=True)
      (1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (2): ReLU()
      (3): Linear(in_features=256, out_features=512, bias=True)
      (4): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (5): ReLU()
      (6): Linear(in_features=512, out_features=1024, bias=True)
      (7): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (8): ReLU()
      (9): Linear(in_features=1024, o

In [12]:
# Hand state right after both blinds are posted, as (1, 1024) history tensors.
# Slots 0 and 1 hold the two blind posts; all later slots keep a filler value.
history_shape = (1, 1024)

# Filler 6 — presumably the street padding index; confirm against StreetPositionalEncoding.
street_idxs = torch.full(history_shape, 6, dtype = torch.long)
street_idxs[0, :2] = 0 # posting small blind and big blind

# Filler 2 — presumably the table-position padding index; confirm against TablePositionalEncoding.
table_position_idxs = torch.full(history_shape, 2, dtype = torch.long)
table_position_idxs[0, 0] = 0 # sb/b
table_position_idxs[0, 1] = 1 # bb

# Filler 21 — presumably the action padding index; confirm against ActionEncoding.
action_idxs = torch.full(history_shape, 21, dtype = torch.long)
action_idxs[0, 0] = 0 # post sb
action_idxs[0, 1] = 1 # post bb

# Filler -1 matches the pad_value given to PotSizeSequenceEmbedder.
pot_size_sequence = torch.full(history_shape, -1.0)
pot_size_sequence[0, 0] = 1 # pot after the small blind
# Fix: this value used to overwrite slot 0, leaving slot 1 at the pad value.
pot_size_sequence[0, 1] = 3 # pot after the big blind

# Both seats are active (1 = still in the hand).
active_players = torch.tensor([[1.0, 1.0]])
print(active_players.shape)
# Stacks of 399 and 398, i.e. 400 minus the 1 and 2 blinds configured above.
stack_size = torch.tensor([[399.0, 398.0]])
stack_size.shape

torch.Size([1, 2])


torch.Size([1, 2])

In [13]:
# Integer card buffers of shape (1, 2, 7), allocated directly on the GPU;
# the dealing cell below writes ranks into row 0 and suits into row 1.
card_buffer_shape = (1, 2, 7)
sb_cards = torch.zeros(card_buffer_shape, dtype = torch.long, device = 'cuda')
bb_cards = torch.zeros(card_buffer_shape, dtype = torch.long, device = 'cuda')

In [14]:
# Peek at the first two shuffled card indices, which the next cell deals to the small blind.
deck_order_shuffled[0, :2]

tensor([26, 31])

In [15]:
def _deal_hole_cards(card_buffer, hole_card_idxs):
    """Write two hole cards into `card_buffer` in place.

    A deck index is split into `idx % 13` (stored in row 0) and `idx // 13`
    (stored in row 1). The remaining five columns get the filler values
    13 and 4 respectively.
    """
    card_buffer[0, 0, :2] = hole_card_idxs % 13
    card_buffer[0, 1, :2] = hole_card_idxs // 13

    card_buffer[0, 0, 2:] = 13
    card_buffer[0, 1, 2:] = 4


# Small blind receives deck positions 0-1, big blind positions 2-3.
_deal_hole_cards(sb_cards, deck_order_shuffled[0, :2])
_deal_hole_cards(bb_cards, deck_order_shuffled[0, 2:4])

In [16]:
# Inspect the small blind's card tensor after dealing.
sb_cards

tensor([[[ 0,  5, 13, 13, 13, 13, 13],
         [ 2,  2,  4,  4,  4,  4,  4]]], device='cuda:0')

In [17]:
# Inspect the big blind's card tensor after dealing.
bb_cards

tensor([[[ 0,  0, 13, 13, 13, 13, 13],
         [ 3,  1,  4,  4,  4,  4,  4]]], device='cuda:0')

In [18]:
# Seat indices as one-element float tensors created directly on the GPU:
# player 1 sits in position 0, player 2 in position 1.
p1_position_player = torch.tensor([0.0], device = 'cuda')
p2_position_player = torch.tensor([1.0], device = 'cuda')

In [19]:
# Forward pass of the player-1 agent, seated at position 0 and holding sb_cards.
# The result is indexed by key further down (outputs['probits']).
# NOTE(review): only active_players and stack_size are moved to 'cuda' here; the four
# history tensors are passed as created — presumably the agent moves them itself, confirm.
outputs = poker_player_p1(
    p1_position_player,
    sb_cards,
    street_idxs,
    table_position_idxs,
    action_idxs,
    pot_size_sequence,
    active_players.to('cuda'),
    stack_size.to('cuda')
)

In [20]:
# Raw, un-normalized per-action scores returned by the agent (one row of 21 values).
outputs['probits']

tensor([[ 0.9621, -0.4319,  0.5310,  0.5411, -0.2802,  0.9809,  0.4215,  0.6577,
          0.4941, -0.9434, -0.9269, -0.1899,  0.1640, -0.4097,  0.2996,  0.5463,
          0.1543, -0.2257,  0.2326,  0.5217, -0.1521]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [21]:
# Ask the validator which actions are legal for the current history.
# All inputs here are the un-moved tensors built above, and the resulting mask is
# moved to 'cuda' explicitly where it is combined with the policy scores.
legal_actions = action_validator.get_legal_actions_mask(
    street_idxs,
    table_position_idxs,
    action_idxs, 
    pot_size_sequence,
    active_players
)

In [22]:
# Inspect the boolean legality mask; entries that are False get masked out before sampling.
legal_actions

tensor([[False, False,  True, False,  True, False,  True,  True,  True,  True,
          True,  True,  True,  True,  True,  True,  True, False, False, False,
         False]])

In [23]:
# Check the score tensor's shape so it can be compared with the legality mask.
outputs['probits'].shape

torch.Size([1, 21])

In [24]:
# Subtract 1e9 from every illegal action's score so softmax assigns it ~0 probability.
illegal_action_penalty = 1e9 * (~legal_actions).float().to('cuda')
softmax_prob(outputs['probits'] - illegal_action_penalty)

tensor([[0.0000, 0.0000, 0.1115, 0.0000, 0.0496, 0.0000, 0.1000, 0.1266, 0.1075,
         0.0255, 0.0260, 0.0542, 0.0773, 0.0435, 0.0885, 0.1133, 0.0765, 0.0000,
         0.0000, 0.0000, 0.0000]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [25]:
# Sample one action index from the policy restricted to legal actions:
# illegal scores are pushed down by 1e9, normalized with softmax, then sampled.
masked_probits = outputs['probits'] - 1e9 * (~legal_actions).float().to('cuda')
legal_action_probabilities = softmax_prob(masked_probits)
torch.distributions.Categorical(legal_action_probabilities).sample()

tensor([16], device='cuda:0')

In [27]:
# Re-display of sb_cards; the output matches the earlier inspection cell.
sb_cards

tensor([[[ 0,  5, 13, 13, 13, 13, 13],
         [ 2,  2,  4,  4,  4,  4,  4]]], device='cuda:0')

In [None]:
def min_bet_size(
    pot_size_sequence,
    stack_size,
    big_blind = 2,
    pot_fraction = 0.25
):
    """Return the smallest bet size, capped at the player's stack.

    The minimum is the larger of `big_blind` and `pot_fraction` times the
    largest value in `pot_size_sequence`, and never exceeds `stack_size`.

    Args:
        pot_size_sequence: Non-empty iterable of pot sizes. The built-in
            `max` is applied to it, so negative filler entries are ignored
            as long as at least one real pot size is larger.
        stack_size: Chips the player has left; upper bound on the result.
        big_blind: Floor for the bet. Defaults to 2, the value that was
            previously hard-coded.
        pot_fraction: Fraction of the largest pot used as the alternative
            floor. Defaults to 0.25, the value that was previously hard-coded.

    Returns:
        The minimum bet size.

    Raises:
        ValueError: If `pot_size_sequence` is empty (raised by `max`).
    """
    largest_pot = max(pot_size_sequence)
    uncapped_minimum = max(big_blind, pot_fraction * largest_pot)
    return min(uncapped_minimum, stack_size)

def min_raise_size(
    pot_size_sequence,
    street_idxs,
    current_street,
    stack_size
):
    pot_size_sequence_this_street = pot_size_sequence[street_idxs == current_street]
    if len(pot_size_sequence_this_street)