In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from Environment import PokerGame, PokerHandEvaluator
from copy import deepcopy
import optuna

  from .autonotebook import tqdm as notebook_tqdm


### Data mining

In [2]:
iters = 8
names = {0: '2', 1: '3', 2: '4', 3: '5', 4: '6', 5: '7', 6: '8', 7: '9', 8: '10', 9: "jack", 10: "queen", 11: "king", 12: "ace"}
rounds = 3
init_cards = 2
flop_size = 3
players = 4
game = PokerGame(52, 4, num_cards=init_cards, flop_size=flop_size, num_players=players, rounds=rounds, names=names)

answers = []
hands = []
for i in range(iters):
    game.start_new_game()
    history = game.get_history()

    max_len = 0
    for round in range(rounds):
        for player in range(players):

            ans = np.zeros(2)
            if player in history["winner"]:
                ans[0] = 1
            else:
                ans[1] = 1

            answers.append(ans.copy())
            hands.append(np.array(history["history"][round].get_state()[player]))
            max_len = max(max_len, len(history["history"][round].get_state()[player]))

hands = np.array(hands, dtype=np.ndarray)
answers = np.array(answers)

In [3]:
x = []
y_train = torch.from_numpy(answers).float()
for i in range(len(hands)):
    x.append(hands[i].tolist())
padding_length = max(len(seq) for seq in x)

# Делаем padding вручную, добавив 0 в конце каждого списка
padded_data = [[[0, 0, 0, 0, 0, 0]] * (padding_length - len(seq)) + seq for seq in x]
x_train = torch.tensor(padded_data).float()

### Model

In [5]:
class GRUApproximator(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_length=2, dropout=0.1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True
        )
        self.layer_norm = nn.LayerNorm(hidden_size * 2)  # Учитываем bidirectional (2 * hidden_size)
        self.fc1 = nn.Linear(2 * hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_length)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.Tanh()
        self.act2 = nn.Softmax()

    def forward(self, x):
        batch_size, _, _ = x.size()
        # Initialize hidden state
        hidden_vec = torch.zeros(
            (self.num_layers * 2, batch_size, self.hidden_size)
        ).to(x.device)

        # GRU forward
        output, _ = self.gru(x, hidden_vec)
        output = output[:, -1, :]

        # Apply LayerNorm to output
        output = self.layer_norm(output)

        # Pass through fully connected layers
        y = self.dropout(self.activation(self.fc1(output)))
        y = self.act2(self.dropout(self.fc2(y)))

        return y


### Training

In [11]:
def objective(trial, x_train, y_train):

    hidden_size = trial.suggest_int("hidden_size", 32, 256)
    num_layers = trial.suggest_int("num_layers", 2, 4)
    dropout = trial.suggest_float("dropout", 0.0, 0.5)

    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)

    model = GRUApproximator(input_size=6, hidden_size=hidden_size, num_layers=num_layers,
                     dropout=dropout)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    model.to(device)

    dataset = TensorDataset(x_train, y_train)
    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)

    model.train()

    for epoch in range(3):
        model.train()
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()

            y_pred = model(x_batch.to(device))

            loss = criterion(y_pred, y_batch.to(device))
            loss.backward()
            clip_grad_norm_(model.parameters(), 2)
            optimizer.step()

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            inputs, targets = batch
            outputs = model(inputs.to(device))
            loss = criterion(outputs, targets.to(device))
            val_loss += loss.item()

    # Сохранение модели, если текущая лучше предыдущих:
    val_loss_avg = val_loss / len(val_loader)

    return val_loss_avg


In [12]:
study = optuna.create_study(direction="minimize")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
study.optimize(lambda trial: objective(trial, x_train, y_train), n_trials=25)

print("Лучшие параметры:", study.best_params)

[I 2025-03-28 20:55:03,549] A new study created in memory with name: no-name-a946ea6a-20d2-4a99-a97c-c910d02c161f
[I 2025-03-28 20:55:37,498] Trial 0 finished with value: 0.6618940387314898 and parameters: {'hidden_size': 185, 'num_layers': 2, 'dropout': 0.024273867223072343, 'learning_rate': 0.002341907226133483}. Best is trial 0 with value: 0.6618940387314898.
[I 2025-03-28 20:56:03,229] Trial 1 finished with value: 0.6399228682362936 and parameters: {'hidden_size': 39, 'num_layers': 3, 'dropout': 0.016952099786626762, 'learning_rate': 0.00014368414909713277}. Best is trial 1 with value: 0.6399228682362936.
[I 2025-03-28 20:56:29,321] Trial 2 finished with value: 0.6407926816281264 and parameters: {'hidden_size': 50, 'num_layers': 3, 'dropout': 0.009991916978159376, 'learning_rate': 0.00017956570375960272}. Best is trial 1 with value: 0.6399228682362936.
[I 2025-03-28 20:56:55,341] Trial 3 finished with value: 0.6506899887953347 and parameters: {'hidden_size': 56, 'num_layers': 3, 'd

Лучшие параметры: {'hidden_size': 134, 'num_layers': 2, 'dropout': 0.11713998871447007, 'learning_rate': 0.0006931035176943864}


In [15]:
# Лучшие параметры: {'hidden_size': 134, 'num_layers': 2, 'dropout': 0.11713998871447007, 'learning_rate': 0.0006931035176943864}
hidden_size = 134 # study.best_params["hidden_size"]
num_layers = 2 # study.best_params["num_layers"]
dropout = 0.11713998871447007 # study.best_params["dropout"]
learning_rate = 0.0006931035176943864 # study.best_params["learning_rate"]

model = GRUApproximator(input_size=6, hidden_size=hidden_size, num_layers=num_layers,
                     dropout=dropout)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

model.to(device)

train_dataset = TensorDataset(x_train, y_train)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

model.train()

total_loss = 0
for epoch in range(30):
    model.train()
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()

        y_pred = model(x_batch.to(device))

        loss = criterion(y_pred, y_batch.to(device))
        loss.backward()
        clip_grad_norm_(model.parameters(), 2)
        optimizer.step()
        total_loss += loss.item()
    print("epoch:", epoch, "loss:", loss.item())

total_loss_avg = total_loss / len(train_loader)

torch.save(model.state_dict(), f"best_model_trial_{total_loss_avg}.pth")

epoch: 0 loss: 0.6390072703361511
epoch: 1 loss: 0.6359899640083313
epoch: 2 loss: 0.6602068543434143
epoch: 3 loss: 0.5893398523330688
epoch: 4 loss: 0.6088787913322449
epoch: 5 loss: 0.5687258243560791
epoch: 6 loss: 0.5635789632797241
epoch: 7 loss: 0.605431079864502
epoch: 8 loss: 0.5566391348838806
epoch: 9 loss: 0.5563504099845886
epoch: 10 loss: 0.5482103824615479
epoch: 11 loss: 0.5331912636756897
epoch: 12 loss: 0.5218848586082458
epoch: 13 loss: 0.5396378636360168
epoch: 14 loss: 0.5580545663833618
epoch: 15 loss: 0.5273128747940063
epoch: 16 loss: 0.5179847478866577
epoch: 17 loss: 0.5153135061264038
epoch: 18 loss: 0.5260172486305237
epoch: 19 loss: 0.49683091044425964
epoch: 20 loss: 0.5444058179855347
epoch: 21 loss: 0.4735460877418518
epoch: 22 loss: 0.48961353302001953
epoch: 23 loss: 0.5178319811820984
epoch: 24 loss: 0.49323779344558716
epoch: 25 loss: 0.5382354855537415
epoch: 26 loss: 0.5215072631835938
epoch: 27 loss: 0.49264270067214966
epoch: 28 loss: 0.531894445

### Evaluating

In [11]:
hidden_size = 134 # study.best_params["hidden_size"]
num_layers = 2 # study.best_params["num_layers"]
dropout = 0.11713998871447007 # study.best_params["dropout"]
learning_rate = 0.0006931035176943864 # study.best_params["learning_rate"]

model = GRUApproximator(input_size=6, hidden_size=hidden_size, num_layers=num_layers,
                     dropout=dropout)

state_dict = torch.load('best_model_trial_16.349365157046122.pth')

# Загрузка весов в модель
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.load_state_dict(state_dict)
model.to(device)

GRUApproximator(
  (gru): GRU(6, 134, num_layers=2, batch_first=True, dropout=0.11713998871447007, bidirectional=True)
  (layer_norm): LayerNorm((268,), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=268, out_features=134, bias=True)
  (fc2): Linear(in_features=134, out_features=2, bias=True)
  (dropout): Dropout(p=0.11713998871447007, inplace=False)
  (activation): Tanh()
  (act2): Softmax(dim=None)
)

In [12]:
model.eval()
with torch.no_grad():
    for j in range(rounds):
        print("round:", j + 1)
        for i in range(players):

            print("Player:", i + 1)
            x = x_train[j*players + i].to(device).unsqueeze(0)

            print("Cards:")
            print(game.convert_cards(hands[j*players + i]))
            hand1 = PokerHandEvaluator(hands[j*players + i], 52, 4)
            print(hand1.get_hand_rank())

            print("True val:", y_train[j*players + i])
            y_pred = model(x)
            print("Pred val:", y_pred)

round: 1
Player: 1
Cards:
[{'power': '3', 'type': 2}, {'power': '7', 'type': 0}, {'power': 'ace', 'type': 3}, {'power': '5', 'type': 0}, {'power': '7', 'type': 2}]
PokerHand.ONE_PAIR
True val: tensor([1., 0.])
Pred val: tensor([[0.9924, 0.0076]], device='cuda:0')
Player: 2
Cards:
[{'power': '3', 'type': 1}, {'power': '8', 'type': 1}, {'power': 'ace', 'type': 3}, {'power': '5', 'type': 0}, {'power': '7', 'type': 2}]
PokerHand.HIGH_CARD
True val: tensor([0., 1.])
Pred val: tensor([[9.8937e-09, 1.0000e+00]], device='cuda:0')
Player: 3
Cards:
[{'power': '5', 'type': 3}, {'power': '4', 'type': 3}, {'power': 'ace', 'type': 3}, {'power': '5', 'type': 0}, {'power': '7', 'type': 2}]
PokerHand.ONE_PAIR
True val: tensor([1., 0.])
Pred val: tensor([[9.9998e-01, 1.6139e-05]], device='cuda:0')
Player: 4
Cards:
[{'power': '3', 'type': 0}, {'power': '10', 'type': 3}, {'power': 'ace', 'type': 3}, {'power': '5', 'type': 0}, {'power': '7', 'type': 2}]
PokerHand.HIGH_CARD
True val: tensor([1., 0.])
Pred v

  return self._call_impl(*args, **kwargs)


In [None]:
model.eval()

train_dataset = TensorDataset(x_train, y_train)

val_loader = DataLoader(train_dataset, batch_size=128, shuffle=False)
val_loss = 0
with torch.no_grad():
    for batch in val_loader:
        inputs, targets = batch
        outputs = model(inputs.to(device))
        loss = criterion(outputs, targets.to(device))
        val_loss += loss.item()

val_loss_avg = val_loss / len(val_loader)